Mahdi Safsafi 225 Posted March 3, 2019 (edited) Hello guys, I always thought that the Delphi compiler takes the inlined function and TRIES to insert it (without making a call instruction) where the call to that function occurs. However for some circumstances it can do better ! it TRIES to evaluate the function (just like constexpr in c++) ! Here is an example : program Console1; {$APPTYPE CONSOLE} {$R *.res} uses System.SysUtils; function Max(a, b: Integer): Integer; inline; begin if (a > b) then Result := a else Result := b; end; var a: Integer = 5; b: Integer = 9; i: Integer; begin Writeln('--------------------------------'); i := Max(5, 9); // evaluated Writeln(Format('max=%d', [i])); i := Max(a, b); // not evaluated Writeln(Format('max=%d', [i])); Writeln('--------------------------------'); Readln; end. After compiling the above example (x64, Release mode), the compiler generates the following code: //------------------------------------------------------ Delphi ------------------------------------------------------------ 0000000000428510 | 48:8B0D 59880000 | mov rcx,qword ptr ds:[430D70] | 0000000000428517 | 48:8D15 6E010000 | lea rdx,qword ptr ds:[42868C] | rdx:EntryPoint, 000000000042868C:L"--------------------------------" 000000000042851E | E8 2DEFFDFF | call <console1.sub_407450> | 0000000000428523 | 48:89C1 | mov rcx,rax | rax:EntryPoint 0000000000428526 | E8 85F0FDFF | call <console1.sub_4075B0> | 000000000042852B | E8 60D4FDFF | call <console1.sub_405990> | 0000000000428530 | B8 09000000 | mov eax,9 | eax:EntryPoint, 9:'\t' 0000000000428535 | 8905 3D420100 | mov dword ptr ds:[43C778],eax | eax:EntryPoint 000000000042853B | 8B05 37420100 | mov eax,dword ptr ds:[43C778] | eax:EntryPoint 0000000000428541 | 8945 28 | mov dword ptr ss:[rbp+28],eax | eax:EntryPoint 0000000000428544 | C645 30 00 | mov byte ptr ss:[rbp+30],0 | 0000000000428548 | 48:8D4D 38 | lea rcx,qword ptr ss:[rbp+38] | 000000000042854C | 48:8D15 89010000 | lea rdx,qword ptr 
ds:[<sub_4286DC>] | rdx:EntryPoint, 00000000004286DC:L"max=%d" 0000000000428553 | 4C:8D45 28 | lea r8,qword ptr ss:[rbp+28] | 0000000000428557 | 4D:33C9 | xor r9,r9 | r9:EntryPoint 000000000042855A | E8 C16AFFFF | call <console1.sub_41F020> | 000000000042855F | 48:8B0D 0A880000 | mov rcx,qword ptr ds:[430D70] | 0000000000428566 | 48:8B55 38 | mov rdx,qword ptr ss:[rbp+38] | rdx:EntryPoint 000000000042856A | E8 E1EEFDFF | call <console1.sub_407450> | 000000000042856F | 48:89C1 | mov rcx,rax | rax:EntryPoint 0000000000428572 | E8 39F0FDFF | call <console1.sub_4075B0> | 0000000000428577 | E8 14D4FDFF | call <console1.sub_405990> | 000000000042857C | 8B05 72860000 | mov eax,dword ptr ds:[430BF4] | eax:EntryPoint 0000000000428582 | 3B05 70860000 | cmp eax,dword ptr ds:[430BF8] | eax:EntryPoint 0000000000428588 | 7E 08 | jle console1.428592 | 000000000042858A | 8B05 64860000 | mov eax,dword ptr ds:[430BF4] | eax:EntryPoint 0000000000428590 | EB 06 | jmp console1.428598 | 0000000000428592 | 8B05 60860000 | mov eax,dword ptr ds:[430BF8] | eax:EntryPoint 0000000000428598 | 8905 DA410100 | mov dword ptr ds:[43C778],eax | eax:EntryPoint 000000000042859E | 8B05 D4410100 | mov eax,dword ptr ds:[43C778] | eax:EntryPoint 00000000004285A4 | 8945 28 | mov dword ptr ss:[rbp+28],eax | eax:EntryPoint 00000000004285A7 | C645 30 00 | mov byte ptr ss:[rbp+30],0 | 00000000004285AB | 48:8D4D 20 | lea rcx,qword ptr ss:[rbp+20] | 00000000004285AF | 48:8D15 26010000 | lea rdx,qword ptr ds:[<sub_4286DC>] | rdx:EntryPoint, 00000000004286DC:L"max=%d" 00000000004285B6 | 4C:8D45 28 | lea r8,qword ptr ss:[rbp+28] | 00000000004285BA | 4D:33C9 | xor r9,r9 | r9:EntryPoint 00000000004285BD | E8 5E6AFFFF | call <console1.sub_41F020> | 00000000004285C2 | 48:8B0D A7870000 | mov rcx,qword ptr ds:[430D70] | 00000000004285C9 | 48:8B55 20 | mov rdx,qword ptr ss:[rbp+20] | rdx:EntryPoint 00000000004285CD | E8 7EEEFDFF | call <console1.sub_407450> | 00000000004285D2 | 48:89C1 | mov rcx,rax | rax:EntryPoint 
00000000004285D5 | E8 D6EFFDFF | call <console1.sub_4075B0> | 00000000004285DA | E8 B1D3FDFF | call <console1.sub_405990> | 00000000004285DF | 48:8B0D 8A870000 | mov rcx,qword ptr ds:[430D70] | 00000000004285E6 | 48:8D15 9F000000 | lea rdx,qword ptr ds:[42868C] | rdx:EntryPoint, 000000000042868C:L"--------------------------------" 00000000004285ED | E8 5EEEFDFF | call <console1.sub_407450> | 00000000004285F2 | 48:89C1 | mov rcx,rax | rax:EntryPoint 00000000004285F5 | E8 B6EFFDFF | call <console1.sub_4075B0> | 00000000004285FA | E8 91D3FDFF | call <console1.sub_405990> | 00000000004285FF | 48:8B0D 9A880000 | mov rcx,qword ptr ds:[430EA0] | 0000000000428606 | E8 45E6FDFF | call <console1.sub_406C50> | 000000000042860B | E8 80D3FDFF | call <console1.sub_405990> | 0000000000428610 | 90 | nop | You can see that the compiler was able to evaluate the first expression "i := Max(5, 9);" and just generated one single instruction "mov eax, 9". For the second call "i := Max(a, b);" the compiler didn't make any evaluation and it just inlined the function. This is really impressive ! the first call was completely evaluated at the compiling time. I also tested the above example with MSVC and the result was quite different (I was expecting that before running my debugger): #include "pch.h" #include <iostream> inline int max(int a, int b) { return a > b ? a : b; } constexpr int max2(int a, int b) { return a > b ? 
a : b; } int a = 5; int b = 9; int i; int main() { printf("--------------------------------\n"); i = max(5, 9); // evaluated printf("max=%d\n", i); i = max(a, b); // evaluated printf("max=%d\n", i); i = max2(5, 9); // evaluated printf("max=%d\n", i); i = max2(a, b); // evaluated printf("max=%d\n", i); printf("--------------------------------\n"); } //------------------------------------------------------ CPP ------------------------------------------------------------ 00007FF7DB431070 | 48:83EC 28 | sub rsp,28 | consoleapplication4.cpp:19 00007FF7DB431074 | 48:8D0D 85110000 | lea rcx,qword ptr ds:[7FF7DB432200] | consoleapplication4.cpp:20, 00007FF7DB432200:"--------------------------------\n" 00007FF7DB43107B | E8 90FFFFFF | call <consoleapplication4.printf> | 00007FF7DB431080 | BA 09000000 | mov edx,9 | consoleapplication4.cpp:24, 9:'\t' 00007FF7DB431085 | C705 91250000 09000000 | mov dword ptr ds:[<i>],9 | 9:'\t' 00007FF7DB43108F | 48:8D0D 92110000 | lea rcx,qword ptr ds:[7FF7DB432228] | 00007FF7DB432228:"max=%d\n" 00007FF7DB431096 | E8 75FFFFFF | call <consoleapplication4.printf> | 00007FF7DB43109B | BA 09000000 | mov edx,9 | consoleapplication4.cpp:26, 9:'\t' 00007FF7DB4310A0 | C705 76250000 09000000 | mov dword ptr ds:[<i>],9 | 9:'\t' 00007FF7DB4310AA | 48:8D0D 77110000 | lea rcx,qword ptr ds:[7FF7DB432228] | 00007FF7DB432228:"max=%d\n" 00007FF7DB4310B1 | E8 5AFFFFFF | call <consoleapplication4.printf> | 00007FF7DB4310B6 | BA 09000000 | mov edx,9 | consoleapplication4.cpp:28, 9:'\t' 00007FF7DB4310BB | C705 5B250000 09000000 | mov dword ptr ds:[<i>],9 | 9:'\t' 00007FF7DB4310C5 | 48:8D0D 5C110000 | lea rcx,qword ptr ds:[7FF7DB432228] | 00007FF7DB432228:"max=%d\n" 00007FF7DB4310CC | E8 3FFFFFFF | call <consoleapplication4.printf> | 00007FF7DB4310D1 | BA 09000000 | mov edx,9 | consoleapplication4.cpp:30, 9:'\t' 00007FF7DB4310D6 | C705 40250000 09000000 | mov dword ptr ds:[<i>],9 | 9:'\t' 00007FF7DB4310E0 | 48:8D0D 41110000 | lea rcx,qword ptr ds:[7FF7DB432228] | 
00007FF7DB432228:"max=%d\n" 00007FF7DB4310E7 | E8 24FFFFFF | call <consoleapplication4.printf> | 00007FF7DB4310EC | 48:8D0D 0D110000 | lea rcx,qword ptr ds:[7FF7DB432200] | consoleapplication4.cpp:31, 00007FF7DB432200:"--------------------------------\n" 00007FF7DB4310F3 | E8 18FFFFFF | call <consoleapplication4.printf> | 00007FF7DB4310F8 | 33C0 | xor eax,eax | consoleapplication4.cpp:32 00007FF7DB4310FA | 48:83C4 28 | add rsp,28 | 00007FF7DB4310FE | C3 | ret | Note that MSVC also generated a very short routine compared to what Delphi generated! Edited March 3, 2019 by Mahdi Safsafi Share this post Link to post
David Heffernan 2347 Posted March 3, 2019 C++ compilers are generally far better at optimisation than any Delphi compilers Share this post Link to post
Mahdi Safsafi 225 Posted March 3, 2019 10 minutes ago, David Heffernan said: C++ compilers are generally far better at optimisation than any Delphi compilers Of course! But you know, MSVC's output didn't impress me, since I was expecting to get such a result (especially with the constexpr specifier). However, Delphi did! I never thought that it can do function evaluation on the fly. Share this post Link to post
David Heffernan 2347 Posted March 4, 2019 Somewhat ironic that the output that performs worse is the one that you found impressive. 1 Share this post Link to post
Remy Lebeau 1405 Posted March 5, 2019 (edited) On 3/3/2019 at 1:55 PM, Mahdi Safsafi said: I never thought that it can do function evaluation on the fly. Why does that surprise you? ANY compiler worth its salt will perform optimizations, especially when dealing with inline functions, and would be smart enough to recognize when basic arithmetic operations are being performed on compile-time literals and just output code for the result of those operations at compile-time instead of generating code to perform those operations at runtime. "5 > 9" is ALWAYS false, so the compiler can just output False instead of outputting code to compare 5 to 9. And "if False then" will ALWAYS skip the IF block and jump to the ELSE block, so the compiler can ignore any of the code in the IF block. As such, your call to Max(5, 9) gets optimized to simply "Result := 9" when inlined, and so the compiler can simply replace the entire call to Max(5, 9) with just the literal 9. Edited March 5, 2019 by Remy Lebeau Share this post Link to post