Difference between revisions of "Inline Specifier"
Proj user8 (talk | contribs) (→Inline Specifier With a Recursive Function) |
Proj user8 (talk | contribs) (→Basic Example of the inline Specifier) |
||
Line 23: | Line 23: | ||
} | } | ||
</syntaxhighlight> | </syntaxhighlight> | ||
− | The following | + | The following was created by Linux g++ with an optimization of '''-O1'''. |
<syntaxhighlight lang="NASM"> | <syntaxhighlight lang="NASM"> | ||
00000000 <print_NON_inline()>: | 00000000 <print_NON_inline()>: |
Revision as of 01:56, 15 December 2016
inline is a specifier for a function in C/C++ that asks the compiler to replace calls to the function with the contents of the function in the generated assembly. It is most often used when trying to speed up a program, as it removes the overhead of a function call. It is particularly useful with small functions that are called often. However, the specifier can also increase the size of your program, as it takes away the benefit of re-usability at the assembly level.
Basic Example of the inline Specifier
In the following example there are two functions that do the same thing, except that one uses the inline specifier and the other does not. When compiled using compiler optimizations, the function with the inline specifier will not have a function call. Rather, the function's contents will be put directly in the main function of the assembly output. Note: If compiled without any optimizations (-O0), the inline specifier will be ignored.
#include "stdio.h"
// Prints the integer 2 using the "%d" format, with no trailing newline.
// Declared WITHOUT the inline specifier; it is the baseline half of the
// comparison in this example.
void print_NON_inline()
{
printf("%d", 2);
}
// Prints the integer 3 using the "%d" format, with no trailing newline.
// Declared WITH the inline specifier; apart from the printed value it has
// the same body shape as print_NON_inline().
inline void print_WITH_inline()
{
printf("%d", 3);
}
// Calls print_NON_inline() and then print_WITH_inline(), once each, so the
// two call sites can be compared in the compiled output.
int main(void)
{
print_NON_inline();
print_WITH_inline();
// NOTE(review): returns 1 rather than 0; a non-zero status from main is
// conventionally treated as failure -- confirm this is intended for the demo.
return 1;
}
The following was created by Linux g++ with an optimization of -O1.
00000000 <print_NON_inline()>:
_Z16print_NON_inlinev():
0: 83 ec 10 sub $0x10,%esp
3: 6a 02 push $0x2
5: 68 00 00 00 00 push $0x0 6: R_386_32 .rodata.str1.1
a: 6a 01 push $0x1
c: e8 fc ff ff ff call d <print_NON_inline()+0xd> d: R_386_PC32 __printf_chk
11: 83 c4 1c add $0x1c,%esp
14: c3 ret
00000015 <main>:
main():
15: 8d 4c 24 04 lea 0x4(%esp),%ecx
19: 83 e4 f0 and $0xfffffff0,%esp
1c: ff 71 fc pushl -0x4(%ecx)
1f: 55 push %ebp
20: 89 e5 mov %esp,%ebp
22: 51 push %ecx
23: 83 ec 04 sub $0x4,%esp
26: e8 fc ff ff ff call 27 <main+0x12> 27: R_386_PC32 print_NON_inline()
2b: 83 ec 04 sub $0x4,%esp
2e: 6a 03 push $0x3
30: 68 00 00 00 00 push $0x0 31: R_386_32 .rodata.str1.1
35: 6a 01 push $0x1
37: e8 fc ff ff ff call 38 <main+0x23> 38: R_386_PC32 __printf_chk
3c: 83 c4 10 add $0x10,%esp
3f: b8 01 00 00 00 mov $0x1,%eax
44: 8b 4d fc mov -0x4(%ebp),%ecx
47: c9 leave
48: 8d 61 fc lea -0x4(%ecx),%esp
4b: c3 ret
Inline Specifier With a Recursive Function
For recursive functions, the inline specifier will attempt to take out as many recursive calls as possible, but it cannot get rid of all of them. In the example below, there are two recursive factorial functions, one without the inline specifier and one with. For the implementation without the inline specifier, the compiler simply creates a recursive assembly function call. For the implementation with the inline specifier, the compiler optimizes the function to use as many registers as it can so that the factorial calculations run more quickly. Instead of recursively calling itself each time there is a factorial call, it computes five factorials using four registers and then, if needed, calls itself again. The following was created by Linux g++ with an optimization of -O1.
#include "stdio.h"
// Recursive factorial, declared WITHOUT the inline specifier.
// Returns 1 when a < 2 (this includes zero and negative inputs); otherwise
// returns a * factorial_WITHOUT(a - 1).
// No overflow check is performed on the int multiplication.
int factorial_WITHOUT(int a)
{
if(a < 2){
// Base case: stops the recursion.
return 1;
}
else
{
// Recursive step: one self-call per decrement of a.
return a * factorial_WITHOUT(a-1);
}
}
// Recursive factorial, declared WITH the inline specifier.
// Returns 1 when a < 2 (this includes zero and negative inputs); otherwise
// returns a * factorial_WITH(a - 1).
// The body is the same as factorial_WITHOUT apart from the function name.
// No overflow check is performed on the int multiplication.
inline int factorial_WITH(int a)
{
if(a < 2){
// Base case: stops the recursion.
return 1;
}
else
{
// Recursive step: one self-call per decrement of a.
return a * factorial_WITH(a-1);
}
}
// Prints factorial_WITHOUT(7) and then factorial_WITH(7), each as a decimal
// integer followed by a newline, so both call sites appear in the output.
int main(void)
{
printf("%d\n", factorial_WITHOUT(7));
printf("%d\n", factorial_WITH(7));
// NOTE(review): returns 1 rather than 0; a non-zero status from main is
// conventionally treated as failure -- confirm this is intended for the demo.
return 1;
}
Disassembly of section .text:
00000000 <factorial_WITHOUT(int)>:
_Z17factorial_WITHOUTi():
0: 53 push %ebx
1: 83 ec 08 sub $0x8,%esp
4: 8b 5c 24 10 mov 0x10(%esp),%ebx
8: b8 01 00 00 00 mov $0x1,%eax
d: 83 fb 01 cmp $0x1,%ebx
10: 7e 12 jle 24 <factorial_WITHOUT(int)+0x24>
12: 83 ec 0c sub $0xc,%esp
15: 8d 43 ff lea -0x1(%ebx),%eax
18: 50 push %eax
19: e8 fc ff ff ff call 1a <factorial_WITHOUT(int)+0x1a> 1a: R_386_PC32 factorial_WITHOUT(int)
1e: 83 c4 10 add $0x10,%esp
21: 0f af c3 imul %ebx,%eax
24: 83 c4 08 add $0x8,%esp
27: 5b pop %ebx
28: c3 ret
00000029 <main>:
main():
29: 8d 4c 24 04 lea 0x4(%esp),%ecx
2d: 83 e4 f0 and $0xfffffff0,%esp
30: ff 71 fc pushl -0x4(%ecx)
33: 55 push %ebp
34: 89 e5 mov %esp,%ebp
36: 51 push %ecx
37: 83 ec 10 sub $0x10,%esp
3a: 6a 07 push $0x7
3c: e8 fc ff ff ff call 3d <main+0x14> 3d: R_386_PC32 factorial_WITHOUT(int)
41: 83 c4 0c add $0xc,%esp
44: 50 push %eax
45: 68 00 00 00 00 push $0x0 46: R_386_32 .rodata.str1.1
4a: 6a 01 push $0x1
4c: e8 fc ff ff ff call 4d <main+0x24> 4d: R_386_PC32 __printf_chk
51: c7 04 24 05 00 00 00 movl $0x5,(%esp)
58: e8 fc ff ff ff call 59 <main+0x30> 59: R_386_PC32 factorial_WITH(int)
5d: 83 c4 0c add $0xc,%esp
60: 8d 04 40 lea (%eax,%eax,2),%eax
63: 6b c0 0e imul $0xe,%eax,%eax
66: 50 push %eax
67: 68 00 00 00 00 push $0x0 68: R_386_32 .rodata.str1.1
6c: 6a 01 push $0x1
6e: e8 fc ff ff ff call 6f <main+0x46> 6f: R_386_PC32 __printf_chk
73: 83 c4 10 add $0x10,%esp
76: b8 01 00 00 00 mov $0x1,%eax
7b: 8b 4d fc mov -0x4(%ebp),%ecx
7e: c9 leave
7f: 8d 61 fc lea -0x4(%ecx),%esp
82: c3 ret
Disassembly of section .text._Z14factorial_WITHi:
00000000 <factorial_WITH(int)>:
_Z14factorial_WITHi():
0: 55 push %ebp
1: 57 push %edi
2: 56 push %esi
3: 53 push %ebx
4: 83 ec 1c sub $0x1c,%esp
7: 8b 5c 24 30 mov 0x30(%esp),%ebx
b: b8 01 00 00 00 mov $0x1,%eax
10: 83 fb 01 cmp $0x1,%ebx
13: 7e 4b jle 60 <factorial_WITH(int)+0x60>
15: 8d 73 ff lea -0x1(%ebx),%esi
18: 83 fe 01 cmp $0x1,%esi
1b: 7e 40 jle 5d <factorial_WITH(int)+0x5d>
1d: 8d 7b fe lea -0x2(%ebx),%edi
20: 83 ff 01 cmp $0x1,%edi
23: 7e 35 jle 5a <factorial_WITH(int)+0x5a>
25: 8d 6b fd lea -0x3(%ebx),%ebp
28: 83 fd 01 cmp $0x1,%ebp
2b: 7e 2a jle 57 <factorial_WITH(int)+0x57>
2d: 8d 43 fc lea -0x4(%ebx),%eax
30: 89 c2 mov %eax,%edx
32: b8 01 00 00 00 mov $0x1,%eax
37: 89 54 24 0c mov %edx,0xc(%esp)
3b: 83 fa 01 cmp $0x1,%edx
3e: 7e 14 jle 54 <factorial_WITH(int)+0x54>
40: 83 ec 0c sub $0xc,%esp
43: 8d 43 fb lea -0x5(%ebx),%eax
46: 50 push %eax
47: e8 fc ff ff ff call 48 <factorial_WITH(int)+0x48> 48: R_386_PC32 factorial_WITH(int)
4c: 83 c4 10 add $0x10,%esp
4f: 0f af 44 24 0c imul 0xc(%esp),%eax
54: 0f af c5 imul %ebp,%eax
57: 0f af c7 imul %edi,%eax
5a: 0f af c6 imul %esi,%eax
5d: 0f af c3 imul %ebx,%eax
60: 83 c4 1c add $0x1c,%esp
63: 5b pop %ebx
64: 5e pop %esi
65: 5f pop %edi
66: 5d pop %ebp
67: c3 ret