Inline Specifier

From Embedded Systems Learning Academy
Jump to: navigation, search

inline is a specifier for a function in C/C++ that asks the compiler to replace calls to that function with the contents of the function definition in the generated assembly. It is most often used to speed up a program, because it removes the overhead of a function call. It is particularly useful with small functions that are called often. However, inlining can also increase the size of your program, because the function body is duplicated at each call site instead of being reused at the assembly level.

Basic Example of the Inline Specifier

In the following example, there are two nearly identical functions: each prints a single number, but one uses the inline specifier and the other does not. When compiled with compiler optimizations, the function with the inline specifier will not be reached through a function call. Rather, the contents of the function will be placed directly in the main function of the assembly output. Note: If compiled without any optimizations (-O0), the inline specifier will be ignored.

#include "stdio.h"

// Prints the integer 2 with printf. Declared WITHOUT the inline specifier so
// it can be compared against print_WITH_inline; in the -O1 listing that
// follows, main reaches this function through a call instruction.
void print_NON_inline()
{
	printf("%d", 2);
}

// Prints the integer 3 with printf. Same shape as print_NON_inline apart from
// the inline specifier; in the -O1 listing that follows, its body
// (push $0x3 ... call __printf_chk) appears directly inside main, and the
// listing shows no separate function for it.
inline void print_WITH_inline()
{
	printf("%d", 3);
}

// Calls each print function once so the two call sites can be compared in
// the disassembly.
int main(void)
{
	print_NON_inline();
	print_WITH_inline(); 
	// Appears as "mov $0x1,%eax" in the listing.
	// NOTE(review): a nonzero exit status conventionally signals failure; confirm 1 is intended.
	return 1;
}

The following disassembly was produced by g++ on Linux at optimization level -O1.

00000000 <print_NON_inline()>:
_Z16print_NON_inlinev():
   0:	83 ec 10             	sub    $0x10,%esp
   3:	6a 02                	push   $0x2
   5:	68 00 00 00 00       	push   $0x0	6: R_386_32	.rodata.str1.1
   a:	6a 01                	push   $0x1
   c:	e8 fc ff ff ff       	call   d <print_NON_inline()+0xd>	d: R_386_PC32	__printf_chk
  11:	83 c4 1c             	add    $0x1c,%esp
  14:	c3                   	ret    

00000015 <main>:
main():
  15:	8d 4c 24 04          	lea    0x4(%esp),%ecx
  19:	83 e4 f0             	and    $0xfffffff0,%esp
  1c:	ff 71 fc             	pushl  -0x4(%ecx)
  1f:	55                   	push   %ebp
  20:	89 e5                	mov    %esp,%ebp
  22:	51                   	push   %ecx
  23:	83 ec 04             	sub    $0x4,%esp
  26:	e8 fc ff ff ff       	call   27 <main+0x12>	27: R_386_PC32	print_NON_inline()
  2b:	83 ec 04             	sub    $0x4,%esp
  2e:	6a 03                	push   $0x3
  30:	68 00 00 00 00       	push   $0x0	31: R_386_32	.rodata.str1.1
  35:	6a 01                	push   $0x1
  37:	e8 fc ff ff ff       	call   38 <main+0x23>	38: R_386_PC32	__printf_chk
  3c:	83 c4 10             	add    $0x10,%esp
  3f:	b8 01 00 00 00       	mov    $0x1,%eax
  44:	8b 4d fc             	mov    -0x4(%ebp),%ecx
  47:	c9                   	leave  
  48:	8d 61 fc             	lea    -0x4(%ecx),%esp
  4b:	c3                   	ret

Inline Specifier With a Recursive Function

For recursive functions, the compiler will attempt to remove as many recursive calls as possible when the inline specifier is used, but it cannot get rid of all of them. In the example below, there are two recursive factorial functions, one without the inline specifier and one with. For the implementation without the inline specifier, the compiler simply creates a recursive assembly function call. For the implementation with the inline specifier, the compiler unrolls the recursion so that the factorial calculation runs more quickly: it expands five levels of the recursion, keeping the intermediate values in four registers and one stack slot, and then calls itself again if needed. In main, the first two levels are folded into the call site as well: main calls factorial_WITH with 5 and multiplies the result by 42 (7 × 6). Note: This is only an example. Different compilers will optimize in different ways and the optimization will vary depending on how aggressive the optimization is. The following was produced by g++ on Linux at optimization level -O1.

#include "stdio.h"

// Recursive factorial, declared WITHOUT the inline specifier.
// Returns 1 when a < 2; otherwise returns a multiplied by the factorial of
// a-1. No overflow check is performed on the int result.
// In the -O1 listing that follows, this compiles to a single self-call per
// level.
int factorial_WITHOUT(int a)
{
    // Base case: anything below 2 (including negative values) yields 1.
    if(a < 2){
        return 1;
    }
    else
    {
        return a * factorial_WITHOUT(a-1);  
    }
}

// Recursive factorial, declared WITH the inline specifier; the body matches
// factorial_WITHOUT apart from the function name.
// Returns 1 when a < 2; otherwise returns a multiplied by the factorial of
// a-1. No overflow check is performed on the int result.
// In the -O1 listing that follows, several levels of the recursion are
// expanded in place before the function calls itself again.
inline int factorial_WITH(int a)
{
    // Base case: anything below 2 (including negative values) yields 1.
    if(a < 2){
        return 1;
    }
    else
    {
        return a * factorial_WITH(a-1);
    }
}
// Prints 7! computed by each factorial variant so the two call sites can be
// compared in the disassembly.
int main(void)
{
    // Listing: push $0x7 followed by a call to factorial_WITHOUT.
    printf("%d\n", factorial_WITHOUT(7));
    // Listing: the call site passes 5 to factorial_WITH and then scales the
    // result by 3 (lea) and 14 (imul $0xe), i.e. by 42 = 7 * 6.
    printf("%d\n", factorial_WITH(7));
    // Appears as "mov $0x1,%eax" in the listing.
    // NOTE(review): a nonzero exit status conventionally signals failure; confirm 1 is intended.
    return 1;
}
Disassembly of section .text:

00000000 <factorial_WITHOUT(int)>:
_Z17factorial_WITHOUTi():
   0:   53                      push   %ebx
   1:   83 ec 08                sub    $0x8,%esp
   4:   8b 5c 24 10             mov    0x10(%esp),%ebx
   8:   b8 01 00 00 00          mov    $0x1,%eax
   d:   83 fb 01                cmp    $0x1,%ebx
  10:   7e 12                   jle    24 <factorial_WITHOUT(int)+0x24>
  12:   83 ec 0c                sub    $0xc,%esp
  15:   8d 43 ff                lea    -0x1(%ebx),%eax
  18:   50                      push   %eax
  19:   e8 fc ff ff ff          call   1a <factorial_WITHOUT(int)+0x1a> 1a: R_386_PC32  factorial_WITHOUT(int)
  1e:   83 c4 10                add    $0x10,%esp
  21:   0f af c3                imul   %ebx,%eax
  24:   83 c4 08                add    $0x8,%esp
  27:   5b                      pop    %ebx
  28:   c3                      ret    

00000029 <main>:
main():
  29:   8d 4c 24 04             lea    0x4(%esp),%ecx
  2d:   83 e4 f0                and    $0xfffffff0,%esp
  30:   ff 71 fc                pushl  -0x4(%ecx)
  33:   55                      push   %ebp
  34:   89 e5                   mov    %esp,%ebp
  36:   51                      push   %ecx
  37:   83 ec 10                sub    $0x10,%esp
  3a:   6a 07                   push   $0x7
  3c:   e8 fc ff ff ff          call   3d <main+0x14>   3d: R_386_PC32  factorial_WITHOUT(int)
  41:   83 c4 0c                add    $0xc,%esp
  44:   50                      push   %eax
  45:   68 00 00 00 00          push   $0x0 46: R_386_32    .rodata.str1.1
  4a:   6a 01                   push   $0x1
  4c:   e8 fc ff ff ff          call   4d <main+0x24>   4d: R_386_PC32  __printf_chk
  51:   c7 04 24 05 00 00 00    movl   $0x5,(%esp)
  58:   e8 fc ff ff ff          call   59 <main+0x30>   59: R_386_PC32  factorial_WITH(int)
  5d:   83 c4 0c                add    $0xc,%esp
  60:   8d 04 40                lea    (%eax,%eax,2),%eax
  63:   6b c0 0e                imul   $0xe,%eax,%eax
  66:   50                      push   %eax
  67:   68 00 00 00 00          push   $0x0 68: R_386_32    .rodata.str1.1
  6c:   6a 01                   push   $0x1
  6e:   e8 fc ff ff ff          call   6f <main+0x46>   6f: R_386_PC32  __printf_chk
  73:   83 c4 10                add    $0x10,%esp
  76:   b8 01 00 00 00          mov    $0x1,%eax
  7b:   8b 4d fc                mov    -0x4(%ebp),%ecx
  7e:   c9                      leave  
  7f:   8d 61 fc                lea    -0x4(%ecx),%esp
  82:   c3                      ret    

Disassembly of section .text._Z14factorial_WITHi:

00000000 <factorial_WITH(int)>:
_Z14factorial_WITHi():
   0:   55                      push   %ebp
   1:   57                      push   %edi
   2:   56                      push   %esi
   3:   53                      push   %ebx
   4:   83 ec 1c                sub    $0x1c,%esp
   7:   8b 5c 24 30             mov    0x30(%esp),%ebx
   b:   b8 01 00 00 00          mov    $0x1,%eax
  10:   83 fb 01                cmp    $0x1,%ebx
  13:   7e 4b                   jle    60 <factorial_WITH(int)+0x60>
  15:   8d 73 ff                lea    -0x1(%ebx),%esi
  18:   83 fe 01                cmp    $0x1,%esi
  1b:   7e 40                   jle    5d <factorial_WITH(int)+0x5d>
  1d:   8d 7b fe                lea    -0x2(%ebx),%edi
  20:   83 ff 01                cmp    $0x1,%edi
  23:   7e 35                   jle    5a <factorial_WITH(int)+0x5a>
  25:   8d 6b fd                lea    -0x3(%ebx),%ebp
  28:   83 fd 01                cmp    $0x1,%ebp
  2b:   7e 2a                   jle    57 <factorial_WITH(int)+0x57>
  2d:   8d 43 fc                lea    -0x4(%ebx),%eax
  30:   89 c2                   mov    %eax,%edx
  32:   b8 01 00 00 00          mov    $0x1,%eax
  37:   89 54 24 0c             mov    %edx,0xc(%esp)
  3b:   83 fa 01                cmp    $0x1,%edx
  3e:   7e 14                   jle    54 <factorial_WITH(int)+0x54>
  40:   83 ec 0c                sub    $0xc,%esp
  43:   8d 43 fb                lea    -0x5(%ebx),%eax
  46:   50                      push   %eax
  47:   e8 fc ff ff ff          call   48 <factorial_WITH(int)+0x48>    48: R_386_PC32  factorial_WITH(int)
  4c:   83 c4 10                add    $0x10,%esp
  4f:   0f af 44 24 0c          imul   0xc(%esp),%eax
  54:   0f af c5                imul   %ebp,%eax
  57:   0f af c7                imul   %edi,%eax
  5a:   0f af c6                imul   %esi,%eax
  5d:   0f af c3                imul   %ebx,%eax
  60:   83 c4 1c                add    $0x1c,%esp
  63:   5b                      pop    %ebx
  64:   5e                      pop    %esi
  65:   5f                      pop    %edi
  66:   5d                      pop    %ebp
  67:   c3                      ret