G++ 足够聪明,能把它优化掉吗?

Lin*_*ios 2 c++ optimization performance g++

想象一下以下情况:

// Option bundle handed to thing(); it carries a single boolean flag.
// NOTE: as shown, this snippet relies on <iostream> being included and on
// cout being usable unqualified; neither is part of the excerpt.
struct Args
{
  bool arg;  // selects which of the two messages thing() prints
};
// Writes one of two fixed messages to cout depending on arg.arg.
// Args is taken by value.
void thing(Args arg)
{
  if(arg.arg)
    cout<<"arg.arg is true\n";
  else
    cout<<"arg.arg is false\n";
}
// Calls thing() exactly once with the flag set to false, so from this
// call site only the else branch of thing() can execute.
int main()
{
  Args a;
  a.arg=false;
  thing(a);
}
Run Code Online (Sandbox Code Playgroud)

编译器是否足够聪明,能够删除switch,if和else中在程序运行期间显然永远不会被执行的分支?控制这些语句的变量是否必须是const?最后,除了使用预处理器(preprocessor)之外,是否只能完全不使用变量(一想到那样的代码我就害怕)?

只是为了澄清,真实情况是我正在编写一个类,程序员可以选择是否启用某个功能.禁用该功能可以在服务器上节省大量处理时间,并节省类与服务器之间的一部分带宽.我试图弄清楚应该使用构造函数参数形式的变量,预处理器指令,还是其他解决方案.如果禁用了该功能,我希望"功能已启用"时的逻辑分支根本不被考虑.我知道使用预处理器的方案可以做到这一点,但我想避免大量使用#ifdef,#elif,并且我希望能够重用同一个已编译的共享对象.向程序员开放源代码没有问题,因为这本来就会是开源的.

编辑:我用下面(回答中)给出的编译命令进行了测试,并查看了生成的汇编代码.我不能完全看懂,但确实看到了一条跳转指令(jne).如果有人能看懂,下面是汇编输出:

    .file   "blah.cpp"
    .section    .rodata.str1.1,"aMS",@progbits,1
.LC0:
    .string "arg.arg is true\n"
.LC1:
    .string "arg.arg is false\n"
    .text
    .p2align 4,,15
# thing(Args), emitted under its mangled name _Z5thing4Args (AT&T syntax,
# 32-bit x86). This is the standalone, globally visible copy of the
# function, and it still contains both branches.
.globl _Z5thing4Args
    .type   _Z5thing4Args, @function
_Z5thing4Args:
.LFB1003:
    .cfi_startproc
    # Frame-pointer prologue, then 24 bytes reserved for outgoing arguments.
    pushl   %ebp
    .cfi_def_cfa_offset 8
    movl    %esp, %ebp
    .cfi_offset 5, -8
    .cfi_def_cfa_register 5
    subl    $24, %esp
    # Test the byte at 8(%ebp): the first stack slot above the saved %ebp
    # and the return address, i.e. the by-value Args argument (arg.arg).
    # A non-zero value jumps to the "true" path at .L5.
    cmpb    $0, 8(%ebp)
    jne .L5
    # Fall-through is the "false" path: __ostream_insert(cout, .LC1, 17),
    # where .LC1 is "arg.arg is false\n" and 17 is its length in bytes.
    movl    $17, 8(%esp)
    movl    $.LC1, 4(%esp)
    movl    $_ZSt4cout, (%esp)
    call    _ZSt16__ostream_insertIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_PKS3_i
    leave
    .cfi_remember_state
    .cfi_restore 5
    .cfi_def_cfa 4, 4
    ret
    .p2align 4,,7
    .p2align 3
.L5:
    .cfi_restore_state
    # "True" path: __ostream_insert(cout, .LC0, 16), where .LC0 is
    # "arg.arg is true\n" and 16 is its length in bytes.
    movl    $16, 8(%esp)
    movl    $.LC0, 4(%esp)
    movl    $_ZSt4cout, (%esp)
    call    _ZSt16__ostream_insertIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_PKS3_i
    leave
    .cfi_def_cfa 4, 4
    .cfi_restore 5
    ret
    .cfi_endproc
.LFE1003:
    .size   _Z5thing4Args, .-_Z5thing4Args
    .p2align 4,,15
# main(). There is no call to _Z5thing4Args and no compare/branch in this
# function: it passes .LC1 ("arg.arg is false\n", length 17) straight to
# __ostream_insert, so the body of thing() was inlined here and only the
# false path was kept.
.globl main
    .type   main, @function
main:
.LFB1004:
    .cfi_startproc
    pushl   %ebp
    .cfi_def_cfa_offset 8
    movl    %esp, %ebp
    .cfi_offset 5, -8
    .cfi_def_cfa_register 5
    # Align the stack to 16 bytes and reserve 16 bytes for call arguments.
    andl    $-16, %esp
    subl    $16, %esp
    # __ostream_insert(cout, .LC1, 17): the false-branch message only.
    movl    $17, 8(%esp)
    movl    $.LC1, 4(%esp)
    movl    $_ZSt4cout, (%esp)
    call    _ZSt16__ostream_insertIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_PKS3_i
    # Return value of main is 0.
    xorl    %eax, %eax
    leave
    .cfi_restore 5
    .cfi_def_cfa 4, 4
    ret
    .cfi_endproc
.LFE1004:
    .size   main, .-main
    .p2align 4,,15
    # Static-initialisation routine for this translation unit. It runs the
    # std::ios_base::Init constructor on _ZStL8__ioinit and then registers
    # the matching destructor through __cxa_atexit. It is unrelated to the
    # branch in thing().
    .type   _GLOBAL__I__Z5thing4Args, @function
_GLOBAL__I__Z5thing4Args:
.LFB1009:
    .cfi_startproc
    pushl   %ebp
    .cfi_def_cfa_offset 8
    movl    %esp, %ebp
    .cfi_offset 5, -8
    .cfi_def_cfa_register 5
    subl    $24, %esp
    # Construct the ios_base::Init object.
    movl    $_ZStL8__ioinit, (%esp)
    call    _ZNSt8ios_base4InitC1Ev
    # __cxa_atexit(destructor, object, __dso_handle)
    movl    $__dso_handle, 8(%esp)
    movl    $_ZStL8__ioinit, 4(%esp)
    movl    $_ZNSt8ios_base4InitD1Ev, (%esp)
    call    __cxa_atexit
    leave
    .cfi_restore 5
    .cfi_def_cfa 4, 4
    ret
    .cfi_endproc
.LFE1009:
    .size   _GLOBAL__I__Z5thing4Args, .-_GLOBAL__I__Z5thing4Args
    .section    .ctors,"aw",@progbits
    .align 4
    .long   _GLOBAL__I__Z5thing4Args
    .local  _ZStL8__ioinit
    .comm   _ZStL8__ioinit,1,1
    .weakref    _ZL20__gthrw_pthread_oncePiPFvvE,pthread_once
    .weakref    _ZL27__gthrw_pthread_getspecificj,pthread_getspecific
    .weakref    _ZL27__gthrw_pthread_setspecificjPKv,pthread_setspecific
    .weakref    _ZL22__gthrw_pthread_createPmPK14pthread_attr_tPFPvS3_ES3_,pthread_create
    .weakref    _ZL20__gthrw_pthread_joinmPPv,pthread_join
    .weakref    _ZL21__gthrw_pthread_equalmm,pthread_equal
    .weakref    _ZL20__gthrw_pthread_selfv,pthread_self
    .weakref    _ZL22__gthrw_pthread_detachm,pthread_detach
    .weakref    _ZL22__gthrw_pthread_cancelm,pthread_cancel
    .weakref    _ZL19__gthrw_sched_yieldv,sched_yield
    .weakref    _ZL26__gthrw_pthread_mutex_lockP15pthread_mutex_t,pthread_mutex_lock
    .weakref    _ZL29__gthrw_pthread_mutex_trylockP15pthread_mutex_t,pthread_mutex_trylock
    .weakref    _ZL31__gthrw_pthread_mutex_timedlockP15pthread_mutex_tPK8timespec,pthread_mutex_timedlock
    .weakref    _ZL28__gthrw_pthread_mutex_unlockP15pthread_mutex_t,pthread_mutex_unlock
    .weakref    _ZL26__gthrw_pthread_mutex_initP15pthread_mutex_tPK19pthread_mutexattr_t,pthread_mutex_init
    .weakref    _ZL29__gthrw_pthread_mutex_destroyP15pthread_mutex_t,pthread_mutex_destroy
    .weakref    _ZL30__gthrw_pthread_cond_broadcastP14pthread_cond_t,pthread_cond_broadcast
    .weakref    _ZL27__gthrw_pthread_cond_signalP14pthread_cond_t,pthread_cond_signal
    .weakref    _ZL25__gthrw_pthread_cond_waitP14pthread_cond_tP15pthread_mutex_t,pthread_cond_wait
    .weakref    _ZL30__gthrw_pthread_cond_timedwaitP14pthread_cond_tP15pthread_mutex_tPK8timespec,pthread_cond_timedwait
    .weakref    _ZL28__gthrw_pthread_cond_destroyP14pthread_cond_t,pthread_cond_destroy
    .weakref    _ZL26__gthrw_pthread_key_createPjPFvPvE,pthread_key_create
    .weakref    _ZL26__gthrw_pthread_key_deletej,pthread_key_delete
    .weakref    _ZL30__gthrw_pthread_mutexattr_initP19pthread_mutexattr_t,pthread_mutexattr_init
    .weakref    _ZL33__gthrw_pthread_mutexattr_settypeP19pthread_mutexattr_ti,pthread_mutexattr_settype
    .weakref    _ZL33__gthrw_pthread_mutexattr_destroyP19pthread_mutexattr_t,pthread_mutexattr_destroy
    .ident  "GCC: (Ubuntu/Linaro 4.5.2-8ubuntu4) 4.5.2"
    .section    .note.GNU-stack,"",@progbits
Run Code Online (Sandbox Code Playgroud)

编辑:按照建议添加asm("#aksdjfh")之后,我进一步查看了汇编代码,发现编译器并没有把它去掉.难道#ifdef是唯一的选择吗?还是说jne指令对性能的影响实际上可以忽略不计?

R. *_*des 7

亲自尝试一下:

$ g++ -O3 -S test.cpp -o test.s
Run Code Online (Sandbox Code Playgroud)

-O3打开优化,-S告诉编译器在生成汇编代码后停止,-o指定输出文件的位置.然后,您可以检查"test.s"文件,看看编译器是否进行了优化.显然这需要一些汇编知识.如果您像我一样觉得AT&T语法难以阅读,更喜欢Intel语法,还可以加上-masm=intel.

在代码中添加类似asm("# this is something")的行可能会有所帮助.这些行会在生成的汇编中显示为注释,使您更容易找到自己感兴趣的部分.

在我的机器上,GCC 4.8的一个快照版本似乎并没有把死代码优化掉.我在每个分支中各添加了一条这样的asm注释来标识它们,生成的结果如下:

    .file   "test.cpp"
    .intel_syntax noprefix
    .section    .rodata.str1.1,"aMS",@progbits,1
.LC0:
    .string "arg.arg is true\n"
.LC1:
    .string "arg.arg is false\n"
    .text
    .p2align 4,,15
    # thing(Args) as emitted by the GCC 4.8 snapshot (Intel syntax, 32-bit
    # x86). The function is globally visible and both branches are present,
    # each tagged by the inline-asm marker text the answer's author added.
    .globl  _Z5thing4Args
    .type   _Z5thing4Args, @function
_Z5thing4Args:
.LFB1215:
    .cfi_startproc
    # No frame pointer: 28 bytes of frame are reserved directly.
    sub esp, 28
    .cfi_def_cfa_offset 32
    # [esp+32] lies above the 28-byte frame and the 4-byte return address,
    # i.e. it is the first stack argument (arg.arg). Non-zero jumps to .L6.
    cmp BYTE PTR [esp+32], 0
    jne .L6
    # Fall-through is the false branch, starting with its inline-asm marker.
#APP
# 13 "test.cpp" 1
    This is the false branch
# 0 "" 2
#NO_APP
    # __ostream_insert(cout, .LC1, 17); .LC1 is "arg.arg is false\n".
    mov DWORD PTR [esp+8], 17
    mov DWORD PTR [esp+4], OFFSET FLAT:.LC1
    mov DWORD PTR [esp], OFFSET FLAT:_ZSt4cout
    call    _ZSt16__ostream_insertIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_PKS3_i
    add esp, 28
    .cfi_remember_state
    .cfi_def_cfa_offset 4
    ret
    .p2align 4,,7
    .p2align 3
.L6:
    .cfi_restore_state
    # True branch, starting with its inline-asm marker.
#APP
# 10 "test.cpp" 1
    This is the true branch
# 0 "" 2
#NO_APP
    # __ostream_insert(cout, .LC0, 16); .LC0 is "arg.arg is true\n".
    mov DWORD PTR [esp+8], 16
    mov DWORD PTR [esp+4], OFFSET FLAT:.LC0
    mov DWORD PTR [esp], OFFSET FLAT:_ZSt4cout
    call    _ZSt16__ostream_insertIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_PKS3_i
    add esp, 28
    .cfi_def_cfa_offset 4
    ret
    .cfi_endproc
.LFE1215:
    .size   _Z5thing4Args, .-_Z5thing4Args
    .section    .text.startup,"ax",@progbits
    .p2align 4,,15
    # main() in the non-static build. thing() is not inlined here: the
    # argument byte 0 (false) is stored on the stack and the out-of-line
    # _Z5thing4Args is called, so the branch is decided at run time inside
    # the callee.
    .globl  main
    .type   main, @function
main:
.LFB1216:
    .cfi_startproc
    push    ebp
    .cfi_def_cfa_offset 8
    .cfi_offset 5, -8
    mov ebp, esp
    .cfi_def_cfa_register 5
    # Align the stack to 16 bytes and reserve 16 bytes for call arguments.
    and esp, -16
    sub esp, 16
    # Pass Args{false} by value: a single zero byte at [esp].
    mov BYTE PTR [esp], 0
    call    _Z5thing4Args
    # Return value of main is 0.
    xor eax, eax
    leave
    .cfi_restore 5
    .cfi_def_cfa 4, 4
    ret
    .cfi_endproc
.LFE1216:
    .size   main, .-main
    .p2align 4,,15
    # Static-initialisation routine for this translation unit. It runs the
    # std::ios_base::Init constructor on _ZStL8__ioinit and then registers
    # the matching destructor through __cxa_atexit.
    .type   _GLOBAL__sub_I__Z5thing4Args, @function
_GLOBAL__sub_I__Z5thing4Args:
.LFB1367:
    .cfi_startproc
    sub esp, 28
    .cfi_def_cfa_offset 32
    # Construct the ios_base::Init object.
    mov DWORD PTR [esp], OFFSET FLAT:_ZStL8__ioinit
    call    _ZNSt8ios_base4InitC1Ev
    # __cxa_atexit(destructor, object, __dso_handle)
    mov DWORD PTR [esp+8], OFFSET FLAT:__dso_handle
    mov DWORD PTR [esp+4], OFFSET FLAT:_ZStL8__ioinit
    mov DWORD PTR [esp], OFFSET FLAT:_ZNSt8ios_base4InitD1Ev
    call    __cxa_atexit
    add esp, 28
    .cfi_def_cfa_offset 4
    ret
    .cfi_endproc
.LFE1367:
    .size   _GLOBAL__sub_I__Z5thing4Args, .-_GLOBAL__sub_I__Z5thing4Args
    .section    .init_array,"aw"
    .align 4
    .long   _GLOBAL__sub_I__Z5thing4Args
    .local  _ZStL8__ioinit
    .comm   _ZStL8__ioinit,1,1
    .hidden __dso_handle
    .ident  "GCC: (GNU) 4.8.0 20120311 (experimental)"
    .section    .note.GNU-stack,"",@progbits
Run Code Online (Sandbox Code Playgroud)

如果您在输出中查找这些注释,就会发现两个分支都还在,并且各自都会调用某个std::cout成员函数.

发生这种情况的原因在于,该函数对其他翻译单元可见:如果您现在创建一个文件nasty.cpp,在其中声明void thing(Args arg);并以值为true的参数调用它,那么这段代码就必须存在.

所以我又进一步尝试了一下.如果我将函数标记为static,即它只在该翻译单元内部可见,GCC确实把死代码优化掉了:

    .file   "test.cpp"
    .intel_syntax noprefix
    .section    .rodata.str1.1,"aMS",@progbits,1
.LC0:
    .string "arg.arg is false\n"
    .section    .text.startup,"ax",@progbits
    .p2align 4,,15
    # main() in the build where thing() is declared static. No
    # _Z5thing4Args symbol is defined or called in this listing. Only the
    # false-branch marker and the false-branch string (.LC0 here,
    # "arg.arg is false\n") appear, with no compare or conditional jump.
    .globl  main
    .type   main, @function
main:
.LFB1216:
    .cfi_startproc
    push    ebp
    .cfi_def_cfa_offset 8
    .cfi_offset 5, -8
    mov ebp, esp
    .cfi_def_cfa_register 5
    # Align the stack to 16 bytes and reserve 16 bytes for call arguments.
    and esp, -16
    sub esp, 16
    # Inline-asm marker from the else branch of thing(), now inside main.
#APP
# 13 "test.cpp" 1
    This is the false branch
# 0 "" 2
#NO_APP
    # operator<<(cout, .LC0): here the two-argument stream inserter taking
    # a C string is called, rather than __ostream_insert with a length.
    mov DWORD PTR [esp+4], OFFSET FLAT:.LC0
    mov DWORD PTR [esp], OFFSET FLAT:_ZSt4cout
    call    _ZStlsISt11char_traitsIcEERSt13basic_ostreamIcT_ES5_PKc
    # Return value of main is 0.
    xor eax, eax
    leave
    .cfi_restore 5
    .cfi_def_cfa 4, 4
    ret
    .cfi_endproc
.LFE1216:
    .size   main, .-main
    .p2align 4,,15
    # Static-initialisation routine for this translation unit. It runs the
    # std::ios_base::Init constructor on _ZStL8__ioinit and then registers
    # the matching destructor through __cxa_atexit.
    .type   _GLOBAL__sub_I_main, @function
_GLOBAL__sub_I_main:
.LFB1367:
    .cfi_startproc
    sub esp, 28
    .cfi_def_cfa_offset 32
    # Construct the ios_base::Init object.
    mov DWORD PTR [esp], OFFSET FLAT:_ZStL8__ioinit
    call    _ZNSt8ios_base4InitC1Ev
    # __cxa_atexit(destructor, object, __dso_handle)
    mov DWORD PTR [esp+8], OFFSET FLAT:__dso_handle
    mov DWORD PTR [esp+4], OFFSET FLAT:_ZStL8__ioinit
    mov DWORD PTR [esp], OFFSET FLAT:_ZNSt8ios_base4InitD1Ev
    call    __cxa_atexit
    add esp, 28
    .cfi_def_cfa_offset 4
    ret
    .cfi_endproc
.LFE1367:
    .size   _GLOBAL__sub_I_main, .-_GLOBAL__sub_I_main
    .section    .init_array,"aw"
    .align 4
    .long   _GLOBAL__sub_I_main
    .local  _ZStL8__ioinit
    .comm   _ZStL8__ioinit,1,1
    .hidden __dso_handle
    .ident  "GCC: (GNU) 4.8.0 20120311 (experimental)"
    .section    .note.GNU-stack,"",@progbits
Run Code Online (Sandbox Code Playgroud)

在这段代码中你找不到"This is the true branch".另外,请注意false分支被移进了main函数,而thing函数已经不复存在.GCC直接把该函数的代码内联了进去,并且没有费心再单独生成它,因为我加上static之后,它不可能在其他任何地方被用到.

如果我将其标记为inline,它仍然对外可见,但显然这已足以让GCC对其进行优化.不过,如果这样做,就必须确保其他翻译单元看到的是相同的定义,以便可以根据需要在每个翻译单元中生成代码.