有没有让 GCC 优化掉冗余指令的技巧?

Chr*_* BB 3 c gcc arm

使用 gcc -mcpu=cortex-m0 -mthumb -Os 编译时,会生成冗余指令,如下面这个示例所示:

/*
 * Illustrative example: stores zero ints into the buffer at p, starting at
 * byte offset n - 4 and stepping down 4 bytes per iteration.
 *
 * The do/while body always runs at least once. The loop stops as soon as the
 * decremented n is no longer greater than 0, so when the offset reaches
 * exactly 0 after a decrement, the int at offset 0 is not written.
 */
void memzero(void* p, int n)
{
    n -= 4;                         /* byte offset of the first int to clear */
    do
    {
        *(int*)((char*)p + n) = 0;  /* store a zero int at byte offset n */
        n -= 4;                     /* step down to the previous int */
    }
    while(n > 0);                   /* signed comparison against zero */
}
Run Code Online (Sandbox Code Playgroud)

结果是:

; memzero(void *p, int n) as emitted by gcc -mcpu=cortex-m0 -mthumb -Os.
; The store uses r0 as the base address (p) and r1 as the byte offset (n).
memzero:
  movs r3, #0          ; r3 = 0, the value stored on every iteration
  subs r1, r1, #4      ; n -= 4 (adjustment before entering the loop)
.L2:
  str r3, [r0, r1]     ; *(int*)((char*)p + n) = 0
  subs r1, r1, #4      ; n -= 4; this already updates the condition flags
  cmp r1, #0           ; recompute the flags for n compared with 0
  bgt .L2              ; loop while n > 0 (signed)
  bx lr                ; return to the caller
Run Code Online (Sandbox Code Playgroud)

显然,显式比较本质上是一个 nop。有没有办法打开更多优化来解决这个问题?

kri*_*isz 5

删除比较会改变函数的行为。

BGT 指令在 Z == 0 且 N == V 时跳转。当 n 发生溢出时,这一点就很关键。

考虑使用n = -2147483644(如果int是 32 位)调用函数:

; memzero with the cmp removed, traced for a call with n = -2147483644
; (32-bit int) to show that the branch decision changes.
memzero:
        movs    r3, #0        ; r3 = 0, the value to store
        subs    r1, r1, #4    ; n = -2147483648
.L2:
        str     r3, [r0, r1]  ; store 0 at p + n
        subs    r1, r1, #4    ; n = 2147483644, Z = 0, N = 0, V = 1
        ;cmp     r1, #0       ; (would set Z = 0, N = 0, V = 0)
        bgt     .L2           ; doesn't jump, even though n is positive
        bx      lr            ; return after a single iteration
Run Code Online (Sandbox Code Playgroud)

如果我们把循环条件改为测试 n >= 0,这个优化就会生效,因为有一条指令(BPL)会在 N == 0 时跳转:

; memzero when the loop condition is n >= 0: the flags left by subs are
; enough for the branch, so no separate cmp is emitted.
memzero:
        movs    r3, #0        ; r3 = 0, the value to store
        subs    r1, r1, #4    ; n -= 4 (adjustment before entering the loop)
.L2:
        str     r3, [r0, r1]  ; store 0 at p + n
        subs    r1, r1, #4    ; n -= 4; sets N from the result
        bpl     .L2           ; loop while N == 0
        bx      lr            ; return to the caller
Run Code Online (Sandbox Code Playgroud)

测试程序

#include <stdio.h>
#include <limits.h>

/*
 * Counts n down in steps of 4, comparing n against zero explicitly before the
 * signed "greater than" branch, and returns the final value of n.
 *
 * The cmp recomputes the flags from n itself, so a signed overflow in the
 * preceding subs does not affect whether the branch is taken.
 */
__attribute__((noinline)) int with_cmp(int n) {
    asm("1:\n\t"                    /* numeric local label: emits no named symbol */
        "subs    %[n], #4\n\t"      /* n -= 4 */
        "cmp     %[n], #0\n\t"      /* set flags from n compared with 0 */
        "bgt     1b"                /* loop while n > 0 (signed) */
        : [n] "+r" (n)
        :
        : "cc");                    /* subs and cmp modify the condition flags */
    return n;
}

/*
 * Counts n down in steps of 4 and branches directly on the flags left by
 * subs, with no explicit compare, then returns the final value of n.
 *
 * bgt is taken when Z == 0 and N == V, so if the subtraction overflows
 * (V = 1 with a positive result) the loop exits even though n > 0.
 */
__attribute__((noinline)) int without_cmp(int n) {
    asm("1:\n\t"                    /* numeric local label: emits no named symbol */
        "subs    %[n], #4\n\t"      /* n -= 4; the flags come from this subtraction */
        "bgt     1b"                /* loop while Z == 0 and N == V */
        : [n] "+r" (n)
        :
        : "cc");                    /* subs modifies the condition flags */
    return n;
}

/* Runs both loop variants on INT_MIN and prints the value each one returns. */
int main() {
    const int cmp_result = with_cmp(INT_MIN);
    const int no_cmp_result = without_cmp(INT_MIN);

    printf("with cmp: %d\nwithout cmp: %d\n", cmp_result, no_cmp_result);
    return 0;
}
Run Code Online (Sandbox Code Playgroud)

输出:

with cmp: 0              // loops as long as n > 0
without cmp: 2147483644  // immediately returns with positive n
Run Code Online (Sandbox Code Playgroud)