Tav*_*nes 10 c optimization gcc integer-overflow clang
这是一个将int加到另一个的 C 函数,如果发生溢出则失败:
int safe_add(int *value, int delta) {
if (*value >= 0) {
if (delta > INT_MAX - *value) {
return -1;
}
} else {
if (delta < INT_MIN - *value) {
return -1;
}
}
*value += delta;
return 0;
}
Run Code Online (Sandbox Code Playgroud)
safe_add(int*, int):
movl (%rdi), %eax
testl %eax, %eax
js .L2
movl $2147483647, %edx
subl %eax, %edx
cmpl %esi, %edx
jl .L6
.L4:
addl %esi, %eax
movl %eax, (%rdi)
xorl %eax, %eax
ret
.L2:
movl $-2147483648, %edx
subl %eax, %edx
cmpl %esi, %edx
jle .L4
.L6:
movl $-1, %eax
ret
Run Code Online (Sandbox Code Playgroud)
这个版本与 __builtin_add_overflow()
int safe_add(int *value, int delta) {
int result;
if (__builtin_add_overflow(*value, delta, &result)) {
return -1;
} else {
*value = result;
return 0;
}
}
Run Code Online (Sandbox Code Playgroud)
为更好地优化:
safe_add(int*, int):
xorl %eax, %eax
addl (%rdi), %esi
seto %al
jo .L5
movl %esi, (%rdi)
ret
.L5:
movl $-1, %eax
ret
Run Code Online (Sandbox Code Playgroud)
但我很好奇是否有一种不使用内置函数的方法可以通过 GCC 或 Clang 进行模式匹配。
我想出的最好的方法是,如果您无法访问架构的溢出标志,那么在unsigned. 想想这里的所有位算术,因为我们只对最高位感兴趣,当解释为有符号值时,它是符号位。
(所有模符号错误,我没有仔细检查,但我希望这个想法很清楚)
#include <stdbool.h>
bool overadd(int a[static 1], int b) {
unsigned A = a[0];
unsigned B = b;
// This computation will be done anyhow
unsigned AB = A + B;
// See if the sign bits are equal
unsigned AeB = ~(A^B);
unsigned AuAB = (A^AB);
// The function result according to these should be:
//
// AeB \ AuAB | false | true
//------------+-------+------
// false | false | false
// true | false | true
//
// So the expression to compute from the sign bits is (AeB & AuAB)
// This is INT_MAX
unsigned M = -1U/2;
bool ret = (AeB & AuAB) > M;
if (!ret) a[0] += b;
return ret;
}
Run Code Online (Sandbox Code Playgroud)
如果您找到一个没有 UB 的加法版本,例如原子版本,则汇编器甚至没有分支(但带有锁定前缀)
#include <stdbool.h>
#include <stdatomic.h>
bool overadd(_Atomic(int) a[static 1], int b) {
unsigned A = a[0];
atomic_fetch_add_explicit(a, b, memory_order_relaxed);
unsigned B = b;
// This computation will be done anyhow
unsigned AB = A + B;
// See if the sign bits are equal
unsigned AeB = ~(A^B);
unsigned AuAB = (A^AB);
// The function result according to these should be:
//
// AeB \ AuAB | false | true
//------------+-------+------
// false | false | false
// true | false | true
//
// So the expression to compute from the sign bits is (AeB & AuAB)
// This is INT_MAX
unsigned M = -1U/2;
bool ret = (AeB & AuAB) > M;
return ret;
}
Run Code Online (Sandbox Code Playgroud)
所以如果我们有这样的手术,但更“放松”,这可以进一步改善情况。
Take3:如果我们使用从未签名结果到签名结果的特殊“转换”,则现在是无分支的:
#include <stdbool.h>
#include <stdatomic.h>
bool overadd(int a[static 1], int b) {
unsigned A = a[0];
//atomic_fetch_add_explicit(a, b, memory_order_relaxed);
unsigned B = b;
// This computation will be done anyhow
unsigned AB = A + B;
// See if the sign bits are equal
unsigned AeB = ~(A^B);
unsigned AuAB = (A^AB);
// The function result according to these should be:
//
// AeB \ AuAB | false | true
//------------+-------+------
// false | false | false
// true | false | true
//
// So the expression to compute from the sign bits is (AeB & AuAB)
// This is INT_MAX
unsigned M = -1U/2;
unsigned res = (AeB & AuAB);
signed N = M-1;
N = -N - 1;
a[0] = ((AB > M) ? -(int)(-AB) : ((AB != M) ? (int)AB : N));
return res > M;
}
Run Code Online (Sandbox Code Playgroud)
我能想到的最好的版本是:
int safe_add(int *value, int delta) {
long long t = *value + (long long)delta;
if (t != ((int)t))
return -1;
*value = (int) t;
return 0;
}
Run Code Online (Sandbox Code Playgroud)
其产生:
safe_add(int*, int):
movslq %esi, %rax
movslq (%rdi), %rsi
addq %rax, %rsi
movslq %esi, %rax
cmpq %rsi, %rax
jne .L3
movl %eax, (%rdi)
xorl %eax, %eax
ret
.L3:
movl $-1, %eax
ret
Run Code Online (Sandbox Code Playgroud)