有符号位域的多重不一致行为

chq*_*lie 7 c gcc clang language-lawyer bit-fields

我在有符号位域上遇到了一个奇怪的行为:

#include <stdio.h>

/*
 * Test structure: three signed bit-fields (declared long long) followed by
 * three unsigned bit-fields (declared unsigned long long), with widths of
 * 31, 32 and 33 bits in each group.
 */
struct S {
    long long a31 : 31;           /* signed, 31 bits */
    long long a32 : 32;           /* signed, 32 bits */
    long long a33 : 33;           /* signed, 33 bits */
    long long : 0;                /* unnamed zero-width field: nothing more is packed into the preceding unit */
    unsigned long long b31 : 31;  /* unsigned, 31 bits */
    unsigned long long b32 : 32;  /* unsigned, 32 bits */
    unsigned long long b33 : 33;  /* unsigned, 33 bits */
};

/*
 * Adds the 31-bit signed field a31 and the 31-bit unsigned field b31 of *p.
 * No casts are applied, so the addition is carried out in whatever type the
 * integer promotions and usual arithmetic conversions yield for these two
 * bit-field operands; only the finished sum is converted to long long.
 */
long long f31(struct S *p) { return p->a31 + p->b31; }
/*
 * Adds the 32-bit signed field a32 and the 32-bit unsigned field b32 of *p.
 * No casts are applied, so the addition is carried out in whatever type the
 * integer promotions and usual arithmetic conversions yield for these two
 * bit-field operands; only the finished sum is converted to long long.
 */
long long f32(struct S *p) { return p->a32 + p->b32; }
/*
 * Adds the 33-bit signed field a33 and the 33-bit unsigned field b33 of *p.
 * No casts are applied, so the addition is carried out in whatever type the
 * integer promotions and usual arithmetic conversions yield for these two
 * bit-field operands; only the finished sum is converted to long long.
 */
long long f33(struct S *p) { return p->a33 + p->b33; }

/*
 * Prints the signed + unsigned sum for each bit-field width three ways:
 * through f31/f32/f33, as an inline expression passed straight to printf,
 * and (for comparison) on ordinary long long / unsigned long long variables.
 */
int main() {
    /* Unnamed bit-fields take no initializer, so a31..a33 = -2 and b31..b33 = 1. */
    struct S s = { -2, -2, -2, 1, 1, 1 };
    /* Plain (non-bit-field) operands holding the same values, used as a baseline. */
    long long a32 = -2;
    unsigned long long b32 = 1;
    /* The helpers return long long, which matches %lld. */
    printf("f31(&s)       => %lld\n", f31(&s));
    printf("f32(&s)       => %lld\n", f32(&s));
    printf("f33(&s)       => %lld\n", f33(&s));
    /*
     * NOTE(review): these sums reach printf without any conversion. If the
     * compiler gives a sum a type other than long long (for example int or
     * unsigned int after promotion of the bit-fields), the argument no longer
     * matches %lld, which is undefined behavior -- confirm per compiler.
     */
    printf("s.a31 + s.b31 => %lld\n", s.a31 + s.b31);
    printf("s.a32 + s.b32 => %lld\n", s.a32 + s.b32);
    printf("s.a33 + s.b33 => %lld\n", s.a33 + s.b33);
    /* Baseline: long long + unsigned long long is evaluated as unsigned long long. */
    printf("  a32 +   b32 => %lld\n",   a32 +   b32);
    return 0;
}
Run Code Online (Sandbox Code Playgroud)

在 OS/X 上使用 Clang,我得到这个输出:

f31(&s) => -1
f32(&s) => 4294967295
f33(&s) => -1
s.a31 + s.b31 => 4294967295
s.a32 + s.b32 => 4294967295
s.a33 + s.b33 => -1
  a32 + b32 => -1

在 Linux 上使用 GCC,我得到了这个:

f31(&s) => -1
f32(&s) => 4294967295
f33(&s) => 8589934591
s.a31 + s.b31 => 4294967295
s.a32 + s.b32 => 4294967295
s.a33 + s.b33 => 8589934591
  a32 + b32 => -1

上面的输出显示了 3 种类型的不一致:

  • 不同编译器的不同行为;
  • 不同位域宽度的不同行为;
  • 内联表达式和封装在函数中的等效表达式的不同行为。

C 标准有这样的语言:

6.7.2 类型说明符

...

每个以逗号分隔的多重集指定相同的类型,但对于位域,说明符 int 指定的类型与 signed int 相同还是与 unsigned int 相同,由实现定义。

众所周知,许多较旧的编译器对位域的实现存在缺陷……
Clang 和 GCC 的行为是否符合要求,或者这些不一致是否是一个或多个错误的结果?

sid*_*der 0

请查看建议的代码,其工作正常且符合预期。

出于实际目的,我建议,只需确保

  • 相加的两个操作数类型兼容,
  • 函数返回正确的类型,并且
  • 传给 printf 的参数类型与格式说明符一致。

就是这样。

有关更多信息,另请参阅参考文献[1] 和[2],如下。

#include <stdio.h>

/*
 * Test structure: three signed bit-fields (declared long long) followed by
 * three unsigned bit-fields (declared unsigned long long), with widths of
 * 31, 32 and 33 bits in each group.
 */
struct S {
    long long a31 : 31;           /* signed, 31 bits */
    long long a32 : 32;           /* signed, 32 bits */
    long long a33 : 33;           /* signed, 33 bits */
    
    unsigned long long b31 : 31;  /* unsigned, 31 bits */
    unsigned long long b32 : 32;  /* unsigned, 32 bits */
    unsigned long long b33 : 33;  /* unsigned, 33 bits */
};

/*
 * Sum of the 31-bit fields a31 and b31 of *p, evaluated in long long.
 * Each field is widened to long long before the addition, so the result
 * does not depend on how the bit-field operands themselves would promote.
 */
long long f31(struct S *p)
{
    const long long signed_part = p->a31;
    const long long unsigned_part = (long long)p->b31;

    return signed_part + unsigned_part;
}
/*
 * Sum of the 32-bit fields a32 and b32 of *p, evaluated in long long.
 * Each field is widened to long long before the addition, so the result
 * does not depend on how the bit-field operands themselves would promote.
 */
long long f32(struct S *p)
{
    const long long signed_part = p->a32;
    const long long unsigned_part = (long long)p->b32;

    return signed_part + unsigned_part;
}
/*
 * Sum of the 33-bit fields a33 and b33 of *p, evaluated in long long.
 * Each field is widened to long long before the addition, so the result
 * does not depend on how the bit-field operands themselves would promote.
 */
long long f33(struct S *p)
{
    const long long signed_part = p->a33;
    const long long unsigned_part = (long long)p->b33;

    return signed_part + unsigned_part;
}

int main() {
    struct S s = { -2, -2, -2, 1, 1, 1 };
    long long a32 = -2;
    unsigned long long b32 = 1;
    
    printf("p->a31       => %lld\n", (long long)(s.a31));
    printf("p->a32       => %lld\n", (long long)(s.a32));
    printf("p->a33       => %lld\n", (long long)(s.a33));
    
    printf("p->b31       => %lld\n", (long long)(s.b31));
    printf("p->b32       => %lld\n", (long long)(s.b32));
    printf("p->b33       => %lld\n", (long long)(s.b33));
    
    
    printf("f31(&s)       => %lld\n", (long long)(f31(&s)));
    printf("f32(&s)       => %lld\n", (long long)(f32(&s)));
    printf("f33(&s)       => %lld\n", (long long)(f33(&s)));
    printf("s.a31 + s.b31 => %lld\n", ((long long)s.a31 + (long long)s.b31));
    printf("s.a32 + s.b32 => %lld\n", ((long long)s.a32 + (long long)s.b32));
    printf("s.a33 + s.b33 => %lld\n", ((long long)s.a33 + (long long)s.b33));
    printf("  a32 +   b32 => %lld\n", (long long) (a32 +   b32));
    return 0;
}

Run Code Online (Sandbox Code Playgroud)
p->a31       => -2
p->a32       => -2
p->a33       => -2
p->b31       => 1
p->b32       => 1
p->b33       => 1
f31(&s)       => -1
f32(&s)       => -1
f33(&s)       => -1
s.a31 + s.b31 => -1
s.a32 + s.b32 => -1
s.a33 + s.b33 => -1
  a32 +   b32 => -1
Run Code Online (Sandbox Code Playgroud)

参考

[1] C 中的有符号到无符号转换 - 它总是安全的吗?

[2] https://www.geeksforgeeks.org/bit-fields-c/ “我们不能有指向位字段成员的指针,因为它们可能不从字节边界开始。”