Fre*_*urn 5 c arrays bit-manipulation
在 C 中任意大小的字节数组中,我想存储紧密打包的 14 位数字 (0-16,383)。换句话说,在序列中:
0000000000000100000000000001
Run Code Online (Sandbox Code Playgroud)
有两个数字我希望能够任意存储和检索为 16 位整数。(在这种情况下,它们都是 1,但可以是给定范围内的任何东西)如果我有函数uint16_t 14bitarr_get(unsigned char* arr, unsigned int index)and void 14bitarr_set(unsigned char* arr, unsigned int index, uint16_t value),我将如何实现这些函数?
这不是一个家庭作业项目,只是我自己的好奇心。我有一个将用于的特定项目,它是整个项目的关键/中心。
我不想要一个包含 14 位值的结构数组,因为它会为每个存储的结构生成浪费位。我希望能够将尽可能多的 14 位值压缩到一个字节数组中。(例如:在我发表的评论中,将尽可能多的 14 位值放入 64 字节的块中是可取的,没有浪费位。对于特定用例,这些 64 字节的工作方式是完全紧凑的,这样即使是一位浪费将剥夺存储另一个 14 位值的能力)
嗯,这在最好的情况下有点摆弄。使用字节数组执行此操作比使用较大元素更复杂,因为单个 14 位数量可以跨越 3 个字节,其中 uint16_t 或任何更大的元素只需要两个字节。但我会相信你的话,这就是你想要的(没有双关语的意思)。该代码实际上适用于设置为 8 或更大的常量(但不能超过 的大小int;为此,需要额外的类型转换)。当然如果大于16就必须调整值类型。
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#define W 14
uint16_t arr_get(unsigned char* arr, size_t index) {
size_t bit_index = W * index;
size_t byte_index = bit_index / 8;
unsigned bit_in_byte_index = bit_index % 8;
uint16_t result = arr[byte_index] >> bit_in_byte_index;
for (unsigned n_bits = 8 - bit_in_byte_index; n_bits < W; n_bits += 8)
result |= arr[++byte_index] << n_bits;
return result & ~(~0u << W);
}
void arr_set(unsigned char* arr, size_t index, uint16_t value) {
size_t bit_index = W * index;
size_t byte_index = bit_index / 8;
unsigned bit_in_byte_index = bit_index % 8;
arr[byte_index] &= ~(0xff << bit_in_byte_index);
arr[byte_index++] |= value << bit_in_byte_index;
unsigned n_bits = 8 - bit_in_byte_index;
value >>= n_bits;
while (n_bits < W - 8) {
arr[byte_index++] = value;
value >>= 8;
n_bits += 8;
}
arr[byte_index] &= 0xff << (W - n_bits);
arr[byte_index] |= value;
}
int main(void) {
int mod = 1 << W;
int n = 50000;
unsigned x[n];
unsigned char b[2 * n];
for (int tries = 0; tries < 10000; tries++) {
for (int i = 0; i < n; i++) {
x[i] = rand() % mod;
arr_set(b, i, x[i]);
}
for (int i = 0; i < n; i++)
if (arr_get(b, i) != x[i])
printf("Err @%d: %d should be %d\n", i, arr_get(b, i), x[i]);
}
return 0;
}
Run Code Online (Sandbox Code Playgroud)
更快的版本 既然您在评论中说过性能是一个问题:开放编码循环在我的机器上使用原始版本中包含的小测试驱动程序提供了大约 10% 的速度改进。这包括随机数生成和测试,因此基元的速度可能快 20%。我相信 16 位或 32 位数组元素会带来进一步的改进,因为字节访问的成本很高:
uint16_t arr_get(unsigned char* a, size_t i) {
size_t ib = 14 * i;
size_t iy = ib / 8;
switch (ib % 8) {
case 0:
return (a[iy] | (a[iy+1] << 8)) & 0x3fff;
case 2:
return ((a[iy] >> 2) | (a[iy+1] << 6)) & 0x3fff;
case 4:
return ((a[iy] >> 4) | (a[iy+1] << 4) | (a[iy+2] << 12)) & 0x3fff;
}
return ((a[iy] >> 6) | (a[iy+1] << 2) | (a[iy+2] << 10)) & 0x3fff;
}
#define M(IB) (~0u << (IB))
#define SETLO(IY, IB, V) a[IY] = (a[IY] & M(IB)) | ((V) >> (14 - (IB)))
#define SETHI(IY, IB, V) a[IY] = (a[IY] & ~M(IB)) | ((V) << (IB))
void arr_set(unsigned char* a, size_t i, uint16_t val) {
size_t ib = 14 * i;
size_t iy = ib / 8;
switch (ib % 8) {
case 0:
a[iy] = val;
SETLO(iy+1, 6, val);
return;
case 2:
SETHI(iy, 2, val);
a[iy+1] = val >> 6;
return;
case 4:
SETHI(iy, 4, val);
a[iy+1] = val >> 4;
SETLO(iy+2, 2, val);
return;
}
SETHI(iy, 6, val);
a[iy+1] = val >> 2;
SETLO(iy+2, 4, val);
}
Run Code Online (Sandbox Code Playgroud)
另一种变体 这在我的机器上要快得多,比上面的好大约 20%:
uint16_t arr_get2(unsigned char* a, size_t i) {
size_t ib = i * 14;
size_t iy = ib / 8;
unsigned buf = a[iy] | (a[iy+1] << 8) | (a[iy+2] << 16);
return (buf >> (ib % 8)) & 0x3fff;
}
void arr_set2(unsigned char* a, size_t i, unsigned val) {
size_t ib = i * 14;
size_t iy = ib / 8;
unsigned buf = a[iy] | (a[iy+1] << 8) | (a[iy+2] << 16);
unsigned io = ib % 8;
buf = (buf & ~(0x3fff << io)) | (val << io);
a[iy] = buf;
a[iy+1] = buf >> 8;
a[iy+2] = buf >> 16;
}
Run Code Online (Sandbox Code Playgroud)
请注意,为了确保此代码的安全,您应该在打包数组的末尾分配一个额外的字节。即使所需的 14 位位于前 2 位中,它也始终读取和写入 3 个字节。
另一种变体最后,它的运行速度比上面的慢一点(同样在我的机器上;YMMV),但您不需要额外的字节。它对每个操作使用一次比较:
uint16_t arr_get2(unsigned char* a, size_t i) {
size_t ib = i * 14;
size_t iy = ib / 8;
unsigned io = ib % 8;
unsigned buf = ib % 8 <= 2
? a[iy] | (a[iy+1] << 8)
: a[iy] | (a[iy+1] << 8) | (a[iy+2] << 16);
return (buf >> io) & 0x3fff;
}
void arr_set2(unsigned char* a, size_t i, unsigned val) {
size_t ib = i * 14;
size_t iy = ib / 8;
unsigned io = ib % 8;
if (io <= 2) {
unsigned buf = a[iy] | (a[iy+1] << 8);
buf = (buf & ~(0x3fff << io)) | (val << io);
a[iy] = buf;
a[iy+1] = buf >> 8;
} else {
unsigned buf = a[iy] | (a[iy+1] << 8) | (a[iy+2] << 16);
buf = (buf & ~(0x3fff << io)) | (val << io);
a[iy] = buf;
a[iy+1] = buf >> 8;
a[iy+2] = buf >> 16;
}
}
Run Code Online (Sandbox Code Playgroud)