这是我当前的代码:
// Note: the apostrophe (') is deliberately left out of the punctuation set.
/*
 * Overwrite every punctuation byte among the first `len` bytes of `s`
 * with a space character.
 *
 * `s` is treated as a raw byte buffer: it does not have to be
 * NUL-terminated, and exactly `len` bytes are inspected.
 */
static void replace_punctuation(char *s, size_t len)
{
    static const unsigned char marks[] = ".,;:!?\"()[]{}-";
    /* Leave the string literal's terminating NUL out of the search set. */
    const size_t mark_count = sizeof marks - 1;

    for (size_t remaining = len; remaining > 0; --remaining, ++s) {
        if (memchr(marks, *s, mark_count) == NULL) {
            continue; /* not punctuation: keep the byte as it is */
        }
        *s = ' ';
    }
}
Run Code Online (Sandbox Code Playgroud)
使用 cachegrind/KCachegrind 对程序进行性能分析(以 -O2 优化编译)后,发现这个函数是瓶颈。
s 不是以 null 结尾的字符串,因此不能使用 strpbrk()。
如何对其进行优化?
您可以使用查找表直接替换每个字符:
#include <limits.h>
#include <stdlib.h>
/*
 * Replace every punctuation byte among the first `len` bytes of `s`
 * with a space, using a 256-entry translation table so that each byte
 * costs one table load and one store, with no per-byte search.
 *
 * `s` is treated as a raw byte buffer: it does not have to be
 * NUL-terminated, and exactly `len` bytes are processed.
 * The apostrophe (') is not part of the punctuation set.
 *
 * The table and the buffer accesses use `unsigned char`, so entries
 * 128..255 are stored exactly and no conversion to a possibly signed
 * `char` is involved.
 */
static void replace_punctuation(char *s, size_t len)
{
    _Static_assert(UCHAR_MAX == 255, "This code is written for eight-bit char.");

    static const unsigned char table[UCHAR_MAX + 1] =
    {
        // First, we initialize table[i] with i.
          0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,  15,
         16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  26,  27,  28,  29,  30,  31,
         32,  33,  34,  35,  36,  37,  38,  39,  40,  41,  42,  43,  44,  45,  46,  47,
         48,  49,  50,  51,  52,  53,  54,  55,  56,  57,  58,  59,  60,  61,  62,  63,
         64,  65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,
         80,  81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,
         96,  97,  98,  99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111,
        112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127,
        128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143,
        144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159,
        160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
        176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191,
        192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207,
        208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223,
        224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
        240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255,

        /* Then we replace each punctuation character with a space.
           Overriding previous initializers is specified by C 2018 6.7.9
           19.
        */
        ['.'] = ' ',
        [','] = ' ',
        [';'] = ' ',
        [':'] = ' ',
        ['!'] = ' ',
        ['?'] = ' ',
        ['"'] = ' ',
        ['('] = ' ',
        [')'] = ' ',
        ['['] = ' ',
        [']'] = ' ',
        ['{'] = ' ',
        ['}'] = ' ',
        ['-'] = ' ',
    };

    /* Character types may alias any object, so viewing the buffer as
       unsigned char is well defined and yields indices in 0..255. */
    unsigned char *bytes = (unsigned char *) s;

    for (size_t i = 0; i < len; ++i) {
        bytes[i] = table[bytes[i]];
    }
}
#include <string.h>
#include <stdio.h>
/*
 * Demo driver: print a sample sentence, blank out its punctuation in
 * place with replace_punctuation(), then print the result.
 */
int main(void)
{
    char sample[] = "This, is- text: with? some [puncutation]!";
    const size_t sample_len = strlen(sample);

    puts(sample);                            /* before */
    replace_punctuation(sample, sample_len);
    puts(sample);                            /* after */

    return 0;
}
Run Code Online (Sandbox Code Playgroud)
为了处理将初始值设定项转换为有符号 char 时可能出现的实现定义行为,可以用 i - 2*(UCHAR_MAX-CHAR_MAX) 而不是仅仅用 i 来初始化从 128 开始的元素。(当 char 为有符号类型时,该表达式等于 i - 256;当 char 为无符号类型时,它等于 i。)