Ric*_*ral 0 c string string-split
假设我有这个:
"foo bar 1 and foo bar 2"
Run Code Online (Sandbox Code Playgroud)
我怎么能把它分成:
foo bar 1
foo bar 2
Run Code Online (Sandbox Code Playgroud)
?
我尝试过strtok(),strsep()但都没有奏效.他们不承认"和"作为分隔符,他们将"a","n"和"d"视为分隔符.
任何帮助我的功能,或者我必须通过空格分割并进行一些字符串操作?
小智 5
在C中拆分字符串的主要问题是它不可避免地导致一些动态内存管理,并且只要有可能,标准库就会避免这种情况.这就是为什么标准C函数都没有处理动态内存分配的原因,只有malloc/calloc/realloc这样做.
但要做到这一点并不困难.让我带你走过.
我们需要返回一些字符串,最简单的方法是返回一个指向字符串的指针数组,该字符串由NULL项终止.除了最后的NULL之外,数组中的每个元素都指向一个动态分配的字符串.
首先,我们需要一些辅助函数来处理这样的数组.最简单的一个是计算字符串数量(最终NULL之前的元素):
/* Return length of a NULL-delimited array of strings. */
size_t str_array_len(char **array)
{
size_t len;
for (len = 0; array[len] != NULL; ++len)
continue;
return len;
}
Run Code Online (Sandbox Code Playgroud)
另一个简单的方法是释放数组的功能:
/* Free a dynamic array of dynamic strings. */
void str_array_free(char **array)
{
if (array == NULL)
return;
for (size_t i = 0; array[i] != NULL; ++i)
free(array[i]);
free(array);
}
Run Code Online (Sandbox Code Playgroud)
更复杂的是将字符串副本添加到数组的函数.它需要处理许多特殊情况,例如当数组尚不存在时(整个数组为NULL).此外,它需要处理不以'\ 0'结尾的字符串,以便我们的实际拆分函数更容易在追加时使用输入字符串的一部分.
/* Append an item to a dynamically allocated array of strings. On failure,
return NULL, in which case the original array is intact. The item
string is dynamically copied. If the array is NULL, allocate a new
array. Otherwise, extend the array. Make sure the array is always
NULL-terminated. Input string might not be '\0'-terminated. */
char **str_array_append(char **array, size_t nitems, const char *item,
size_t itemlen)
{
/* Make a dynamic copy of the item. */
char *copy;
if (item == NULL)
copy = NULL;
else {
copy = malloc(itemlen + 1);
if (copy == NULL)
return NULL;
memcpy(copy, item, itemlen);
copy[itemlen] = '\0';
}
/* Extend array with one element. Except extend it by two elements,
in case it did not yet exist. This might mean it is a teeny bit
too big, but we don't care. */
array = realloc(array, (nitems + 2) * sizeof(array[0]));
if (array == NULL) {
free(copy);
return NULL;
}
/* Add copy of item to array, and return it. */
array[nitems] = copy;
array[nitems+1] = NULL;
return array;
}
Run Code Online (Sandbox Code Playgroud)
这是一个moutful.对于非常好的风格,如果将输入项目转换为自己的功能,最好将动态副本分成两部分,但我会将其作为练习留给读者.
最后,我们有实际的分裂功能.它还需要处理一些特殊情况:
如果分隔符紧邻输入字符串的开头或结尾,或者紧挨着另一个分隔符,我选择在结果中添加一个空字符串.如果您还需要其他内容,则需要调整代码.
除了特殊情况和一些错误处理之外,拆分现在相当简单.
/* Split a string into substrings. Return dynamic array of dynamically
allocated substrings, or NULL if there was an error. Caller is
expected to free the memory, for example with str_array_free. */
char **str_split(const char *input, const char *sep)
{
size_t nitems = 0;
char **array = NULL;
const char *start = input;
char *next = strstr(start, sep);
size_t seplen = strlen(sep);
const char *item;
size_t itemlen;
for (;;) {
next = strstr(start, sep);
if (next == NULL) {
/* Add the remaining string (or empty string, if input ends with
separator. */
char **new = str_array_append(array, nitems, start, strlen(start));
if (new == NULL) {
str_array_free(array);
return NULL;
}
array = new;
++nitems;
break;
} else if (next == input) {
/* Input starts with separator. */
item = "";
itemlen = 0;
} else {
item = start;
itemlen = next - item;
}
char **new = str_array_append(array, nitems, item, itemlen);
if (new == NULL) {
str_array_free(array);
return NULL;
}
array = new;
++nitems;
start = next + seplen;
}
if (nitems == 0) {
/* Input does not contain separator at all. */
assert(array == NULL);
array = str_array_append(array, nitems, input, strlen(input));
}
return array;
}
Run Code Online (Sandbox Code Playgroud)
这是整个程序的一个整体.它还包括一个运行一些测试用例的主程序.
#include <assert.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Append an item to a dynamically allocated array of strings. On failure,
return NULL, in which case the original array is intact. The item
string is dynamically copied. If the array is NULL, allocate a new
array. Otherwise, extend the array. Make sure the array is always
NULL-terminated. Input string might not be '\0'-terminated. */
char **str_array_append(char **array, size_t nitems, const char *item,
size_t itemlen)
{
/* Make a dynamic copy of the item. */
char *copy;
if (item == NULL)
copy = NULL;
else {
copy = malloc(itemlen + 1);
if (copy == NULL)
return NULL;
memcpy(copy, item, itemlen);
copy[itemlen] = '\0';
}
/* Extend array with one element. Except extend it by two elements,
in case it did not yet exist. This might mean it is a teeny bit
too big, but we don't care. */
array = realloc(array, (nitems + 2) * sizeof(array[0]));
if (array == NULL) {
free(copy);
return NULL;
}
/* Add copy of item to array, and return it. */
array[nitems] = copy;
array[nitems+1] = NULL;
return array;
}
/* Free a dynamic array of dynamic strings. */
void str_array_free(char **array)
{
if (array == NULL)
return;
for (size_t i = 0; array[i] != NULL; ++i)
free(array[i]);
free(array);
}
/* Split a string into substrings. Return dynamic array of dynamically
allocated substrings, or NULL if there was an error. Caller is
expected to free the memory, for example with str_array_free. */
char **str_split(const char *input, const char *sep)
{
size_t nitems = 0;
char **array = NULL;
const char *start = input;
char *next = strstr(start, sep);
size_t seplen = strlen(sep);
const char *item;
size_t itemlen;
for (;;) {
next = strstr(start, sep);
if (next == NULL) {
/* Add the remaining string (or empty string, if input ends with
separator. */
char **new = str_array_append(array, nitems, start, strlen(start));
if (new == NULL) {
str_array_free(array);
return NULL;
}
array = new;
++nitems;
break;
} else if (next == input) {
/* Input starts with separator. */
item = "";
itemlen = 0;
} else {
item = start;
itemlen = next - item;
}
char **new = str_array_append(array, nitems, item, itemlen);
if (new == NULL) {
str_array_free(array);
return NULL;
}
array = new;
++nitems;
start = next + seplen;
}
if (nitems == 0) {
/* Input does not contain separator at all. */
assert(array == NULL);
array = str_array_append(array, nitems, input, strlen(input));
}
return array;
}
/* Return length of a NULL-delimited array of strings. */
size_t str_array_len(char **array)
{
size_t len;
for (len = 0; array[len] != NULL; ++len)
continue;
return len;
}
#define MAX_OUTPUT 20
int main(void)
{
struct {
const char *input;
const char *sep;
char *output[MAX_OUTPUT];
} tab[] = {
/* Input is empty string. Output should be a list with an empty
string. */
{
"",
"and",
{
"",
NULL,
},
},
/* Input is exactly the separator. Output should be two empty
strings. */
{
"and",
"and",
{
"",
"",
NULL,
},
},
/* Input is non-empty, but does not have separator. Output should
be the same string. */
{
"foo",
"and",
{
"foo",
NULL,
},
},
/* Input is non-empty, and does have separator. */
{
"foo bar 1 and foo bar 2",
" and ",
{
"foo bar 1",
"foo bar 2",
NULL,
},
},
};
const int tab_len = sizeof(tab) / sizeof(tab[0]);
bool errors;
errors = false;
for (int i = 0; i < tab_len; ++i) {
printf("test %d\n", i);
char **output = str_split(tab[i].input, tab[i].sep);
if (output == NULL) {
fprintf(stderr, "output is NULL\n");
errors = true;
break;
}
size_t num_output = str_array_len(output);
printf("num_output %lu\n", (unsigned long) num_output);
size_t num_correct = str_array_len(tab[i].output);
if (num_output != num_correct) {
fprintf(stderr, "wrong number of outputs (%lu, not %lu)\n",
(unsigned long) num_output, (unsigned long) num_correct);
errors = true;
} else {
for (size_t j = 0; j < num_output; ++j) {
if (strcmp(tab[i].output[j], output[j]) != 0) {
fprintf(stderr, "output[%lu] is '%s' not '%s'\n",
(unsigned long) j, output[j], tab[i].output[j]);
errors = true;
break;
}
}
}
str_array_free(output);
printf("\n");
}
if (errors)
return EXIT_FAILURE;
return 0;
}
Run Code Online (Sandbox Code Playgroud)
| 归档时间: |
|
| 查看次数: |
5989 次 |
| 最近记录: |