C - 从字符串中提取单词

Sha*_*abu 3 c string pointers words extract

我刚接触 C 语言,因此在使用指针时遇到了很多困惑.

我正在尝试从一串 ASCII 字符中提取单词.例如,如果我有字符串 "@@ Hello..world >>",我想从中取出单词 "Hello" 和 "world",并将它们添加到我的链表中.

一个单词被定义为任意连续的字母序列,每个单词最多为 64 个字节.此外,任何使函数 isspace() 返回非零值的字符都被视为空白字符.

基本上,我使用 fscanf 从文件中逐个读取字符串,然后对每个字符串调用我的函数 read_words(char *s),从字符串中取出正确的单词并将它们添加到我的链表中以供后续使用.

下面是我的代码,它似乎会引发一个与指针有关的错误.

/*
 * One entry of the singly linked word list.
 *   val   - pointer to the word's text
 *   next  - following entry (read_words links new entries in at the head)
 *   count - counter that read_words sets to 1 when the entry is created
 */
struct node {
    char *val;
    struct node *next;
    int count;
};

/* Head of the word list; the list starts out empty. */
struct node *words = NULL;


void read_words(char *s)
{
    struct node *tmp;
    char word[64+1];
    int i, check, wordStarted = 0, count = 0;

    for (i = 0; s[i] != '\0'; i++)
    {
            if ((isspace(s[i]) != 0) || !isalpha(s[i]))
            {
                    if (wordStarted == 1)
                    {
                            check = check_list(word);
                            if (check != 1) {
                                    word[count] = '\0';
                                    tmp = malloc(sizeof(struct node));
                                    tmp->val = word;
                                    tmp->count = 1;
                                    tmp->next = words;
                                    words = tmp;
                            }
                            count = 0;
                            wordStarted = 0;
                    }
            }
            else
            {
                    word[count++] = s[i];
                    wordStarted = 1;
            }
    }

}
Run Code Online (Sandbox Code Playgroud)

任何帮助都将非常感谢!

谢谢!

Mar*_*era 6

您应该对字符串进行分词(tokenize),而不必自己实现算法,然后将得到的各个部分追加到链表中.可以使用 strtok(参考).

从上面的链接..示例:

#include <stdio.h>
#include <string.h>

/*
 * strtok demonstration: prints the sample string, then prints each token
 * obtained by splitting it on space, comma, period and hyphen, one per line.
 */
int main ()
{
  static const char delimiters[] = " ,.-";
  char text[] = "- This, a sample string.";
  char *token;

  printf ("Splitting string \"%s\" into tokens:\n", text);

  /* The first call passes the buffer; later calls pass NULL to continue. */
  for (token = strtok (text, delimiters);
       token != NULL;
       token = strtok (NULL, delimiters))
  {
    printf ("%s\n", token);
  }

  return 0;
}
Run Code Online (Sandbox Code Playgroud)

输出:

Splitting string "- This, a sample string." into tokens:
This
a
sample
string
Run Code Online (Sandbox Code Playgroud)


PAD*_*MKO 5

ANSI C 的更好解决方案。

使用 strtok() 并不总是好的选择:

  1. 它会修改原始数组。
  2. 空格分隔符 " " 不包含其他类似的空白字符,如 "\n"、"\t" 等。

请尝试下面的代码,并阅读其中的注释以了解详细信息:

#include <stdio.h>      // printf
#include <string.h>     // strlen, strncpy
#include <ctype.h>      // isalnum
#include <stdlib.h>     // malloc, calloc

/*
    Hand-rolled boolean type: `false` has the value 0 and `true` the value 1
 */
typedef enum {
    false = 0,
    true = 1
} bool;


/*
    A list of words together with the number of entries in it
 */
typedef struct _ListWithLength {
    char **list;    // array of `length` NUL-terminated, heap-allocated words
    size_t length;  // number of valid entries in `list`
} ListWithLength;


/*
    Free the first `count` words of a partially built array and the array
    itself (used when an allocation fails half-way through getWords)
 */
static void freeWordArray(char **words, size_t count) {
    for (size_t i = 0; i < count; ++i) {
        free(words[i]);
    }
    free(words);
}


/*
    Split a text into words and return them as a newly allocated
    ListWithLength.

    A word is a maximal run of characters for which isalnum() is non-zero;
    every other character is a separator. The text itself is not modified.

    Returns NULL if `text` is NULL or if memory cannot be allocated.
    On success the caller owns the result and must free each list[i],
    then list, then the structure itself.
 */
ListWithLength* getWords(char *text) {

    if (text == NULL) {
        return NULL;
    }

    // keep length of the text
    size_t text_len = strlen(text);

    // Two words are always separated by at least one other character, so a
    // text of n characters holds at most (n + 1) / 2 words. The "+ 1" also
    // keeps the request non-zero for an empty text.
    size_t capacity = text_len / 2 + 1;

    // array of word pointers: each slot must be sizeof(char *), not
    // sizeof(char); calloc also checks the multiplication for overflow
    char **words = calloc(capacity, sizeof *words);
    if (words == NULL) {
        return NULL;
    }

    // number of words stored so far
    size_t count = 0;

    // start of the word being scanned, or NULL while between words
    const char *word_start = NULL;

    // i runs up to and including text_len so that the terminating '\0'
    // acts as a separator and flushes a word that ends the text
    for (size_t i = 0; i <= text_len; ++i) {

        // <ctype.h> functions need a value representable as unsigned char
        if (isalnum((unsigned char)text[i]) != 0) {
            if (word_start == NULL) {
                word_start = text + i;
            }
            continue;
        }

        if (word_start == NULL) {
            continue;
        }

        // a separator right after a word: copy the word out
        size_t word_len = (size_t)((text + i) - word_start);
        char *word = malloc(word_len + 1);
        if (word == NULL) {
            freeWordArray(words, count);
            return NULL;
        }
        memcpy(word, word_start, word_len);
        word[word_len] = '\0';  // memcpy copies no terminator

        words[count++] = word;
        word_start = NULL;
    }

    // bind the found words and their count to a structure and return it
    ListWithLength *list_with_length = malloc(sizeof *list_with_length);
    if (list_with_length == NULL) {
        freeWordArray(words, count);
        return NULL;
    }

    list_with_length->length = count;
    list_with_length->list = words;

    return list_with_length;
}


/*
    Print the number of items in a ListWithLength, a separator line, and
    then every item on its own line, numbered from 1.

    Does nothing when `list_with_length` is NULL.
 */
void printListWithLength(ListWithLength *list_with_length) {
    if (list_with_length == NULL) {
        return;
    }

    // `length` is a size_t, so it is printed with %zu and indexed with size_t
    printf("Total items: %zu\n", list_with_length->length);
    puts("----------------------");
    for (size_t i = 0; i < list_with_length->length; ++i) {
        printf("%zu. %s\n", i + 1, list_with_length->list[i]);
    }
}


int main(int argc, char const *argv[])
{

    char c_keywords[300] = "auto else    long    switch\
    break\t   enum \t register    typedef\
    \ncase    extern,  return  union\
    ?char    float.   short   unsigned\
    const   !for signed  void\
    continue    goto    sizeof  volatile\
    .default???? if  static  while\
    do  int struct,,,,  _Packed\
    double.......";

    ListWithLength *list_with_length = getWords(c_keywords);
    printListWithLength(list_with_length);

    return 0;
}
Run Code Online (Sandbox Code Playgroud)

编译并查看结果:

$ gcc -Wall -ansi -std=c11 -o main main.c
$ ./main 
Total items: 33
----------------------
1. auto
2. else
3. long
4. switch
5. break
6. enum
7. register
8. typedef
9. case
10. extern
11. return
12. union
13. char
14. float
15. short
16. unsigned
17. const
18. for
19. signed
20. void
21. continue
22. goto
23. sizeof
24. volatile
25. default
26. if
27. static
28. while
29. do
30. int
31. struct
32. Packed
33. double
Run Code Online (Sandbox Code Playgroud)