char *str = "..." 与 char str[] = "..." 的奇怪行为

Tho*_*nig 2 c gcc g++ libc

可能重复:
尝试设置char数组的字符时程序崩溃

我有一个示例代码,按预期工作:

/* strtok example */
#include <stdio.h>
#include <string.h>

/*
 * Working variant: runs strtok() on a sentence held in a local array.
 * Prints a banner containing the sentence, makes a single strtok() call,
 * and returns 0. The loop that would print every token is disabled.
 */
int main ()
{
  /* Array initialized from the literal: str is its own writable object. */
  char str[] ="- This, a sample string.";
  /* Receives the pointer returned by strtok(). */
  char * pch;
  printf ("Splitting string \"%s\" into tokens:\n",str);
  /* Delimiter set: space, comma, period and hyphen. */
  pch = strtok (str," ,.-");
/*
  while (pch != NULL)
  {
    printf ("%s\n",pch);
    pch = strtok (NULL, " ,.-");
  }
*/
  return 0;
}
Run Code Online (Sandbox Code Playgroud)

……但是一旦我把 char str[] 改成 char *str 就不行了,而我原以为这在语义上不会产生任何差别:

/* strtok example */
#include <stdio.h>
#include <string.h>

/*
 * Crashing variant: same program, except that str is a pointer to the
 * string literal instead of an array initialized from it. Prints the banner,
 * then calls strtok() once on the literal; the token loop is disabled.
 */
int main ()
{
  /*
   * str points directly at the string literal. Modifying a string literal
   * is undefined behavior, so this object must be treated as read-only.
   */
  char * str ="- This, a sample string.";
  /* Receives the pointer returned by strtok(). */
  char * pch;
  printf ("Splitting string \"%s\" into tokens:\n",str);
  /*
   * strtok() replaces delimiters in its first argument with null bytes, so
   * this call writes into the literal. That write is the bug: strtok() needs
   * a modifiable buffer such as a char array or a heap copy of the string.
   */
  pch = strtok (str," ,.-");
/*
  while (pch != NULL)
  {
    printf ("%s\n",pch);
    pch = strtok (NULL, " ,.-");
  }
*/
  return 0;
}
Run Code Online (Sandbox Code Playgroud)

这是意想不到的结果:

Splitting string "- This, a sample string." into tokens:
Segmentation fault
Run Code Online (Sandbox Code Playgroud)

我用下面的命令编译了这两个例子:

gcc -O0 main.c
gcc -O3 main.c
g++ -O0 main.c
g++ -O3 main.c
Run Code Online (Sandbox Code Playgroud)

我甚至查看了汇编代码……但还是弄不清楚第二个版本到底哪里有问题.

下面是能正常工作的版本在 -O1 下生成的汇编:

    # GCC 4.4.5 output (x86-64, Intel syntax) for the variant that declares
    # the sentence as a local array.
    .file   "main.c"
    .intel_syntax noprefix
    # Read-only data holds only the printf format (.LC0) and the strtok
    # delimiter set (.LC1); the sentence itself is not emitted as data.
    .section    .rodata.str1.8,"aMS",@progbits,1
    .align 8
.LC0:
    .string "Splitting string \"%s\" into tokens:\n"
    .section    .rodata.str1.1,"aMS",@progbits,1
.LC1:
    .string " ,.-"
    .text
.globl main
    .type   main, @function
main:
.LFB58:
    .cfi_startproc
    push    rbx
    .cfi_def_cfa_offset 16
    # Reserve 48 bytes: the array occupies rsp..rsp+24, the guard slot rsp+40.
    sub rsp, 48
    .cfi_def_cfa_offset 64
    # Save the stack-protector guard value; it is compared again before ret.
    mov rax, QWORD PTR fs:40
    mov QWORD PTR [rsp+40], rax
    xor eax, eax
    # Build the array on the stack: six 4-byte immediates carry the 24
    # characters of the sentence (1750343725 = 0x6854202D = "- Th" in
    # little-endian byte order), then the terminating NUL at rsp+24.
    mov DWORD PTR [rsp], 1750343725
    mov DWORD PTR [rsp+4], 539784041
    mov DWORD PTR [rsp+8], 1634934881
    mov DWORD PTR [rsp+12], 1701605485
    mov DWORD PTR [rsp+16], 1920234272
    mov DWORD PTR [rsp+20], 778530409
    mov BYTE PTR [rsp+24], 0
    # __printf_chk(1, .LC0, <stack array>)
    mov rdx, rsp
    mov esi, OFFSET FLAT:.LC0
    mov edi, 1
    .cfi_offset 3, -16
    call    __printf_chk
    # strtok(<stack array>, .LC1): the first argument is the stack copy,
    # not an address inside .rodata.
    mov esi, OFFSET FLAT:.LC1
    mov rdi, rsp
    call    strtok
    # Return value 0.
    mov eax, 0
    # Compare the saved guard with fs:40; a mismatch calls __stack_chk_fail.
    mov rdx, QWORD PTR [rsp+40]
    xor rdx, QWORD PTR fs:40
    je  .L3
    call    __stack_chk_fail
.L3:
    add rsp, 48
    pop rbx
    .p2align 4,,1
    ret
    .cfi_endproc
.LFE58:
    .size   main, .-main
    .ident  "GCC: (Ubuntu/Linaro 4.4.4-14ubuntu5) 4.4.5"
    .section    .note.GNU-stack,"",@progbits
Run Code Online (Sandbox Code Playgroud)

而这是出错的那个版本的汇编:

    # GCC 4.4.5 output (x86-64, Intel syntax) for the variant that declares
    # the sentence as a pointer to a string literal.
    .file   "main.c"
    .intel_syntax noprefix
    # The sentence itself is emitted as data (.LC0) in a .rodata section;
    # the "aMS" flags contain no "w", i.e. the section is not marked writable.
    .section    .rodata.str1.1,"aMS",@progbits,1
.LC0:
    .string "- This, a sample string."
    .section    .rodata.str1.8,"aMS",@progbits,1
    .align 8
.LC1:
    .string "Splitting string \"%s\" into tokens:\n"
    .section    .rodata.str1.1
.LC2:
    .string " ,.-"
    .text
.globl main
    .type   main, @function
main:
.LFB58:
    .cfi_startproc
    sub rsp, 8
    .cfi_def_cfa_offset 16
    # __printf_chk(1, .LC1, .LC0): reading the literal is fine.
    mov edx, OFFSET FLAT:.LC0
    mov esi, OFFSET FLAT:.LC1
    mov edi, 1
    mov eax, 0
    call    __printf_chk
    # strtok(.LC0, .LC2): the first argument is the address of the literal
    # inside .rodata, so strtok's write of a null byte targets that section.
    # This is consistent with the segmentation fault reported for this build.
    mov esi, OFFSET FLAT:.LC2
    mov edi, OFFSET FLAT:.LC0
    call    strtok
    # Return value 0.
    mov eax, 0
    add rsp, 8
    ret
    .cfi_endproc
.LFE58:
    .size   main, .-main
    .ident  "GCC: (Ubuntu/Linaro 4.4.4-14ubuntu5) 4.4.5"
    .section    .note.GNU-stack,"",@progbits
Run Code Online (Sandbox Code Playgroud)

我能看到的唯一明显区别是:在能正常工作的版本中,GCC 直接在代码里用 MOV 指令取代了字符串常量.

非常感谢帮助

编辑 gcc(Ubuntu/Linaro 4.4.4-14ubuntu5)4.4.5,

祝好,托马斯

Tom*_*ych 5

在第二种情况下,str 指向内存中某个不可修改的静态对象.strtok 的手册页警告说,它会修改它的第一个参数,因此不能用于常量字符串.所以才会出错.


Ker*_* SB 5

strtok() 需要一个可修改的缓冲区,因为它会用空字节替换分隔符.所以你不能写 char * str = "- This, a sample string.";,因为它实际上应该是 const char * str = "- This, a sample string.";,并且指向只读内存.你可以改用下面几种做法之一:

/* Option 1: keep the sentence in a local array, giving strtok() a writable buffer. */
char str[] = "- This, a sample string.";  // local array
char * pch = strtok (str," ,.-");


/* Option 2: tokenize a heap-allocated copy of the literal and free() it afterwards. */
/* NOTE(review): the strdup() result is used without a NULL check -- confirm whether one is wanted. */
char * str = strdup("- This, a sample string.");  // malloc()ed
char * pch = strtok (str," ,.-");
/* ... */
free(str);
Run Code Online (Sandbox Code Playgroud)