and*_*wrk 3 linux assembly gcc x86-64 clang
这段代码有什么问题(在x86_64 Linux上运行)?
# Freestanding program meant to write "Hello, world!\n" to fd 1 and exit(0).
# This is the variant that does NOT behave as intended: the strace output
# further down shows write() being passed a bogus pointer.
# Intel syntax is selected here without the `noprefix` argument.
.intel_syntax
.text
.globl _start
_start:
# rax = 1 selects write (see the strace trace); rdi = 1 is the file descriptor.
mov rax, 1
mov rdi, 1
# Intended to place the ADDRESS of msg in rsi. As the disassembly further down
# shows, this is assembled as a 64-bit load FROM msg, so rsi receives the
# first eight bytes of the string (0x77202c6f6c6c6548) and write() returns EFAULT.
mov rsi, msg
# rdx = 14: number of bytes to write (the length of the string at msg).
mov rdx, 14
syscall
# rax = 60 selects exit (see the strace trace); rdi = 0 is the exit status.
mov rax, 60
mov rdi, 0
syscall
.data
msg:
# 14 bytes; .ascii does not append a terminating NUL.
.ascii "Hello, world!\n"
Run Code Online (Sandbox Code Playgroud)
当我运行它:
$ clang -o hello_intel hello_intel.s -nostdlib && ./hello_intel
Run Code Online (Sandbox Code Playgroud)
没有任何输出。用 strace 跟踪一下:
$ strace ./hello_intel
execve("./hello_intel", ["./hello_intel"], [/* 96 vars */]) = 0
write(1, 0x77202c6f6c6c6548, 14) = -1 EFAULT (Bad address)
exit(0) = ?
+++ exited with 0 +++
Run Code Online (Sandbox Code Playgroud)
它对 msg 进行了解引用(读取了 msg 处的内容),而不是使用 msg 的地址。为什么?
如果我使用AT&T语法代替......
# AT&T-syntax version of the same program: writes "Hello, world!\n" to fd 1,
# then exits with status 0. This is the variant that works.
.text
.globl _start
_start:
# rax = 1 selects write; rdi = 1 is the file descriptor.
mov $1, %rax
mov $1, %rdi
# `$msg` is an immediate operand: rsi receives the address of msg itself
# (0x600116 in the disassembly further down), not the bytes stored there.
mov $msg, %rsi
# rdx = 14: number of bytes to write (the length of the string at msg).
mov $14, %rdx
syscall
# rax = 60 selects exit; rdi = 0 is the exit status.
mov $60, %rax
mov $0, %rdi
syscall
.data
msg:
# 14 bytes; .ascii does not append a terminating NUL.
.ascii "Hello, world!\n"
Run Code Online (Sandbox Code Playgroud)
......工作正常:
$ clang -o hello_att hello_att.s -nostdlib && ./hello_att
Hello, world!
Run Code Online (Sandbox Code Playgroud)
这两者有什么区别?
这是能正常工作的那个:
$ objdump -d hello_att -s -M intel
hello_att: file format elf64-x86-64
Contents of section .text:
4000e8 48c7c001 00000048 c7c70100 000048c7 H......H......H.
4000f8 c6160160 0048c7c2 0e000000 0f0548c7 ...`.H........H.
400108 c03c0000 0048c7c7 00000000 0f05 .<...H........
Contents of section .data:
600116 48656c6c 6f2c2077 6f726c64 210a Hello, world!.
Disassembly of section .text:
00000000004000e8 <_start>:
4000e8: 48 c7 c0 01 00 00 00 mov rax,0x1
4000ef: 48 c7 c7 01 00 00 00 mov rdi,0x1
4000f6: 48 c7 c6 16 01 60 00 mov rsi,0x600116
4000fd: 48 c7 c2 0e 00 00 00 mov rdx,0xe
400104: 0f 05 syscall
400106: 48 c7 c0 3c 00 00 00 mov rax,0x3c
40010d: 48 c7 c7 00 00 00 00 mov rdi,0x0
400114: 0f 05 syscall
Run Code Online (Sandbox Code Playgroud)
这是有问题的那个:
$ objdump -d hello_intel -s -M intel
hello_intel: file format elf64-x86-64
Contents of section .text:
4000e8 48c7c001 00000048 c7c70100 0000488b H......H......H.
4000f8 34251701 600048c7 c20e0000 000f0548 4%..`.H........H
400108 c7c03c00 000048c7 c7000000 000f05 ..<...H........
Contents of section .data:
600117 48656c6c 6f2c2077 6f726c64 210a Hello, world!.
Disassembly of section .text:
00000000004000e8 <_start>:
4000e8: 48 c7 c0 01 00 00 00 mov rax,0x1
4000ef: 48 c7 c7 01 00 00 00 mov rdi,0x1
4000f6: 48 8b 34 25 17 01 60 mov rsi,QWORD PTR ds:0x600117
4000fd: 00
4000fe: 48 c7 c2 0e 00 00 00 mov rdx,0xe
400105: 0f 05 syscall
400107: 48 c7 c0 3c 00 00 00 mov rax,0x3c
40010e: 48 c7 c7 00 00 00 00 mov rdi,0x0
400115: 0f 05 syscall
Run Code Online (Sandbox Code Playgroud)
所以这里的关键区别是 0x600116 与 QWORD PTR ds:0x600117:这看起来确实就是"指针"与"对指针解引用"之间的区别。
那么,在 Intel 语法的代码中,怎样才能不解引用这个指针(而是取得它的地址)呢?
这里的代码在GCC中有效:
# Intel-syntax version that assembles correctly with GCC: writes
# "Hello, world!\n" to fd 1, then exits with status 0.
# `noprefix` lets registers be written without a leading `%`.
.intel_syntax noprefix
.text
.globl _start
_start:
# rax = 1 selects write; rdi = 1 is the file descriptor.
mov rax, 1
mov rdi, 1
# `offset msg` requests the address of msg as an immediate instead of a
# load from msg. The surrounding text reports that clang rejects this
# operand with "unknown token in expression".
mov rsi, offset msg
# rdx = 14: number of bytes to write (the length of the string at msg).
mov rdx, 14
syscall
# rax = 60 selects exit; rdi = 0 is the exit status.
mov rax, 60
mov rdi, 0
syscall
.data
msg:
# 14 bytes; .ascii does not append a terminating NUL.
.ascii "Hello, world!\n"
Run Code Online (Sandbox Code Playgroud)
noprefix 和 offset 两者都必须加上。遗憾的是,这在 clang 中无法工作:
hello_intel.s:8:24: error: unknown token in expression
mov rsi, offset msg
^
Run Code Online (Sandbox Code Playgroud)
不过,可以用 lea 代替 mov 来绕过这个问题:
# Replacement for the `mov rsi, ...` line: lea computes the RIP-relative
# address of msg and stores that address in rsi without reading memory.
lea rsi, [msg+rip]
Run Code Online (Sandbox Code Playgroud)
这适用于clang和gcc.