Sho*_*ore 2 cpu x86 assembly multiprocessing apic
我正在编写引导加载程序并尝试测试处理器间中断。我有以下两个问题阻止了我:
1、在哪里可以找到启动AP的流程;
2、在发出IPI时,我应该加载内存地址告诉目标处理器从哪个内存地址开始。
感谢您的回答,如果您能附上一个装配示例。
我从现已解散的 Stackoverflow 文档项目中提取了它。这最初是由 Margaret Bloom 编写的,我已经清理了她的代码。由于这不是我自己的,我将其标记为社区维基。可能有些信息可能对您有用。
此示例将唤醒每个应用处理器(AP) 并使它们与引导处理器(BSP) 一起显示其 LAPIC ID。
; Assemble boot sector and insert it into a 1.44MiB floppy image
;
; nasm -f bin boot.asm -o boot.bin
; dd if=/dev/zero of=disk.img bs=512 count=2880
; dd if=boot.bin of=disk.img bs=512 conv=notrunc
BITS 16
; Bootloader starts at segment:offset 07c0h:0000h
section bootloader, vstart=0000h
jmp 7c0h:__START__
__START__:
mov ax, cs
mov ds, ax
mov es, ax
mov ss, ax
xor sp, sp
cld
;Clear screen
mov ax, 03h
int 10h
;Set limit of 4GiB and base 0 for FS and GS
call 7c0h:unrealmode
;Enable the APIC
call enable_lapic
;Move the payload to the expected address
mov si, payload_start_abs
mov cx, payload_end-payload + 1
mov di, 400h ;7c0h:400h = 8000h
rep movsb
;Wakeup the other APs
;INIT
call lapic_send_init
mov cx, WAIT_10_ms
call us_wait
;SIPI
call lapic_send_sipi
mov cx, WAIT_200_us
call us_wait
;SIPI
call lapic_send_sipi
;Jump to the payload
jmp 0000h:8000h
;Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll
; Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll
;Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll
;CX = Wait (in ms) Max 65536 us (=0 on input)
us_wait:
mov dx, 80h ;POST Diagnose port, 1us per IO
xor si, si
rep outsb
ret
WAIT_10_ms EQU 10000
WAIT_200_us EQU 200
;Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll
; Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll
;Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll
enable_lapic:
;Enable the APIC globally
;On P6 CPU once this flag is set to 0, it cannot be set back to 16
;Without an HARD RESET
mov ecx, IA32_APIC_BASE_MSR
rdmsr
or ah, 08h ;bit11: APIC GLOBAL Enable/Disable
wrmsr
;Mask off lower 12 bits to get the APIC base address
and ah, 0f0h
mov DWORD [APIC_BASE], eax
;Newer processors enables the APIC through the Spurious Interrupt Vector register
mov ecx, DWORD [fs: eax + APIC_REG_SIV]
or ch, 01h ;bit8: APIC SOFTWARE enable/disable
mov DWORD [fs: eax+APIC_REG_SIV], ecx
ret
;Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll
; Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll
;Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll
lapic_send_sipi:
mov eax, DWORD [APIC_BASE]
;Destination field is set to 0 has we will use a shorthand
xor ebx, ebx
mov DWORD [fs: eax+APIC_REG_ICR_HIGH], ebx
;Vector: 08h (Will make the CPU execute instruction ad address 08000h)
;Delivery mode: Startup
;Destination mode: ignored (0)
;Level: ignored (1)
;Trigger mode: ignored (0)
;Shorthand: All excluding self (3)
mov ebx, 0c4608h
mov DWORD [fs: eax+APIC_REG_ICR_LOW], ebx ;Writing the low DWORD sent the IPI
ret
;Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll
; Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll
;Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll
lapic_send_init:
mov eax, DWORD [APIC_BASE]
;Destination field is set to 0 has we will use a shorthand
xor ebx, ebx
mov DWORD [fs: eax+APIC_REG_ICR_HIGH], ebx
;Vector: 00h
;Delivery mode: Startup
;Destination mode: ignored (0)
;Level: ignored (1)
;Trigger mode: ignored (0)
;Shorthand: All excluding self (3)
mov ebx, 0c4500h
mov DWORD [fs: eax+APIC_REG_ICR_LOW], ebx ;Writing the low DWORD sent the IPI
ret
IA32_APIC_BASE_MSR EQU 1bh
APIC_REG_SIV EQU 0f0h
APIC_REG_ICR_LOW EQU 300h
APIC_REG_ICR_HIGH EQU 310h
APIC_REG_ID EQU 20h
;Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll
; Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll
;Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll
APIC_BASE dd 00h
;Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll
; Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll
;Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll
unrealmode:
lgdt [cs:GDT]
cli
mov eax, cr0
or ax, 01h
mov cr0, eax
mov bx, 08h
mov fs, bx
mov gs, bx
and ax, 0fffeh
mov cr0, eax
sti
;IMPORTAT: This call is FAR!
;So it can be called from everywhere
retf
GDT:
dw 0fh
dd GDT + 7c00h
dw 00h
dd 0000ffffh
dd 00cf9200h
;Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll
; Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll
;Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll Ll
payload_start_abs:
; payload starts at segment:offset 0800h:0000h
section payload, vstart=0000h, align=1
payload:
;IMPORTANT NOTE: Here we are in a \"new\" CPU every state we set before is no
;more present here (except for the BSP, but we handler every processor with
;the same code).
jmp 800h: __RESTART__
__RESTART__:
mov ax, cs
mov ds, ax
xor sp, sp
cld
;IMPORTANT: We can't use the stack yet. Every CPU is pointing to the same stack!
;Get an unique id
mov ax, WORD [counter]
.try:
mov bx, ax
inc bx
lock cmpxchg WORD [counter], bx
jnz .try
mov cx, ax ;Save this unique id
;Stack segment = CS + unique id * 1000
shl ax, 12
mov bx, cs
add ax, bx
mov ss, ax
;Text buffer
push 0b800h
pop es
;Set unreal mode again
call 7c0h:unrealmode
;Use GS for old variables
mov ax, 7c0h
mov gs, ax
;Calculate text row
mov ax, cx
mov bx, 160d ;80 * 2
mul bx
mov di, ax
;Get LAPIC id
mov ebx, DWORD [gs:APIC_BASE]
mov edx, DWORD [fs:ebx + APIC_REG_ID]
shr edx, 24d
call itoa8
cli
hlt
;DL = Number
;DI = ptr to text buffer
itoa8:
mov bx, dx
shr bx, 0fh
mov al, BYTE [bx + digits]
mov ah, 09h
stosw
mov bx, dx
and bx, 0fh
mov al, BYTE [bx + digits]
mov ah, 09h
stosw
ret
digits db \"0123456789abcdef\"
counter dw 0
payload_end:
; Boot signature is at physical offset 01feh of
; the boot sector
section bootsig, start=01feh
dw 0aa55h
Run Code Online (Sandbox Code Playgroud)
有两个主要步骤要执行:
1. 唤醒 AP
这是通过向所有 AP发出INIT-SIPI-SIPI (ISS) 序列来实现的。
将发送 ISS 序列的 BSP 使用速记All 排除 self作为目的地,从而以所有 AP为目标。
SIPI(启动处理器间中断)在收到它时被唤醒的所有 CPU 都会忽略,因此如果第一个 SIPI 足以唤醒目标处理器,则第二个 SIPI 将被忽略。出于兼容性原因,英特尔建议这样做。
SIPI 包含一个向量,这与中断向量(又名中断号)的含义相似,但在实践中完全不同。
该向量是一个 8 位数字,值为V(以基数 16表示为vv),它使 CPU 在物理地址0vv000h处开始执行指令。
我们将0vv000h称为唤醒地址(WA)。
WA 强制在 4KiB(或页面)边界。
我们将使用 08h 作为V,然后 WA 是08000h,引导加载程序之后的 400h 字节。
这将控制权交给 AP。
2. 初始化和区分AP
在WA 有一个可执行代码是必要的。引导加载程序位于7c00h 处,因此我们需要在页面边界重新定位一些代码。
编写有效负载时要记住的第一件事是对共享资源的任何访问都必须受到保护或区分。
一个常见的共享资源是堆栈,如果我们天真地初始化堆栈,每个 AP 最终将使用相同的堆栈!
第一步是使用不同的堆栈地址,从而区分堆栈。
我们通过为每个 CPU 分配一个基于零的唯一编号来实现这一点。这个数字,我们称之为index,用于区分堆栈和 CPU 将写入其 APIC ID 的行。
每个 CPU 的堆栈地址是800h:(index * 1000h)给每个 AP 64KiB 的堆栈。
每个 CPU 的行号是索引,因此指向文本缓冲区的指针是 80 * 2 * 索引。
生成索引 alock cmpxchg
用于自动递增并返回一个 WORD。
最后说明
* 对端口 80h 的写入用于产生 1 µs 的延迟。
*unrealmode
是一个远例程,所以它也可以在唤醒后调用。
* BSP 也跳转到 WA。
截屏
来自具有 8 个处理器的 Bochs