Mar*_*tin 4 python performance for-loop rust
我正在通过实现 Mandelbrot 算法来学习 Rust,结果发现一个空的 2500 万次迭代的循环(大约相当于一张 6k 图像)需要 0.5 秒。我觉得这相当慢。于是我用 Python 测试了一下,发现花费的时间几乎相同。难道 Python 的 for 循环真的是几乎零成本的抽象吗?这真的是我用英特尔 i7 能得到的最好结果吗?
Rust:
use std::time::Instant;
// Times two nested, empty 5000-iteration loops and prints how long they took.
fn main() {
// Start of the measured interval.
let before = Instant::now();
// 5000 * 5000 = 25,000,000 iterations in total; both loop bodies are empty,
// so only the loop machinery itself is being timed.
for i in 0..5000 {
for j in 0..5000 {}
}
// `{:.2?}` prints the elapsed Duration in Debug form with two decimal places.
println!("Elapsed time: {:.2?}", before.elapsed());
}
>>> Elapsed time: 406.90ms
Run Code Online (Sandbox Code Playgroud)
Python:
import time
# Timestamp (seconds since the epoch) taken just before the loops start.
s = time.time()
# 5000 * 5000 = 25,000,000 iterations that do nothing, so only the loop
# overhead itself is being timed.
for i in range(5000):
    for j in range(5000):
        pass
# Print the elapsed time in seconds.
print(time.time()-s)
>>> 0.5715351104736328
Run Code Online (Sandbox Code Playgroud)
更新:如果我使用初始化的元组而不是范围,python 甚至比 rust 更快 -> 0.33s
Joh*_*ica 13
如果您正在进行性能测试,请始终使用 --release。默认情况下,Cargo 构建时会启用调试信息并禁用优化。启用优化后,优化器会将这些循环完全消除。在 Playground 上,耗时从 975 毫秒下降到 1.25µs。
让我们看一下Godbolt上的汇编,其中只有循环,没有计时器:
\npub fn main() {\n for i in 0..5000 {\n for j in 0..5000 {}\n }\n}\nRun Code Online (Sandbox Code Playgroud)\n没有优化:
\n<i32 as core::iter::range::Step>::forward_unchecked:\n push rax\n mov eax, esi\n add edi, eax\n mov dword ptr [rsp + 4], edi\n mov eax, dword ptr [rsp + 4]\n mov dword ptr [rsp], eax\n mov eax, dword ptr [rsp]\n pop rcx\n ret\n\ncore::intrinsics::copy_nonoverlapping:\n push rax\n mov qword ptr [rsp], rsi\n mov rsi, rdi\n mov rdi, qword ptr [rsp]\n shl rdx, 2\n call memcpy@PLT\n pop rax\n ret\n\ncore::cmp::impls::<impl core::cmp::PartialOrd for i32>::lt:\n mov eax, dword ptr [rdi]\n cmp eax, dword ptr [rsi]\n setl al\n and al, 1\n movzx eax, al\n ret\n\ncore::mem::replace:\n sub rsp, 40\n mov qword ptr [rsp], rdi\n mov dword ptr [rsp + 12], esi\n mov byte ptr [rsp + 23], 0\n mov byte ptr [rsp + 23], 1\n mov rax, qword ptr [rip + core::ptr::read@GOTPCREL]\n call rax\n mov ecx, eax\n mov dword ptr [rsp + 16], ecx\n jmp .LBB3_1\n.LBB3_1:\n mov esi, dword ptr [rsp + 12]\n mov rdi, qword ptr [rsp]\n mov byte ptr [rsp + 23], 0\n mov rcx, qword ptr [rip + core::ptr::write@GOTPCREL]\n call rcx\n jmp .LBB3_4\n.LBB3_2:\n test byte ptr [rsp + 23], 1\n jne .LBB3_8\n jmp .LBB3_7\n mov rcx, rax\n mov eax, edx\n mov qword ptr [rsp + 24], rcx\n mov dword ptr [rsp + 32], eax\n jmp .LBB3_2\n.LBB3_4:\n mov eax, dword ptr [rsp + 16]\n add rsp, 40\n ret\n.LBB3_5:\n jmp .LBB3_2\n mov rcx, rax\n mov eax, edx\n mov qword ptr [rsp + 24], rcx\n mov dword ptr [rsp + 32], eax\n jmp .LBB3_5\n.LBB3_7:\n mov rdi, qword ptr [rsp + 24]\n call _Unwind_Resume@PLT\n ud2\n.LBB3_8:\n jmp .LBB3_7\n\ncore::ptr::read:\n sub rsp, 24\n mov qword ptr [rsp + 8], rdi\n mov eax, dword ptr [rsp + 20]\n mov dword ptr [rsp + 16], eax\n jmp .LBB4_2\n.LBB4_2:\n mov rdi, qword ptr [rsp + 8]\n lea rsi, [rsp + 16]\n mov edx, 1\n call qword ptr [rip + core::intrinsics::copy_nonoverlapping@GOTPCREL]\n mov eax, dword ptr [rsp + 16]\n mov dword ptr [rsp + 4], eax\n mov eax, dword ptr [rsp + 4]\n add rsp, 24\n ret\n\ncore::ptr::write:\n sub rsp, 4\n mov dword ptr [rsp], esi\n mov eax, dword ptr [rsp]\n mov dword ptr [rdi], 
eax\n add rsp, 4\n ret\n\ncore::iter::range::<impl core::iter::traits::iterator::Iterator for core::ops::range::Range<A>>::next:\n push rax\n call qword ptr [rip + <core::ops::range::Range<T> as core::iter::range::RangeIteratorImpl>::spec_next@GOTPCREL]\n mov dword ptr [rsp], eax\n mov dword ptr [rsp + 4], edx\n mov edx, dword ptr [rsp + 4]\n mov eax, dword ptr [rsp]\n pop rcx\n ret\n\ncore::clone::impls::<impl core::clone::Clone for i32>::clone:\n mov eax, dword ptr [rdi]\n ret\n\n<I as core::iter::traits::collect::IntoIterator>::into_iter:\n mov edx, esi\n mov eax, edi\n ret\n\n<core::ops::range::Range<T> as core::iter::range::RangeIteratorImpl>::spec_next:\n sub rsp, 40\n mov rsi, rdi\n mov qword ptr [rsp + 16], rsi\n mov rdi, rsi\n add rsi, 4\n call core::cmp::impls::<impl core::cmp::PartialOrd for i32>::lt\n mov byte ptr [rsp + 31], al\n mov al, byte ptr [rsp + 31]\n test al, 1\n jne .LBB9_3\n jmp .LBB9_2\n.LBB9_2:\n mov dword ptr [rsp + 32], 0\n jmp .LBB9_7\n.LBB9_3:\n mov rdi, qword ptr [rsp + 16]\n call core::clone::impls::<impl core::clone::Clone for i32>::clone\n mov dword ptr [rsp + 12], eax\n mov edi, dword ptr [rsp + 12]\n mov esi, 1\n call <i32 as core::iter::range::Step>::forward_unchecked\n mov dword ptr [rsp + 8], eax\n mov esi, dword ptr [rsp + 8]\n mov rdi, qword ptr [rsp + 16]\n call qword ptr [rip + core::mem::replace@GOTPCREL]\n mov dword ptr [rsp + 4], eax\n mov eax, dword ptr [rsp + 4]\n mov dword ptr [rsp + 36], eax\n mov dword ptr [rsp + 32], 1\n.LBB9_7:\n mov eax, dword ptr [rsp + 32]\n mov edx, dword ptr [rsp + 36]\n add rsp, 40\n ret\n\nexample::main:\n sub rsp, 72\n mov dword ptr [rsp + 24], 0\n mov dword ptr [rsp + 28], 5000\n mov edi, dword ptr [rsp + 24]\n mov esi, dword ptr [rsp + 28]\n call qword ptr [rip + <I as core::iter::traits::collect::IntoIterator>::into_iter@GOTPCREL]\n mov dword ptr [rsp + 16], eax\n mov dword ptr [rsp + 20], edx\n mov eax, dword ptr [rsp + 20]\n mov ecx, dword ptr [rsp + 16]\n mov dword ptr [rsp + 32], 
ecx\n mov dword ptr [rsp + 36], eax\n.LBB10_2:\n mov rax, qword ptr [rip + core::iter::range::<impl core::iter::traits::iterator::Iterator for core::ops::range::Range<A>>::next@GOTPCREL]\n lea rdi, [rsp + 32]\n call rax\n mov dword ptr [rsp + 44], edx\n mov dword ptr [rsp + 40], eax\n mov eax, dword ptr [rsp + 40]\n test rax, rax\n je .LBB10_5\n jmp .LBB10_13\n.LBB10_13:\n jmp .LBB10_6\n ud2\n.LBB10_5:\n add rsp, 72\n ret\n.LBB10_6:\n mov dword ptr [rsp + 48], 0\n mov dword ptr [rsp + 52], 5000\n mov edi, dword ptr [rsp + 48]\n mov esi, dword ptr [rsp + 52]\n call qword ptr [rip + <I as core::iter::traits::collect::IntoIterator>::into_iter@GOTPCREL]\n mov dword ptr [rsp + 8], eax\n mov dword ptr [rsp + 12], edx\n mov eax, dword ptr [rsp + 12]\n mov ecx, dword ptr [rsp + 8]\n mov dword ptr [rsp + 56], ecx\n mov dword ptr [rsp + 60], eax\n.LBB10_8:\n mov rax, qword ptr [rip + core::iter::range::<impl core::iter::traits::iterator::Iterator for core::ops::range::Range<A>>::next@GOTPCREL]\n lea rdi, [rsp + 56]\n call rax\n mov dword ptr [rsp + 68], edx\n mov dword ptr [rsp + 64], eax\n mov eax, dword ptr [rsp + 64]\n test rax, rax\n je .LBB10_11\n jmp .LBB10_14\n.LBB10_14:\n jmp .LBB10_12\n ud2\n.LBB10_11:\n jmp .LBB10_2\n.LBB10_12:\n jmp .LBB10_8\n\n__rustc_debug_gdb_scripts_section__:\n .asciz "\\001gdb_load_rust_pretty_printers.py"\n\nDW.ref.rust_eh_personality:\n .quad rust_eh_personality\nRun Code Online (Sandbox Code Playgroud)\n启用优化后:\nexample::main:\n ret\nRun Code Online (Sandbox Code Playgroud)\n