Jak*_*ały 5 java performance jvm jmh
我注意到,使用 if-else 或三元运算符(condition ? a : b)进行赋值,比仅使用 if 语句进行条件赋值更快。我在不同的 JDK 上执行了 JMH 基准测试,但这里只关注 JDK 12。
源代码:
/**
 * JMH benchmark that compares four source-level ways of finding the maximum
 * element of an {@code int[]}: a plain {@code if}, an {@code if/else} whose
 * else branch is a no-op, a ternary expression, and {@link Math#max(int, int)}.
 *
 * <p>All four methods scan the same array supplied by {@link Mock} and hand the
 * result to the {@code Blackhole}. The loop bodies are intentionally kept
 * different only in the way the conditional assignment is spelled, so do not
 * "clean up" one variant without considering what is being measured.
 */
@State(Scope.Benchmark)
public class FindMaxBenchmark {
// Number of elements in the benchmark array; read by Mock when allocating and filling tab.
public static int SIZE = 1_000_000;
/**
 * Variant 1: updates {@code result} inside a plain {@code if} with no else branch.
 *
 * @param bh   sink that receives the computed maximum
 * @param mock per-thread state providing the array to scan
 */
@Benchmark
@CompilerControl(CompilerControl.Mode.DONT_INLINE)
public static void findMax_if(Blackhole bh, Mock mock) {
int result = Integer.MIN_VALUE;
int[] data = mock.tab;
for (int i = 0; i < data.length; i++) {
if (data[i] > result) {
result = data[i];
}
}
// Pass the value to the Blackhole so the computed maximum is actually used.
bh.consume(result);
}
/**
 * Variant 2: same comparison as {@link #findMax_if}, plus an else branch that
 * assigns {@code result} to itself.
 *
 * @param bh   sink that receives the computed maximum
 * @param mock per-thread state providing the array to scan
 */
@Benchmark
@CompilerControl(CompilerControl.Mode.DONT_INLINE)
public static void findMax_if_else(Blackhole bh, Mock mock) {
int result = Integer.MIN_VALUE;
int[] data = mock.tab;
for (int i = 0; i < data.length; i++) {
if (data[i] > result) {
result = data[i];
} else {
// Deliberate self-assignment: this no-op else branch is the only
// source-level difference from findMax_if and is what the benchmark probes.
result = result;
}
}
bh.consume(result);
}
/**
 * Variant 3: expresses the update as a single ternary assignment.
 *
 * @param bh   sink that receives the computed maximum
 * @param mock per-thread state providing the array to scan
 */
@Benchmark
@CompilerControl(CompilerControl.Mode.DONT_INLINE)
public static void findMax_ternary(Blackhole bh, Mock mock) {
int result = Integer.MIN_VALUE;
int[] data = mock.tab;
for (int i = 0; i < data.length; i++) {
result = data[i] > result ? data[i] : result;
}
bh.consume(result);
}
/**
 * Variant 4: delegates the comparison to {@link Math#max(int, int)}.
 *
 * @param bh   sink that receives the computed maximum
 * @param mock per-thread state providing the array to scan
 */
@Benchmark
@CompilerControl(CompilerControl.Mode.DONT_INLINE)
public static void findMax_intrinsicMax(Blackhole bh, Mock mock) {
int result = Integer.MIN_VALUE;
int[] data = mock.tab;
for (int i = 0; i < data.length; i++) {
result = Math.max(data[i], result);
}
bh.consume(result);
}
/**
 * Thread-scoped JMH state that owns the input array scanned by the benchmarks.
 */
@State(Scope.Thread)
public static class Mock {
// Starts as SIZE zeros; replaced with random values by setup().
private int[] tab = new int[SIZE];
/**
 * @return the current input array itself (not a copy)
 */
public int[] getTab() {
return tab;
}
/**
 * Replaces {@code tab} with {@code SIZE} freshly generated random ints.
 * Declared with {@code Level.Iteration}, so the data changes between iterations;
 * the {@link Random} is unseeded, so values differ from run to run.
 */
@Setup(Level.Iteration)
public void setup() {
Random r = new Random();
this.tab = r.ints(SIZE).toArray();
}
}
}
Run Code Online (Sandbox Code Playgroud)
findMax_if_else perfasm 输出(三元几乎相同):
c2, level 4, codes.dbg.FindMaxBenchmark::findMax_if_else, version 493 (165 bytes)
0x00007fc7a8671a6b: cmp r8d,ebp
? 0x00007fc7a8671a6e: jae 0x00007fc7a8671b3d
? 0x00007fc7a8671a74: mov edx,DWORD PTR [r9+0x10] ;*iaload {reexecute=0 rethrow=0 return_oop=0}
? ; - codes.dbg.FindMaxBenchmark::findMax_if_else@21 (line 34)
? 0x00007fc7a8671a78: cmp edx,0x80000000
?? 0x00007fc7a8671a7e: jg 0x00007fc7a8671a85 ;*if_icmple {reexecute=0 rethrow=0 return_oop=0}
?? ; - codes.dbg.FindMaxBenchmark::findMax_if_else@23 (line 34)
?? 0x00007fc7a8671a80: mov edx,0x80000000 ;*iinc {reexecute=0 rethrow=0 return_oop=0}
?? ; - codes.dbg.FindMaxBenchmark::findMax_if_else@36 (line 33)
?? 0x00007fc7a8671a85: mov ebx,ebp
0.02% ? 0x00007fc7a8671a87: add ebx,0xfffffffd
? 0x00007fc7a8671a8a: cmp r8d,ebx
? 0x00007fc7a8671a8d: cmovl ebx,r11d
? 0x00007fc7a8671a91: mov r8d,0x1
0.00% ? 0x00007fc7a8671a97: cmp ebx,0x1
? ? 0x00007fc7a8671a9a: jle 0x00007fc7a8671b00
? ? 0x00007fc7a8671a9c: mov rdi,r9 ;*goto {reexecute=0 rethrow=0 return_oop=0}
? ? ; - codes.dbg.FindMaxBenchmark::findMax_if_else@39 (line 33)
? ?? 0x00007fc7a8671a9f: jmp 0x00007fc7a8671ab9
0.01% ? ?? ? 0x00007fc7a8671aa1: mov edx,ecx
? ?? ? 0x00007fc7a8671aa3: nop DWORD PTR [rax+0x0]
? ?? ? 0x00007fc7a8671aaa: nop WORD PTR [rax+rax*1+0x0]
8.06% ? ?? ?? 0x00007fc7a8671ab0: add r8d,0x4 ;*iinc {reexecute=0 rethrow=0 return_oop=0}
? ?? ?? ; - codes.dbg.FindMaxBenchmark::findMax_if_else@36 (line 33)
11.38% ? ?? ?? 0x00007fc7a8671ab4: cmp r8d,ebx
13.63% ? ??? ?? 0x00007fc7a8671ab7: jge 0x00007fc7a8671af1 ;*aload_3 {reexecute=0 rethrow=0 return_oop=0}
? ??? ?? ; - codes.dbg.FindMaxBenchmark::findMax_if_else@18 (line 34)
3.02% ? ??? ?? ? 0x00007fc7a8671ab9: mov r11d,DWORD PTR [r9+r8*4+0x10] ;*iaload {reexecute=0 rethrow=0 return_oop=0}
? ? ? ?? ? ; - codes.dbg.FindMaxBenchmark::findMax_if_else@21 (line 34)
8.53% ? ? ? ?? ? 0x00007fc7a8671abe: cmp r11d,edx
4.54% ? ? ?? ?? ? 0x00007fc7a8671ac1: jg 0x00007fc7a8671ae2 ;*iinc {reexecute=0 rethrow=0 return_oop=0}
? ? ?? ?? ? ; - codes.dbg.FindMaxBenchmark::findMax_if_else@36 (line 33)
4.96% ? ? ?? ??? ? 0x00007fc7a8671ac3: mov r11d,DWORD PTR [r9+r8*4+0x14] ;*iaload {reexecute=0 rethrow=0 return_oop=0}
? ? ?? ??? ? ; - codes.dbg.FindMaxBenchmark::findMax_if_else@21 (line 34)
3.73% ? ? ?? ??? ? 0x00007fc7a8671ac8: cmp r11d,edx
9.19% ? ? ??? ??? ? 0x00007fc7a8671acb: jg 0x00007fc7a8671ae7 ;*iinc {reexecute=0 rethrow=0 return_oop=0}
? ? ??? ??? ? ; - codes.dbg.FindMaxBenchmark::findMax_if_else@36 (line 33)
3.70% ? ? ??? ???? ? 0x00007fc7a8671acd: mov r11d,DWORD PTR [r9+r8*4+0x18] ;*iaload {reexecute=0 rethrow=0 return_oop=0}
? ? ??? ???? ? ; - codes.dbg.FindMaxBenchmark::findMax_if_else@21 (line 34)
4.96% ? ? ??? ???? ? 0x00007fc7a8671ad2: cmp r11d,edx
4.45% ? ? ???????? ? 0x00007fc7a8671ad5: jg 0x00007fc7a8671aec ;*iinc {reexecute=0 rethrow=0 return_oop=0}
? ? ???????? ? ; - codes.dbg.FindMaxBenchmark::findMax_if_else@36 (line 33)
8.55% ? ? ?????????? 0x00007fc7a8671ad7: mov ecx,DWORD PTR [r9+r8*4+0x1c] ;*iaload {reexecute=0 rethrow=0 return_oop=0}
? ? ?????????? ; - codes.dbg.FindMaxBenchmark::findMax_if_else@21 (line 34)
6.11% ? ? ?????????? 0x00007fc7a8671adc: cmp ecx,edx
2.48% ? ? ?????????? 0x00007fc7a8671ade: jle 0x00007fc7a8671ab0 ;*if_icmple {reexecute=0 rethrow=0 return_oop=0}
? ? ???? ????? ; - codes.dbg.FindMaxBenchmark::findMax_if_else@23 (line 34)
? ? ???? ????? 0x00007fc7a8671ae0: jmp 0x00007fc7a8671aa1
? ? ???? ???? 0x00007fc7a8671ae2: mov edx,r11d
0.00% ? ? ? ?? ???? 0x00007fc7a8671ae5: jmp 0x00007fc7a8671ac3
0.00% ? ? ? ?? ??? 0x00007fc7a8671ae7: mov edx,r11d
0.00% ? ? ? ? ??? 0x00007fc7a8671aea: jmp 0x00007fc7a8671acd
0.00% ? ? ? ? ?? 0x00007fc7a8671aec: mov edx,r11d
0.00% ? ? ? ?? 0x00007fc7a8671aef: jmp 0x00007fc7a8671ad7
? ? ? ? 0x00007fc7a8671af1: mov r11,QWORD PTR [r15+0x108] ; ImmutableOopMap{r10=Oop r9=NarrowOop rdi=Oop }
? ? ? ;*goto {reexecute=1 rethrow=0 return_oop=0}
? ? ? ; - codes.dbg.FindMaxBenchmark::findMax_if_else@39 (line 33)
? ? ? 0x00007fc7a8671af8: test DWORD PTR [r11],eax ;*goto {reexecute=0 rethrow=0 return_oop=0}
? ? ? ; - codes.dbg.FindMaxBenchmark::findMax_if_else@39 (line 33)
? ? ? ; {poll}
? ? ? 0x00007fc7a8671afb: cmp r8d,ebx
0.00% ? ? ? 0x00007fc7a8671afe: jl 0x00007fc7a8671ab9
? ? 0x00007fc7a8671b00: cmp r8d,ebp
0.00% ? ? 0x00007fc7a8671b03: jge 0x00007fc7a8671b1a
? ? 0x00007fc7a8671b05: data16 xchg ax,ax ;*aload_3 {reexecute=0 rethrow=0 return_oop=0}
? ? ; - codes.dbg.FindMaxBenchmark::findMax_if_else@18 (line 34)
? ? ? 0x00007fc7a8671b08: mov r11d,DWORD PTR [r9+r8*4+0x10] ;*iaload {reexecute=0 rethrow=0 return_oop=0}
? ? ? ; - codes.dbg.FindMaxBenchmark::findMax_if_else@21 (line 34)
0.01% ? ? ? 0x00007fc7a8671b0d: cmp r11d,edx
? ??? 0x00007fc7a8671b10: jg 0x00007fc7a8671b38
? ???? 0x00007fc7a8671b12: inc r8d ;*iinc {reexecute=0 rethrow=0 return_oop=0}
? ???? ; - codes.dbg.FindMaxBenchmark::findMax_if_else@36 (line 33)
? ???? 0x00007fc7a8671b15: cmp r8d,ebp
? ???? 0x00007fc7a8671b18: jl 0x00007fc7a8671b08 ;*if_icmpge {reexecute=0 rethrow=0 return_oop=0}
? ?? ? ; - codes.dbg.FindMaxBenchmark::findMax_if_else@15 (line 33)
? ?? ? 0x00007fc7a8671b1a: test r10,r10
0.00% ? ? ? 0x00007fc7a8671b1d: je 0x00007fc7a8671b52
? ? ? 0x00007fc7a8671b1f: mov rsi,r10
? ? ? 0x00007fc7a8671b22: nop
? ? ? 0x00007fc7a8671b23: call 0x00007fc7a8671ba0 ; ImmutableOopMap{}
? ? ? ;*invokevirtual consume {reexecute=0 rethrow=0 return_oop=0}
? ? ? ; - codes.dbg.FindMaxBenchmark::findMax_if_else@44 (line 41)
? ? ? ; {optimized virtual_call}
? ? ? 0x00007fc7a8671b28: add rsp,0x20
0.01% ? ? ? 0x00007fc7a8671b2c: pop rbp
? ? ? 0x00007fc7a8671b2d: mov r10,QWORD PTR [r15+0x108]
? ? ? 0x00007fc7a8671b34: test DWORD PTR [r10],eax ; {poll_return}
? ? ? 0x00007fc7a8671b37: ret
? ? ? 0x00007fc7a8671b38: mov edx,r11d
? ? 0x00007fc7a8671b3b: jmp 0x00007fc7a8671b12
? 0x00007fc7a8671b3d: mov esi,0xffffff7e
0x00007fc7a8671b42: mov QWORD PTR [rsp],r10
0x00007fc7a8671b46: mov DWORD PTR [rsp+0x8],r9d
0x00007fc7a8671b4b: call 0x00007fc7a0ba3d00 ; ImmutableOopMap{[0]=Oop [8]=NarrowOop }
;*if_icmpge {reexecute=1 rethrow=0 return_oop=0}
Run Code Online (Sandbox Code Playgroud)
findMax_if perfasm 输出:
c2, level 4, codes.dbg.FindMaxBenchmark::findMax_if, version 480 (165 bytes)
0x00007f34cc66e7eb: cmp r8d,ebp
? 0x00007f34cc66e7ee: jae 0x00007f34cc66e8c4
? 0x00007f34cc66e7f4: mov edx,DWORD PTR [r9+0x10] ;*iaload {reexecute=0 rethrow=0 return_oop=0}
? ; - codes.dbg.FindMaxBenchmark::findMax_if@21 (line 19)
? 0x00007f34cc66e7f8: cmp edx,0x80000000
?? 0x00007f34cc66e7fe: jg 0x00007f34cc66e805 ;*if_icmple {reexecute=0 rethrow=0 return_oop=0}
?? ; - codes.dbg.FindMaxBenchmark::findMax_if@23 (line 19)
?? 0x00007f34cc66e800: mov edx,0x80000000 ;*iinc {reexecute=0 rethrow=0 return_oop=0}
?? ; - codes.dbg.FindMaxBenchmark::findMax_if@31 (line 18)
?? 0x00007f34cc66e805: mov ebx,ebp
0.01% ? 0x00007f34cc66e807: add ebx,0xfffffffd
? 0x00007f34cc66e80a: cmp r8d,ebx
? 0x00007f34cc66e80d: cmovl ebx,r11d
? 0x00007f34cc66e811: mov r8d,0x1
? 0x00007f34cc66e817: cmp ebx,0x1
? ? 0x00007f34cc66e81a: jle 0x00007f34cc66e880
? ? 0x00007f34cc66e81c: mov rdi,r9 ;*goto {reexecute=0 rethrow=0 return_oop=0}
? ? ; - codes.dbg.FindMaxBenchmark::findMax_if@34 (line 18)
? ?? 0x00007f34cc66e81f: jmp 0x00007f34cc66e839
? ?? ? 0x00007f34cc66e821: mov edx,ecx
0.00% ? ?? ? 0x00007f34cc66e823: nop DWORD PTR [rax+0x0]
? ?? ? 0x00007f34cc66e82a: nop WORD PTR [rax+rax*1+0x0]
0.89% ? ?? ?? 0x00007f34cc66e830: add r8d,0x4 ;*iinc {reexecute=0 rethrow=0 return_oop=0}
? ?? ?? ; - codes.dbg.FindMaxBenchmark::findMax_if@31 (line 18)
12.36% ? ?? ?? 0x00007f34cc66e834: cmp r8d,ebx
0.11% ? ??? ?? 0x00007f34cc66e837: jge 0x00007f34cc66e871 ;*aload_3 {reexecute=0 rethrow=0 return_oop=0}
? ??? ?? ; - codes.dbg.FindMaxBenchmark::findMax_if@18 (line 19)
9.94% ? ??? ?? ? 0x00007f34cc66e839: mov r11d,DWORD PTR [r9+r8*4+0x10] ;*iaload {reexecute=0 rethrow=0 return_oop=0}
? ? ? ?? ? ; - codes.dbg.FindMaxBenchmark::findMax_if@21 (line 19)
0.11% ? ? ? ?? ? 0x00007f34cc66e83e: cmp r11d,edx
10.05% ? ? ?? ?? ? 0x00007f34cc66e841: jg 0x00007f34cc66e862 ;*iinc {reexecute=0 rethrow=0 return_oop=0}
? ? ?? ?? ? ; - codes.dbg.FindMaxBenchmark::findMax_if@31 (line 18)
0.13% ? ? ?? ??? ? 0x00007f34cc66e843: mov r11d,DWORD PTR [r9+r8*4+0x14] ;*iaload {reexecute=0 rethrow=0 return_oop=0}
? ? ?? ??? ? ; - codes.dbg.FindMaxBenchmark::findMax_if@21 (line 19)
9.84% ? ? ?? ??? ? 0x00007f34cc66e848: cmp r11d,edx
0.11% ? ? ??? ??? ? 0x00007f34cc66e84b: jg 0x00007f34cc66e867 ;*iinc {reexecute=0 rethrow=0 return_oop=0}
? ? ??? ??? ? ; - codes.dbg.FindMaxBenchmark::findMax_if@31 (line 18)
10.02% ? ? ??? ???? ? 0x00007f34cc66e84d: mov r11d,DWORD PTR [r9+r8*4+0x18] ;*iaload {reexecute=0 rethrow=0 return_oop=0}
? ? ??? ???? ? ; - codes.dbg.FindMaxBenchmark::findMax_if@21 (line 19)
0.33% ? ? ??? ???? ? 0x00007f34cc66e852: cmp r11d,edx
23.63% ? ? ???????? ? 0x00007f34cc66e855: jg 0x00007f34cc66e86c ;*iinc {reexecute=0 rethrow=0 return_oop=0}
? ? ???????? ? ; - codes.dbg.FindMaxBenchmark::findMax_if@31 (line 18)
0.13% ? ? ?????????? 0x00007f34cc66e857: mov ecx,DWORD PTR [r9+r8*4+0x1c] ;*iaload {reexecute=0 rethrow=0 return_oop=0}
? ? ?????????? ; - codes.dbg.FindMaxBenchmark::findMax_if@21 (line 19)
9.89% ? ? ?????????? 0x00007f34cc66e85c: cmp ecx,edx
0.11% ? ? ?????????? 0x00007f34cc66e85e: jg 0x00007f34cc66e821 ;*if_icmple {reexecute=0 rethrow=0 return_oop=0}
? ? ???? ????? ; - codes.dbg.FindMaxBenchmark::findMax_if@23 (line 19)
9.71% ? ? ???? ????? 0x00007f34cc66e860: jmp 0x00007f34cc66e830
? ? ???? ???? 0x00007f34cc66e862: mov edx,r11d
0.00% ? ? ? ?? ???? 0x00007f34cc66e865: jmp 0x00007f34cc66e843
? ? ? ?? ??? 0x00007f34cc66e867: mov edx,r11d
0.00% ? ? ? ? ??? 0x00007f34cc66e86a: jmp 0x00007f34cc66e84d
? ? ? ? ?? 0x00007f34cc66e86c: mov edx,r11d
0.00% ? ? ? ?? 0x00007f34cc66e86f: jmp 0x00007f34cc66e857
? ? ? ? 0x00007f34cc66e871: mov r11,QWORD PTR [r15+0x108] ; ImmutableOopMap{r10=Oop r9=NarrowOop rdi=Oop }
? ? ? ;*goto {reexecute=1 rethrow=0 return_oop=0}
? ? ? ; - codes.dbg.FindMaxBenchmark::findMax_if@34 (line 18)
0.00% ? ? ? 0x00007f34cc66e878: test DWORD PTR [r11],eax ;*goto {reexecute=0 rethrow=0 return_oop=0}
? ? ? ; - codes.dbg.FindMaxBenchmark::findMax_if@34 (line 18)
? ? ? ; {poll}
? ? ? 0x00007f34cc66e87b: cmp r8d,ebx
? ? ? 0x00007f34cc66e87e: jl 0x00007f34cc66e839
? ? 0x00007f34cc66e880: cmp r8d,ebp
0.00% ? ? 0x00007f34cc66e883: jge 0x00007f34cc66e89a
? ? 0x00007f34cc66e885: data16 xchg ax,ax ;*aload_3 {reexecute=0 rethrow=0 return_oop=0}
? ? ; - codes.dbg.FindMaxBenchmark::findMax_if@18 (line 19)
0.00% ? ? ? 0x00007f34cc66e888: mov r11d,DWORD PTR [r9+r8*4+0x10] ;*iaload {reexecute=0 rethrow=0 return_oop=0}
? ? ? ; - codes.dbg.FindMaxBenchmark::findMax_if@21 (line 19)
0.01% ? ? ? 0x00007f34cc66e88d: cmp r11d,edx
? ???
首先,为了尽量减少无关 ASM 代码的数量并简化分析,让我们添加以下 JVM 选项:
-XX:LoopUnrollLimit=0 —— 关闭循环展开;-XX:-UseCountedLoopSafepoints —— 消除循环中的安全点轮询。这样一来,if_else 的性能优势会更加明显,而生成的汇编代码也会简单得多。下面是两个基准测试的循环体。
findMax_if
? 0x0000029707af78f5: jmp 29707af7908h
? ? 0x0000029707af78f7: mov r8d,ecx
? ? 0x0000029707af78fa: nop word ptr [rax+rax+0h]
0,66% ? ?? 0x0000029707af7900: inc r9d ;*iinc {reexecute=0 rethrow=0 return_oop=0}
? ?? ; - codes.dbg.FindMaxBenchmark::findMax_if@31 (line 18)
1,02% ? ?? 0x0000029707af7903: cmp r9d,r10d
???? 0x0000029707af7906: jnl 29707af7914h ;*aload_3 {reexecute=0 rethrow=0 return_oop=0}
???? ; - codes.dbg.FindMaxBenchmark::findMax_if@18 (line 19)
2,06% ???? 0x0000029707af7908: mov ecx,dword ptr [r11+r9*4+10h]
??? ;*iaload {reexecute=0 rethrow=0 return_oop=0}
??? ; - codes.dbg.FindMaxBenchmark::findMax_if@21 (line 19)
50,86% ??? 0x0000029707af790d: cmp ecx,r8d
0,02% ??? 0x0000029707af7910: jnle 29707af78f7h ;*if_icmple {reexecute=0 rethrow=0 return_oop=0}
? ? ; - codes.dbg.FindMaxBenchmark::findMax_if@23 (line 19)
41,01% ? ? 0x0000029707af7912: jmp 29707af7900h ;*if_icmpge {reexecute=0 rethrow=0 return_oop=0}
? ; - codes.dbg.FindMaxBenchmark::findMax_if@15 (line 18)
? 0x0000029707af7914: test rbx,rbx
Run Code Online (Sandbox Code Playgroud)
findMax_if_else
? 0x00000137d24d4b75: jmp 137d24d4b88h
? ? 0x00000137d24d4b77: mov r8d,ecx
? ? 0x00000137d24d4b7a: nop word ptr [rax+rax+0h]
72,63% ? ?? 0x00000137d24d4b80: inc r9d ;*iinc {reexecute=0 rethrow=0 return_oop=0}
? ?? ; - codes.dbg.FindMaxBenchmark::findMax_if_else@36 (line 33)
0,05% ? ?? 0x00000137d24d4b83: cmp r9d,r10d
0,01% ???? 0x00000137d24d4b86: jnl 137d24d4b94h ;*aload_3 {reexecute=0 rethrow=0 return_oop=0}
???? ; - codes.dbg.FindMaxBenchmark::findMax_if_else@18 (line 34)
6,47% ???? 0x00000137d24d4b88: mov ecx,dword ptr [r11+r9*4+10h]
??? ;*iaload {reexecute=0 rethrow=0 return_oop=0}
??? ; - codes.dbg.FindMaxBenchmark::findMax_if_else@21 (line 34)
15,93% ??? 0x00000137d24d4b8d: cmp ecx,r8d
0,18% ??? 0x00000137d24d4b90: jle 137d24d4b80h ;*if_icmple {reexecute=0 rethrow=0 return_oop=0}
? ? ; - codes.dbg.FindMaxBenchmark::findMax_if_else@23 (line 34)
0,01% ? ? 0x00000137d24d4b92: jmp 137d24d4b77h ;*if_icmpge {reexecute=0 rethrow=0 return_oop=0}
? ; - codes.dbg.FindMaxBenchmark::findMax_if_else@15 (line 33)
? 0x00000137d24d4b94: test rbx,rbx
Run Code Online (Sandbox Code Playgroud)
这与您的发现一致:两份编译结果之间的唯一区别是跳转条件相反:jnle 与 jle。那么,为什么 jnle 变体更慢呢?
如果我们仔细查看基准代码,就会发现当前最大值发生变化的情况很少出现。平均而言,data[i] > result 在整个循环中只有约 14 次为真。这意味着 jnle 分支只在约 0.001% 的情况下发生跳转,其余 99.999% 的情况下执行会落到下一条 jmp 指令。
相反,第二个变体中的 jle 指令在 99.999% 的情况下都会发生跳转,执行几乎从不到达其后的 jmp。因此,第一个循环每次迭代要执行(retire)7 条指令,而第二个循环只需 6 条。
JMH 内置了 perfnorm 分析器(可在 Linux 上使用),它用 CPU 性能计数器的统计信息来补充基准测试结果。让我们加上 -prof perfnorm 运行基准测试。
Benchmark Mode Cnt Score Error Units
FindMaxBenchmark.findMax_if thrpt 10 1447.576 ± 8.854 ops/s
FindMaxBenchmark.findMax_if:CPI thrpt 0.335 #/op
FindMaxBenchmark.findMax_if:L1-dcache-load-misses thrpt 63971.361 #/op
FindMaxBenchmark.findMax_if:L1-dcache-loads thrpt 1014974.522 #/op
FindMaxBenchmark.findMax_if:L1-dcache-stores thrpt 6105.121 #/op
FindMaxBenchmark.findMax_if:L1-icache-load-misses thrpt 1641.074 #/op
FindMaxBenchmark.findMax_if:branch-misses thrpt 146.305 #/op
FindMaxBenchmark.findMax_if:branches thrpt 3006620.048 #/op
FindMaxBenchmark.findMax_if:cycles thrpt 2358093.526 #/op
FindMaxBenchmark.findMax_if:dTLB-load-misses thrpt 1085.740 #/op
FindMaxBenchmark.findMax_if:dTLB-loads thrpt 1012739.362 #/op
FindMaxBenchmark.findMax_if:dTLB-store-misses thrpt 21.985 #/op
FindMaxBenchmark.findMax_if:dTLB-stores thrpt 6146.243 #/op
FindMaxBenchmark.findMax_if:iTLB-load-misses thrpt 139.741 #/op
FindMaxBenchmark.findMax_if:iTLB-loads thrpt 42.031 #/op
FindMaxBenchmark.findMax_if:instructions thrpt 7039394.622 #/op
FindMaxBenchmark.findMax_if_else thrpt 10 2472.400 ± 36.958 ops/s
FindMaxBenchmark.findMax_if_else:CPI thrpt 0.229 #/op
FindMaxBenchmark.findMax_if_else:L1-dcache-load-misses thrpt 63353.481 #/op
FindMaxBenchmark.findMax_if_else:L1-dcache-loads thrpt 1007856.753 #/op
FindMaxBenchmark.findMax_if_else:L1-dcache-stores thrpt 3696.805 #/op
FindMaxBenchmark.findMax_if_else:L1-icache-load-misses thrpt 1182.253 #/op
FindMaxBenchmark.findMax_if_else:branch-misses thrpt 72.334 #/op
FindMaxBenchmark.findMax_if_else:branches thrpt 2000460.845 #/op
FindMaxBenchmark.findMax_if_else:cycles thrpt 1380927.546 #/op
FindMaxBenchmark.findMax_if_else:dTLB-load-misses thrpt 845.629 #/op
FindMaxBenchmark.findMax_if_else:dTLB-loads thrpt 1006135.685 #/op
FindMaxBenchmark.findMax_if_else:dTLB-store-misses thrpt 13.336 #/op
FindMaxBenchmark.findMax_if_else:dTLB-stores thrpt 3545.950 #/op
FindMaxBenchmark.findMax_if_else:iTLB-load-misses thrpt 80.233 #/op
FindMaxBenchmark.findMax_if_else:iTLB-loads thrpt 19.009 #/op
FindMaxBenchmark.findMax_if_else:instructions thrpt 6018937.376 #/op
Run Code Online (Sandbox Code Playgroud)
Perf 计数器证实:findMax_if 每次操作执行约 7M 条指令,其中 3M 条是分支;而 findMax_if_else 执行约 6M 条指令,其中 2M 条是分支。我想差异的来源现在已经很清楚了,那么其他问题呢?
添加一个不改变任何内容的 else 语句,这是正常做法吗?
我不这么认为。至少因为这看起来违反直觉,并且使代码更难阅读和理解。冗余代码恰好反转了分支条件,这纯属运气。把你的随机数组换成排好序的随机数组,使 data[i] > result 在大多数情况下为真,findMax_if 就会成为最快的选择。
如果简单的 if else 语句具有更高的吞吐量,那么使用 Math.max 有什么意义?
同样,这并不总是成立,它在很大程度上取决于数据的性质。当分支易于预测时,if 语句的性能更好;但一旦分支预测器开始频繁预测失败,性能就会急剧下降。Math.max 作为 JVM 的内在(intrinsic)方法,会被编译为无分支的 cmov 指令,其优点是无论数据分布如何,性能都保持稳定。
下面是一个示例数据集,在该数据集上 Math.max 的性能大大优于所有其他选项:
/**
 * Builds an alternative data set: {@code SIZE} random ints sorted ascending,
 * with a randomly spaced subset of elements overwritten by 0.
 *
 * <p>The stride between overwritten slots is drawn on every step from
 * {@code ThreadLocalRandom.current().nextInt(3)}, i.e. 0, 1 or 2. A stride of 0
 * simply rewrites the same slot before the next draw.
 */
public void setup() {
    final int[] sortedValues = new Random().ints(SIZE).sorted().toArray();
    this.tab = sortedValues;

    int index = 0;
    while (index < sortedValues.length) {
        sortedValues[index] = 0;
        // Advance by 0..2 so the zeroed positions are irregularly spaced.
        index += ThreadLocalRandom.current().nextInt(3);
    }
}
Run Code Online (Sandbox Code Playgroud)