Java中“if”和“if else”的不同表现

Jak*_*ały 5 java performance jvm jmh

我注意到,使用 if else / 三元表达式 (condition ? a : b) 进行赋值,比只用 if 语句进行条件赋值更快。我在不同的 JDK 上执行了 JMH 基准测试,但这里只关注 JDK 12。

(操作数/秒,越高越好) JMH 基准

源代码:

/**
 * JMH benchmark comparing four source-level ways of finding the maximum of an
 * {@code int} array: a plain {@code if}, an {@code if}/{@code else} with a
 * redundant self-assignment, a ternary expression, and {@link Math#max(int, int)}.
 *
 * <p>The loop bodies are intentionally written in different forms. Do not
 * "simplify" one variant into another: the source form is what is being compared.
 */
@State(Scope.Benchmark)
public class FindMaxBenchmark {
    /** Number of elements in the array scanned by every benchmark method. */
    public static int SIZE = 1_000_000;

    /**
     * Finds the maximum with a plain {@code if}: {@code result} is written only
     * when the current element is strictly greater.
     *
     * @param bh   sink that receives the final maximum
     * @param mock per-thread state holding the input array
     */
    @Benchmark
    @CompilerControl(CompilerControl.Mode.DONT_INLINE)
    public static void findMax_if(Blackhole bh, Mock mock) {
        int result = Integer.MIN_VALUE;
        int[] data = mock.tab;

        for (int i = 0; i < data.length; i++) {
            if (data[i] > result) {
                result = data[i];
            }
        }

        bh.consume(result);
    }

    /**
     * Same computation as {@link #findMax_if}, but with an explicit {@code else}
     * branch that assigns {@code result} to itself.
     *
     * @param bh   sink that receives the final maximum
     * @param mock per-thread state holding the input array
     */
    @Benchmark
    @CompilerControl(CompilerControl.Mode.DONT_INLINE)
    public static void findMax_if_else(Blackhole bh, Mock mock) {
        int result = Integer.MIN_VALUE;
        int[] data = mock.tab;

        for (int i = 0; i < data.length; i++) {
            if (data[i] > result) {
                result = data[i];
            } else {
                // Deliberate no-op: this redundant branch is the variant under test.
                result = result;
            }
        }

        bh.consume(result);
    }

    /**
     * Same computation expressed as a single ternary assignment per element.
     *
     * @param bh   sink that receives the final maximum
     * @param mock per-thread state holding the input array
     */
    @Benchmark
    @CompilerControl(CompilerControl.Mode.DONT_INLINE)
    public static void findMax_ternary(Blackhole bh, Mock mock) {
        int result = Integer.MIN_VALUE;
        int[] data = mock.tab;

        for (int i = 0; i < data.length; i++) {
            result = data[i] > result ? data[i] : result;
        }

        bh.consume(result);
    }

    /**
     * Same computation delegating each comparison to {@link Math#max(int, int)}.
     *
     * @param bh   sink that receives the final maximum
     * @param mock per-thread state holding the input array
     */
    @Benchmark
    @CompilerControl(CompilerControl.Mode.DONT_INLINE)
    public static void findMax_intrinsicMax(Blackhole bh, Mock mock) {
        int result = Integer.MIN_VALUE;
        int[] data = mock.tab;

        for (int i = 0; i < data.length; i++) {
            result = Math.max(data[i], result);
        }

        bh.consume(result);
    }

    /**
     * Thread-scoped benchmark state holding the input array. The benchmark
     * methods read the {@code tab} field directly rather than through
     * {@link #getTab()}.
     */
    @State(Scope.Thread)
    public static class Mock {
        // Placeholder array of SIZE zeros; replaced by setup().
        private int[] tab = new int[SIZE];

        /**
         * Returns the current input array.
         *
         * @return the backing array itself, not a copy
         */
        public int[] getTab() {
            return tab;
        }

        /**
         * Replaces {@code tab} with {@code SIZE} freshly generated random ints.
         * Annotated to run at {@code Level.Iteration}.
         */
        @Setup(Level.Iteration)
        public void setup() {
            Random r = new Random();
            this.tab = r.ints(SIZE).toArray();
        }
    }
}
Run Code Online (Sandbox Code Playgroud)

findMax_if_else perfasm 输出(三元几乎相同):

c2, level 4, codes.dbg.FindMaxBenchmark::findMax_if_else, version 493 (165 bytes)

                             0x00007fc7a8671a6b: cmp    r8d,ebp
         ?                   0x00007fc7a8671a6e: jae    0x00007fc7a8671b3d
         ?                   0x00007fc7a8671a74: mov    edx,DWORD PTR [r9+0x10]        ;*iaload {reexecute=0 rethrow=0 return_oop=0}
         ?                                                                             ; - codes.dbg.FindMaxBenchmark::findMax_if_else@21 (line 34)
         ?                   0x00007fc7a8671a78: cmp    edx,0x80000000
         ??                  0x00007fc7a8671a7e: jg     0x00007fc7a8671a85             ;*if_icmple {reexecute=0 rethrow=0 return_oop=0}
         ??                                                                            ; - codes.dbg.FindMaxBenchmark::findMax_if_else@23 (line 34)
         ??                  0x00007fc7a8671a80: mov    edx,0x80000000                 ;*iinc {reexecute=0 rethrow=0 return_oop=0}
         ??                                                                            ; - codes.dbg.FindMaxBenchmark::findMax_if_else@36 (line 33)
         ??                  0x00007fc7a8671a85: mov    ebx,ebp
  0.02%  ?                   0x00007fc7a8671a87: add    ebx,0xfffffffd
         ?                   0x00007fc7a8671a8a: cmp    r8d,ebx
         ?                   0x00007fc7a8671a8d: cmovl  ebx,r11d
         ?                   0x00007fc7a8671a91: mov    r8d,0x1
  0.00%  ?                   0x00007fc7a8671a97: cmp    ebx,0x1
         ? ?                 0x00007fc7a8671a9a: jle    0x00007fc7a8671b00
         ? ?                 0x00007fc7a8671a9c: mov    rdi,r9                         ;*goto {reexecute=0 rethrow=0 return_oop=0}
         ? ?                                                                           ; - codes.dbg.FindMaxBenchmark::findMax_if_else@39 (line 33)
         ? ??                0x00007fc7a8671a9f: jmp    0x00007fc7a8671ab9
  0.01%  ? ??     ?          0x00007fc7a8671aa1: mov    edx,ecx
         ? ??     ?          0x00007fc7a8671aa3: nop    DWORD PTR [rax+0x0]
         ? ??     ?          0x00007fc7a8671aaa: nop    WORD PTR [rax+rax*1+0x0]
  8.06%  ? ??    ??          0x00007fc7a8671ab0: add    r8d,0x4                        ;*iinc {reexecute=0 rethrow=0 return_oop=0}
         ? ??    ??                                                                    ; - codes.dbg.FindMaxBenchmark::findMax_if_else@36 (line 33)
 11.38%  ? ??    ??          0x00007fc7a8671ab4: cmp    r8d,ebx
 13.63%  ? ???   ??          0x00007fc7a8671ab7: jge    0x00007fc7a8671af1             ;*aload_3 {reexecute=0 rethrow=0 return_oop=0}
         ? ???   ??                                                                    ; - codes.dbg.FindMaxBenchmark::findMax_if_else@18 (line 34)
  3.02%  ? ???   ??   ?      0x00007fc7a8671ab9: mov    r11d,DWORD PTR [r9+r8*4+0x10]  ;*iaload {reexecute=0 rethrow=0 return_oop=0}
         ? ? ?   ??   ?                                                                ; - codes.dbg.FindMaxBenchmark::findMax_if_else@21 (line 34)
  8.53%  ? ? ?   ??   ?      0x00007fc7a8671abe: cmp    r11d,edx
  4.54%  ? ? ??  ??   ?      0x00007fc7a8671ac1: jg     0x00007fc7a8671ae2             ;*iinc {reexecute=0 rethrow=0 return_oop=0}
         ? ? ??  ??   ?                                                                ; - codes.dbg.FindMaxBenchmark::findMax_if_else@36 (line 33)
  4.96%  ? ? ??  ???  ?      0x00007fc7a8671ac3: mov    r11d,DWORD PTR [r9+r8*4+0x14]  ;*iaload {reexecute=0 rethrow=0 return_oop=0}
         ? ? ??  ???  ?                                                                ; - codes.dbg.FindMaxBenchmark::findMax_if_else@21 (line 34)
  3.73%  ? ? ??  ???  ?      0x00007fc7a8671ac8: cmp    r11d,edx
  9.19%  ? ? ??? ???  ?      0x00007fc7a8671acb: jg     0x00007fc7a8671ae7             ;*iinc {reexecute=0 rethrow=0 return_oop=0}
         ? ? ??? ???  ?                                                                ; - codes.dbg.FindMaxBenchmark::findMax_if_else@36 (line 33)
  3.70%  ? ? ??? ???? ?      0x00007fc7a8671acd: mov    r11d,DWORD PTR [r9+r8*4+0x18]  ;*iaload {reexecute=0 rethrow=0 return_oop=0}
         ? ? ??? ???? ?                                                                ; - codes.dbg.FindMaxBenchmark::findMax_if_else@21 (line 34)
  4.96%  ? ? ??? ???? ?      0x00007fc7a8671ad2: cmp    r11d,edx
  4.45%  ? ? ???????? ?      0x00007fc7a8671ad5: jg     0x00007fc7a8671aec             ;*iinc {reexecute=0 rethrow=0 return_oop=0}
         ? ? ???????? ?                                                                ; - codes.dbg.FindMaxBenchmark::findMax_if_else@36 (line 33)
  8.55%  ? ? ??????????      0x00007fc7a8671ad7: mov    ecx,DWORD PTR [r9+r8*4+0x1c]   ;*iaload {reexecute=0 rethrow=0 return_oop=0}
         ? ? ??????????                                                                ; - codes.dbg.FindMaxBenchmark::findMax_if_else@21 (line 34)
  6.11%  ? ? ??????????      0x00007fc7a8671adc: cmp    ecx,edx
  2.48%  ? ? ??????????      0x00007fc7a8671ade: jle    0x00007fc7a8671ab0             ;*if_icmple {reexecute=0 rethrow=0 return_oop=0}
         ? ? ???? ?????                                                                ; - codes.dbg.FindMaxBenchmark::findMax_if_else@23 (line 34)
         ? ? ???? ?????      0x00007fc7a8671ae0: jmp    0x00007fc7a8671aa1
         ? ? ????  ????      0x00007fc7a8671ae2: mov    edx,r11d
  0.00%  ? ? ? ??  ????      0x00007fc7a8671ae5: jmp    0x00007fc7a8671ac3
  0.00%  ? ? ? ??   ???      0x00007fc7a8671ae7: mov    edx,r11d
  0.00%  ? ? ?  ?   ???      0x00007fc7a8671aea: jmp    0x00007fc7a8671acd
  0.00%  ? ? ?  ?    ??      0x00007fc7a8671aec: mov    edx,r11d
  0.00%  ? ? ?       ??      0x00007fc7a8671aef: jmp    0x00007fc7a8671ad7
         ? ? ?        ?      0x00007fc7a8671af1: mov    r11,QWORD PTR [r15+0x108]      ; ImmutableOopMap{r10=Oop r9=NarrowOop rdi=Oop }
         ? ?          ?                                                                ;*goto {reexecute=1 rethrow=0 return_oop=0}
         ? ?          ?                                                                ; - codes.dbg.FindMaxBenchmark::findMax_if_else@39 (line 33)
         ? ?          ?      0x00007fc7a8671af8: test   DWORD PTR [r11],eax            ;*goto {reexecute=0 rethrow=0 return_oop=0}
         ? ?          ?                                                                ; - codes.dbg.FindMaxBenchmark::findMax_if_else@39 (line 33)
         ? ?          ?                                                                ;   {poll}
         ? ?          ?      0x00007fc7a8671afb: cmp    r8d,ebx
  0.00%  ? ?          ?      0x00007fc7a8671afe: jl     0x00007fc7a8671ab9
         ? ?                 0x00007fc7a8671b00: cmp    r8d,ebp
  0.00%  ?             ?     0x00007fc7a8671b03: jge    0x00007fc7a8671b1a
         ?             ?     0x00007fc7a8671b05: data16 xchg ax,ax                     ;*aload_3 {reexecute=0 rethrow=0 return_oop=0}
         ?             ?                                                               ; - codes.dbg.FindMaxBenchmark::findMax_if_else@18 (line 34)
         ?             ? ?   0x00007fc7a8671b08: mov    r11d,DWORD PTR [r9+r8*4+0x10]  ;*iaload {reexecute=0 rethrow=0 return_oop=0}
         ?             ? ?                                                             ; - codes.dbg.FindMaxBenchmark::findMax_if_else@21 (line 34)
  0.01%  ?             ? ?   0x00007fc7a8671b0d: cmp    r11d,edx
         ?             ???   0x00007fc7a8671b10: jg     0x00007fc7a8671b38
         ?             ????  0x00007fc7a8671b12: inc    r8d                            ;*iinc {reexecute=0 rethrow=0 return_oop=0}
         ?             ????                                                            ; - codes.dbg.FindMaxBenchmark::findMax_if_else@36 (line 33)
         ?             ????  0x00007fc7a8671b15: cmp    r8d,ebp
         ?             ????  0x00007fc7a8671b18: jl     0x00007fc7a8671b08             ;*if_icmpge {reexecute=0 rethrow=0 return_oop=0}
         ?             ?? ?                                                            ; - codes.dbg.FindMaxBenchmark::findMax_if_else@15 (line 33)
         ?             ?? ?  0x00007fc7a8671b1a: test   r10,r10
  0.00%  ?              ? ?  0x00007fc7a8671b1d: je     0x00007fc7a8671b52
         ?              ? ?  0x00007fc7a8671b1f: mov    rsi,r10
         ?              ? ?  0x00007fc7a8671b22: nop
         ?              ? ?  0x00007fc7a8671b23: call   0x00007fc7a8671ba0             ; ImmutableOopMap{}
         ?              ? ?                                                            ;*invokevirtual consume {reexecute=0 rethrow=0 return_oop=0}
         ?              ? ?                                                            ; - codes.dbg.FindMaxBenchmark::findMax_if_else@44 (line 41)
         ?              ? ?                                                            ;   {optimized virtual_call}
         ?              ? ?  0x00007fc7a8671b28: add    rsp,0x20
  0.01%  ?              ? ?  0x00007fc7a8671b2c: pop    rbp
         ?              ? ?  0x00007fc7a8671b2d: mov    r10,QWORD PTR [r15+0x108]
         ?              ? ?  0x00007fc7a8671b34: test   DWORD PTR [r10],eax            ;   {poll_return}
         ?              ? ?  0x00007fc7a8671b37: ret
         ?              ? ?  0x00007fc7a8671b38: mov    edx,r11d
         ?                ?  0x00007fc7a8671b3b: jmp    0x00007fc7a8671b12
         ?                   0x00007fc7a8671b3d: mov    esi,0xffffff7e
                             0x00007fc7a8671b42: mov    QWORD PTR [rsp],r10
                             0x00007fc7a8671b46: mov    DWORD PTR [rsp+0x8],r9d
                             0x00007fc7a8671b4b: call   0x00007fc7a0ba3d00             ; ImmutableOopMap{[0]=Oop [8]=NarrowOop }
                                                                                       ;*if_icmpge {reexecute=1 rethrow=0 return_oop=0}
Run Code Online (Sandbox Code Playgroud)

findMax_if perfasm 输出:

c2, level 4, codes.dbg.FindMaxBenchmark::findMax_if, version 480 (165 bytes)

                              0x00007f34cc66e7eb: cmp    r8d,ebp
         ?                    0x00007f34cc66e7ee: jae    0x00007f34cc66e8c4
         ?                    0x00007f34cc66e7f4: mov    edx,DWORD PTR [r9+0x10]        ;*iaload {reexecute=0 rethrow=0 return_oop=0}
         ?                                                                              ; - codes.dbg.FindMaxBenchmark::findMax_if@21 (line 19)
         ?                    0x00007f34cc66e7f8: cmp    edx,0x80000000
         ??                   0x00007f34cc66e7fe: jg     0x00007f34cc66e805             ;*if_icmple {reexecute=0 rethrow=0 return_oop=0}
         ??                                                                             ; - codes.dbg.FindMaxBenchmark::findMax_if@23 (line 19)
         ??                   0x00007f34cc66e800: mov    edx,0x80000000                 ;*iinc {reexecute=0 rethrow=0 return_oop=0}
         ??                                                                             ; - codes.dbg.FindMaxBenchmark::findMax_if@31 (line 18)
         ??                   0x00007f34cc66e805: mov    ebx,ebp
  0.01%  ?                    0x00007f34cc66e807: add    ebx,0xfffffffd
         ?                    0x00007f34cc66e80a: cmp    r8d,ebx
         ?                    0x00007f34cc66e80d: cmovl  ebx,r11d
         ?                    0x00007f34cc66e811: mov    r8d,0x1
         ?                    0x00007f34cc66e817: cmp    ebx,0x1
         ? ?                  0x00007f34cc66e81a: jle    0x00007f34cc66e880
         ? ?                  0x00007f34cc66e81c: mov    rdi,r9                         ;*goto {reexecute=0 rethrow=0 return_oop=0}
         ? ?                                                                            ; - codes.dbg.FindMaxBenchmark::findMax_if@34 (line 18)
         ? ??                 0x00007f34cc66e81f: jmp    0x00007f34cc66e839
         ? ??    ?            0x00007f34cc66e821: mov    edx,ecx
  0.00%  ? ??    ?            0x00007f34cc66e823: nop    DWORD PTR [rax+0x0]
         ? ??    ?            0x00007f34cc66e82a: nop    WORD PTR [rax+rax*1+0x0]
  0.89%  ? ??    ??           0x00007f34cc66e830: add    r8d,0x4                        ;*iinc {reexecute=0 rethrow=0 return_oop=0}
         ? ??    ??                                                                     ; - codes.dbg.FindMaxBenchmark::findMax_if@31 (line 18)
 12.36%  ? ??    ??           0x00007f34cc66e834: cmp    r8d,ebx
  0.11%  ? ???   ??           0x00007f34cc66e837: jge    0x00007f34cc66e871             ;*aload_3 {reexecute=0 rethrow=0 return_oop=0}
         ? ???   ??                                                                     ; - codes.dbg.FindMaxBenchmark::findMax_if@18 (line 19)
  9.94%  ? ???   ??   ?       0x00007f34cc66e839: mov    r11d,DWORD PTR [r9+r8*4+0x10]  ;*iaload {reexecute=0 rethrow=0 return_oop=0}
         ? ? ?   ??   ?                                                                 ; - codes.dbg.FindMaxBenchmark::findMax_if@21 (line 19)
  0.11%  ? ? ?   ??   ?       0x00007f34cc66e83e: cmp    r11d,edx
 10.05%  ? ? ??  ??   ?       0x00007f34cc66e841: jg     0x00007f34cc66e862             ;*iinc {reexecute=0 rethrow=0 return_oop=0}
         ? ? ??  ??   ?                                                                 ; - codes.dbg.FindMaxBenchmark::findMax_if@31 (line 18)
  0.13%  ? ? ??  ???  ?       0x00007f34cc66e843: mov    r11d,DWORD PTR [r9+r8*4+0x14]  ;*iaload {reexecute=0 rethrow=0 return_oop=0}
         ? ? ??  ???  ?                                                                 ; - codes.dbg.FindMaxBenchmark::findMax_if@21 (line 19)
  9.84%  ? ? ??  ???  ?       0x00007f34cc66e848: cmp    r11d,edx
  0.11%  ? ? ??? ???  ?       0x00007f34cc66e84b: jg     0x00007f34cc66e867             ;*iinc {reexecute=0 rethrow=0 return_oop=0}
         ? ? ??? ???  ?                                                                 ; - codes.dbg.FindMaxBenchmark::findMax_if@31 (line 18)
 10.02%  ? ? ??? ???? ?       0x00007f34cc66e84d: mov    r11d,DWORD PTR [r9+r8*4+0x18]  ;*iaload {reexecute=0 rethrow=0 return_oop=0}
         ? ? ??? ???? ?                                                                 ; - codes.dbg.FindMaxBenchmark::findMax_if@21 (line 19)
  0.33%  ? ? ??? ???? ?       0x00007f34cc66e852: cmp    r11d,edx
 23.63%  ? ? ???????? ?       0x00007f34cc66e855: jg     0x00007f34cc66e86c             ;*iinc {reexecute=0 rethrow=0 return_oop=0}
         ? ? ???????? ?                                                                 ; - codes.dbg.FindMaxBenchmark::findMax_if@31 (line 18)
  0.13%  ? ? ??????????       0x00007f34cc66e857: mov    ecx,DWORD PTR [r9+r8*4+0x1c]   ;*iaload {reexecute=0 rethrow=0 return_oop=0}
         ? ? ??????????                                                                 ; - codes.dbg.FindMaxBenchmark::findMax_if@21 (line 19)
  9.89%  ? ? ??????????       0x00007f34cc66e85c: cmp    ecx,edx
  0.11%  ? ? ??????????       0x00007f34cc66e85e: jg     0x00007f34cc66e821             ;*if_icmple {reexecute=0 rethrow=0 return_oop=0}
         ? ? ???? ?????                                                                 ; - codes.dbg.FindMaxBenchmark::findMax_if@23 (line 19)
  9.71%  ? ? ???? ?????       0x00007f34cc66e860: jmp    0x00007f34cc66e830
         ? ? ????  ????       0x00007f34cc66e862: mov    edx,r11d
  0.00%  ? ? ? ??  ????       0x00007f34cc66e865: jmp    0x00007f34cc66e843
         ? ? ? ??   ???       0x00007f34cc66e867: mov    edx,r11d
  0.00%  ? ? ?  ?   ???       0x00007f34cc66e86a: jmp    0x00007f34cc66e84d
         ? ? ?  ?    ??       0x00007f34cc66e86c: mov    edx,r11d
  0.00%  ? ? ?       ??       0x00007f34cc66e86f: jmp    0x00007f34cc66e857
         ? ? ?        ?       0x00007f34cc66e871: mov    r11,QWORD PTR [r15+0x108]      ; ImmutableOopMap{r10=Oop r9=NarrowOop rdi=Oop }
         ? ?          ?                                                                 ;*goto {reexecute=1 rethrow=0 return_oop=0}
         ? ?          ?                                                                 ; - codes.dbg.FindMaxBenchmark::findMax_if@34 (line 18)
  0.00%  ? ?          ?       0x00007f34cc66e878: test   DWORD PTR [r11],eax            ;*goto {reexecute=0 rethrow=0 return_oop=0}
         ? ?          ?                                                                 ; - codes.dbg.FindMaxBenchmark::findMax_if@34 (line 18)
         ? ?          ?                                                                 ;   {poll}
         ? ?          ?       0x00007f34cc66e87b: cmp    r8d,ebx
         ? ?          ?       0x00007f34cc66e87e: jl     0x00007f34cc66e839
         ? ?                  0x00007f34cc66e880: cmp    r8d,ebp
  0.00%  ?             ?      0x00007f34cc66e883: jge    0x00007f34cc66e89a
         ?             ?      0x00007f34cc66e885: data16 xchg ax,ax                     ;*aload_3 {reexecute=0 rethrow=0 return_oop=0}
         ?             ?                                                                ; - codes.dbg.FindMaxBenchmark::findMax_if@18 (line 19)
  0.00%  ?             ? ?    0x00007f34cc66e888: mov    r11d,DWORD PTR [r9+r8*4+0x10]  ;*iaload {reexecute=0 rethrow=0 return_oop=0}
         ?             ? ?                                                              ; - codes.dbg.FindMaxBenchmark::findMax_if@21 (line 19)
  0.01%  ?             ? ?    0x00007f34cc66e88d: cmp    r11d,edx
         ?             ???  

apa*_*gin 6

首先,为了尽量减少无关 ASM 代码的数量并简化分析,让我们添加以下 JVM 选项:

  • -XX:LoopUnrollLimit=0 - 关闭循环展开;
  • -XX:-UseCountedLoopSafepoints - 从循环中消除安全点轮询。

现在,if_else 的性能优势会更加明显,而生成的汇编代码也会简单得多。下面是两个基准测试的循环体。

findMax_if

         ?     0x0000029707af78f5: jmp     29707af7908h
         ? ?   0x0000029707af78f7: mov     r8d,ecx
         ? ?   0x0000029707af78fa: nop     word ptr [rax+rax+0h]
  0,66%  ? ??  0x0000029707af7900: inc     r9d               ;*iinc {reexecute=0 rethrow=0 return_oop=0}
         ? ??                                                ; - codes.dbg.FindMaxBenchmark::findMax_if@31 (line 18)
  1,02%  ? ??  0x0000029707af7903: cmp     r9d,r10d
         ????  0x0000029707af7906: jnl     29707af7914h      ;*aload_3 {reexecute=0 rethrow=0 return_oop=0}
         ????                                                ; - codes.dbg.FindMaxBenchmark::findMax_if@18 (line 19)
  2,06%  ????  0x0000029707af7908: mov     ecx,dword ptr [r11+r9*4+10h]
          ???                                                ;*iaload {reexecute=0 rethrow=0 return_oop=0}
          ???                                                ; - codes.dbg.FindMaxBenchmark::findMax_if@21 (line 19)
 50,86%   ???  0x0000029707af790d: cmp     ecx,r8d
  0,02%   ???  0x0000029707af7910: jnle    29707af78f7h      ;*if_icmple {reexecute=0 rethrow=0 return_oop=0}
          ? ?                                                ; - codes.dbg.FindMaxBenchmark::findMax_if@23 (line 19)
 41,01%   ? ?  0x0000029707af7912: jmp     29707af7900h      ;*if_icmpge {reexecute=0 rethrow=0 return_oop=0}
          ?                                                  ; - codes.dbg.FindMaxBenchmark::findMax_if@15 (line 18)
          ?    0x0000029707af7914: test    rbx,rbx
Run Code Online (Sandbox Code Playgroud)

findMax_if_else

         ?     0x00000137d24d4b75: jmp     137d24d4b88h
         ?  ?  0x00000137d24d4b77: mov     r8d,ecx
         ?  ?  0x00000137d24d4b7a: nop     word ptr [rax+rax+0h]
 72,63%  ? ??  0x00000137d24d4b80: inc     r9d               ;*iinc {reexecute=0 rethrow=0 return_oop=0}
         ? ??                                                ; - codes.dbg.FindMaxBenchmark::findMax_if_else@36 (line 33)
  0,05%  ? ??  0x00000137d24d4b83: cmp     r9d,r10d
  0,01%  ????  0x00000137d24d4b86: jnl     137d24d4b94h      ;*aload_3 {reexecute=0 rethrow=0 return_oop=0}
         ????                                                ; - codes.dbg.FindMaxBenchmark::findMax_if_else@18 (line 34)
  6,47%  ????  0x00000137d24d4b88: mov     ecx,dword ptr [r11+r9*4+10h]
          ???                                                ;*iaload {reexecute=0 rethrow=0 return_oop=0}
          ???                                                ; - codes.dbg.FindMaxBenchmark::findMax_if_else@21 (line 34)
 15,93%   ???  0x00000137d24d4b8d: cmp     ecx,r8d
  0,18%   ???  0x00000137d24d4b90: jle     137d24d4b80h      ;*if_icmple {reexecute=0 rethrow=0 return_oop=0}
          ? ?                                                ; - codes.dbg.FindMaxBenchmark::findMax_if_else@23 (line 34)
  0,01%   ? ?  0x00000137d24d4b92: jmp     137d24d4b77h      ;*if_icmpge {reexecute=0 rethrow=0 return_oop=0}
          ?                                                  ; - codes.dbg.FindMaxBenchmark::findMax_if_else@15 (line 33)
          ?    0x00000137d24d4b94: test    rbx,rbx
Run Code Online (Sandbox Code Playgroud)

这与您的发现一致:两次编译结果之间的唯一区别是跳转条件相反:jnle 与 jle。为什么 jnle 变体更慢呢?

如果我们仔细查看基准代码,就会发现当前最大值发生变化的情况很少出现。平均而言,在整个循环中 data[i] > result 只有约 14 次为真。这意味着 jnle 分支只有约 0.001% 的时间会被采用(即发生跳转),其余 99.999% 的时间执行都会顺序落到下一条 jmp 指令。

相反,第二个变体中的 jle 分支有 99.999% 的时间会被采用,执行几乎从不到达其后的 jmp。因此,第一个循环每次迭代需要执行(retire)7 条指令,而第二个循环只需 6 条。

JMH 内置了 perfnorm 分析器(可在 Linux 上使用),它会用 CPU 性能计数器的统计数据来补充基准测试结果。让我们加上 -prof perfnorm 运行。

Benchmark                                                Mode  Cnt        Score    Error  Units
FindMaxBenchmark.findMax_if                             thrpt   10     1447.576 ±  8.854  ops/s
FindMaxBenchmark.findMax_if:CPI                         thrpt             0.335            #/op
FindMaxBenchmark.findMax_if:L1-dcache-load-misses       thrpt         63971.361            #/op
FindMaxBenchmark.findMax_if:L1-dcache-loads             thrpt       1014974.522            #/op
FindMaxBenchmark.findMax_if:L1-dcache-stores            thrpt          6105.121            #/op
FindMaxBenchmark.findMax_if:L1-icache-load-misses       thrpt          1641.074            #/op
FindMaxBenchmark.findMax_if:branch-misses               thrpt           146.305            #/op
FindMaxBenchmark.findMax_if:branches                    thrpt       3006620.048            #/op
FindMaxBenchmark.findMax_if:cycles                      thrpt       2358093.526            #/op
FindMaxBenchmark.findMax_if:dTLB-load-misses            thrpt          1085.740            #/op
FindMaxBenchmark.findMax_if:dTLB-loads                  thrpt       1012739.362            #/op
FindMaxBenchmark.findMax_if:dTLB-store-misses           thrpt            21.985            #/op
FindMaxBenchmark.findMax_if:dTLB-stores                 thrpt          6146.243            #/op
FindMaxBenchmark.findMax_if:iTLB-load-misses            thrpt           139.741            #/op
FindMaxBenchmark.findMax_if:iTLB-loads                  thrpt            42.031            #/op
FindMaxBenchmark.findMax_if:instructions                thrpt       7039394.622            #/op
FindMaxBenchmark.findMax_if_else                        thrpt   10     2472.400 ± 36.958  ops/s
FindMaxBenchmark.findMax_if_else:CPI                    thrpt             0.229            #/op
FindMaxBenchmark.findMax_if_else:L1-dcache-load-misses  thrpt         63353.481            #/op
FindMaxBenchmark.findMax_if_else:L1-dcache-loads        thrpt       1007856.753            #/op
FindMaxBenchmark.findMax_if_else:L1-dcache-stores       thrpt          3696.805            #/op
FindMaxBenchmark.findMax_if_else:L1-icache-load-misses  thrpt          1182.253            #/op
FindMaxBenchmark.findMax_if_else:branch-misses          thrpt            72.334            #/op
FindMaxBenchmark.findMax_if_else:branches               thrpt       2000460.845            #/op
FindMaxBenchmark.findMax_if_else:cycles                 thrpt       1380927.546            #/op
FindMaxBenchmark.findMax_if_else:dTLB-load-misses       thrpt           845.629            #/op
FindMaxBenchmark.findMax_if_else:dTLB-loads             thrpt       1006135.685            #/op
FindMaxBenchmark.findMax_if_else:dTLB-store-misses      thrpt            13.336            #/op
FindMaxBenchmark.findMax_if_else:dTLB-stores            thrpt          3545.950            #/op
FindMaxBenchmark.findMax_if_else:iTLB-load-misses       thrpt            80.233            #/op
FindMaxBenchmark.findMax_if_else:iTLB-loads             thrpt            19.009            #/op
FindMaxBenchmark.findMax_if_else:instructions           thrpt       6018937.376            #/op
Run Code Online (Sandbox Code Playgroud)

Perf 计数器证实:findMax_if 每次操作执行约 7M 条指令(其中 3M 条为分支),而 findMax_if_else 执行约 6M 条指令(其中 2M 条为分支)。我想差异的来源现在已经很清楚了,那么其他问题呢?

添加一个其中代码不改变任何内容的 else 语句,这正常吗?

我不这么认为。至少因为这看起来违反直觉,并且使代码更难阅读和理解。冗余代码恰好反转了分支条件,这只是运气使然。用排序后的随机数组替换你的随机数组,使 data[i] > result 在大多数情况下为真,那么 findMax_if 将成为最快的选择。

如果简单的 if else 语句具有更高的吞吐量,那么使用 Math.max 有什么意义?

同样,这并不总是正确的。这在很大程度上取决于数据的性质。当分支易于预测时,if语句的性能更好。但是一旦分支预测器开始经常失败,性能就会急剧下降。Math.max,作为JVM的内在方法,被转译为无分支cmov指令,无论数据分布如何,都具有性能稳定的优点。

下面是一个示例数据集,在该数据集上 Math.max 的性能大大优于所有其他选项:

/**
 * Rebuilds {@code tab} as {@code SIZE} random ints in ascending order, then
 * overwrites entries with zero while walking forward through the array.
 * The step after each write is a random 0, 1 or 2, so a position may be
 * written more than once and some positions are skipped.
 */
public void setup() {
    Random random = new Random();
    this.tab = random.ints(SIZE).sorted().toArray();

    int index = 0;
    while (index < tab.length) {
        tab[index] = 0;
        // Advance only after the write, exactly as the for-loop update clause would.
        index += ThreadLocalRandom.current().nextInt(3);
    }
}
Run Code Online (Sandbox Code Playgroud)