在程序集中找到第二大值的程序集

Question

在程序集中找到第二大值的程序集

我编写了一个汇编程序,它在数组中找到最大值,但现在我希望它能找到数组中的第二大数字.如何修改程序来执行此操作？

这是我写的程序,它确实有效.程序打印数组中的所有值,然后查找数组的最大值.现在我希望它找到第二大值.

 %include "io.mac"
.STACK 100H 

.DATA
   Numbers DW 3,4,5,2,6,0
   msg0  db "Printing values in array",0
   msg1  db "Max",0
   msg2  db "Min",0

   .CODE
        .STARTUP
    PutStr msg0
    mov dx, Numbers 
    mov si, dx ;point to array
    printValues:
    cmp word [si], 0
    je procedure
    nwln
    PutInt [si]
    add si, 2
    jmp printValues

    procedure:
    push Numbers ;push Number to stack to pass parameter by stack
    call maxMeth
    nwln
    PutStr msg1
    nwln

    PutInt ax
    nwln




    complete:
.EXIT





maxMeth:
    enter 0,0 ;save old bp and set bp to sp
    mov si, [bp+4] ;point to array 
    mov ax, [si]   ; ax holds max
    add si,2 ; Increment si to next number

;Now entering loop
max:   
    cmp word [si],0   ; checks to see if the number is 0 and if it is, then we are done.
    je finish
    cmp ax, [si]        ; ax holds the max . So if ax is greater than si, then dont assign si to ax. 
    jge increment
    jmp newMax
newMax: 
    mov ax, [si] ; Otherwise we have a new a max

increment:   
    add si, 2   ;now increment si to check the next value and jump back to the main loop.
    jmp max

finish: ;clean up.
    leave ;pop bp
    ret   ;return

Run Code Online (Sandbox Code Playgroud)

我编辑了我的程序以跟踪第二个最大值,我收到的第二个最大值的结果是3.我期待程序输出5.我不知道为什么我得到错误的输出.以下是我对该计划的编辑.

%include "io.mac"
.STACK 100H 

.DATA
   Numbers DW 3,4,5,2,6,0
   msg0  db "Printing values in array",0
   msg1  db "Max",0


   .CODE
        .STARTUP
    PutStr msg0
    mov dx, Numbers 
    mov si, dx ;point to array
    printValues:
    cmp word [si], 0
    je procedure
    nwln
    PutInt [si]
    add si, 2
    jmp printValues

    procedure:
    push Numbers ;push Number to stack to pass parameter by stack
    call maxMeth
    nwln
    PutStr msg1
    nwln

    PutInt ax
    nwln

   PutInt bx
    nwln


    complete:
.EXIT





maxMeth:
    enter 0,0 ;save old bp and set bp to sp
    mov si, [bp+4] ;point to array 
    mov ax, [si]   ; ax holds max
    mov bx, [si]   ; ax holds max
    add si,2 ; Increment si to next number

;Now entering loop
max:   
    cmp word [si],0   ; checks to see if the number is 0 and if it is, then we are done.
    je finish          ;equals 0, then finished
    cmp ax, [si]         
    jg testSecondMax   ;So if ax is greater than si, then dont assign si to ax and check second max
    jmp newMax         ;otherwise go to new max


newMax: 
    mov ax, [si] ; save new max
    jmp increment ; and increment

testSecondMax:
    cmp bx, [si]
    jl secondMax
    jg increment

secondMax:
    mov bx, [si]
    jmp increment

increment:  
    add si, 2   ;now increment si to check the next value and jump back to the main loop.
    jmp max 



finish: ;clean up.
    leave ;pop bp
    ret   ;return

Run Code Online (Sandbox Code Playgroud)

Answer 1

Pet*_*des 5

我最终为此编写代码,因为它让我想知道如何有效地实现循环.如果你想自己解决你的作业,不要看我的代码,只看英文的要点.使用调试器单步执行代码.

以下是我将如何更改您的代码:

NASM样式:.noswap:在函数内使用本地标签(如).将操作数缩进到一致的列,使其看起来不整齐.使用输入/返回值和调用约定来注释您的函数(将其注册为clobbers).
优化jmp next_instruction之前newMax:因为它只是一个昂贵的无操作跳跃,无论如何执行将会下降.
除非可能优化真正的8086,否则不要使用enter,它很慢.
将正在检查的每个元素加载到寄存器中,而不是多次使用相同的内存操作数.(x86-16有6个整数寄存器而不是BP/SP;使用它们.)
将loop-exit条件分支放在底部.(如果需要,跳转到循环入口点).
保持最大和第二最大值在两个寄存器中,就像你在AX中保持最大值一样.
当您找到大于2nd-max的元素时,请保留您拥有的3个数字中最高的2个.即在2个寄存器中维护2个元素的队列/排序列表.

未经测试:

; word max2Meth(word *array);
; Input: implicit-length array (terminated by a 0 element),
;        pointed to by pointer passed on the stack.  (DS segment)
; returns in ax
; clobbers: cx, dx
global max2Meth
max2Meth:
    push  bp
    mov   bp, sp     ; make a stack frame.  (ENTER is slow, don't use it)
    push  si         ; SI is call-preserved in many calling conventions.  Omit this if you want to just clobber it.

    mov   si, [bp+4] ; pointer to array 

    mov   ax, [si]   ; assume that the list is non-empty
    mov   dx, ax     ; start with max = max2 instead of putting a conditional xchg outside the loop

    jmp   .loop_entry   ; enter at the bottom, at the conditional branch
;;; ax: 2nd max
;;; dx: max

.maxloop:              ; do {
    cmp cx, ax         ; check against 2nd-max, because the common case is less than both.
    jg  .updateMaxes   ; optimize for the common case: fall through on not-found

.loop_entry:
    add  si, 2
    mov  cx, [si]      ;   c = *p++;
    test cx, cx
    jnz .maxloop       ; } while (c != 0);

.finish:
   ; 2nd-max is already in AX, just clean up and return

    pop  si
    leave    ;pop bp would be faster because SP is already pointing to the right place
    ret

; This block is part of the function, even though it's placed after the ret
.updateMaxes:
    mov  ax, cx           ; Bump the old 2nd-max unconditionally
    cmp  ax, dx
    jle  .loop_entry      ; if 2nd_max <= max, return to the loop
    xchg ax, dx           ; otherwise swap
    jmp  .loop_entry

Run Code Online (Sandbox Code Playgroud)

将块用于罕见的情况是不合适的,因为这意味着常见的情况可以通过没有采取分支. 通常把if/else条件内联需要一个jmp地方只是为了避免在if之后运行else部分.但.updateMaxes:最终变得相当优雅,IMO:它必须跳回到循环中,我们可以在交换之前或之后做到这一点.

16位xchg是3 uops(与3 mov条指令一样昂贵),但是为了在new-max情况下更便宜(并且只做mov ax, dx/ mov dx, cx)我们必须使新的2ndmax情况更慢.假设它更有可能只更新第二个max而不是更新两者,我认为只需要mov和cmp/jcc就可以获得胜利.你可以使那个部分无分支cmov(在一个686 CPU上),这可能是好的.让整个事情无分支会给你一个很长的依赖链,并且不值得,除非数组元素平均变得越来越大,所以你总是频繁地进行最大更新(但没有模式,所以你得分支未命中).

在Intel Haswell/Skylake上,内环只有4个融合域uops(比较/分支都可以宏融合).在没有更新的长时间运行中,它应该每个时钟以1次迭代运行.

如果你正在优化代码大小超过速度(例如对于真正的8086),请使用ax临时lodsw代替而不是mov ax, [si]和add si, 2.(保持max在不同的注册表中).

使用隐式长度列表,您不能只使用scasw2nd-max in ax,因为您需要检查0以及> 2nd-max:/

作为进一步的优化,如果您使用的是小型模型(SS = DS),则可以使用,bp而不是si在加载指针后不需要访问堆栈.你可以pop bp代替leave

在我想到用ax = dx = first元素进入循环之前,我打算在循环之前使用这段代码:

    mov   ax, [si]   ; assume that the list is non-empty
    mov   dx, [si+2] ; but check if it's only 1 element long, like maxMeth does
    test  dx, dx
    jz    .finish

    add   si, 4      ; first 2 elements are loaded

    cmp   ax, dx     ; sort them so ax <= dx
    jng  .noswap
    xchg  ax, dx
 .noswap:

Run Code Online (Sandbox Code Playgroud)

构造内循环的另一种方法是这样的:

.maxloop:              ; do {
    add  si, 2
    mov  cx, [si]      ;   c = *p++;

    test cx, cx
    jz .finish         ; jz instead of jnz: exit the loop on the sentinel value

    cmp cx, ax         ; check against 2nd-max, because the common case is less than both.
    jng .maxloop

;; .updateMaxes:  ;; conditionally fall through into this part 
    mov  ax, cx           ; Bump the old 2nd-max unconditionally
    cmp  ax, dx
    jle  .maxloop         ; if 2nd_max <= max, return to the loop
    xchg ax, dx           ; otherwise swap
    jmp  .maxloop

.finish:

Run Code Online (Sandbox Code Playgroud)

这可能更好,因为我们可以直接进入循环的顶部开始.我们jz跳过循环,跳过条件更新的东西,所以我们没有任何分支只存在跳过"在路上"的代码.(即我们已经成功地有效地布局了我们的代码块.)

一些CPU的唯一缺点是test/jz和cmp/jg是背靠背的.当条件分支被多于几个指令分开时,一些CPU会做得更好.(例如,除非你对如何击中Sandybridge上的解码器感到幸运,否则两个分支中的一个将不会进行宏观融合.但是他们将使用第一个循环.)

提醒:Stack Overflow用户贡献在cc by-sa 3.0下获得许可,需要归属,因此如果您复制粘贴我的整个代码,请确保包含https://stackoverflow.com/questions/47466116/assembly-program-that-finds-second-largest-value-in-array/47467682#47467682在评论中.

归档时间：	8 年，2 月前
查看次数：	1448 次
最近记录：	8 年，2 月前