如何实现选择排序 - 8086 汇编（含答案）

【问题标题】：How to Selection Sort - Assembly 8086（如何实现选择排序 - 8086 汇编）
【发布时间】：2019-04-18 19:46:27
【问题描述】：

我写了一个对字（word，16 位）数组进行选择排序的过程，但有一个问题：排序结果完全错误。

我的数组：VET_2 DW 2, 7, 0, 1, 4, 8, 9, 3, 6, 5

; Selection Sort
; ---------------------------------------------------------------------------
; SELECTION_SORT - intended to sort the word array VET_2 in ascending order
; using selection sort. All indices are BYTE offsets into the array, so they
; advance in steps of 2 and the bounds used below are 18 and 20 (i.e. the
; code is hard-wired for 10 word elements).
;
; Depends on names defined outside this procedure: VET_2, AUX, RESET_REGIST,
; RESET_VAR.
;
; NOTE(review): as written this procedure does NOT sort correctly. The known
; defects are marked with NOTE(review) comments at the offending lines:
;   1. the inner loop overwrites the "min" offset in BX on every iteration;
;   2. the "no swap needed" path jumps back without advancing i and without
;      pushing it, so the POP at the top of the loop unbalances the stack;
;   3. one trailing remark is missing its ';' comment marker.
; ---------------------------------------------------------------------------
SELECTION_SORT PROC
    ; Register roles (as declared by the author):
    ; AX = j & aux      CX = i
    ; BX = offset/min   DX = data and others
    ; The outer index i is carried across iterations on the STACK: it is
    ; pushed at the bottom of the loop and popped into CX at the top.
    ; NOTE(review): PUSH with an immediate operand is not part of the original
    ; 8086 instruction set (it arrived with the 80186) - confirm the target CPU.
    PUSH 0                              ; seed i = 0 for the first POP CX below
    ; NOTE(review): whether the brackets make this a memory load or still an
    ; immediate (address of VET_2) depends on the assembler - confirm; the
    ; intent is SI = address of VET_2.
    MOV SI, [OFFSET VET_2]
    ; ----- start for(int i = 0; i < n-1; i++) -----
    SLC_LOOP_FORA:                      ; outer loop
        ; External helpers; behaviour taken from the author's remarks only.
        CALL RESET_REGIST               ; presumably clears AX, BX, CX & DX - defined elsewhere
        CALL RESET_VAR                  ; presumably clears AUX - defined elsewhere

        POP CX                          ; CX = i (byte offset) pushed by the previous pass
        CMP CX, 18                      ; 18 = byte offset of the last element ((10-1)*2)
        JGE SLC_FIM                     ; i >= n-1 : nothing left to place, leave
                                        ; (signed compare; fine for these small offsets)

        MOV BX, CX                      ; min = i : assume the current slot holds the minimum
        ; ----- start j = i+1 -----
        MOV AX, CX                      ; AX = j, starting from i ...
        ADD AX, 2                       ; ... advanced by one word: j = i+1
        ; ----- end j = i+1 -----

        ; ----- start for(int j = i+1; j < n; j++) -----
        SLC_LOOP_DENTRO:                ; inner loop
            MOV DX, [SI+BX]             ; DX = element at the offset currently in BX
            ; NOTE(review): BX is overwritten with j unconditionally here, so
            ; the running "min" offset is lost before the compare. After this
            ; line BX == j on both branch outcomes, which makes the MOV after
            ; the JL redundant and leaves BX == last j when the loop ends.
            MOV BX, AX                  ; offset receives j

            CMP DX, [SI+BX]             ; compare vet[previous BX] with vet[j]
            JL SLC_DENTRO_PULAR         ; signed: previous element is smaller, skip update

            MOV BX, AX                  ; min = j (no effect: BX already equals AX, see note)

            SLC_DENTRO_PULAR:
                ADD AX, 2               ; j advances one word (2 bytes)
                CMP AX, 20              ; 20 = byte size of the array (10 words)
            JL SLC_LOOP_DENTRO          ; j < n : keep scanning
        ; ----- end for(int j = i+1; j < n; j++) -----

        CMP CX, BX                      ; is the minimum already sitting at position i?
        ; NOTE(review): this path returns to the top of the loop without
        ; "ADD CX, 2" and without "PUSH CX", so the POP CX there has no
        ; matching push (stack imbalance) and i is never advanced.
        JE SLC_LOOP_FORA                ; equal: no swap needed

        ; --- swap vet[i] and vet[min], using the stack and AUX as temporaries ---
        PUSH BX                         ; save offset of the minimum (BX is reused below)
        PUSH [SI+BX]                    ; save vet[min] on the stack

        ; ----- start aux = vet[i] -----
        MOV BX, CX                      ; BX = i
        MOV DX, [SI+BX]                 ; DX = vet[i]
        MOV AUX, DX                     ; AUX = vet[i]
        ; ----- end aux = vet[i] -----

        ; ----- start vet[i] = vet[min] -----
        POP AX                          ; AX = saved vet[min]
        MOV [SI+BX], AX                 ; vet[i] = vet[min] (value comes from AX)
        ; ----- end vet[i] = vet[min] -----

        ; ----- start vet[min] = aux -----
        POP BX                          ; BX = saved offset of the minimum
        MOV DX, AUX                     ; DX = old vet[i]
        MOV [SI+BX], DX                 ; vet[min] = old vet[i]
        ; ----- end vet[min] = aux -----
        ADD CX, 2                       ; i advances one word
        PUSH CX                         ; hand i to the next pass (popped at the loop top)
        ; NOTE(review): the trailing words on the next line are not preceded
        ; by ';', so they are not a comment and will likely be rejected by
        ; the assembler.
        JMP SLC_LOOP_FORA               repeat outer loop
    ; ----- end for(int i = 0; i < n-1; i++) -----
    SLC_FIM:                            ; end the procedure
        RET
SELECTION_SORT ENDP

调用选择排序程序前：2 7 0 1 4 8 9 3 6 5

调用选择排序过程后：5 2 7 0 1 4 8 9 3 6

错误在哪里？有人可以帮帮我吗？

【问题讨论】：

循环内的 POP CX 在我看来很可疑：您只在循环外执行了一次 push 0，但 JE SLC_LOOP_FORA 会在没有压入任何内容的情况下再次执行这条 POP，所以堆栈的使用会出问题。用 xor cx,cx 或 mov cx,0 来初始化会更明智。如果寄存器不够用，通常的做法是先建立栈帧，然后把局部变量保存到 [bp-4] 之类的位置，需要时再重新加载。（在 16 位代码中不能使用 [SP+4] 这类以 SP 为基址的寻址方式。）循环内的 push/pop 更难推理，容易导致这类错误。
您是否尝试过使用调试器单步执行代码以查看它是否提前离开循环？如果是这样，您可以准确地看到哪个分支出了问题。

标签： assembly x86-16 selection-sort

【解决方案1】：

问题

当不需要交换两个元素时，你仍然需要把 CX 中的下界（即 i）向前推进。cmp cx, bx / je SLC_LOOP_FORA 这条路径忽略了这一点。此外，它还会造成堆栈不平衡（弹出的次数多于压入的次数）。

解决办法：

这个问题（包括堆栈不平衡）很容易通过引入一个额外的标签来纠正：

    PUSH 0                 ; to initialize i
    MOV SI, OFFSET VET_2
SLC_LOOP_FORA:             ; outer loop

    ...

    CMP CX, BX             ; compare i with the position of the smaller element
    JE NO_SWAP             ; if equals, repeat outer loop, otherwise do the swap

    ...

NO_SWAP:
    ADD CX, 2              ; INC 2 on i
    PUSH CX                ; put in the stack
    JMP SLC_LOOP_FORA      ; repeat outer loop

考虑

; AX = j & aux      CX = i
; BX = offset/min   DX = data and others
PUSH 0                              ; to initialize i
MOV SI, [OFFSET VET_2]

如果你愿意重新分配寄存器，你可以极大地简化这个程序。

像这样的寄存器分配

; AX = j & aux      DI = i
; SI = offset/min   DX = data and others
PUSH 0                              ; to initialize i
MOV BX, OFFSET VET_2

交换代码，例如将变成这 3 条指令：

mov  ax, [bx+si]
xchg ax, [bx+di]
mov  [bx+si], ax

【讨论】：

【解决方案2】：

MOV SI, [OFFSET VET_2]

我从来没有见过会这样处理的汇编器！上面这条指令会把 SI 寄存器设置为第一个数组元素的内容，所以 SI=2。这并不实用。

获取向量地址的可接受指令是：

mov si, offset VET_2

或

lea si, [VET_2]

或

lea si, VET_2

【讨论】：

根据 Ross 对 “Confusing brackets in MASM32” 的回答，mov si, [OFFSET VET_2] 仍然是立即数形式。MASM 显然只在方括号里是数字字面量、或者使用 ds:[stuff] 时才把方括号当作内存引用。（这看起来既疯狂又可怕；除非是故意混淆，否则我不建议在立即数两边加方括号，但如果你坚持使用 MASM，它就是这样工作的。）
+1 以揭露 MASM 中的这种纯粹的疯狂。感谢@PeterCordes 提供指向罗斯答案的链接。我不知道...

【解决方案3】：

;-----------------------------------------------------------------------
; void selection_sort_i64(long long *array, long length)
;
; In-place ascending selection sort of signed 64-bit integers (NASM syntax).
;
; In:    RDI = pointer to the first element
;        RSI = number of elements (signed; <= 1 returns immediately)
; Out:   nothing; the array is sorted in place
; Clobb: RAX, RDX, RSI, RDI, flags   (RBX is saved and restored)
;
; Pseudo-C:
;
;   for (i = 0; i < n - 1; ++i) {
;       min = i;
;       for (j = i + 1; j < n; ++j) {
;           if (a[j] < a[min]) min = j;
;       }
;       if (min != i) swap(a[i], a[min]);
;   }
;
; Register roles inside the loops:
;   RSI = &a[i]                      (outer cursor)
;   RDX = &a[n-1]                    (address of the last element; bound
;                                     for both loops)
;   RDI = &a[j]                      (inner cursor, starts at &a[i+1])
;   RAX = value of the smallest element seen so far in a[i..j]
;   RBX = address of that smallest element
;-----------------------------------------------------------------------
selection_sort_i64:
    I64_SIZE equ 8
    LG_I64_SIZE equ 3

    cmp rsi, 1
    jle .end            ; length <= 1: already sorted, nothing was pushed

    push rbx            ; RBX is callee-saved; it is used for &a[min] below
    xchg rsi, rdi       ; rsi = &a[0], rdi = length

    ; RDX = &a[n-1]. Elements are compared and addressed through pointers,
    ; so no separate loop counters are needed.
    lea rdx, [rsi + rdi * I64_SIZE - I64_SIZE]

.outer_loop:
    cmp rsi, rdx
    jae .done           ; while (&a[i] < &a[n-1]) -- unsigned: these are addresses

    mov rax, [rsi]              ; min value   = a[i]
    mov rbx, rsi                ; min address = &a[i]
    lea rdi, [rsi + I64_SIZE]   ; j = i + 1  (rdi <= rdx is guaranteed here)

.inner_loop:
        cmp rax, [rdi]              ; signed compare of the element values
        jle .min_less_or_equal      ; current minimum still wins (stable on ties)
        mov rbx, rdi                ;   min address = &a[j]
        mov rax, [rdi]              ;   min value   = a[j]
.min_less_or_equal:
        add rdi, I64_SIZE           ; ++j
        cmp rdi, rdx
        jbe .inner_loop             ; continue while &a[j] <= &a[n-1]

    cmp rsi, rbx        ; minimum already at position i?
    je .no_swap
    mov rdi, [rsi]      ;   temp   = a[i]   (rdi is free: inner loop is done)
    mov [rsi], rax      ;   a[i]   = min value
    mov [rbx], rdi      ;   a[min] = temp
.no_swap:
    add rsi, I64_SIZE   ; ++i
    jmp .outer_loop

.done:
    pop rbx             ; restore caller's RBX
.end:
    ret

希望这可行。谢谢。

【讨论】：

OP 是在寻求帮助调试“他们自己的”代码，而不是要一个随便的选择排序实现。而且这是一个 x86-16 的问题；不过至少您没有使用 r8..r15，也没有使用在 16 位模式下不可用的寻址方式，因此实际上只需更改寄存器大小和 I64_SIZE，就可以把这段 x86-64 代码直接移植过去。
但是，在 x86-64 中，您应该用 r8 之类的寄存器来充当其中一个循环计数器，这样就不必在循环内压入/弹出 rcx。loop 指令很慢；只有在优化代码大小而不是速度时才使用它。参见：Why is the loop instruction slow? Couldn't Intel have implemented it efficiently?