【发布时间】:2016-10-14 11:53:45
【问题描述】:
我无法理解 MSVC 编译器如何展开以下循环(抱歉,我对汇编语言理解不佳):
#define NUM_ITERATIONS (1000 * 1000 * 1000)
double dummySum = 0;
for (int x = 0; x < NUM_ITERATIONS; x++) {
if (x & 1)
dummySum += x;
}
这是生成的汇编代码:
00007FF7B4511070 xorps xmm1,xmm1
double dummySum = 0;
00007FF7B4511073 mov ecx,2
00007FF7B4511078 nop dword ptr [rax+rax]
if (x & 1)
00007FF7B4511080 lea eax,[rcx-2]
00007FF7B4511083 mov r8d,eax
00007FF7B4511086 and r8d,1
00007FF7B451108A je someTest+28h (07FF7B4511098h)
dummySum += x;
00007FF7B451108C movd xmm0,eax
00007FF7B4511090 cvtdq2pd xmm0,xmm0
00007FF7B4511094 addsd xmm1,xmm0
if (x & 1)
00007FF7B4511098 lea edx,[rcx-1]
00007FF7B451109B and edx,1
00007FF7B451109E je someTest+3Fh (07FF7B45110AFh)
dummySum += x;
00007FF7B45110A0 lea eax,[rcx-1]
00007FF7B45110A3 movd xmm0,eax
00007FF7B45110A7 cvtdq2pd xmm0,xmm0
00007FF7B45110AB addsd xmm1,xmm0
00007FF7B45110AF test r8d,r8d
if (x & 1)
00007FF7B45110B2 je someTest+50h (07FF7B45110C0h)
dummySum += x;
00007FF7B45110B4 movd xmm0,ecx
00007FF7B45110B8 cvtdq2pd xmm0,xmm0
00007FF7B45110BC addsd xmm1,xmm0
00007FF7B45110C0 test edx,edx
if (x & 1)
00007FF7B45110C2 je someTest+63h (07FF7B45110D3h)
dummySum += x;
00007FF7B45110C4 lea eax,[rcx+1]
00007FF7B45110C7 movd xmm0,eax
00007FF7B45110CB cvtdq2pd xmm0,xmm0
00007FF7B45110CF addsd xmm1,xmm0
00007FF7B45110D3 test r8d,r8d
if (x & 1)
00007FF7B45110D6 je someTest+77h (07FF7B45110E7h)
dummySum += x;
00007FF7B45110D8 lea eax,[rcx+2]
00007FF7B45110DB movd xmm0,eax
00007FF7B45110DF cvtdq2pd xmm0,xmm0
00007FF7B45110E3 addsd xmm1,xmm0
00007FF7B45110E7 test edx,edx
if (x & 1)
00007FF7B45110E9 je someTest+8Ah (07FF7B45110FAh)
dummySum += x;
00007FF7B45110EB lea eax,[rcx+3]
00007FF7B45110EE movd xmm0,eax
00007FF7B45110F2 cvtdq2pd xmm0,xmm0
00007FF7B45110F6 addsd xmm1,xmm0
00007FF7B45110FA test r8d,r8d
if (x & 1)
00007FF7B45110FD je someTest+9Eh (07FF7B451110Eh)
dummySum += x;
00007FF7B45110FF lea eax,[rcx+4]
00007FF7B4511102 movd xmm0,eax
00007FF7B4511106 cvtdq2pd xmm0,xmm0
00007FF7B451110A addsd xmm1,xmm0
00007FF7B451110E test edx,edx
if (x & 1)
00007FF7B4511110 je someTest+0B1h (07FF7B4511121h)
dummySum += x;
00007FF7B4511112 lea eax,[rcx+5]
00007FF7B4511115 movd xmm0,eax
00007FF7B4511119 cvtdq2pd xmm0,xmm0
00007FF7B451111D addsd xmm1,xmm0
00007FF7B4511121 test r8d,r8d
if (x & 1)
00007FF7B4511124 je someTest+0C5h (07FF7B4511135h)
dummySum += x;
00007FF7B4511126 lea eax,[rcx+6]
00007FF7B4511129 movd xmm0,eax
00007FF7B451112D cvtdq2pd xmm0,xmm0
00007FF7B4511131 addsd xmm1,xmm0
00007FF7B4511135 test edx,edx
if (x & 1)
00007FF7B4511137 je someTest+0D8h (07FF7B4511148h)
dummySum += x;
00007FF7B4511139 lea eax,[rcx+7]
00007FF7B451113C movd xmm0,eax
00007FF7B4511140 cvtdq2pd xmm0,xmm0
00007FF7B4511144 addsd xmm1,xmm0
for (int x = 0; x < NUM_ITERATIONS; x++) {
00007FF7B4511148 add ecx,0Ah
00007FF7B451114B lea eax,[rcx-2]
00007FF7B451114E cmp eax,3B9ACA00h
00007FF7B4511153 jl someTest+10h (07FF7B4511080h)
}
我理解这部分(循环的开始):
// if (x % 2 == 0) jump over the summation
00007FF7B4511073 mov ecx,2 // ecx/rcx = 2
00007FF7B4511080 lea eax,[rcx-2] // eax = rcx - 2
00007FF7B4511083 mov r8d,eax // r8d = eax
00007FF7B4511086 and r8d,1 // r8d & 1
00007FF7B451108A je someTest+28h (07FF7B4511098h) // jump if zero
// add double
00007FF7B451108C movd xmm0,eax
00007FF7B4511090 cvtdq2pd xmm0,xmm0
00007FF7B4511094 addsd xmm1,xmm0
但我不明白的是:从地址来看(假设跳转发生),后续的跳转指令似乎直接跳过了下一条 lea 指令。请注意,以下内容摘自上面的列表,我省略了各次跳转之间的指令:
00007FF7B45110C0 test edx,edx
00007FF7B45110C2 je someTest+63h (07FF7B45110D3h)
... addresses in between omitted ...
00007FF7B45110D3 test r8d,r8d
00007FF7B45110D6 je someTest+77h (07FF7B45110E7h)
... addresses in between omitted ...
00007FF7B45110E7 test edx,edx
00007FF7B45110E9 je someTest+8Ah (07FF7B45110FAh)
... addresses in between omitted ...
00007FF7B45110FA test r8d,r8d
00007FF7B45110FD je someTest+9Eh (07FF7B451110Eh)
... addresses in between omitted ...
00007FF7B451110E test edx,edx
00007FF7B4511110 je someTest+0B1h (07FF7B4511121h)
如果每次跳转都发生,它似乎只是交替使用test r8d,r8d 和test edx,edx 指令,而不加载下一个值。
我在这里理解错了什么?
【问题讨论】:
-
最后一部分(
test edx,edx 那些内容)不在列表中。这里缺少一些东西。 -
@MichaelWalz:你能澄清一下吗?具体是哪一行地址?我检查了地址,它们似乎是匹配的(除非我弄错了)。也就是说,如果您从列表中的地址
00007FF7B45110C0 开始,然后沿着 je 跳转一路走下去。 -
好的,我明白了,你应该发布你剥离的“跳过这部分......”,这当然很重要。
-
@MichaelWalz:它在原始列表中,这只是试图表明(如果我没记错的话)如果执行跳转,这些指令似乎会被跳过。
-
你看到那些
lea eax,[rcx+X] 指令了吗?您注意到这些指令中的 X 各不相同吗?这可能与它有关。您还应该在调试器中运行,并逐步执行汇编代码(而不是源代码)。
标签: c msvcrt loop-unrolling