【发布时间】:2021-07-08 03:22:28
【问题描述】:
请看下面这段代码。
#include <stdlib.h>
int main(int argc , char **argv) {
int *x = malloc(argc*sizeof(int));
for (int i = 0; i < argc; ++i) {
x[i] = argc;
}
int t = 0;
for (int i = 0; i < argc; ++i) {
t += x[i];
}
free(x);
return t;
}
这个循环
for (int i = 0; i < argc; ++i) {
x[i] = argc;
}
被向量化为:
movdqu xmmword ptr [rax + 4*rdx], xmm0
movdqu xmmword ptr [rax + 4*rdx + 16], xmm0
movdqu xmmword ptr [rax + 4*rdx + 32], xmm0
movdqu xmmword ptr [rax + 4*rdx + 48], xmm0
movdqu xmmword ptr [rax + 4*rdx + 64], xmm0
movdqu xmmword ptr [rax + 4*rdx + 80], xmm0
movdqu xmmword ptr [rax + 4*rdx + 96], xmm0
movdqu xmmword ptr [rax + 4*rdx + 112], xmm0
movdqu xmmword ptr [rax + 4*rdx + 128], xmm0
movdqu xmmword ptr [rax + 4*rdx + 144], xmm0
movdqu xmmword ptr [rax + 4*rdx + 160], xmm0
movdqu xmmword ptr [rax + 4*rdx + 176], xmm0
movdqu xmmword ptr [rax + 4*rdx + 192], xmm0
movdqu xmmword ptr [rax + 4*rdx + 208], xmm0
movdqu xmmword ptr [rax + 4*rdx + 224], xmm0
movdqu xmmword ptr [rax + 4*rdx + 240], xmm0
https://godbolt.org/z/33vvonojd
按我的理解,编译器是以 256 字节为一块对内存进行向量化写入的(16 条 movdqu,每条写 16 字节)。可是我用 malloc 分配的大小 argc*sizeof(int) 并没有那么大,这怎么可能呢?这样不会越界覆盖我所分配内存之外的内容吗?
【问题讨论】:
标签: c compiler-construction clang simd auto-vectorization