【发布时间】:2019-05-14 20:04:15
【问题描述】:
我正在尝试确定我的处理器各级缓存的相联度(associativity)。我的处理器是英特尔酷睿 i5-2500:
L1 数据缓存:32 KB,8 路组相联
L1 指令缓存:32 KB,8 路组相联
L2:256 KB,8 路组相联
L3:6 MB,12 路组相联,在所有内核之间共享
我以处理器时钟周期(tick)为单位测量访问数组元素的平均时间。数组被划分为若干片段。
在循环中,我增加了片段的数量。两个相邻片段之间的距离等于 L3 缓存大小。我访问所有片段的第一个元素,然后访问第二个元素,依此类推。每个元素都包含下一个元素的索引。最后一个元素包含第一个元素的索引。
看起来像这样:enter image description here
当片段的数量超过缓存的相联度时,平均访问时间应该会增加。
我得到了以下结果: enter image description here
第一次跃升对应 TLB 的相联度,第二次跃升对应 L1 和 L2 缓存的相联度,但是我不明白为什么片段数量超过 L3 缓存的相联度之后,访问时间没有继续增加。
我也尝试了不同的尺寸和偏移量
我做错了吗?还是我的代码有错误?
你能解释一下吗? 代码如下:
#include <assert.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
/* Total number of bytes walked per measurement (6 MiB); main() divides it by
 * sizeof(int) to obtain the number of int cells. */
#define SIZE (6 * 1024 * 1024)
/* Distance in bytes between the starts of two neighbouring fragments;
 * main() converts it to int units before passing it to SetIndexes(). */
#define OFFSET (6 * 1024 * 1024)
/* How many times CalcAccessTime() repeats the walk; the minimum is reported. */
#define ROUNDS 200
/* Inclusive range of fragment counts that main() iterates over. */
#define MIN_FRAGMENTS_COUNT 1
#define MAX_FRAGMENTS_COUNT 32
/*
 * Zeroes the first `size` cells of `array` and then releases it with free().
 *
 * array: heap pointer to release; a NULL pointer is accepted and ignored.
 * size:  number of int cells in the array; must be positive (asserted even
 *        when `array` is NULL).
 */
void FreeArray(int* array, int size) {
  assert(size > 0 && "Size must be gerater than zero\n");
  if (array != NULL) {
    int* const end = array + size;
    for (int* cell = array; cell != end; ++cell) {
      *cell = 0;
    }
    free(array);
  }
}
/*
 * Allocates a zero-initialised array of `size` ints.
 *
 * size: number of int cells; must be positive (asserted).
 * Returns the calloc() result, i.e. NULL when the allocation fails. The
 * caller owns the memory and is responsible for releasing it.
 */
int* CreateArray(int size) {
  assert(size > 0 && "Size must be greater than zero\n");
  int* zeroed = calloc(size, sizeof *zeroed);
  return zeroed;
}
/*
 * Reads the processor time-stamp counter with the x86 `rdtsc` instruction.
 *
 * The instruction delivers the counter as two 32-bit halves (low half in
 * EAX, high half in EDX); they are merged into one 64-bit value here.
 * Returns the current tick count.
 */
unsigned long long int GetTicksCount(void) {
  unsigned int lo = 0;
  unsigned int hi = 0;
  __asm__ __volatile__("rdtsc" : "=a"(lo), "=d"(hi));
  unsigned long long int ticks = hi;
  ticks <<= 32;
  ticks |= lo;
  return ticks;
}
/*
 * Builds a cyclic chain of indexes through `fragments_count` fragments.
 *
 * Each cell stores the index of the next cell to visit. The chain visits
 * element 0 of every fragment in order, then element 1 of every fragment,
 * and so on; the last element of the last fragment links back to index 0,
 * so the chain is one cycle of fragment_size * fragments_count cells.
 *
 * array:           destination buffer; must hold at least
 *                  (fragments_count - 1) * offset + fragment_size ints.
 * fragment_size:   number of chained cells per fragment (> 0, <= offset).
 * offset:          distance, in ints, between the starts of two
 *                  neighbouring fragments (> 0).
 * fragments_count: number of fragments (> 0).
 *
 * Cells between the end of a fragment and the start of the next one are
 * left untouched.
 */
void SetIndexes(int* array, int fragment_size, int offset,
                int fragments_count) {
  assert(NULL != array && "Pointer to array must not be NULL\n");
  assert(fragment_size > 0 && "Fragmnet size must be greater than zero\n");
  assert(offset > 0 && "Offset must be greater than zero\n");
  assert(fragments_count > 0 && "Fragments count must be greater than zero\n");
  assert(fragment_size <= offset &&
         "Fragment size must not be greater than offset\n");
  const int last_fragment = fragments_count - 1;
  const int last_element = fragment_size - 1;
  /* The loop must include the last element row: otherwise the last element
   * of every fragment but the final one is never linked, and the chain
   * falls back to index 0 without visiting them. */
  for (int i = 0; i <= last_element; ++i) {
    for (int j = 0; j < last_fragment; ++j) {
      /* Go to the same element of the next fragment. */
      array[j * offset + i] = (j + 1) * offset + i;
    }
    /* From the last fragment go to the next element of the first fragment,
     * or close the cycle at index 0 after the very last element. */
    array[last_fragment * offset + i] = (i < last_element) ? i + 1 : 0;
  }
}
/*
 * Measures the average number of ticks per access while following the index
 * chain stored in `array`.
 *
 * array: buffer whose cells each hold the index of the next cell to read;
 *        the walk starts at index 0. Must not be NULL.
 * size:  number of chained accesses performed in each round (> 0).
 *
 * The walk is repeated ROUNDS times; each round continues from the cell
 * where the previous one stopped. Returns the smallest per-access average
 * (integer division of elapsed ticks by `size`) seen over all rounds.
 */
unsigned long long int CalcAccessTime(int* array, int size) {
  assert(NULL != array && "Pointer to array must not be NULL\n");
  assert(size > 0 && "Size must be greater than zero\n");
  /* Read through a volatile-qualified pointer: the chased index is never
   * used for anything else, so without this an optimising compiler is free
   * to drop the loads and the loop would time nothing. */
  const volatile int* cells = array;
  unsigned long long int min_time = ULLONG_MAX;
  int index = 0;
  for (int i = 0; i < ROUNDS; ++i) {
    const unsigned long long int start = GetTicksCount();
    for (int j = 0; j < size; ++j) {
      index = cells[index];
    }
    const unsigned long long int end = GetTicksCount();
    const unsigned long long int cur_time = (end - start) / size;
    if (cur_time < min_time) {
      min_time = cur_time;
    }
  }
  return min_time;
}
/*
 * Runs the measurement for every fragment count from MIN_FRAGMENTS_COUNT to
 * MAX_FRAGMENTS_COUNT and prints the fragment count together with the
 * measured time per access.
 *
 * Returns 0 on success, or -1 as soon as an array cannot be allocated.
 */
int main(int argc, char** argv) {
  const int integers_count = SIZE / sizeof(int);
  const int offset_int = OFFSET / sizeof(int);
  int fragments = MIN_FRAGMENTS_COUNT;
  while (fragments <= MAX_FRAGMENTS_COUNT) {
    /* One stride of offset_int cells is reserved per fragment. */
    const int total = fragments * offset_int;
    int* array = CreateArray(total);
    if (array == NULL) {
      return -1;
    }
    /* The walked cells are split evenly between the fragments. */
    SetIndexes(array, integers_count / fragments, offset_int, fragments);
    printf("Fragments: %d\n", fragments);
    const unsigned long long int ticks = CalcAccessTime(array, integers_count);
    printf("Time: %llu\n", ticks);
    FreeArray(array, total);
    ++fragments;
  }
  return 0;
}
【问题讨论】:
-
检测缓存相联度的方法可能与具体的操作系统有关(理论上缓存对用户程序是“隐藏”的)。在 Linux 上,可以尝试
cat /proc/cpuinfo