【发布时间】:2019-08-15 19:50:27
【问题描述】:
我已经用 C++ 实现了一个归并排序算法。
在算法内部,它会检查数组的大小是否大于min_size_to_thread,如果是:使用线程递归调用函数。
但是当我增加min_size_to_thread: 减少正在使用的线程数时,函数变得更快。即使从 1 个线程变为 2 个线程。
我的假设是函数速度会随着更多线程的增加而增加,然后再次开始下降。这对我来说没有任何意义,所以我开始相信我的实现在某种程度上是错误的。
template <typename T>
void merge_sort(T S[], int S_size, int min_size_to_thread)
{
if (S_size < 2) return;
// Left Sequence
int L_size = S_size / 2;
T* L = new T[L_size];
for (int i = 0; i < L_size; i++)
{
L[i] = S[i];
}
// Right Sequence
int R_size = (S_size + 1) / 2;
T* R = new T[R_size];
for (int i = 0; i < R_size; i++)
{
R[i] = S[i + L_size];
}
if (S_size > min_size_to_thread)
{
std::thread thread_left(merge_sort<T>, L, L_size, min_size_to_thread);
std::thread thread_right(merge_sort<T>, R, R_size, min_size_to_thread);
thread_right.join();
thread_left.join();
}
else
{
merge_sort<T>(L, L_size, min_size_to_thread);
merge_sort<T>(R, R_size, min_size_to_thread);
}
int S_iterator = 0;
int L_iterator = 0;
int R_iterator = 0;
while ((L_iterator < L_size) && (R_iterator < R_size))
{
if (L[L_iterator] < R[R_iterator])
{
S[S_iterator] = L[L_iterator];
++L_iterator;
}
else
{
S[S_iterator] = R[R_iterator];
++R_iterator;
}
++S_iterator;
}
while (L_iterator < L_size)
{
S[S_iterator] = L[L_iterator];
++L_iterator;
++S_iterator;
}
while (R_iterator < R_size)
{
S[S_iterator] = R[R_iterator];
++R_iterator;
++S_iterator;
}
delete[] L;
delete[] R;
}
int main()
{
const int S_size = 500000;
unsigned char S[S_size];
for (int i = 0; i < S_size; ++i)
{
S[i] = i % 255;
}
int min_size_to_thread;
min_size_to_thread = 250;
auto t1 = std::chrono::high_resolution_clock::now();
merge_sort(S, S_size, min_size_to_thread);
auto t2 = std::chrono::high_resolution_clock::now();
std::cout << "size > " << min_size_to_thread << ": " << (t2 - t1) / std::chrono::milliseconds(1) << std::endl;
for (int i = 0; i < S_size; ++i)
{
S[i] = i % 255;
}
min_size_to_thread = 500;
t1 = std::chrono::high_resolution_clock::now();
merge_sort(S, S_size, min_size_to_thread);
t2 = std::chrono::high_resolution_clock::now();
std::cout << "size > " << min_size_to_thread << ": " << (t2 - t1) / std::chrono::milliseconds(1) << std::endl;
for (int i = 0; i < S_size; ++i)
{
S[i] = i % 255;
}
min_size_to_thread = 1000;
t1 = std::chrono::high_resolution_clock::now();
merge_sort(S, S_size, min_size_to_thread);
t2 = std::chrono::high_resolution_clock::now();
std::cout << "size > " << min_size_to_thread << ": " << (t2 - t1) / std::chrono::milliseconds(1) << std::endl;
for (int i = 0; i < S_size; ++i)
{
S[i] = i % 255;
}
min_size_to_thread = 10000;
t1 = std::chrono::high_resolution_clock::now();
merge_sort(S, S_size, min_size_to_thread);
t2 = std::chrono::high_resolution_clock::now();
std::cout << "size > " << min_size_to_thread << ": " << (t2 - t1) / std::chrono::milliseconds(1) << std::endl;
for (int i = 0; i < S_size; ++i)
{
S[i] = i % 255;
}
min_size_to_thread = 250000;
t1 = std::chrono::high_resolution_clock::now();
merge_sort(S, S_size, min_size_to_thread);
t2 = std::chrono::high_resolution_clock::now();
std::cout << "size > " << min_size_to_thread << ": " << (t2 - t1) / std::chrono::milliseconds(1) << std::endl;
for (int i = 0; i < S_size; ++i)
{
S[i] = i % 255;
}
min_size_to_thread = 500000;
t1 = std::chrono::high_resolution_clock::now();
merge_sort(S, S_size, min_size_to_thread);
t2 = std::chrono::high_resolution_clock::now();
std::cout << "size > " << min_size_to_thread << ": " << (t2 - t1) / std::chrono::milliseconds(1) << std::endl;
return 0;
}
【问题讨论】:
-
你不应该通过复制子数组。那会扼杀你的表现。只需保留一个数组,然后传递您正在使用的索引,尽管这会导致错误的共享问题。
-
@NathanOliver 是线程速度变慢的原因吗?
-
很有可能。
-
根本没有使用
thread_right的意义,因为您创建它并立即等待它(在左侧等待之后),因此原始线程在完成之前不会做任何事情。创建左线程后,在当前线程中做右线程的事情,然后等待左线程完成。 -
您的实现创建并连接了两个线程,而您却无缘无故地拥有一个已经很热且已调度的线程停放。您至少应该在新线程中对左侧进行排序,而在当前线程中对右侧进行排序(然后加入),反之亦然。
标签: c++ algorithm performance sorting