我的问题是:使用 Thrust 是否有更好/更高效的方法来做到这一点?
我相信有。置换向量为您提供了将输入矩阵的内容直接复制到置换矩阵所需的所有信息,而无需进行排序。
Thrust 中一个有用的功能是 permutation_iterator。置换迭代器允许我们在任何操作中即时地对所选的输入元素重新排序。如果我们提供一个适当的索引计算函子,就可以把线性索引(通过 counting_iterator 生成)传给该函子,从而(通过 transform_iterator)为复制操作中的每个元素生成相应的置换后输入索引。
下面是一个完整的可运行示例:
$ cat t1061.cu
#include <thrust/iterator/permutation_iterator.h>
#include <thrust/iterator/counting_iterator.h>
#include <thrust/iterator/transform_iterator.h>
#include <thrust/device_vector.h>
#include <thrust/copy.h>
#include <iostream>
#include <iterator>
#include <assert.h>
typedef int mytype;
// Index functor that turns a row permutation of a row-major matrix into a
// gather: given the linear index of an element of the permuted (output)
// matrix, operator() returns the linear index of the element of the original
// (input) matrix that belongs at that position.
//
// Output row r is read from input row p[r]-1, i.e. the entries of p are
// treated as 1-based row numbers.
//
//   c : number of columns per row (pass 1 to permute a plain vector)
//   p : pointer to the permutation entries. It is dereferenced inside
//       operator(), so it must be readable wherever the functor is executed;
//       in this file it is the raw pointer of a thrust::device_vector.
//
// The functor never writes through p and never changes its own state, so the
// pointer is to const data and operator() is const.
struct copy_idx_func : public thrust::unary_function<unsigned, unsigned>
{
  size_t c;
  const unsigned *p;
  copy_idx_func(const size_t _c, const unsigned *_p) : c(_c), p(_p) {}
  __host__ __device__
  unsigned operator()(unsigned idx) const {
    // Split the linear output index into (row, column).
    const unsigned out_row = idx / c;
    const unsigned col     = idx % c;
    // Look up which input row feeds this output row (1-based -> 0-based).
    const unsigned src_row = p[out_row] - 1;
    // Rebuild the linear index inside the input matrix.
    return src_row * c + col;
  }
};
int main(){
const mytype mat[] = {1,1,1,1,1,2,2,2,2,2,3,3,3,3,3,4,4,4,4,4,5,5,5,5,5};
const mytype vec[] = {1,2,3,4,5};
const unsigned per[] = {5,3,1,2,4};
const size_t msize = sizeof(mat)/sizeof(mytype);
const size_t vsize = sizeof(vec)/sizeof(mytype);
const size_t psize = sizeof(per)/sizeof(unsigned);
const size_t cols = msize/vsize;
// const size_t rows = vsize;
assert(msize%vsize == 0);
assert(vsize == psize);
thrust::device_vector<mytype> d_m(mat, mat+msize);
thrust::device_vector<mytype> d_v(vec, vec+vsize);
thrust::device_vector<unsigned> d_p(per, per+psize);
thrust::device_vector<mytype> d_rm(msize);
thrust::device_vector<mytype> d_rv(vsize);
std::cout << "Initial Matrix:" << std::endl;
thrust::copy_n(d_m.begin(), msize, std::ostream_iterator<mytype>(std::cout, ","));
// permute the matrix
thrust::copy_n(thrust::make_permutation_iterator(d_m.begin(), thrust::make_transform_iterator(thrust::counting_iterator<unsigned>(0), copy_idx_func(cols,thrust::raw_pointer_cast(d_p.data())))), msize, d_rm.begin());
std::cout << std::endl << "Permuted Matrix:" << std::endl;
thrust::copy_n(d_rm.begin(), msize, std::ostream_iterator<mytype>(std::cout, ","));
std::cout << std::endl << "Initial Vector:" << std::endl;
thrust::copy_n(d_v.begin(), vsize, std::ostream_iterator<mytype>(std::cout, ","));
// permute the vector
thrust::copy_n(thrust::make_permutation_iterator(d_v.begin(), thrust::make_transform_iterator(thrust::counting_iterator<unsigned>(0), copy_idx_func(1,thrust::raw_pointer_cast(d_p.data())))), vsize, d_rv.begin());
std::cout << std::endl << "Permuted Vector:" << std::endl;
thrust::copy_n(d_rv.begin(), vsize, std::ostream_iterator<mytype>(std::cout, ","));
std::cout << std::endl;
}
$ nvcc -o t1061 t1061.cu
$ ./t1061
Initial Matrix:
1,1,1,1,1,2,2,2,2,2,3,3,3,3,3,4,4,4,4,4,5,5,5,5,5,
Permuted Matrix:
5,5,5,5,5,3,3,3,3,3,1,1,1,1,1,2,2,2,2,2,4,4,4,4,4,
Initial Vector:
1,2,3,4,5,
Permuted Vector:
5,3,1,2,4,
$
注意事项:
在操作上置换向量与置换矩阵相同。我们简单地将向量视为一列的矩阵。
正如评论中所讨论的,如果用例完全在 Thrust 的范围之内,则可能根本不需要复制元素。permutation_iterator 允许我们以任意置换顺序从原始矩阵中选取元素,因此可以直接把这个构造传递给任何需要按置换顺序访问原始矩阵的 Thrust 操作。