【问题标题】:Using Thrust counting iterators with strides(使用带步幅的 Thrust 计数迭代器)
【发布时间】:2014-09-11 06:40:05
【问题描述】:

我正在寻找一种使用 thrust::counting_iterator 函数的方法,以便并行化以下 for 循环:

// Sequential reference loop: `stride` starts at 0 and is advanced by M while it
// stays below N * M, so it takes the values 0, M, 2*M, ...
for (int stride = 0 ; stride < N * M ; stride+=M) // N iterations
{
    // Body of the loop
}

代码如下:

struct functor ()
{
   __host__ __device__ void operator() (const int i)
   {
      // Body of the loop
   }
}

// Visits every integer in [0, N * M): a counting_iterator advances by 1 per step.
thrust::counting_iterator<int> first(0);
thrust::counting_iterator<int> last = first + N * M;
thrust::for_each (first , last , functor());

我知道counting_iterator 将迭代器递增 1,那么有没有办法以 M 递增?

【问题讨论】:

    标签: cuda thrust


    【解决方案1】:

    这是 arbitrary transformation example 和 strided range example 的组合。

    下面,我正在考虑一个转换的例子

    D[i] = A[i] + B[i] * C[i]
    

    代码如下:

    #include <thrust/for_each.h>
    #include <thrust/host_vector.h>
    #include <thrust/device_vector.h>
    #include <thrust/iterator/zip_iterator.h>
    #include <iostream>
    
    #include <thrust/iterator/counting_iterator.h>
    #include <thrust/iterator/transform_iterator.h>
    #include <thrust/iterator/permutation_iterator.h>
    #include <thrust/functional.h>
    
    #include <thrust/fill.h>
    
    // for printing
    #include <thrust/copy.h>
    #include <ostream>
    
    #define STRIDE 2
    
    // Presents every `stride`-th element of the range [first, last) as an
    // iterator range of its own, starting with the element at `first`.
    template <typename Iterator>
    class strided_range
    {
    public:

        typedef typename thrust::iterator_difference<Iterator>::type difference_type;

        // Converts a logical position n into the underlying offset stride * n.
        struct stride_functor : public thrust::unary_function<difference_type, difference_type>
        {
            difference_type stride;

            stride_functor(difference_type step)
                : stride(step)
            {
            }

            __host__ __device__
            difference_type operator()(const difference_type& n) const
            {
                return stride * n;
            }
        };

        // counting (0, 1, 2, ...) -> transform (0, stride, 2*stride, ...) -> permutation (indexes `first`)
        typedef typename thrust::counting_iterator<difference_type>                   CountingIterator;
        typedef typename thrust::transform_iterator<stride_functor, CountingIterator> TransformIterator;
        typedef typename thrust::permutation_iterator<Iterator, TransformIterator>    PermutationIterator;

        // iterator type exposed by begin() and end()
        typedef PermutationIterator iterator;

        // Builds a strided view over [first, last) with the given stride.
        strided_range(Iterator first, Iterator last, difference_type stride)
            : first(first), last(last), stride(stride)
        {
        }

        // Iterator to the first strided element (offset 0 from `first`).
        iterator begin(void) const
        {
            const CountingIterator  positions(0);
            const TransformIterator offsets(positions, stride_functor(stride));
            return PermutationIterator(first, offsets);
        }

        // One past the last strided element.
        iterator end(void) const
        {
            // Rounded-up division: a trailing partial stride still contributes one element.
            const difference_type length = last - first;
            const difference_type count  = (length + (stride - 1)) / stride;
            return begin() + count;
        }

    protected:
        Iterator first;
        Iterator last;
        difference_type stride;
    };
    
    // Function object for thrust::for_each over a zipped 4-tuple: it combines
    // elements 0, 1 and 2 and assigns the result to element 3 (D = A + B * C).
    struct arbitrary_functor
    {
        // Tuple is deduced from whatever the zip iterator yields when dereferenced.
        // The tuple is received by value; the result is delivered through the
        // assignment to element 3 (presumably a reference into D - confirm
        // against the zip iterator's reference type).
        template <typename Tuple>
        __host__ __device__
        void operator()(Tuple t)
        {
            // D[i] = A[i] + B[i] * C[i];
            thrust::get<3>(t) = thrust::get<0>(t) + thrust::get<1>(t) * thrust::get<2>(t);
        }
    };
    
    
    int main(void)
    {
        // allocate storage
        thrust::device_vector<float> A(5);
        thrust::device_vector<float> B(5);
        thrust::device_vector<float> C(5);
        thrust::device_vector<float> D(5);
    
        // initialize input vectors
        A[0] = 3;  B[0] = 6;  C[0] = 2; 
        A[1] = 4;  B[1] = 7;  C[1] = 5; 
        A[2] = 0;  B[2] = 2;  C[2] = 7; 
        A[3] = 8;  B[3] = 1;  C[3] = 4; 
        A[4] = 2;  B[4] = 8;  C[4] = 3; 
    
        typedef thrust::device_vector<float>::iterator Iterator;
        strided_range<Iterator> posA(A.begin(), A.end(), STRIDE);
        strided_range<Iterator> posB(B.begin(), B.end(), STRIDE);
        strided_range<Iterator> posC(C.begin(), C.end(), STRIDE);
        strided_range<Iterator> posD(D.begin(), D.end(), STRIDE);
    
        // apply the transformation
        thrust::for_each(thrust::make_zip_iterator(thrust::make_tuple(posA.begin(), posB.begin(), posC.begin(), posD.begin())),
                     thrust::make_zip_iterator(thrust::make_tuple(posA.end(), posB.end(), posC.end(), posD.end())),
                     arbitrary_functor());
    
        // print the output
        for(int i = 0; i < 5; i++)
        std::cout << A[i] << " + " << B[i] << " * " << C[i] << " = " << D[i] << std::endl;
    }
    

    【讨论】:

      【解决方案2】:

      为什么不在仿函数中将 i 变量乘以 M?

      如果M 在编译时已知,它可能是:

      // Functor for a stride M that is available as a compile-time name:
      // the counter value my_i is multiplied by M to obtain the strided index i.
      struct functor 
      {
         __host__ __device__ void operator() (const int my_i)
         {
            // i plays the role of the original loop variable
            int i = my_i *M;
            // Body of the loop
         }
      };
      
      // Count over [0, N) only; the multiplication by M happens inside the functor.
      thrust::counting_iterator<int> first(0);
      thrust::counting_iterator<int> last = first + N;
      thrust::for_each (first , last , functor());
      

      如果M 仅在运行时已知,我们可以将其作为初始化参数传递给函子:

      // Functor for a stride known only at run time: the multiplier is passed
      // to the constructor and stored in the object.
      struct functor 
      {
         int my_M;  // multiplier applied to every counter value

         // m - the stride. The constructor body must be `{}`; the parameter is
         // not named `_M` because identifiers of that form are reserved.
         functor(int m) : my_M(m) {}

         __host__ __device__ void operator() (const int my_i)
         {
            // i plays the role of the original loop variable
            int i = my_i *my_M;
            // Body of the loop
         }
      };
      
      // Count over [0, N) only; the stride M is handed to the functor's constructor.
      thrust::counting_iterator<int> first(0);
      thrust::counting_iterator<int> last = first + N;
      thrust::for_each (first , last , functor(M));
      

      您还可以将计数迭代器包装在转换迭代器中,它接受计数迭代器并将其乘以 M:

      // Functor for the transform-iterator variant: no multiplication is done
      // here, i is used exactly as it is passed in.
      struct functor 
      {
         __host__ __device__ void operator() (const int i)
         {
            // Body of the loop
         }
      };
      
      using namespace thrust::placeholders;
      thrust::counting_iterator<int> it1(0);
      thrust::counting_iterator<int> it2 = it1 + N;
      // `_1 * M` is a placeholder expression: each counter value is multiplied by M
      // before it reaches the functor. Both iterators are built with the qualified
      // thrust::make_transform_iterator so neither call depends on argument-dependent lookup.
      thrust::for_each (thrust::make_transform_iterator(it1, _1 * M) , thrust::make_transform_iterator(it2, _1 * M) , functor());
      

      最后一个示例使用 thrust placeholder expressions,尽管它也可以通过一个额外的普通函子等效地实现,该函子返回其输入参数乘以其保存的乘数(此处为 M)的结果。

      这是一个完整的示例,展示了所有 3 种方法:

      $ cat t492.cu
      #include <stdio.h>
      #include <thrust/transform.h>
      #include <thrust/for_each.h>
      #include <thrust/execution_policy.h>
      #include <thrust/iterator/counting_iterator.h>
      #include <thrust/iterator/transform_iterator.h>
      #include <thrust/host_vector.h>
      #include <thrust/functional.h>
      #define N 5
      #define M 4
      using namespace thrust::placeholders;
      
      // Prints the value it is called with, unchanged.
      struct my_functor_1
      {
        __host__ __device__
        void operator() (const int value)
        {
          printf("functor 1 value: %d\n", value);
        }
      };
      
      // Multiplies the incoming counter value by M and prints the product.
      struct my_functor_2
      {
        __host__ __device__
        void operator() (const int my_i)
        {
          printf("functor 2 value: %d\n", my_i * M);
        }
      };
      
      // Multiplies the incoming counter value by a multiplier supplied at
      // construction time and prints the product.
      struct my_functor_3
      {
         int my_M;  // multiplier applied to every counter value

         // m - the stride. The parameter is not named `_M` because identifiers
         // starting with an underscore and an uppercase letter are reserved.
         my_functor_3(int m) : my_M(m) {}

         __host__ __device__ void operator() (const int my_i)
         {
            int i = my_i *my_M;
            printf("functor 3 value: %d\n", i);
         }
      };
      
      
      int main(){
        thrust::counting_iterator<int> it1(0);
        thrust::counting_iterator<int> it2 = it1 + N;
        thrust::for_each(thrust::host, it1, it2, my_functor_1());
        thrust::for_each(thrust::host, it1, it2, my_functor_2());
        thrust::for_each(thrust::host, it1, it2, my_functor_3(M));
        thrust::for_each(thrust::host, thrust::make_transform_iterator(it1, _1 * M), thrust::make_transform_iterator(it2, _1 * M), my_functor_1());
        return 0;
      }
      
      
      $ nvcc -arch=sm_20 -o t492 t492.cu
      $ ./t492
      functor 1 value: 0
      functor 1 value: 1
      functor 1 value: 2
      functor 1 value: 3
      functor 1 value: 4
      functor 2 value: 0
      functor 2 value: 4
      functor 2 value: 8
      functor 2 value: 12
      functor 2 value: 16
      functor 3 value: 0
      functor 3 value: 4
      functor 3 value: 8
      functor 3 value: 12
      functor 3 value: 16
      functor 1 value: 0
      functor 1 value: 4
      functor 1 value: 8
      functor 1 value: 12
      functor 1 value: 16
      $
      

      【讨论】:

      • 第三种方法(使用 make_transform_iterator)正是我想要的!谢谢。
      猜你喜欢
      • 1970-01-01
      • 2019-05-17
      • 1970-01-01
      • 1970-01-01
      • 1970-01-01
      • 2011-07-22
      • 2020-10-25
      • 1970-01-01
      • 1970-01-01
      相关资源
      最近更新 更多