【问题标题】:compute pi value using monte carlo method multithreading使用蒙特卡罗方法多线程计算 pi 值
【发布时间】:2015-11-14 20:42:40
【问题描述】:

我正在尝试使用蒙特卡罗(Monte Carlo)方法,用并行 C 代码计算 PI 的值。我已经编写了串行(serial)代码,并且工作正常。但是并行代码有时会给出错误的 pi 值:0 或负值。

我的代码

 #include <pthread.h>
 #include <stdio.h>
#include <stdlib.h>
#include <math.h>
 #define NUM_THREADS 4 //number of threads
#define TOT_COUNT 10000055 //total number of iterations

/*
 * Thread entry point: Monte Carlo sampling of the unit circle.
 *
 * threadid - the integer thread id, passed by value inside the pointer.
 *
 * Each thread samples TOT_COUNT / NUM_THREADS random points in the square
 * [-1, 1) x [-1, 1) and counts those that fall strictly inside the unit
 * circle. Thread 0 additionally handles the TOT_COUNT % NUM_THREADS points
 * left over by the integer division.
 *
 * Returns a malloc'd float holding the number of points inside the circle;
 * the caller receives it through pthread_join() and owns it (must free()
 * it). Returns NULL if the allocation fails.
 */
void *doCalcs(void *threadid)
{
    long longTid = (long)threadid;
    int tid = (int)longTid; /* obtain the integer value of thread id */

    /* Integer share of the work; thread 0 also takes the remainder. */
    long tot_iterations = TOT_COUNT / NUM_THREADS;
    if (tid == 0) {
        tot_iterations += TOT_COUNT % NUM_THREADS;
    }

    /*
     * Private generator state for rand_r(), seeded from the thread id.
     * rand() keeps hidden shared state and must not be called concurrently
     * from several threads, so it is not used for seeding here.
     */
    unsigned int rand_state = (unsigned int)tid * 2654435761u + 12345u;

    /* Count in an integer; it is converted to float only for the result. */
    long hits = 0;
    long counter;
    for (counter = 0; counter < tot_iterations; counter++) {
        double x = rand_r(&rand_state) / ((double)RAND_MAX + 1) * 2.0 - 1.0;
        double y = rand_r(&rand_state) / ((double)RAND_MAX + 1) * 2.0 - 1.0;
        /* Comparing the squared distance avoids a sqrt() per sample. */
        if (x * x + y * y < 1.0) {
            hits++;
        }
    }

    /*
     * The result lives on the heap so that it outlives this thread's stack.
     * It must actually be returned, otherwise pthread_join() yields an
     * indeterminate pointer.
     */
    float *in_count = (float *)malloc(sizeof(float));
    if (in_count == NULL) {
        return NULL;
    }
    *in_count = (float)hits;
    return in_count;
}


 /*
  * Program entry point: starts NUM_THREADS worker threads running doCalcs,
  * sums the per-thread "inside the circle" counts and prints the resulting
  * estimate of PI (4 * inside / TOT_COUNT).
  *
  * Each worker is expected to hand back a malloc'd float through
  * pthread_join(); that buffer is read and then freed here.
  *
  * Returns 0 on success; exits with status -1 if a thread cannot be created,
  * cannot be joined, or delivers no result.
  */
 int main(int argc, char *argv[])
 {
     pthread_t threads[NUM_THREADS];
     long t;
     double tot_in = 0; /* total number of points inside the circle */

     (void)argc;
     (void)argv;

     for (t = 0; t < NUM_THREADS; t++) {
         /* The thread id travels by value inside the void* argument. */
         int rc = pthread_create(&threads[t], NULL, doCalcs, (void *)t);
         if (rc) {
             printf("ERROR; return code from pthread_create() is %d\n", rc);
             exit(-1);
         }
     }

     /* join the threads and accumulate their counts */
     for (t = 0; t < NUM_THREADS; t++) {
         void *status = NULL;
         int rc = pthread_join(threads[t], &status);
         if (rc || status == NULL) {
             printf("ERROR; no result from thread %ld (pthread_join returned %d)\n", t, rc);
             exit(-1);
         }
         tot_in += *(float *)status; /* keep track of the total in count */
         free(status);               /* release the worker's result buffer */
     }

     /* Exactly one argument for the single %f conversion. */
     printf("Value for PI is %f \n", 4 * (tot_in / TOT_COUNT));

     /* Every worker has been joined, so a plain return is sufficient. */
     return 0;
 }

【问题讨论】:

  • 这段代码中有很多可疑的东西,比如你为什么要为 in_count 动态分配一个浮点数?顺便说一下,这个变量你实际上是当作整数来用的。哦,而且你没有在任何地方释放该分配,这会导致内存泄漏。
  • 我建议使用 double。如果你想要 float,为什么不直接计算 22/7 或 355/113?
  • @WeatherVane 他想尝试蒙特卡洛方法,我认为是为了教育。

标签: c++ c parallel-processing


【解决方案1】:

这是使用 @vladon 建议的 async 和 future 的解决方案。

#include <iostream>
#include <vector>
#include <random>
#include <future>

using namespace std;

/// Samples `n_samples` points uniformly from the unit square [0, 1) x [0, 1)
/// and returns how many of them satisfy x*x + y*y <= 1.
///
/// A fresh Mersenne Twister engine, seeded from std::random_device, is
/// created on every call. A non-positive `n_samples` yields 0.
long random_circle_sampling(long n_samples){
    std::random_device seed_source;      // supplies the seed for the engine
    std::mt19937 engine(seed_source());  // Mersenne Twister seeded from it
    std::uniform_real_distribution<> unit(0.0, 1.0);

    long hits = 0;
    for(long remaining = n_samples; remaining > 0; --remaining){
        const double x = unit(engine);
        const double y = unit(engine);
        const double dist_sq = x*x + y*y;
        if(dist_sq > 1.0){
            continue;  // point lies outside the quarter circle
        }
        ++hits;
    }
    return hits;
}

/// Estimates pi by running random_circle_sampling on several asynchronous
/// tasks and combining their counts.
///
/// @param tot_samples total number of random points to draw
/// @param n_threads   number of asynchronous tasks to spread the work over;
///                    values below 1 are treated as 1
/// @return 4 * (points inside the circle) / tot_samples
///
/// The samples are split as evenly as possible: every task gets
/// tot_samples / n_threads points and the first tot_samples % n_threads
/// tasks get one extra, so exactly tot_samples points are drawn and the
/// denominator matches the work actually done.
double approximate_pi(long tot_samples, int n_threads){
    // A thread count of zero would divide by zero below.
    const int n_tasks = n_threads < 1 ? 1 : n_threads;
    const long samples_per_thread = tot_samples / n_tasks;
    const long leftover = tot_samples % n_tasks;

    // Used to store the future results
    std::vector<std::future<long>> futures;
    futures.reserve(static_cast<std::size_t>(n_tasks));
    for(int t = 0; t < n_tasks; ++t){
        const long task_samples = samples_per_thread + (t < leftover ? 1 : 0);
        if(task_samples <= 0){
            continue;  // nothing to do for this task; do not start it
        }
        // Start a new asynchronous task
        futures.emplace_back(std::async(std::launch::async, random_circle_sampling, task_samples));
    }

    long tot_points_inside = 0;
    for(std::future<long>& f : futures){
        // Wait for the result to be ready
        tot_points_inside += f.get();
    }

    return 4.0 * static_cast<double>(tot_points_inside) / static_cast<double>(tot_samples);
}


/// Runs the estimator with 1,000,000 samples on 8 tasks and prints the
/// reference value of pi, the approximation and their absolute difference.
int main() {
    std::cout.precision(32);

    const long tot_samples = 1000000;
    const int n_threads = 8;
    const double pi = 3.14159265358979323846;
    const double approx_pi = approximate_pi(tot_samples, n_threads);

    // Computed by hand: an unqualified abs() without <cmath> may bind to the
    // integer overload and truncate the difference to 0.
    const double diff = pi - approx_pi;
    const double abs_diff = diff < 0.0 ? -diff : diff;

    std::cout << "pi\t\t" << pi << '\n';
    std::cout << "approx_pi\t" << approx_pi << '\n';
    std::cout << "abs_diff\t" << abs_diff << '\n';

    return 0;
}

你可以简单地运行它:

$ g++ -std=c++11 -O3 pi.cpp -o pi && time ./pi
pi              3.1415926535897931159979634685442
approx_pi       3.1427999999999998159694314381341
abs_diff        0.0012073464102066999714679695898667
./pi  0.04s user 0.00s system 27% cpu 0.163 total

【讨论】:

    【解决方案2】:

    您的代码不是 C++,而是糟糕的、非常糟糕的普通旧式 C。

    那是 C++:

    #include <cmath>
    #include <iostream>
    #include <numeric>
    #include <random>
    #include <thread>
    #include <vector>
    
    // Number of worker threads the sampling work is divided among.
    constexpr auto num_threads = 4;
    // Total number of sampling iterations summed over all threads.
    constexpr auto total_count = 10000055;
    
    /// Samples `total_iterations` random points in the unit square
    /// [0, 1) x [0, 1) and counts those strictly inside the unit circle.
    ///
    /// @param total_iterations number of points to sample; a non-positive
    ///                         value samples nothing
    /// @param in_count_result  receives the number of points inside the
    ///                         circle; written exactly once, at the end
    void doCalcs(int total_iterations, int & in_count_result)
    {
        // Each call builds its own engine, seeded from std::random_device.
        auto seed = std::random_device{}();
        auto gen = std::mt19937{ seed };
        auto dist = std::uniform_real_distribution<>{0.0, 1.0};

        int in_count = 0;

        //calculation
        for (int counter = 0; counter < total_iterations; ++counter) {
            const double x = dist(gen);
            const double y = dist(gen);
            // sqrt(d) < 1 is equivalent to d < 1 for d >= 0, so compare the
            // squared distance directly instead of calling pow() and sqrt().
            if (x * x + y * y < 1.0) {
                ++in_count; //the generated point is inside the unit circle
            }
        }

        // Accumulate locally and publish once through the reference.
        in_count_result = in_count;
    }
    
    void main()
    {
        std::vector<std::thread> threads(num_threads);
        std::vector<int> in_count(num_threads);
        in_count.resize(num_threads);
    
        for (size_t i = 0; i < num_threads; ++i) {
            int total_iterations = total_count / num_threads;
            if (i == 0) {
                total_iterations += total_count % num_threads; // get the remaining iterations calculated by thread 0
            }
    
            threads.emplace_back(doCalcs, total_iterations, std::ref(in_count[i]));
        }
    
        for (auto & thread : threads) {
            if (thread.joinable()) {
                thread.join();
            }
        }
    
        double pi_value = 4.0 * static_cast<double>(std::accumulate(in_count.begin(), in_count.end(), 0)) / static_cast<double>(total_count);
        std::cout << "Value of PI is: " << pi_value << std::endl;
    }
    

    附:它也不是那么好,请阅读futures、promises 和std::async

    【讨论】:

      猜你喜欢
      • 1970-01-01
      • 1970-01-01
      • 2023-04-03
      • 2018-07-02
      • 2021-01-06
      • 1970-01-01
      • 2013-02-28
      • 1970-01-01
      • 1970-01-01
      相关资源
      最近更新 更多