【问题标题】:Find all primes from 1 million random integers in less than 6 seconds在 6 秒内从 100 万个随机整数中找出所有素数
【发布时间】:2021-05-11 10:32:39
【问题描述】:

我应该使用多线程来改进此代码,以便在不到 6 秒的时间内对一百万个随机生成的整数的所有素数进行求和和计数。我的教授不到一秒钟就完成了我该怎么做,所以我被卡住了。

#include <stdio.h>
#include <stdlib.h>
#include <pthread.h>
#include <math.h>

int isPrime(int num) {
    int i;
    for (i = 2; i < num; i++) {
        if (num % i == 0) {
            return 0;
        }
    }
    return 1;
}

int main(int argc, char *argv[]) {
    if (argc != 3) {
        printf("Too few arguments ");
        printf("USAGE: ./primeCalc <prime pivot> <num of random numbers>");
        exit(0);
    }

    int randomPivot = atoi(argv[1]);
    int numOfRandomNumbers = atoi(argv[2]);
    long sum = 0;
    long primeCounter = 0;
    
    //init rundom generator
    int random = rand();
    srand(randomPivot);

    //generate random numbers
    for (int i = 0; i < numOfRandomNumbers; i++) {
        random = rand();
        //check if the number is prime
        if (isPrime(random)) {
            //if do, add up it's sum, and increment counter
            sum = sum + random;
            primeCounter++;
        }   
    }
    //keep the out format as this!!
    printf("%ld,%ld\n", sum, primeCounter);
    exit(0);
}

我尝试使用此代码,但由于某种原因,输出错误,即使我使用了 3 个线程,它也只使用了 1 个内核。

#include <stdio.h>
#include <stdlib.h>
#include <pthread.h>
#include <math.h>

pthread_mutex_t lock;

long count1 = 0, count2 = 0, count3 = 0;
long sum1 = 0, sum2 = 0, sum3 = 0;

void *isPrime1(void *x) {
    int i; int num = *(int*)x;
    if (num < 2)
        return NULL;
    if (num == 2)
        return NULL;
    {
        sum1 += num;
        count1 += 1; 
    }
    if (num % 2 == 0)
        return NULL;

    for (i = 3; i * i < num; i = i + 2) {
        if (num % i == 0) {
            return NULL;
        }
    }
    sum1 += num;
    count1 += 1;

    return NULL;
}

void *isPrime2(void *x) {
    int i;
    int num = *(int*)x;
    if (num < 2)
        return NULL;
    if (num == 2)
        return NULL;
    {
        sum2 += num;
        count2 += 1;
    }
    if (num % 2 == 0)
        return NULL;

    for (i = 3; i * i < num; i = i + 2) {
        if (num % i == 0) {
            return NULL;
        }
    }
    sum2 += num;
    count2 += 1;

    return NULL;
}

void *isPrime3(void *x) {
    int i;
    int num = *(int*)x;
    if (num < 2)
        return NULL;
    if (num == 2)
        return NULL;
    {
        sum3 += num;
        count3 += 1;
    }
    if (num % 2 == 0)
        return NULL;

    for (i = 3; i * i < num; i = i + 2) {
        if (num % i == 0) {
            return NULL;
        }
    }
    sum3 += num;
    count3 += 1;

    return NULL;
}

int main(int argc, char *argv[]) {

    if (pthread_mutex_init(&lock, NULL) != 0) {
        printf("mutix init failed\n");
        return 0;
    }

    pthread_t tids[3];

    if (argc != 3) {
        printf("Too few arguments ");
        printf("USAGE: ./primeCalc <prime pivot> <num of random numbers>");
        exit(0);
    }

    int randomPivot = atoi(argv[1]);
    int numOfRandomNumbers = atoi(argv[2]);

    //init rundom generator

    int random = rand();

    srand(randomPivot);

    //generate random numbers

    for (int i = 0; i < numOfRandomNumbers; i += 3) {
        random = rand();

        pthread_create((&tids[0]), NULL, isPrime1, &random);
        random = rand();

        pthread_create((&tids[1]), NULL, isPrime2, &random);
        random = rand();
        pthread_create((&tids[2]), NULL, isPrime3, &random);
    }

    for (int j = 0; j < 3; j++)
        pthread_join(tids[j], NULL);
    long sum = sum1 + sum2 + sum3;
    long count = count1 + count2 + count3;

    //keep the out format as this!!
    printf("%ld,%ld\n", sum, count);
    exit(0);
}

【问题讨论】:

  • 第二件事要尝试:检查一次奇数/偶数...然后只检查奇数:for (i = 3; i &lt; num; i += 2) { ... }。首先,请参阅@qrdl 的评论 :-)
  • 你做了什么来试图摆脱“卡住”?
  • for (i = 2;i&lt;num;i++) - 你需要迭代到num吗?
  • 您的代码现在不使用任何线程。它应该。
  • 假设所有输入值都适合int,首先找到直到INT_MAX 平方根的所有素数并将它们存储在一个表(数组)中。然后,对于每个候选数,测试它是否可以被每个存储的素数整除(但不等于存储的数字)。如果是,则不是素数。否则,它是素数。 (例外:如果候选项小于 2,则它既不是质数也不是合数。)

标签: c


【解决方案1】:

不需要线程来实现你的目标,你所需要的只是对isPrime()函数的小改进:

  • 明确测试偶数
  • 只测试循环中的奇数
  • i &gt; num / i 时停止循环。
  • 01 返回0

这是修改后的版本:

#include <stdio.h>
#include <stdlib.h>

int isPrime(int num) {
    int i;
    if ((num & 1) == 0)
        return num == 2;
    for (i = 3; i <= num / i; i += 2) {
        if (num % i == 0)
            return 0;
    }
    return num != 1;
}

int main(int argc, char *argv[]) {
    if (argc != 3) {
        printf("Too few arguments\n");
        printf("USAGE: ./primeCalc <prime pivot> <num of random numbers>\n");
        exit(0);
    }

    int randomPivot = atoi(argv[1]);
    int numOfRandomNumbers = atoi(argv[2]);
    long sum = 0;
    long primeCounter = 0;

    srand(randomPivot);  //init rundom generator

    for (int i = 0; i < numOfRandomNumbers; i++) {
        int random = rand();
        if (isPrime(random)) {
            sum = sum + random;
            primeCounter++;
        }
    }
    printf("%ld,%ld\n", sum, primeCounter);
    return 0;
}

我 2015 年慢速笔记本电脑上的输出:

~/dev/stackoverflow > time ./primeCalc 0 1000000
50915866465059,48687

real    0m4.151s
user    0m4.119s
sys     0m0.011s

为了进一步提高性能,我们可以预先计算小于或等于RAND_MAX 平方根的素数,减少大素数的除法次数。此外,循环测试可以使用乘法,因为候选素数都低于INT_MAX的平方根:

#include <stdio.h>
#include <stdlib.h>

static int primes[4793]; /* There are 4792 primes <= sqrt(0x7fffffff) */

int isPrime(int num) {
    if ((num & 1) == 0)
        return num == 2;
    for (int *p = primes + 1; *p && (*p) * (*p) <= num; p++) {
        if (num % *p == 0)
            return 0;
    }
    return num != 1;
}

void initPrimes(int max) {
    int i = 0, p;
    primes[i++] = 2;
    for (p = 3; p <= max / p; p += 2) {
        if (isPrime(p))
            primes[i++] = p;
    }
}

int main(int argc, char *argv[]) {
    if (argc != 3) {
        printf("Too few arguments\n");
        printf("USAGE: ./primeCalc <prime pivot> <num of random numbers>\n");
        exit(0);
    }

    int randomPivot = atoi(argv[1]);
    int numOfRandomNumbers = atoi(argv[2]);
    long sum = 0;
    long primeCounter = 0;

    initPrimes(RAND_MAX);

    srand(randomPivot);  //init rundom generator

    for (int i = 0; i < numOfRandomNumbers; i++) {
        int random = rand();
        if (isPrime(random)) {
            sum = sum + random;
            primeCounter++;
        }
    }
    printf("%ld,%ld\n", sum, primeCounter);
    return 0;
}

输出:

~/dev/stackoverflow > ./primeCalc1 0 1000000
50915866465059,48687

real    0m0.580s
user    0m0.566s
sys     0m0.004s

这个结果和你老师的结果一致,但没有线程。

您可以使用线程进一步改进计时,但有一个问题:对于您的测试,您希望计算相同的随机数,如果从不同的线程调用 rand(),则无法保证。因此,您需要在主线程中计算随机数并将它们存储在一个数组中,然后使用单独的线程分隔数组的各个部分并组合结果。我建议您将参数和结果存储在每个线程的单独结构中,并将指向它的指针作为不透明参数传递。

这是一个例子:

#include <stdio.h>
#include <stdlib.h>
#include <pthread.h>

static int primes[4793];  /* There are 4792 primes <= sqrt(0x7fffffff) */

int isPrime(int num) {
    if ((num & 1) == 0)
        return num == 2;
    for (int *p = primes + 1; *p && (*p) * (*p) <= num; p++) {
        if (num % *p == 0)
            return 0;
    }
    return num != 1;
}

void initPrimes(int max) {
    int i = 0, p;
    primes[i++] = 2;
    for (p = 3; p <= max / p; p += 2) {
        if (isPrime(p))
            primes[i++] = p;
    }
}

struct primeCalcArgs {
    pthread_t tid;
    int *array;
    int start, end;
    long sum, count;
};

void *primeCalcTask(void *opaque) {
    struct primeCalcArgs *p = opaque;
    int *array = p->array;
    p->sum = 0;
    p->count = 0;
    for (int i = p->start; i < p->end; i++) {
        if (isPrime(array[i])) {
            p->sum += array[i];
            p->count++;
        }
    }
    return NULL;
}

int main(int argc, char *argv[]) {
    if (argc < 3) {
        fprintf(stderr, "primeCalc: Too few arguments\n");
        fprintf(stderr, "usage: ./primeCalc <prime pivot> <num of random numbers> [<nb of threads>]\n");
        return 1;
    }

    int randomPivot = atoi(argv[1]);
    int numOfRandomNumbers = atoi(argv[2]);
    int numOfThreads = argc > 3 ? atoi(argv[3]) : 1;
    long sum = 0;
    long count = 0;

    if (numOfThreads < 1)
        numOfThreads = 1;

    if (numOfRandomNumbers > 0) {
        int i, n;

        initPrimes(RAND_MAX);

        int *array = malloc(sizeof(*array) * numOfRandomNumbers);
        if (!array) {
            fprintf(stderr, "primeCalc: cannot allocate random number array\n");
            return 1;
        }
        srand(randomPivot);  //init rundom generator
        for (i = 0; i < numOfRandomNumbers; i++) {
            array[i] = rand();
        }
        struct primeCalcArgs args[numOfThreads];
        for (n = 0; n < numOfThreads; n++) {
            args[n].array = array;
            args[n].start = (long long)numOfRandomNumbers * n / numOfThreads;
            args[n].end = (long long)numOfRandomNumbers * (n + 1) / numOfThreads;
            args[n].sum = 0;
            args[n].count = 0;
            if (pthread_create(&args[n].tid, NULL, primeCalcTask, &args[n])) {
                fprintf(stderr, "primeCalc: cannot create thread %d\n", n + 1);
                break;
            }
        }
        for (i = 0; i < n; i++) {
            pthread_join(args[i].tid, NULL);
            sum += args[i].sum;
            count += args[i].count;
        }
        free(array);
    }
    printf("%ld,%ld\n", sum, count);
    return 0;
}

输出:

~/dev/stackoverflow > time ./primeCalc 0 1000000
50915866465059,48687

real    0m0.581s
user    0m0.565s
sys     0m0.007s

~/dev/stackoverflow > time ./primeCalc 0 1000000 2
50915866465059,48687

real    0m0.293s
user    0m0.552s
sys     0m0.006s

~/dev/stackoverflow > time ./primeCalc 0 1000000 3
50915866465059,48687

real    0m0.286s
user    0m0.739s
sys     0m0.007s

~/dev/stackoverflow > time ./primeCalc 0 1000000 4
50915866465059,48687

real    0m0.242s
user    0m0.846s
sys     0m0.006s

~/dev/stackoverflow > time ./primeCalc 0 1000000 5
50915866465059,48687

real    0m0.245s
user    0m0.845s
sys     0m0.008s

~/dev/stackoverflow > time ./primeCalc 0 1000000 10
50915866465059,48687

real    0m0.281s
user    0m0.858s
sys     0m0.008s

~/dev/stackoverflow > time ./primeCalc 0 1000000 100
50915866465059,48687

real    0m0.279s
user    0m0.846s
sys     0m0.012s

~/dev/stackoverflow > time ./primeCalc 0 1000000 10000
50915866465059,48687

real    0m1.274s
user    0m0.752s
sys     0m1.663s

如您所见,我的笔记本电脑有完整的 2 个内核:2 个线程的实际时间正好是一半。它从 3 和 4 线程的超线程能力中获得了微小的改进,但超过 4 线程没有改进。额外线程的性能实际上会降低,我什至惊讶于请求 10000 个线程成功并且仍然执行得相当好。

对于更大数量的随机数,我建议使用筛子计算直到RAND_MAX 的所有素数的位图,为整个过程增加固定开销。在我的笔记本电脑上,这个筛子需要一到两秒钟,然后立即进行素数测试,只留下随机数生成器的开销。

【讨论】:

  • 您可以将i &lt;= num / i 更改为等效的i*i &lt;= num 以消除除法。
  • @dbush: true 但对于接近INT_MAXnum,乘法可能会溢出,这可能发生在随机数上。此外,高效编译器将除法与模运算相结合。
  • @dbush: 高效的编译器...操作,即使是简单的代码,而 gcc 用于发布的代码。在我的 Macbook 上使用 (unsigned)i * i &lt;= (unsigned)num 可以减少 40% 的运行时间。代码生成可以看这里:godbolt.org/z/P3K8orPnh
猜你喜欢
  • 1970-01-01
  • 1970-01-01
  • 1970-01-01
  • 1970-01-01
  • 1970-01-01
  • 1970-01-01
  • 2011-06-05
  • 1970-01-01
  • 2016-08-14
相关资源
最近更新 更多