#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>
#include <omp.h>
#define ARRAY_LEN 1000000
/*
 * Sums ARRAY_LEN pseudo-random integers twice -- once serially and once with
 * an OpenMP work-shared loop -- then prints both totals and the elapsed
 * wall-clock time of the parallel section.
 *
 * argc/argv are unused.
 * Returns 0 on success, EXIT_FAILURE if the input array cannot be allocated.
 */
int main(int argc, char* argv[]) {
    (void)argc;
    (void)argv;

    /* The array is several megabytes; an automatic array of that size can
     * overflow the default stack, so it lives on the heap. */
    int *arr = malloc((size_t)ARRAY_LEN * sizeof *arr);
    if (arr == NULL) {
        fprintf(stderr, "failed to allocate %d ints\n", ARRAY_LEN);
        return EXIT_FAILURE;
    }

    unsigned int seed = 12345;
    srand(seed);
    for (long i = 0; i < ARRAY_LEN; ++i) {
        arr[i] = rand();
    }

    /* Sums are long long: adding ARRAY_LEN values of up to RAND_MAX
     * overflows int, and signed overflow is undefined behavior. */
    long long realTotalSum = 0;
    for (long i = 0; i < ARRAY_LEN; ++i) {
        realTotalSum += arr[i];
    }

    const int n = 128;
    omp_set_num_threads(n);
    printf("OpenMP running with %d threads\n", omp_get_max_threads());

    /* omp_get_wtime() measures elapsed wall-clock time. clock() reports
     * processor time used by the whole process, which on typical systems
     * accumulates across all threads and so does not reflect elapsed time. */
    double start = omp_get_wtime();

    long long totalSum = 0;
    #pragma omp parallel
    {
        int id = omp_get_thread_num();
        /* Declared inside the region so every thread gets its own copy that
         * is explicitly zeroed; a private() copy would start uninitialized. */
        long long localSum = 0;
        printf("Hello world from thread %d\n", id);

        #pragma omp for
        for (long i = 0; i < ARRAY_LEN; ++i) {
            localSum += arr[i];
        }

        printf("local sum in thread: %lld\n", localSum);

        /* Fold this thread's partial sum into the shared total without a
         * data race and without a fixed-size per-thread array. */
        #pragma omp atomic
        totalSum += localSum;
    }

    /* The timed interval covers the parallel region only, including thread
     * start-up and the per-thread printf calls. */
    double end = omp_get_wtime();

    printf("realTotalSum:%lld\n", realTotalSum);
    printf("totalSum:%lld\n", totalSum);

    double timeSpent = end - start;
    printf("time spent = %10.6f\n", timeSpent);

    free(arr);
    return 0;
}
// This is my code; I only change `const int n = 128` to other values when testing.
I just ran your code and can confirm that I get the same result.
Yes, this behavior is pretty standard.
When using more threads, we must account for the cost of thread creation and destruction, data distribution, memory access, etc. The more threads there are, the more time is spent on this overhead, and for a small workload it can outweigh the benefit of running in parallel.