#include <stdio.h>
#include <stdlib.h>
#include <omp.h>

/*
 * ITERATIONS: trips of the parallel loop in main(); each trip adds 1.
 * SPIN_COUNT: empty loop trips add() performs before it returns.
 */
enum { ITERATIONS = 100000, SPIN_COUNT = 10000 };

int add(int x);

/*
 * Returns x unchanged after running an empty counting loop.
 *
 * The loop has no effect on the result; it only makes each call take
 * longer. NOTE(review): an optimizing build may remove the empty loop
 * entirely -- confirm the optimization level if timing matters.
 */
int add(int x)
{
    for (int spin = 0; spin < SPIN_COUNT; spin++) {
        /* do nothing, just waste time */
    }
    return x;
}

/*
 * Adds 1 to `a` ITERATIONS times inside an OpenMP parallel loop and
 * prints the total.
 *
 * NOTE: `a` is shared by all threads and updated without any
 * synchronization, so concurrent read-modify-write updates can be lost
 * and the printed total may fall short of 100000. This listing appears
 * to be the deliberately unsynchronized baseline; adding a
 * reduction(+:a) clause to the pragma (or protecting the update with
 * atomic/critical) makes the sum reliable.
 */
int main(void)
{
    int a = 0;

#pragma omp parallel for
    for (int iter = 0; iter < ITERATIONS; iter++) {
        a = a + add(1); /* unsynchronized update of shared `a` */
    }

    printf("a = %d\n", a); /* should be 100000 */
    return 0;
}
$ gcc test1.c -std=c99 -fopenmp
$ time ./a.out
a = 97855
real    0m0.303s
user    0m2.371s
sys     0m0.006s
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <omp.h>

/* Number of threads requested for the parallel region. */
#define N 5

/*
 * ITERATIONS: trips of the parallel loop in main(); each trip adds 1.
 * SPIN_COUNT: empty loop trips add() performs before it returns.
 */
enum { ITERATIONS = 100000, SPIN_COUNT = 10000 };

int add(int x);

/*
 * Sums ITERATIONS ones in parallel without a reduction clause: each
 * thread accumulates its own partial sum, and the partial sums are added
 * together serially once the parallel region has ended.
 *
 * Prints "a = <total>"; the total is ITERATIONS because every loop trip
 * contributes exactly 1 and no two threads write the same variable.
 */
int main(void)
{
    int a = 0;

    /*
     * One slot per requested thread, zero-initialized. num_threads(N) is
     * an upper bound on the team size, so the runtime may start fewer
     * threads; slots of threads that never ran simply stay 0.
     */
    int partial_sums[N] = {0};

#pragma omp parallel num_threads(N)
    {
        /* The thread id is fixed for the whole region: query it once. */
        const int tid = omp_get_thread_num();

        /*
         * Accumulate in a thread-private local instead of writing to
         * partial_sums[tid] on every trip. The slots are adjacent ints
         * and typically share a cache line, so per-iteration writes from
         * different threads would contend for it (false sharing).
         */
        int local_sum = 0;

#pragma omp for
        for (int i = 0; i < ITERATIONS; i++) {
            local_sum += add(1);
        }

        /* Single write per thread; tid is always less than N here. */
        partial_sums[tid] = local_sum;
    }

    /* Sum up the results of each thread. */
    for (int t = 0; t < N; t++) {
        a += partial_sums[t];
    }

    printf("a = %d\n", a);
    return 0;
}

/*
 * Returns x unchanged after running an empty counting loop.
 *
 * The loop has no effect on the result; it only makes each call take
 * longer. NOTE(review): an optimizing build may remove the empty loop
 * entirely -- confirm the optimization level if timing matters.
 */
int add(int x)
{
    for (int spin = 0; spin < SPIN_COUNT; spin++) {
        /* do nothing, just waste time */
    }
    return x;
}
$ gcc test2.c -std=c99 -fopenmp
$ time ./a.out
a = 100000
real    0m0.456s
user    0m2.211s
sys     0m0.000s
#include <stdio.h>
#include <stdlib.h>
#include <omp.h>

/*
 * ITERATIONS: trips of the parallel loop in main(); each trip adds 1.
 * SPIN_COUNT: empty loop trips add() performs before it returns.
 */
enum { ITERATIONS = 100000, SPIN_COUNT = 10000 };

int add(int x);

/*
 * Returns x unchanged after running an empty counting loop.
 *
 * The loop has no effect on the result; it only makes each call take
 * longer. NOTE(review): an optimizing build may remove the empty loop
 * entirely -- confirm the optimization level if timing matters.
 */
int add(int x)
{
    for (int spin = 0; spin < SPIN_COUNT; spin++) {
        /* do nothing, just waste time */
    }
    return x;
}

/*
 * Adds 1 to `a` ITERATIONS times inside an OpenMP parallel loop and
 * prints the total.
 *
 * The reduction(+:a) clause gives every thread its own private copy of
 * `a` to accumulate into and adds those copies into the original `a`
 * when the loop ends, so the threads never update the same variable
 * concurrently.
 */
int main(void)
{
    int a = 0;

#pragma omp parallel for reduction(+:a)
    for (int iter = 0; iter < ITERATIONS; iter++) {
        a = a + add(1);
    }

    printf("a = %d\n", a);
    return 0;
}
$ gcc test3.c -std=c99 -fopenmp
$ time ./a.out
a = 100000
real    0m0.293s
user    0m2.342s
sys     0m0.000s
/*
 * Serial accumulation: every trip of the loop folds one more term into `a`.
 * `something` is a placeholder for the per-iteration term.
 */
for (int i=0; i < N; i++) {
    a = a + something;
}
#pragma omp parallel for reduction(+:a) for (int i=0; i < N; i++) { a = a + something; } |
targetVar = targetVar <operator> <expr> |
#pragma omp parallel for reduction(<operator>:targetVar)
for (...) {
    targetVar = targetVar <operator> <expr>;
}