/*
 * Dense square-matrix helpers: fill a matrix with a row-index pattern and
 * multiply two matrices, with the multiplication parallelised via OpenMP.
 *
 * Matrices are passed as arrays of row pointers (float **), indexed M[row][col].
 */
#include <math.h>
#include <stdio.h>
#include <stdlib.h>

#include <omp.h>
#include <openacc.h>

/**
 * Fill a size x size matrix so that every element of row i equals (float)i.
 *
 * @param size  Number of rows and columns to write; nothing is written
 *              when size <= 0.
 * @param A     Array of row pointers. Assumes at least `size` rows of at
 *              least `size` floats each -- the function does not check this.
 */
void fillMatrix(int size, float **restrict A)
{
    /* Disabled OpenACC variant, kept for reference: */
    //#pragma acc kernels loop collapse(2) pcopyin(A[0:size][0:size]) pcopyout(A[0:size][0:size]) gang(1000), vector(32)
    for (int i = 0; i < size; ++i) {
        float *row = A[i];
        const float value = (float)i;

        for (int j = 0; j < size; ++j) {
            row[j] = value;
        }
    }
}

/**
 * Compute C = A * B for size x size matrices and return C.
 *
 * The outer (row) loop is distributed across OpenMP threads when the file is
 * built with OpenMP enabled; otherwise the pragma is ignored and the loop runs
 * serially. Each outer iteration writes only row i of C, so iterations do not
 * write to the same elements.
 *
 * @param size  Dimension of the square matrices; C is left untouched when
 *              size <= 0.
 * @param nr    Unused; kept so existing callers keep working.
 * @param nc    Unused; kept so existing callers keep working.
 * @param A     Left operand (row pointers), read only.
 * @param B     Right operand (row pointers), read only.
 * @param C     Output matrix (row pointers); every element in the
 *              size x size region is overwritten.
 * @return      The pointer passed in as C.
 */
float** MatrixMult(int size, int nr, int nc, float **restrict A, float **restrict B, float **restrict C)
{
    (void)nr;
    (void)nc;

    /*
     * Disabled OpenACC variant, kept for reference:
     *   #pragma acc kernels pcopyin(A[0:size-1][0:size],B[0:size][0:size])
     *                       pcopyout(C[0:size][0:size])
     */
    #pragma omp parallel for default(none) shared(A,B,C,size)
    for (int i = 0; i < size; ++i) {
        /* Declared inside the parallel loop, so each thread has its own copy. */
        const float *a_row = A[i];
        float *c_row = C[i];

        for (int j = 0; j < size; ++j) {
            float sum = 0.0f;

            for (int k = 0; k < size; ++k) {
                sum += a_row[k] * B[k][j];
            }
            c_row[j] = sum;
        }
    }
    return C;
}

/*
 * NOTE(review): the source is cut off in the middle of MakeMatrix. The
 * fragment below is preserved verbatim and disabled so the file compiles;
 * restore the full definition from a complete copy of the file.
 */
#if 0
float** MakeMatrix(int size, int nr, int nc, float **restrict arr) { int i; arr = (float **)malloc( sizeof(float *) * nr); arr[0] = (float *)malloc( sizeof(float) * nr * nc); for (i=1; i
#endif