#include #include #include #include #include #include "/users/guest/petyros/Training/External_Functions/matrix_op.h" #include "/users/guest/petyros/Training/External_Functions/util.h" #include "/users/guest/petyros/Training/External_Functions/input.h" int main(int argc, char ** argv) { int rank,size; int global_nm[2],local_nm[2]; //global matrix dimensions and local matrix dimensions (2D-domain, 2D-subdomain) int global_padded_nm[2]; //padded global matrix dimensions (if padding is not needed, global_padded=global) int i,j,k, sparse=0, *cooCol, n_z, *I; double * M, * A, * x, * y, *local_y, * cooVal, comm_t, comp_t; MPI_Init(&argc,&argv); MPI_Comm_size(MPI_COMM_WORLD,&size); MPI_Comm_rank(MPI_COMM_WORLD,&rank); if (argc < 2) error("Too few Arguments"); else if ( argc == 2) /* ./Program Input_File -> File Input to COO */ { if(!mtx_read(&I, &cooCol, &cooVal, &global_nm[0], &global_nm[1], &n_z, argv[1])) error("input and/or COO convertion failed"); sparse = 1; } else if ( argc == 3) { /*./Program N M -> Generate random NxM matrix */ global_nm[0]=atoi(argv[1]); global_nm[1]=atoi(argv[2]); } else error("Too many Arguments"); /* Padd N if needed */ local_nm[1]=global_nm[1]; global_padded_nm[1]=global_nm[1]; if (global_nm[0]%size==0) { local_nm[0]=global_nm[0]/size; global_padded_nm[0]=global_nm[0]; } else { local_nm[0]=(global_nm[0]/size)+1; global_padded_nm[0]=local_nm[0]*size; } x = (double *) malloc(global_padded_nm[1] * sizeof(*x)); if (rank==0) { M = (double *) malloc(global_padded_nm[0] * global_padded_nm[1] * sizeof(*M)); vec_init_rand(x, global_padded_nm[1], 1.0); y = (double *) malloc(global_padded_nm[0] * sizeof(*y)); vec_init(y, global_padded_nm[0], 0.0); if( !y || !x || !M ) error("memory allocation failed"); /* Initialize matrices */ if (sparse) { ; //regenerate_matrix_coo(M, I, cooCol, cooVal, n, m, n_z); /* Sparse matrices read from .mtx format */ } else ser_matrix_init_rand_p(M, global_nm[0], global_nm[1], global_padded_nm[1] * (global_padded_nm[0] - global_nm[0]), 1.0); /* Normal matrices generated randomly */ } local_y = (double *) malloc(local_nm[0] * sizeof(*local_y)); vec_init(local_y, local_nm[0], 0.0); //if(rank==0) printf("Local[0]=%d Local[1]=%d global_padded[0]=%d global_padded[1]=%d\n",local[0],local[1],global_padded[0],global_padded[1]); A = (double *) malloc(local_nm[0] * local_nm[1] * sizeof(*A)); #pragma omp parallel for schedule(static) /* Initialize data for each thread in corresponding socket with first-touch policy */ for( i=0 ; i