#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>
#include "/users/guest/petyros/Training/External_Functions/matrix_op.h"
#include "/users/guest/petyros/Training/External_Functions/util.h"
#include "/users/guest/petyros/Training/External_Functions/input.h"

int main(int argc, char **argv)
{
	int rank, size;
	int global_nm[2], local_nm[2];   /* global and local matrix dimensions (2D domain, 2D subdomain) */
	int global_padded_nm[2];         /* padded global matrix dimensions (if no padding is needed, global_padded == global) */
	int i, j, k, sparse = 0, *cooCol, n_z, *I;
	double *M, *A, *x, *y, *local_y, *cooVal, comm_t, comp_t;

	MPI_Init(&argc, &argv);
	MPI_Comm_size(MPI_COMM_WORLD, &size);
	MPI_Comm_rank(MPI_COMM_WORLD, &rank);

	if (argc < 2) error("Too few arguments");
	else if (argc == 2) {            /* ./Program Input_File -> file input converted to COO */
		if (!mtx_read(&I, &cooCol, &cooVal, &global_nm[0], &global_nm[1], &n_z, argv[1]))
			error("input and/or COO conversion failed");
		sparse = 1;
	}
	else if (argc == 3) {            /* ./Program N M -> generate a random NxM matrix */
		global_nm[0] = atoi(argv[1]);
		global_nm[1] = atoi(argv[2]);
	}
	else error("Too many arguments");

	/* Pad N if needed so that the rows divide evenly among the tasks */
	local_nm[1] = global_nm[1];
	global_padded_nm[1] = global_nm[1];
	if (global_nm[0] % size == 0) {
		local_nm[0] = global_nm[0] / size;
		global_padded_nm[0] = global_nm[0];
	}
	else {
		local_nm[0] = (global_nm[0] / size) + 1;
		global_padded_nm[0] = local_nm[0] * size;
	}

	x = (double *) malloc(global_padded_nm[1] * sizeof(*x));
	if (rank == 0) {
		M = (double *) malloc(global_padded_nm[0] * global_padded_nm[1] * sizeof(*M));
		vec_init_rand(x, global_padded_nm[1], 1.0);
		y = (double *) malloc(global_padded_nm[0] * sizeof(*y));
		vec_init(y, global_padded_nm[0], 0.0);
		if (!y || !x || !M) error("memory allocation failed");

		/* Initialize matrices */
		if (sparse) {
			;  /* regenerate_matrix_coo(M, I, cooCol, cooVal, n, m, n_z); -- sparse matrices read from .mtx format */
		}
		else ser_matrix_init_rand_p(M, global_nm[0], global_nm[1],
			global_padded_nm[1] * (global_padded_nm[0] - global_nm[0]), 1.0);  /* dense matrices generated randomly */
	}

	local_y = (double *) malloc(local_nm[0] * sizeof(*local_y));
	vec_init(local_y, local_nm[0], 0.0);
	/* if (rank == 0) printf("local_nm[0]=%d local_nm[1]=%d global_padded_nm[0]=%d global_padded_nm[1]=%d\n",
	                         local_nm[0], local_nm[1], global_padded_nm[0], global_padded_nm[1]); */
	A = (double *) malloc(local_nm[0] * local_nm[1] * sizeof(*A));

	/* Rank 0 scatters the global matrix in contiguous row blocks */
	double *gsendbuf = NULL;         /* send buffer is only significant at the root */
	if (rank == 0) {
		gsendbuf = &(M[0]);
		comm_t = MPI_Wtime();
	}
	MPI_Scatter(gsendbuf, local_nm[1] * local_nm[0], MPI_DOUBLE,
	            A, local_nm[1] * local_nm[0], MPI_DOUBLE, 0, MPI_COMM_WORLD);
	/* if (rank == 0) printf("Scatter complete\n"); */
	MPI_Bcast(x, global_padded_nm[1], MPI_DOUBLE, 0, MPI_COMM_WORLD);

	if (rank == 0) {
		printf("MPI Version(N=%d, M=%d, Tasks=%d, Nodes=%s, Tasks/Node=%s): ",
		       global_nm[0], global_nm[1], size,
		       getenv("SLURM_JOB_NUM_NODES"), getenv("SLURM_NTASKS_PER_NODE"));
		comp_t = MPI_Wtime();
	}

	/* Each task multiplies its local row block with the broadcast vector x */
	for (i = 0; i < NR_ITER; ++i) {
		register double yi = 0;
		for (k = 0; k < local_nm[0]; ++k) {
			yi = 0.0;
			for (j = 0; j < local_nm[1]; ++j) yi += A[k * local_nm[1] + j] * x[j];
			local_y[k] = yi;
		}
	}

	MPI_Barrier(MPI_COMM_WORLD);
	if (rank == 0) comp_t = MPI_Wtime() - comp_t;
	MPI_Gather(local_y, local_nm[0], MPI_DOUBLE, y, local_nm[0], MPI_DOUBLE, 0, MPI_COMM_WORLD);
	if (rank == 0) comm_t = MPI_Wtime() - comm_t - comp_t;

#ifdef _DEBUG_
	/* Output the y vector to a file for debugging */
	if (rank == 0) {
		FILE *fp;
		char *filename = "/users/guest/petyros/Training/Outputs/Debug/MPI.out";
		if ((fp = fopen(filename, "w")) == NULL) error("Output file creation failed\n");
		for (k = 0; k < global_nm[0]; ++k) fprintf(fp, "%lf ", y[k]);
		fclose(fp);
	}
#endif
	if (rank == 0) {
		report_mpi_results(comm_t, comp_t);
		free(M);
		free(y);
	}
	free(x);
	free(local_y);
	free(A);
	MPI_Finalize();
	return 0;
}
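
/*
 * Illustrative sketch only (not part of the original project): the sparse
 * branch above leaves regenerate_matrix_coo() commented out, so the COO
 * triplets read by mtx_read() are never expanded into M here. A typical
 * expansion into the padded, row-major dense matrix would look roughly like
 * the helper below; the name, signature, and zero-fill of the padding rows
 * are assumptions, not the project's actual API.
 */
static void coo_to_dense_sketch(double *M, const int *rowIdx, const int *colIdx,
                                const double *val, int padded_n, int m, int n_z)
{
	int idx;
	/* Zero the whole padded dense matrix, including the padding rows. */
	for (idx = 0; idx < padded_n * m; ++idx) M[idx] = 0.0;
	/* Scatter the non-zeros: entry idx lands at row rowIdx[idx], column colIdx[idx]. */
	for (idx = 0; idx < n_z; ++idx)
		M[rowIdx[idx] * m + colIdx[idx]] = val[idx];
}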