#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <sys/time.h>
#include <mpi.h>
#include "/users/guest/petyros/Training/External_Functions/matrix_op.h"
#include "/users/guest/petyros/Training/External_Functions/util.h"
#include "/users/guest/petyros/Training/External_Functions/input.h"
int main(int argc, char ** argv) {
    int rank, size;
    int global_nm[2], local_nm[2]; //global matrix dimensions and local matrix dimensions (2D-domain, 2D-subdomain)
    int global_padded_nm[2]; //padded global matrix dimensions (if padding is not needed, global_padded=global)
    int i, j, k, sparse = 0, *cooCol, n_z, *I;
    MPI_Datatype dummy; //dummy datatype used to align user-defined datatypes in memory
    double *M, *A, *x, *y, *local_y, *cooVal, comm_t, comp_t; //global matrix, local row block, input vector, result vector, local result slice, COO values, timers

    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    /* Read n,m from the command line */
    if (argc < 2) error("Too few Arguments");
    else if (argc == 2) { /* ./Program Input_File */
        if (!mtx_read(&I, &cooCol, &cooVal, &global_nm[0], &global_nm[1], &n_z, argv[1])) error("input and/or COO conversion failed");
        sparse = 1;
    }
    else if (argc == 3) { /* ./Program N M */
        global_nm[0] = atoi(argv[1]);
        global_nm[1] = atoi(argv[2]);
    }
    else error("Too many Arguments");
    /* Pad n if needed so that the rows divide evenly among the MPI tasks */
    local_nm[1] = global_nm[1];
    global_padded_nm[1] = global_nm[1];
    if (global_nm[0] % size == 0) {
        local_nm[0] = global_nm[0] / size;
        global_padded_nm[0] = global_nm[0];
    }
    else {
        local_nm[0] = (global_nm[0] / size) + 1;
        global_padded_nm[0] = local_nm[0] * size;
    }
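    /* Example of the padding arithmetic above (hypothetical numbers): with
     * global_nm[0] = 1000 rows and size = 16 tasks, each task gets
     * local_nm[0] = 1000/16 + 1 = 63 rows and the matrix is padded to
     * global_padded_nm[0] = 63 * 16 = 1008 rows; only the first global_nm[0]
     * rows of the gathered result are meaningful. */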
    x = (double *) malloc(global_padded_nm[1] * sizeof(*x));
    if (rank == 0) {
        M = (double *) malloc(global_padded_nm[0] * global_padded_nm[1] * sizeof(*M));
        y = (double *) malloc(global_padded_nm[0] * sizeof(*y));
        if (!y || !x || !M) error("memory allocation failed");
        vec_init_rand(x, global_padded_nm[1], 1.0);
        vec_init(y, global_padded_nm[0], 0.0);
        /* Initialize matrices */
        if (sparse) {
            ; //regenerate_matrix_coo(M, I, cooCol, cooVal, n, m, n_z); /* Sparse matrices read from .mtx format */
        }
        else ser_matrix_init_rand_p(M, global_nm[0], global_nm[1], global_padded_nm[1] * (global_padded_nm[0] - global_nm[0]), 1.0); /* Dense matrix generated randomly */
    }
    local_y = (double *) malloc(local_nm[0] * sizeof(*local_y));
    vec_init(local_y, local_nm[0], 0.0);
    //if(rank==0) printf("local_nm[0]=%d local_nm[1]=%d global_padded_nm[0]=%d global_padded_nm[1]=%d\n", local_nm[0], local_nm[1], global_padded_nm[0], global_padded_nm[1]);

    A = (double *) malloc(local_nm[0] * local_nm[1] * sizeof(*A));
    #pragma omp parallel for private(j) schedule(static) /* Initialize data for each thread in the corresponding socket with a first-touch policy */
    for (i = 0; i < local_nm[0]; ++i) {
        for (j = 0; j < local_nm[1]; ++j) A[i*local_nm[1]+j] = 0.0;
        //printf("Initialize data Thread=%d i=%d\n", omp_get_thread_num(), i);
    }
    //----Rank 0 scatters the global matrix----//
    double * gsendbuf = NULL; /* send buffer is only significant at the root of the scatter */
    if (rank == 0) {
        gsendbuf = &(M[0]);
        comm_t = MPI_Wtime();
    }
    MPI_Scatter(gsendbuf, local_nm[1] * local_nm[0], MPI_DOUBLE, A, local_nm[1] * local_nm[0], MPI_DOUBLE, 0, MPI_COMM_WORLD);
    //if (rank == 0) printf("Scatter complete\n");
    MPI_Bcast(x, global_padded_nm[1], MPI_DOUBLE, 0, MPI_COMM_WORLD);
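    /* After the two collectives above, every task holds its own block of
     * local_nm[0] x local_nm[1] matrix rows in A and a full copy of x, so the
     * multiplication below needs no further communication until the gather. */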
    if (rank == 0) {
        printf("MPI Version(N=%d, M=%d, Tasks=%d, Nodes=%s, Tasks/Node=%s, threads=%s): ", global_nm[0], global_nm[1], size,
               getenv("SLURM_JOB_NUM_NODES"), getenv("SLURM_NTASKS_PER_NODE"), getenv("OMP_NUM_THREADS"));
        comp_t = MPI_Wtime();
    }
    for (i = 0; i < NR_ITER; ++i) {
        #pragma omp parallel for private(j) shared(local_nm, A, x, local_y) schedule(static)
        for (k = 0; k < local_nm[0]; ++k) {
            //printf("Compute Thread=%d i=%d\n", omp_get_thread_num(), k);
            double yi = 0.0;
            for (j = 0; j < local_nm[1]; ++j) yi += A[k*local_nm[1]+j] * x[j];
            local_y[k] = yi;
        }
    }
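    /* The product is repeated NR_ITER times on the same data, presumably to make
     * the timed region long enough for a stable comp_t measurement. */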
    MPI_Barrier(MPI_COMM_WORLD);
    if (rank == 0) comp_t = MPI_Wtime() - comp_t;
    MPI_Gather(local_y, local_nm[0], MPI_DOUBLE, y, local_nm[0], MPI_DOUBLE, 0, MPI_COMM_WORLD);
    //MPI_Reduce(local_y, y, global_nm[0], MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
    if (rank == 0) comm_t = MPI_Wtime() - comm_t - comp_t;
#ifdef _DEBUG_ /* Output y in a file for debug purposes */
    if (rank == 0) {
        FILE * fp;
        char * filename = "/users/guest/petyros/Training/Outputs/Debug/MPI-OpenMP.out";
        if ((fp = fopen(filename, "w")) == NULL) error("Output file creation failed\n");
        for (k = 0; k < global_nm[0]; ++k) fprintf(fp, "%lf ", y[k]);
        fclose(fp);
    }
#endif
    if (rank == 0) {
        report_mpi_results(comm_t, comp_t);
        free(M);
        free(y);
    }
    free(x);
    free(local_y);
    free(A);
    MPI_Finalize();
    return 0;
}
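/* Example build/run sketch (assumptions: this file is saved as mpi_openmp_mv.c, the
 * External_Functions helpers ship as matrix_op.c, util.c and input.c next to their
 * headers, and NR_ITER is defined there, otherwise add e.g. -DNR_ITER=100; adjust
 * paths, source names and the launcher to the actual cluster setup):
 *
 *   mpicc -O3 -fopenmp mpi_openmp_mv.c matrix_op.c util.c input.c -o mv_hybrid -lm
 *   export OMP_NUM_THREADS=4
 *   mpirun -np 8 ./mv_hybrid 4096 4096     # dense N M run
 *
 * The sparse (.mtx) path is stubbed out above (regenerate_matrix_coo is commented),
 * so only the dense N M form produces meaningful results as written. */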