Newer
Older
/*includes {{{*/
#include <stdio.h>
#include <assert.h>
#include <math.h>
#define MIC_TARGET
#include "../../common/mmio.h"
#include "../../common/util.h"
#include "../../common/csr.h"
/** Multiply two sparse matrices which are stored in CSR format. MKL is used */
void mkl_cpu_spgemm(MKL_INT Am, MKL_INT An, MKL_INT Annz, double* Aval, MKL_INT* AJ, MKL_INT* AI, MKL_INT Bn, MKL_INT Bnnz, double* Bval, MKL_INT* BJ, MKL_INT* BI, double** pCval, MKL_INT** pCJ, MKL_INT** pCI, double* time) { /*{{{*/
MKL_INT* CJ = NULL;
double* Cval = NULL;
MKL_INT sort = 3; // sort everything
MKL_INT* CI = (MKL_INT*)mkl_malloc( (Am+2) * sizeof( MKL_INT ), 64 );
MKL_INT nnzmax = 0; // nnzmax is zero in case of symbolic&numeric usage of mkl_?csrmultcsr
MKL_INT ierr;
MKL_INT request = 1; // symbolic multiplication
mkl_dcsrmultcsr(&transa, &request, &sort, &Am, &An, &Bn, Aval, AJ, AI, Bval, BJ, BI, Cval, CJ, CI, &nnzmax, &ierr);
request = 2; //numeric multiplication
int Cnnz = CI[Am]-1;
int Cval_size = Cnnz + 1;
CJ = (MKL_INT*)mkl_malloc( Cval_size * sizeof( MKL_INT ), 64 );
Cval = (double*)mkl_malloc( Cval_size * sizeof( double ), 64 );
double time_st = dsecnd();
int i;
for(i = 0; i < nrepeat; i++) {
mkl_dcsrmultcsr(&transa, &request, &sort, &Am, &An, &Bn, Aval, AJ, AI, Bval, BJ, BI, Cval, CJ, CI, &nnzmax, &ierr);
double time_end = dsecnd();
*time = (time_end - time_st)/nrepeat;
*pCval = Cval; *pCJ = CJ; *pCI = CI;
} /*}}}*/
/** usage */
int nrequired_args = 6;
if (argc != nrequired_args){
fprintf(stderr, "NAME:\n\tmkl_spgemm - multiply two sparse matrices\n");
fprintf(stderr, "\nSYNOPSIS:\n");
fprintf(stderr, "\tmkl_spgemm MATRIX_A MATRIX_B MATRIX_C NUMBER_OF_THREADS PRINT_MATRICES\n");
fprintf(stderr, "\nDESCRIPTION:\n");
fprintf(stderr, "\tNUMBER_OF_THREADS: {0,1,2,...}\n");
fprintf(stderr, "\t\t0: Use number of threads determined by MKL\n");
fprintf(stderr, "\tPRINT_MATRICES: PRINT_YES, PRINT_NO\n");
fprintf(stderr, "\nSAMPLE EXECUTION:\n");
fprintf(stderr, "\t%s test.mtx test.mtx out.mtx 2 PRINT_YES\n", argv[0]);
exit(1);
/** parse arguments */
int iarg = 1;
char* strpathA = argv[iarg]; iarg++;
char* strpathB = argv[iarg]; iarg++;
char* strpathC = argv[iarg]; iarg++;
int nthreads = atoi(argv[iarg]); iarg++;
if (nthreads > 0) {
mkl_set_num_threads(nthreads);
} else {
nthreads = mkl_get_max_threads();
option_print_matrices = strcmp(argv[iarg], "PRINT_YES")==0?OPTION_PRINT_MATRICES:OPTION_NOPRINT_MATRICES; iarg++;
assert(nrequired_args == iarg);
/** read matrix market file for A matrix */
MKL_INT Am, An, Annz;
MKL_INT *Ax, *Ay;
double *Anz;
read_mm(strpathA, &Am, &An, &Annz, &Ax, &Ay, &Anz);
/** construct csr storage for A matrix */
MKL_INT* AJ = (MKL_INT*) mkl_malloc( Annz * sizeof( MKL_INT ), 64 );
MKL_INT* AI = (MKL_INT*) mkl_malloc( (Am+1) * sizeof( MKL_INT ), 64 );
double* Aval = (double*) mkl_malloc( Annz * sizeof( double ), 64 );
coo_to_csr(Am, Annz, Ax, Ay, Anz, AI, AJ, Aval);
/** read matrix market file for B matrix */
MKL_INT Bm, Bn, Bnnz;
MKL_INT *Bx, *By;
double *Bnz;
read_mm(strpathB, &Bm, &Bn, &Bnnz, &Bx, &By, &Bnz);
/** construct csr storage for B matrix */
MKL_INT* BJ = (MKL_INT*) mkl_malloc( Bnnz * sizeof( MKL_INT ), 64 );
MKL_INT* BI = (MKL_INT*) mkl_malloc( (Bm+1) * sizeof( MKL_INT ), 64 );
double* Bval = (double*) mkl_malloc( Bnnz * sizeof( double ), 64 );
coo_to_csr(Bm, Bnnz, Bx, By, Bnz, BI, BJ, Bval);
/** multiply two matrices */
double* Cval, time;
MKL_INT* CJ; MKL_INT* CI;
mkl_cpu_spgemm(Am, An, Annz, Aval, AJ, AI, Bn, Bnnz, Bval, BJ, BI, &Cval, &CJ, &CI, &time);
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
for(i=0;i<=Am;i++)AI[i]--; for(i=0;i<Annz;i++)AJ[i]--;
for(i=0;i<=Bm;i++)BI[i]--; for(i=0;i<Bnnz;i++)BJ[i]--;
printmm_one(Am, Cval, CJ, CI);
/** In order to write the output C matrix to file, uncomment the following line */
printfilemm_one(strpathC, Am, Bn, Cval, CJ, CI);
/** run my SpGEMM routine in order to find number of multiply-and-add operations */
long nummult = 0;
csi* xb = (csi*)calloc(Bn, sizeof(csi));
csv* x = (csv*)calloc(Bn, sizeof(csv));
csr* C = csr_multiply(Am, An, Annz, AI, AJ, Aval, Bm, Bn, Bnnz, BI, BJ, Bval, &nummult, xb, x);
double gflop = 2 * (double) nummult / 1e9;
/** print gflop per second and time */
printf("%d\t", nthreads);
printf("%g\t", (gflop/time));
printf("%g\t", time);
printf("%s\t", strpathA);
printf("%s\t", strpathB);
printf("%s\t", strpathC);
printf("\n");
/** free allocated space */
mkl_free(AI);
mkl_free(AJ);
mkl_free(Aval);
mkl_free(BI);
mkl_free(BJ);
mkl_free(Bval);
return 0;
} /* ENDOF main }}}*/