/*includes {{{*/ #include #include #include #include #define MIC_TARGET #include "../../common/mmio.h" #include "../../common/util.h" #include "../../common/csr.h" /*}}}*/ /* global varibles and defines {{{*/ int nrepeat = 100; char transa = 'n'; /*}}}*/ /** Multiply two sparse matrices which are stored in CSR format. MKL is used */ void mkl_cpu_spgemm(MKL_INT Am, MKL_INT An, MKL_INT Annz, double* Aval, MKL_INT* AJ, MKL_INT* AI, MKL_INT Bn, MKL_INT Bnnz, double* Bval, MKL_INT* BJ, MKL_INT* BI, double** pCval, MKL_INT** pCJ, MKL_INT** pCI, double* time) { /*{{{*/ MKL_INT* CJ = NULL; double* Cval = NULL; MKL_INT sort = 3; // sort everything MKL_INT* CI = (MKL_INT*)mkl_malloc( (Am+2) * sizeof( MKL_INT ), 64 ); MKL_INT nnzmax = 0; // nnzmax is zero in case of symbolic&numeric usage of mkl_?csrmultcsr MKL_INT ierr; MKL_INT request = 1; // symbolic multiplication mkl_dcsrmultcsr(&transa, &request, &sort, &Am, &An, &Bn, Aval, AJ, AI, Bval, BJ, BI, Cval, CJ, CI, &nnzmax, &ierr); request = 2; //numeric multiplication int Cnnz = CI[Am]-1; int Cval_size = Cnnz + 1; CJ = (MKL_INT*)mkl_malloc( Cval_size * sizeof( MKL_INT ), 64 ); Cval = (double*)mkl_malloc( Cval_size * sizeof( double ), 64 ); double time_st = dsecnd(); int i; for(i = 0; i < nrepeat; i++) { mkl_dcsrmultcsr(&transa, &request, &sort, &Am, &An, &Bn, Aval, AJ, AI, Bval, BJ, BI, Cval, CJ, CI, &nnzmax, &ierr); } double time_end = dsecnd(); *time = (time_end - time_st)/nrepeat; *pCval = Cval; *pCJ = CJ; *pCI = CI; } /*}}}*/ int main(int argc, char* argv[]) { /*{{{*/ /** usage */ int nrequired_args = 6; if (argc != nrequired_args){ fprintf(stderr, "NAME:\n\tmkl_spgemm - multiply two sparse matrices\n"); fprintf(stderr, "\nSYNOPSIS:\n"); fprintf(stderr, "\tmkl_spgemm MATRIX_A MATRIX_B MATRIX_C NUMBER_OF_THREADS PRINT_MATRICES\n"); fprintf(stderr, "\nDESCRIPTION:\n"); fprintf(stderr, "\tNUMBER_OF_THREADS: {0,1,2,...}\n"); fprintf(stderr, "\t\t0: Use number of threads determined by MKL\n"); fprintf(stderr, "\tPRINT_MATRICES: PRINT_YES, PRINT_NO\n"); fprintf(stderr, "\nSAMPLE EXECUTION:\n"); fprintf(stderr, "\t%s test.mtx test.mtx out.mtx 2 PRINT_YES\n", argv[0]); exit(1); } /** parse arguments */ int iarg = 1; char* strpathA = argv[iarg]; iarg++; char* strpathB = argv[iarg]; iarg++; char* strpathC = argv[iarg]; iarg++; int nthreads = atoi(argv[iarg]); iarg++; if (nthreads > 0) { mkl_set_num_threads(nthreads); } else { nthreads = mkl_get_max_threads(); } option_print_matrices = strcmp(argv[iarg], "PRINT_YES")==0?OPTION_PRINT_MATRICES:OPTION_NOPRINT_MATRICES; iarg++; assert(nrequired_args == iarg); /** read matrix market file for A matrix */ MKL_INT Am, An, Annz; MKL_INT *Ax, *Ay; double *Anz; read_mm(strpathA, &Am, &An, &Annz, &Ax, &Ay, &Anz); /** construct csr storage for A matrix */ MKL_INT* AJ = (MKL_INT*) mkl_malloc( Annz * sizeof( MKL_INT ), 64 ); MKL_INT* AI = (MKL_INT*) mkl_malloc( (Am+1) * sizeof( MKL_INT ), 64 ); double* Aval = (double*) mkl_malloc( Annz * sizeof( double ), 64 ); coo_to_csr(Am, Annz, Ax, Ay, Anz, AI, AJ, Aval); /** read matrix market file for B matrix */ MKL_INT Bm, Bn, Bnnz; MKL_INT *Bx, *By; double *Bnz; read_mm(strpathB, &Bm, &Bn, &Bnnz, &Bx, &By, &Bnz); /** construct csr storage for B matrix */ MKL_INT* BJ = (MKL_INT*) mkl_malloc( Bnnz * sizeof( MKL_INT ), 64 ); MKL_INT* BI = (MKL_INT*) mkl_malloc( (Bm+1) * sizeof( MKL_INT ), 64 ); double* Bval = (double*) mkl_malloc( Bnnz * sizeof( double ), 64 ); coo_to_csr(Bm, Bnnz, Bx, By, Bnz, BI, BJ, Bval); /** multiply two matrices */ double* Cval, time; MKL_INT* CJ; MKL_INT* CI; mkl_cpu_spgemm(Am, An, Annz, Aval, AJ, AI, Bn, Bnnz, Bval, BJ, BI, &Cval, &CJ, &CI, &time); int i; for(i=0;i<=Am;i++)AI[i]--; for(i=0;i