/*includes {{{*/ #include #include #include #include #include "mmio.h" /*}}}*/ /* global varibles and defines {{{*/ int nrepeat = 100; int option_print_matrices = 0; #define OPTION_NOPRINT_MATRICES 0 #define OPTION_PRINT_MATRICES 1 char transa = 'n'; typedef int csi; typedef double csv; csv zero = 0.0; #define CS_MAX(a,b) (((a) > (b)) ? (a) : (b)) typedef struct csr_t { csi m; csi n; csi nzmax; csi nr; csi* r; csi* p; csi* j; csv* x; } csr; /*}}}*/ /* method declarations {{{*/ int main(int argc, char* argv[]); void printmm_one(int m, double* Aval, int* AJ, int* AI); void printfilemm_one(char* file, int m, int n, double* Aval, int* AJ, int* AI); void printmm_zero(int m, double* Aval, int* AJ, int* AI); csr *csr_spfree(csr *A); /*}}}*/ /*csr util{{{*/ /* free workspace and return a sparse matrix result */ csr *csr_done(csr *C, void *w, void *x, csi ok) { return(ok ? C : csr_spfree(C)); /* return result if OK, else free it */ } /* wrapper for free */ void *cs_free(void *p) { if (p) free(p); /* free p if it is not already NULL */ return(NULL); /* return NULL to simplify the use of cs_free */ } /* wrapper for realloc */ void *csr_realloc(void *p, csi n, size_t size, csi *ok) { void *pnew = NULL; pnew = realloc(p, CS_MAX(n, 1) * size); /* realloc the block */ *ok = (pnew != NULL); /* realloc fails if pnew is NULL */ if (pnew == NULL) { printf("%d:reallocation failed, pnew is NULL\n", __LINE__); } //printf("%s:%d: n=%d ok=%d\n", __FUNCTION__, __LINE__, n, *ok); return((*ok) ? pnew : p); /* return original p if failure */ } /* wrapper for realloc */ void *cs_realloc(void *p, csi n, size_t size, csi *ok) { void *pnew = NULL; pnew = realloc(p, CS_MAX(n, 1) * size); /* realloc the block */ *ok = (pnew != NULL); /* realloc fails if pnew is NULL */ if (pnew == NULL) { printf("reallocation failed\n"); } return((*ok) ? pnew : p); /* return original p if failure */ } /* change the max # of entries sparse matrix */ csi csr_sprealloc(csr *A, csi nzmax) { csi ok, oki = 0, okj = 1, okx = 1; if (!A) return(0); if (nzmax <= 0) nzmax = A->p[A->m]; A->j = (int*)csr_realloc(A->j, nzmax, sizeof(csi), &oki); if (A->x) A->x = (csv*)csr_realloc(A->x, nzmax, sizeof(csv), &okx); ok = (oki && okj && okx); if (ok) A->nzmax = nzmax; return(ok); } /* free a sparse matrix */ csr *csr_spfree(csr *A) { if (!A) return(NULL); /* do nothing if A already NULL */ cs_free(A->p); A->p = NULL; cs_free(A->j); A->j = NULL; cs_free(A->x); A->x = NULL; cs_free(A->r); A->r = NULL; cs_free(A); /* free the cs struct and return NULL */ return NULL; } /* allocate a sparse matrix (triplet form or compressed-ROW form) */ csr *csr_spalloc(csi m, csi n, csi nzmax, int values, int triplet, csv f) { csr* A = (csr*)calloc(1, sizeof(csr)); /* allocate the cs struct */ if (!A) { perror("sparse allocation failed"); return(NULL); /* out of memory */ } A->m = m; /* define dimensions and nzmax */ A->n = n; A->nzmax = nzmax = CS_MAX(nzmax, 0); A->nr = 0; // number of nonzero rows A->p = (csi*)calloc(m + 2, sizeof(csi)); A->j = (csi*)calloc(CS_MAX(nzmax,1), sizeof(csi)); A->x = (csv*)calloc(CS_MAX(nzmax,1), sizeof(csv)); return((!A->p || !A->j || !A->x) ? csr_spfree(A) : A); }/*}}}*/ /** Multiply two sparse matrices which are stored in CSR format. MKL is used */ csr *csr_multiply(csi Am, csi An, csi Anzmax, const csi* Ap, const csi* Aj, const csv* Ax, csi Bm, csi Bn, csi Bnzmax, const csi* Bp, const csi* Bj, const csv* Bx, long* nummult, csi* xb, csv* x) { /*{{{*/ csv tf = 0; csi p, jp, j, kp, k, i, nz = 0, anz, *Cp, *Cj, m, n, bnz, values = 1; csv *Cx; csr *C; if (An != Bm) return(NULL); if (Anzmax == 0 || Bnzmax == 0) { C = csr_spalloc(Am, Bn, 0, values, 0, tf); return C; } m = Am; anz = Ap[Am]; n = Bn; bnz = Bp[Bm]; for(i = 0; i < n; i++) xb[i] = 0; for(i = 0; i < n; i++) xb[i] = 0; values = (Ax != NULL) && (Bx != NULL); csi tnz = (anz + bnz) * 2; C = csr_spalloc(m, n, tnz, values, 0, tf); /* allocate result */ if (!C || !xb || (values && !x)) return (csr_done(C, xb, x, 0)); Cp = C->p; for (i = 0; i < m; i++) { if ( ( (nz + n) > C->nzmax ) ) { if(!csr_sprealloc(C, (2 * (C->nzmax) + n) ) ) { return (csr_done(C, xb, x, 0)); // out of memory } else { } } Cj = C->j; Cx = C->x; /* C->j and C->x may be reallocated */ Cp[i] = nz; /* row i of C starts here */ for (jp = Ap[i]; jp < Ap[i + 1]; jp++) { j = Aj[jp]; for (kp = Bp[j]; kp < Bp[j + 1]; kp++) { k = Bj[kp]; /* B(i,j) is nonzero */ if (xb[k] != i + 1) { xb[k] = i + 1; /* i is new entry in column j */ Cj[nz++] = k; /* add i to pattern of C(:,j) */ if (x) { x[k] = Ax[jp] * Bx[kp]; /* x(i) = beta*A(i,j) */ (*nummult)++; } } else if (x) { x[k] += (Ax[jp] * Bx[kp]); /* i exists in C(:,j) already */ (*nummult)++; } } } if (values) for (p = Cp[i]; p < nz; p++) Cx[p] = x[Cj[p]]; } Cp[m] = nz; /* finalize the last row of C */ csr_sprealloc(C, 0); /* remove extra space from C */ xb = NULL; x = NULL; return C; }/*}}}*/ /** Multiply two sparse matrices which are stored in CSR format. MKL is used */ void mkl_cpu_spgemm(MKL_INT Am, MKL_INT An, MKL_INT Annz, double* Aval, MKL_INT* AJ, MKL_INT* AI, MKL_INT Bn, MKL_INT Bnnz, double* Bval, MKL_INT* BJ, MKL_INT* BI, double** pCval, MKL_INT** pCJ, MKL_INT** pCI, double* time) { /*{{{*/ MKL_INT* CJ = NULL; double* Cval = NULL; MKL_INT sort = 3; // sort everything MKL_INT* CI = (MKL_INT*)mkl_malloc( (Am+2) * sizeof( MKL_INT ), 64 ); MKL_INT nnzmax = 0; // nnzmax is zero in case of symbolic&numeric usage of mkl_?csrmultcsr MKL_INT ierr; MKL_INT request = 1; // symbolic multiplication mkl_dcsrmultcsr(&transa, &request, &sort, &Am, &An, &Bn, Aval, AJ, AI, Bval, BJ, BI, Cval, CJ, CI, &nnzmax, &ierr); request = 2; //numeric multiplication int Cnnz = CI[Am]-1; int Cval_size = Cnnz + 1; CJ = (MKL_INT*)mkl_malloc( Cval_size * sizeof( MKL_INT ), 64 ); Cval = (double*)mkl_malloc( Cval_size * sizeof( double ), 64 ); double time_st = dsecnd(); int i; for(i = 0; i < nrepeat; i++) { mkl_dcsrmultcsr(&transa, &request, &sort, &Am, &An, &Bn, Aval, AJ, AI, Bval, BJ, BI, Cval, CJ, CI, &nnzmax, &ierr); } double time_end = dsecnd(); *time = (time_end - time_st)/nrepeat; *pCval = Cval; *pCJ = CJ; *pCI = CI; } /*}}}*/ /** Read Matrix Market file into COO matrix */ void read_mm(char* strpath, int* pM, int* pN, int* prealnnz, int** pI, int** pJ, double** pval){ /*{{{*/ /* * taken from Matrix Market I/O library for ANSI C * * See http://math.nist.gov/MatrixMarket for details. * * */ int i, M, N, nz, *I, *J; double* val; int ret_code; MM_typecode matcode; FILE* f; if ((f = fopen(strpath, "r")) == NULL) {fprintf(stderr, "Input matrix file %s cannot be opened to read.", strpath);exit(1);} /* READ MATRIX */ if (mm_read_banner(f, &matcode) != 0) { printf("Could not process Matrix Market banner.\n"); exit(1); } /* This is how one can screen matrix types if their application */ /* only supports a subset of the Matrix Market data types. */ if (mm_is_complex(matcode) && mm_is_matrix(matcode) && mm_is_sparse(matcode) ) { printf("Sorry, this application does not support "); printf("Market Market type: [%s]\n", mm_typecode_to_str(matcode));exit(1); } /* find out size of sparse matrix .... */ if ((ret_code = mm_read_mtx_crd_size(f, &M, &N, &nz)) !=0) exit(1); /* reseve memory for matrices */ I = (int *) malloc((mm_is_symmetric(matcode) ? 2*nz : nz) * sizeof(int)); J = (int *) malloc((mm_is_symmetric(matcode) ? 2*nz : nz) * sizeof(int)); val = (double *) malloc((mm_is_symmetric(matcode) ? 2*nz : nz) * sizeof(double)); *pI = I; *pJ = J; *pval = val; /* NOTE: when reading in doubles, ANSI C requires the use of the "l" */ /* specifier as in "%lg", "%lf", "%le", otherwise errors will occur */ /* (ANSI C X3.159-1989, Sec. 4.9.6.2, p. 136 lines 13-15) */ int realnnz = 0; for (i=0; i 0) { mkl_set_num_threads(nthreads); } else { nthreads = mkl_get_max_threads(); } option_print_matrices = strcmp(argv[iarg], "PRINT_YES")==0?OPTION_PRINT_MATRICES:OPTION_NOPRINT_MATRICES; iarg++; assert(nrequired_args == iarg); /** read matrix market file for A matrix */ MKL_INT Am, An, Annz; MKL_INT *Ax, *Ay; double *Anz; read_mm(strpathA, &Am, &An, &Annz, &Ax, &Ay, &Anz); /** construct csr storage for A matrix */ MKL_INT* AJ = (MKL_INT*) mkl_malloc( Annz * sizeof( MKL_INT ), 64 ); MKL_INT* AI = (MKL_INT*) mkl_malloc( (Am+1) * sizeof( MKL_INT ), 64 ); double* Aval = (double*) mkl_malloc( Annz * sizeof( double ), 64 ); coo_to_csr(Am, Annz, Ax, Ay, Anz, AI, AJ, Aval); /** read matrix market file for B matrix */ MKL_INT Bm, Bn, Bnnz; MKL_INT *Bx, *By; double *Bnz; read_mm(strpathB, &Bm, &Bn, &Bnnz, &Bx, &By, &Bnz); /** construct csr storage for B matrix */ MKL_INT* BJ = (MKL_INT*) mkl_malloc( Bnnz * sizeof( MKL_INT ), 64 ); MKL_INT* BI = (MKL_INT*) mkl_malloc( (Bm+1) * sizeof( MKL_INT ), 64 ); double* Bval = (double*) mkl_malloc( Bnnz * sizeof( double ), 64 ); coo_to_csr(Bm, Bnnz, Bx, By, Bnz, BI, BJ, Bval); /** multiply two matrices */ double* Cval, time; MKL_INT* CJ; MKL_INT* CI; mkl_cpu_spgemm(Am, An, Annz, Aval, AJ, AI, Bn, Bnnz, Bval, BJ, BI, &Cval, &CJ, &CI, &time); int i; for(i=0;i<=Am;i++)AI[i]--; for(i=0;i