From 7f93d0b792de6cf3468a4b92b2bbbc66171d14e3 Mon Sep 17 00:00:00 2001 From: "tempy@maggie" Date: Fri, 29 Jan 2016 08:40:56 +0200 Subject: [PATCH] Common functions moved to one place --- common/csr.h | 173 ++++++ spgemm/mkl_shmem/mmio.c => common/mmio.h | 186 +++++- common/util.h | 59 ++ spgemm/mkl_shmem/CMakeLists.txt | 2 +- spgemm/mkl_shmem/mklspgemm.c | 518 ++++------------- spgemm/mkl_shmem/mmio.h | 133 ----- spgemm/mkl_xphi/CMakeLists.txt | 2 +- spgemm/mkl_xphi/mklspgemm.c | 695 +++++++---------------- spgemm/mkl_xphi/mmio.c | 511 ----------------- spgemm/mkl_xphi/mmio.h | 133 ----- spmv/mkl_shmem/CMakeLists.txt | 2 +- spmv/mkl_shmem/mklspmv.c | 289 +++------- spmv/mkl_shmem/mmio.c | 511 ----------------- spmv/mkl_shmem/mmio.h | 133 ----- 14 files changed, 816 insertions(+), 2531 deletions(-) create mode 100644 common/csr.h rename spgemm/mkl_shmem/mmio.c => common/mmio.h (67%) create mode 100644 common/util.h delete mode 100644 spgemm/mkl_shmem/mmio.h delete mode 100644 spgemm/mkl_xphi/mmio.c delete mode 100644 spgemm/mkl_xphi/mmio.h delete mode 100644 spmv/mkl_shmem/mmio.c delete mode 100644 spmv/mkl_shmem/mmio.h diff --git a/common/csr.h b/common/csr.h new file mode 100644 index 0000000..6208633 --- /dev/null +++ b/common/csr.h @@ -0,0 +1,173 @@ +#ifndef CSR_H +#define CSR_H + +typedef int csi; +typedef double csv; +csv zero = 0.0; +#define CS_MAX(a,b) (((a) > (b)) ? (a) : (b)) + +typedef struct csr_t { + csi m; + csi n; + csi nzmax; + csi nr; + csi* r; + csi* p; + csi* j; + csv* x; +} csr; +/* method declarations {{{*/ +csr *csr_spfree(csr *A); +/*}}}*/ + +/*csr util{{{*/ + +/* free workspace and return a sparse matrix result */ +csr *csr_done(csr *C, void *w, void *x, csi ok) { + return(ok ? C : csr_spfree(C)); /* return result if OK, else free it */ +} + +/* wrapper for free */ +void *cs_free(void *p) { + if (p) + free(p); /* free p if it is not already NULL */ + return(NULL); /* return NULL to simplify the use of cs_free */ +} + +/* wrapper for realloc */ +void *csr_realloc(void *p, csi n, size_t size, csi *ok) { + void *pnew = NULL; + pnew = realloc(p, CS_MAX(n, 1) * size); /* realloc the block */ + *ok = (pnew != NULL); /* realloc fails if pnew is NULL */ + if (pnew == NULL) { + printf("%d:reallocation failed, pnew is NULL\n", __LINE__); + } + return((*ok) ? pnew : p); /* return original p if failure */ +} + +/* wrapper for realloc */ +void *cs_realloc(void *p, csi n, size_t size, csi *ok) { + void *pnew = NULL; + pnew = realloc(p, CS_MAX(n, 1) * size); /* realloc the block */ + *ok = (pnew != NULL); /* realloc fails if pnew is NULL */ + if (pnew == NULL) { + printf("reallocation failed\n"); + } + return((*ok) ? pnew : p); /* return original p if failure */ +} + +/* change the max # of entries sparse matrix */ +csi csr_sprealloc(csr *A, csi nzmax) { + csi ok, oki = 0, okj = 1, okx = 1; + if (!A) + return(0); + if (nzmax <= 0) + nzmax = A->p[A->m]; + A->j = (int*)csr_realloc(A->j, nzmax, sizeof(csi), &oki); + if (A->x) + A->x = (csv*)csr_realloc(A->x, nzmax, sizeof(csv), &okx); + ok = (oki && okj && okx); + if (ok) + A->nzmax = nzmax; + return(ok); +} + +/* free a sparse matrix */ +csr *csr_spfree(csr *A) { + if (!A) + return(NULL); /* do nothing if A already NULL */ + cs_free(A->p); + A->p = NULL; + cs_free(A->j); + A->j = NULL; + cs_free(A->x); + A->x = NULL; + cs_free(A->r); + A->r = NULL; + cs_free(A); /* free the cs struct and return NULL */ + return NULL; +} + +/* allocate a sparse matrix (triplet form or compressed-ROW form) */ +csr *csr_spalloc(csi m, csi n, csi nzmax, int values, int triplet, csv f) { + csr* A = (csr*)calloc(1, sizeof(csr)); /* allocate the cs struct */ + if (!A) { + perror("sparse allocation failed"); + return(NULL); /* out of memory */ + } + A->m = m; /* define dimensions and nzmax */ + A->n = n; + A->nzmax = nzmax = CS_MAX(nzmax, 0); + A->nr = 0; // number of nonzero rows + A->p = (csi*)calloc(m + 2, sizeof(csi)); + A->j = (csi*)calloc(CS_MAX(nzmax,1), sizeof(csi)); + A->x = (csv*)calloc(CS_MAX(nzmax,1), sizeof(csv)); + return((!A->p || !A->j || !A->x) ? csr_spfree(A) : A); +}/*}}}*/ + +/** Multiply two sparse matrices which are stored in CSR format. MKL is used */ +csr *csr_multiply(csi Am, csi An, csi Anzmax, const csi* Ap, const csi* Aj, const csv* Ax, csi Bm, csi Bn, csi Bnzmax, const csi* Bp, const csi* Bj, const csv* Bx, long* nummult, csi* xb, csv* x) { /*{{{*/ + csv tf = 0; + csi p, jp, j, kp, k, i, nz = 0, anz, *Cp, *Cj, m, n, + bnz, values = 1; + csv *Cx; + csr *C; + if (An != Bm) + return(NULL); + if (Anzmax == 0 || Bnzmax == 0) { + C = csr_spalloc(Am, Bn, 0, values, 0, tf); + return C; + } + m = Am; + anz = Ap[Am]; + n = Bn; + bnz = Bp[Bm]; + for(i = 0; i < n; i++) xb[i] = 0; + for(i = 0; i < n; i++) + xb[i] = 0; + values = (Ax != NULL) && (Bx != NULL); + csi tnz = (anz + bnz) * 2; + C = csr_spalloc(m, n, tnz, values, 0, tf); /* allocate result */ + if (!C || !xb || (values && !x)) + return (csr_done(C, xb, x, 0)); + Cp = C->p; + for (i = 0; i < m; i++) { + if ( ( (nz + n) > C->nzmax ) ) { + if(!csr_sprealloc(C, (2 * (C->nzmax) + n) ) ) { + return (csr_done(C, xb, x, 0)); // out of memory + } else { + } + } + Cj = C->j; + Cx = C->x; /* C->j and C->x may be reallocated */ + Cp[i] = nz; /* row i of C starts here */ + for (jp = Ap[i]; jp < Ap[i + 1]; jp++) { + j = Aj[jp]; + for (kp = Bp[j]; kp < Bp[j + 1]; kp++) { + k = Bj[kp]; /* B(i,j) is nonzero */ + if (xb[k] != i + 1) { + xb[k] = i + 1; /* i is new entry in column j */ + Cj[nz++] = k; /* add i to pattern of C(:,j) */ + if (x) { + x[k] = Ax[jp] * Bx[kp]; /* x(i) = beta*A(i,j) */ + (*nummult)++; + } + } else if (x) { + x[k] += (Ax[jp] * Bx[kp]); /* i exists in C(:,j) already */ + (*nummult)++; + } + } + } + if (values) + for (p = Cp[i]; p < nz; p++) + Cx[p] = x[Cj[p]]; + } + Cp[m] = nz; /* finalize the last row of C */ + csr_sprealloc(C, 0); /* remove extra space from C */ + xb = NULL; + x = NULL; + return C; +}/*}}}*/ + +#endif + diff --git a/spgemm/mkl_shmem/mmio.c b/common/mmio.h similarity index 67% rename from spgemm/mkl_shmem/mmio.c rename to common/mmio.h index c250ff2..d306433 100644 --- a/spgemm/mkl_shmem/mmio.c +++ b/common/mmio.h @@ -6,14 +6,133 @@ * */ +#ifndef MM_IO_H +#define MM_IO_H + +#define MM_MAX_LINE_LENGTH 1025 +#define MatrixMarketBanner "%%MatrixMarket" +#define MM_MAX_TOKEN_LENGTH 64 + +typedef char MM_typecode[4]; + +char *mm_typecode_to_str(MM_typecode matcode); + +int mm_read_banner(FILE *f, MM_typecode *matcode); +int mm_read_mtx_crd_size(FILE *f, int *M, int *N, int *nz); +int mm_read_mtx_array_size(FILE *f, int *M, int *N); + +int mm_write_banner(FILE *f, MM_typecode matcode); +int mm_write_mtx_crd_size(FILE *f, int M, int N, int nz); +int mm_write_mtx_array_size(FILE *f, int M, int N); + + +/********************* MM_typecode query fucntions ***************************/ + +#define mm_is_matrix(typecode) ((typecode)[0]=='M') + +#define mm_is_sparse(typecode) ((typecode)[1]=='C') +#define mm_is_coordinate(typecode)((typecode)[1]=='C') +#define mm_is_dense(typecode) ((typecode)[1]=='A') +#define mm_is_array(typecode) ((typecode)[1]=='A') + +#define mm_is_complex(typecode) ((typecode)[2]=='C') +#define mm_is_real(typecode) ((typecode)[2]=='R') +#define mm_is_pattern(typecode) ((typecode)[2]=='P') +#define mm_is_integer(typecode) ((typecode)[2]=='I') + +#define mm_is_symmetric(typecode)((typecode)[3]=='S') +#define mm_is_general(typecode) ((typecode)[3]=='G') +#define mm_is_skew(typecode) ((typecode)[3]=='K') +#define mm_is_hermitian(typecode)((typecode)[3]=='H') + +int mm_is_valid(MM_typecode matcode); /* too complex for a macro */ + + +/********************* MM_typecode modify fucntions ***************************/ + +#define mm_set_matrix(typecode) ((*typecode)[0]='M') +#define mm_set_coordinate(typecode) ((*typecode)[1]='C') +#define mm_set_array(typecode) ((*typecode)[1]='A') +#define mm_set_dense(typecode) mm_set_array(typecode) +#define mm_set_sparse(typecode) mm_set_coordinate(typecode) + +#define mm_set_complex(typecode)((*typecode)[2]='C') +#define mm_set_real(typecode) ((*typecode)[2]='R') +#define mm_set_pattern(typecode)((*typecode)[2]='P') +#define mm_set_integer(typecode)((*typecode)[2]='I') + + +#define mm_set_symmetric(typecode)((*typecode)[3]='S') +#define mm_set_general(typecode)((*typecode)[3]='G') +#define mm_set_skew(typecode) ((*typecode)[3]='K') +#define mm_set_hermitian(typecode)((*typecode)[3]='H') + +#define mm_clear_typecode(typecode) ((*typecode)[0]=(*typecode)[1]= \ + (*typecode)[2]=' ',(*typecode)[3]='G') + +#define mm_initialize_typecode(typecode) mm_clear_typecode(typecode) + + +/********************* Matrix Market error codes ***************************/ + + +#define MM_COULD_NOT_READ_FILE 11 +#define MM_PREMATURE_EOF 12 +#define MM_NOT_MTX 13 +#define MM_NO_HEADER 14 +#define MM_UNSUPPORTED_TYPE 15 +#define MM_LINE_TOO_LONG 16 +#define MM_COULD_NOT_WRITE_FILE 17 + + +/******************** Matrix Market internal definitions ******************** + + MM_matrix_typecode: 4-character sequence + + ojbect sparse/ data storage + dense type scheme + + string position: [0] [1] [2] [3] + + Matrix typecode: M(atrix) C(oord) R(eal) G(eneral) + A(array) C(omplex) H(ermitian) + P(attern) S(ymmetric) + I(nteger) K(kew) + + ***********************************************************************/ + +#define MM_MTX_STR "matrix" +#define MM_ARRAY_STR "array" +#define MM_DENSE_STR "array" +#define MM_COORDINATE_STR "coordinate" +#define MM_SPARSE_STR "coordinate" +#define MM_COMPLEX_STR "complex" +#define MM_REAL_STR "real" +#define MM_INT_STR "integer" +#define MM_GENERAL_STR "general" +#define MM_SYMM_STR "symmetric" +#define MM_HERM_STR "hermitian" +#define MM_SKEW_STR "skew-symmetric" +#define MM_PATTERN_STR "pattern" + + +/* high level routines */ + +int mm_write_mtx_crd(char fname[], int M, int N, int nz, int I[], int J[], + double val[], MM_typecode matcode); +int mm_read_mtx_crd_data(FILE *f, int M, int N, int nz, int I[], int J[], + double val[], MM_typecode matcode); +int mm_read_mtx_crd_entry(FILE *f, int *I, int *J, double *real, double *img, + MM_typecode matcode); + +int mm_read_unsymmetric_sparse(const char *fname, int *M_, int *N_, int *nz_, + double **val_, int **I_, int **J_); #include #include #include #include -#include "mmio.h" - int mm_read_unsymmetric_sparse(const char *fname, int *M_, int *N_, int *nz_, double **val_, int **I_, int **J_) { @@ -509,3 +628,66 @@ char *mm_typecode_to_str(MM_typecode matcode) return mm_strdup(buffer); } +/** Read Matrix Market file into COO matrix */ +void read_mm(char* strpath, int* pM, int* pN, int* prealnnz, int** pI, int** pJ, double** pval){ /*{{{*/ +/* +* taken from Matrix Market I/O library for ANSI C +* +* See http://math.nist.gov/MatrixMarket for details. +* +* +*/ + +int i, M, N, nz, *I, *J; +double* val; +int ret_code; +MM_typecode matcode; +FILE* f; +if ((f = fopen(strpath, "r")) == NULL) {fprintf(stderr, "Input matrix file %s cannot be opened to read.", strpath);exit(1);} +/* READ MATRIX */ +if (mm_read_banner(f, &matcode) != 0) { + printf("Could not process Matrix Market banner.\n"); exit(1); +} +/* This is how one can screen matrix types if their application */ +/* only supports a subset of the Matrix Market data types. */ +if (mm_is_complex(matcode) && mm_is_matrix(matcode) && mm_is_sparse(matcode) ) { + printf("Sorry, this application does not support "); + printf("Market Market type: [%s]\n", mm_typecode_to_str(matcode));exit(1); +} +/* find out size of sparse matrix .... */ +if ((ret_code = mm_read_mtx_crd_size(f, &M, &N, &nz)) !=0) exit(1); +/* reseve memory for matrices */ +I = (int *) malloc((mm_is_symmetric(matcode) ? 2*nz : nz) * sizeof(int)); +J = (int *) malloc((mm_is_symmetric(matcode) ? 2*nz : nz) * sizeof(int)); +val = (double *) malloc((mm_is_symmetric(matcode) ? 2*nz : nz) * sizeof(double)); +*pI = I; +*pJ = J; +*pval = val; +/* NOTE: when reading in doubles, ANSI C requires the use of the "l" */ +/* specifier as in "%lg", "%lf", "%le", otherwise errors will occur */ +/* (ANSI C X3.159-1989, Sec. 4.9.6.2, p. 136 lines 13-15) */ +int realnnz = 0; +for (i=0; i #include #include -#include "mmio.h" +#define MIC_TARGET +#include "../../common/mmio.h" +#include "../../common/util.h" +#include "../../common/csr.h" /*}}}*/ /* global varibles and defines {{{*/ int nrepeat = 100; -int option_print_matrices = 0; -#define OPTION_NOPRINT_MATRICES 0 -#define OPTION_PRINT_MATRICES 1 char transa = 'n'; -typedef int csi; -typedef double csv; -csv zero = 0.0; -#define CS_MAX(a,b) (((a) > (b)) ? (a) : (b)) - -typedef struct csr_t { - csi m; - csi n; - csi nzmax; - csi nr; - csi* r; - csi* p; - csi* j; - csv* x; -} csr; -/*}}}*/ - -/* method declarations {{{*/ -int main(int argc, char* argv[]); -void printmm_one(int m, double* Aval, int* AJ, int* AI); -void printfilemm_one(char* file, int m, int n, double* Aval, int* AJ, int* AI); -void printmm_zero(int m, double* Aval, int* AJ, int* AI); -csr *csr_spfree(csr *A); /*}}}*/ -/*csr util{{{*/ - -/* free workspace and return a sparse matrix result */ -csr *csr_done(csr *C, void *w, void *x, csi ok) { - return(ok ? C : csr_spfree(C)); /* return result if OK, else free it */ -} - -/* wrapper for free */ -void *cs_free(void *p) { - if (p) - free(p); /* free p if it is not already NULL */ - return(NULL); /* return NULL to simplify the use of cs_free */ -} - -/* wrapper for realloc */ -void *csr_realloc(void *p, csi n, size_t size, csi *ok) { - void *pnew = NULL; - pnew = realloc(p, CS_MAX(n, 1) * size); /* realloc the block */ - *ok = (pnew != NULL); /* realloc fails if pnew is NULL */ - if (pnew == NULL) { - printf("%d:reallocation failed, pnew is NULL\n", __LINE__); - } -//printf("%s:%d: n=%d ok=%d\n", __FUNCTION__, __LINE__, n, *ok); - return((*ok) ? pnew : p); /* return original p if failure */ -} - -/* wrapper for realloc */ -void *cs_realloc(void *p, csi n, size_t size, csi *ok) { - void *pnew = NULL; - pnew = realloc(p, CS_MAX(n, 1) * size); /* realloc the block */ - *ok = (pnew != NULL); /* realloc fails if pnew is NULL */ - if (pnew == NULL) { - printf("reallocation failed\n"); - } - return((*ok) ? pnew : p); /* return original p if failure */ -} - -/* change the max # of entries sparse matrix */ -csi csr_sprealloc(csr *A, csi nzmax) { - csi ok, oki = 0, okj = 1, okx = 1; - if (!A) - return(0); - if (nzmax <= 0) - nzmax = A->p[A->m]; - A->j = (int*)csr_realloc(A->j, nzmax, sizeof(csi), &oki); - if (A->x) - A->x = (csv*)csr_realloc(A->x, nzmax, sizeof(csv), &okx); - ok = (oki && okj && okx); - if (ok) - A->nzmax = nzmax; - return(ok); -} - -/* free a sparse matrix */ -csr *csr_spfree(csr *A) { - if (!A) - return(NULL); /* do nothing if A already NULL */ - cs_free(A->p); - A->p = NULL; - cs_free(A->j); - A->j = NULL; - cs_free(A->x); - A->x = NULL; - cs_free(A->r); - A->r = NULL; - cs_free(A); /* free the cs struct and return NULL */ - return NULL; -} - -/* allocate a sparse matrix (triplet form or compressed-ROW form) */ -csr *csr_spalloc(csi m, csi n, csi nzmax, int values, int triplet, csv f) { - csr* A = (csr*)calloc(1, sizeof(csr)); /* allocate the cs struct */ - if (!A) { - perror("sparse allocation failed"); - return(NULL); /* out of memory */ - } - A->m = m; /* define dimensions and nzmax */ - A->n = n; - A->nzmax = nzmax = CS_MAX(nzmax, 0); - A->nr = 0; // number of nonzero rows - A->p = (csi*)calloc(m + 2, sizeof(csi)); - A->j = (csi*)calloc(CS_MAX(nzmax,1), sizeof(csi)); - A->x = (csv*)calloc(CS_MAX(nzmax,1), sizeof(csv)); - return((!A->p || !A->j || !A->x) ? csr_spfree(A) : A); - -}/*}}}*/ - /** Multiply two sparse matrices which are stored in CSR format. MKL is used */ -csr *csr_multiply(csi Am, csi An, csi Anzmax, const csi* Ap, const csi* Aj, const csv* Ax, csi Bm, csi Bn, csi Bnzmax, const csi* Bp, const csi* Bj, const csv* Bx, long* nummult, csi* xb, csv* x) { /*{{{*/ - csv tf = 0; - csi p, jp, j, kp, k, i, nz = 0, anz, *Cp, *Cj, m, n, - bnz, values = 1; - csv *Cx; - csr *C; - if (An != Bm) - return(NULL); - if (Anzmax == 0 || Bnzmax == 0) { - C = csr_spalloc(Am, Bn, 0, values, 0, tf); - return C; - } - m = Am; - anz = Ap[Am]; - n = Bn; - bnz = Bp[Bm]; - for(i = 0; i < n; i++) xb[i] = 0; - for(i = 0; i < n; i++) - xb[i] = 0; - values = (Ax != NULL) && (Bx != NULL); - csi tnz = (anz + bnz) * 2; - C = csr_spalloc(m, n, tnz, values, 0, tf); /* allocate result */ - if (!C || !xb || (values && !x)) - return (csr_done(C, xb, x, 0)); - Cp = C->p; - for (i = 0; i < m; i++) { - if ( ( (nz + n) > C->nzmax ) ) { - if(!csr_sprealloc(C, (2 * (C->nzmax) + n) ) ) { - return (csr_done(C, xb, x, 0)); // out of memory - } else { - } - } - Cj = C->j; - Cx = C->x; /* C->j and C->x may be reallocated */ - Cp[i] = nz; /* row i of C starts here */ - for (jp = Ap[i]; jp < Ap[i + 1]; jp++) { - j = Aj[jp]; - for (kp = Bp[j]; kp < Bp[j + 1]; kp++) { - k = Bj[kp]; /* B(i,j) is nonzero */ - if (xb[k] != i + 1) { - xb[k] = i + 1; /* i is new entry in column j */ - Cj[nz++] = k; /* add i to pattern of C(:,j) */ - if (x) { - x[k] = Ax[jp] * Bx[kp]; /* x(i) = beta*A(i,j) */ - (*nummult)++; - } - } else if (x) { - x[k] += (Ax[jp] * Bx[kp]); /* i exists in C(:,j) already */ - (*nummult)++; - } - } - } - if (values) - for (p = Cp[i]; p < nz; p++) - Cx[p] = x[Cj[p]]; - } - Cp[m] = nz; /* finalize the last row of C */ - csr_sprealloc(C, 0); /* remove extra space from C */ - xb = NULL; - x = NULL; - return C; -}/*}}}*/ - -/** Multiply two sparse matrices which are stored in CSR format. MKL is used */ -void mkl_cpu_spgemm(MKL_INT Am, MKL_INT An, MKL_INT Annz, double* Aval, MKL_INT* AJ, MKL_INT* AI, MKL_INT Bn, MKL_INT Bnnz, double* Bval, MKL_INT* BJ, MKL_INT* BI, double** pCval, MKL_INT** pCJ, MKL_INT** pCI, double* time) { /*{{{*/ - -MKL_INT* CJ = NULL; -double* Cval = NULL; -MKL_INT sort = 3; // sort everything -MKL_INT* CI = (MKL_INT*)mkl_malloc( (Am+2) * sizeof( MKL_INT ), 64 ); -MKL_INT nnzmax = 0; // nnzmax is zero in case of symbolic&numeric usage of mkl_?csrmultcsr -MKL_INT ierr; -MKL_INT request = 1; // symbolic multiplication -mkl_dcsrmultcsr(&transa, &request, &sort, &Am, &An, &Bn, Aval, AJ, AI, Bval, BJ, BI, Cval, CJ, CI, &nnzmax, &ierr); - -request = 2; //numeric multiplication -int Cnnz = CI[Am]-1; -int Cval_size = Cnnz + 1; -CJ = (MKL_INT*)mkl_malloc( Cval_size * sizeof( MKL_INT ), 64 ); -Cval = (double*)mkl_malloc( Cval_size * sizeof( double ), 64 ); -double time_st = dsecnd(); -int i; -for(i = 0; i < nrepeat; i++) { +void mkl_cpu_spgemm(MKL_INT Am, MKL_INT An, MKL_INT Annz, double* Aval, MKL_INT* AJ, MKL_INT* AI, MKL_INT Bn, MKL_INT Bnnz, double* Bval, MKL_INT* BJ, MKL_INT* BI, double** pCval, MKL_INT** pCJ, MKL_INT** pCI, double* time) { /*{{{*/ + MKL_INT* CJ = NULL; + double* Cval = NULL; + MKL_INT sort = 3; // sort everything + MKL_INT* CI = (MKL_INT*)mkl_malloc( (Am+2) * sizeof( MKL_INT ), 64 ); + MKL_INT nnzmax = 0; // nnzmax is zero in case of symbolic&numeric usage of mkl_?csrmultcsr + MKL_INT ierr; + MKL_INT request = 1; // symbolic multiplication mkl_dcsrmultcsr(&transa, &request, &sort, &Am, &An, &Bn, Aval, AJ, AI, Bval, BJ, BI, Cval, CJ, CI, &nnzmax, &ierr); -} -double time_end = dsecnd(); -*time = (time_end - time_st)/nrepeat; -*pCval = Cval; *pCJ = CJ; *pCI = CI; -} /*}}}*/ - -/** Read Matrix Market file into COO matrix */ -void read_mm(char* strpath, int* pM, int* pN, int* prealnnz, int** pI, int** pJ, double** pval){ /*{{{*/ -/* -* taken from Matrix Market I/O library for ANSI C -* -* See http://math.nist.gov/MatrixMarket for details. -* -* -*/ - -int i, M, N, nz, *I, *J; -double* val; -int ret_code; -MM_typecode matcode; -FILE* f; -if ((f = fopen(strpath, "r")) == NULL) {fprintf(stderr, "Input matrix file %s cannot be opened to read.", strpath);exit(1);} -/* READ MATRIX */ -if (mm_read_banner(f, &matcode) != 0) { - printf("Could not process Matrix Market banner.\n"); exit(1); -} -/* This is how one can screen matrix types if their application */ -/* only supports a subset of the Matrix Market data types. */ -if (mm_is_complex(matcode) && mm_is_matrix(matcode) && mm_is_sparse(matcode) ) { - printf("Sorry, this application does not support "); - printf("Market Market type: [%s]\n", mm_typecode_to_str(matcode));exit(1); -} -/* find out size of sparse matrix .... */ -if ((ret_code = mm_read_mtx_crd_size(f, &M, &N, &nz)) !=0) exit(1); -/* reseve memory for matrices */ -I = (int *) malloc((mm_is_symmetric(matcode) ? 2*nz : nz) * sizeof(int)); -J = (int *) malloc((mm_is_symmetric(matcode) ? 2*nz : nz) * sizeof(int)); -val = (double *) malloc((mm_is_symmetric(matcode) ? 2*nz : nz) * sizeof(double)); -*pI = I; -*pJ = J; -*pval = val; -/* NOTE: when reading in doubles, ANSI C requires the use of the "l" */ -/* specifier as in "%lg", "%lf", "%le", otherwise errors will occur */ -/* (ANSI C X3.159-1989, Sec. 4.9.6.2, p. 136 lines 13-15) */ -int realnnz = 0; -for (i=0; i 0) { - mkl_set_num_threads(nthreads); -} else { - nthreads = mkl_get_max_threads(); -} -option_print_matrices = strcmp(argv[iarg], "PRINT_YES")==0?OPTION_PRINT_MATRICES:OPTION_NOPRINT_MATRICES; iarg++; -assert(nrequired_args == iarg); - -/** read matrix market file for A matrix */ -MKL_INT Am, An, Annz; -MKL_INT *Ax, *Ay; -double *Anz; -read_mm(strpathA, &Am, &An, &Annz, &Ax, &Ay, &Anz); - -/** construct csr storage for A matrix */ -MKL_INT* AJ = (MKL_INT*) mkl_malloc( Annz * sizeof( MKL_INT ), 64 ); -MKL_INT* AI = (MKL_INT*) mkl_malloc( (Am+1) * sizeof( MKL_INT ), 64 ); -double* Aval = (double*) mkl_malloc( Annz * sizeof( double ), 64 ); -coo_to_csr(Am, Annz, Ax, Ay, Anz, AI, AJ, Aval); - -/** read matrix market file for B matrix */ -MKL_INT Bm, Bn, Bnnz; -MKL_INT *Bx, *By; -double *Bnz; -read_mm(strpathB, &Bm, &Bn, &Bnnz, &Bx, &By, &Bnz); - -/** construct csr storage for B matrix */ -MKL_INT* BJ = (MKL_INT*) mkl_malloc( Bnnz * sizeof( MKL_INT ), 64 ); -MKL_INT* BI = (MKL_INT*) mkl_malloc( (Bm+1) * sizeof( MKL_INT ), 64 ); -double* Bval = (double*) mkl_malloc( Bnnz * sizeof( double ), 64 ); -coo_to_csr(Bm, Bnnz, Bx, By, Bnz, BI, BJ, Bval); - -/** multiply two matrices */ -double* Cval, time; -MKL_INT* CJ; MKL_INT* CI; -mkl_cpu_spgemm(Am, An, Annz, Aval, AJ, AI, Bn, Bnnz, Bval, BJ, BI, &Cval, &CJ, &CI, &time); - -int i; -for(i=0;i<=Am;i++)AI[i]--; for(i=0;i 0) { + mkl_set_num_threads(nthreads); + } else { + nthreads = mkl_get_max_threads(); } + option_print_matrices = strcmp(argv[iarg], "PRINT_YES")==0?OPTION_PRINT_MATRICES:OPTION_NOPRINT_MATRICES; iarg++; + assert(nrequired_args == iarg); + + /** read matrix market file for A matrix */ + MKL_INT Am, An, Annz; + MKL_INT *Ax, *Ay; + double *Anz; + read_mm(strpathA, &Am, &An, &Annz, &Ax, &Ay, &Anz); + + /** construct csr storage for A matrix */ + MKL_INT* AJ = (MKL_INT*) mkl_malloc( Annz * sizeof( MKL_INT ), 64 ); + MKL_INT* AI = (MKL_INT*) mkl_malloc( (Am+1) * sizeof( MKL_INT ), 64 ); + double* Aval = (double*) mkl_malloc( Annz * sizeof( double ), 64 ); + coo_to_csr(Am, Annz, Ax, Ay, Anz, AI, AJ, Aval); + + /** read matrix market file for B matrix */ + MKL_INT Bm, Bn, Bnnz; + MKL_INT *Bx, *By; + double *Bnz; + read_mm(strpathB, &Bm, &Bn, &Bnnz, &Bx, &By, &Bnz); + + /** construct csr storage for B matrix */ + MKL_INT* BJ = (MKL_INT*) mkl_malloc( Bnnz * sizeof( MKL_INT ), 64 ); + MKL_INT* BI = (MKL_INT*) mkl_malloc( (Bm+1) * sizeof( MKL_INT ), 64 ); + double* Bval = (double*) mkl_malloc( Bnnz * sizeof( double ), 64 ); + coo_to_csr(Bm, Bnnz, Bx, By, Bnz, BI, BJ, Bval); + + /** multiply two matrices */ + double* Cval, time; + MKL_INT* CJ; MKL_INT* CI; + mkl_cpu_spgemm(Am, An, Annz, Aval, AJ, AI, Bn, Bnnz, Bval, BJ, BI, &Cval, &CJ, &CI, &time); + int i; - fprintf(f, "%%%%MatrixMarket matrix coordinate real general\n"); - fprintf(f, "%d %d %d\n", m, n, AI[m]-1); - for(i = 0; i < m; i++) { - int j; - for(j = AI[i]-1; j < AI[i+1]-1; j++) { - fprintf(f, "%d %d %g\n", i+1, AJ[j], Aval[j]); - } - } - fclose(f); -}//}}} + for(i=0;i<=Am;i++)AI[i]--; for(i=0;i #include #include -#include "mmio.h" +#define MIC_TARGET __declspec(target(mic)) +#include "../../common/mmio.h" +#include "../../common/util.h" +#include "../../common/csr.h" /*}}}*/ /* global varibles and defines {{{*/ int micdev = 0; __declspec(target(mic)) int nrepeat = 100; -__declspec(target(mic)) int option_print_matrices = 0; -#define OPTION_NOPRINT_MATRICES 0 -#define OPTION_PRINT_MATRICES 1 __declspec(target(mic)) char transa = 'n'; -typedef int csi; -typedef double csv; -csv zero = 0.0; -#define CS_MAX(a,b) (((a) > (b)) ? (a) : (b)) __declspec(target(mic)) int nthreads; -typedef struct csr_t { - csi m; - csi n; - csi nzmax; - csi nr; - csi* r; - csi* p; - csi* j; - csv* x; -} csr; /*}}}*/ -/* method declarations {{{*/ -int main(int argc, char* argv[]); -__declspec(target(mic)) void printmm_one(int m, double* Aval, int* AJ, int* AI); -__declspec(target(mic)) void printfilemm_one(char* file, int m, int n, double* Aval, int* AJ, int* AI); -__declspec(target(mic)) void printmm_zero(int m, double* Aval, int* AJ, int* AI); -csr *csr_spfree(csr *A); -/*}}}*/ - -/*csr util{{{*/ - -/* free workspace and return a sparse matrix result */ -csr *csr_done(csr *C, void *w, void *x, csi ok) { - return(ok ? C : csr_spfree(C)); /* return result if OK, else free it */ -} - -/* wrapper for free */ -void *cs_free(void *p) { - if (p) - free(p); /* free p if it is not already NULL */ - return(NULL); /* return NULL to simplify the use of cs_free */ -} - -/* wrapper for realloc */ -void *csr_realloc(void *p, csi n, size_t size, csi *ok) { - void *pnew = NULL; - pnew = realloc(p, CS_MAX(n, 1) * size); /* realloc the block */ - *ok = (pnew != NULL); /* realloc fails if pnew is NULL */ - if (pnew == NULL) { - printf("%d:reallocation failed, pnew is NULL\n", __LINE__); - } -//printf("%s:%d: n=%d ok=%d\n", __FUNCTION__, __LINE__, n, *ok); - return((*ok) ? pnew : p); /* return original p if failure */ -} - -/* wrapper for realloc */ -void *cs_realloc(void *p, csi n, size_t size, csi *ok) { - void *pnew = NULL; - pnew = realloc(p, CS_MAX(n, 1) * size); /* realloc the block */ - *ok = (pnew != NULL); /* realloc fails if pnew is NULL */ - if (pnew == NULL) { - printf("reallocation failed\n"); - } - return((*ok) ? pnew : p); /* return original p if failure */ -} - -/* change the max # of entries sparse matrix */ -csi csr_sprealloc(csr *A, csi nzmax) { - csi ok, oki = 0, okj = 1, okx = 1; - if (!A) - return(0); - if (nzmax <= 0) - nzmax = A->p[A->m]; - A->j = (int*)csr_realloc(A->j, nzmax, sizeof(csi), &oki); - if (A->x) - A->x = (csv*)csr_realloc(A->x, nzmax, sizeof(csv), &okx); - ok = (oki && okj && okx); - if (ok) - A->nzmax = nzmax; - return(ok); -} - -/* free a sparse matrix */ -csr *csr_spfree(csr *A) { - if (!A) - return(NULL); /* do nothing if A already NULL */ - cs_free(A->p); - A->p = NULL; - cs_free(A->j); - A->j = NULL; - cs_free(A->x); - A->x = NULL; - cs_free(A->r); - A->r = NULL; - cs_free(A); /* free the cs struct and return NULL */ - return NULL; -} - -/* allocate a sparse matrix (triplet form or compressed-ROW form) */ -csr *csr_spalloc(csi m, csi n, csi nzmax, int values, int triplet, csv f) { - csr* A = (csr*)calloc(1, sizeof(csr)); /* allocate the cs struct */ - if (!A) { - perror("sparse allocation failed"); - return(NULL); /* out of memory */ - } - A->m = m; /* define dimensions and nzmax */ - A->n = n; - A->nzmax = nzmax = CS_MAX(nzmax, 0); - A->nr = 0; // number of nonzero rows - A->p = (csi*)calloc(m + 2, sizeof(csi)); - A->j = (csi*)calloc(CS_MAX(nzmax,1), sizeof(csi)); - A->x = (csv*)calloc(CS_MAX(nzmax,1), sizeof(csv)); - return((!A->p || !A->j || !A->x) ? csr_spfree(A) : A); - -}/*}}}*/ - /** Multiply two sparse matrices which are stored in CSR format. MKL is used */ -csr *csr_multiply(csi Am, csi An, csi Anzmax, const csi* Ap, const csi* Aj, const csv* Ax, csi Bm, csi Bn, csi Bnzmax, const csi* Bp, const csi* Bj, const csv* Bx, long* nummult, csi* xb, csv* x) { /*{{{*/ - csv tf = 0; - csi p, jp, j, kp, k, i, nz = 0, anz, *Cp, *Cj, m, n, - bnz, values = 1; - csv *Cx; - csr *C; - if (An != Bm) - return(NULL); - if (Anzmax == 0 || Bnzmax == 0) { - C = csr_spalloc(Am, Bn, 0, values, 0, tf); - return C; +void mkl_mic_spgemm(MKL_INT Am, MKL_INT An, MKL_INT Annz, double* Aval, MKL_INT* AJ, MKL_INT* AI, MKL_INT Bn, MKL_INT Bnnz, double* Bval, MKL_INT* BJ, MKL_INT* BI, double** pCval, MKL_INT** pCJ, MKL_INT** pCI, double* time) {/*{{{*/ + int i; + #pragma offload target(mic:micdev) inout(nthreads) + { + if (nthreads > 0) { + mkl_set_num_threads(nthreads); + } else { + nthreads = mkl_get_max_threads(); + } + } + #pragma offload target(mic:micdev) \ + in(transa) \ + in(Am) \ + in(An) \ + in(Bn) \ + in(Aval:length(Annz) free_if(0)) \ + in(AI:length(Am+1) free_if(0)) \ + in(AJ:length(Annz) free_if(0)) \ + in(Bval:length(Bnnz) free_if(0)) \ + in(BI:length(An+1) free_if(0)) \ + in(BJ:length(Bnnz) free_if(0)) + {} + + MKL_INT Cnnz_host = -1; + MKL_INT* CI = NULL; + MKL_INT* CJ = NULL; + double* Cval = NULL; + MKL_INT nnzmax = 0; // nnzmax is zero in case of symbolic&numeric usage of mkl_?csrmultcsr + MKL_INT ierr; + MKL_INT request = 2; //numeric multiplication + MKL_INT sort = 3; // sort everything + double time_mic_symbolic_mm = 0.0; + #pragma offload target(mic:micdev) in(i) in(ierr) in(nnzmax) in(request) in(sort) in(transa) in(Am) in(An) in(Bn) \ + out(time_mic_symbolic_mm) out(Cnnz_host)\ + in(Aval:length(Annz) alloc_if(0) free_if(0)) \ + in(AI:length(Am+1) alloc_if(0) free_if(0)) \ + in(AJ:length(Annz) alloc_if(0) free_if(0)) \ + in(Bval:length(Bnnz) alloc_if(0) free_if(0)) \ + in(BI:length(An+1) alloc_if(0) free_if(0)) \ + in(BJ:length(Bnnz) alloc_if(0) free_if(0)) \ + nocopy(CI: alloc_if(0) free_if(0)) \ + nocopy(CJ: alloc_if(0) free_if(0)) \ + nocopy(Cval: alloc_if(0) free_if(0)) + { + CI = (MKL_INT*)mkl_malloc( (Am+2) * sizeof( MKL_INT ), 64 ); + MKL_INT nnzmax = 0; // nnzmax is zero in case of symbolic&numeric usage of mkl_?csrmultcsr + MKL_INT ierr; + MKL_INT request = 1; // symbolic multiplication + + for(i = 0; i < 10; i++) { + mkl_dcsrmultcsr(&transa, &request, &sort, &Am, &An, &Bn, Aval, AJ, AI, Bval, BJ, BI, Cval, CJ, CI, &nnzmax, &ierr); + } + double s_initial = dsecnd(); + for(i = 0; i < nrepeat; i++) { + mkl_dcsrmultcsr(&transa, &request, &sort, &Am, &An, &Bn, Aval, AJ, AI, Bval, BJ, BI, Cval, CJ, CI, &nnzmax, &ierr); + } + time_mic_symbolic_mm = (dsecnd() - s_initial) / nrepeat; // seconds + + request = 2; //numeric multiplication + int Cnnz = CI[Am]-1; + int Cval_size = Cnnz + 1; Cnnz_host = Cnnz; + CJ = (MKL_INT*)mkl_malloc( Cval_size * sizeof( MKL_INT ), 64 ); + Cval = (double*)mkl_malloc( Cval_size * sizeof( double ), 64 ); + mkl_dcsrmultcsr(&transa, &request, &sort, &Am, &An, &An, Aval, AJ, AI, Bval, BJ, BI, Cval, CJ, CI, &nnzmax, &ierr); + printmm_one(Am, Cval, CJ, CI); + sort = 7; // do not sort anything + request = 2; // numeric multiplication } - m = Am; - anz = Ap[Am]; - n = Bn; - bnz = Bp[Bm]; - for(i = 0; i < n; i++) xb[i] = 0; - for(i = 0; i < n; i++) - xb[i] = 0; - values = (Ax != NULL) && (Bx != NULL); - csi tnz = (anz + bnz) * 2; - C = csr_spalloc(m, n, tnz, values, 0, tf); /* allocate result */ - if (!C || !xb || (values && !x)) - return (csr_done(C, xb, x, 0)); - Cp = C->p; - for (i = 0; i < m; i++) { - if ( ( (nz + n) > C->nzmax ) ) { - if(!csr_sprealloc(C, (2 * (C->nzmax) + n) ) ) { - return (csr_done(C, xb, x, 0)); // out of memory - } else { - } + double time_mic_numeric_mm = 0.0; + #pragma offload target(mic:micdev) nocopy(request) nocopy(sort) nocopy(i) nocopy(ierr) nocopy(nnzmax) nocopy(transa) nocopy(Am) nocopy(An) nocopy(Bn) \ + out(time_mic_numeric_mm) \ + nocopy(Aval:length(Annz) alloc_if(0) free_if(0)) \ + nocopy(AI:length(Am+1) alloc_if(0) free_if(0)) \ + nocopy(AJ:length(Annz) alloc_if(0) free_if(0)) \ + nocopy(Bval:length(Bnnz) alloc_if(0) free_if(0)) \ + nocopy(BI:length(An+1) alloc_if(0) free_if(0)) \ + nocopy(BJ:length(Bnnz) alloc_if(0) free_if(0)) \ + nocopy(CI: alloc_if(0) free_if(0)) \ + nocopy(CJ: alloc_if(0) free_if(0)) \ + nocopy(Cval: alloc_if(0) free_if(0)) + { + for(i = 0; i < 10; i++) { + mkl_dcsrmultcsr(&transa, &request, &sort, &Am, &An, &An, Aval, AJ, AI, Bval, BJ, BI, Cval, CJ, CI, &nnzmax, &ierr); } - Cj = C->j; - Cx = C->x; /* C->j and C->x may be reallocated */ - Cp[i] = nz; /* row i of C starts here */ - for (jp = Ap[i]; jp < Ap[i + 1]; jp++) { - j = Aj[jp]; - for (kp = Bp[j]; kp < Bp[j + 1]; kp++) { - k = Bj[kp]; /* B(i,j) is nonzero */ - if (xb[k] != i + 1) { - xb[k] = i + 1; /* i is new entry in column j */ - Cj[nz++] = k; /* add i to pattern of C(:,j) */ - if (x) { - x[k] = Ax[jp] * Bx[kp]; /* x(i) = beta*A(i,j) */ - (*nummult)++; - } - } else if (x) { - x[k] += (Ax[jp] * Bx[kp]); /* i exists in C(:,j) already */ - (*nummult)++; - } - } + double s_initial = dsecnd(); + for(i = 0; i < nrepeat; i++) { + mkl_dcsrmultcsr(&transa, &request, &sort, &Am, &An, &An, Aval, AJ, AI, Bval, BJ, BI, Cval, CJ, CI, &nnzmax, &ierr); } - if (values) - for (p = Cp[i]; p < nz; p++) - Cx[p] = x[Cj[p]]; + time_mic_numeric_mm = (dsecnd() - s_initial) / nrepeat; // seconds } - Cp[m] = nz; /* finalize the last row of C */ - csr_sprealloc(C, 0); /* remove extra space from C */ - xb = NULL; - x = NULL; - return C; -}/*}}}*/ - -/** Multiply two sparse matrices which are stored in CSR format. MKL is used */ -void mkl_mic_spgemm(MKL_INT Am, MKL_INT An, MKL_INT Annz, double* Aval, MKL_INT* AJ, MKL_INT* AI, MKL_INT Bn, MKL_INT Bnnz, double* Bval, MKL_INT* BJ, MKL_INT* BI, double** pCval, MKL_INT** pCJ, MKL_INT** pCI, double* time) {/*{{{*/ - - int i; - #pragma offload target(mic:micdev) inout(nthreads) - { - if (nthreads > 0) { - mkl_set_num_threads(nthreads); - } else { - nthreads = mkl_get_max_threads(); - } - } - #pragma offload target(mic:micdev) \ - in(transa) \ - in(Am) \ - in(An) \ - in(Bn) \ - in(Aval:length(Annz) free_if(0)) \ - in(AI:length(Am+1) free_if(0)) \ - in(AJ:length(Annz) free_if(0)) \ - in(Bval:length(Bnnz) free_if(0)) \ - in(BI:length(An+1) free_if(0)) \ - in(BJ:length(Bnnz) free_if(0)) - {} - - MKL_INT Cnnz_host = -1; - MKL_INT* CI = NULL; - MKL_INT* CJ = NULL; - double* Cval = NULL; - MKL_INT nnzmax = 0; // nnzmax is zero in case of symbolic&numeric usage of mkl_?csrmultcsr - MKL_INT ierr; - MKL_INT request = 2; //numeric multiplication - MKL_INT sort = 3; // sort everything - double time_mic_symbolic_mm = 0.0; - #pragma offload target(mic:micdev) in(i) in(ierr) in(nnzmax) in(request) in(sort) in(transa) in(Am) in(An) in(Bn) \ - out(time_mic_symbolic_mm) out(Cnnz_host)\ - in(Aval:length(Annz) alloc_if(0) free_if(0)) \ - in(AI:length(Am+1) alloc_if(0) free_if(0)) \ - in(AJ:length(Annz) alloc_if(0) free_if(0)) \ - in(Bval:length(Bnnz) alloc_if(0) free_if(0)) \ - in(BI:length(An+1) alloc_if(0) free_if(0)) \ - in(BJ:length(Bnnz) alloc_if(0) free_if(0)) \ - nocopy(CI: alloc_if(0) free_if(0)) \ - nocopy(CJ: alloc_if(0) free_if(0)) \ - nocopy(Cval: alloc_if(0) free_if(0)) - { - CI = (MKL_INT*)mkl_malloc( (Am+2) * sizeof( MKL_INT ), 64 ); - MKL_INT nnzmax = 0; // nnzmax is zero in case of symbolic&numeric usage of mkl_?csrmultcsr - MKL_INT ierr; - MKL_INT request = 1; // symbolic multiplication - //MKL_INT sort = 7; // do not sort anything - - for(i = 0; i < 10; i++) { - mkl_dcsrmultcsr(&transa, &request, &sort, &Am, &An, &Bn, Aval, AJ, AI, Bval, BJ, BI, Cval, CJ, CI, &nnzmax, &ierr); - } - double s_initial = dsecnd(); - for(i = 0; i < nrepeat; i++) { - mkl_dcsrmultcsr(&transa, &request, &sort, &Am, &An, &Bn, Aval, AJ, AI, Bval, BJ, BI, Cval, CJ, CI, &nnzmax, &ierr); - } - time_mic_symbolic_mm = (dsecnd() - s_initial) / nrepeat; // seconds - - request = 2; //numeric multiplication - int Cnnz = CI[Am]-1; - int Cval_size = Cnnz + 1; Cnnz_host = Cnnz; - CJ = (MKL_INT*)mkl_malloc( Cval_size * sizeof( MKL_INT ), 64 ); - Cval = (double*)mkl_malloc( Cval_size * sizeof( double ), 64 ); - mkl_dcsrmultcsr(&transa, &request, &sort, &Am, &An, &An, Aval, AJ, AI, Bval, BJ, BI, Cval, CJ, CI, &nnzmax, &ierr); - printmm_one(Am, Cval, CJ, CI); - sort = 7; // do not sort anything - request = 2; // numeric multiplication - } - double time_mic_numeric_mm = 0.0; - #pragma offload target(mic:micdev) nocopy(request) nocopy(sort) nocopy(i) nocopy(ierr) nocopy(nnzmax) nocopy(transa) nocopy(Am) nocopy(An) nocopy(Bn) \ - out(time_mic_numeric_mm) \ - nocopy(Aval:length(Annz) alloc_if(0) free_if(0)) \ - nocopy(AI:length(Am+1) alloc_if(0) free_if(0)) \ - nocopy(AJ:length(Annz) alloc_if(0) free_if(0)) \ - nocopy(Bval:length(Bnnz) alloc_if(0) free_if(0)) \ - nocopy(BI:length(An+1) alloc_if(0) free_if(0)) \ - nocopy(BJ:length(Bnnz) alloc_if(0) free_if(0)) \ - nocopy(CI: alloc_if(0) free_if(0)) \ - nocopy(CJ: alloc_if(0) free_if(0)) \ - nocopy(Cval: alloc_if(0) free_if(0)) - { - for(i = 0; i < 10; i++) { - mkl_dcsrmultcsr(&transa, &request, &sort, &Am, &An, &An, Aval, AJ, AI, Bval, BJ, BI, Cval, CJ, CI, &nnzmax, &ierr); - } - double s_initial = dsecnd(); - for(i = 0; i < nrepeat; i++) { - mkl_dcsrmultcsr(&transa, &request, &sort, &Am, &An, &An, Aval, AJ, AI, Bval, BJ, BI, Cval, CJ, CI, &nnzmax, &ierr); - } - time_mic_numeric_mm = (dsecnd() - s_initial) / nrepeat; // seconds - } - *time = time_mic_numeric_mm; - if (1) { // Copy result matrix from MIC to Host - int nelm_CI = Am + 2; - int nelm_CJ = Cnnz_host + 1; - int nelm_Cval = Cnnz_host + 1; - __declspec(target(mic)) MKL_INT* CI_host = (MKL_INT*)mkl_malloc( (nelm_CI) * sizeof( MKL_INT ), 64 ); - __declspec(target(mic)) MKL_INT* CJ_host = (MKL_INT*)mkl_malloc( (nelm_CJ) * sizeof( MKL_INT ), 64 ); - __declspec(target(mic)) double* Cval_host = (double*)mkl_malloc( (nelm_Cval) * sizeof( double ), 64 ); - #pragma offload target(mic:micdev) in(nelm_CI)\ - inout(CI_host:length(nelm_CI) alloc_if(1) free_if(0)) \ - inout(CJ_host:length(nelm_CJ) alloc_if(1) free_if(0)) \ - inout(Cval_host:length(nelm_Cval) alloc_if(1) free_if(0)) \ - nocopy(CI: alloc_if(0) free_if(0)) \ - nocopy(CJ: alloc_if(0) free_if(0)) \ - nocopy(Cval: alloc_if(0) free_if(0)) - { - int i; - for(i = 0; i < nelm_CI; i++) CI_host[i] = CI[i]; - for(i = 0; i < nelm_CJ; i++) {CJ_host[i] = CJ[i]; Cval_host[i] = Cval[i];} - } - *pCval = Cval_host; *pCJ = CJ_host; *pCI = CI_host; - } -} /* ENDOF mkl_mic_spmm }}}*/ - -/** Read Matrix Market file into COO matrix */ -void read_mm(char* strpath, int* pM, int* pN, int* prealnnz, int** pI, int** pJ, double** pval){ /*{{{*/ -/* -* taken from Matrix Market I/O library for ANSI C -* -* See http://math.nist.gov/MatrixMarket for details. -* -* -*/ - -int i, M, N, nz, *I, *J; -double* val; -int ret_code; -MM_typecode matcode; -FILE* f; -if ((f = fopen(strpath, "r")) == NULL) {fprintf(stderr, "Input matrix file %s cannot be opened to read.", strpath);exit(1);} -/* READ MATRIX */ -if (mm_read_banner(f, &matcode) != 0) { - printf("Could not process Matrix Market banner.\n"); exit(1); -} -/* This is how one can screen matrix types if their application */ -/* only supports a subset of the Matrix Market data types. */ -if (mm_is_complex(matcode) && mm_is_matrix(matcode) && mm_is_sparse(matcode) ) { - printf("Sorry, this application does not support "); - printf("Market Market type: [%s]\n", mm_typecode_to_str(matcode));exit(1); -} -/* find out size of sparse matrix .... */ -if ((ret_code = mm_read_mtx_crd_size(f, &M, &N, &nz)) !=0) exit(1); -/* reseve memory for matrices */ -I = (int *) malloc((mm_is_symmetric(matcode) ? 2*nz : nz) * sizeof(int)); -J = (int *) malloc((mm_is_symmetric(matcode) ? 2*nz : nz) * sizeof(int)); -val = (double *) malloc((mm_is_symmetric(matcode) ? 2*nz : nz) * sizeof(double)); -*pI = I; -*pJ = J; -*pval = val; -/* NOTE: when reading in doubles, ANSI C requires the use of the "l" */ -/* specifier as in "%lg", "%lf", "%le", otherwise errors will occur */ -/* (ANSI C X3.159-1989, Sec. 4.9.6.2, p. 136 lines 13-15) */ -int realnnz = 0; -for (i=0; i 0) { - mkl_set_num_threads(nthreads); -} else { - nthreads = mkl_get_max_threads(); -} -micdev = atoi(argv[iarg]); iarg++; -option_print_matrices = strcmp(argv[iarg], "PRINT_YES")==0?OPTION_PRINT_MATRICES:OPTION_NOPRINT_MATRICES; iarg++; -assert(nrequired_args == iarg); - -/** read matrix market file for A matrix */ -MKL_INT Am, An, Annz; -MKL_INT *Ax, *Ay; -double *Anz; -read_mm(strpathA, &Am, &An, &Annz, &Ax, &Ay, &Anz); - -/** construct csr storage for A matrix */ -MKL_INT* AJ = (MKL_INT*) mkl_malloc( Annz * sizeof( MKL_INT ), 64 ); -MKL_INT* AI = (MKL_INT*) mkl_malloc( (Am+1) * sizeof( MKL_INT ), 64 ); -double* Aval = (double*) mkl_malloc( Annz * sizeof( double ), 64 ); -coo_to_csr(Am, Annz, Ax, Ay, Anz, AI, AJ, Aval); - -/** read matrix market file for B matrix */ -MKL_INT Bm, Bn, Bnnz; -MKL_INT *Bx, *By; -double *Bnz; -read_mm(strpathB, &Bm, &Bn, &Bnnz, &Bx, &By, &Bnz); - -/** construct csr storage for B matrix */ -MKL_INT* BJ = (MKL_INT*) mkl_malloc( Bnnz * sizeof( MKL_INT ), 64 ); -MKL_INT* BI = (MKL_INT*) mkl_malloc( (Bm+1) * sizeof( MKL_INT ), 64 ); -double* Bval = (double*) mkl_malloc( Bnnz * sizeof( double ), 64 ); -coo_to_csr(Bm, Bnnz, Bx, By, Bnz, BI, BJ, Bval); - -/** multiply two matrices */ -double* Cval, time; -MKL_INT* CJ; MKL_INT* CI; -mkl_mic_spgemm(Am, An, Annz, Aval, AJ, AI, Bn, Bnnz, Bval, BJ, BI, &Cval, &CJ, &CI, &time); - -int i; -for(i=0;i<=Am;i++)AI[i]--; for(i=0;i 0) { + mkl_set_num_threads(nthreads); + } else { + nthreads = mkl_get_max_threads(); } + micdev = atoi(argv[iarg]); iarg++; + option_print_matrices = strcmp(argv[iarg], "PRINT_YES")==0?OPTION_PRINT_MATRICES:OPTION_NOPRINT_MATRICES; iarg++; + assert(nrequired_args == iarg); + + /** read matrix market file for A matrix */ + MKL_INT Am, An, Annz; + MKL_INT *Ax, *Ay; + double *Anz; + read_mm(strpathA, &Am, &An, &Annz, &Ax, &Ay, &Anz); + + /** construct csr storage for A matrix */ + MKL_INT* AJ = (MKL_INT*) mkl_malloc( Annz * sizeof( MKL_INT ), 64 ); + MKL_INT* AI = (MKL_INT*) mkl_malloc( (Am+1) * sizeof( MKL_INT ), 64 ); + double* Aval = (double*) mkl_malloc( Annz * sizeof( double ), 64 ); + coo_to_csr(Am, Annz, Ax, Ay, Anz, AI, AJ, Aval); + + /** read matrix market file for B matrix */ + MKL_INT Bm, Bn, Bnnz; + MKL_INT *Bx, *By; + double *Bnz; + read_mm(strpathB, &Bm, &Bn, &Bnnz, &Bx, &By, &Bnz); + + /** construct csr storage for B matrix */ + MKL_INT* BJ = (MKL_INT*) mkl_malloc( Bnnz * sizeof( MKL_INT ), 64 ); + MKL_INT* BI = (MKL_INT*) mkl_malloc( (Bm+1) * sizeof( MKL_INT ), 64 ); + double* Bval = (double*) mkl_malloc( Bnnz * sizeof( double ), 64 ); + coo_to_csr(Bm, Bnnz, Bx, By, Bnz, BI, BJ, Bval); + + /** multiply two matrices */ + double* Cval, time; + MKL_INT* CJ; MKL_INT* CI; + mkl_mic_spgemm(Am, An, Annz, Aval, AJ, AI, Bn, Bnnz, Bval, BJ, BI, &Cval, &CJ, &CI, &time); + int i; - fprintf(f, "%%%%MatrixMarket matrix coordinate real general\n"); - fprintf(f, "%d %d %d\n", m, n, AI[m]-1); - for(i = 0; i < m; i++) { - int j; - for(j = AI[i]-1; j < AI[i+1]-1; j++) { - fprintf(f, "%d %d %g\n", i+1, AJ[j], Aval[j]); - } - } - fclose(f); -}//}}} + for(i=0;i<=Am;i++)AI[i]--; for(i=0;i -#include -#include -#include - -#include "mmio.h" - -int mm_read_unsymmetric_sparse(const char *fname, int *M_, int *N_, int *nz_, - double **val_, int **I_, int **J_) -{ - FILE *f; - MM_typecode matcode; - int M, N, nz; - int i; - double *val; - int *I, *J; - - if ((f = fopen(fname, "r")) == NULL) - return -1; - - - if (mm_read_banner(f, &matcode) != 0) - { - printf("mm_read_unsymetric: Could not process Matrix Market banner "); - printf(" in file [%s]\n", fname); - return -1; - } - - - - if ( !(mm_is_real(matcode) && mm_is_matrix(matcode) && - mm_is_sparse(matcode))) - { - fprintf(stderr, "Sorry, this application does not support "); - fprintf(stderr, "Market Market type: [%s]\n", - mm_typecode_to_str(matcode)); - return -1; - } - - /* find out size of sparse matrix: M, N, nz .... */ - - if (mm_read_mtx_crd_size(f, &M, &N, &nz) !=0) - { - fprintf(stderr, "read_unsymmetric_sparse(): could not parse matrix size.\n"); - return -1; - } - - *M_ = M; - *N_ = N; - *nz_ = nz; - - /* reseve memory for matrices */ - - I = (int *) malloc(nz * sizeof(int)); - J = (int *) malloc(nz * sizeof(int)); - val = (double *) malloc(nz * sizeof(double)); - - *val_ = val; - *I_ = I; - *J_ = J; - - /* NOTE: when reading in doubles, ANSI C requires the use of the "l" */ - /* specifier as in "%lg", "%lf", "%le", otherwise errors will occur */ - /* (ANSI C X3.159-1989, Sec. 4.9.6.2, p. 136 lines 13-15) */ - - for (i=0; i #include #include -#include "mmio.h" +#define MIC_TARGET +#include "../../common/mmio.h" +#include "../../common/util.h" /*}}}*/ /* global varibles and defines {{{*/ int nrepeat = 100; -int option_print_matrices = 0; -#define OPTION_NOPRINT_MATRICES 0 -#define OPTION_PRINT_MATRICES 1 char transa = 'n'; /*}}}*/ -/* method declarations {{{*/ -int main(int argc, char* argv[]); -void printmm_one(int m, double* Aval, int* AJ, int* AI); -void printfilemm_one(char* file, int m, int n, double* Aval, int* AJ, int* AI); -void printmm_zero(int m, double* Aval, int* AJ, int* AI); -/*}}}*/ - /** Multiply two sparse matrices which are stored in CSR format. MKL is used */ void mkl_cpu_spmv(const MKL_INT Am, const MKL_INT An, double* Aval, MKL_INT* AJ, MKL_INT* AI, double* xvec, double* yvec, double* time) { /*{{{*/ - -MKL_INT ierr; -MKL_INT request = 1; // symbolic multiplication -double alpha = 1.0; -double beta = 0.0; -char* matdescra = "G NF"; - -double time_st = dsecnd(); -int i; -for(i = 0; i < nrepeat; i++) { - /** y := alpha*A*x + beta*y */ - mkl_dcsrmv(&transa, &Am, &An, &alpha, matdescra, Aval, AJ, AI, (AI+1), xvec, &beta, yvec); -} -double time_end = dsecnd(); -*time = (time_end - time_st)/nrepeat; + MKL_INT ierr; + MKL_INT request = 1; // symbolic multiplication + double alpha = 1.0; + double beta = 0.0; + char* matdescra = "G NF"; + + double time_st = dsecnd(); + int i; + for(i = 0; i < nrepeat; i++) { + /** y := alpha*A*x + beta*y */ + mkl_dcsrmv(&transa, &Am, &An, &alpha, matdescra, Aval, AJ, AI, (AI+1), xvec, &beta, yvec); + } + double time_end = dsecnd(); + *time = (time_end - time_st)/nrepeat; } /*}}}*/ -/** Read Matrix Market file into COO matrix */ -void read_mm(char* strpath, int* pM, int* pN, int* prealnnz, int** pI, int** pJ, double** pval){ /*{{{*/ -/* -* taken from Matrix Market I/O library for ANSI C -* -* See http://math.nist.gov/MatrixMarket for details. -* -* -*/ - -int i, M, N, nz, *I, *J; -double* val; -int ret_code; -MM_typecode matcode; -FILE* f; -if ((f = fopen(strpath, "r")) == NULL) {fprintf(stderr, "Input matrix file %s cannot be opened to read.", strpath);exit(1);} -/* READ MATRIX */ -if (mm_read_banner(f, &matcode) != 0) { - printf("Could not process Matrix Market banner.\n"); exit(1); -} -/* This is how one can screen matrix types if their application */ -/* only supports a subset of the Matrix Market data types. */ -if (mm_is_complex(matcode) && mm_is_matrix(matcode) && mm_is_sparse(matcode) ) { - printf("Sorry, this application does not support "); - printf("Market Market type: [%s]\n", mm_typecode_to_str(matcode));exit(1); -} -/* find out size of sparse matrix .... */ -if ((ret_code = mm_read_mtx_crd_size(f, &M, &N, &nz)) !=0) exit(1); -/* reseve memory for matrices */ -I = (int *) malloc((mm_is_symmetric(matcode) ? 2*nz : nz) * sizeof(int)); -J = (int *) malloc((mm_is_symmetric(matcode) ? 2*nz : nz) * sizeof(int)); -val = (double *) malloc((mm_is_symmetric(matcode) ? 2*nz : nz) * sizeof(double)); -*pI = I; -*pJ = J; -*pval = val; -/* NOTE: when reading in doubles, ANSI C requires the use of the "l" */ -/* specifier as in "%lg", "%lf", "%le", otherwise errors will occur */ -/* (ANSI C X3.159-1989, Sec. 4.9.6.2, p. 136 lines 13-15) */ -int realnnz = 0; -for (i=0; i 0) { + mkl_set_num_threads(nthreads); + } else { + nthreads = mkl_get_max_threads(); } - realnnz++; -} -if (f !=stdin) fclose(f); -*pM = M; -*pN = N; -*prealnnz = realnnz; -} /* ENDOF read_mm }}}*/ - -/** Converts COO matrix to CSR matrix */ -void coo_to_csr(int m, int nnz, int* I, int* J, double* val, MKL_INT* AI, MKL_INT* AJ, double* Aval) { /*{{{*/ - -MKL_INT info = 0; -MKL_INT job[8]; -//job[1]=0; // zero based indexing in csr -job[1]=1; // one based indexing in csr - -job[2]=0; // zero based indexing in coo -job[3]=2; // I don't know -job[4]=nnz; // nnz - -job[0]=1; // coo to csr -job[5]=0; // Acsr and AJR allocated by user -//void mkl_dcsrcoo (MKL_INT * job, MKL_INT * n, double *Acsr, MKL_INT * AJR, MKL_INT *AIR, MKL_INT * nnz, double *Acoo, MKL_INT * ir, MKL_INT * jc, MKL_INT * info); -mkl_dcsrcoo (job,&m, Aval, AJ, AI, &nnz, val, I, J, &info); -} /* ENDOF coo_to_csr }}}*/ - -int main(int argc, char* argv[]) { /*{{{*/ -/** usage */ -int nrequired_args = 4; -if (argc != nrequired_args){ - fprintf(stderr, "NAME:\n\tmkl_spmv - multiply a sparse matrix with a dense vector\n"); - fprintf(stderr, "\nSYNOPSIS:\n"); - fprintf(stderr, "\tmkl_spmv MATRIX_A NUMBER_OF_THREADS PRINT_MATRIX\n"); - fprintf(stderr, "\nDESCRIPTION:\n"); - fprintf(stderr, "\tNUMBER_OF_THREADS: {0,1,2,...}\n"); - fprintf(stderr, "\t\t0: Use number of threads determined by MKL\n"); - fprintf(stderr, "\tPRINT_MATRIX: PRINT_YES, PRINT_NO\n"); - fprintf(stderr, "\nSAMPLE EXECUTION:\n"); - fprintf(stderr, "\t%s test.mtx 2 PRINT_YES\n", argv[0]); - exit(1); -} -/** parse arguments */ -int iarg = 1; -char* strpathA = argv[iarg]; iarg++; -int nthreads = atoi(argv[iarg]); iarg++; -if (nthreads > 0) { - mkl_set_num_threads(nthreads); -} else { - nthreads = mkl_get_max_threads(); -} -option_print_matrices = strcmp(argv[iarg], "PRINT_YES")==0?OPTION_PRINT_MATRICES:OPTION_NOPRINT_MATRICES; iarg++; -assert(nrequired_args == iarg); - -/** read matrix market file for A matrix */ -MKL_INT Am, An, Annz; -MKL_INT *Ax, *Ay; -double *Anz; -read_mm(strpathA, &Am, &An, &Annz, &Ax, &Ay, &Anz); - -/** construct csr storage for A matrix */ -MKL_INT* AJ = (MKL_INT*) mkl_malloc( Annz * sizeof( MKL_INT ), 64 ); -MKL_INT* AI = (MKL_INT*) mkl_malloc( (Am+1) * sizeof( MKL_INT ), 64 ); -double* Aval = (double*) mkl_malloc( Annz * sizeof( double ), 64 ); -coo_to_csr(Am, Annz, Ax, Ay, Anz, AI, AJ, Aval); - -double* xvec = (double*) mkl_malloc( An * sizeof( double ), 64 ); -double* yvec = (double*) mkl_malloc( Am * sizeof( double ), 64 ); -int i; -for(i=0;i -#include -#include -#include - -#include "mmio.h" - -int mm_read_unsymmetric_sparse(const char *fname, int *M_, int *N_, int *nz_, - double **val_, int **I_, int **J_) -{ - FILE *f; - MM_typecode matcode; - int M, N, nz; - int i; - double *val; - int *I, *J; - - if ((f = fopen(fname, "r")) == NULL) - return -1; - - - if (mm_read_banner(f, &matcode) != 0) - { - printf("mm_read_unsymetric: Could not process Matrix Market banner "); - printf(" in file [%s]\n", fname); - return -1; - } - - - - if ( !(mm_is_real(matcode) && mm_is_matrix(matcode) && - mm_is_sparse(matcode))) - { - fprintf(stderr, "Sorry, this application does not support "); - fprintf(stderr, "Market Market type: [%s]\n", - mm_typecode_to_str(matcode)); - return -1; - } - - /* find out size of sparse matrix: M, N, nz .... */ - - if (mm_read_mtx_crd_size(f, &M, &N, &nz) !=0) - { - fprintf(stderr, "read_unsymmetric_sparse(): could not parse matrix size.\n"); - return -1; - } - - *M_ = M; - *N_ = N; - *nz_ = nz; - - /* reseve memory for matrices */ - - I = (int *) malloc(nz * sizeof(int)); - J = (int *) malloc(nz * sizeof(int)); - val = (double *) malloc(nz * sizeof(double)); - - *val_ = val; - *I_ = I; - *J_ = J; - - /* NOTE: when reading in doubles, ANSI C requires the use of the "l" */ - /* specifier as in "%lg", "%lf", "%le", otherwise errors will occur */ - /* (ANSI C X3.159-1989, Sec. 4.9.6.2, p. 136 lines 13-15) */ - - for (i=0; i