Skip to content
mklspgemm.c 14 KiB
Newer Older
/*includes {{{*/
#include <stdio.h>
#include <assert.h>
#include <math.h>
kadir-hpi7's avatar
a  
kadir-hpi7 committed
#include <mkl.h>
#include "mmio.h"
/*}}}*/
/* global varibles and defines {{{*/
kadir-hpi7's avatar
a  
kadir-hpi7 committed
int nrepeat = 100;
int option_print_matrices = 0;
#define OPTION_NOPRINT_MATRICES 0
#define OPTION_PRINT_MATRICES 1
kadir-hpi7's avatar
a  
kadir-hpi7 committed
char transa = 'n';
typedef int csi;
typedef double csv;
csv zero = 0.0;
kadir-hpi7's avatar
kadir-hpi7 committed
#define CS_MAX(a,b) (((a) > (b)) ? (a) : (b))
kadir-hpi7's avatar
a  
kadir-hpi7 committed

typedef struct csr_t {
  csi  m; 
  csi  n; 
  csi  nzmax;
  csi  nr; 
  csi* r; 
  csi* p; 
  csi* j; 
  csv* x; 
} csr;
/* method declarations {{{*/
kadir-hpi7's avatar
a  
kadir-hpi7 committed
int  main(int argc, char* argv[]);
void printmm_one(int m, double* Aval, int* AJ, int* AI);
void printfilemm_one(char* file, int m, int n, double* Aval, int* AJ, int* AI);
void printmm_zero(int m, double* Aval, int* AJ, int* AI);
kadir-hpi7's avatar
kadir-hpi7 committed
csr *csr_spfree(csr *A);
/*csr util{{{*/

/* free workspace and return a sparse matrix result */
kadir-hpi7's avatar
kadir-hpi7 committed
csr *csr_done(csr *C, void *w, void *x, csi ok) {
        return(ok ? C : csr_spfree(C)); /* return result if OK, else free it */
}

/* wrapper for free */
kadir-hpi7's avatar
kadir-hpi7 committed
void *cs_free(void *p) {
        if (p)
                free(p); /* free p if it is not already NULL */
        return(NULL); /* return NULL to simplify the use of cs_free */
}
/* wrapper for realloc */
kadir-hpi7's avatar
kadir-hpi7 committed
void *csr_realloc(void *p, csi n, size_t size, csi *ok) {
        void *pnew = NULL;
        pnew = realloc(p, CS_MAX(n, 1) * size); /* realloc the block */
        *ok = (pnew != NULL); /* realloc fails if pnew is NULL */
        if (pnew == NULL) {
                printf("%d:reallocation failed, pnew is NULL\n", __LINE__);
        }
//printf("%s:%d: n=%d ok=%d\n", __FUNCTION__, __LINE__, n, *ok);
        return((*ok) ? pnew : p); /* return original p if failure */
}

/* wrapper for realloc */
kadir-hpi7's avatar
kadir-hpi7 committed
void *cs_realloc(void *p, csi n, size_t size, csi *ok) {
        void *pnew = NULL;
        pnew = realloc(p, CS_MAX(n, 1) * size); /* realloc the block */
        *ok = (pnew != NULL); /* realloc fails if pnew is NULL */
        if (pnew == NULL) {
                printf("reallocation failed\n");
        }
        return((*ok) ? pnew : p); /* return original p if failure */
}

/* change the max # of entries sparse matrix */
kadir-hpi7's avatar
kadir-hpi7 committed
csi csr_sprealloc(csr *A, csi nzmax) {
        csi ok, oki = 0, okj = 1, okx = 1;
        if (!A)
                return(0);
        if (nzmax <= 0)
                nzmax = A->p[A->m];
        A->j = (int*)csr_realloc(A->j, nzmax, sizeof(csi), &oki);
        if (A->x)
                A->x = (csv*)csr_realloc(A->x, nzmax, sizeof(csv), &okx);
        ok = (oki && okj && okx);
        if (ok)
                A->nzmax = nzmax;
        return(ok);
}

/* free a sparse matrix */
kadir-hpi7's avatar
kadir-hpi7 committed
csr *csr_spfree(csr *A) {
        if (!A)
                return(NULL); /* do nothing if A already NULL */
        cs_free(A->p);
        A->p = NULL;
        cs_free(A->j);
        A->j = NULL;
        cs_free(A->x);
        A->x = NULL;
        cs_free(A->r);
        A->r = NULL;
        cs_free(A); /* free the cs struct and return NULL */
kadir-hpi7's avatar
a  
kadir-hpi7 committed
    return NULL;
/* allocate a sparse matrix (triplet form or compressed-ROW form) */
kadir-hpi7's avatar
kadir-hpi7 committed
csr *csr_spalloc(csi m, csi n, csi nzmax, int values, int triplet, csv f) {
        csr* A = (csr*)calloc(1, sizeof(csr)); /* allocate the cs struct */
        if (!A) {
                perror("sparse allocation failed");
                return(NULL); /* out of memory */
        }
        A->m = m; /* define dimensions and nzmax */
        A->n = n;
        A->nzmax = nzmax = CS_MAX(nzmax, 0);
        A->nr = 0;  // number of nonzero rows
        A->p = (csi*)calloc(m + 2, sizeof(csi));
        A->j = (csi*)calloc(CS_MAX(nzmax,1), sizeof(csi));
        A->x = (csv*)calloc(CS_MAX(nzmax,1), sizeof(csv));
        return((!A->p || !A->j || !A->x) ? csr_spfree(A) : A);
}/*}}}*/
kadir-hpi7's avatar
kadir-hpi7 committed

/** Multiply two sparse matrices which are stored in CSR format. MKL is used */
csr *csr_multiply(csi Am, csi An, csi Anzmax, const csi* Ap, const csi* Aj, const csv* Ax, csi Bm, csi Bn, csi Bnzmax, const csi* Bp, const csi* Bj, const csv* Bx, long* nummult, csi* xb, csv* x) { /*{{{*/
kadir-hpi7's avatar
a  
kadir-hpi7 committed
    csv tf = 0;
    csi p, jp, j, kp, k, i, nz = 0, anz, *Cp, *Cj, m, n,
        bnz, values = 1;
    csv *Cx;
    csr *C;
    if (An != Bm)
        return(NULL);
    if (Anzmax == 0 || Bnzmax == 0) {
        C = csr_spalloc(Am, Bn, 0, values, 0, tf);
        return C;
    }
    m = Am;
    anz = Ap[Am];
    n = Bn;
    bnz = Bp[Bm];
    for(i = 0; i < n; i++) xb[i] = 0;
    for(i = 0; i < n; i++)
        xb[i] = 0;
    values = (Ax != NULL) && (Bx != NULL);
    csi tnz = (anz + bnz) * 2;
    C = csr_spalloc(m, n, tnz, values, 0, tf); /* allocate result */
    if (!C || !xb || (values && !x))
        return (csr_done(C, xb, x, 0));
    Cp = C->p;
    for (i = 0; i < m; i++) {
        if ( ( (nz + n) > C->nzmax ) ) {
            if(!csr_sprealloc(C, (2 * (C->nzmax) + n) ) ) {
                return (csr_done(C, xb, x, 0)); // out of memory
            } else {
            }
        }
        Cj = C->j;
        Cx = C->x; /* C->j and C->x may be reallocated */
        Cp[i] = nz; /* row i of C starts here */
        for (jp = Ap[i]; jp < Ap[i + 1]; jp++) {
            j = Aj[jp];
            for (kp = Bp[j]; kp < Bp[j + 1]; kp++) {
                k = Bj[kp]; /* B(i,j) is nonzero */
                if (xb[k] != i + 1) {
                    xb[k] = i + 1; /* i is new entry in column j */
                    Cj[nz++] = k; /* add i to pattern of C(:,j) */
                    if (x) {
                        x[k] = Ax[jp] * Bx[kp]; /* x(i) = beta*A(i,j) */
                        (*nummult)++;
                    }
                } else if (x) {
                    x[k] += (Ax[jp] * Bx[kp]); /* i exists in C(:,j) already */
                    (*nummult)++;
                }
            }
        }
        if (values)
            for (p = Cp[i]; p < nz; p++)
                Cx[p] = x[Cj[p]];
    }
    Cp[m] = nz; /* finalize the last row of C */
    csr_sprealloc(C, 0); /* remove extra space from C */
    xb = NULL;
    x = NULL;
kadir-hpi7's avatar
kadir-hpi7 committed
    return C;
}/*}}}*/
kadir-hpi7's avatar
kadir-hpi7 committed

/** Multiply two sparse matrices which are stored in CSR format. MKL is used */
void mkl_cpu_spgemm(MKL_INT Am, MKL_INT An, MKL_INT Annz, double* Aval, MKL_INT* AJ, MKL_INT* AI, MKL_INT Bn, MKL_INT Bnnz, double* Bval, MKL_INT* BJ, MKL_INT* BI, double** pCval, MKL_INT** pCJ, MKL_INT** pCI, double* time) {  /*{{{*/

MKL_INT* CJ = NULL;
double* Cval = NULL;
kadir-hpi7's avatar
a  
kadir-hpi7 committed
MKL_INT sort = 3;    // sort everything
MKL_INT* CI = (MKL_INT*)mkl_malloc( (Am+2) * sizeof( MKL_INT ), 64 );
kadir-hpi7's avatar
a  
kadir-hpi7 committed
MKL_INT nnzmax = 0;    // nnzmax is zero in case of symbolic&numeric usage of mkl_?csrmultcsr
MKL_INT ierr;
kadir-hpi7's avatar
a  
kadir-hpi7 committed
MKL_INT request = 1;    // symbolic multiplication
mkl_dcsrmultcsr(&transa, &request, &sort, &Am, &An, &Bn, Aval, AJ, AI, Bval, BJ, BI, Cval, CJ, CI, &nnzmax, &ierr);

kadir-hpi7's avatar
a  
kadir-hpi7 committed
request = 2;        //numeric multiplication
int Cnnz = CI[Am]-1;
int Cval_size = Cnnz + 1;
kadir-hpi7's avatar
a  
kadir-hpi7 committed
CJ = (MKL_INT*)mkl_malloc( Cval_size    *    sizeof( MKL_INT ), 64 );
Cval = (double*)mkl_malloc( Cval_size    *    sizeof( double ), 64 );
double time_st = dsecnd();
int i;
for(i = 0; i < nrepeat; i++) {
    mkl_dcsrmultcsr(&transa, &request, &sort, &Am, &An, &Bn, Aval, AJ, AI, Bval, BJ, BI, Cval, CJ, CI, &nnzmax, &ierr);
}
double time_end = dsecnd();
*time = (time_end - time_st)/nrepeat;
*pCval = Cval; *pCJ = CJ; *pCI = CI;
kadir-hpi7's avatar
kadir-hpi7 committed
} /*}}}*/ 

/** Read Matrix Market file into COO matrix */
kadir-hpi7's avatar
a  
kadir-hpi7 committed
void read_mm(char* strpath, int* pM, int* pN, int* prealnnz, int** pI, int** pJ, double** pval){ /*{{{*/
kadir-hpi7's avatar
kadir-hpi7 committed
/* 
*   taken from Matrix Market I/O library for ANSI C
*
*   See http://math.nist.gov/MatrixMarket for details.
*
*
*/

int i, M, N, nz, *I, *J;
double* val;
int ret_code;
MM_typecode matcode;
FILE* f;
if ((f = fopen(strpath, "r")) == NULL) {fprintf(stderr, "Input matrix file %s cannot be opened to read.", strpath);exit(1);}
/* READ MATRIX */
if (mm_read_banner(f, &matcode) != 0) {
kadir-hpi7's avatar
a  
kadir-hpi7 committed
    printf("Could not process Matrix Market banner.\n");    exit(1);
}
/*  This is how one can screen matrix types if their application */
/*  only supports a subset of the Matrix Market data types.      */
if (mm_is_complex(matcode) && mm_is_matrix(matcode) && mm_is_sparse(matcode) ) {
kadir-hpi7's avatar
a  
kadir-hpi7 committed
    printf("Sorry, this application does not support ");
    printf("Market Market type: [%s]\n", mm_typecode_to_str(matcode));exit(1);
}
/* find out size of sparse matrix .... */
if ((ret_code = mm_read_mtx_crd_size(f, &M, &N, &nz)) !=0) exit(1);
/* reseve memory for matrices */
I = (int *) malloc((mm_is_symmetric(matcode) ? 2*nz : nz) * sizeof(int));
J = (int *) malloc((mm_is_symmetric(matcode) ? 2*nz : nz) * sizeof(int));
val = (double *) malloc((mm_is_symmetric(matcode) ? 2*nz : nz) * sizeof(double));
*pI = I;
*pJ = J;
*pval = val;
/* NOTE: when reading in doubles, ANSI C requires the use of the "l"  */
/*   specifier as in "%lg", "%lf", "%le", otherwise errors will occur */
/*  (ANSI C X3.159-1989, Sec. 4.9.6.2, p. 136 lines 13-15)            */
int realnnz = 0;
for (i=0; i<nz; i++) {
kadir-hpi7's avatar
a  
kadir-hpi7 committed
    if(mm_is_pattern(matcode)) {
        fscanf(f, "%d %d\n", &I[realnnz], &J[realnnz]);
        val[realnnz] = 1.0;
    }
    else
        fscanf(f, "%d %d %lg\n", &I[realnnz], &J[realnnz], &val[realnnz]);
    I[realnnz]--;  /* adjust from 1-based to 0-based */
        J[realnnz]--;
kadir-hpi7's avatar
a  
kadir-hpi7 committed
    if(mm_is_symmetric(matcode) && I[realnnz] != J[realnnz]) {
        I[realnnz+1] = J[realnnz];
        J[realnnz+1] = I[realnnz];
        val[realnnz+1] = val[realnnz];
        realnnz++;
    }
    realnnz++;
}
if (f !=stdin) fclose(f);
*pM = M;
*pN = N;
*prealnnz = realnnz;
} /* ENDOF read_mm }}}*/
kadir-hpi7's avatar
kadir-hpi7 committed

/** Converts COO matrix to CSR matrix */
kadir-hpi7's avatar
a  
kadir-hpi7 committed
void coo_to_csr(int m, int nnz, int* I, int* J, double* val, MKL_INT* AI, MKL_INT* AJ, double* Aval) { /*{{{*/

MKL_INT info = 0;
MKL_INT job[8];   
//job[1]=0; // zero based indexing in csr
job[1]=1; // one based indexing in csr

job[2]=0; // zero based indexing in coo
job[3]=2; // I don't know
job[4]=nnz; // nnz

job[0]=1;  // coo to csr
job[5]=0;  // Acsr and AJR allocated by user
//void mkl_dcsrcoo (MKL_INT * job, MKL_INT * n, double *Acsr, MKL_INT * AJR, MKL_INT *AIR, MKL_INT * nnz, double *Acoo, MKL_INT * ir, MKL_INT * jc, MKL_INT * info);
mkl_dcsrcoo (job,&m, Aval, AJ, AI, &nnz, val, I, J, &info);
} /* ENDOF coo_to_csr }}}*/
kadir-hpi7's avatar
a  
kadir-hpi7 committed
int main(int argc, char* argv[]) { /*{{{*/
/** usage */
int nrequired_args = 6;
if (argc != nrequired_args){
kadir-hpi7's avatar
a  
kadir-hpi7 committed
    fprintf(stderr, "NAME:\n\tmkl_spgemm - multiply two sparse matrices\n");
    fprintf(stderr, "\nSYNOPSIS:\n");
    fprintf(stderr, "\tln MATRIX_A MATRIX_B MATRIX_C NUMBER_OF_THREADS PRINT_MATRICES\n");
    fprintf(stderr, "\nDESCRIPTION:\n");
    fprintf(stderr, "\tNUMBER_OF_THREADS: {0,1,2,...}\n");
    fprintf(stderr, "\t\t0: Use number of threads determined by MKL\n");
    fprintf(stderr, "\tPRINT_MATRICES: PRINT_YES, PRINT_NO\n");
    exit(1);
kadir-hpi7's avatar
a  
kadir-hpi7 committed
/** parse arguments */
int iarg = 1;
kadir-hpi7's avatar
a  
kadir-hpi7 committed
char* strpathA = argv[iarg];    iarg++;
char* strpathB = argv[iarg];    iarg++;
char* strpathC = argv[iarg];    iarg++;
int nthreads = atoi(argv[iarg]);    iarg++;
if (nthreads > 0) {
    mkl_set_num_threads(nthreads);
} else {
    nthreads = mkl_get_max_threads();
}
option_print_matrices = strcmp(argv[iarg], "PRINT_YES")==0?OPTION_PRINT_MATRICES:OPTION_NOPRINT_MATRICES;    iarg++;
assert(nrequired_args == iarg);
kadir-hpi7's avatar
a  
kadir-hpi7 committed

/** read matrix market file for A matrix */
MKL_INT Am, An, Annz;
kadir-hpi7's avatar
a  
kadir-hpi7 committed
MKL_INT *Ax, *Ay;  
double *Anz;
read_mm(strpathA, &Am, &An, &Annz, &Ax, &Ay, &Anz);

kadir-hpi7's avatar
a  
kadir-hpi7 committed
/** construct csr storage for A matrix */
MKL_INT* AJ = (MKL_INT*) mkl_malloc( Annz * sizeof( MKL_INT ), 64 );
MKL_INT* AI = (MKL_INT*) mkl_malloc( (Am+1) * sizeof( MKL_INT ), 64 );
kadir-hpi7's avatar
a  
kadir-hpi7 committed
double* Aval = (double*) mkl_malloc( Annz * sizeof( double ),  64 );
coo_to_csr(Am, Annz, Ax, Ay, Anz, AI, AJ, Aval);    
kadir-hpi7's avatar
a  
kadir-hpi7 committed
/** read matrix market file for B matrix */
MKL_INT Bm, Bn, Bnnz;   
kadir-hpi7's avatar
a  
kadir-hpi7 committed
MKL_INT *Bx, *By;  
double *Bnz;
read_mm(strpathB, &Bm, &Bn, &Bnnz, &Bx, &By, &Bnz);

kadir-hpi7's avatar
a  
kadir-hpi7 committed
/** construct csr storage for B matrix */
MKL_INT* BJ = (MKL_INT*) mkl_malloc( Bnnz * sizeof( MKL_INT ), 64 );
MKL_INT* BI = (MKL_INT*) mkl_malloc( (Bm+1) * sizeof( MKL_INT ), 64 );
kadir-hpi7's avatar
a  
kadir-hpi7 committed
double* Bval = (double*) mkl_malloc( Bnnz * sizeof( double ),  64 );
coo_to_csr(Bm, Bnnz, Bx, By, Bnz, BI, BJ, Bval);    
kadir-hpi7's avatar
a  
kadir-hpi7 committed
/** multiply two matrices */
double* Cval, time; 
MKL_INT* CJ; MKL_INT* CI;
kadir-hpi7's avatar
kadir-hpi7 committed
mkl_cpu_spgemm(Am, An, Annz, Aval, AJ, AI, Bn, Bnnz, Bval, BJ, BI, &Cval, &CJ, &CI, &time);
kadir-hpi7's avatar
a  
kadir-hpi7 committed
int i;
for(i=0;i<=Am;i++)AI[i]--;    for(i=0;i<Annz;i++)AJ[i]--;
for(i=0;i<=Bm;i++)BI[i]--;    for(i=0;i<Bnnz;i++)BJ[i]--;
printmm_one(Am, Cval, CJ, CI);
kadir-hpi7's avatar
a  
kadir-hpi7 committed

/** In order to write the output C matrix to file, uncomment the following line */
//printfilemm_one(strpathC, Am, Bn, Cval, CJ, CI);

/** run my SpGEMM routine in order to find number of multiply-and-add operations */
long nummult = 0;
csi* xb = (csi*)calloc(Bn, sizeof(csi));
csv* x = (csv*)calloc(Bn, sizeof(csv));
csr* C = csr_multiply(Am, An, Annz, AI, AJ, Aval, Bm, Bn, Bnnz, BI, BJ, Bval, &nummult, xb, x);
kadir-hpi7's avatar
a  
kadir-hpi7 committed
double gflop = 2 * (double) nummult / 1e9;

/** print gflop per second and time */
printf("%d\t", nthreads);
printf("%g\t", (gflop/time));
printf("%g\t", time);
printf("%s\t", strpathA);
printf("%s\t", strpathB);
printf("%s\t", strpathC);
printf("\n");

/** free allocated space */
mkl_free(AI);
mkl_free(AJ);
mkl_free(Aval);
mkl_free(BI);
mkl_free(BJ);
mkl_free(Bval);
return 0;
} /* ENDOF main }}}*/
kadir-hpi7's avatar
a  
kadir-hpi7 committed

/** Prints matrix in CSR format */
void printmm_one(int m, double* Aval, int* AJ, int* AI){ //{{{

    if (option_print_matrices == OPTION_NOPRINT_MATRICES)
        return;
    int i;
    for(i = 0; i < m; i++) {
        printf("%d: ", i+1);
        int j;
        for(j = AI[i]-1; j < AI[i+1]-1; j++) {
            printf("%d:%g  ", AJ[j], Aval[j]);
        }
        printf("\n");
    }
    printf("\n");
kadir-hpi7's avatar
a  
kadir-hpi7 committed

/** Writes matrix in CSR format in to a file using Matrix Market format */
void printfilemm_one(char* file, int m, int n, double* Aval, int* AJ, int* AI){//{{{

    FILE* f = fopen(file, "w");
    if(f == NULL){
        printf("%s %s %d: %s cannot be opened to write matrix\n", __FILE__, __PRETTY_FUNCTION__, __LINE__, file);
        exit(1); 
    }
    int i;
    fprintf(f, "%%%%MatrixMarket matrix coordinate real general\n");
    fprintf(f, "%d %d %d\n", m, n, AI[m]-1);
    for(i = 0; i < m; i++) {
        int j;
        for(j = AI[i]-1; j < AI[i+1]-1; j++) {
            fprintf(f, "%d %d %g\n", i+1, AJ[j], Aval[j]);
        }
    }
    fclose(f);
kadir-hpi7's avatar
a  
kadir-hpi7 committed