Skip to content
mklspmv.c 7.25 KiB
Newer Older
#include <stdio.h>
#include <assert.h>
#include <math.h>
#include "mkl.h"
#include "mkl_types.h"
#include "mkl_spblas.h"

#include "mmio.h"

// mkl_?csrgemv          y := A*x   one based
// void mkl_dcsrgemv         (char *transa, MKL_INT *m, double *a, MKL_INT *ia, MKL_INT *ja, double *x, double *y);

// mkl_cspblas_?csrgemv  y := A*x   zero based
// void mkl_cspblas_dcsrgemv (char *transa, MKL_INT *m, double *a, MKL_INT *ia, MKL_INT *ja, double *x, double *y);


// mkl_?csrmv            y := alpha*A*x + beta*y
// void mkl_dcsrmv (char *transa, MKL_INT *m, MKL_INT *k, double *alpha, char *matdescra, double *val, MKL_INT *indx, MKL_INT *pntrb, MKL_INT *pntre, double *x, double *beta, double *y);




// trans='n'
int main(int argc, char* argv[]) {
    int ret_code;
    MM_typecode matcode;
    FILE *f;
    int M, N, nz;   
    int i, *I, *J;
    double *val;

    if (argc != 5)
	{
		fprintf(stderr, "Usage: %s [martix-market-filename] [nworker] [num repeat] [null|row/col permutation file]\n", argv[0]);
		exit(1);
	}
    else    
    { 
        if ((f = fopen(argv[1], "r")) == NULL) 
            exit(1);
    }

	int nworker = atoi(argv[2]);
	int num_repeat = atoi(argv[3]);
	char* fnrcperm = argv[4];

    if (mm_read_banner(f, &matcode) != 0)
    {
        printf("Could not process Matrix Market banner.\n");
        exit(1);
    }


    /*  This is how one can screen matrix types if their application */
    /*  only supports a subset of the Matrix Market data types.      */

    if (mm_is_complex(matcode) && mm_is_matrix(matcode) && 
            mm_is_sparse(matcode) )
    {
        printf("Sorry, this application does not support ");
        printf("Market Market type: [%s]\n", mm_typecode_to_str(matcode));
        exit(1);
    }

    /* find out size of sparse matrix .... */

    if ((ret_code = mm_read_mtx_crd_size(f, &M, &N, &nz)) !=0)
        exit(1);


    /* reseve memory for matrices */

    I = (int *) malloc((mm_is_symmetric(matcode) ? 2*nz : nz) * sizeof(int));
    J = (int *) malloc((mm_is_symmetric(matcode) ? 2*nz : nz) * sizeof(int));
    val = (double *) malloc((mm_is_symmetric(matcode) ? 2*nz : nz) * sizeof(double));


    /* NOTE: when reading in doubles, ANSI C requires the use of the "l"  */
    /*   specifier as in "%lg", "%lf", "%le", otherwise errors will occur */
    /*  (ANSI C X3.159-1989, Sec. 4.9.6.2, p. 136 lines 13-15)            */
	int realnnz = 0;
	
    for (i=0; i<nz; i++)
    {
		if(mm_is_pattern(matcode)) {
			fscanf(f, "%d %d\n", &I[realnnz], &J[realnnz]);
			val[realnnz] = 1.0;
		}
		else
			fscanf(f, "%d %d %lg\n", &I[realnnz], &J[realnnz], &val[realnnz]);
		I[realnnz]--;  /* adjust from 1-based to 0-based */
        J[realnnz]--;
		if(mm_is_symmetric(matcode) && I[realnnz] != J[realnnz]) {
			I[realnnz+1] = J[realnnz];
			J[realnnz+1] = I[realnnz];
			val[realnnz+1] = val[realnnz];
			realnnz++;
		}
		realnnz++;
    }
	
    if (f !=stdin) fclose(f);

    /************************/
    /* now write out matrix */
    /************************/

   // mm_write_banner(stdout, matcode);
   // mm_write_mtx_crd_size(stdout, M, N, nz);
	//for (i=0; i<realnnz; i++)	fprintf(stderr, "%d %d %20.19g\n", I[i]+1, J[i]+1, val[i]);

	//int* rperm = NULL;
	//int* cperm = NULL;

	if(!strcasecmp(fnrcperm, "null")) {
	} else {
		int* rperm = (int*)calloc(M, sizeof(int));
		int* cperm = (int*)calloc(N, sizeof(int));
		FILE* fpPerm = fopen(fnrcperm, "r");
		if(fpPerm == NULL) {
			fprintf(stderr, "%s cannot be opened\n", fnrcperm);
        		exit(-3);
		}
		for(i = 0; i < M; i++){
			fscanf(fpPerm, "%d\n", &rperm[i]);
		}
		for(i = 0; i < N; i++){
			fscanf(fpPerm, "%d\n", &cperm[i]);
		}
		fclose(fpPerm);
		for (i=0; i<realnnz; i++){
			I[i] = rperm[I[i]];
			J[i] = cperm[J[i]];
		}
	}
	
	
	MKL_INT		m = M, n = N, nnz = realnnz;
	double		alpha = 1.0, beta = 0.0;
	char		transa;
	char		matdescra[6];

	double* vecin	= (double*)	mkl_malloc( n	*	sizeof( double ),  64 );
	double* vecout 	= (double*)	mkl_malloc( m	*	sizeof( double ),  64 );
	double* Acsr	= (double*)	mkl_malloc( nnz	*	sizeof( double ),  64 );
	MKL_INT* AJ		= (MKL_INT*)mkl_malloc( nnz	*	sizeof( MKL_INT ), 64 );
	MKL_INT* AI		= (MKL_INT*)mkl_malloc( (n+1)	*	sizeof( MKL_INT ), 64 );

	// coo to csr
	MKL_INT info = 0;
	MKL_INT job[8];
      
	job[1]=0; // zero based indexing in csr
	job[2]=0; // zero based indexing in coo
	job[3]=2; // I don't know
	job[4]=nnz; // nnz

	job[0]=1;  // coo to csr
	job[5]=0;  // Acsr and AJR allocated by user
//void mkl_dcsrcoo (MKL_INT * job, MKL_INT * n, double *Acsr, MKL_INT * AJR, MKL_INT *AIR, MKL_INT * nnz, double *Acoo, MKL_INT * ir, MKL_INT * jc, MKL_INT * info);
	mkl_dcsrcoo (job,&m, Acsr, AJ, AI, &nnz, val, I, J, &info);

/*	for(i = 0; i < m; i++) {
		printf("%d: ", i+1);
		int j;
		for(j = AI[i]; j < AI[i+1]; j++) {
			printf("%d:%g  ", AJ[j]+1, Acsr[j]);
		}
		printf("\n");
	}
*/
	for(i = 0; i < n; i++) {
		vecin[i] = 1.0; 
	}
	for(i = 0; i < m; i++) {
		vecout[i] = 0.0;
	}

	transa = 'n';
	int max_threads = 0;
	int micdev = 0;
	#pragma offload target(mic:micdev) inout(max_threads)
	{
		max_threads = mkl_get_max_threads();
		mkl_set_num_threads(nworker);
	}   printf("MIC started \n"); fflush(stdout);
	#pragma offload target(mic:micdev) \
		in(transa) \
		in(m) \
		in(Acsr:length(nnz)	free_if(0)) \
		in(AI:length(n+1)	free_if(0)) \
		in(AJ:length(nnz)	free_if(0)) \
		in(vecin:length(n)	free_if(0)) \
		in(vecout:length(m)	free_if(0))
	{}
	#pragma offload target(mic:micdev) \
		in(transa) \
		in(m) \
		in(Acsr:length(nnz)	alloc_if(0)	free_if(0)) \
		in(AI:length(n+1)	alloc_if(0)	free_if(0)) \
		in(AJ:length(nnz)	alloc_if(0)	free_if(0)) \
		in(vecin:length(n)	alloc_if(0)	free_if(0)) \
		in(vecout:length(m)	alloc_if(0)	free_if(0))
	{
		int i;
		for(i = 0; i < 1; i++) {
			mkl_cspblas_dcsrgemv(&transa, &m, Acsr, AI, AJ, vecin, vecout);
		}
	}

	#pragma offload target(mic:micdev) \
		in(transa) \
		in(m) \
		nocopy(Acsr:length(nnz)	alloc_if(0)	free_if(0)) \
		nocopy(AI:length(n+1)	alloc_if(0)	free_if(0)) \
		nocopy(AJ:length(nnz)	alloc_if(0)	free_if(0)) \
		nocopy(vecin:length(n)	alloc_if(0)	free_if(0)) \
		nocopy(vecout:length(m)	alloc_if(0)	free_if(0))
	{
		int i;
		for(i = 0; i < 1; i++) {
			mkl_cspblas_dcsrgemv(&transa, &m, Acsr, AI, AJ, vecin, vecout);
		}
	}
	#pragma offload target(mic:micdev) \
		nocopy(Acsr:length(nnz)	alloc_if(0)	free_if(1)) \
		nocopy(AI:length(n+1)	alloc_if(0)	free_if(1)) \
		nocopy(AJ:length(nnz)	alloc_if(0)	free_if(1)) \
		nocopy(vecin:length(n)	alloc_if(0)	free_if(1)) \
		out(vecout:length(m)	alloc_if(0)	free_if(1))
	{}
	double* vecincpu	= (double*)	mkl_malloc( n	*	sizeof( double ),  64 );
	double* vecoutcpu 	= (double*)	mkl_malloc( m	*	sizeof( double ),  64 );
	for(i = 0; i < n; i++) {
		vecincpu[i] = 1.0; 
	}
	for(i = 0; i < m; i++) {
		vecoutcpu[i] = 0.0;
	}
	double s_initial = dsecnd();
	//mkl_cspblas_dcsrgemv(&transa, &m, values, rowIndex, columns, sol_vec, rhs_vec);
	for(i = 0; i < 1; i++) {
		mkl_cspblas_dcsrgemv(&transa, &m, Acsr, AI, AJ, vecincpu, vecoutcpu);
	}
	double s_elapsed = dsecnd() - s_initial; // seconds

	double maxdiff = 0.00001;
	for(i = 0; i < m; i++){
		double diff = fabs(vecout[i] - vecoutcpu[i]);
		if(diff>maxdiff){
			printf("%g\t%g\t%g\n", diff, vecout[i], vecoutcpu[i]);
		}
	}
	printf("\n");
	printf("%s %d %d %g %d %s\n", argv[1], nworker, max_threads, s_elapsed, num_repeat, fnrcperm);
//	mkl_free(A);

	return 0;

}