Commit ebe961ec authored by kadir-hpi7's avatar kadir-hpi7

initial version of MKL SpGEMM on CPU

parent fac684fa
make;./mklspmm ../mtx/cp2k-h2o-e6.mtx 30 100 null
CFLAGS:=-O0 -g3 #FIXME
#CFLAGS:=-O1
all: mklspmm
#all: mklspmv mklspmm
mklspmv: mklspmv.o mmio.o
icc mklspmv.o mmio.o -o $@ -mkl -lm -offload-option,mic,ld,"-rpath /opt/intel/mic/myo/lib/ -rpath /opt/intel/mic/coi/device-linux-release/lib/"
mklspmm: mklspmm.o mmio.o
icc mklspmm.o mmio.o -o $@ -mkl -lm -offload-option,mic,ld,"-rpath /opt/intel/mic/myo/lib/ -rpath /opt/intel/mic/coi/device-linux-release/lib/"
.c.o:
icc $(CFLAGS) $(OPTION) -o $@ -c $<
run:
./mv ../mtx/tinyA.mtx
clean:
@rm mmio.o mklspmv mklspmm mklspmm.o mklspmv.o
make;./mklspmm ../mtx/smallA.mtx ../mtx/smallA.mtx 30 100 -1 null null perm5.txt PRINTMATRICES
This diff is collapsed.
#ifndef __COMMONMM_H__
#define __COMMONMM_H_
#define FIELD_LENGTH 128
//char filename[FIELD_LENGTH];
enum spmv_target { use_cpu, use_mkl, use_mic, use_mkl_mic, use_mic_rowcyclic };
char *target_str[] = { "CPU", "MKL", "MIC", "MKL_MIC", "MIC_ROWCYCLIC" };
enum enum_datastructure { DS_ICSR, DS_CSR };
char *str_datastructure[] = { "ICSR", "CSR" };
char *str_algorithm[] = { "OUTER", "INNER" };
__declspec(target(mic)) int datastructure;
typedef int csi;
typedef double csv;
__declspec(target(mic)) csv zero = 0.0;
__declspec(target(mic)) int use_sparse_c;
__declspec(target(mic)) int size_dacc;
__declspec(target(mic)) csv* dacc;
typedef struct csr_t {
csi m;
csi n;
csi nzmax;
csi nr;
csi* r;
csi* p;
csi* j;
csv* x;
int numInMats; // number of input matrices stored in this struct
int numRows;
int nnonempty_rows;
int numCols;
int totalnnz;
// int totalnnzmax;
int* nItems; // nnz of each matrix
int* pirow;
int size_pirow;
int* pnr;
int* h_rowDelimiters;
int size_h_rowDelimiters;
int* h_cols;
csv* h_val;
char filename[FIELD_LENGTH];
} csr;
#endif
/*
* Matrix Market I/O example program
*
* Read a real (non-complex) sparse matrix from a Matrix Market (v. 2.0) file.
* and copies it to stdout. This porgram does nothing useful, but
* illustrates common usage of the Matrix Matrix I/O routines.
* (See http://math.nist.gov/MatrixMarket for details.)
*
* Usage: a.out [filename] > output
*
*
* NOTES:
*
* 1) Matrix Market files are always 1-based, i.e. the index of the first
* element of a matrix is (1,1), not (0,0) as in C. ADJUST THESE
* OFFSETS ACCORDINGLY offsets accordingly when reading and writing
* to files.
*
* 2) ANSI C requires one to use the "l" format modifier when reading
* double precision floating point numbers in scanf() and
* its variants. For example, use "%lf", "%lg", or "%le"
* when reading doubles, otherwise errors will occur.
*/
#include <stdio.h>
#include <stdlib.h>
#include "mmio.h"
int main(int argc, char *argv[])
{
int ret_code;
MM_typecode matcode;
FILE *f;
int M, N, nz;
int i, *I, *J;
double *val;
if (argc < 2)
{
fprintf(stderr, "Usage: %s [martix-market-filename]\n", argv[0]);
exit(1);
}
else
{
if ((f = fopen(argv[1], "r")) == NULL)
exit(1);
}
if (mm_read_banner(f, &matcode) != 0)
{
printf("Could not process Matrix Market banner.\n");
exit(1);
}
/* This is how one can screen matrix types if their application */
/* only supports a subset of the Matrix Market data types. */
if (mm_is_complex(matcode) && mm_is_matrix(matcode) &&
mm_is_sparse(matcode) )
{
printf("Sorry, this application does not support ");
printf("Market Market type: [%s]\n", mm_typecode_to_str(matcode));
exit(1);
}
/* find out size of sparse matrix .... */
if ((ret_code = mm_read_mtx_crd_size(f, &M, &N, &nz)) !=0)
exit(1);
/* reseve memory for matrices */
I = (int *) malloc(nz * sizeof(int));
J = (int *) malloc(nz * sizeof(int));
val = (double *) malloc(nz * sizeof(double));
/* NOTE: when reading in doubles, ANSI C requires the use of the "l" */
/* specifier as in "%lg", "%lf", "%le", otherwise errors will occur */
/* (ANSI C X3.159-1989, Sec. 4.9.6.2, p. 136 lines 13-15) */
for (i=0; i<nz; i++)
{
fscanf(f, "%d %d %lg\n", &I[i], &J[i], &val[i]);
I[i]--; /* adjust from 1-based to 0-based */
J[i]--;
}
if (f !=stdin) fclose(f);
/************************/
/* now write out matrix */
/************************/
mm_write_banner(stdout, matcode);
mm_write_mtx_crd_size(stdout, M, N, nz);
for (i=0; i<nz; i++)
fprintf(stdout, "%d %d %20.19g\n", I[i]+1, J[i]+1, val[i]);
return 0;
}
/*
* Matrix Market I/O example program
*
* Create a small sparse, coordinate matrix and print it out
* in Matrix Market (v. 2.0) format to standard output.
*
* (See http://math.nist.gov/MatrixMarket for details.)
*
*/
#include <stdio.h>
#include <stdlib.h>
#include "mmio.h"
#define nz 4
#define M 10
#define N 10
int main()
{
MM_typecode matcode;
int I[nz] = { 0, 4, 2, 8 };
int J[nz] = { 3, 8, 7, 5 };
double val[nz] = {1.1, 2.2, 3.2, 4.4};
int i;
mm_initialize_typecode(&matcode);
mm_set_matrix(&matcode);
mm_set_coordinate(&matcode);
mm_set_real(&matcode);
mm_write_banner(stdout, matcode);
mm_write_mtx_crd_size(stdout, M, N, nz);
/* NOTE: matrix market files use 1-based indices, i.e. first element
of a vector has index 1, not 0. */
for (i=0; i<nz; i++)
fprintf(stdout, "%d %d %10.3g\n", I[i]+1, J[i]+1, val[i]);
return 0;
}
/*******************************************************************************
! Copyright(C) 1999-2013 Intel Corporation. All Rights Reserved.
!
! The source code, information and material ("Material") contained herein is
! owned by Intel Corporation or its suppliers or licensors, and title to such
! Material remains with Intel Corporation or its suppliers or licensors. The
! Material contains proprietary information of Intel or its suppliers and
! licensors. The Material is protected by worldwide copyright laws and treaty
! provisions. No part of the Material may be used, copied, reproduced,
! modified, published, uploaded, posted, transmitted, distributed or disclosed
! in any way without Intel's prior express written permission. No license
! under any patent, copyright or other intellectual property rights in the
! Material is granted to or conferred upon you, either expressly, by
! implication, inducement, estoppel or otherwise. Any license under such
! intellectual property rights must be express and approved by Intel in
! writing.
!
! *Third Party trademarks are the property of their respective owners.
!
! Unless otherwise agreed by Intel in writing, you may not remove or alter
! this notice or any other notice embedded in Materials by Intel or Intel's
! suppliers or licensors in any way.
!
!*******************************************************************************
! Content:
! Intel(R) Math Kernel Library (MKL) examples interface
!******************************************************************************/
#ifndef _MKL_EXAMPLE_H_
#define _MKL_EXAMPLE_H_
#include "mkl_types.h"
#include "mkl_cblas.h"
#include <stdio.h>
#ifdef __cplusplus
extern "C" {
#endif /* __cplusplus */
#define COMMENTS ':'
#define MAX_STRING_LEN 512
#define MIN(a,b) (((a) > (b)) ? (b) : (a))
#define MAX(a,b) (((a) > (b)) ? (a) : (b))
#define GENERAL_MATRIX 0
#define UPPER_MATRIX 1
#define LOWER_MATRIX -1
#define FULLPRINT 0
#define SHORTPRINT 1
#ifdef MKL_ILP64
#define INT_FORMAT "%lld"
#else
#define INT_FORMAT "%d"
#endif
/*
* ===========================================
* Prototypes for example program functions
* ===========================================
*/
int GetIntegerParameters(FILE*, ...);
int GetScalarsD(FILE*, ...);
int GetScalarsS(FILE*, ...);
int GetScalarsC(FILE*, ...);
int GetScalarsZ(FILE*, ...);
int GetCblasCharParameters(FILE *in_file, ...);
MKL_INT MaxValue(MKL_INT, MKL_INT*);
MKL_INT GetVectorI(FILE*, MKL_INT, MKL_INT*);
MKL_INT GetVectorS(FILE*, MKL_INT, float*, MKL_INT);
MKL_INT GetVectorC(FILE*, MKL_INT, MKL_Complex8*, MKL_INT);
MKL_INT GetVectorD(FILE*, MKL_INT, double*, MKL_INT);
MKL_INT GetVectorZ(FILE*, MKL_INT, MKL_Complex16*, MKL_INT);
MKL_INT GetArrayS(FILE*, CBLAS_ORDER*, MKL_INT, MKL_INT*, MKL_INT*, float*, MKL_INT*);
MKL_INT GetArrayD(FILE*, CBLAS_ORDER*, MKL_INT, MKL_INT*, MKL_INT*, double*, MKL_INT*);
MKL_INT GetArrayC(FILE*, CBLAS_ORDER*, MKL_INT, MKL_INT*, MKL_INT*, MKL_Complex8*, MKL_INT*);
MKL_INT GetArrayZ(FILE*, CBLAS_ORDER*, MKL_INT, MKL_INT*, MKL_INT*, MKL_Complex16*, MKL_INT*);
MKL_INT GetBandArrayS(FILE*, CBLAS_ORDER*, MKL_INT, MKL_INT, MKL_INT, MKL_INT, float*, MKL_INT);
MKL_INT GetBandArrayD(FILE*, CBLAS_ORDER*, MKL_INT, MKL_INT, MKL_INT, MKL_INT, double*, MKL_INT);
MKL_INT GetBandArrayC(FILE*, CBLAS_ORDER*, MKL_INT, MKL_INT, MKL_INT, MKL_INT, MKL_Complex8*, MKL_INT);
MKL_INT GetBandArrayZ(FILE*, CBLAS_ORDER*, MKL_INT, MKL_INT, MKL_INT, MKL_INT, MKL_Complex16*, MKL_INT);
MKL_INT GetValuesI(FILE *, MKL_INT *, MKL_INT, MKL_INT);
MKL_INT GetValuesC(FILE *, MKL_Complex8*, MKL_INT, MKL_INT, MKL_INT);
MKL_INT GetValuesZ(FILE *, MKL_Complex16*, MKL_INT, MKL_INT, MKL_INT);
MKL_INT GetValuesD(FILE *, double*, MKL_INT, MKL_INT, MKL_INT);
MKL_INT GetValuesS(FILE *, float*, MKL_INT, MKL_INT, MKL_INT);
void PrintParameters( char*, ... );
void PrintTrans(MKL_INT, CBLAS_TRANSPOSE*, CBLAS_TRANSPOSE*);
void PrintOrder(CBLAS_ORDER*);
void PrintVectorI(MKL_INT, MKL_INT*, char*);
void PrintVectorS(int, MKL_INT, float*, MKL_INT, char*);
void PrintVectorC(int, MKL_INT, MKL_Complex8*, MKL_INT, char*);
void PrintVectorD(int, MKL_INT, double*, MKL_INT, char*);
void PrintVectorZ(int, MKL_INT, MKL_Complex16*, MKL_INT, char*);
void PrintArrayS(CBLAS_ORDER*, int, int, MKL_INT*, MKL_INT*, float*, MKL_INT*, char*);
void PrintArrayD(CBLAS_ORDER*, int, int, MKL_INT*, MKL_INT*, double*, MKL_INT*, char*);
void PrintArrayC(CBLAS_ORDER*, int, int, MKL_INT*, MKL_INT*, MKL_Complex8*, MKL_INT*, char*);
void PrintArrayZ(CBLAS_ORDER*, int, int, MKL_INT*, MKL_INT*, MKL_Complex16*, MKL_INT*, char*);
void PrintBandArrayS(CBLAS_ORDER*, int, MKL_INT, MKL_INT, MKL_INT, MKL_INT, float*, MKL_INT, char*);
void PrintBandArrayD(CBLAS_ORDER*, int, MKL_INT, MKL_INT, MKL_INT, MKL_INT, double*, MKL_INT, char*);
void PrintBandArrayC(CBLAS_ORDER*, int, MKL_INT, MKL_INT, MKL_INT, MKL_INT, MKL_Complex8*, MKL_INT, char*);
void PrintBandArrayZ(CBLAS_ORDER*, int, MKL_INT, MKL_INT, MKL_INT, MKL_INT, MKL_Complex16*, MKL_INT, char*);
#ifdef __cplusplus
}
#endif /* __cplusplus */
#endif /* _MKL_EXAMPLE_H_ */
This diff is collapsed.
#include <stdio.h>
#include <assert.h>
#include <math.h>
#include "mkl.h"
#include "mkl_types.h"
#include "mkl_spblas.h"
#include "mmio.h"
// mkl_?csrgemv y := A*x one based
// void mkl_dcsrgemv (char *transa, MKL_INT *m, double *a, MKL_INT *ia, MKL_INT *ja, double *x, double *y);
// mkl_cspblas_?csrgemv y := A*x zero based
// void mkl_cspblas_dcsrgemv (char *transa, MKL_INT *m, double *a, MKL_INT *ia, MKL_INT *ja, double *x, double *y);
// mkl_?csrmv y := alpha*A*x + beta*y
// void mkl_dcsrmv (char *transa, MKL_INT *m, MKL_INT *k, double *alpha, char *matdescra, double *val, MKL_INT *indx, MKL_INT *pntrb, MKL_INT *pntre, double *x, double *beta, double *y);
// trans='n'
int main(int argc, char* argv[]) {
int ret_code;
MM_typecode matcode;
FILE *f;
int M, N, nz;
int i, *I, *J;
double *val;
if (argc != 5)
{
fprintf(stderr, "Usage: %s [martix-market-filename] [nworker] [num repeat] [null|row/col permutation file]\n", argv[0]);
exit(1);
}
else
{
if ((f = fopen(argv[1], "r")) == NULL)
exit(1);
}
int nworker = atoi(argv[2]);
int num_repeat = atoi(argv[3]);
char* fnrcperm = argv[4];
if (mm_read_banner(f, &matcode) != 0)
{
printf("Could not process Matrix Market banner.\n");
exit(1);
}
/* This is how one can screen matrix types if their application */
/* only supports a subset of the Matrix Market data types. */
if (mm_is_complex(matcode) && mm_is_matrix(matcode) &&
mm_is_sparse(matcode) )
{
printf("Sorry, this application does not support ");
printf("Market Market type: [%s]\n", mm_typecode_to_str(matcode));
exit(1);
}
/* find out size of sparse matrix .... */
if ((ret_code = mm_read_mtx_crd_size(f, &M, &N, &nz)) !=0)
exit(1);
/* reseve memory for matrices */
I = (int *) malloc((mm_is_symmetric(matcode) ? 2*nz : nz) * sizeof(int));
J = (int *) malloc((mm_is_symmetric(matcode) ? 2*nz : nz) * sizeof(int));
val = (double *) malloc((mm_is_symmetric(matcode) ? 2*nz : nz) * sizeof(double));
/* NOTE: when reading in doubles, ANSI C requires the use of the "l" */
/* specifier as in "%lg", "%lf", "%le", otherwise errors will occur */
/* (ANSI C X3.159-1989, Sec. 4.9.6.2, p. 136 lines 13-15) */
int realnnz = 0;
for (i=0; i<nz; i++)
{
if(mm_is_pattern(matcode)) {
fscanf(f, "%d %d\n", &I[realnnz], &J[realnnz]);
val[realnnz] = 1.0;
}
else
fscanf(f, "%d %d %lg\n", &I[realnnz], &J[realnnz], &val[realnnz]);
I[realnnz]--; /* adjust from 1-based to 0-based */
J[realnnz]--;
if(mm_is_symmetric(matcode) && I[realnnz] != J[realnnz]) {
I[realnnz+1] = J[realnnz];
J[realnnz+1] = I[realnnz];
val[realnnz+1] = val[realnnz];
realnnz++;
}
realnnz++;
}
if (f !=stdin) fclose(f);
/************************/
/* now write out matrix */
/************************/
// mm_write_banner(stdout, matcode);
// mm_write_mtx_crd_size(stdout, M, N, nz);
//for (i=0; i<realnnz; i++) fprintf(stderr, "%d %d %20.19g\n", I[i]+1, J[i]+1, val[i]);
//int* rperm = NULL;
//int* cperm = NULL;
if(!strcasecmp(fnrcperm, "null")) {
} else {
int* rperm = (int*)calloc(M, sizeof(int));
int* cperm = (int*)calloc(N, sizeof(int));
FILE* fpPerm = fopen(fnrcperm, "r");
if(fpPerm == NULL) {
fprintf(stderr, "%s cannot be opened\n", fnrcperm);
exit(-3);
}
for(i = 0; i < M; i++){
fscanf(fpPerm, "%d\n", &rperm[i]);
}
for(i = 0; i < N; i++){
fscanf(fpPerm, "%d\n", &cperm[i]);
}
fclose(fpPerm);
for (i=0; i<realnnz; i++){
I[i] = rperm[I[i]];
J[i] = cperm[J[i]];
}
}
MKL_INT m = M, n = N, nnz = realnnz;
double alpha = 1.0, beta = 0.0;
char transa;
char matdescra[6];
double* vecin = (double*) mkl_malloc( n * sizeof( double ), 64 );
double* vecout = (double*) mkl_malloc( m * sizeof( double ), 64 );
double* Acsr = (double*) mkl_malloc( nnz * sizeof( double ), 64 );
MKL_INT* AJ = (MKL_INT*)mkl_malloc( nnz * sizeof( MKL_INT ), 64 );
MKL_INT* AI = (MKL_INT*)mkl_malloc( (n+1) * sizeof( MKL_INT ), 64 );
// coo to csr
MKL_INT info = 0;
MKL_INT job[8];
job[1]=0; // zero based indexing in csr
job[2]=0; // zero based indexing in coo
job[3]=2; // I don't know
job[4]=nnz; // nnz
job[0]=1; // coo to csr
job[5]=0; // Acsr and AJR allocated by user
//void mkl_dcsrcoo (MKL_INT * job, MKL_INT * n, double *Acsr, MKL_INT * AJR, MKL_INT *AIR, MKL_INT * nnz, double *Acoo, MKL_INT * ir, MKL_INT * jc, MKL_INT * info);
mkl_dcsrcoo (job,&m, Acsr, AJ, AI, &nnz, val, I, J, &info);
/* for(i = 0; i < m; i++) {
printf("%d: ", i+1);
int j;
for(j = AI[i]; j < AI[i+1]; j++) {
printf("%d:%g ", AJ[j]+1, Acsr[j]);
}
printf("\n");
}
*/
for(i = 0; i < n; i++) {
vecin[i] = 1.0;
}
for(i = 0; i < m; i++) {
vecout[i] = 0.0;
}
transa = 'n';
int max_threads = 0;
int micdev = 0;
#pragma offload target(mic:micdev) inout(max_threads)
{
max_threads = mkl_get_max_threads();
mkl_set_num_threads(nworker);
} printf("MIC started \n"); fflush(stdout);
#pragma offload target(mic:micdev) \
in(transa) \
in(m) \
in(Acsr:length(nnz) free_if(0)) \
in(AI:length(n+1) free_if(0)) \
in(AJ:length(nnz) free_if(0)) \
in(vecin:length(n) free_if(0)) \
in(vecout:length(m) free_if(0))
{}
#pragma offload target(mic:micdev) \
in(transa) \
in(m) \
in(Acsr:length(nnz) alloc_if(0) free_if(0)) \
in(AI:length(n+1) alloc_if(0) free_if(0)) \
in(AJ:length(nnz) alloc_if(0) free_if(0)) \
in(vecin:length(n) alloc_if(0) free_if(0)) \
in(vecout:length(m) alloc_if(0) free_if(0))
{
int i;
for(i = 0; i < 1; i++) {
mkl_cspblas_dcsrgemv(&transa, &m, Acsr, AI, AJ, vecin, vecout);
}
}
#pragma offload target(mic:micdev) \
in(transa) \
in(m) \
nocopy(Acsr:length(nnz) alloc_if(0) free_if(0)) \
nocopy(AI:length(n+1) alloc_if(0) free_if(0)) \
nocopy(AJ:length(nnz) alloc_if(0) free_if(0)) \
nocopy(vecin:length(n) alloc_if(0) free_if(0)) \
nocopy(vecout:length(m) alloc_if(0) free_if(0))
{
int i;
for(i = 0; i < 1; i++) {
mkl_cspblas_dcsrgemv(&transa, &m, Acsr, AI, AJ, vecin, vecout);
}
}
#pragma offload target(mic:micdev) \
nocopy(Acsr:length(nnz) alloc_if(0) free_if(1)) \
nocopy(AI:length(n+1) alloc_if(0) free_if(1)) \
nocopy(AJ:length(nnz) alloc_if(0) free_if(1)) \
nocopy(vecin:length(n) alloc_if(0) free_if(1)) \
out(vecout:length(m) alloc_if(0) free_if(1))
{}
double* vecincpu = (double*) mkl_malloc( n * sizeof( double ), 64 );
double* vecoutcpu = (double*) mkl_malloc( m * sizeof( double ), 64 );
for(i = 0; i < n; i++) {
vecincpu[i] = 1.0;
}
for(i = 0; i < m; i++) {
vecoutcpu[i] = 0.0;
}
double s_initial = dsecnd();
//mkl_cspblas_dcsrgemv(&transa, &m, values, rowIndex, columns, sol_vec, rhs_vec);
for(i = 0; i < 1; i++) {
mkl_cspblas_dcsrgemv(&transa, &m, Acsr, AI, AJ, vecincpu, vecoutcpu);
}
double s_elapsed = dsecnd() - s_initial; // seconds
double maxdiff = 0.00001;
for(i = 0; i < m; i++){
double diff = fabs(vecout[i] - vecoutcpu[i]);
if(diff>maxdiff){
printf("%g\t%g\t%g\n", diff, vecout[i], vecoutcpu[i]);