diff --git a/spgemm/mkl_shmem/COMMAND.HISTORY.txt b/spgemm/mkl_shmem/COMMAND.HISTORY.txt new file mode 100644 index 0000000000000000000000000000000000000000..854b6396bc78ab6005f5f6820247abf62580a144 --- /dev/null +++ b/spgemm/mkl_shmem/COMMAND.HISTORY.txt @@ -0,0 +1 @@ +make;./mklspmm ../mtx/cp2k-h2o-e6.mtx 30 100 null diff --git a/spgemm/mkl_shmem/Makefile b/spgemm/mkl_shmem/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..edbcab876cb0e15df2cf2ee04e7eb6effd50deb2 --- /dev/null +++ b/spgemm/mkl_shmem/Makefile @@ -0,0 +1,21 @@ +CFLAGS:=-O0 -g3 #FIXME +#CFLAGS:=-O1 + +all: mklspmm + +#all: mklspmv mklspmm + +mklspmv: mklspmv.o mmio.o + icc mklspmv.o mmio.o -o $@ -mkl -lm -offload-option,mic,ld,"-rpath /opt/intel/mic/myo/lib/ -rpath /opt/intel/mic/coi/device-linux-release/lib/" + +mklspmm: mklspmm.o mmio.o + icc mklspmm.o mmio.o -o $@ -mkl -lm -offload-option,mic,ld,"-rpath /opt/intel/mic/myo/lib/ -rpath /opt/intel/mic/coi/device-linux-release/lib/" + +.c.o: + icc $(CFLAGS) $(OPTION) -o $@ -c $< + +run: + ./mv ../mtx/tinyA.mtx + +clean: + @rm mmio.o mklspmv mklspmm mklspmm.o mklspmv.o diff --git a/spgemm/mkl_shmem/QUICKSTART.txt b/spgemm/mkl_shmem/QUICKSTART.txt new file mode 100644 index 0000000000000000000000000000000000000000..08275d166f9570da8c42b6d81d272a107ac4f8a6 --- /dev/null +++ b/spgemm/mkl_shmem/QUICKSTART.txt @@ -0,0 +1 @@ +make;./mklspmm ../mtx/smallA.mtx ../mtx/smallA.mtx 30 100 -1 null null perm5.txt PRINTMATRICES diff --git a/spgemm/mkl_shmem/common_func.c b/spgemm/mkl_shmem/common_func.c new file mode 100644 index 0000000000000000000000000000000000000000..3f05d31b42f90382248d8537916eadc0fa6731bf --- /dev/null +++ b/spgemm/mkl_shmem/common_func.c @@ -0,0 +1,1570 @@ +/******************************************************************************* +! Copyright(C) 1999-2013 Intel Corporation. All Rights Reserved. +! +! The source code, information and material ("Material") contained herein is +! owned by Intel Corporation or its suppliers or licensors, and title to such +! Material remains with Intel Corporation or its suppliers or licensors. The +! Material contains proprietary information of Intel or its suppliers and +! licensors. The Material is protected by worldwide copyright laws and treaty +! provisions. No part of the Material may be used, copied, reproduced, +! modified, published, uploaded, posted, transmitted, distributed or disclosed +! in any way without Intel's prior express written permission. No license +! under any patent, copyright or other intellectual property rights in the +! Material is granted to or conferred upon you, either expressly, by +! implication, inducement, estoppel or otherwise. Any license under such +! intellectual property rights must be express and approved by Intel in +! writing. +! +! *Third Party trademarks are the property of their respective owners. +! +! Unless otherwise agreed by Intel in writing, you may not remove or alter +! this notice or any other notice embedded in Materials by Intel or Intel's +! suppliers or licensors in any way. +! +!******************************************************************************* +! Content: +! +!******************************************************************************/ + +#include +#include +#include +#include +#include + +#include "mkl_example.h" +#include "mkl_cblas.h" + +MKL_INT MaxValue(MKL_INT n, MKL_INT *x) +{ + MKL_INT i, indmax; + + indmax = x[0]; + for (i = 1; i < n; i++) + if (indmax < x[i]) indmax = x[i]; + return indmax; +} /* MaxValue */ + +MKL_INT GetVectorI(FILE *in_file, MKL_INT n, MKL_INT *x) +{ + return GetValuesI( in_file, x, 0, n ); +} /* GetVectorI */ + +MKL_INT GetVectorS(FILE *in_file, MKL_INT n, float *x, MKL_INT incx) +{ + return GetValuesS( in_file, x, 1, 0, (1+(n-1)*abs(incx)) ); +} /* GetVectorS */ + +MKL_INT GetVectorD(FILE *in_file, MKL_INT n, double *x, MKL_INT incx) +{ + return GetValuesD( in_file, x, 1, 0, (1+(n-1)*abs(incx)) ); +} /* GetVectorD */ + +MKL_INT GetVectorC(FILE *in_file, MKL_INT n, MKL_Complex8 *x, MKL_INT incx) +{ + return GetValuesC( in_file, x, 1, 0, (1+(n-1)*abs(incx)) ); +} /* GetVectorC */ + +MKL_INT GetVectorZ(FILE *in_file, MKL_INT n, MKL_Complex16 *x, MKL_INT incx) +{ + return GetValuesZ( in_file, x, 1, 0, (1+(n-1)*abs(incx)) ); +} /* GetVectorZ */ + +MKL_INT GetArrayS(FILE *in_file, CBLAS_ORDER *order, MKL_INT flag, MKL_INT *m, MKL_INT *n, + float *a, MKL_INT *lda) +{ + MKL_INT i, j, number; + float *addr; + + if( *order == CblasRowMajor ) { + if( flag == GENERAL_MATRIX ) { + for( i = 0; i < (*m); i++ ) { + addr = a + i*(*lda); + number = GetValuesS( in_file, addr, 1, 0, *n ); + if( number != *n ) return 1; + } /* for */ + } else if( flag == UPPER_MATRIX ) { + for (i = 0; i < (*m); i++) { + addr = a + i*(*lda); + number = GetValuesS( in_file, addr, 1, i, *n-i ); + if( number != *n-i ) return 1; + } /* for */ + } else if( flag == LOWER_MATRIX ) { + for (i = 0; i < (*m); i++) { + addr = a + i*(*lda); + number = GetValuesS( in_file, addr, 1, 0, i+1 ); + if( number != i+1 ) return 1; + } /* for */ + } /* if */ + } else if( *order == CblasColMajor ) { + if( flag == GENERAL_MATRIX ) { + for( j = 0; j < (*n); j++ ) { + addr = a + j*(*lda); + number = GetValuesS( in_file, addr, 1, 0, *m ); + if( number != *m ) return 1; + } /* for */ + } else if( flag == UPPER_MATRIX ) { + for( j = 0; j < (*n); j++ ) { + addr = a + j*(*lda); + number = GetValuesS( in_file, addr, 1, 0, j+1 ); + if( number != j+1 ) return 1; + } /* for */ + } else if( flag == LOWER_MATRIX ) { + for( j = 0; j < (*n); j++ ) { + addr = a + j*(*lda); + number = GetValuesS( in_file, addr, 1, j, *m-j ); + if( number != *m-j ) return 1; + } /* for */ + } /* if */ + } /* if */ + return 0; +} /* GetArrayS */ + +MKL_INT GetArrayD(FILE *in_file, CBLAS_ORDER *order, MKL_INT flag, MKL_INT *m, MKL_INT *n, + double *a, MKL_INT *lda) +{ + MKL_INT i, j, number; + double *addr; + + if (*order == CblasRowMajor) { + if (flag == GENERAL_MATRIX) { + for (i = 0; i < (*m); i++) { + addr = a + i*(*lda); + number = GetValuesD( in_file, addr, 1, 0, *n ); + if( number != *n ) return 1; + } /* for */ + } else if (flag == UPPER_MATRIX) { + for (i = 0; i < (*m); i++) { + addr = a + i*(*lda); + number = GetValuesD( in_file, addr, 1, i, *n-i ); + if( number != *n-i ) return 1; + } /* for */ + } else if (flag == LOWER_MATRIX) { + for (i = 0; i < (*m); i++) { + addr = a + i*(*lda); + number = GetValuesD( in_file, addr, 1, 0, i+1 ); + if( number != i+1 ) return 1; + } /* for */ + } /* if */ + } else if (*order == CblasColMajor) { + if (flag == GENERAL_MATRIX) { + for (j = 0; j < (*n); j++) { + addr = a + j*(*lda); + number = GetValuesD( in_file, addr, 1, 0, *m ); + if( number != *m ) return 1; + } /* for */ + } else if (flag == UPPER_MATRIX) { + for (j = 0; j < (*n); j++) { + addr = a + j*(*lda); + number = GetValuesD( in_file, addr, 1, 0, j+1 ); + if( number != j+1 ) return 1; + } /* for */ + } else if (flag == LOWER_MATRIX) { + for (j = 0; j < (*n); j++) { + addr = a + j*(*lda); + number = GetValuesD( in_file, addr, 1, j, *m-j ); + if( number != *m-j ) return 1; + } /* for */ + } /* if */ + } /* if */ + return 0; +} /* GetArrayD */ + +MKL_INT GetArrayC(FILE *in_file, CBLAS_ORDER *order, MKL_INT flag, MKL_INT *m, MKL_INT *n, + MKL_Complex8 *a, MKL_INT *lda) +{ + MKL_INT i, j, number; + MKL_Complex8 *addr; + + if (*order == CblasRowMajor) { + if (flag == GENERAL_MATRIX) { + for (i = 0; i < (*m); i++) { + addr = a + i*(*lda); + number = GetValuesC( in_file, addr, 1, 0, *n ); + if( number != *n ) return 1; + } /* for */ + } else if (flag == UPPER_MATRIX) { + for (i = 0; i < (*m); i++) { + addr = a + i*(*lda); + number = GetValuesC( in_file, addr, 1, i, *n-i ); + if( number != *n-i ) return 1; + } /* for */ + } else if (flag == LOWER_MATRIX) { + for (i = 0; i < (*m); i++) { + addr = a + i*(*lda); + number = GetValuesC( in_file, addr, 1, 0, i+1 ); + if( number != i+1 ) return 1; + } /* for */ + } /* if */ + } else if (*order == CblasColMajor) { + if (flag == GENERAL_MATRIX) { + for (j = 0; j < (*n); j++) { + addr = a + j*(*lda); + number = GetValuesC( in_file, addr, 1, 0, *m ); + if( number != *m ) return 1; + } /* for */ + } else if (flag == UPPER_MATRIX) { + for (j = 0; j < (*n); j++) { + addr = a + j*(*lda); + number = GetValuesC( in_file, addr, 1, 0, j+1 ); + if( number != j+1 ) return 1; + } /* for */ + } else if (flag == LOWER_MATRIX) { + for (j = 0; j < (*n); j++) { + addr = a + j*(*lda); + number = GetValuesC( in_file, addr, 1, j, *m-j ); + if( number != *m-j ) return 1; + } /* for */ + } /* if */ + } /* if */ + return 0; +} /* GetArrayC */ + +MKL_INT GetArrayZ(FILE *in_file, CBLAS_ORDER *order, MKL_INT flag, + MKL_INT *m, MKL_INT *n, MKL_Complex16 *a, MKL_INT *lda) +{ + MKL_INT i, j, number; + MKL_Complex16 *addr; + char buf[MAX_STRING_LEN]; + + if (*order == CblasRowMajor) { + if (flag == GENERAL_MATRIX) { + for (i = 0; i < (*m); i++) { + addr = a + i*(*lda); + number = GetValuesZ( in_file, addr, 1, 0, *n ); + if( number != *n ) return 1; + } /* for */ + } else if (flag == UPPER_MATRIX) { + for (i = 0; i < (*m); i++) { + addr = a + i*(*lda); + number = GetValuesZ( in_file, addr, 1, i, *n-i ); + if( number != *n-i ) return 1; + } /* for */ + } else if (flag == LOWER_MATRIX) { + for (i = 0; i < (*m); i++) { + addr = a + i*(*lda); + number = GetValuesZ( in_file, addr, 1, 0, i+1 ); + if( number != i+1 ) return 1; + } /* for */ + } /* if */ + } else if(*order == CblasColMajor) { + if (flag == GENERAL_MATRIX) { + for (j = 0; j < (*n); j++) { + addr = a + j*(*lda); + number = GetValuesZ( in_file, addr, 1, 0, *m ); + if( number != *m ) return 1; + } /* for */ + } else if (flag == UPPER_MATRIX) { + for (j = 0; j < (*n); j++) { + addr = a + j*(*lda); + number = GetValuesZ( in_file, addr, 1, 0, j+1 ); + if( number != j+1 ) return 1; + } /* for */ + } else if (flag == LOWER_MATRIX) { + for (j = 0; j < (*n); j++) { + addr = a + j*(*lda); + number = GetValuesZ( in_file, addr, 1, j, *m-j ); + if( number != *m-j ) return 1; + } /* for */ + } /* if */ + } /* if */ + return 0; +} /* GetArrayZ */ + +MKL_INT GetBandArrayS(FILE *in_file, CBLAS_ORDER *order, MKL_INT kl, MKL_INT ku, + MKL_INT m, MKL_INT n, float *a, MKL_INT lda) +{ + MKL_INT i, j; + MKL_INT kl1, ku1, i_start, j_start, j_end, ku_rows, kl_rows, number; + float *addr, *addr1; + char buf[MAX_STRING_LEN]; + + if (*order == CblasRowMajor) { + for( i = 0; i < MIN( m, n ); i++ ) { + addr = a + i*lda; + kl1 = ( i - kl <= 0 ) ? i : kl; + ku1 = ( i + ku >= n ) ? MAX(0,n-i-1) : ku; + j_start = kl - kl1; + j_end = j_start + kl1 + ku1; + addr1 = addr + j_start; + number = GetValuesS( in_file, addr1, 1, 0, j_end-j_start+1 ); + if( number != j_end-j_start+1 ) return 1; + } + for( i = MIN( m, n ); i < MIN( m, MIN( m, n ) + kl); i++ ) { + addr = a + i*lda; + kl1 = n - i + kl; + j_start = ( kl > n ) ? kl - n : n - kl; + j_end = j_start + kl1 - 1; + addr1 = addr + j_start; + number = GetValuesS( in_file, addr1, 1, 0, j_end-j_start+1 ); + if( number != j_end-j_start+1 ) return 1; + } + } else if (*order == CblasColMajor) { + i_start = (ku > n ) ? ku - n + 1 : 0; + ku_rows = (ku > n ) ? n - 1 : ku; + j_start = ku_rows; + for( i = 0; i < ku_rows; i++ ) { + j = j_start*lda; addr1 = a+i+i_start; + number = GetValuesS( in_file, addr1, lda, j, n-ku_rows ); + if( number != n-ku_rows ) return 1; + j_start--; + } + + j_end = MIN(m,n); + addr1 = a + ku; + number = GetValuesS( in_file, addr1, lda, 0, j_end ); + if( number != j_end ) return 1; + + kl_rows = ( kl <= m-1 ) ? kl : m - 1; + for ( i = 1; i < kl_rows+1; i++ ) { + kl1 = ( i+j_end <= m ) ? j_end : m - i; + addr1 = a + ku + i; + number = GetValuesS( in_file, addr1, lda, 0, kl1 ); + if( number != kl1 ) return 1; + } + } + return 0; +} /* GetBandArrayS */ + +MKL_INT GetBandArrayD(FILE *in_file, CBLAS_ORDER *order, MKL_INT kl, MKL_INT ku, + MKL_INT m, MKL_INT n, double *a, MKL_INT lda) +{ + MKL_INT i, j; + MKL_INT kl1, ku1, i_start, j_start, j_end, kl_rows, ku_rows, number; + double *addr, *addr1; + char buf[MAX_STRING_LEN]; + + if (*order == CblasRowMajor) { + for( i = 0; i < MIN( m, n ); i++ ) { + addr = a + i*lda; + kl1 = ( i - kl <= 0 ) ? i : kl; + ku1 = ( i + ku >= n ) ? MAX(0,n-i-1) : ku; + j_start = kl - kl1; + j_end = j_start + kl1 + ku1; + addr1 = addr + j_start; + number = GetValuesD( in_file, addr1, 1, 0, j_end-j_start+1 ); + if( number != j_end-j_start+1 ) return 1; + } + for( i = MIN( m, n ); i < MIN( m, MIN( m, n ) + kl); i++ ) { + addr = a + i*lda; + kl1 = n - i + kl; + j_start = ( kl > n ) ? kl - n : n - kl; + j_end = j_start + kl1 - 1; + addr1 = addr + j_start; + number = GetValuesD( in_file, addr1, 1, 0, j_end-j_start+1 ); + if( number != j_end-j_start+1 ) return 1; + } + } else if (*order == CblasColMajor) { + i_start = (ku > n ) ? ku - n + 1 : 0; + ku_rows = (ku > n ) ? n - 1 : ku; + j_start = ku_rows; + for( i = 0; i < ku_rows; i++ ) { + j = j_start*lda; addr1 = a+i+i_start; + number = GetValuesD( in_file, addr1, lda, j, n-ku_rows ); + if( number != n-ku_rows ) return 1; + j_start--; + } + + j_end = MIN(m,n); + addr1 = a+ku; + number = GetValuesD( in_file, addr1, lda, 0, j_end ); + if( number != j_end ) return 1; + + kl_rows = ( kl <= m-1 ) ? kl : m - 1; + for ( i = 1; i < kl_rows+1; i++ ) { + kl1 = ( i+j_end <= m ) ? j_end : m - i; + addr1 = a+ku+i; + number = GetValuesD( in_file, addr1, lda, 0, kl1 ); + if( number != kl1 ) return 1; + } + } /* if */ + return 0; +} /* GetBandArrayD */ + +MKL_INT GetBandArrayC(FILE *in_file, CBLAS_ORDER *order, MKL_INT kl, MKL_INT ku, + MKL_INT m, MKL_INT n, MKL_Complex8 *a, MKL_INT lda) +{ + MKL_INT i, j; + MKL_INT kl1, ku1, i_start, j_start, j_end, ku_rows, kl_rows, number; + MKL_Complex8 *addr, *addr1; + char buf[MAX_STRING_LEN]; + + if (*order == CblasRowMajor) { + for( i = 0; i < MIN( m, n ); i++ ) { + addr = a + i*lda; + kl1 = ( i - kl <= 0 ) ? i : kl; + ku1 = ( i + ku >= n ) ? MAX(0,n-i-1) : ku; + j_start = kl - kl1; + j_end = j_start + kl1 + ku1; + addr1 = addr + j_start; + number = GetValuesC( in_file, addr1, 1, 0, j_end-j_start+1 ); + if( number != j_end-j_start+1 ) return 1; + } + for( i = MIN( m, n ); i < MIN( m, MIN( m, n ) + kl); i++ ) { + addr = a + i*lda; + kl1 = n - i + kl; + j_start = ( kl > n ) ? kl - n : n - kl; + j_end = j_start + kl1 - 1; + addr1 = addr + j_start; + number = GetValuesC( in_file, addr1, 1, 0, j_end-j_start+1 ); + if( number != j_end-j_start+1 ) return 1; + } + } else if (*order == CblasColMajor) { + i_start = (ku > n ) ? ku - n + 1 : 0; + ku_rows = (ku > n ) ? n - 1 : ku; + j_start = ku_rows; + for( i = 0; i < ku_rows; i++ ) { + j = j_start*lda; addr1 = a+i+i_start; + number = GetValuesC( in_file, addr1, lda, j, n-ku_rows ); + if( number != n-ku_rows ) return 1; + j_start--; + } + + j_end = MIN(m,n); + addr1 = a+ku; + number = GetValuesC( in_file, addr1, lda, 0, j_end ); + if( number != j_end ) return 1; + + kl_rows = ( kl <= m-1 ) ? kl : m - 1; + for ( i = 1; i < kl_rows+1; i++ ) { + kl1 = ( i+j_end <= m ) ? j_end : m - i; + addr1 = a+ku+i; + number = GetValuesC( in_file, addr1, lda, 0, kl1 ); + if( number != kl1 ) return 1; + } + } /* if */ + return 0; +} /* GetBandArrayC */ + +MKL_INT GetBandArrayZ(FILE *in_file, CBLAS_ORDER *order, MKL_INT kl, MKL_INT ku, + MKL_INT m, MKL_INT n, MKL_Complex16 *a, MKL_INT lda) +{ + MKL_INT i, j; + MKL_INT kl1, ku1, i_start, j_start, j_end, ku_rows, kl_rows, number; + MKL_Complex16 *addr, *addr1; + + if (*order == CblasRowMajor) { + for( i = 0; i < MIN( m, n ); i++ ) { + addr = a + i*lda; + kl1 = ( i - kl <= 0 ) ? i : kl; + ku1 = ( i + ku >= n ) ? MAX(0,n-i-1) : ku; + j_start = kl - kl1; + j_end = j_start + kl1 + ku1; + addr1 = addr + j_start; + number = GetValuesZ( in_file, addr1, 1, 0, j_end-j_start+1 ); + if( number != j_end-j_start+1 ) return 1; + } + for( i = MIN( m, n ); i < MIN( m, MIN( m, n ) + kl); i++ ) { + addr = a + i*lda; + kl1 = n - i + kl; + j_start = ( kl > n ) ? kl - n : n - kl; + j_end = j_start + kl1 - 1; + addr1 = addr + j_start; + number = GetValuesZ( in_file, addr1, 1, 0, j_end-j_start+1 ); + if( number != j_end-j_start+1 ) return 1; + } + } else if (*order == CblasColMajor) { + i_start = (ku > n ) ? ku - n + 1 : 0; + ku_rows = (ku > n ) ? n - 1 : ku; + j_start = ku_rows; + for( i = 0; i < ku_rows; i++ ) { + j = j_start*lda; addr1 = a+i+i_start; + number = GetValuesZ( in_file, addr1, lda, j, n-ku_rows ); + if( number != n-ku_rows ) return 1; + j_start--; + } + + j_end = MIN(m,n); + addr1 = a+ku; + number = GetValuesZ( in_file, addr1, lda, 0, j_end ); + if( number != j_end ) return 1; + + kl_rows = ( kl <= m-1 ) ? kl : m - 1; + for ( i = 1; i < kl_rows+1; i++ ) { + kl1 = ( i+j_end <= m ) ? j_end : m - i; + addr1 = a+ku+i; + number = GetValuesZ( in_file, addr1, lda, 0, kl1 ); + if( number != kl1 ) return 1; + } + } /* if */ + return 0; +} /* GetBandArrayZ */ + +MKL_INT GetValuesI( FILE *in_file, MKL_INT *in_array, MKL_INT begin, MKL_INT max_numbers ) +{ + MKL_INT i, counter=0; + int value; + char buf[MAX_STRING_LEN], *str; + + do { + fgets( buf, MAX_STRING_LEN, in_file ); + str = strtok( buf, " " ); + if( str == NULL ){ + printf( "\n File format is inappropriate\n"); + return 0; + } + } while ( *str == COMMENTS ); + for( i = 0; i < max_numbers; i++ ) { + if ( *str==COMMENTS ) break; + if( sscanf( str, "%d", &value) != 1 ){ + printf( "\n File format is inappropriate\n" ); + return 0; + } + in_array[begin+i]=(MKL_INT)value; + counter++; + if ( (str = strtok( NULL, " " )) == NULL ) break; + } + return counter; +} + +MKL_INT GetValuesD( FILE *in_file, double *in_array, MKL_INT ld, MKL_INT begin, MKL_INT max_numbers ) +{ + MKL_INT i, counter=0; + double temp; + char buf[MAX_STRING_LEN], *str; + + do { + fgets( buf, MAX_STRING_LEN, in_file ); + str = strtok( buf, " " ); + if( str == NULL ){ + printf( "\n File format is inappropriate\n"); + return 0; + } + } while ( *str == COMMENTS ); + for( i = 0; i < max_numbers; i++ ) { + if ( *str==COMMENTS ) break; + if( sscanf( str, "%lf", &temp) != 1 ){ + printf( "\n File format is inappropriate\n" ); + return 0; + } + in_array[begin+i*ld]=temp; + counter++; + if ( (str = strtok( NULL, " " )) == NULL ) break; + } + return counter; +} /* GetValuesD */ + +MKL_INT GetValuesS( FILE *in_file, float *in_array, MKL_INT ld, MKL_INT begin, MKL_INT max_numbers ) +{ + MKL_INT i, counter=0; + float temp; + char buf[MAX_STRING_LEN], *str; + + do { + fgets( buf, MAX_STRING_LEN, in_file ); + str = strtok( buf, " " ); + if( str == NULL ){ + printf( "\n File format is inappropriate\n"); + return 0; + } + } while ( *str == COMMENTS ); + for( i = 0; i < max_numbers; i++ ) { + if ( *str==COMMENTS ) break; + if( sscanf( str, "%f", &temp) != 1 ){ + printf( "\n File format is inappropriate\n" ); + return 0; + } + in_array[begin+i*ld]=temp; + counter++; + if ( (str = strtok( NULL, " " )) == NULL ) break; + } + return counter; +} /* GetValuesS */ + +MKL_INT GetValuesC( FILE *in_file, MKL_Complex8 *in_array, MKL_INT ld, MKL_INT begin, MKL_INT max_numbers ) +{ + MKL_INT i, counter=0; + float re, im; + char seps[]=" ,()\t"; + char buf[MAX_STRING_LEN], *str; + + do { + fgets( buf, MAX_STRING_LEN, in_file ); + str = strtok( buf, seps ); + if( str == NULL ){ + printf( "\n File format is inappropriate\n"); + return 0; + } + } while ( *str == COMMENTS ); + for( i = 0; i < max_numbers; i++ ) { + if ( *str==COMMENTS ) break; + if( sscanf( str, "%f", &re) != 1 || (str = strtok( NULL,seps)) == NULL || + sscanf( str, "%f", &im) != 1 ){ + printf( "\n File format is inappropriate\n" ); + return 0; + } + in_array[begin+i].real=re; + in_array[begin+i].imag=im; + counter++; + if ( (str = strtok( NULL, seps )) == NULL ) break; + } + return counter; +} /* GetValuesC */ + +MKL_INT GetValuesZ( FILE *in_file, MKL_Complex16 *in_array, MKL_INT ld, MKL_INT begin, MKL_INT max_numbers ) +{ + MKL_INT i, counter=0; + double re, im; + char seps[]=" ,()\t"; + char buf[MAX_STRING_LEN], *str; + + do { + fgets( buf, MAX_STRING_LEN, in_file ); + str = strtok( buf, seps ); + if( str == NULL ){ + printf( "\n File format is inappropriate\n"); + return 0; + } + } while ( *str == COMMENTS ); + for( i = 0; i < max_numbers; i++ ) { + if ( *str==COMMENTS ) break; + if( sscanf( str, "%lf", &re) != 1 || (str = strtok( NULL,seps))== NULL || + sscanf( str, "%lf", &im) != 1 ){ + printf( "\n File format is inappropriate\n" ); + return 0; + } + in_array[begin+i*ld].real=re; + in_array[begin+i*ld].imag=im; + counter++; + if ( (str = strtok( NULL, seps )) == NULL ) break; + } + return counter; +} /* GetValuesZ */ + +int GetIntegerParameters(FILE *in_file, ...) +{ + int counter=0; + char buf[MAX_STRING_LEN], *str; + MKL_INT *param; + va_list ap; + int value; + + do { + fgets(buf, MAX_STRING_LEN, in_file); + str = strtok( buf, " " ); + if( str == NULL ){ + printf("\n File format is inappropriate\n"); + return 0; + } + } while ( *str == COMMENTS ); + va_start(ap, in_file); + param = va_arg(ap, MKL_INT *); + + while ( 1 ) { + if( *str == COMMENTS ) break; + if( sscanf(str, "%d", &value ) != 1 ){ + printf("\n File format is inappropriate\n"); + return 0; + } + *param = (MKL_INT)value; + counter++; + if( (str = strtok( NULL, " " )) == NULL ) break; + param = va_arg( ap, MKL_INT * ); + } + va_end( ap ); + return counter; +} /* GetIntegerParameters */ + +int GetCblasCharParameters(FILE *in_file, ...) +{ + int counter=0; + char buf[MAX_STRING_LEN], *str; + int *param; + va_list ap; + + do { + fgets(buf, MAX_STRING_LEN, in_file); + str = strtok( buf, " " ); + if( str == NULL ){ + printf("\n File format is inappropriate\n"); + return 0; + } + } while ( *str == COMMENTS ); + va_start(ap, in_file); + param = va_arg(ap, int *); + + while ( 1 ) { + if( *str == COMMENTS ) break; + if( sscanf(str, "%d", param ) != 1 ){ + printf("\n File format is inappropriate\n"); + return 0; + } + counter++; + if( (str = strtok( NULL, " " )) == NULL ) break; + param = va_arg( ap, int * ); + } + va_end( ap ); + return counter; +} /* GetCblasCharParameters */ + +int GetScalarsD( FILE *in_file, ... ) +{ + int i, counter=0; + char buf[MAX_STRING_LEN], *str; + double *param; + va_list ap; + + do { + fgets( buf, MAX_STRING_LEN, in_file ); + str = strtok( buf, " " ); + if( str == NULL ){ + printf( "\n File format is inappropriate\n"); + return 0; + } + } while ( *str == COMMENTS ); + va_start( ap, in_file ); + param = va_arg( ap, double * ); + while ( 1 ) { + if ( *str == COMMENTS ) break; + if( sscanf( str, "%lf", param ) != 1 ){ + printf( "\n File format is inappropriate\n" ); + return 0; + } + counter++; + if ( (str = strtok( NULL, " " )) == NULL ) break; + param = va_arg( ap, double * ); + } + va_end( ap ); + return counter; +} /* GetScalarsD */ + +int GetScalarsS( FILE *in_file, ... ) +{ + int i, counter=0; + char buf[MAX_STRING_LEN], *str; + float *param; + va_list ap; + + do { + fgets( buf, MAX_STRING_LEN, in_file ); + str = strtok( buf, " " ); + if( str == NULL ){ + printf( "\n File format is inappropriate\n"); + return 0; + } + } while ( *str == COMMENTS ); + va_start( ap, in_file ); + param = va_arg( ap, float * ); + while ( 1 ) { + if ( *str == COMMENTS ) break; + if( sscanf( str, "%f", param ) != 1 ){ + printf( "\n File format is inappropriate\n" ); + return 0; + } + counter++; + if ( (str = strtok( NULL, " " )) == NULL ) break; + param = va_arg( ap, float * ); + } + va_end( ap ); + return counter; +} /* GetScalarsS */ + +int GetScalarsC( FILE *in_file, ... ) +{ + int i, counter=0; + char buf[MAX_STRING_LEN], *str; + char seps[]=" ,()\t"; + float re, im; + MKL_Complex8 *param; + va_list ap; + + do { + fgets( buf, MAX_STRING_LEN, in_file ); + str = strtok( buf, seps ); + if( str == NULL ){ + printf( "\n File format is inappropriate\n"); + return 0; + } + } while ( *str == COMMENTS ); + va_start( ap, in_file ); + param = va_arg( ap, MKL_Complex8 * ); + while ( 1 ) { + if ( *str == COMMENTS ) break; + if( sscanf( str, "%f", &re) != 1 || (str = strtok( NULL,seps))== NULL || + sscanf( str, "%f", &im) != 1 ){ + printf( "\n File format is inappropriate %f %f\n", re, im ); + return 0; + } + param->real = re; + param->imag = im; + counter++; + if ( (str = strtok( NULL, seps )) == NULL ) break; + param = va_arg( ap, MKL_Complex8 * ); + } + va_end( ap ); + return counter; +} /* GetScalarsC */ + +int GetScalarsZ( FILE *in_file, ... ) +{ + int i, counter=0; + char buf[MAX_STRING_LEN], *str; + char seps[]=" ,()\t"; + double re, im; + MKL_Complex16 *param; + va_list ap; + + do { + fgets( buf, MAX_STRING_LEN, in_file ); + str = strtok( buf, seps ); + if( str == NULL ){ + printf( "\n File format is inappropriate\n"); + return 0; + } + } while ( *str == COMMENTS ); + va_start( ap, in_file ); + param = va_arg( ap, MKL_Complex16 * ); + while ( 1 ) { + if ( *str == COMMENTS ) break; + if( sscanf( str, "%lf", &re) != 1 || (str = strtok( NULL,seps)) == NULL || + sscanf( str, "%lf", &im) != 1 ){ + printf( "\n File format is inappropriate %f %f\n", re, im ); + return 0; + } + param->real = re; + param->imag = im; + counter++; + if ( (str = strtok( NULL, seps )) == NULL ) break; + param = va_arg( ap, MKL_Complex16 * ); + } + va_end( ap ); + return counter; +} /* GetScalarsZ */ + +void PrintParameters( char *names, ... ) +{ + CBLAS_ORDER *order; + CBLAS_SIDE *side; + CBLAS_UPLO *uplo; + CBLAS_DIAG *diag; + CBLAS_TRANSPOSE *trans; + char *p, *str, str1[MAX_STRING_LEN], buf[MAX_STRING_LEN]; + va_list ap; + char seps[]=" ,"; + MKL_INT i; + + printf("\n "); + va_start( ap, names ); + strcpy(buf, names); + str = strtok( buf, seps ); + if( str == NULL ){ + printf( "\n You must determine the parameters names\n"); + return; + } + do { + strcpy(str1, str); + p = str1; + for( i = 0; i < strlen(str1); i++ ) { + *p = tolower(str1[i]); + p++; + } + if ( strcmp( str1, "order" ) == 0 ) { + order = va_arg( ap, CBLAS_ORDER * ); + if ( (int)order == CblasRowMajor ) + printf("ORDER = CblasRowMajor "); + else if ( (int)order == CblasColMajor ) + printf("ORDER = CblasColMajor "); + } else if ( strcmp( str1, "side" ) == 0 ) { + side = va_arg( ap, CBLAS_SIDE * ); + if ( (int)side == CblasLeft ) + printf("SIDE = CblasLeft "); + else if ( (int)side == CblasRight ) + printf("SIDE = CblasRight "); + } else if ( strcmp( str1, "uplo" ) == 0 ) { + uplo = va_arg( ap, CBLAS_UPLO * ); + if ( (int)uplo == CblasUpper ) + printf("UPLO = CblasUpper "); + else if ( (int)uplo == CblasLower ) + printf("UPLO = CblasLower "); + } else if ( strcmp( str1, "diag" ) == 0 ) { + diag = va_arg( ap, CBLAS_DIAG * ); + if ( (int)diag == CblasUnit ) + printf("DIAG=CblasUnit "); + else if ( (int)diag == CblasNonUnit ) + printf("DIAG = CblasNonUnit "); + } else if ( ( strcmp( str1, "trans" ) == 0 ) || + ( strcmp( str1, "transa" ) == 0 ) || + ( strcmp( str1, "transb" ) == 0 ) ) { + trans = va_arg( ap, CBLAS_TRANSPOSE * ); + if ( (int)trans == CblasNoTrans ) + printf("%s = CblasNoTrans ", str); + else if ( (int)trans == CblasTrans ) + printf("%s = CblasTrans ", str); + else if ( (int)trans == CblasConjTrans ) + printf("%s = CblasConjTrans ", str); + } + } while ( (str = strtok( NULL, seps )) != NULL ); + va_end( ap ); + return; +} /* PrintParameters */ + +void PrintVectorI(MKL_INT n, MKL_INT *x, char *name) +{ + MKL_INT i; + + printf("\n VECTOR %s\n ", name); + for (i = 0; i < n; i++) + printf(" "INT_FORMAT, *(x+i)); + return; +} /* PrintVectorI */ + +void PrintVectorS(int flag, MKL_INT n, float *x, MKL_INT incx, char *name) +{ + MKL_INT i; + + switch(flag) { + case 0: + printf("\n VECTOR %s INC%s=" INT_FORMAT "\n ", + name, name, incx); + break; + case 1: + printf("\n VECTOR %s\n ", name); + break; + default: + break; + } /* switch */ + for (i = 0; i < (1+(n-1)*abs(incx)); i++) + printf("%6.2f ", *(x+i)); + return; +} /* PrintVectorS */ + +void PrintVectorD(int flag, MKL_INT n, double *x, MKL_INT incx, char *name) +{ + MKL_INT i; + + switch(flag) { + case 0: + printf("\n VECTOR %s INC%s=" INT_FORMAT "\n ", + name, name, incx); + break; + case 1: + printf("\n VECTOR %s\n ", name); + break; + default: + break; + } /* switch */ + for (i = 0; i < (1+(n-1)*abs(incx)); i++) + printf("%8.3f ", *(x+i)); + return; +} /* PrintVectorD */ + +void PrintVectorC(int flag, MKL_INT n, MKL_Complex8 *x, MKL_INT incx, char *name) +{ + MKL_INT i; + + switch(flag) { + case 0: + printf("\n VECTOR %s INC%s=" INT_FORMAT "\n ", + name, name, incx); + break; + case 1: + printf("\n VECTOR %s\n ", name); + break; + default: + break; + } /* switch */ + for (i = 0; i < (1+(n-1)*abs(incx)); i++) + printf("(%6.2f,%6.2f) ", (x+i)->real, (x+i)->imag); + return; +} /* PrintVectorC */ + +void PrintVectorZ(int flag, MKL_INT n, MKL_Complex16 *x, MKL_INT incx, char *name) +{ + MKL_INT i; + + switch(flag) { + case 0: + printf("\n VECTOR %s INC%s=" INT_FORMAT "\n ", + name, name, incx); + break; + case 1: + printf("\n VECTOR %s\n ", name); + break; + default: + break; + } /* switch */ + for (i = 0; i < (1+(n-1)*abs(incx)); i++) + printf("(%6.2f,%6.2f) ", (x+i)->real, (x+i)->imag); + return; +} /* PrintVectorZ */ + +void PrintArrayS(CBLAS_ORDER *order, int flag1, int flag2, MKL_INT *m, MKL_INT *n, float *a, + MKL_INT *lda, char *name) +{ + MKL_INT i, j; + float *addr; + + switch(flag1) { + case 0: + printf("\n ARRAY %s LD%s=" INT_FORMAT, name, name, *lda); + break; + case 1: + printf("\n ARRAY %s", name); + break; + default: + break; + } /* switch */ + if (*order == CblasRowMajor) { + if (flag2 == GENERAL_MATRIX) { + for (i = 0; i < *m; i++) { + printf("\n "); + addr = a + i*(*lda); + for (j = 0; j < *n; j++) + printf("%6.2f ", *(addr+j)); + } /* for */ + } else if (flag2 == UPPER_MATRIX) { + for (i = 0; i < (*m); i++) { + printf("\n "); + addr = a + i*(*lda); + for (j=0; j < i; j++) + printf(" "); + for (j = i; j < *n; j++) + printf("%6.2f ", *(addr+j)); + } /* for */ + } else if (flag2 == LOWER_MATRIX) { + for (i = 0; i < *m; i++) { + printf("\n "); + addr = a + i*(*lda); + for (j = 0; j <= i; j++) + printf("%6.2f ", *(addr+j)); + } /* for */ + } /* if */ + } else if (*order == CblasColMajor) { + if (flag2 == GENERAL_MATRIX) { + for (i = 0; i < *m; i++) { + printf("\n "); + addr = a + i; + for (j = 0; j < *n; j++) + printf("%6.2f ", *(addr+j*(*lda))); + } /* for */ + } else if (flag2 == UPPER_MATRIX) { + for (i = 0; i < (*m); i++) { + printf("\n "); + addr = a + i; + for (j=0; j < i; j++) + printf(" "); + for (j = i; j < *n; j++) + printf("%6.2f ", *(addr+j*(*lda))); + } /* for */ + } else if (flag2 == LOWER_MATRIX) { + for (i = 0; i < (*m); i++) { + printf("\n "); + addr = a + i; + for (j = 0; j <= i; j++) + printf("%6.2f ", *(addr+j*(*lda))); + } /* for */ + } /* if */ + } /* if */ + return; +} /* PrintArrayS */ + +void PrintArrayD(CBLAS_ORDER *order, int flag1, int flag2, MKL_INT *m, MKL_INT *n, double *a, + MKL_INT *lda, char *name) +{ + MKL_INT i, j; + double *addr; + + switch(flag1) { + case 0: + printf("\n ARRAY %s LD%s=" INT_FORMAT, + name, name, *lda); + break; + case 1: + printf("\n ARRAY %s", name); + break; + default: + break; + } /* switch */ + if (*order == CblasRowMajor) { + if (flag2 == GENERAL_MATRIX) { + for (i = 0; i < *m; i++) { + printf("\n "); + addr = a + i*(*lda); + for (j = 0; j < *n; j++) + printf("%8.3f ", *(addr+j)); + } /* for */ + } else if (flag2 == UPPER_MATRIX) { + for (i = 0; i < *m; i++) { + printf("\n "); + addr = a + i*(*lda); + for (j=0; j < i; j++) + printf(" "); + for (j = i; j < *n; j++) + printf("%8.3f ", *(addr+j)); + } /* for */ + } else if (flag2 == LOWER_MATRIX) { + for (i = 0; i < *m; i++) { + printf("\n "); + addr = a + i*(*lda); + for (j = 0; j <= i; j++) + printf("%8.3f ", *(addr+j)); + } /* for */ + } /* if */ + } else if (*order == CblasColMajor) { + if (flag2 == GENERAL_MATRIX) { + for (i = 0; i < *m; i++) { + printf("\n "); + addr = a + i; + for (j = 0; j < *n; j++) + printf("%8.3f ", *(addr+j*(*lda))); + } /* for */ + } else if (flag2 == UPPER_MATRIX) { + for (i = 0; i < *m; i++) { + printf("\n "); + addr = a + i; + for (j=0; j < i; j++) + printf(" "); + for (j = i; j < *n; j++) + printf("%8.3f ", *(addr+j*(*lda))); + } /* for */ + } else if (flag2 == LOWER_MATRIX) { + for (i = 0; i < *m; i++) { + printf("\n "); + addr = a + i; + for (j = 0; j <= i; j++) + printf("%8.3f ", *(addr+j*(*lda))); + } /* for */ + } /* if */ + } /* if */ + return; +} /* PrintArrayD */ + +void PrintArrayC(CBLAS_ORDER *order, int flag1, int flag2, MKL_INT *m, MKL_INT *n, MKL_Complex8 *a, + MKL_INT *lda, char *name) +{ + MKL_INT i, j; + MKL_Complex8 *addr; + + switch(flag1) { + case 0: + printf("\n ARRAY %s LD%s=" INT_FORMAT, + name, name, *lda); + break; + case 1: + printf("\n ARRAY %s", name); + break; + default: + break; + } /* switch */ + if (*order == CblasRowMajor) { + if (flag2 == GENERAL_MATRIX) { + for (i = 0; i < *m; i++) { + printf("\n "); + addr = a + i*(*lda); + for (j = 0; j < *n; j++) + printf("(%6.2f,%6.2f) ", (addr+j)->real, (addr+j)->imag); + } /* for */ + } else if (flag2 == UPPER_MATRIX) { + for (i = 0; i < *m; i++) { + printf("\n "); + addr = a + i*(*lda); + for (j=0; j < i; j++) + printf(" "); + for (j = i; j < *n; j++) + printf("(%6.2f,%6.2f) ", (addr+j)->real, (addr+j)->imag); + } /* for */ + } else if (flag2 == LOWER_MATRIX) { + for (i = 0; i < *m; i++) { + printf("\n "); + addr = a + i*(*lda); + for (j = 0; j <= i; j++) + printf("(%6.2f,%6.2f) ", (addr+j)->real, (addr+j)->imag); + } /* for */ + } /* if */ + } else if(*order == CblasColMajor) { + if (flag2 == GENERAL_MATRIX) { + for (i = 0; i < *m; i++) { + printf("\n "); + addr = a + i; + for (j = 0; j < *n; j++) + printf("(%6.2f,%6.2f) ", (addr+j*(*lda))->real, + (addr+j*(*lda))->imag); + } /* for */ + } else if (flag2 == UPPER_MATRIX) { + for (i = 0; i < *m; i++) { + printf("\n "); + addr = a + i; + for (j=0; j < i; j++) + printf(" "); + for (j = i; j < *n; j++) + printf("(%6.2f,%6.2f) ", (addr+j*(*lda))->real, + (addr+j*(*lda))->imag); + } /* for */ + } else if (flag2 == LOWER_MATRIX) { + for (i = 0; i < *m; i++) { + printf("\n "); + addr = a + i; + for (j = 0; j <= i; j++) + printf("(%6.2f,%6.2f) ", (addr+j*(*lda))->real, + (addr+j*(*lda))->imag); + } /* for */ + } /* if */ + } /* if */ + return; +} /* PrintArrayC */ + +void PrintArrayZ(CBLAS_ORDER *order, int flag1, int flag2, + MKL_INT *m, MKL_INT *n, MKL_Complex16 *a, MKL_INT *lda, char *name) +{ + MKL_INT i, j; + MKL_Complex16 *addr; + + if (flag1 == 0) + printf("\n ARRAY %s LD%s=" INT_FORMAT, name, name, *lda); + else + printf("\n ARRAY %s", name); + + if (*order == CblasRowMajor) { + if (flag2 == GENERAL_MATRIX) { + for (i = 0; i < *m; i++) { + printf("\n "); + addr = a + i*(*lda); + for (j = 0; j < *n; j++) + printf("(%6.2f,%6.2f) ", (addr+j)->real, + (addr+j)->imag); + } /* for */ + } else if (flag2 == UPPER_MATRIX) { + for (i = 0; i < *m; i++) { + printf("\n "); + addr = a + i*(*lda); + for (j=0; j < i; j++) + printf(" "); + for (j = i; j < *n; j++) + printf("(%6.2f,%6.2f) ", (addr+j)->real, + (addr+j)->imag); + } /* for */ + } else if (flag2 == LOWER_MATRIX) { + for (i = 0; i < *m; i++) { + printf("\n "); + addr = a + i*(*lda); + for (j = 0; j <= i; j++) + printf("(%6.2f,%6.2f) ", (addr+j)->real, + (addr+j)->imag); + } /* for */ + } /* if */ + } else if (*order == CblasColMajor) { + if (flag2 == GENERAL_MATRIX) { + for (i = 0; i < *m; i++) { + printf("\n "); + addr = a + i; + for (j = 0; j < *n; j++) + printf("(%6.2f,%6.2f) ", (addr+j*(*lda))->real, + (addr+j*(*lda))->imag); + } /* for */ + } else if (flag2 == UPPER_MATRIX) { + for (i = 0; i < *m; i++) { + printf("\n "); + addr = a + i; + for (j=0; j < i; j++) + printf(" "); + for (j = i; j < *n; j++) + printf("(%6.2f,%6.2f) ", (addr+j*(*lda))->real, + (addr+j*(*lda))->imag); + } /* for */ + } else if (flag2 == LOWER_MATRIX) { + for (i = 0; i < *m; i++) { + printf("\n "); + addr = a + i; + for (j = 0; j <= i; j++) + printf("(%6.2f,%6.2f) ", (addr+j*(*lda))->real, + (addr+j*(*lda))->imag); + } /* for */ + } /* if */ + } /* if */ + return; +} /* PrintArrayZ */ + +void PrintBandArrayS(CBLAS_ORDER *order, int flag, + MKL_INT kl, MKL_INT ku, MKL_INT m, MKL_INT n, + float *a, MKL_INT lda, char *name) +{ + MKL_INT i, j, l; + float *addr, *addr1; + MKL_INT kl1, ku1, i_start, j_start, j_end, kl_rows, ku_rows; + + if (flag == 0) + printf("\n ARRAY %s LD%s=" INT_FORMAT " KL=" INT_FORMAT " KU=" INT_FORMAT, + name, name, lda, kl, ku); + else + printf("\n ARRAY %s LD%s=" INT_FORMAT, name, name, lda); + + if (*order == CblasRowMajor) { + for( i = 0; i < MIN( m, n ); i++ ) { + printf("\n "); + addr = a + i*lda; + kl1 = ( i - kl <= 0 ) ? i : kl; + ku1 = ( i + ku >= n ) ? MAX(0,n-i-1) : ku; + j_start = kl - kl1; + j_end = j_start + kl1 + ku1; + addr1 = addr + j_start; + for( l=0; l < j_start; l++ ) + printf(" "); + for( l=0; l < j_end-j_start+1; l++ ) + printf("%6.2f ", *(addr1+l)); + } + for( i = MIN( m, n ); i < MIN( m, MIN( m, n ) + kl); i++ ) { + printf("\n "); + addr = a + i*lda; + kl1 = n - i + kl; + j_start = ( kl > n ) ? kl - n : n - kl; + j_end = j_start + kl1 - 1; + addr1 = addr + j_start; + for( l=0; l < j_start; l++ ) + printf(" "); + for( l=0; l < j_end-j_start+1; l++ ) + printf("%6.2f ", *(addr1+l)); + } + } else if (*order == CblasColMajor) { + i_start = (ku > n ) ? ku - n + 1 : 0; + ku_rows = (ku > n ) ? n - 1 : ku; + j_start = ku_rows; + for( i = 0; i < ku_rows; i++ ) { + printf("\n "); + j = j_start*lda; addr1 = a + i + i_start; + for( l=0; l < j_start; l++ ) + printf(" "); + for( l=0; l < n-ku_rows; l++ ) + printf("%6.2f ", *(addr1+j+l*lda) ); + j_start--; + } + + j_end = MIN(m,n); + addr1 = a + ku; + printf("\n "); + for( l=0; l < j_end; l++ ) + printf("%6.2f ", *(addr1+l*lda) ); + + kl_rows = ( kl <= m-1 ) ? kl : m - 1; + for ( i = 1; i < kl_rows+1; i++ ) { + printf("\n "); + kl1 = ( i+j_end <= m ) ? j_end : m - i; + addr1 = a + ku + i; + for( l=0; l < kl1; l++ ) + printf("%6.2f ", *(addr1+l*lda) ); + } + } /* if */ + return; +} /* PrintBandArrayS */ + +void PrintBandArrayD(CBLAS_ORDER *order, int flag, + MKL_INT kl, MKL_INT ku, MKL_INT m, MKL_INT n, + double *a, MKL_INT lda, char *name) +{ + MKL_INT i, j, l; + double *addr, *addr1; + MKL_INT kl1, ku1, i_start, j_start, j_end, kl_rows, ku_rows; + + if (flag == 0) + printf("\n ARRAY %s LD%s=" INT_FORMAT " KL=" INT_FORMAT " KU=" INT_FORMAT, + name, name, lda, kl, ku); + else + printf("\n ARRAY %s LD%s=" INT_FORMAT, name, name, lda); + + if (*order == CblasRowMajor) { + for( i = 0; i < MIN( m, n ); i++ ) { + printf("\n "); + addr = a + i*lda; + kl1 = ( i - kl <= 0 ) ? i : kl; + ku1 = ( i + ku >= n ) ? MAX(0,n-i-1) : ku; + j_start = kl - kl1; + j_end = j_start + kl1 + ku1; + addr1 = addr + j_start; + for( l=0; l < j_start; l++ ) + printf(" "); + for( l=0; l < j_end-j_start+1; l++ ) + printf("%6.2f ", *(addr1+l)); + } + for( i = MIN( m, n ); i < MIN( m, MIN( m, n ) + kl); i++ ) { + printf("\n "); + addr = a + i*lda; + kl1 = n - i + kl; + j_start = ( kl > n ) ? kl - n : n - kl; + j_end = j_start + kl1 - 1; + addr1 = addr + j_start; + for( l=0; l < j_start; l++ ) + printf(" "); + for( l=0; l < j_end-j_start+1; l++ ) + printf("%6.2f ", *(addr1+l)); + } + } else if (*order == CblasColMajor) { + i_start = (ku > n ) ? ku - n + 1 : 0; + ku_rows = (ku > n ) ? n - 1 : ku; + j_start = ku_rows; + for( i = 0; i < ku_rows; i++ ) { + printf("\n "); + j = j_start*lda; addr1 = a + i + i_start; + for( l=0; l < j_start; l++ ) + printf(" "); + for( l=0; l < n-ku_rows; l++ ) + printf("%6.2f ", *(addr1+j+l*lda) ); + j_start--; + } + + j_end = MIN(m,n); + addr1 = a + ku; + printf("\n "); + for( l=0; l < j_end; l++ ) + printf("%6.2f ", *(addr1+l*lda) ); + + kl_rows = ( kl <= m-1 ) ? kl : m - 1; + for ( i = 1; i < kl_rows+1; i++ ) { + printf("\n "); + kl1 = ( i+j_end <= m ) ? j_end : m - i; + addr1 = a + ku + i; + for( l=0; l < kl1; l++ ) + printf("%6.2f ", *(addr1+l*lda) ); + } + } /* if */ + return; +} /* PrintBandArrayD */ + +void PrintBandArrayC(CBLAS_ORDER *order, int flag, + MKL_INT kl, MKL_INT ku, MKL_INT m, MKL_INT n, + MKL_Complex8 *a, MKL_INT lda, char *name) +{ + MKL_INT i, j, l; + MKL_Complex8 *addr, *addr1; + MKL_INT kl1, ku1, j_start, j_end, i_start, kl_rows, ku_rows; + + if (flag == 0) + printf("\n ARRAY %s LD%s=" INT_FORMAT " KL=" INT_FORMAT " KU=" INT_FORMAT, + name, name, lda, kl, ku); + else + printf("\n ARRAY %s LD%s=" INT_FORMAT, name, name, lda); + + if (*order == CblasRowMajor) { + for( i = 0; i < MIN( m, n ); i++ ) { + printf("\n "); + addr = a + i*lda; + kl1 = ( i - kl <= 0 ) ? i : kl; + ku1 = ( i + ku >= n ) ? MAX(0,n-i-1) : ku; + j_start = kl - kl1; + j_end = j_start + kl1 + ku1; + addr1 = addr + j_start; + for( l=0; l < j_start; l++ ) + printf(" "); + for( l=0; l < j_end-j_start+1; l++ ) + printf("(%6.2f,%6.2f) ", (addr1+l)->real, (addr1+l)->imag); + } + for( i = MIN( m, n ); i < MIN( m, MIN( m, n ) + kl); i++ ) { + printf("\n "); + addr = a + i*lda; + kl1 = n - i + kl; + j_start = ( kl > n ) ? kl - n : n - kl; + j_end = j_start + kl1 - 1; + addr1 = addr + j_start; + for( l=0; l < j_start; l++ ) + printf(" "); + for( l=0; l < j_end-j_start+1; l++ ) + printf("(%6.2f,%6.2f) ", (addr1+l)->real, (addr1+l)->imag); + } + } else if (*order == CblasColMajor) { + i_start = (ku > n ) ? ku - n + 1 : 0; + ku_rows = (ku > n ) ? n - 1 : ku; + j_start = ku_rows; + for( i = 0; i < ku_rows; i++ ) { + printf("\n "); + j = j_start*lda; addr1 = a + i + i_start; + for( l=0; l < j_start; l++ ) + printf(" "); + for( l=0; l < n-ku_rows; l++ ) + printf("(%6.2f,%6.2f) ", + (addr1+j+l*lda)->real, (addr1+j+l*lda)->imag ); + j_start--; + } + + j_end = MIN(m,n); + addr1 = a + ku; + printf("\n "); + for( l=0; l < j_end; l++ ) + printf("(%6.2f,%6.2f) ", + (addr1+l*lda)->real, (addr1+l*lda)->imag ); + + kl_rows = ( kl <= m-1 ) ? kl : m - 1; + for ( i = 1; i < kl_rows+1; i++ ) { + printf("\n "); + kl1 = ( i+j_end <= m ) ? j_end : m - i; + addr1 = a + ku + i; + for( l=0; l < kl1; l++ ) + printf("(%6.2f,%6.2f) ", + (addr1+l*lda)->real, (addr1+l*lda)->imag ); + } + } /* if */ + return; +} /* PrintBandArrayC */ + +void PrintBandArrayZ(CBLAS_ORDER *order, int flag, + MKL_INT kl, MKL_INT ku, MKL_INT m, MKL_INT n, + MKL_Complex16 *a, MKL_INT lda, char *name) +{ + MKL_INT i, j, l; + MKL_Complex16 *addr, *addr1; + MKL_INT kl1, ku1, j_start, j_end, i_start, kl_rows, ku_rows; + + if (flag == 0) + printf("\n ARRAY %s LD%s=" INT_FORMAT " KL=" INT_FORMAT " KU=" INT_FORMAT, + name, name, lda, kl, ku); + else + printf("\n ARRAY %s LD%s=" INT_FORMAT, name, name, lda); + + if (*order == CblasRowMajor) { + for( i = 0; i < MIN( m, n ); i++ ) { + printf("\n "); + addr = a + i*lda; + kl1 = ( i - kl <= 0 ) ? i : kl; + ku1 = ( i + ku >= n ) ? MAX(0,n-i-1) : ku; + j_start = kl - kl1; + j_end = j_start + kl1 + ku1; + addr1 = addr + j_start; + for( l=0; l < j_start; l++ ) + printf(" "); + for( l=0; l < j_end-j_start+1; l++ ) + printf("(%6.2f,%6.2f) ", (addr1+l)->real, (addr1+l)->imag); + } + for( i = MIN( m, n ); i < MIN( m, MIN( m, n ) + kl); i++ ) { + printf("\n "); + addr = a + i*lda; + kl1 = n - i + kl; + j_start = ( kl > n ) ? kl - n : n - kl; + j_end = j_start + kl1 - 1; + addr1 = addr + j_start; + for( l=0; l < j_start; l++ ) + printf(" "); + for( l=0; l < j_end-j_start+1; l++ ) + printf("(%6.2f,%6.2f) ", (addr1+l)->real, (addr1+l)->imag); + } + } else if (*order == CblasColMajor) { + i_start = (ku > n ) ? ku - n + 1 : 0; + ku_rows = (ku > n ) ? n - 1 : ku; + j_start = ku_rows; + for( i = 0; i < ku_rows; i++ ) { + printf("\n "); + j = j_start*lda; addr1 = a + i + i_start; + for( l=0; l < j_start; l++ ) + printf(" "); + for( l=0; l < n-ku_rows; l++ ) + printf("(%6.2f,%6.2f) ", + (addr1+j+l*lda)->real, (addr1+j+l*lda)->imag ); + j_start--; + } + + j_end = MIN(m,n); + addr1 = a + ku; + printf("\n "); + for( l=0; l < j_end; l++ ) + printf("(%6.2f,%6.2f) ", + (addr1+l*lda)->real, (addr1+l*lda)->imag ); + + kl_rows = ( kl <= m-1 ) ? kl : m - 1; + for ( i = 1; i < kl_rows+1; i++ ) { + printf("\n "); + kl1 = ( i+j_end <= m ) ? j_end : m - i; + addr1 = a + ku + i; + for( l=0; l < kl1; l++ ) + printf("(%6.2f,%6.2f) ", + (addr1+l*lda)->real, (addr1+l*lda)->imag ); + } + } /* if */ + return; +} /* PrintBandArrayZ */ + diff --git a/spgemm/mkl_shmem/commonmm.h b/spgemm/mkl_shmem/commonmm.h new file mode 100644 index 0000000000000000000000000000000000000000..7ccafda803dcfe24b76a78c37a7d27fdae3a2013 --- /dev/null +++ b/spgemm/mkl_shmem/commonmm.h @@ -0,0 +1,53 @@ +#ifndef __COMMONMM_H__ +#define __COMMONMM_H_ + +#define FIELD_LENGTH 128 +//char filename[FIELD_LENGTH]; + +enum spmv_target { use_cpu, use_mkl, use_mic, use_mkl_mic, use_mic_rowcyclic }; +char *target_str[] = { "CPU", "MKL", "MIC", "MKL_MIC", "MIC_ROWCYCLIC" }; +enum enum_datastructure { DS_ICSR, DS_CSR }; +char *str_datastructure[] = { "ICSR", "CSR" }; +char *str_algorithm[] = { "OUTER", "INNER" }; + +__declspec(target(mic)) int datastructure; + +typedef int csi; +typedef double csv; +__declspec(target(mic)) csv zero = 0.0; + +__declspec(target(mic)) int use_sparse_c; +__declspec(target(mic)) int size_dacc; +__declspec(target(mic)) csv* dacc; + + + + +typedef struct csr_t { + csi m; + csi n; + csi nzmax; + csi nr; + csi* r; + csi* p; + csi* j; + csv* x; + int numInMats; // number of input matrices stored in this struct + int numRows; + int nnonempty_rows; + int numCols; + int totalnnz; + // int totalnnzmax; + int* nItems; // nnz of each matrix + int* pirow; + int size_pirow; + int* pnr; + int* h_rowDelimiters; + int size_h_rowDelimiters; + int* h_cols; + csv* h_val; + char filename[FIELD_LENGTH]; +} csr; + + +#endif diff --git a/spgemm/mkl_shmem/example_read.c b/spgemm/mkl_shmem/example_read.c new file mode 100644 index 0000000000000000000000000000000000000000..af0f6e9c08b47a3e7b3b5763f547b8522415356c --- /dev/null +++ b/spgemm/mkl_shmem/example_read.c @@ -0,0 +1,104 @@ +/* +* Matrix Market I/O example program +* +* Read a real (non-complex) sparse matrix from a Matrix Market (v. 2.0) file. +* and copies it to stdout. This porgram does nothing useful, but +* illustrates common usage of the Matrix Matrix I/O routines. +* (See http://math.nist.gov/MatrixMarket for details.) +* +* Usage: a.out [filename] > output +* +* +* NOTES: +* +* 1) Matrix Market files are always 1-based, i.e. the index of the first +* element of a matrix is (1,1), not (0,0) as in C. ADJUST THESE +* OFFSETS ACCORDINGLY offsets accordingly when reading and writing +* to files. +* +* 2) ANSI C requires one to use the "l" format modifier when reading +* double precision floating point numbers in scanf() and +* its variants. For example, use "%lf", "%lg", or "%le" +* when reading doubles, otherwise errors will occur. +*/ + +#include +#include +#include "mmio.h" + +int main(int argc, char *argv[]) +{ + int ret_code; + MM_typecode matcode; + FILE *f; + int M, N, nz; + int i, *I, *J; + double *val; + + if (argc < 2) + { + fprintf(stderr, "Usage: %s [martix-market-filename]\n", argv[0]); + exit(1); + } + else + { + if ((f = fopen(argv[1], "r")) == NULL) + exit(1); + } + + if (mm_read_banner(f, &matcode) != 0) + { + printf("Could not process Matrix Market banner.\n"); + exit(1); + } + + + /* This is how one can screen matrix types if their application */ + /* only supports a subset of the Matrix Market data types. */ + + if (mm_is_complex(matcode) && mm_is_matrix(matcode) && + mm_is_sparse(matcode) ) + { + printf("Sorry, this application does not support "); + printf("Market Market type: [%s]\n", mm_typecode_to_str(matcode)); + exit(1); + } + + /* find out size of sparse matrix .... */ + + if ((ret_code = mm_read_mtx_crd_size(f, &M, &N, &nz)) !=0) + exit(1); + + + /* reseve memory for matrices */ + + I = (int *) malloc(nz * sizeof(int)); + J = (int *) malloc(nz * sizeof(int)); + val = (double *) malloc(nz * sizeof(double)); + + + /* NOTE: when reading in doubles, ANSI C requires the use of the "l" */ + /* specifier as in "%lg", "%lf", "%le", otherwise errors will occur */ + /* (ANSI C X3.159-1989, Sec. 4.9.6.2, p. 136 lines 13-15) */ + + for (i=0; i +#include +#include "mmio.h" + +#define nz 4 +#define M 10 +#define N 10 + +int main() +{ + MM_typecode matcode; + int I[nz] = { 0, 4, 2, 8 }; + int J[nz] = { 3, 8, 7, 5 }; + double val[nz] = {1.1, 2.2, 3.2, 4.4}; + int i; + + mm_initialize_typecode(&matcode); + mm_set_matrix(&matcode); + mm_set_coordinate(&matcode); + mm_set_real(&matcode); + + mm_write_banner(stdout, matcode); + mm_write_mtx_crd_size(stdout, M, N, nz); + + /* NOTE: matrix market files use 1-based indices, i.e. first element + of a vector has index 1, not 0. */ + + for (i=0; i + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + +#define COMMENTS ':' +#define MAX_STRING_LEN 512 + +#define MIN(a,b) (((a) > (b)) ? (b) : (a)) +#define MAX(a,b) (((a) > (b)) ? (a) : (b)) + + +#define GENERAL_MATRIX 0 +#define UPPER_MATRIX 1 +#define LOWER_MATRIX -1 + +#define FULLPRINT 0 +#define SHORTPRINT 1 + +#ifdef MKL_ILP64 + #define INT_FORMAT "%lld" +#else + #define INT_FORMAT "%d" +#endif + +/* + * =========================================== + * Prototypes for example program functions + * =========================================== + */ + +int GetIntegerParameters(FILE*, ...); +int GetScalarsD(FILE*, ...); +int GetScalarsS(FILE*, ...); +int GetScalarsC(FILE*, ...); +int GetScalarsZ(FILE*, ...); +int GetCblasCharParameters(FILE *in_file, ...); +MKL_INT MaxValue(MKL_INT, MKL_INT*); +MKL_INT GetVectorI(FILE*, MKL_INT, MKL_INT*); +MKL_INT GetVectorS(FILE*, MKL_INT, float*, MKL_INT); +MKL_INT GetVectorC(FILE*, MKL_INT, MKL_Complex8*, MKL_INT); +MKL_INT GetVectorD(FILE*, MKL_INT, double*, MKL_INT); +MKL_INT GetVectorZ(FILE*, MKL_INT, MKL_Complex16*, MKL_INT); +MKL_INT GetArrayS(FILE*, CBLAS_ORDER*, MKL_INT, MKL_INT*, MKL_INT*, float*, MKL_INT*); +MKL_INT GetArrayD(FILE*, CBLAS_ORDER*, MKL_INT, MKL_INT*, MKL_INT*, double*, MKL_INT*); +MKL_INT GetArrayC(FILE*, CBLAS_ORDER*, MKL_INT, MKL_INT*, MKL_INT*, MKL_Complex8*, MKL_INT*); +MKL_INT GetArrayZ(FILE*, CBLAS_ORDER*, MKL_INT, MKL_INT*, MKL_INT*, MKL_Complex16*, MKL_INT*); +MKL_INT GetBandArrayS(FILE*, CBLAS_ORDER*, MKL_INT, MKL_INT, MKL_INT, MKL_INT, float*, MKL_INT); +MKL_INT GetBandArrayD(FILE*, CBLAS_ORDER*, MKL_INT, MKL_INT, MKL_INT, MKL_INT, double*, MKL_INT); +MKL_INT GetBandArrayC(FILE*, CBLAS_ORDER*, MKL_INT, MKL_INT, MKL_INT, MKL_INT, MKL_Complex8*, MKL_INT); +MKL_INT GetBandArrayZ(FILE*, CBLAS_ORDER*, MKL_INT, MKL_INT, MKL_INT, MKL_INT, MKL_Complex16*, MKL_INT); +MKL_INT GetValuesI(FILE *, MKL_INT *, MKL_INT, MKL_INT); +MKL_INT GetValuesC(FILE *, MKL_Complex8*, MKL_INT, MKL_INT, MKL_INT); +MKL_INT GetValuesZ(FILE *, MKL_Complex16*, MKL_INT, MKL_INT, MKL_INT); +MKL_INT GetValuesD(FILE *, double*, MKL_INT, MKL_INT, MKL_INT); +MKL_INT GetValuesS(FILE *, float*, MKL_INT, MKL_INT, MKL_INT); +void PrintParameters( char*, ... ); +void PrintTrans(MKL_INT, CBLAS_TRANSPOSE*, CBLAS_TRANSPOSE*); +void PrintOrder(CBLAS_ORDER*); +void PrintVectorI(MKL_INT, MKL_INT*, char*); +void PrintVectorS(int, MKL_INT, float*, MKL_INT, char*); +void PrintVectorC(int, MKL_INT, MKL_Complex8*, MKL_INT, char*); +void PrintVectorD(int, MKL_INT, double*, MKL_INT, char*); +void PrintVectorZ(int, MKL_INT, MKL_Complex16*, MKL_INT, char*); +void PrintArrayS(CBLAS_ORDER*, int, int, MKL_INT*, MKL_INT*, float*, MKL_INT*, char*); +void PrintArrayD(CBLAS_ORDER*, int, int, MKL_INT*, MKL_INT*, double*, MKL_INT*, char*); +void PrintArrayC(CBLAS_ORDER*, int, int, MKL_INT*, MKL_INT*, MKL_Complex8*, MKL_INT*, char*); +void PrintArrayZ(CBLAS_ORDER*, int, int, MKL_INT*, MKL_INT*, MKL_Complex16*, MKL_INT*, char*); +void PrintBandArrayS(CBLAS_ORDER*, int, MKL_INT, MKL_INT, MKL_INT, MKL_INT, float*, MKL_INT, char*); +void PrintBandArrayD(CBLAS_ORDER*, int, MKL_INT, MKL_INT, MKL_INT, MKL_INT, double*, MKL_INT, char*); +void PrintBandArrayC(CBLAS_ORDER*, int, MKL_INT, MKL_INT, MKL_INT, MKL_INT, MKL_Complex8*, MKL_INT, char*); +void PrintBandArrayZ(CBLAS_ORDER*, int, MKL_INT, MKL_INT, MKL_INT, MKL_INT, MKL_Complex16*, MKL_INT, char*); + +#ifdef __cplusplus +} +#endif /* __cplusplus */ + +#endif /* _MKL_EXAMPLE_H_ */ diff --git a/spgemm/mkl_shmem/mklspmm.c b/spgemm/mkl_shmem/mklspmm.c new file mode 100644 index 0000000000000000000000000000000000000000..1b78d7f7d8e836042204498e5e90cd1c36dcb485 --- /dev/null +++ b/spgemm/mkl_shmem/mklspmm.c @@ -0,0 +1,734 @@ +/*includes {{{*/ +#include +#include +#include +#include "mkl.h" +#include "mkl_types.h" +#include "mkl_spblas.h" + +#include "commonmm.h" +#include "mmio.h" +/*}}}*/ +/* global varibles and defines {{{*/ +int option_host; +#define OPTION_HOST_CPU -1 +#define OPTION_HOST_MIC 0 +int micdev; +__declspec(target(mic)) int max_threads = 0; +__declspec(target(mic)) int nworker = 0; +__declspec(target(mic)) int num_repeat; +__declspec(target(mic)) int option_print_matrices = 0; +#define OPTION_NOPRINT_MATRICES 0 +#define OPTION_PRINT_MATRICES 1 +__declspec(target(mic)) int option_printfile_matrices = 0; +#define OPTION_NOPRINTFILE_MATRICES 0 +#define OPTION_PRINTFILE_MATRICES 1 +__declspec(target(mic)) char transa = 'n'; +__declspec(target(mic)) double time_mic_numeric_mm; +__declspec(target(mic)) double time_mic_symbolic_mm; +/*}}}*/ +/* method declarations {{{*/ + int +main(int argc, char* argv[]); + __declspec(target(mic)) void +printmm_one(int m, double* Aval, int* AJ, int* AI); + __declspec(target(mic)) void +printfilemm_one(char* file, int m, int n, double* Aval, int* AJ, int* AI); + void +printmm_zero(int m, double* Aval, int* AJ, int* AI); +/*}}}*/ + +/*csr util{{{*/ +//#include "../../csr/sspmxm.h" +#define CS_MAX(a,b) (((a) > (b)) ? (a) : (b)) + +csr *csr_spfree(csr *A); +/* free workspace and return a sparse matrix result */ +csr *csr_done(csr *C, void *w, void *x, csi ok) +{ +//#ifdef KADEBUG +// printf("%d:%s:%s():%d, %s(%lu,%lu,%lu)\n", me, __FILE__, __FUNCTION__, +// __LINE__, C->name, C->m, C->n, C->nzmax); +//#endif +// cs_free(w); /* free workspace */ +// cs_free(x); + return(ok ? C : csr_spfree(C)); /* return result if OK, else free it */ +} + + +/* wrapper for free */ +void *cs_free(void *p) +{ + if (p) + free(p); /* free p if it is not already NULL */ + return(NULL); /* return NULL to simplify the use of cs_free */ +} +/* wrapper for realloc */ +void *csr_realloc(void *p, csi n, size_t size, csi *ok) +{ + void *pnew = NULL; + pnew = realloc(p, CS_MAX(n, 1) * size); /* realloc the block */ + *ok = (pnew != NULL); /* realloc fails if pnew is NULL */ + if (pnew == NULL) { + printf("%d:reallocation failed, pnew is NULL\n", __LINE__); + } +//printf("%s:%d: n=%d ok=%d\n", __FUNCTION__, __LINE__, n, *ok); + return((*ok) ? pnew : p); /* return original p if failure */ +} + +/* wrapper for realloc */ +void *cs_realloc(void *p, csi n, size_t size, csi *ok) +{ + void *pnew = NULL; + pnew = realloc(p, CS_MAX(n, 1) * size); /* realloc the block */ + *ok = (pnew != NULL); /* realloc fails if pnew is NULL */ + if (pnew == NULL) { + printf("reallocation failed\n"); + } + return((*ok) ? pnew : p); /* return original p if failure */ +} + + +/* change the max # of entries sparse matrix */ +csi csr_sprealloc(csr *A, csi nzmax) +{ +//printf("%d: nzmax=%d\n", __LINE__, nzmax); + csi ok, oki = 0, okj = 1, okx = 1; + if (!A) + return(0); + if (nzmax <= 0) + nzmax = A->p[A->m]; + A->j = (int*)csr_realloc(A->j, nzmax, sizeof(csi), &oki); + if (A->x) + A->x = (csv*)csr_realloc(A->x, nzmax, sizeof(csv), &okx); +//printf("%d: ok=%d oki=%d okj=%d okx=%d\n", __LINE__, ok, oki, okj, okx); + ok = (oki && okj && okx); +//printf("%d: ok=%d oki=%d okj=%d okx=%d\n", __LINE__, ok, oki, okj, okx); + +//printf("%s:%d: A->nzmax=%d nzmax=%d\n", __FUNCTION__, __LINE__, A->nzmax, nzmax); + if (ok) + A->nzmax = nzmax; +//printf("%s:%d: A->nzmax=%d nzmax=%d\n", __FUNCTION__, __LINE__, A->nzmax, nzmax); + return(ok); +} + +/* free a sparse matrix */ +csr *csr_spfree(csr *A) +{ + if (!A) + return(NULL); /* do nothing if A already NULL */ + cs_free(A->p); + A->p = NULL; + cs_free(A->j); + A->j = NULL; + cs_free(A->x); + A->x = NULL; + cs_free(A->r); + A->r = NULL; +// cs_free(A->name); + cs_free(A); /* free the cs struct and return NULL */ + return NULL; +} +/* allocate a sparse matrix (triplet form or compressed-ROW form) */ +csr *csr_spalloc(csi m, csi n, csi nzmax, int values, int triplet, csv f) // floatType f is to suppress compile error +{ +//printf("m=%d n=%d nzmax=%d\n", m, n, nzmax); + csr* A = (csr*)calloc(1, sizeof(csr)); /* allocate the cs struct */ +// A->name = calloc(MTX_NAME_LEN, sizeof(char)); +// sprintf(A->name, "N/A"); + if (!A) { + perror("sparse allocation failed"); + return(NULL); /* out of memory */ + } + A->m = m; /* define dimensions and nzmax */ + A->n = n; + A->nzmax = nzmax = CS_MAX(nzmax, 0); +//printf("A m=%d n=%d nzmax=%d\n", A->m, A->n, A->nzmax); + A->nr = 0; // number of nonzero rows + A->p = (csi*)calloc(m + 2, sizeof(csi)); + A->j = (csi*)calloc(CS_MAX(nzmax,1), sizeof(csi)); + A->x = (csv*)calloc(CS_MAX(nzmax,1), sizeof(csv)); + return((!A->p || !A->j || !A->x) ? csr_spfree(A) : A); +}/*}}}*/ +/*csr_multiply{{{*/ +/* C = A*B +If nonzero counts of matrices are zero or number of rows/cols is zero, it is advised not to call this function. +*/ +// run 8 CUT 0.1 NNZ MTX ~/matrices/nlpkkt200.mtx ~/matrices/nlpkkt200.mtx tmp stdout MNAC SERIAL +csr *csr_multiply(csi Am, csi An, csi Anzmax, const csi* Ap, const csi* Aj, const csv* Ax, csi Bm, csi Bn, csi Bnzmax, const csi* Bp, const csi* Bj, const csv* Bx, long* nummult, csi* xb, csv* x) +{ + csv tf = 0; + csi p, jp, j, kp, k, i, nz = 0, anz, *Cp, *Cj, m, n, + bnz, values = 1; + csv *Cx; + csr *C; + //printf("%s:%d\n", __FILE__, __LINE__); + if (An != Bm) + return(NULL); + //printf("%s:%d\n", __FILE__, __LINE__); + if (Anzmax == 0 || Bnzmax == 0) { + C = csr_spalloc(Am, Bn, 0, values, 0, tf); +/*#ifdef KADEBUG + printf("%d:%s:%s():%d, A(%s,%lu,%lu,%lu), B(%s,%lu,%lu,%lu), \ + One of matrice is Zero! C(%lu,%lu,%lu)\n", me, __FILE__, __FUNCTION__, + __LINE__, A->name, A->m, A->n, A->nzmax, B->name, B->m, B->n, + B->nzmax, C->m, C->n, C->nzmax); +#endif*/ + return C; + } +// printf("%s:%d\n", __FILE__, __LINE__); + m = Am; + anz = Ap[Am]; + n = Bn; + bnz = Bp[Bm]; + for(i = 0; i < n; i++) xb[i] = 0; + //csi* xb = ka_calloc(n, sizeof(csi)); /* get workspace */ + for(i = 0; i < n; i++) + xb[i] = 0; + values = (Ax != NULL) && (Bx != NULL); + //csv* x = values ? ka_calloc(n, sizeof(csv)) : NULL; /* get workspace */ + csi tnz = (anz + bnz) * 2; +//printf("A->m=%d B->n=%d tnz=%d\n", A->m, B->n, tnz); + C = csr_spalloc(m, n, tnz, values, 0, tf); /* allocate result */ +// sprintf(C->name, "C=(%s)*(%s)", A->name, B->name); +/*#ifdef KADEBUG + printf("%d:%s:%s():%d, A(%s,%lu,%lu,%lu), B(%s,%lu,%lu,%lu), \ + allocated C(%lu,%lu,%lu)\n", me, __FILE__, __FUNCTION__, + __LINE__, A->name, A->m, A->n, A->nzmax, B->name, B->m, B->n, + B->nzmax, C->m, C->n, C->nzmax); +#endif*/ + if (!C || !xb || (values && !x)) + return (csr_done(C, xb, x, 0)); +// csi zero = 0; +// for(i = 0; i < n; i++) +// xb[i] = zero; + Cp = C->p; + //csi* oldj = C->j; +//printf("C->m=%d C->n=%d C->nzmax=%d C->j[0]=%d\n", C->m, C->n, C->nzmax, C->j[0]); + for (i = 0; i < m; i++) { +//C->j != oldj ? printf("Changed old=%u new=%u\n", oldj, C->j):0; + if ( ( (nz + n) > C->nzmax ) ) { + if(!csr_sprealloc(C, (2 * (C->nzmax) + n) ) ) { +/*#ifdef KADEBUG + printf("%d:%s:%s():%d, A(%s,%lu,%lu,%lu), B(%s,%lu,%lu,%lu), CANNOT BE\ + enlargened C(%lu,%lu,%lu)\n", me, __FILE__, __FUNCTION__, + __LINE__, A->name, A->m, A->n, A->nzmax, B->name, B->m, B->n, + B->nzmax, C->m, C->n, C->nzmax); +#endif */ + return (csr_done(C, xb, x, 0)); // out of memory + } else { +/*#ifdef KADEBUG + printf("%d:%s:%s():%d, A(%s,%lu,%lu,%lu), B(%s,%lu,%lu,%lu), enlargened \ + C(%lu,%lu,%lu)\n", me, __FILE__, __FUNCTION__, __LINE__, + A->name, A->m, A->n, A->nzmax, B->name, B->m, B->n, + B->nzmax, C->m, C->n, C->nzmax); +#endif */ + } +//printf("After expansion, C->nzmax=%d\n", C->nzmax); + } + Cj = C->j; + Cx = C->x; /* C->j and C->x may be reallocated */ + //printf("i=%d C->j[0]=%d\n", i, C->j[0]); + Cp[i] = nz; /* row i of C starts here */ + for (jp = Ap[i]; jp < Ap[i + 1]; jp++) { + j = Aj[jp]; + for (kp = Bp[j]; kp < Bp[j + 1]; kp++) { + k = Bj[kp]; /* B(i,j) is nonzero */ +//if(k > n || k < 0)printf("k=%d kp=%d n=%d: %d\n", k, kp, n,__LINE__); + if (xb[k] != i + 1) { + xb[k] = i + 1; /* i is new entry in column j */ + // if(nz > C->nzmax) { + // printf("%d: Error nz>nzmax %d>%d\n", __LINE__, nz, C->nzmax); + // } +//if(nz > C->nzmax || nz < 0)printf("nz: %d\n", __LINE__); + Cj[nz++] = k; /* add i to pattern of C(:,j) */ + + if (x) { + // C->j != oldj ? printf("B Changed old=%u new=%u\n", oldj, C->j):0; + x[k] = Ax[jp] * Bx[kp]; /* x(i) = beta*A(i,j) */ + // C->j != oldj ? printf("A Changed old=%u new=%u\n", oldj, C->j):0; + (*nummult)++; + } + } else if (x) { + x[k] += (Ax[jp] * Bx[kp]); /* i exists in C(:,j) already */ + (*nummult)++; + } + } + } + if (values) + for (p = Cp[i]; p < nz; p++) + Cx[p] = x[Cj[p]]; + } + +// printf("%s:%d nz=%d\n", __FILE__, __LINE__, nz); + Cp[m] = nz; /* finalize the last row of C */ + csr_sprealloc(C, 0); /* remove extra space from C */ +/*#ifdef KADEBUG + printf("%d:%s:%s():%d, A(%s,%lu,%lu,%lu), B(%s,%lu,%lu,%lu), trimmed \ + C(%lu,%lu,%lu)\n", me, __FILE__, __FUNCTION__, __LINE__, + A->name, A->m, A->n, A->nzmax, B->name, B->m, B->n, + B->nzmax, C->m, C->n, C->nzmax); +#endif*/ +// cs_free(xb); + xb = NULL; +// cs_free(x); + x = NULL; + return C;//(csr_done(C, xb, x, 1)); /* success; free workspace, return C */ +}/*}}}*/ + + + void +mkl_cpu_spmm(MKL_INT Am, MKL_INT An, MKL_INT Annz, double* Aval, MKL_INT* AJ, MKL_INT* AI, /*{{{*/ + MKL_INT Bn, MKL_INT Bnnz, double* Bval, MKL_INT* BJ, MKL_INT* BI, double** pCval, MKL_INT** pCJ, MKL_INT** pCI) { + +MKL_INT* CJ = NULL; +double* Cval = NULL; +MKL_INT sort = 3; // sort everything +MKL_INT* CI = (MKL_INT*)mkl_malloc( (Am+2) * sizeof( MKL_INT ), 64 ); +MKL_INT nnzmax = 0; // nnzmax is zero in case of symbolic&numeric usage of mkl_?csrmultcsr +MKL_INT ierr; +MKL_INT request = 1; // symbolic multiplication +mkl_dcsrmultcsr(&transa, &request, &sort, &Am, &An, &Bn, Aval, AJ, AI, Bval, BJ, BI, Cval, CJ, CI, &nnzmax, &ierr); + +request = 2; //numeric multiplication +int Cnnz = CI[Am]-1; +int Cval_size = Cnnz + 1; +CJ = (MKL_INT*)mkl_malloc( Cval_size * sizeof( MKL_INT ), 64 ); +Cval = (double*)mkl_malloc( Cval_size * sizeof( double ), 64 ); +mkl_dcsrmultcsr(&transa, &request, &sort, &Am, &An, &Bn, Aval, AJ, AI, Bval, BJ, BI, Cval, CJ, CI, &nnzmax, &ierr); +//printmm_one(Am, Cval, CJ, CI);//printf("Cnnz=%d\n", Cnnz); +*pCval = Cval; *pCJ = CJ; *pCI = CI; //printf("Cnnz_host:%d\n", Cnnz); +} /* ENDOF mkl_cpu_spmm }}}*/ + void +mkl_mic_spmm(MKL_INT Am, MKL_INT An, MKL_INT Annz, double* Aval, MKL_INT* AJ, MKL_INT* AI,/*{{{*/ + MKL_INT Bn, MKL_INT Bnnz, double* Bval, MKL_INT* BJ, MKL_INT* BI, double** pCval, MKL_INT** pCJ, MKL_INT** pCI) { + + int i; + #pragma offload target(mic:micdev) inout(max_threads) + { + max_threads = mkl_get_max_threads(); + mkl_set_num_threads(nworker); + } // printf("MIC started \n"); fflush(stdout); + #pragma offload target(mic:micdev) \ + in(transa) \ + in(Am) \ + in(An) \ + in(Bn) \ + in(Aval:length(Annz) free_if(0)) \ + in(AI:length(Am+1) free_if(0)) \ + in(AJ:length(Annz) free_if(0)) \ + in(Bval:length(Bnnz) free_if(0)) \ + in(BI:length(An+1) free_if(0)) \ + in(BJ:length(Bnnz) free_if(0)) + {} + + + //void mkl_dcsrmultcsr (char *transa, MKL_INT *job, MKL_INT *sort, MKL_INT *m, MKL_INT *n, MKL_INT *k, double *a, MKL_INT *ja, MKL_INT *ia, double *b, MKL_INT *jb, MKL_INT *ib, double *c, MKL_INT *jc, MKL_INT *ic, MKL_INT *nnzmax, MKL_INT *ierr); + + +// double s_initial = dsecnd(); +// double s_elapsed = dsecnd() - s_initial; // seconds + + MKL_INT Cnnz_host = -1; + MKL_INT* CI = NULL; + MKL_INT* CJ = NULL; + double* Cval = NULL; + MKL_INT nnzmax = 0; // nnzmax is zero in case of symbolic&numeric usage of mkl_?csrmultcsr + MKL_INT ierr; + MKL_INT request = 2; //numeric multiplication + MKL_INT sort = 3; // sort everything + time_mic_symbolic_mm = 0.0; + #pragma offload target(mic:micdev) in(i) in(ierr) in(nnzmax) in(request) in(sort) in(transa) in(Am) in(An) in(Bn) \ + out(time_mic_symbolic_mm) out(Cnnz_host)\ + in(Aval:length(Annz) alloc_if(0) free_if(0)) \ + in(AI:length(Am+1) alloc_if(0) free_if(0)) \ + in(AJ:length(Annz) alloc_if(0) free_if(0)) \ + in(Bval:length(Bnnz) alloc_if(0) free_if(0)) \ + in(BI:length(An+1) alloc_if(0) free_if(0)) \ + in(BJ:length(Bnnz) alloc_if(0) free_if(0)) \ + nocopy(CI: alloc_if(0) free_if(0)) \ + nocopy(CJ: alloc_if(0) free_if(0)) \ + nocopy(Cval: alloc_if(0) free_if(0)) + { + CI = (MKL_INT*)mkl_malloc( (Am+2) * sizeof( MKL_INT ), 64 ); + MKL_INT nnzmax = 0; // nnzmax is zero in case of symbolic&numeric usage of mkl_?csrmultcsr + MKL_INT ierr; + MKL_INT request = 1; // symbolic multiplication + //MKL_INT sort = 7; // do not sort anything + + for(i = 0; i < 10; i++) { + mkl_dcsrmultcsr(&transa, &request, &sort, &Am, &An, &Bn, Aval, AJ, AI, Bval, BJ, BI, Cval, CJ, CI, &nnzmax, &ierr); + } + double s_initial = dsecnd(); + for(i = 0; i < num_repeat; i++) { + mkl_dcsrmultcsr(&transa, &request, &sort, &Am, &An, &Bn, Aval, AJ, AI, Bval, BJ, BI, Cval, CJ, CI, &nnzmax, &ierr); + } + time_mic_symbolic_mm = (dsecnd() - s_initial) / num_repeat; // seconds + + request = 2; //numeric multiplication + int Cnnz = CI[Am]-1; + int Cval_size = Cnnz + 1; Cnnz_host = Cnnz; + CJ = (MKL_INT*)mkl_malloc( Cval_size * sizeof( MKL_INT ), 64 ); + Cval = (double*)mkl_malloc( Cval_size * sizeof( double ), 64 ); + mkl_dcsrmultcsr(&transa, &request, &sort, &Am, &An, &An, Aval, AJ, AI, Bval, BJ, BI, Cval, CJ, CI, &nnzmax, &ierr); + printmm_one(Am, Cval, CJ, CI);//printf("Cnnz=%d\n", Cnnz); + sort = 7; // do not sort anything + request = 2; // numeric multiplication + }//printf("Cnnz_mic: %d\n", Cnnz_host);exit(-1); + time_mic_numeric_mm = 0.0; + #pragma offload target(mic:micdev) nocopy(request) nocopy(sort) nocopy(i) nocopy(ierr) nocopy(nnzmax) nocopy(transa) nocopy(Am) nocopy(An) nocopy(Bn) \ + out(time_mic_numeric_mm) \ + nocopy(Aval:length(Annz) alloc_if(0) free_if(0)) \ + nocopy(AI:length(Am+1) alloc_if(0) free_if(0)) \ + nocopy(AJ:length(Annz) alloc_if(0) free_if(0)) \ + nocopy(Bval:length(Bnnz) alloc_if(0) free_if(0)) \ + nocopy(BI:length(An+1) alloc_if(0) free_if(0)) \ + nocopy(BJ:length(Bnnz) alloc_if(0) free_if(0)) \ + nocopy(CI: alloc_if(0) free_if(0)) \ + nocopy(CJ: alloc_if(0) free_if(0)) \ + nocopy(Cval: alloc_if(0) free_if(0)) + { + //sort = 7 + //request = 1; 2'ye gore sure iki katina cikiyor + //request = 0; 2'ye gore sure uc katina cikiyor + + //request = 2 + //sort = 3; 7'ye gore %30 daha yavas + //printf("sort:%d request:%d\n", sort, request); // prints sort:7 request:2 + for(i = 0; i < 10; i++) { + mkl_dcsrmultcsr(&transa, &request, &sort, &Am, &An, &An, Aval, AJ, AI, Bval, BJ, BI, Cval, CJ, CI, &nnzmax, &ierr); + } + double s_initial = dsecnd(); + for(i = 0; i < num_repeat; i++) { + mkl_dcsrmultcsr(&transa, &request, &sort, &Am, &An, &An, Aval, AJ, AI, Bval, BJ, BI, Cval, CJ, CI, &nnzmax, &ierr); + } + time_mic_numeric_mm = (dsecnd() - s_initial) / num_repeat; // seconds + } + if (option_printfile_matrices == OPTION_PRINTFILE_MATRICES) { + int nelm_CI = Am + 2; + int nelm_CJ = Cnnz_host + 1; + int nelm_Cval = Cnnz_host + 1; + __declspec(target(mic)) MKL_INT* CI_host = (MKL_INT*)mkl_malloc( (nelm_CI) * sizeof( MKL_INT ), 64 ); + __declspec(target(mic)) MKL_INT* CJ_host = (MKL_INT*)mkl_malloc( (nelm_CJ) * sizeof( MKL_INT ), 64 ); + __declspec(target(mic)) double* Cval_host = (double*)mkl_malloc( (nelm_Cval) * sizeof( double ), 64 ); + #pragma offload target(mic:micdev) in(nelm_CI)\ + inout(CI_host:length(nelm_CI) alloc_if(1) free_if(0)) \ + inout(CJ_host:length(nelm_CJ) alloc_if(1) free_if(0)) \ + inout(Cval_host:length(nelm_Cval) alloc_if(1) free_if(0)) \ + nocopy(CI: alloc_if(0) free_if(0)) \ + nocopy(CJ: alloc_if(0) free_if(0)) \ + nocopy(Cval: alloc_if(0) free_if(0)) + { + int i; + for(i = 0; i < nelm_CI; i++) CI_host[i] = CI[i]; + for(i = 0; i < nelm_CJ; i++) {CJ_host[i] = CJ[i]; Cval_host[i] = Cval[i];} + } + *pCval = Cval_host; *pCJ = CJ_host; *pCI = CI_host; //printf("Cnnz_host:%d\n", Cnnz_host); + } +} /* ENDOF mkl_mic_spmm }}}*/ + void +read_mm(char* strpath, int* pM, int* pN, int* prealnnz, int** pI, int** pJ, double** pval){ /*{{{*/ + +int i, M, N, nz, *I, *J; +double* val; +int ret_code; +MM_typecode matcode; +FILE* f; +if ((f = fopen(strpath, "r")) == NULL) {fprintf(stderr, "Input matrix file %s cannot be opened to read.", strpath);exit(1);} +/* READ MATRIX */ +if (mm_read_banner(f, &matcode) != 0) { + printf("Could not process Matrix Market banner.\n"); exit(1); +} +/* This is how one can screen matrix types if their application */ +/* only supports a subset of the Matrix Market data types. */ +if (mm_is_complex(matcode) && mm_is_matrix(matcode) && mm_is_sparse(matcode) ) { + printf("Sorry, this application does not support "); + printf("Market Market type: [%s]\n", mm_typecode_to_str(matcode));exit(1); +} +/* find out size of sparse matrix .... */ +if ((ret_code = mm_read_mtx_crd_size(f, &M, &N, &nz)) !=0) exit(1); +/* reseve memory for matrices */ +I = (int *) malloc((mm_is_symmetric(matcode) ? 2*nz : nz) * sizeof(int)); +J = (int *) malloc((mm_is_symmetric(matcode) ? 2*nz : nz) * sizeof(int)); +val = (double *) malloc((mm_is_symmetric(matcode) ? 2*nz : nz) * sizeof(double)); +*pI = I; +*pJ = J; +*pval = val; +/* NOTE: when reading in doubles, ANSI C requires the use of the "l" */ +/* specifier as in "%lg", "%lf", "%le", otherwise errors will occur */ +/* (ANSI C X3.159-1989, Sec. 4.9.6.2, p. 136 lines 13-15) */ +int realnnz = 0; +for (i=0; imaxdiff){ + printf("%g\t%g\t%g\n", diff, vecout[i], vecoutcpu[i]); + } + } + printf("\n"); +*///}}} diff --git a/spgemm/mkl_shmem/mklspmv.c b/spgemm/mkl_shmem/mklspmv.c new file mode 100644 index 0000000000000000000000000000000000000000..0bb617bd0c13e5ea9f2f09e6f4c39804335fceb6 --- /dev/null +++ b/spgemm/mkl_shmem/mklspmv.c @@ -0,0 +1,261 @@ +#include +#include +#include +#include "mkl.h" +#include "mkl_types.h" +#include "mkl_spblas.h" + +#include "mmio.h" + +// mkl_?csrgemv y := A*x one based +// void mkl_dcsrgemv (char *transa, MKL_INT *m, double *a, MKL_INT *ia, MKL_INT *ja, double *x, double *y); + +// mkl_cspblas_?csrgemv y := A*x zero based +// void mkl_cspblas_dcsrgemv (char *transa, MKL_INT *m, double *a, MKL_INT *ia, MKL_INT *ja, double *x, double *y); + + +// mkl_?csrmv y := alpha*A*x + beta*y +// void mkl_dcsrmv (char *transa, MKL_INT *m, MKL_INT *k, double *alpha, char *matdescra, double *val, MKL_INT *indx, MKL_INT *pntrb, MKL_INT *pntre, double *x, double *beta, double *y); + + + + +// trans='n' +int main(int argc, char* argv[]) { + int ret_code; + MM_typecode matcode; + FILE *f; + int M, N, nz; + int i, *I, *J; + double *val; + + if (argc != 5) + { + fprintf(stderr, "Usage: %s [martix-market-filename] [nworker] [num repeat] [null|row/col permutation file]\n", argv[0]); + exit(1); + } + else + { + if ((f = fopen(argv[1], "r")) == NULL) + exit(1); + } + + int nworker = atoi(argv[2]); + int num_repeat = atoi(argv[3]); + char* fnrcperm = argv[4]; + + if (mm_read_banner(f, &matcode) != 0) + { + printf("Could not process Matrix Market banner.\n"); + exit(1); + } + + + /* This is how one can screen matrix types if their application */ + /* only supports a subset of the Matrix Market data types. */ + + if (mm_is_complex(matcode) && mm_is_matrix(matcode) && + mm_is_sparse(matcode) ) + { + printf("Sorry, this application does not support "); + printf("Market Market type: [%s]\n", mm_typecode_to_str(matcode)); + exit(1); + } + + /* find out size of sparse matrix .... */ + + if ((ret_code = mm_read_mtx_crd_size(f, &M, &N, &nz)) !=0) + exit(1); + + + /* reseve memory for matrices */ + + I = (int *) malloc((mm_is_symmetric(matcode) ? 2*nz : nz) * sizeof(int)); + J = (int *) malloc((mm_is_symmetric(matcode) ? 2*nz : nz) * sizeof(int)); + val = (double *) malloc((mm_is_symmetric(matcode) ? 2*nz : nz) * sizeof(double)); + + + /* NOTE: when reading in doubles, ANSI C requires the use of the "l" */ + /* specifier as in "%lg", "%lf", "%le", otherwise errors will occur */ + /* (ANSI C X3.159-1989, Sec. 4.9.6.2, p. 136 lines 13-15) */ + int realnnz = 0; + + for (i=0; imaxdiff){ + printf("%g\t%g\t%g\n", diff, vecout[i], vecoutcpu[i]); + } + } + printf("\n"); + printf("%s %d %d %g %d %s\n", argv[1], nworker, max_threads, s_elapsed, num_repeat, fnrcperm); +// mkl_free(A); + + return 0; + +} + diff --git a/spgemm/mkl_shmem/mmio.c b/spgemm/mkl_shmem/mmio.c new file mode 100644 index 0000000000000000000000000000000000000000..c250ff2aed998fe65248537a8b19a359206187ce --- /dev/null +++ b/spgemm/mkl_shmem/mmio.c @@ -0,0 +1,511 @@ +/* +* Matrix Market I/O library for ANSI C +* +* See http://math.nist.gov/MatrixMarket for details. +* +* +*/ + + +#include +#include +#include +#include + +#include "mmio.h" + +int mm_read_unsymmetric_sparse(const char *fname, int *M_, int *N_, int *nz_, + double **val_, int **I_, int **J_) +{ + FILE *f; + MM_typecode matcode; + int M, N, nz; + int i; + double *val; + int *I, *J; + + if ((f = fopen(fname, "r")) == NULL) + return -1; + + + if (mm_read_banner(f, &matcode) != 0) + { + printf("mm_read_unsymetric: Could not process Matrix Market banner "); + printf(" in file [%s]\n", fname); + return -1; + } + + + + if ( !(mm_is_real(matcode) && mm_is_matrix(matcode) && + mm_is_sparse(matcode))) + { + fprintf(stderr, "Sorry, this application does not support "); + fprintf(stderr, "Market Market type: [%s]\n", + mm_typecode_to_str(matcode)); + return -1; + } + + /* find out size of sparse matrix: M, N, nz .... */ + + if (mm_read_mtx_crd_size(f, &M, &N, &nz) !=0) + { + fprintf(stderr, "read_unsymmetric_sparse(): could not parse matrix size.\n"); + return -1; + } + + *M_ = M; + *N_ = N; + *nz_ = nz; + + /* reseve memory for matrices */ + + I = (int *) malloc(nz * sizeof(int)); + J = (int *) malloc(nz * sizeof(int)); + val = (double *) malloc(nz * sizeof(double)); + + *val_ = val; + *I_ = I; + *J_ = J; + + /* NOTE: when reading in doubles, ANSI C requires the use of the "l" */ + /* specifier as in "%lg", "%lf", "%le", otherwise errors will occur */ + /* (ANSI C X3.159-1989, Sec. 4.9.6.2, p. 136 lines 13-15) */ + + for (i=0; i +#include +#include +#include "mkl.h" +#include "mkl_types.h" +#include "mkl_spblas.h" + +#include "mmio.h" +/*}}}*/ +/* global varibles and defines {{{*/ +int max_threads = 0; +int nworker = 0; +int num_repeat; +int option_print_matrices = 0; +#define OPTION_NOPRINT_MATRICES 0 +#define OPTION_PRINT_MATRICES 1 +int option_printfile_matrices = 0; +#define OPTION_NOPRINTFILE_MATRICES 0 +#define OPTION_PRINTFILE_MATRICES 1 +char transa = 'n'; +double time_mic_numeric_mm; +double time_mic_symbolic_mm; +typedef int csi; +typedef double csv; +csv zero = 0.0; + +typedef struct csr_t { + csi m; + csi n; + csi nzmax; + csi nr; + csi* r; + csi* p; + csi* j; + csv* x; + int numInMats; // number of input matrices stored in this struct + int numRows; + int nnonempty_rows; + int numCols; + int totalnnz; + // int totalnnzmax; + int* nItems; // nnz of each matrix + int* pirow; + int size_pirow; + int* pnr; + int* h_rowDelimiters; + int size_h_rowDelimiters; + int* h_cols; + csv* h_val; +} csr; +/*}}}*/ +/* method declarations {{{*/ +int main(int argc, char* argv[]); +void printmm_one(int m, double* Aval, int* AJ, int* AI); +void printfilemm_one(char* file, int m, int n, double* Aval, int* AJ, int* AI); +void printmm_zero(int m, double* Aval, int* AJ, int* AI); +/*}}}*/ +/*csr util{{{*/ +//#include "../../csr/sspmxm.h" +#define CS_MAX(a,b) (((a) > (b)) ? (a) : (b)) + +csr *csr_spfree(csr *A); +/* free workspace and return a sparse matrix result */ +csr *csr_done(csr *C, void *w, void *x, csi ok) +{ +//#ifdef KADEBUG +// printf("%d:%s:%s():%d, %s(%lu,%lu,%lu)\n", me, __FILE__, __FUNCTION__, +// __LINE__, C->name, C->m, C->n, C->nzmax); +//#endif +// cs_free(w); /* free workspace */ +// cs_free(x); + return(ok ? C : csr_spfree(C)); /* return result if OK, else free it */ +} + + +/* wrapper for free */ +void *cs_free(void *p) +{ + if (p) + free(p); /* free p if it is not already NULL */ + return(NULL); /* return NULL to simplify the use of cs_free */ +} +/* wrapper for realloc */ +void *csr_realloc(void *p, csi n, size_t size, csi *ok) +{ + void *pnew = NULL; + pnew = realloc(p, CS_MAX(n, 1) * size); /* realloc the block */ + *ok = (pnew != NULL); /* realloc fails if pnew is NULL */ + if (pnew == NULL) { + printf("%d:reallocation failed, pnew is NULL\n", __LINE__); + } +//printf("%s:%d: n=%d ok=%d\n", __FUNCTION__, __LINE__, n, *ok); + return((*ok) ? pnew : p); /* return original p if failure */ +} + +/* wrapper for realloc */ +void *cs_realloc(void *p, csi n, size_t size, csi *ok) +{ + void *pnew = NULL; + pnew = realloc(p, CS_MAX(n, 1) * size); /* realloc the block */ + *ok = (pnew != NULL); /* realloc fails if pnew is NULL */ + if (pnew == NULL) { + printf("reallocation failed\n"); + } + return((*ok) ? pnew : p); /* return original p if failure */ +} + + +/* change the max # of entries sparse matrix */ +csi csr_sprealloc(csr *A, csi nzmax) +{ +//printf("%d: nzmax=%d\n", __LINE__, nzmax); + csi ok, oki = 0, okj = 1, okx = 1; + if (!A) + return(0); + if (nzmax <= 0) + nzmax = A->p[A->m]; + A->j = (int*)csr_realloc(A->j, nzmax, sizeof(csi), &oki); + if (A->x) + A->x = (csv*)csr_realloc(A->x, nzmax, sizeof(csv), &okx); +//printf("%d: ok=%d oki=%d okj=%d okx=%d\n", __LINE__, ok, oki, okj, okx); + ok = (oki && okj && okx); +//printf("%d: ok=%d oki=%d okj=%d okx=%d\n", __LINE__, ok, oki, okj, okx); + +//printf("%s:%d: A->nzmax=%d nzmax=%d\n", __FUNCTION__, __LINE__, A->nzmax, nzmax); + if (ok) + A->nzmax = nzmax; +//printf("%s:%d: A->nzmax=%d nzmax=%d\n", __FUNCTION__, __LINE__, A->nzmax, nzmax); + return(ok); +} + +/* free a sparse matrix */ +csr *csr_spfree(csr *A) +{ + if (!A) + return(NULL); /* do nothing if A already NULL */ + cs_free(A->p); + A->p = NULL; + cs_free(A->j); + A->j = NULL; + cs_free(A->x); + A->x = NULL; + cs_free(A->r); + A->r = NULL; +// cs_free(A->name); + cs_free(A); /* free the cs struct and return NULL */ + return NULL; +} +/* allocate a sparse matrix (triplet form or compressed-ROW form) */ +csr *csr_spalloc(csi m, csi n, csi nzmax, int values, int triplet, csv f) // floatType f is to suppress compile error +{ +//printf("m=%d n=%d nzmax=%d\n", m, n, nzmax); + csr* A = (csr*)calloc(1, sizeof(csr)); /* allocate the cs struct */ +// A->name = calloc(MTX_NAME_LEN, sizeof(char)); +// sprintf(A->name, "N/A"); + if (!A) { + perror("sparse allocation failed"); + return(NULL); /* out of memory */ + } + A->m = m; /* define dimensions and nzmax */ + A->n = n; + A->nzmax = nzmax = CS_MAX(nzmax, 0); +//printf("A m=%d n=%d nzmax=%d\n", A->m, A->n, A->nzmax); + A->nr = 0; // number of nonzero rows + A->p = (csi*)calloc(m + 2, sizeof(csi)); + A->j = (csi*)calloc(CS_MAX(nzmax,1), sizeof(csi)); + A->x = (csv*)calloc(CS_MAX(nzmax,1), sizeof(csv)); + return((!A->p || !A->j || !A->x) ? csr_spfree(A) : A); +}/*}}}*/ +/*csr_multiply{{{*/ +/* C = A*B +If nonzero counts of matrices are zero or number of rows/cols is zero, it is advised not to call this function. +*/ +// run 8 CUT 0.1 NNZ MTX ~/matrices/nlpkkt200.mtx ~/matrices/nlpkkt200.mtx tmp stdout MNAC SERIAL +csr *csr_multiply(csi Am, csi An, csi Anzmax, const csi* Ap, const csi* Aj, const csv* Ax, csi Bm, csi Bn, csi Bnzmax, const csi* Bp, const csi* Bj, const csv* Bx, long* nummult, csi* xb, csv* x) +{ + csv tf = 0; + csi p, jp, j, kp, k, i, nz = 0, anz, *Cp, *Cj, m, n, + bnz, values = 1; + csv *Cx; + csr *C; + //printf("%s:%d\n", __FILE__, __LINE__); + if (An != Bm) + return(NULL); + //printf("%s:%d\n", __FILE__, __LINE__); + if (Anzmax == 0 || Bnzmax == 0) { + C = csr_spalloc(Am, Bn, 0, values, 0, tf); +/*#ifdef KADEBUG + printf("%d:%s:%s():%d, A(%s,%lu,%lu,%lu), B(%s,%lu,%lu,%lu), \ + One of matrice is Zero! C(%lu,%lu,%lu)\n", me, __FILE__, __FUNCTION__, + __LINE__, A->name, A->m, A->n, A->nzmax, B->name, B->m, B->n, + B->nzmax, C->m, C->n, C->nzmax); +#endif*/ + return C; + } +// printf("%s:%d\n", __FILE__, __LINE__); + m = Am; + anz = Ap[Am]; + n = Bn; + bnz = Bp[Bm]; + for(i = 0; i < n; i++) xb[i] = 0; + //csi* xb = ka_calloc(n, sizeof(csi)); /* get workspace */ + for(i = 0; i < n; i++) + xb[i] = 0; + values = (Ax != NULL) && (Bx != NULL); + //csv* x = values ? ka_calloc(n, sizeof(csv)) : NULL; /* get workspace */ + csi tnz = (anz + bnz) * 2; +//printf("A->m=%d B->n=%d tnz=%d\n", A->m, B->n, tnz); + C = csr_spalloc(m, n, tnz, values, 0, tf); /* allocate result */ +// sprintf(C->name, "C=(%s)*(%s)", A->name, B->name); +/*#ifdef KADEBUG + printf("%d:%s:%s():%d, A(%s,%lu,%lu,%lu), B(%s,%lu,%lu,%lu), \ + allocated C(%lu,%lu,%lu)\n", me, __FILE__, __FUNCTION__, + __LINE__, A->name, A->m, A->n, A->nzmax, B->name, B->m, B->n, + B->nzmax, C->m, C->n, C->nzmax); +#endif*/ + if (!C || !xb || (values && !x)) + return (csr_done(C, xb, x, 0)); +// csi zero = 0; +// for(i = 0; i < n; i++) +// xb[i] = zero; + Cp = C->p; + //csi* oldj = C->j; +//printf("C->m=%d C->n=%d C->nzmax=%d C->j[0]=%d\n", C->m, C->n, C->nzmax, C->j[0]); + for (i = 0; i < m; i++) { +//C->j != oldj ? printf("Changed old=%u new=%u\n", oldj, C->j):0; + if ( ( (nz + n) > C->nzmax ) ) { + if(!csr_sprealloc(C, (2 * (C->nzmax) + n) ) ) { +/*#ifdef KADEBUG + printf("%d:%s:%s():%d, A(%s,%lu,%lu,%lu), B(%s,%lu,%lu,%lu), CANNOT BE\ + enlargened C(%lu,%lu,%lu)\n", me, __FILE__, __FUNCTION__, + __LINE__, A->name, A->m, A->n, A->nzmax, B->name, B->m, B->n, + B->nzmax, C->m, C->n, C->nzmax); +#endif */ + return (csr_done(C, xb, x, 0)); // out of memory + } else { +/*#ifdef KADEBUG + printf("%d:%s:%s():%d, A(%s,%lu,%lu,%lu), B(%s,%lu,%lu,%lu), enlargened \ + C(%lu,%lu,%lu)\n", me, __FILE__, __FUNCTION__, __LINE__, + A->name, A->m, A->n, A->nzmax, B->name, B->m, B->n, + B->nzmax, C->m, C->n, C->nzmax); +#endif */ + } +//printf("After expansion, C->nzmax=%d\n", C->nzmax); + } + Cj = C->j; + Cx = C->x; /* C->j and C->x may be reallocated */ + //printf("i=%d C->j[0]=%d\n", i, C->j[0]); + Cp[i] = nz; /* row i of C starts here */ + for (jp = Ap[i]; jp < Ap[i + 1]; jp++) { + j = Aj[jp]; + for (kp = Bp[j]; kp < Bp[j + 1]; kp++) { + k = Bj[kp]; /* B(i,j) is nonzero */ +//if(k > n || k < 0)printf("k=%d kp=%d n=%d: %d\n", k, kp, n,__LINE__); + if (xb[k] != i + 1) { + xb[k] = i + 1; /* i is new entry in column j */ + // if(nz > C->nzmax) { + // printf("%d: Error nz>nzmax %d>%d\n", __LINE__, nz, C->nzmax); + // } +//if(nz > C->nzmax || nz < 0)printf("nz: %d\n", __LINE__); + Cj[nz++] = k; /* add i to pattern of C(:,j) */ + + if (x) { + // C->j != oldj ? printf("B Changed old=%u new=%u\n", oldj, C->j):0; + x[k] = Ax[jp] * Bx[kp]; /* x(i) = beta*A(i,j) */ + // C->j != oldj ? printf("A Changed old=%u new=%u\n", oldj, C->j):0; + (*nummult)++; + } + } else if (x) { + x[k] += (Ax[jp] * Bx[kp]); /* i exists in C(:,j) already */ + (*nummult)++; + } + } + } + if (values) + for (p = Cp[i]; p < nz; p++) + Cx[p] = x[Cj[p]]; + } + +// printf("%s:%d nz=%d\n", __FILE__, __LINE__, nz); + Cp[m] = nz; /* finalize the last row of C */ + csr_sprealloc(C, 0); /* remove extra space from C */ +/*#ifdef KADEBUG + printf("%d:%s:%s():%d, A(%s,%lu,%lu,%lu), B(%s,%lu,%lu,%lu), trimmed \ + C(%lu,%lu,%lu)\n", me, __FILE__, __FUNCTION__, __LINE__, + A->name, A->m, A->n, A->nzmax, B->name, B->m, B->n, + B->nzmax, C->m, C->n, C->nzmax); +#endif*/ +// cs_free(xb); + xb = NULL; +// cs_free(x); + x = NULL; + return C;//(csr_done(C, xb, x, 1)); /* success; free workspace, return C */ +}/*}}}*/ +void mkl_cpu_spmm(MKL_INT Am, MKL_INT An, MKL_INT Annz, double* Aval, MKL_INT* AJ, MKL_INT* AI, /*{{{*/ + MKL_INT Bn, MKL_INT Bnnz, double* Bval, MKL_INT* BJ, MKL_INT* BI, double** pCval, MKL_INT** pCJ, MKL_INT** pCI) { + +MKL_INT* CJ = NULL; +double* Cval = NULL; +MKL_INT sort = 3; // sort everything +MKL_INT* CI = (MKL_INT*)mkl_malloc( (Am+2) * sizeof( MKL_INT ), 64 ); +MKL_INT nnzmax = 0; // nnzmax is zero in case of symbolic&numeric usage of mkl_?csrmultcsr +MKL_INT ierr; +MKL_INT request = 1; // symbolic multiplication +mkl_dcsrmultcsr(&transa, &request, &sort, &Am, &An, &Bn, Aval, AJ, AI, Bval, BJ, BI, Cval, CJ, CI, &nnzmax, &ierr); + +request = 2; //numeric multiplication +int Cnnz = CI[Am]-1; +int Cval_size = Cnnz + 1; +CJ = (MKL_INT*)mkl_malloc( Cval_size * sizeof( MKL_INT ), 64 ); +Cval = (double*)mkl_malloc( Cval_size * sizeof( double ), 64 ); +mkl_dcsrmultcsr(&transa, &request, &sort, &Am, &An, &Bn, Aval, AJ, AI, Bval, BJ, BI, Cval, CJ, CI, &nnzmax, &ierr); +//printmm_one(Am, Cval, CJ, CI);//printf("Cnnz=%d\n", Cnnz); +*pCval = Cval; *pCJ = CJ; *pCI = CI; //printf("Cnnz_host:%d\n", Cnnz); +} /* ENDOF mkl_cpu_spmm }}}*/ +void read_mm(char* strpath, int* pM, int* pN, int* prealnnz, int** pI, int** pJ, double** pval){ /*{{{*/ + +int i, M, N, nz, *I, *J; +double* val; +int ret_code; +MM_typecode matcode; +FILE* f; +if ((f = fopen(strpath, "r")) == NULL) {fprintf(stderr, "Input matrix file %s cannot be opened to read.", strpath);exit(1);} +/* READ MATRIX */ +if (mm_read_banner(f, &matcode) != 0) { + printf("Could not process Matrix Market banner.\n"); exit(1); +} +/* This is how one can screen matrix types if their application */ +/* only supports a subset of the Matrix Market data types. */ +if (mm_is_complex(matcode) && mm_is_matrix(matcode) && mm_is_sparse(matcode) ) { + printf("Sorry, this application does not support "); + printf("Market Market type: [%s]\n", mm_typecode_to_str(matcode));exit(1); +} +/* find out size of sparse matrix .... */ +if ((ret_code = mm_read_mtx_crd_size(f, &M, &N, &nz)) !=0) exit(1); +/* reseve memory for matrices */ +I = (int *) malloc((mm_is_symmetric(matcode) ? 2*nz : nz) * sizeof(int)); +J = (int *) malloc((mm_is_symmetric(matcode) ? 2*nz : nz) * sizeof(int)); +val = (double *) malloc((mm_is_symmetric(matcode) ? 2*nz : nz) * sizeof(double)); +*pI = I; +*pJ = J; +*pval = val; +/* NOTE: when reading in doubles, ANSI C requires the use of the "l" */ +/* specifier as in "%lg", "%lf", "%le", otherwise errors will occur */ +/* (ANSI C X3.159-1989, Sec. 4.9.6.2, p. 136 lines 13-15) */ +int realnnz = 0; +for (i=0; i +#include +#include +#include +#include + +#include "mkl_example.h" +#include "mkl_cblas.h" + +MKL_INT MaxValue(MKL_INT n, MKL_INT *x) +{ + MKL_INT i, indmax; + + indmax = x[0]; + for (i = 1; i < n; i++) + if (indmax < x[i]) indmax = x[i]; + return indmax; +} /* MaxValue */ + +MKL_INT GetVectorI(FILE *in_file, MKL_INT n, MKL_INT *x) +{ + return GetValuesI( in_file, x, 0, n ); +} /* GetVectorI */ + +MKL_INT GetVectorS(FILE *in_file, MKL_INT n, float *x, MKL_INT incx) +{ + return GetValuesS( in_file, x, 1, 0, (1+(n-1)*abs(incx)) ); +} /* GetVectorS */ + +MKL_INT GetVectorD(FILE *in_file, MKL_INT n, double *x, MKL_INT incx) +{ + return GetValuesD( in_file, x, 1, 0, (1+(n-1)*abs(incx)) ); +} /* GetVectorD */ + +MKL_INT GetVectorC(FILE *in_file, MKL_INT n, MKL_Complex8 *x, MKL_INT incx) +{ + return GetValuesC( in_file, x, 1, 0, (1+(n-1)*abs(incx)) ); +} /* GetVectorC */ + +MKL_INT GetVectorZ(FILE *in_file, MKL_INT n, MKL_Complex16 *x, MKL_INT incx) +{ + return GetValuesZ( in_file, x, 1, 0, (1+(n-1)*abs(incx)) ); +} /* GetVectorZ */ + +MKL_INT GetArrayS(FILE *in_file, CBLAS_ORDER *order, MKL_INT flag, MKL_INT *m, MKL_INT *n, + float *a, MKL_INT *lda) +{ + MKL_INT i, j, number; + float *addr; + + if( *order == CblasRowMajor ) { + if( flag == GENERAL_MATRIX ) { + for( i = 0; i < (*m); i++ ) { + addr = a + i*(*lda); + number = GetValuesS( in_file, addr, 1, 0, *n ); + if( number != *n ) return 1; + } /* for */ + } else if( flag == UPPER_MATRIX ) { + for (i = 0; i < (*m); i++) { + addr = a + i*(*lda); + number = GetValuesS( in_file, addr, 1, i, *n-i ); + if( number != *n-i ) return 1; + } /* for */ + } else if( flag == LOWER_MATRIX ) { + for (i = 0; i < (*m); i++) { + addr = a + i*(*lda); + number = GetValuesS( in_file, addr, 1, 0, i+1 ); + if( number != i+1 ) return 1; + } /* for */ + } /* if */ + } else if( *order == CblasColMajor ) { + if( flag == GENERAL_MATRIX ) { + for( j = 0; j < (*n); j++ ) { + addr = a + j*(*lda); + number = GetValuesS( in_file, addr, 1, 0, *m ); + if( number != *m ) return 1; + } /* for */ + } else if( flag == UPPER_MATRIX ) { + for( j = 0; j < (*n); j++ ) { + addr = a + j*(*lda); + number = GetValuesS( in_file, addr, 1, 0, j+1 ); + if( number != j+1 ) return 1; + } /* for */ + } else if( flag == LOWER_MATRIX ) { + for( j = 0; j < (*n); j++ ) { + addr = a + j*(*lda); + number = GetValuesS( in_file, addr, 1, j, *m-j ); + if( number != *m-j ) return 1; + } /* for */ + } /* if */ + } /* if */ + return 0; +} /* GetArrayS */ + +MKL_INT GetArrayD(FILE *in_file, CBLAS_ORDER *order, MKL_INT flag, MKL_INT *m, MKL_INT *n, + double *a, MKL_INT *lda) +{ + MKL_INT i, j, number; + double *addr; + + if (*order == CblasRowMajor) { + if (flag == GENERAL_MATRIX) { + for (i = 0; i < (*m); i++) { + addr = a + i*(*lda); + number = GetValuesD( in_file, addr, 1, 0, *n ); + if( number != *n ) return 1; + } /* for */ + } else if (flag == UPPER_MATRIX) { + for (i = 0; i < (*m); i++) { + addr = a + i*(*lda); + number = GetValuesD( in_file, addr, 1, i, *n-i ); + if( number != *n-i ) return 1; + } /* for */ + } else if (flag == LOWER_MATRIX) { + for (i = 0; i < (*m); i++) { + addr = a + i*(*lda); + number = GetValuesD( in_file, addr, 1, 0, i+1 ); + if( number != i+1 ) return 1; + } /* for */ + } /* if */ + } else if (*order == CblasColMajor) { + if (flag == GENERAL_MATRIX) { + for (j = 0; j < (*n); j++) { + addr = a + j*(*lda); + number = GetValuesD( in_file, addr, 1, 0, *m ); + if( number != *m ) return 1; + } /* for */ + } else if (flag == UPPER_MATRIX) { + for (j = 0; j < (*n); j++) { + addr = a + j*(*lda); + number = GetValuesD( in_file, addr, 1, 0, j+1 ); + if( number != j+1 ) return 1; + } /* for */ + } else if (flag == LOWER_MATRIX) { + for (j = 0; j < (*n); j++) { + addr = a + j*(*lda); + number = GetValuesD( in_file, addr, 1, j, *m-j ); + if( number != *m-j ) return 1; + } /* for */ + } /* if */ + } /* if */ + return 0; +} /* GetArrayD */ + +MKL_INT GetArrayC(FILE *in_file, CBLAS_ORDER *order, MKL_INT flag, MKL_INT *m, MKL_INT *n, + MKL_Complex8 *a, MKL_INT *lda) +{ + MKL_INT i, j, number; + MKL_Complex8 *addr; + + if (*order == CblasRowMajor) { + if (flag == GENERAL_MATRIX) { + for (i = 0; i < (*m); i++) { + addr = a + i*(*lda); + number = GetValuesC( in_file, addr, 1, 0, *n ); + if( number != *n ) return 1; + } /* for */ + } else if (flag == UPPER_MATRIX) { + for (i = 0; i < (*m); i++) { + addr = a + i*(*lda); + number = GetValuesC( in_file, addr, 1, i, *n-i ); + if( number != *n-i ) return 1; + } /* for */ + } else if (flag == LOWER_MATRIX) { + for (i = 0; i < (*m); i++) { + addr = a + i*(*lda); + number = GetValuesC( in_file, addr, 1, 0, i+1 ); + if( number != i+1 ) return 1; + } /* for */ + } /* if */ + } else if (*order == CblasColMajor) { + if (flag == GENERAL_MATRIX) { + for (j = 0; j < (*n); j++) { + addr = a + j*(*lda); + number = GetValuesC( in_file, addr, 1, 0, *m ); + if( number != *m ) return 1; + } /* for */ + } else if (flag == UPPER_MATRIX) { + for (j = 0; j < (*n); j++) { + addr = a + j*(*lda); + number = GetValuesC( in_file, addr, 1, 0, j+1 ); + if( number != j+1 ) return 1; + } /* for */ + } else if (flag == LOWER_MATRIX) { + for (j = 0; j < (*n); j++) { + addr = a + j*(*lda); + number = GetValuesC( in_file, addr, 1, j, *m-j ); + if( number != *m-j ) return 1; + } /* for */ + } /* if */ + } /* if */ + return 0; +} /* GetArrayC */ + +MKL_INT GetArrayZ(FILE *in_file, CBLAS_ORDER *order, MKL_INT flag, + MKL_INT *m, MKL_INT *n, MKL_Complex16 *a, MKL_INT *lda) +{ + MKL_INT i, j, number; + MKL_Complex16 *addr; + char buf[MAX_STRING_LEN]; + + if (*order == CblasRowMajor) { + if (flag == GENERAL_MATRIX) { + for (i = 0; i < (*m); i++) { + addr = a + i*(*lda); + number = GetValuesZ( in_file, addr, 1, 0, *n ); + if( number != *n ) return 1; + } /* for */ + } else if (flag == UPPER_MATRIX) { + for (i = 0; i < (*m); i++) { + addr = a + i*(*lda); + number = GetValuesZ( in_file, addr, 1, i, *n-i ); + if( number != *n-i ) return 1; + } /* for */ + } else if (flag == LOWER_MATRIX) { + for (i = 0; i < (*m); i++) { + addr = a + i*(*lda); + number = GetValuesZ( in_file, addr, 1, 0, i+1 ); + if( number != i+1 ) return 1; + } /* for */ + } /* if */ + } else if(*order == CblasColMajor) { + if (flag == GENERAL_MATRIX) { + for (j = 0; j < (*n); j++) { + addr = a + j*(*lda); + number = GetValuesZ( in_file, addr, 1, 0, *m ); + if( number != *m ) return 1; + } /* for */ + } else if (flag == UPPER_MATRIX) { + for (j = 0; j < (*n); j++) { + addr = a + j*(*lda); + number = GetValuesZ( in_file, addr, 1, 0, j+1 ); + if( number != j+1 ) return 1; + } /* for */ + } else if (flag == LOWER_MATRIX) { + for (j = 0; j < (*n); j++) { + addr = a + j*(*lda); + number = GetValuesZ( in_file, addr, 1, j, *m-j ); + if( number != *m-j ) return 1; + } /* for */ + } /* if */ + } /* if */ + return 0; +} /* GetArrayZ */ + +MKL_INT GetBandArrayS(FILE *in_file, CBLAS_ORDER *order, MKL_INT kl, MKL_INT ku, + MKL_INT m, MKL_INT n, float *a, MKL_INT lda) +{ + MKL_INT i, j; + MKL_INT kl1, ku1, i_start, j_start, j_end, ku_rows, kl_rows, number; + float *addr, *addr1; + char buf[MAX_STRING_LEN]; + + if (*order == CblasRowMajor) { + for( i = 0; i < MIN( m, n ); i++ ) { + addr = a + i*lda; + kl1 = ( i - kl <= 0 ) ? i : kl; + ku1 = ( i + ku >= n ) ? MAX(0,n-i-1) : ku; + j_start = kl - kl1; + j_end = j_start + kl1 + ku1; + addr1 = addr + j_start; + number = GetValuesS( in_file, addr1, 1, 0, j_end-j_start+1 ); + if( number != j_end-j_start+1 ) return 1; + } + for( i = MIN( m, n ); i < MIN( m, MIN( m, n ) + kl); i++ ) { + addr = a + i*lda; + kl1 = n - i + kl; + j_start = ( kl > n ) ? kl - n : n - kl; + j_end = j_start + kl1 - 1; + addr1 = addr + j_start; + number = GetValuesS( in_file, addr1, 1, 0, j_end-j_start+1 ); + if( number != j_end-j_start+1 ) return 1; + } + } else if (*order == CblasColMajor) { + i_start = (ku > n ) ? ku - n + 1 : 0; + ku_rows = (ku > n ) ? n - 1 : ku; + j_start = ku_rows; + for( i = 0; i < ku_rows; i++ ) { + j = j_start*lda; addr1 = a+i+i_start; + number = GetValuesS( in_file, addr1, lda, j, n-ku_rows ); + if( number != n-ku_rows ) return 1; + j_start--; + } + + j_end = MIN(m,n); + addr1 = a + ku; + number = GetValuesS( in_file, addr1, lda, 0, j_end ); + if( number != j_end ) return 1; + + kl_rows = ( kl <= m-1 ) ? kl : m - 1; + for ( i = 1; i < kl_rows+1; i++ ) { + kl1 = ( i+j_end <= m ) ? j_end : m - i; + addr1 = a + ku + i; + number = GetValuesS( in_file, addr1, lda, 0, kl1 ); + if( number != kl1 ) return 1; + } + } + return 0; +} /* GetBandArrayS */ + +MKL_INT GetBandArrayD(FILE *in_file, CBLAS_ORDER *order, MKL_INT kl, MKL_INT ku, + MKL_INT m, MKL_INT n, double *a, MKL_INT lda) +{ + MKL_INT i, j; + MKL_INT kl1, ku1, i_start, j_start, j_end, kl_rows, ku_rows, number; + double *addr, *addr1; + char buf[MAX_STRING_LEN]; + + if (*order == CblasRowMajor) { + for( i = 0; i < MIN( m, n ); i++ ) { + addr = a + i*lda; + kl1 = ( i - kl <= 0 ) ? i : kl; + ku1 = ( i + ku >= n ) ? MAX(0,n-i-1) : ku; + j_start = kl - kl1; + j_end = j_start + kl1 + ku1; + addr1 = addr + j_start; + number = GetValuesD( in_file, addr1, 1, 0, j_end-j_start+1 ); + if( number != j_end-j_start+1 ) return 1; + } + for( i = MIN( m, n ); i < MIN( m, MIN( m, n ) + kl); i++ ) { + addr = a + i*lda; + kl1 = n - i + kl; + j_start = ( kl > n ) ? kl - n : n - kl; + j_end = j_start + kl1 - 1; + addr1 = addr + j_start; + number = GetValuesD( in_file, addr1, 1, 0, j_end-j_start+1 ); + if( number != j_end-j_start+1 ) return 1; + } + } else if (*order == CblasColMajor) { + i_start = (ku > n ) ? ku - n + 1 : 0; + ku_rows = (ku > n ) ? n - 1 : ku; + j_start = ku_rows; + for( i = 0; i < ku_rows; i++ ) { + j = j_start*lda; addr1 = a+i+i_start; + number = GetValuesD( in_file, addr1, lda, j, n-ku_rows ); + if( number != n-ku_rows ) return 1; + j_start--; + } + + j_end = MIN(m,n); + addr1 = a+ku; + number = GetValuesD( in_file, addr1, lda, 0, j_end ); + if( number != j_end ) return 1; + + kl_rows = ( kl <= m-1 ) ? kl : m - 1; + for ( i = 1; i < kl_rows+1; i++ ) { + kl1 = ( i+j_end <= m ) ? j_end : m - i; + addr1 = a+ku+i; + number = GetValuesD( in_file, addr1, lda, 0, kl1 ); + if( number != kl1 ) return 1; + } + } /* if */ + return 0; +} /* GetBandArrayD */ + +MKL_INT GetBandArrayC(FILE *in_file, CBLAS_ORDER *order, MKL_INT kl, MKL_INT ku, + MKL_INT m, MKL_INT n, MKL_Complex8 *a, MKL_INT lda) +{ + MKL_INT i, j; + MKL_INT kl1, ku1, i_start, j_start, j_end, ku_rows, kl_rows, number; + MKL_Complex8 *addr, *addr1; + char buf[MAX_STRING_LEN]; + + if (*order == CblasRowMajor) { + for( i = 0; i < MIN( m, n ); i++ ) { + addr = a + i*lda; + kl1 = ( i - kl <= 0 ) ? i : kl; + ku1 = ( i + ku >= n ) ? MAX(0,n-i-1) : ku; + j_start = kl - kl1; + j_end = j_start + kl1 + ku1; + addr1 = addr + j_start; + number = GetValuesC( in_file, addr1, 1, 0, j_end-j_start+1 ); + if( number != j_end-j_start+1 ) return 1; + } + for( i = MIN( m, n ); i < MIN( m, MIN( m, n ) + kl); i++ ) { + addr = a + i*lda; + kl1 = n - i + kl; + j_start = ( kl > n ) ? kl - n : n - kl; + j_end = j_start + kl1 - 1; + addr1 = addr + j_start; + number = GetValuesC( in_file, addr1, 1, 0, j_end-j_start+1 ); + if( number != j_end-j_start+1 ) return 1; + } + } else if (*order == CblasColMajor) { + i_start = (ku > n ) ? ku - n + 1 : 0; + ku_rows = (ku > n ) ? n - 1 : ku; + j_start = ku_rows; + for( i = 0; i < ku_rows; i++ ) { + j = j_start*lda; addr1 = a+i+i_start; + number = GetValuesC( in_file, addr1, lda, j, n-ku_rows ); + if( number != n-ku_rows ) return 1; + j_start--; + } + + j_end = MIN(m,n); + addr1 = a+ku; + number = GetValuesC( in_file, addr1, lda, 0, j_end ); + if( number != j_end ) return 1; + + kl_rows = ( kl <= m-1 ) ? kl : m - 1; + for ( i = 1; i < kl_rows+1; i++ ) { + kl1 = ( i+j_end <= m ) ? j_end : m - i; + addr1 = a+ku+i; + number = GetValuesC( in_file, addr1, lda, 0, kl1 ); + if( number != kl1 ) return 1; + } + } /* if */ + return 0; +} /* GetBandArrayC */ + +MKL_INT GetBandArrayZ(FILE *in_file, CBLAS_ORDER *order, MKL_INT kl, MKL_INT ku, + MKL_INT m, MKL_INT n, MKL_Complex16 *a, MKL_INT lda) +{ + MKL_INT i, j; + MKL_INT kl1, ku1, i_start, j_start, j_end, ku_rows, kl_rows, number; + MKL_Complex16 *addr, *addr1; + + if (*order == CblasRowMajor) { + for( i = 0; i < MIN( m, n ); i++ ) { + addr = a + i*lda; + kl1 = ( i - kl <= 0 ) ? i : kl; + ku1 = ( i + ku >= n ) ? MAX(0,n-i-1) : ku; + j_start = kl - kl1; + j_end = j_start + kl1 + ku1; + addr1 = addr + j_start; + number = GetValuesZ( in_file, addr1, 1, 0, j_end-j_start+1 ); + if( number != j_end-j_start+1 ) return 1; + } + for( i = MIN( m, n ); i < MIN( m, MIN( m, n ) + kl); i++ ) { + addr = a + i*lda; + kl1 = n - i + kl; + j_start = ( kl > n ) ? kl - n : n - kl; + j_end = j_start + kl1 - 1; + addr1 = addr + j_start; + number = GetValuesZ( in_file, addr1, 1, 0, j_end-j_start+1 ); + if( number != j_end-j_start+1 ) return 1; + } + } else if (*order == CblasColMajor) { + i_start = (ku > n ) ? ku - n + 1 : 0; + ku_rows = (ku > n ) ? n - 1 : ku; + j_start = ku_rows; + for( i = 0; i < ku_rows; i++ ) { + j = j_start*lda; addr1 = a+i+i_start; + number = GetValuesZ( in_file, addr1, lda, j, n-ku_rows ); + if( number != n-ku_rows ) return 1; + j_start--; + } + + j_end = MIN(m,n); + addr1 = a+ku; + number = GetValuesZ( in_file, addr1, lda, 0, j_end ); + if( number != j_end ) return 1; + + kl_rows = ( kl <= m-1 ) ? kl : m - 1; + for ( i = 1; i < kl_rows+1; i++ ) { + kl1 = ( i+j_end <= m ) ? j_end : m - i; + addr1 = a+ku+i; + number = GetValuesZ( in_file, addr1, lda, 0, kl1 ); + if( number != kl1 ) return 1; + } + } /* if */ + return 0; +} /* GetBandArrayZ */ + +MKL_INT GetValuesI( FILE *in_file, MKL_INT *in_array, MKL_INT begin, MKL_INT max_numbers ) +{ + MKL_INT i, counter=0; + int value; + char buf[MAX_STRING_LEN], *str; + + do { + fgets( buf, MAX_STRING_LEN, in_file ); + str = strtok( buf, " " ); + if( str == NULL ){ + printf( "\n File format is inappropriate\n"); + return 0; + } + } while ( *str == COMMENTS ); + for( i = 0; i < max_numbers; i++ ) { + if ( *str==COMMENTS ) break; + if( sscanf( str, "%d", &value) != 1 ){ + printf( "\n File format is inappropriate\n" ); + return 0; + } + in_array[begin+i]=(MKL_INT)value; + counter++; + if ( (str = strtok( NULL, " " )) == NULL ) break; + } + return counter; +} + +MKL_INT GetValuesD( FILE *in_file, double *in_array, MKL_INT ld, MKL_INT begin, MKL_INT max_numbers ) +{ + MKL_INT i, counter=0; + double temp; + char buf[MAX_STRING_LEN], *str; + + do { + fgets( buf, MAX_STRING_LEN, in_file ); + str = strtok( buf, " " ); + if( str == NULL ){ + printf( "\n File format is inappropriate\n"); + return 0; + } + } while ( *str == COMMENTS ); + for( i = 0; i < max_numbers; i++ ) { + if ( *str==COMMENTS ) break; + if( sscanf( str, "%lf", &temp) != 1 ){ + printf( "\n File format is inappropriate\n" ); + return 0; + } + in_array[begin+i*ld]=temp; + counter++; + if ( (str = strtok( NULL, " " )) == NULL ) break; + } + return counter; +} /* GetValuesD */ + +MKL_INT GetValuesS( FILE *in_file, float *in_array, MKL_INT ld, MKL_INT begin, MKL_INT max_numbers ) +{ + MKL_INT i, counter=0; + float temp; + char buf[MAX_STRING_LEN], *str; + + do { + fgets( buf, MAX_STRING_LEN, in_file ); + str = strtok( buf, " " ); + if( str == NULL ){ + printf( "\n File format is inappropriate\n"); + return 0; + } + } while ( *str == COMMENTS ); + for( i = 0; i < max_numbers; i++ ) { + if ( *str==COMMENTS ) break; + if( sscanf( str, "%f", &temp) != 1 ){ + printf( "\n File format is inappropriate\n" ); + return 0; + } + in_array[begin+i*ld]=temp; + counter++; + if ( (str = strtok( NULL, " " )) == NULL ) break; + } + return counter; +} /* GetValuesS */ + +MKL_INT GetValuesC( FILE *in_file, MKL_Complex8 *in_array, MKL_INT ld, MKL_INT begin, MKL_INT max_numbers ) +{ + MKL_INT i, counter=0; + float re, im; + char seps[]=" ,()\t"; + char buf[MAX_STRING_LEN], *str; + + do { + fgets( buf, MAX_STRING_LEN, in_file ); + str = strtok( buf, seps ); + if( str == NULL ){ + printf( "\n File format is inappropriate\n"); + return 0; + } + } while ( *str == COMMENTS ); + for( i = 0; i < max_numbers; i++ ) { + if ( *str==COMMENTS ) break; + if( sscanf( str, "%f", &re) != 1 || (str = strtok( NULL,seps)) == NULL || + sscanf( str, "%f", &im) != 1 ){ + printf( "\n File format is inappropriate\n" ); + return 0; + } + in_array[begin+i].real=re; + in_array[begin+i].imag=im; + counter++; + if ( (str = strtok( NULL, seps )) == NULL ) break; + } + return counter; +} /* GetValuesC */ + +MKL_INT GetValuesZ( FILE *in_file, MKL_Complex16 *in_array, MKL_INT ld, MKL_INT begin, MKL_INT max_numbers ) +{ + MKL_INT i, counter=0; + double re, im; + char seps[]=" ,()\t"; + char buf[MAX_STRING_LEN], *str; + + do { + fgets( buf, MAX_STRING_LEN, in_file ); + str = strtok( buf, seps ); + if( str == NULL ){ + printf( "\n File format is inappropriate\n"); + return 0; + } + } while ( *str == COMMENTS ); + for( i = 0; i < max_numbers; i++ ) { + if ( *str==COMMENTS ) break; + if( sscanf( str, "%lf", &re) != 1 || (str = strtok( NULL,seps))== NULL || + sscanf( str, "%lf", &im) != 1 ){ + printf( "\n File format is inappropriate\n" ); + return 0; + } + in_array[begin+i*ld].real=re; + in_array[begin+i*ld].imag=im; + counter++; + if ( (str = strtok( NULL, seps )) == NULL ) break; + } + return counter; +} /* GetValuesZ */ + +int GetIntegerParameters(FILE *in_file, ...) +{ + int counter=0; + char buf[MAX_STRING_LEN], *str; + MKL_INT *param; + va_list ap; + int value; + + do { + fgets(buf, MAX_STRING_LEN, in_file); + str = strtok( buf, " " ); + if( str == NULL ){ + printf("\n File format is inappropriate\n"); + return 0; + } + } while ( *str == COMMENTS ); + va_start(ap, in_file); + param = va_arg(ap, MKL_INT *); + + while ( 1 ) { + if( *str == COMMENTS ) break; + if( sscanf(str, "%d", &value ) != 1 ){ + printf("\n File format is inappropriate\n"); + return 0; + } + *param = (MKL_INT)value; + counter++; + if( (str = strtok( NULL, " " )) == NULL ) break; + param = va_arg( ap, MKL_INT * ); + } + va_end( ap ); + return counter; +} /* GetIntegerParameters */ + +int GetCblasCharParameters(FILE *in_file, ...) +{ + int counter=0; + char buf[MAX_STRING_LEN], *str; + int *param; + va_list ap; + + do { + fgets(buf, MAX_STRING_LEN, in_file); + str = strtok( buf, " " ); + if( str == NULL ){ + printf("\n File format is inappropriate\n"); + return 0; + } + } while ( *str == COMMENTS ); + va_start(ap, in_file); + param = va_arg(ap, int *); + + while ( 1 ) { + if( *str == COMMENTS ) break; + if( sscanf(str, "%d", param ) != 1 ){ + printf("\n File format is inappropriate\n"); + return 0; + } + counter++; + if( (str = strtok( NULL, " " )) == NULL ) break; + param = va_arg( ap, int * ); + } + va_end( ap ); + return counter; +} /* GetCblasCharParameters */ + +int GetScalarsD( FILE *in_file, ... ) +{ + int i, counter=0; + char buf[MAX_STRING_LEN], *str; + double *param; + va_list ap; + + do { + fgets( buf, MAX_STRING_LEN, in_file ); + str = strtok( buf, " " ); + if( str == NULL ){ + printf( "\n File format is inappropriate\n"); + return 0; + } + } while ( *str == COMMENTS ); + va_start( ap, in_file ); + param = va_arg( ap, double * ); + while ( 1 ) { + if ( *str == COMMENTS ) break; + if( sscanf( str, "%lf", param ) != 1 ){ + printf( "\n File format is inappropriate\n" ); + return 0; + } + counter++; + if ( (str = strtok( NULL, " " )) == NULL ) break; + param = va_arg( ap, double * ); + } + va_end( ap ); + return counter; +} /* GetScalarsD */ + +int GetScalarsS( FILE *in_file, ... ) +{ + int i, counter=0; + char buf[MAX_STRING_LEN], *str; + float *param; + va_list ap; + + do { + fgets( buf, MAX_STRING_LEN, in_file ); + str = strtok( buf, " " ); + if( str == NULL ){ + printf( "\n File format is inappropriate\n"); + return 0; + } + } while ( *str == COMMENTS ); + va_start( ap, in_file ); + param = va_arg( ap, float * ); + while ( 1 ) { + if ( *str == COMMENTS ) break; + if( sscanf( str, "%f", param ) != 1 ){ + printf( "\n File format is inappropriate\n" ); + return 0; + } + counter++; + if ( (str = strtok( NULL, " " )) == NULL ) break; + param = va_arg( ap, float * ); + } + va_end( ap ); + return counter; +} /* GetScalarsS */ + +int GetScalarsC( FILE *in_file, ... ) +{ + int i, counter=0; + char buf[MAX_STRING_LEN], *str; + char seps[]=" ,()\t"; + float re, im; + MKL_Complex8 *param; + va_list ap; + + do { + fgets( buf, MAX_STRING_LEN, in_file ); + str = strtok( buf, seps ); + if( str == NULL ){ + printf( "\n File format is inappropriate\n"); + return 0; + } + } while ( *str == COMMENTS ); + va_start( ap, in_file ); + param = va_arg( ap, MKL_Complex8 * ); + while ( 1 ) { + if ( *str == COMMENTS ) break; + if( sscanf( str, "%f", &re) != 1 || (str = strtok( NULL,seps))== NULL || + sscanf( str, "%f", &im) != 1 ){ + printf( "\n File format is inappropriate %f %f\n", re, im ); + return 0; + } + param->real = re; + param->imag = im; + counter++; + if ( (str = strtok( NULL, seps )) == NULL ) break; + param = va_arg( ap, MKL_Complex8 * ); + } + va_end( ap ); + return counter; +} /* GetScalarsC */ + +int GetScalarsZ( FILE *in_file, ... ) +{ + int i, counter=0; + char buf[MAX_STRING_LEN], *str; + char seps[]=" ,()\t"; + double re, im; + MKL_Complex16 *param; + va_list ap; + + do { + fgets( buf, MAX_STRING_LEN, in_file ); + str = strtok( buf, seps ); + if( str == NULL ){ + printf( "\n File format is inappropriate\n"); + return 0; + } + } while ( *str == COMMENTS ); + va_start( ap, in_file ); + param = va_arg( ap, MKL_Complex16 * ); + while ( 1 ) { + if ( *str == COMMENTS ) break; + if( sscanf( str, "%lf", &re) != 1 || (str = strtok( NULL,seps)) == NULL || + sscanf( str, "%lf", &im) != 1 ){ + printf( "\n File format is inappropriate %f %f\n", re, im ); + return 0; + } + param->real = re; + param->imag = im; + counter++; + if ( (str = strtok( NULL, seps )) == NULL ) break; + param = va_arg( ap, MKL_Complex16 * ); + } + va_end( ap ); + return counter; +} /* GetScalarsZ */ + +void PrintParameters( char *names, ... ) +{ + CBLAS_ORDER *order; + CBLAS_SIDE *side; + CBLAS_UPLO *uplo; + CBLAS_DIAG *diag; + CBLAS_TRANSPOSE *trans; + char *p, *str, str1[MAX_STRING_LEN], buf[MAX_STRING_LEN]; + va_list ap; + char seps[]=" ,"; + MKL_INT i; + + printf("\n "); + va_start( ap, names ); + strcpy(buf, names); + str = strtok( buf, seps ); + if( str == NULL ){ + printf( "\n You must determine the parameters names\n"); + return; + } + do { + strcpy(str1, str); + p = str1; + for( i = 0; i < strlen(str1); i++ ) { + *p = tolower(str1[i]); + p++; + } + if ( strcmp( str1, "order" ) == 0 ) { + order = va_arg( ap, CBLAS_ORDER * ); + if ( (int)order == CblasRowMajor ) + printf("ORDER = CblasRowMajor "); + else if ( (int)order == CblasColMajor ) + printf("ORDER = CblasColMajor "); + } else if ( strcmp( str1, "side" ) == 0 ) { + side = va_arg( ap, CBLAS_SIDE * ); + if ( (int)side == CblasLeft ) + printf("SIDE = CblasLeft "); + else if ( (int)side == CblasRight ) + printf("SIDE = CblasRight "); + } else if ( strcmp( str1, "uplo" ) == 0 ) { + uplo = va_arg( ap, CBLAS_UPLO * ); + if ( (int)uplo == CblasUpper ) + printf("UPLO = CblasUpper "); + else if ( (int)uplo == CblasLower ) + printf("UPLO = CblasLower "); + } else if ( strcmp( str1, "diag" ) == 0 ) { + diag = va_arg( ap, CBLAS_DIAG * ); + if ( (int)diag == CblasUnit ) + printf("DIAG=CblasUnit "); + else if ( (int)diag == CblasNonUnit ) + printf("DIAG = CblasNonUnit "); + } else if ( ( strcmp( str1, "trans" ) == 0 ) || + ( strcmp( str1, "transa" ) == 0 ) || + ( strcmp( str1, "transb" ) == 0 ) ) { + trans = va_arg( ap, CBLAS_TRANSPOSE * ); + if ( (int)trans == CblasNoTrans ) + printf("%s = CblasNoTrans ", str); + else if ( (int)trans == CblasTrans ) + printf("%s = CblasTrans ", str); + else if ( (int)trans == CblasConjTrans ) + printf("%s = CblasConjTrans ", str); + } + } while ( (str = strtok( NULL, seps )) != NULL ); + va_end( ap ); + return; +} /* PrintParameters */ + +void PrintVectorI(MKL_INT n, MKL_INT *x, char *name) +{ + MKL_INT i; + + printf("\n VECTOR %s\n ", name); + for (i = 0; i < n; i++) + printf(" "INT_FORMAT, *(x+i)); + return; +} /* PrintVectorI */ + +void PrintVectorS(int flag, MKL_INT n, float *x, MKL_INT incx, char *name) +{ + MKL_INT i; + + switch(flag) { + case 0: + printf("\n VECTOR %s INC%s=" INT_FORMAT "\n ", + name, name, incx); + break; + case 1: + printf("\n VECTOR %s\n ", name); + break; + default: + break; + } /* switch */ + for (i = 0; i < (1+(n-1)*abs(incx)); i++) + printf("%6.2f ", *(x+i)); + return; +} /* PrintVectorS */ + +void PrintVectorD(int flag, MKL_INT n, double *x, MKL_INT incx, char *name) +{ + MKL_INT i; + + switch(flag) { + case 0: + printf("\n VECTOR %s INC%s=" INT_FORMAT "\n ", + name, name, incx); + break; + case 1: + printf("\n VECTOR %s\n ", name); + break; + default: + break; + } /* switch */ + for (i = 0; i < (1+(n-1)*abs(incx)); i++) + printf("%8.3f ", *(x+i)); + return; +} /* PrintVectorD */ + +void PrintVectorC(int flag, MKL_INT n, MKL_Complex8 *x, MKL_INT incx, char *name) +{ + MKL_INT i; + + switch(flag) { + case 0: + printf("\n VECTOR %s INC%s=" INT_FORMAT "\n ", + name, name, incx); + break; + case 1: + printf("\n VECTOR %s\n ", name); + break; + default: + break; + } /* switch */ + for (i = 0; i < (1+(n-1)*abs(incx)); i++) + printf("(%6.2f,%6.2f) ", (x+i)->real, (x+i)->imag); + return; +} /* PrintVectorC */ + +void PrintVectorZ(int flag, MKL_INT n, MKL_Complex16 *x, MKL_INT incx, char *name) +{ + MKL_INT i; + + switch(flag) { + case 0: + printf("\n VECTOR %s INC%s=" INT_FORMAT "\n ", + name, name, incx); + break; + case 1: + printf("\n VECTOR %s\n ", name); + break; + default: + break; + } /* switch */ + for (i = 0; i < (1+(n-1)*abs(incx)); i++) + printf("(%6.2f,%6.2f) ", (x+i)->real, (x+i)->imag); + return; +} /* PrintVectorZ */ + +void PrintArrayS(CBLAS_ORDER *order, int flag1, int flag2, MKL_INT *m, MKL_INT *n, float *a, + MKL_INT *lda, char *name) +{ + MKL_INT i, j; + float *addr; + + switch(flag1) { + case 0: + printf("\n ARRAY %s LD%s=" INT_FORMAT, name, name, *lda); + break; + case 1: + printf("\n ARRAY %s", name); + break; + default: + break; + } /* switch */ + if (*order == CblasRowMajor) { + if (flag2 == GENERAL_MATRIX) { + for (i = 0; i < *m; i++) { + printf("\n "); + addr = a + i*(*lda); + for (j = 0; j < *n; j++) + printf("%6.2f ", *(addr+j)); + } /* for */ + } else if (flag2 == UPPER_MATRIX) { + for (i = 0; i < (*m); i++) { + printf("\n "); + addr = a + i*(*lda); + for (j=0; j < i; j++) + printf(" "); + for (j = i; j < *n; j++) + printf("%6.2f ", *(addr+j)); + } /* for */ + } else if (flag2 == LOWER_MATRIX) { + for (i = 0; i < *m; i++) { + printf("\n "); + addr = a + i*(*lda); + for (j = 0; j <= i; j++) + printf("%6.2f ", *(addr+j)); + } /* for */ + } /* if */ + } else if (*order == CblasColMajor) { + if (flag2 == GENERAL_MATRIX) { + for (i = 0; i < *m; i++) { + printf("\n "); + addr = a + i; + for (j = 0; j < *n; j++) + printf("%6.2f ", *(addr+j*(*lda))); + } /* for */ + } else if (flag2 == UPPER_MATRIX) { + for (i = 0; i < (*m); i++) { + printf("\n "); + addr = a + i; + for (j=0; j < i; j++) + printf(" "); + for (j = i; j < *n; j++) + printf("%6.2f ", *(addr+j*(*lda))); + } /* for */ + } else if (flag2 == LOWER_MATRIX) { + for (i = 0; i < (*m); i++) { + printf("\n "); + addr = a + i; + for (j = 0; j <= i; j++) + printf("%6.2f ", *(addr+j*(*lda))); + } /* for */ + } /* if */ + } /* if */ + return; +} /* PrintArrayS */ + +void PrintArrayD(CBLAS_ORDER *order, int flag1, int flag2, MKL_INT *m, MKL_INT *n, double *a, + MKL_INT *lda, char *name) +{ + MKL_INT i, j; + double *addr; + + switch(flag1) { + case 0: + printf("\n ARRAY %s LD%s=" INT_FORMAT, + name, name, *lda); + break; + case 1: + printf("\n ARRAY %s", name); + break; + default: + break; + } /* switch */ + if (*order == CblasRowMajor) { + if (flag2 == GENERAL_MATRIX) { + for (i = 0; i < *m; i++) { + printf("\n "); + addr = a + i*(*lda); + for (j = 0; j < *n; j++) + printf("%8.3f ", *(addr+j)); + } /* for */ + } else if (flag2 == UPPER_MATRIX) { + for (i = 0; i < *m; i++) { + printf("\n "); + addr = a + i*(*lda); + for (j=0; j < i; j++) + printf(" "); + for (j = i; j < *n; j++) + printf("%8.3f ", *(addr+j)); + } /* for */ + } else if (flag2 == LOWER_MATRIX) { + for (i = 0; i < *m; i++) { + printf("\n "); + addr = a + i*(*lda); + for (j = 0; j <= i; j++) + printf("%8.3f ", *(addr+j)); + } /* for */ + } /* if */ + } else if (*order == CblasColMajor) { + if (flag2 == GENERAL_MATRIX) { + for (i = 0; i < *m; i++) { + printf("\n "); + addr = a + i; + for (j = 0; j < *n; j++) + printf("%8.3f ", *(addr+j*(*lda))); + } /* for */ + } else if (flag2 == UPPER_MATRIX) { + for (i = 0; i < *m; i++) { + printf("\n "); + addr = a + i; + for (j=0; j < i; j++) + printf(" "); + for (j = i; j < *n; j++) + printf("%8.3f ", *(addr+j*(*lda))); + } /* for */ + } else if (flag2 == LOWER_MATRIX) { + for (i = 0; i < *m; i++) { + printf("\n "); + addr = a + i; + for (j = 0; j <= i; j++) + printf("%8.3f ", *(addr+j*(*lda))); + } /* for */ + } /* if */ + } /* if */ + return; +} /* PrintArrayD */ + +void PrintArrayC(CBLAS_ORDER *order, int flag1, int flag2, MKL_INT *m, MKL_INT *n, MKL_Complex8 *a, + MKL_INT *lda, char *name) +{ + MKL_INT i, j; + MKL_Complex8 *addr; + + switch(flag1) { + case 0: + printf("\n ARRAY %s LD%s=" INT_FORMAT, + name, name, *lda); + break; + case 1: + printf("\n ARRAY %s", name); + break; + default: + break; + } /* switch */ + if (*order == CblasRowMajor) { + if (flag2 == GENERAL_MATRIX) { + for (i = 0; i < *m; i++) { + printf("\n "); + addr = a + i*(*lda); + for (j = 0; j < *n; j++) + printf("(%6.2f,%6.2f) ", (addr+j)->real, (addr+j)->imag); + } /* for */ + } else if (flag2 == UPPER_MATRIX) { + for (i = 0; i < *m; i++) { + printf("\n "); + addr = a + i*(*lda); + for (j=0; j < i; j++) + printf(" "); + for (j = i; j < *n; j++) + printf("(%6.2f,%6.2f) ", (addr+j)->real, (addr+j)->imag); + } /* for */ + } else if (flag2 == LOWER_MATRIX) { + for (i = 0; i < *m; i++) { + printf("\n "); + addr = a + i*(*lda); + for (j = 0; j <= i; j++) + printf("(%6.2f,%6.2f) ", (addr+j)->real, (addr+j)->imag); + } /* for */ + } /* if */ + } else if(*order == CblasColMajor) { + if (flag2 == GENERAL_MATRIX) { + for (i = 0; i < *m; i++) { + printf("\n "); + addr = a + i; + for (j = 0; j < *n; j++) + printf("(%6.2f,%6.2f) ", (addr+j*(*lda))->real, + (addr+j*(*lda))->imag); + } /* for */ + } else if (flag2 == UPPER_MATRIX) { + for (i = 0; i < *m; i++) { + printf("\n "); + addr = a + i; + for (j=0; j < i; j++) + printf(" "); + for (j = i; j < *n; j++) + printf("(%6.2f,%6.2f) ", (addr+j*(*lda))->real, + (addr+j*(*lda))->imag); + } /* for */ + } else if (flag2 == LOWER_MATRIX) { + for (i = 0; i < *m; i++) { + printf("\n "); + addr = a + i; + for (j = 0; j <= i; j++) + printf("(%6.2f,%6.2f) ", (addr+j*(*lda))->real, + (addr+j*(*lda))->imag); + } /* for */ + } /* if */ + } /* if */ + return; +} /* PrintArrayC */ + +void PrintArrayZ(CBLAS_ORDER *order, int flag1, int flag2, + MKL_INT *m, MKL_INT *n, MKL_Complex16 *a, MKL_INT *lda, char *name) +{ + MKL_INT i, j; + MKL_Complex16 *addr; + + if (flag1 == 0) + printf("\n ARRAY %s LD%s=" INT_FORMAT, name, name, *lda); + else + printf("\n ARRAY %s", name); + + if (*order == CblasRowMajor) { + if (flag2 == GENERAL_MATRIX) { + for (i = 0; i < *m; i++) { + printf("\n "); + addr = a + i*(*lda); + for (j = 0; j < *n; j++) + printf("(%6.2f,%6.2f) ", (addr+j)->real, + (addr+j)->imag); + } /* for */ + } else if (flag2 == UPPER_MATRIX) { + for (i = 0; i < *m; i++) { + printf("\n "); + addr = a + i*(*lda); + for (j=0; j < i; j++) + printf(" "); + for (j = i; j < *n; j++) + printf("(%6.2f,%6.2f) ", (addr+j)->real, + (addr+j)->imag); + } /* for */ + } else if (flag2 == LOWER_MATRIX) { + for (i = 0; i < *m; i++) { + printf("\n "); + addr = a + i*(*lda); + for (j = 0; j <= i; j++) + printf("(%6.2f,%6.2f) ", (addr+j)->real, + (addr+j)->imag); + } /* for */ + } /* if */ + } else if (*order == CblasColMajor) { + if (flag2 == GENERAL_MATRIX) { + for (i = 0; i < *m; i++) { + printf("\n "); + addr = a + i; + for (j = 0; j < *n; j++) + printf("(%6.2f,%6.2f) ", (addr+j*(*lda))->real, + (addr+j*(*lda))->imag); + } /* for */ + } else if (flag2 == UPPER_MATRIX) { + for (i = 0; i < *m; i++) { + printf("\n "); + addr = a + i; + for (j=0; j < i; j++) + printf(" "); + for (j = i; j < *n; j++) + printf("(%6.2f,%6.2f) ", (addr+j*(*lda))->real, + (addr+j*(*lda))->imag); + } /* for */ + } else if (flag2 == LOWER_MATRIX) { + for (i = 0; i < *m; i++) { + printf("\n "); + addr = a + i; + for (j = 0; j <= i; j++) + printf("(%6.2f,%6.2f) ", (addr+j*(*lda))->real, + (addr+j*(*lda))->imag); + } /* for */ + } /* if */ + } /* if */ + return; +} /* PrintArrayZ */ + +void PrintBandArrayS(CBLAS_ORDER *order, int flag, + MKL_INT kl, MKL_INT ku, MKL_INT m, MKL_INT n, + float *a, MKL_INT lda, char *name) +{ + MKL_INT i, j, l; + float *addr, *addr1; + MKL_INT kl1, ku1, i_start, j_start, j_end, kl_rows, ku_rows; + + if (flag == 0) + printf("\n ARRAY %s LD%s=" INT_FORMAT " KL=" INT_FORMAT " KU=" INT_FORMAT, + name, name, lda, kl, ku); + else + printf("\n ARRAY %s LD%s=" INT_FORMAT, name, name, lda); + + if (*order == CblasRowMajor) { + for( i = 0; i < MIN( m, n ); i++ ) { + printf("\n "); + addr = a + i*lda; + kl1 = ( i - kl <= 0 ) ? i : kl; + ku1 = ( i + ku >= n ) ? MAX(0,n-i-1) : ku; + j_start = kl - kl1; + j_end = j_start + kl1 + ku1; + addr1 = addr + j_start; + for( l=0; l < j_start; l++ ) + printf(" "); + for( l=0; l < j_end-j_start+1; l++ ) + printf("%6.2f ", *(addr1+l)); + } + for( i = MIN( m, n ); i < MIN( m, MIN( m, n ) + kl); i++ ) { + printf("\n "); + addr = a + i*lda; + kl1 = n - i + kl; + j_start = ( kl > n ) ? kl - n : n - kl; + j_end = j_start + kl1 - 1; + addr1 = addr + j_start; + for( l=0; l < j_start; l++ ) + printf(" "); + for( l=0; l < j_end-j_start+1; l++ ) + printf("%6.2f ", *(addr1+l)); + } + } else if (*order == CblasColMajor) { + i_start = (ku > n ) ? ku - n + 1 : 0; + ku_rows = (ku > n ) ? n - 1 : ku; + j_start = ku_rows; + for( i = 0; i < ku_rows; i++ ) { + printf("\n "); + j = j_start*lda; addr1 = a + i + i_start; + for( l=0; l < j_start; l++ ) + printf(" "); + for( l=0; l < n-ku_rows; l++ ) + printf("%6.2f ", *(addr1+j+l*lda) ); + j_start--; + } + + j_end = MIN(m,n); + addr1 = a + ku; + printf("\n "); + for( l=0; l < j_end; l++ ) + printf("%6.2f ", *(addr1+l*lda) ); + + kl_rows = ( kl <= m-1 ) ? kl : m - 1; + for ( i = 1; i < kl_rows+1; i++ ) { + printf("\n "); + kl1 = ( i+j_end <= m ) ? j_end : m - i; + addr1 = a + ku + i; + for( l=0; l < kl1; l++ ) + printf("%6.2f ", *(addr1+l*lda) ); + } + } /* if */ + return; +} /* PrintBandArrayS */ + +void PrintBandArrayD(CBLAS_ORDER *order, int flag, + MKL_INT kl, MKL_INT ku, MKL_INT m, MKL_INT n, + double *a, MKL_INT lda, char *name) +{ + MKL_INT i, j, l; + double *addr, *addr1; + MKL_INT kl1, ku1, i_start, j_start, j_end, kl_rows, ku_rows; + + if (flag == 0) + printf("\n ARRAY %s LD%s=" INT_FORMAT " KL=" INT_FORMAT " KU=" INT_FORMAT, + name, name, lda, kl, ku); + else + printf("\n ARRAY %s LD%s=" INT_FORMAT, name, name, lda); + + if (*order == CblasRowMajor) { + for( i = 0; i < MIN( m, n ); i++ ) { + printf("\n "); + addr = a + i*lda; + kl1 = ( i - kl <= 0 ) ? i : kl; + ku1 = ( i + ku >= n ) ? MAX(0,n-i-1) : ku; + j_start = kl - kl1; + j_end = j_start + kl1 + ku1; + addr1 = addr + j_start; + for( l=0; l < j_start; l++ ) + printf(" "); + for( l=0; l < j_end-j_start+1; l++ ) + printf("%6.2f ", *(addr1+l)); + } + for( i = MIN( m, n ); i < MIN( m, MIN( m, n ) + kl); i++ ) { + printf("\n "); + addr = a + i*lda; + kl1 = n - i + kl; + j_start = ( kl > n ) ? kl - n : n - kl; + j_end = j_start + kl1 - 1; + addr1 = addr + j_start; + for( l=0; l < j_start; l++ ) + printf(" "); + for( l=0; l < j_end-j_start+1; l++ ) + printf("%6.2f ", *(addr1+l)); + } + } else if (*order == CblasColMajor) { + i_start = (ku > n ) ? ku - n + 1 : 0; + ku_rows = (ku > n ) ? n - 1 : ku; + j_start = ku_rows; + for( i = 0; i < ku_rows; i++ ) { + printf("\n "); + j = j_start*lda; addr1 = a + i + i_start; + for( l=0; l < j_start; l++ ) + printf(" "); + for( l=0; l < n-ku_rows; l++ ) + printf("%6.2f ", *(addr1+j+l*lda) ); + j_start--; + } + + j_end = MIN(m,n); + addr1 = a + ku; + printf("\n "); + for( l=0; l < j_end; l++ ) + printf("%6.2f ", *(addr1+l*lda) ); + + kl_rows = ( kl <= m-1 ) ? kl : m - 1; + for ( i = 1; i < kl_rows+1; i++ ) { + printf("\n "); + kl1 = ( i+j_end <= m ) ? j_end : m - i; + addr1 = a + ku + i; + for( l=0; l < kl1; l++ ) + printf("%6.2f ", *(addr1+l*lda) ); + } + } /* if */ + return; +} /* PrintBandArrayD */ + +void PrintBandArrayC(CBLAS_ORDER *order, int flag, + MKL_INT kl, MKL_INT ku, MKL_INT m, MKL_INT n, + MKL_Complex8 *a, MKL_INT lda, char *name) +{ + MKL_INT i, j, l; + MKL_Complex8 *addr, *addr1; + MKL_INT kl1, ku1, j_start, j_end, i_start, kl_rows, ku_rows; + + if (flag == 0) + printf("\n ARRAY %s LD%s=" INT_FORMAT " KL=" INT_FORMAT " KU=" INT_FORMAT, + name, name, lda, kl, ku); + else + printf("\n ARRAY %s LD%s=" INT_FORMAT, name, name, lda); + + if (*order == CblasRowMajor) { + for( i = 0; i < MIN( m, n ); i++ ) { + printf("\n "); + addr = a + i*lda; + kl1 = ( i - kl <= 0 ) ? i : kl; + ku1 = ( i + ku >= n ) ? MAX(0,n-i-1) : ku; + j_start = kl - kl1; + j_end = j_start + kl1 + ku1; + addr1 = addr + j_start; + for( l=0; l < j_start; l++ ) + printf(" "); + for( l=0; l < j_end-j_start+1; l++ ) + printf("(%6.2f,%6.2f) ", (addr1+l)->real, (addr1+l)->imag); + } + for( i = MIN( m, n ); i < MIN( m, MIN( m, n ) + kl); i++ ) { + printf("\n "); + addr = a + i*lda; + kl1 = n - i + kl; + j_start = ( kl > n ) ? kl - n : n - kl; + j_end = j_start + kl1 - 1; + addr1 = addr + j_start; + for( l=0; l < j_start; l++ ) + printf(" "); + for( l=0; l < j_end-j_start+1; l++ ) + printf("(%6.2f,%6.2f) ", (addr1+l)->real, (addr1+l)->imag); + } + } else if (*order == CblasColMajor) { + i_start = (ku > n ) ? ku - n + 1 : 0; + ku_rows = (ku > n ) ? n - 1 : ku; + j_start = ku_rows; + for( i = 0; i < ku_rows; i++ ) { + printf("\n "); + j = j_start*lda; addr1 = a + i + i_start; + for( l=0; l < j_start; l++ ) + printf(" "); + for( l=0; l < n-ku_rows; l++ ) + printf("(%6.2f,%6.2f) ", + (addr1+j+l*lda)->real, (addr1+j+l*lda)->imag ); + j_start--; + } + + j_end = MIN(m,n); + addr1 = a + ku; + printf("\n "); + for( l=0; l < j_end; l++ ) + printf("(%6.2f,%6.2f) ", + (addr1+l*lda)->real, (addr1+l*lda)->imag ); + + kl_rows = ( kl <= m-1 ) ? kl : m - 1; + for ( i = 1; i < kl_rows+1; i++ ) { + printf("\n "); + kl1 = ( i+j_end <= m ) ? j_end : m - i; + addr1 = a + ku + i; + for( l=0; l < kl1; l++ ) + printf("(%6.2f,%6.2f) ", + (addr1+l*lda)->real, (addr1+l*lda)->imag ); + } + } /* if */ + return; +} /* PrintBandArrayC */ + +void PrintBandArrayZ(CBLAS_ORDER *order, int flag, + MKL_INT kl, MKL_INT ku, MKL_INT m, MKL_INT n, + MKL_Complex16 *a, MKL_INT lda, char *name) +{ + MKL_INT i, j, l; + MKL_Complex16 *addr, *addr1; + MKL_INT kl1, ku1, j_start, j_end, i_start, kl_rows, ku_rows; + + if (flag == 0) + printf("\n ARRAY %s LD%s=" INT_FORMAT " KL=" INT_FORMAT " KU=" INT_FORMAT, + name, name, lda, kl, ku); + else + printf("\n ARRAY %s LD%s=" INT_FORMAT, name, name, lda); + + if (*order == CblasRowMajor) { + for( i = 0; i < MIN( m, n ); i++ ) { + printf("\n "); + addr = a + i*lda; + kl1 = ( i - kl <= 0 ) ? i : kl; + ku1 = ( i + ku >= n ) ? MAX(0,n-i-1) : ku; + j_start = kl - kl1; + j_end = j_start + kl1 + ku1; + addr1 = addr + j_start; + for( l=0; l < j_start; l++ ) + printf(" "); + for( l=0; l < j_end-j_start+1; l++ ) + printf("(%6.2f,%6.2f) ", (addr1+l)->real, (addr1+l)->imag); + } + for( i = MIN( m, n ); i < MIN( m, MIN( m, n ) + kl); i++ ) { + printf("\n "); + addr = a + i*lda; + kl1 = n - i + kl; + j_start = ( kl > n ) ? kl - n : n - kl; + j_end = j_start + kl1 - 1; + addr1 = addr + j_start; + for( l=0; l < j_start; l++ ) + printf(" "); + for( l=0; l < j_end-j_start+1; l++ ) + printf("(%6.2f,%6.2f) ", (addr1+l)->real, (addr1+l)->imag); + } + } else if (*order == CblasColMajor) { + i_start = (ku > n ) ? ku - n + 1 : 0; + ku_rows = (ku > n ) ? n - 1 : ku; + j_start = ku_rows; + for( i = 0; i < ku_rows; i++ ) { + printf("\n "); + j = j_start*lda; addr1 = a + i + i_start; + for( l=0; l < j_start; l++ ) + printf(" "); + for( l=0; l < n-ku_rows; l++ ) + printf("(%6.2f,%6.2f) ", + (addr1+j+l*lda)->real, (addr1+j+l*lda)->imag ); + j_start--; + } + + j_end = MIN(m,n); + addr1 = a + ku; + printf("\n "); + for( l=0; l < j_end; l++ ) + printf("(%6.2f,%6.2f) ", + (addr1+l*lda)->real, (addr1+l*lda)->imag ); + + kl_rows = ( kl <= m-1 ) ? kl : m - 1; + for ( i = 1; i < kl_rows+1; i++ ) { + printf("\n "); + kl1 = ( i+j_end <= m ) ? j_end : m - i; + addr1 = a + ku + i; + for( l=0; l < kl1; l++ ) + printf("(%6.2f,%6.2f) ", + (addr1+l*lda)->real, (addr1+l*lda)->imag ); + } + } /* if */ + return; +} /* PrintBandArrayZ */ + diff --git a/spgemm/mkl_xphi/commonmm.h b/spgemm/mkl_xphi/commonmm.h new file mode 100644 index 0000000000000000000000000000000000000000..7ccafda803dcfe24b76a78c37a7d27fdae3a2013 --- /dev/null +++ b/spgemm/mkl_xphi/commonmm.h @@ -0,0 +1,53 @@ +#ifndef __COMMONMM_H__ +#define __COMMONMM_H_ + +#define FIELD_LENGTH 128 +//char filename[FIELD_LENGTH]; + +enum spmv_target { use_cpu, use_mkl, use_mic, use_mkl_mic, use_mic_rowcyclic }; +char *target_str[] = { "CPU", "MKL", "MIC", "MKL_MIC", "MIC_ROWCYCLIC" }; +enum enum_datastructure { DS_ICSR, DS_CSR }; +char *str_datastructure[] = { "ICSR", "CSR" }; +char *str_algorithm[] = { "OUTER", "INNER" }; + +__declspec(target(mic)) int datastructure; + +typedef int csi; +typedef double csv; +__declspec(target(mic)) csv zero = 0.0; + +__declspec(target(mic)) int use_sparse_c; +__declspec(target(mic)) int size_dacc; +__declspec(target(mic)) csv* dacc; + + + + +typedef struct csr_t { + csi m; + csi n; + csi nzmax; + csi nr; + csi* r; + csi* p; + csi* j; + csv* x; + int numInMats; // number of input matrices stored in this struct + int numRows; + int nnonempty_rows; + int numCols; + int totalnnz; + // int totalnnzmax; + int* nItems; // nnz of each matrix + int* pirow; + int size_pirow; + int* pnr; + int* h_rowDelimiters; + int size_h_rowDelimiters; + int* h_cols; + csv* h_val; + char filename[FIELD_LENGTH]; +} csr; + + +#endif diff --git a/spgemm/mkl_xphi/example_read.c b/spgemm/mkl_xphi/example_read.c new file mode 100644 index 0000000000000000000000000000000000000000..af0f6e9c08b47a3e7b3b5763f547b8522415356c --- /dev/null +++ b/spgemm/mkl_xphi/example_read.c @@ -0,0 +1,104 @@ +/* +* Matrix Market I/O example program +* +* Read a real (non-complex) sparse matrix from a Matrix Market (v. 2.0) file. +* and copies it to stdout. This porgram does nothing useful, but +* illustrates common usage of the Matrix Matrix I/O routines. +* (See http://math.nist.gov/MatrixMarket for details.) +* +* Usage: a.out [filename] > output +* +* +* NOTES: +* +* 1) Matrix Market files are always 1-based, i.e. the index of the first +* element of a matrix is (1,1), not (0,0) as in C. ADJUST THESE +* OFFSETS ACCORDINGLY offsets accordingly when reading and writing +* to files. +* +* 2) ANSI C requires one to use the "l" format modifier when reading +* double precision floating point numbers in scanf() and +* its variants. For example, use "%lf", "%lg", or "%le" +* when reading doubles, otherwise errors will occur. +*/ + +#include +#include +#include "mmio.h" + +int main(int argc, char *argv[]) +{ + int ret_code; + MM_typecode matcode; + FILE *f; + int M, N, nz; + int i, *I, *J; + double *val; + + if (argc < 2) + { + fprintf(stderr, "Usage: %s [martix-market-filename]\n", argv[0]); + exit(1); + } + else + { + if ((f = fopen(argv[1], "r")) == NULL) + exit(1); + } + + if (mm_read_banner(f, &matcode) != 0) + { + printf("Could not process Matrix Market banner.\n"); + exit(1); + } + + + /* This is how one can screen matrix types if their application */ + /* only supports a subset of the Matrix Market data types. */ + + if (mm_is_complex(matcode) && mm_is_matrix(matcode) && + mm_is_sparse(matcode) ) + { + printf("Sorry, this application does not support "); + printf("Market Market type: [%s]\n", mm_typecode_to_str(matcode)); + exit(1); + } + + /* find out size of sparse matrix .... */ + + if ((ret_code = mm_read_mtx_crd_size(f, &M, &N, &nz)) !=0) + exit(1); + + + /* reseve memory for matrices */ + + I = (int *) malloc(nz * sizeof(int)); + J = (int *) malloc(nz * sizeof(int)); + val = (double *) malloc(nz * sizeof(double)); + + + /* NOTE: when reading in doubles, ANSI C requires the use of the "l" */ + /* specifier as in "%lg", "%lf", "%le", otherwise errors will occur */ + /* (ANSI C X3.159-1989, Sec. 4.9.6.2, p. 136 lines 13-15) */ + + for (i=0; i +#include +#include "mmio.h" + +#define nz 4 +#define M 10 +#define N 10 + +int main() +{ + MM_typecode matcode; + int I[nz] = { 0, 4, 2, 8 }; + int J[nz] = { 3, 8, 7, 5 }; + double val[nz] = {1.1, 2.2, 3.2, 4.4}; + int i; + + mm_initialize_typecode(&matcode); + mm_set_matrix(&matcode); + mm_set_coordinate(&matcode); + mm_set_real(&matcode); + + mm_write_banner(stdout, matcode); + mm_write_mtx_crd_size(stdout, M, N, nz); + + /* NOTE: matrix market files use 1-based indices, i.e. first element + of a vector has index 1, not 0. */ + + for (i=0; i + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + +#define COMMENTS ':' +#define MAX_STRING_LEN 512 + +#define MIN(a,b) (((a) > (b)) ? (b) : (a)) +#define MAX(a,b) (((a) > (b)) ? (a) : (b)) + + +#define GENERAL_MATRIX 0 +#define UPPER_MATRIX 1 +#define LOWER_MATRIX -1 + +#define FULLPRINT 0 +#define SHORTPRINT 1 + +#ifdef MKL_ILP64 + #define INT_FORMAT "%lld" +#else + #define INT_FORMAT "%d" +#endif + +/* + * =========================================== + * Prototypes for example program functions + * =========================================== + */ + +int GetIntegerParameters(FILE*, ...); +int GetScalarsD(FILE*, ...); +int GetScalarsS(FILE*, ...); +int GetScalarsC(FILE*, ...); +int GetScalarsZ(FILE*, ...); +int GetCblasCharParameters(FILE *in_file, ...); +MKL_INT MaxValue(MKL_INT, MKL_INT*); +MKL_INT GetVectorI(FILE*, MKL_INT, MKL_INT*); +MKL_INT GetVectorS(FILE*, MKL_INT, float*, MKL_INT); +MKL_INT GetVectorC(FILE*, MKL_INT, MKL_Complex8*, MKL_INT); +MKL_INT GetVectorD(FILE*, MKL_INT, double*, MKL_INT); +MKL_INT GetVectorZ(FILE*, MKL_INT, MKL_Complex16*, MKL_INT); +MKL_INT GetArrayS(FILE*, CBLAS_ORDER*, MKL_INT, MKL_INT*, MKL_INT*, float*, MKL_INT*); +MKL_INT GetArrayD(FILE*, CBLAS_ORDER*, MKL_INT, MKL_INT*, MKL_INT*, double*, MKL_INT*); +MKL_INT GetArrayC(FILE*, CBLAS_ORDER*, MKL_INT, MKL_INT*, MKL_INT*, MKL_Complex8*, MKL_INT*); +MKL_INT GetArrayZ(FILE*, CBLAS_ORDER*, MKL_INT, MKL_INT*, MKL_INT*, MKL_Complex16*, MKL_INT*); +MKL_INT GetBandArrayS(FILE*, CBLAS_ORDER*, MKL_INT, MKL_INT, MKL_INT, MKL_INT, float*, MKL_INT); +MKL_INT GetBandArrayD(FILE*, CBLAS_ORDER*, MKL_INT, MKL_INT, MKL_INT, MKL_INT, double*, MKL_INT); +MKL_INT GetBandArrayC(FILE*, CBLAS_ORDER*, MKL_INT, MKL_INT, MKL_INT, MKL_INT, MKL_Complex8*, MKL_INT); +MKL_INT GetBandArrayZ(FILE*, CBLAS_ORDER*, MKL_INT, MKL_INT, MKL_INT, MKL_INT, MKL_Complex16*, MKL_INT); +MKL_INT GetValuesI(FILE *, MKL_INT *, MKL_INT, MKL_INT); +MKL_INT GetValuesC(FILE *, MKL_Complex8*, MKL_INT, MKL_INT, MKL_INT); +MKL_INT GetValuesZ(FILE *, MKL_Complex16*, MKL_INT, MKL_INT, MKL_INT); +MKL_INT GetValuesD(FILE *, double*, MKL_INT, MKL_INT, MKL_INT); +MKL_INT GetValuesS(FILE *, float*, MKL_INT, MKL_INT, MKL_INT); +void PrintParameters( char*, ... ); +void PrintTrans(MKL_INT, CBLAS_TRANSPOSE*, CBLAS_TRANSPOSE*); +void PrintOrder(CBLAS_ORDER*); +void PrintVectorI(MKL_INT, MKL_INT*, char*); +void PrintVectorS(int, MKL_INT, float*, MKL_INT, char*); +void PrintVectorC(int, MKL_INT, MKL_Complex8*, MKL_INT, char*); +void PrintVectorD(int, MKL_INT, double*, MKL_INT, char*); +void PrintVectorZ(int, MKL_INT, MKL_Complex16*, MKL_INT, char*); +void PrintArrayS(CBLAS_ORDER*, int, int, MKL_INT*, MKL_INT*, float*, MKL_INT*, char*); +void PrintArrayD(CBLAS_ORDER*, int, int, MKL_INT*, MKL_INT*, double*, MKL_INT*, char*); +void PrintArrayC(CBLAS_ORDER*, int, int, MKL_INT*, MKL_INT*, MKL_Complex8*, MKL_INT*, char*); +void PrintArrayZ(CBLAS_ORDER*, int, int, MKL_INT*, MKL_INT*, MKL_Complex16*, MKL_INT*, char*); +void PrintBandArrayS(CBLAS_ORDER*, int, MKL_INT, MKL_INT, MKL_INT, MKL_INT, float*, MKL_INT, char*); +void PrintBandArrayD(CBLAS_ORDER*, int, MKL_INT, MKL_INT, MKL_INT, MKL_INT, double*, MKL_INT, char*); +void PrintBandArrayC(CBLAS_ORDER*, int, MKL_INT, MKL_INT, MKL_INT, MKL_INT, MKL_Complex8*, MKL_INT, char*); +void PrintBandArrayZ(CBLAS_ORDER*, int, MKL_INT, MKL_INT, MKL_INT, MKL_INT, MKL_Complex16*, MKL_INT, char*); + +#ifdef __cplusplus +} +#endif /* __cplusplus */ + +#endif /* _MKL_EXAMPLE_H_ */ diff --git a/spgemm/mkl_xphi/mklspmm.c b/spgemm/mkl_xphi/mklspmm.c new file mode 100644 index 0000000000000000000000000000000000000000..1b78d7f7d8e836042204498e5e90cd1c36dcb485 --- /dev/null +++ b/spgemm/mkl_xphi/mklspmm.c @@ -0,0 +1,734 @@ +/*includes {{{*/ +#include +#include +#include +#include "mkl.h" +#include "mkl_types.h" +#include "mkl_spblas.h" + +#include "commonmm.h" +#include "mmio.h" +/*}}}*/ +/* global varibles and defines {{{*/ +int option_host; +#define OPTION_HOST_CPU -1 +#define OPTION_HOST_MIC 0 +int micdev; +__declspec(target(mic)) int max_threads = 0; +__declspec(target(mic)) int nworker = 0; +__declspec(target(mic)) int num_repeat; +__declspec(target(mic)) int option_print_matrices = 0; +#define OPTION_NOPRINT_MATRICES 0 +#define OPTION_PRINT_MATRICES 1 +__declspec(target(mic)) int option_printfile_matrices = 0; +#define OPTION_NOPRINTFILE_MATRICES 0 +#define OPTION_PRINTFILE_MATRICES 1 +__declspec(target(mic)) char transa = 'n'; +__declspec(target(mic)) double time_mic_numeric_mm; +__declspec(target(mic)) double time_mic_symbolic_mm; +/*}}}*/ +/* method declarations {{{*/ + int +main(int argc, char* argv[]); + __declspec(target(mic)) void +printmm_one(int m, double* Aval, int* AJ, int* AI); + __declspec(target(mic)) void +printfilemm_one(char* file, int m, int n, double* Aval, int* AJ, int* AI); + void +printmm_zero(int m, double* Aval, int* AJ, int* AI); +/*}}}*/ + +/*csr util{{{*/ +//#include "../../csr/sspmxm.h" +#define CS_MAX(a,b) (((a) > (b)) ? (a) : (b)) + +csr *csr_spfree(csr *A); +/* free workspace and return a sparse matrix result */ +csr *csr_done(csr *C, void *w, void *x, csi ok) +{ +//#ifdef KADEBUG +// printf("%d:%s:%s():%d, %s(%lu,%lu,%lu)\n", me, __FILE__, __FUNCTION__, +// __LINE__, C->name, C->m, C->n, C->nzmax); +//#endif +// cs_free(w); /* free workspace */ +// cs_free(x); + return(ok ? C : csr_spfree(C)); /* return result if OK, else free it */ +} + + +/* wrapper for free */ +void *cs_free(void *p) +{ + if (p) + free(p); /* free p if it is not already NULL */ + return(NULL); /* return NULL to simplify the use of cs_free */ +} +/* wrapper for realloc */ +void *csr_realloc(void *p, csi n, size_t size, csi *ok) +{ + void *pnew = NULL; + pnew = realloc(p, CS_MAX(n, 1) * size); /* realloc the block */ + *ok = (pnew != NULL); /* realloc fails if pnew is NULL */ + if (pnew == NULL) { + printf("%d:reallocation failed, pnew is NULL\n", __LINE__); + } +//printf("%s:%d: n=%d ok=%d\n", __FUNCTION__, __LINE__, n, *ok); + return((*ok) ? pnew : p); /* return original p if failure */ +} + +/* wrapper for realloc */ +void *cs_realloc(void *p, csi n, size_t size, csi *ok) +{ + void *pnew = NULL; + pnew = realloc(p, CS_MAX(n, 1) * size); /* realloc the block */ + *ok = (pnew != NULL); /* realloc fails if pnew is NULL */ + if (pnew == NULL) { + printf("reallocation failed\n"); + } + return((*ok) ? pnew : p); /* return original p if failure */ +} + + +/* change the max # of entries sparse matrix */ +csi csr_sprealloc(csr *A, csi nzmax) +{ +//printf("%d: nzmax=%d\n", __LINE__, nzmax); + csi ok, oki = 0, okj = 1, okx = 1; + if (!A) + return(0); + if (nzmax <= 0) + nzmax = A->p[A->m]; + A->j = (int*)csr_realloc(A->j, nzmax, sizeof(csi), &oki); + if (A->x) + A->x = (csv*)csr_realloc(A->x, nzmax, sizeof(csv), &okx); +//printf("%d: ok=%d oki=%d okj=%d okx=%d\n", __LINE__, ok, oki, okj, okx); + ok = (oki && okj && okx); +//printf("%d: ok=%d oki=%d okj=%d okx=%d\n", __LINE__, ok, oki, okj, okx); + +//printf("%s:%d: A->nzmax=%d nzmax=%d\n", __FUNCTION__, __LINE__, A->nzmax, nzmax); + if (ok) + A->nzmax = nzmax; +//printf("%s:%d: A->nzmax=%d nzmax=%d\n", __FUNCTION__, __LINE__, A->nzmax, nzmax); + return(ok); +} + +/* free a sparse matrix */ +csr *csr_spfree(csr *A) +{ + if (!A) + return(NULL); /* do nothing if A already NULL */ + cs_free(A->p); + A->p = NULL; + cs_free(A->j); + A->j = NULL; + cs_free(A->x); + A->x = NULL; + cs_free(A->r); + A->r = NULL; +// cs_free(A->name); + cs_free(A); /* free the cs struct and return NULL */ + return NULL; +} +/* allocate a sparse matrix (triplet form or compressed-ROW form) */ +csr *csr_spalloc(csi m, csi n, csi nzmax, int values, int triplet, csv f) // floatType f is to suppress compile error +{ +//printf("m=%d n=%d nzmax=%d\n", m, n, nzmax); + csr* A = (csr*)calloc(1, sizeof(csr)); /* allocate the cs struct */ +// A->name = calloc(MTX_NAME_LEN, sizeof(char)); +// sprintf(A->name, "N/A"); + if (!A) { + perror("sparse allocation failed"); + return(NULL); /* out of memory */ + } + A->m = m; /* define dimensions and nzmax */ + A->n = n; + A->nzmax = nzmax = CS_MAX(nzmax, 0); +//printf("A m=%d n=%d nzmax=%d\n", A->m, A->n, A->nzmax); + A->nr = 0; // number of nonzero rows + A->p = (csi*)calloc(m + 2, sizeof(csi)); + A->j = (csi*)calloc(CS_MAX(nzmax,1), sizeof(csi)); + A->x = (csv*)calloc(CS_MAX(nzmax,1), sizeof(csv)); + return((!A->p || !A->j || !A->x) ? csr_spfree(A) : A); +}/*}}}*/ +/*csr_multiply{{{*/ +/* C = A*B +If nonzero counts of matrices are zero or number of rows/cols is zero, it is advised not to call this function. +*/ +// run 8 CUT 0.1 NNZ MTX ~/matrices/nlpkkt200.mtx ~/matrices/nlpkkt200.mtx tmp stdout MNAC SERIAL +csr *csr_multiply(csi Am, csi An, csi Anzmax, const csi* Ap, const csi* Aj, const csv* Ax, csi Bm, csi Bn, csi Bnzmax, const csi* Bp, const csi* Bj, const csv* Bx, long* nummult, csi* xb, csv* x) +{ + csv tf = 0; + csi p, jp, j, kp, k, i, nz = 0, anz, *Cp, *Cj, m, n, + bnz, values = 1; + csv *Cx; + csr *C; + //printf("%s:%d\n", __FILE__, __LINE__); + if (An != Bm) + return(NULL); + //printf("%s:%d\n", __FILE__, __LINE__); + if (Anzmax == 0 || Bnzmax == 0) { + C = csr_spalloc(Am, Bn, 0, values, 0, tf); +/*#ifdef KADEBUG + printf("%d:%s:%s():%d, A(%s,%lu,%lu,%lu), B(%s,%lu,%lu,%lu), \ + One of matrice is Zero! C(%lu,%lu,%lu)\n", me, __FILE__, __FUNCTION__, + __LINE__, A->name, A->m, A->n, A->nzmax, B->name, B->m, B->n, + B->nzmax, C->m, C->n, C->nzmax); +#endif*/ + return C; + } +// printf("%s:%d\n", __FILE__, __LINE__); + m = Am; + anz = Ap[Am]; + n = Bn; + bnz = Bp[Bm]; + for(i = 0; i < n; i++) xb[i] = 0; + //csi* xb = ka_calloc(n, sizeof(csi)); /* get workspace */ + for(i = 0; i < n; i++) + xb[i] = 0; + values = (Ax != NULL) && (Bx != NULL); + //csv* x = values ? ka_calloc(n, sizeof(csv)) : NULL; /* get workspace */ + csi tnz = (anz + bnz) * 2; +//printf("A->m=%d B->n=%d tnz=%d\n", A->m, B->n, tnz); + C = csr_spalloc(m, n, tnz, values, 0, tf); /* allocate result */ +// sprintf(C->name, "C=(%s)*(%s)", A->name, B->name); +/*#ifdef KADEBUG + printf("%d:%s:%s():%d, A(%s,%lu,%lu,%lu), B(%s,%lu,%lu,%lu), \ + allocated C(%lu,%lu,%lu)\n", me, __FILE__, __FUNCTION__, + __LINE__, A->name, A->m, A->n, A->nzmax, B->name, B->m, B->n, + B->nzmax, C->m, C->n, C->nzmax); +#endif*/ + if (!C || !xb || (values && !x)) + return (csr_done(C, xb, x, 0)); +// csi zero = 0; +// for(i = 0; i < n; i++) +// xb[i] = zero; + Cp = C->p; + //csi* oldj = C->j; +//printf("C->m=%d C->n=%d C->nzmax=%d C->j[0]=%d\n", C->m, C->n, C->nzmax, C->j[0]); + for (i = 0; i < m; i++) { +//C->j != oldj ? printf("Changed old=%u new=%u\n", oldj, C->j):0; + if ( ( (nz + n) > C->nzmax ) ) { + if(!csr_sprealloc(C, (2 * (C->nzmax) + n) ) ) { +/*#ifdef KADEBUG + printf("%d:%s:%s():%d, A(%s,%lu,%lu,%lu), B(%s,%lu,%lu,%lu), CANNOT BE\ + enlargened C(%lu,%lu,%lu)\n", me, __FILE__, __FUNCTION__, + __LINE__, A->name, A->m, A->n, A->nzmax, B->name, B->m, B->n, + B->nzmax, C->m, C->n, C->nzmax); +#endif */ + return (csr_done(C, xb, x, 0)); // out of memory + } else { +/*#ifdef KADEBUG + printf("%d:%s:%s():%d, A(%s,%lu,%lu,%lu), B(%s,%lu,%lu,%lu), enlargened \ + C(%lu,%lu,%lu)\n", me, __FILE__, __FUNCTION__, __LINE__, + A->name, A->m, A->n, A->nzmax, B->name, B->m, B->n, + B->nzmax, C->m, C->n, C->nzmax); +#endif */ + } +//printf("After expansion, C->nzmax=%d\n", C->nzmax); + } + Cj = C->j; + Cx = C->x; /* C->j and C->x may be reallocated */ + //printf("i=%d C->j[0]=%d\n", i, C->j[0]); + Cp[i] = nz; /* row i of C starts here */ + for (jp = Ap[i]; jp < Ap[i + 1]; jp++) { + j = Aj[jp]; + for (kp = Bp[j]; kp < Bp[j + 1]; kp++) { + k = Bj[kp]; /* B(i,j) is nonzero */ +//if(k > n || k < 0)printf("k=%d kp=%d n=%d: %d\n", k, kp, n,__LINE__); + if (xb[k] != i + 1) { + xb[k] = i + 1; /* i is new entry in column j */ + // if(nz > C->nzmax) { + // printf("%d: Error nz>nzmax %d>%d\n", __LINE__, nz, C->nzmax); + // } +//if(nz > C->nzmax || nz < 0)printf("nz: %d\n", __LINE__); + Cj[nz++] = k; /* add i to pattern of C(:,j) */ + + if (x) { + // C->j != oldj ? printf("B Changed old=%u new=%u\n", oldj, C->j):0; + x[k] = Ax[jp] * Bx[kp]; /* x(i) = beta*A(i,j) */ + // C->j != oldj ? printf("A Changed old=%u new=%u\n", oldj, C->j):0; + (*nummult)++; + } + } else if (x) { + x[k] += (Ax[jp] * Bx[kp]); /* i exists in C(:,j) already */ + (*nummult)++; + } + } + } + if (values) + for (p = Cp[i]; p < nz; p++) + Cx[p] = x[Cj[p]]; + } + +// printf("%s:%d nz=%d\n", __FILE__, __LINE__, nz); + Cp[m] = nz; /* finalize the last row of C */ + csr_sprealloc(C, 0); /* remove extra space from C */ +/*#ifdef KADEBUG + printf("%d:%s:%s():%d, A(%s,%lu,%lu,%lu), B(%s,%lu,%lu,%lu), trimmed \ + C(%lu,%lu,%lu)\n", me, __FILE__, __FUNCTION__, __LINE__, + A->name, A->m, A->n, A->nzmax, B->name, B->m, B->n, + B->nzmax, C->m, C->n, C->nzmax); +#endif*/ +// cs_free(xb); + xb = NULL; +// cs_free(x); + x = NULL; + return C;//(csr_done(C, xb, x, 1)); /* success; free workspace, return C */ +}/*}}}*/ + + + void +mkl_cpu_spmm(MKL_INT Am, MKL_INT An, MKL_INT Annz, double* Aval, MKL_INT* AJ, MKL_INT* AI, /*{{{*/ + MKL_INT Bn, MKL_INT Bnnz, double* Bval, MKL_INT* BJ, MKL_INT* BI, double** pCval, MKL_INT** pCJ, MKL_INT** pCI) { + +MKL_INT* CJ = NULL; +double* Cval = NULL; +MKL_INT sort = 3; // sort everything +MKL_INT* CI = (MKL_INT*)mkl_malloc( (Am+2) * sizeof( MKL_INT ), 64 ); +MKL_INT nnzmax = 0; // nnzmax is zero in case of symbolic&numeric usage of mkl_?csrmultcsr +MKL_INT ierr; +MKL_INT request = 1; // symbolic multiplication +mkl_dcsrmultcsr(&transa, &request, &sort, &Am, &An, &Bn, Aval, AJ, AI, Bval, BJ, BI, Cval, CJ, CI, &nnzmax, &ierr); + +request = 2; //numeric multiplication +int Cnnz = CI[Am]-1; +int Cval_size = Cnnz + 1; +CJ = (MKL_INT*)mkl_malloc( Cval_size * sizeof( MKL_INT ), 64 ); +Cval = (double*)mkl_malloc( Cval_size * sizeof( double ), 64 ); +mkl_dcsrmultcsr(&transa, &request, &sort, &Am, &An, &Bn, Aval, AJ, AI, Bval, BJ, BI, Cval, CJ, CI, &nnzmax, &ierr); +//printmm_one(Am, Cval, CJ, CI);//printf("Cnnz=%d\n", Cnnz); +*pCval = Cval; *pCJ = CJ; *pCI = CI; //printf("Cnnz_host:%d\n", Cnnz); +} /* ENDOF mkl_cpu_spmm }}}*/ + void +mkl_mic_spmm(MKL_INT Am, MKL_INT An, MKL_INT Annz, double* Aval, MKL_INT* AJ, MKL_INT* AI,/*{{{*/ + MKL_INT Bn, MKL_INT Bnnz, double* Bval, MKL_INT* BJ, MKL_INT* BI, double** pCval, MKL_INT** pCJ, MKL_INT** pCI) { + + int i; + #pragma offload target(mic:micdev) inout(max_threads) + { + max_threads = mkl_get_max_threads(); + mkl_set_num_threads(nworker); + } // printf("MIC started \n"); fflush(stdout); + #pragma offload target(mic:micdev) \ + in(transa) \ + in(Am) \ + in(An) \ + in(Bn) \ + in(Aval:length(Annz) free_if(0)) \ + in(AI:length(Am+1) free_if(0)) \ + in(AJ:length(Annz) free_if(0)) \ + in(Bval:length(Bnnz) free_if(0)) \ + in(BI:length(An+1) free_if(0)) \ + in(BJ:length(Bnnz) free_if(0)) + {} + + + //void mkl_dcsrmultcsr (char *transa, MKL_INT *job, MKL_INT *sort, MKL_INT *m, MKL_INT *n, MKL_INT *k, double *a, MKL_INT *ja, MKL_INT *ia, double *b, MKL_INT *jb, MKL_INT *ib, double *c, MKL_INT *jc, MKL_INT *ic, MKL_INT *nnzmax, MKL_INT *ierr); + + +// double s_initial = dsecnd(); +// double s_elapsed = dsecnd() - s_initial; // seconds + + MKL_INT Cnnz_host = -1; + MKL_INT* CI = NULL; + MKL_INT* CJ = NULL; + double* Cval = NULL; + MKL_INT nnzmax = 0; // nnzmax is zero in case of symbolic&numeric usage of mkl_?csrmultcsr + MKL_INT ierr; + MKL_INT request = 2; //numeric multiplication + MKL_INT sort = 3; // sort everything + time_mic_symbolic_mm = 0.0; + #pragma offload target(mic:micdev) in(i) in(ierr) in(nnzmax) in(request) in(sort) in(transa) in(Am) in(An) in(Bn) \ + out(time_mic_symbolic_mm) out(Cnnz_host)\ + in(Aval:length(Annz) alloc_if(0) free_if(0)) \ + in(AI:length(Am+1) alloc_if(0) free_if(0)) \ + in(AJ:length(Annz) alloc_if(0) free_if(0)) \ + in(Bval:length(Bnnz) alloc_if(0) free_if(0)) \ + in(BI:length(An+1) alloc_if(0) free_if(0)) \ + in(BJ:length(Bnnz) alloc_if(0) free_if(0)) \ + nocopy(CI: alloc_if(0) free_if(0)) \ + nocopy(CJ: alloc_if(0) free_if(0)) \ + nocopy(Cval: alloc_if(0) free_if(0)) + { + CI = (MKL_INT*)mkl_malloc( (Am+2) * sizeof( MKL_INT ), 64 ); + MKL_INT nnzmax = 0; // nnzmax is zero in case of symbolic&numeric usage of mkl_?csrmultcsr + MKL_INT ierr; + MKL_INT request = 1; // symbolic multiplication + //MKL_INT sort = 7; // do not sort anything + + for(i = 0; i < 10; i++) { + mkl_dcsrmultcsr(&transa, &request, &sort, &Am, &An, &Bn, Aval, AJ, AI, Bval, BJ, BI, Cval, CJ, CI, &nnzmax, &ierr); + } + double s_initial = dsecnd(); + for(i = 0; i < num_repeat; i++) { + mkl_dcsrmultcsr(&transa, &request, &sort, &Am, &An, &Bn, Aval, AJ, AI, Bval, BJ, BI, Cval, CJ, CI, &nnzmax, &ierr); + } + time_mic_symbolic_mm = (dsecnd() - s_initial) / num_repeat; // seconds + + request = 2; //numeric multiplication + int Cnnz = CI[Am]-1; + int Cval_size = Cnnz + 1; Cnnz_host = Cnnz; + CJ = (MKL_INT*)mkl_malloc( Cval_size * sizeof( MKL_INT ), 64 ); + Cval = (double*)mkl_malloc( Cval_size * sizeof( double ), 64 ); + mkl_dcsrmultcsr(&transa, &request, &sort, &Am, &An, &An, Aval, AJ, AI, Bval, BJ, BI, Cval, CJ, CI, &nnzmax, &ierr); + printmm_one(Am, Cval, CJ, CI);//printf("Cnnz=%d\n", Cnnz); + sort = 7; // do not sort anything + request = 2; // numeric multiplication + }//printf("Cnnz_mic: %d\n", Cnnz_host);exit(-1); + time_mic_numeric_mm = 0.0; + #pragma offload target(mic:micdev) nocopy(request) nocopy(sort) nocopy(i) nocopy(ierr) nocopy(nnzmax) nocopy(transa) nocopy(Am) nocopy(An) nocopy(Bn) \ + out(time_mic_numeric_mm) \ + nocopy(Aval:length(Annz) alloc_if(0) free_if(0)) \ + nocopy(AI:length(Am+1) alloc_if(0) free_if(0)) \ + nocopy(AJ:length(Annz) alloc_if(0) free_if(0)) \ + nocopy(Bval:length(Bnnz) alloc_if(0) free_if(0)) \ + nocopy(BI:length(An+1) alloc_if(0) free_if(0)) \ + nocopy(BJ:length(Bnnz) alloc_if(0) free_if(0)) \ + nocopy(CI: alloc_if(0) free_if(0)) \ + nocopy(CJ: alloc_if(0) free_if(0)) \ + nocopy(Cval: alloc_if(0) free_if(0)) + { + //sort = 7 + //request = 1; 2'ye gore sure iki katina cikiyor + //request = 0; 2'ye gore sure uc katina cikiyor + + //request = 2 + //sort = 3; 7'ye gore %30 daha yavas + //printf("sort:%d request:%d\n", sort, request); // prints sort:7 request:2 + for(i = 0; i < 10; i++) { + mkl_dcsrmultcsr(&transa, &request, &sort, &Am, &An, &An, Aval, AJ, AI, Bval, BJ, BI, Cval, CJ, CI, &nnzmax, &ierr); + } + double s_initial = dsecnd(); + for(i = 0; i < num_repeat; i++) { + mkl_dcsrmultcsr(&transa, &request, &sort, &Am, &An, &An, Aval, AJ, AI, Bval, BJ, BI, Cval, CJ, CI, &nnzmax, &ierr); + } + time_mic_numeric_mm = (dsecnd() - s_initial) / num_repeat; // seconds + } + if (option_printfile_matrices == OPTION_PRINTFILE_MATRICES) { + int nelm_CI = Am + 2; + int nelm_CJ = Cnnz_host + 1; + int nelm_Cval = Cnnz_host + 1; + __declspec(target(mic)) MKL_INT* CI_host = (MKL_INT*)mkl_malloc( (nelm_CI) * sizeof( MKL_INT ), 64 ); + __declspec(target(mic)) MKL_INT* CJ_host = (MKL_INT*)mkl_malloc( (nelm_CJ) * sizeof( MKL_INT ), 64 ); + __declspec(target(mic)) double* Cval_host = (double*)mkl_malloc( (nelm_Cval) * sizeof( double ), 64 ); + #pragma offload target(mic:micdev) in(nelm_CI)\ + inout(CI_host:length(nelm_CI) alloc_if(1) free_if(0)) \ + inout(CJ_host:length(nelm_CJ) alloc_if(1) free_if(0)) \ + inout(Cval_host:length(nelm_Cval) alloc_if(1) free_if(0)) \ + nocopy(CI: alloc_if(0) free_if(0)) \ + nocopy(CJ: alloc_if(0) free_if(0)) \ + nocopy(Cval: alloc_if(0) free_if(0)) + { + int i; + for(i = 0; i < nelm_CI; i++) CI_host[i] = CI[i]; + for(i = 0; i < nelm_CJ; i++) {CJ_host[i] = CJ[i]; Cval_host[i] = Cval[i];} + } + *pCval = Cval_host; *pCJ = CJ_host; *pCI = CI_host; //printf("Cnnz_host:%d\n", Cnnz_host); + } +} /* ENDOF mkl_mic_spmm }}}*/ + void +read_mm(char* strpath, int* pM, int* pN, int* prealnnz, int** pI, int** pJ, double** pval){ /*{{{*/ + +int i, M, N, nz, *I, *J; +double* val; +int ret_code; +MM_typecode matcode; +FILE* f; +if ((f = fopen(strpath, "r")) == NULL) {fprintf(stderr, "Input matrix file %s cannot be opened to read.", strpath);exit(1);} +/* READ MATRIX */ +if (mm_read_banner(f, &matcode) != 0) { + printf("Could not process Matrix Market banner.\n"); exit(1); +} +/* This is how one can screen matrix types if their application */ +/* only supports a subset of the Matrix Market data types. */ +if (mm_is_complex(matcode) && mm_is_matrix(matcode) && mm_is_sparse(matcode) ) { + printf("Sorry, this application does not support "); + printf("Market Market type: [%s]\n", mm_typecode_to_str(matcode));exit(1); +} +/* find out size of sparse matrix .... */ +if ((ret_code = mm_read_mtx_crd_size(f, &M, &N, &nz)) !=0) exit(1); +/* reseve memory for matrices */ +I = (int *) malloc((mm_is_symmetric(matcode) ? 2*nz : nz) * sizeof(int)); +J = (int *) malloc((mm_is_symmetric(matcode) ? 2*nz : nz) * sizeof(int)); +val = (double *) malloc((mm_is_symmetric(matcode) ? 2*nz : nz) * sizeof(double)); +*pI = I; +*pJ = J; +*pval = val; +/* NOTE: when reading in doubles, ANSI C requires the use of the "l" */ +/* specifier as in "%lg", "%lf", "%le", otherwise errors will occur */ +/* (ANSI C X3.159-1989, Sec. 4.9.6.2, p. 136 lines 13-15) */ +int realnnz = 0; +for (i=0; imaxdiff){ + printf("%g\t%g\t%g\n", diff, vecout[i], vecoutcpu[i]); + } + } + printf("\n"); +*///}}} diff --git a/spgemm/mkl_xphi/mklspmv.c b/spgemm/mkl_xphi/mklspmv.c new file mode 100644 index 0000000000000000000000000000000000000000..0bb617bd0c13e5ea9f2f09e6f4c39804335fceb6 --- /dev/null +++ b/spgemm/mkl_xphi/mklspmv.c @@ -0,0 +1,261 @@ +#include +#include +#include +#include "mkl.h" +#include "mkl_types.h" +#include "mkl_spblas.h" + +#include "mmio.h" + +// mkl_?csrgemv y := A*x one based +// void mkl_dcsrgemv (char *transa, MKL_INT *m, double *a, MKL_INT *ia, MKL_INT *ja, double *x, double *y); + +// mkl_cspblas_?csrgemv y := A*x zero based +// void mkl_cspblas_dcsrgemv (char *transa, MKL_INT *m, double *a, MKL_INT *ia, MKL_INT *ja, double *x, double *y); + + +// mkl_?csrmv y := alpha*A*x + beta*y +// void mkl_dcsrmv (char *transa, MKL_INT *m, MKL_INT *k, double *alpha, char *matdescra, double *val, MKL_INT *indx, MKL_INT *pntrb, MKL_INT *pntre, double *x, double *beta, double *y); + + + + +// trans='n' +int main(int argc, char* argv[]) { + int ret_code; + MM_typecode matcode; + FILE *f; + int M, N, nz; + int i, *I, *J; + double *val; + + if (argc != 5) + { + fprintf(stderr, "Usage: %s [martix-market-filename] [nworker] [num repeat] [null|row/col permutation file]\n", argv[0]); + exit(1); + } + else + { + if ((f = fopen(argv[1], "r")) == NULL) + exit(1); + } + + int nworker = atoi(argv[2]); + int num_repeat = atoi(argv[3]); + char* fnrcperm = argv[4]; + + if (mm_read_banner(f, &matcode) != 0) + { + printf("Could not process Matrix Market banner.\n"); + exit(1); + } + + + /* This is how one can screen matrix types if their application */ + /* only supports a subset of the Matrix Market data types. */ + + if (mm_is_complex(matcode) && mm_is_matrix(matcode) && + mm_is_sparse(matcode) ) + { + printf("Sorry, this application does not support "); + printf("Market Market type: [%s]\n", mm_typecode_to_str(matcode)); + exit(1); + } + + /* find out size of sparse matrix .... */ + + if ((ret_code = mm_read_mtx_crd_size(f, &M, &N, &nz)) !=0) + exit(1); + + + /* reseve memory for matrices */ + + I = (int *) malloc((mm_is_symmetric(matcode) ? 2*nz : nz) * sizeof(int)); + J = (int *) malloc((mm_is_symmetric(matcode) ? 2*nz : nz) * sizeof(int)); + val = (double *) malloc((mm_is_symmetric(matcode) ? 2*nz : nz) * sizeof(double)); + + + /* NOTE: when reading in doubles, ANSI C requires the use of the "l" */ + /* specifier as in "%lg", "%lf", "%le", otherwise errors will occur */ + /* (ANSI C X3.159-1989, Sec. 4.9.6.2, p. 136 lines 13-15) */ + int realnnz = 0; + + for (i=0; imaxdiff){ + printf("%g\t%g\t%g\n", diff, vecout[i], vecoutcpu[i]); + } + } + printf("\n"); + printf("%s %d %d %g %d %s\n", argv[1], nworker, max_threads, s_elapsed, num_repeat, fnrcperm); +// mkl_free(A); + + return 0; + +} + diff --git a/spgemm/mkl_xphi/mmio.c b/spgemm/mkl_xphi/mmio.c new file mode 100644 index 0000000000000000000000000000000000000000..c250ff2aed998fe65248537a8b19a359206187ce --- /dev/null +++ b/spgemm/mkl_xphi/mmio.c @@ -0,0 +1,511 @@ +/* +* Matrix Market I/O library for ANSI C +* +* See http://math.nist.gov/MatrixMarket for details. +* +* +*/ + + +#include +#include +#include +#include + +#include "mmio.h" + +int mm_read_unsymmetric_sparse(const char *fname, int *M_, int *N_, int *nz_, + double **val_, int **I_, int **J_) +{ + FILE *f; + MM_typecode matcode; + int M, N, nz; + int i; + double *val; + int *I, *J; + + if ((f = fopen(fname, "r")) == NULL) + return -1; + + + if (mm_read_banner(f, &matcode) != 0) + { + printf("mm_read_unsymetric: Could not process Matrix Market banner "); + printf(" in file [%s]\n", fname); + return -1; + } + + + + if ( !(mm_is_real(matcode) && mm_is_matrix(matcode) && + mm_is_sparse(matcode))) + { + fprintf(stderr, "Sorry, this application does not support "); + fprintf(stderr, "Market Market type: [%s]\n", + mm_typecode_to_str(matcode)); + return -1; + } + + /* find out size of sparse matrix: M, N, nz .... */ + + if (mm_read_mtx_crd_size(f, &M, &N, &nz) !=0) + { + fprintf(stderr, "read_unsymmetric_sparse(): could not parse matrix size.\n"); + return -1; + } + + *M_ = M; + *N_ = N; + *nz_ = nz; + + /* reseve memory for matrices */ + + I = (int *) malloc(nz * sizeof(int)); + J = (int *) malloc(nz * sizeof(int)); + val = (double *) malloc(nz * sizeof(double)); + + *val_ = val; + *I_ = I; + *J_ = J; + + /* NOTE: when reading in doubles, ANSI C requires the use of the "l" */ + /* specifier as in "%lg", "%lf", "%le", otherwise errors will occur */ + /* (ANSI C X3.159-1989, Sec. 4.9.6.2, p. 136 lines 13-15) */ + + for (i=0; i