Commit 978f7381 authored by petros.anastasiadis's avatar petros.anastasiadis
Browse files

Initial commit

parents
/****************************************************/
Basic helpfull functions used by some of the programs.
Function explanation and usage included in corresponding header(.h) files.
Modify at your own risk!
->input.c
20/05/2017: Completed
->util.c
06/09/2017: Completed
->matrix_op.c
13/09/2017: Completed
14/09/2017: Modified for array transpose
->gpu_util.c
14/09/2017: Completed
->timer.c
06/09/2017: Completed
19/09/2017: Modified for better precision
/****************************************************/
/*
* Helpfull functions for SpMV multiplication
*
* Author: Petros Anastasiadis(panastas@cslab.ece.ntua.gr)
*/
#include <math.h>
#include <stdlib.h>
#include <stdio.h>
#include "dmv.h"
void dmv_serial(double **a, const double *x, double *y,
size_t n, size_t m)
{
size_t i, j;
double yi;
for (i = 0; i < n; ++i) {
yi = 0.0 ;
for (j = 0; j < m; ++j) {
yi += a[i][j]*x[j];
}
y[i] = yi;
}
}
void dmv_omp(double **a, const double *x, double *y,
size_t n, size_t m)
{
size_t i, j;
#pragma omp parallel for private(i,j) shared(n,m,a,y) schedule(dynamic)
for (i = 0; i < n; ++i) {
register double _yi = 0;
for (j = 0; j < m; ++j) {
_yi += a[i][j]*x[j];
}
y[i] = _yi;
}
}
void dmv_csr(int * csrPtr, int *csrCol, double * csrVal, double *x, double *ys, int n)
{
int i, j;
for (i = 0; i < n; ++i) {
double yi = 0;
for (j = csrPtr[i]; j < csrPtr[i + 1]; j++) yi += csrVal[j] * x[csrCol[j]];
ys[i] = yi;
}
}
int vec_equals(const double *v1, const double *v2, size_t n, double eps)
{
size_t i,k=0;
for (i = 0; i < n; ++i) {
if (fabs(v1[i] - v2[i]) > eps) k++;
}
return k;
}
void vec_print(const double *v, size_t n)
{
size_t i;
for (i = 0; i < n; ++i)
printf("%f\n", v[i]);
}
/*
* Some GPU utility functions for SpMV multiplication
* Author: Petros Anastasiadis(panastas@cslab.ece.ntua.gr)
*/
#include <cuda.h>
#include <stdio.h>
#include <cuda_runtime.h>
#include "gpu_util.h"
const char *gpu_get_errmsg(cudaError_t err)
{
return cudaGetErrorString(err);
}
const char *gpu_get_last_errmsg()
{
return gpu_get_errmsg(cudaGetLastError());
}
void cudaCheckErrors(const char * msg)
{
cudaError_t __err = cudaGetLastError();
if (__err != cudaSuccess) {
printf("\nFatal error: %s (%s)\n", msg, cudaGetErrorString(__err));
exit(1);
}
}
void *gpu_alloc(size_t count)
{
void *ret;
if (cudaMalloc(&ret, count) != cudaSuccess) {
printf("Gpu alloc failed: %s\n", gpu_get_last_errmsg());
exit(1);
}
return ret;
}
void gpu_free(void *gpuptr)
{
cudaFree(gpuptr);
}
int copy_to_gpu(const void *host, void *gpu, size_t count)
{
if (cudaMemcpy(gpu, host, count, cudaMemcpyHostToDevice) != cudaSuccess){
printf("Copy to GPU failed: %s\n", gpu_get_last_errmsg());
exit(1);
}
return 1;
}
int copy_from_gpu(void *host, const void *gpu, size_t count)
{
if (cudaMemcpy(host, gpu, count, cudaMemcpyDeviceToHost) != cudaSuccess){
printf("Copy to Host failed: %s\n", gpu_get_last_errmsg());
exit(1);
}
return 1;
}
/*
* some GPU utility functions
* Author: Petros Anastasiadis(panastas@cslab.ece.ntua.gr)
* Use of Unified memory is advised, but not always optimal
*/
void gpu_free(void *gpuptr); /* Free GPU memory*/
void *gpu_alloc(size_t count); /* Allocate 'count' bytes in GPU memory (error safe) */
int copy_to_gpu(const void *host, void *gpu, size_t count); /* Copy 'count' bytes from host to gpu memory (error safe) */
int copy_from_gpu(void *host, const void *gpu, size_t count); /* Copy 'count' bytes from gpu to host memory (error safe) */
void cudaCheckErrors(const char * msg); /* Check GPU for errors */
const char *gpu_get_errmsg(cudaError_t err);
const char *gpu_get_last_errmsg();
/*
* Some basic functions for mtx reading and formating
*
* Author: Petros Anastasiadis(panastas@cslab.ece.ntua.gr)
*/
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#include <errno.h>
#include "matrix_op.h"
#include "util.h"
#include "input.h"
int mtx_read(int ** I, int ** cooCol, double ** cooVal, int * n, int * m, int * n_z, char * name)
{
char c;
char *type, *format, *var_type, *symmetry, *string=NULL;
FILE *fp ;
size_t len=0;
if ((fp=fopen(name, "r"))==NULL) error("Invalid File");
else if ((strstr(name, "mtx"))==NULL) error("Invalid File Type (*.mtx)");
getline(&string, &len, fp);
strtok(string," ");
type = strtok(NULL," ");
format = strtok(NULL," ");
var_type = strtok(NULL," ");
symmetry = strtok(NULL,"\n");
//printf("type=%s, format=%s, var_type=%s, ", type, format, var_type);
if (strcmp(type,"matrix")){
printf("type=%s unsupported...terminating\n\n\n\n\n\n\n\n\n\n\n\n", type);
exit(1);
}
if (strcmp(format,"coordinate")){
printf("format=%s unsupported...terminating\n\n\n\n\n\n\n\n\n\n\n\n", format);
exit(1);
}
if (strcmp(var_type,"integer") && strcmp(var_type,"real")){
printf("Var_type=%s unsupported...terminating\n\n\n\n\n\n\n\n\n\n\n\n", var_type);
exit(1);
}
while((c=getc(fp))=='%') while( (c=getc(fp))!='\n') ;
ungetc(c, fp);
int k, lines = 0, sym_k=0;
fscanf(fp,"%d %d %d", n, m, &lines);
//printf("n=%d, m=%d, lines=%d, ", *n, *m, lines);
*n_z = 0;
if (!strcmp(symmetry,"symmetric")){
get_nz_symmetric(n_z, name);
//printf("symmetry=symmetric\n");
}
else if (!strcmp(symmetry,"general")) {
*n_z=lines;
//printf("symmetry=general\n");
}
else {
printf("Invalid symmetry value:%s\n", symmetry);
return 0;
}
//printf("n_z=%d\n", *n_z);
*I = (int *) malloc(*n_z * sizeof(int));
*cooCol = (int *) malloc(*n_z * sizeof(int));
*cooVal = (double *) malloc(*n_z * sizeof(double));
double dum;
if ( !*I || !*cooCol || !*cooVal ) return 0;
if (!strcmp(symmetry,"symmetric")){
for (k = 0; k < lines; k++) {
fscanf(fp,"%d %d %lf", &((*I)[sym_k]), &((*cooCol)[sym_k]), &dum);
(*cooVal)[sym_k]=(double) dum;
(*I)[sym_k]--;
(*cooCol)[sym_k]--;
sym_k++;
if ((*I)[sym_k-1] != (*cooCol)[sym_k-1]) {
(*I)[sym_k] = (*cooCol)[sym_k-1];
(*cooCol)[sym_k] = (*I)[sym_k-1];
(*cooVal)[sym_k] = (*cooVal)[sym_k-1];
sym_k++;
}
}
if (sym_k!=*n_z){
printf("Error in symmetric read: sym_k=%d n_z=%d\n", sym_k, *n_z);
return 0;
}
}
else if (!strcmp(symmetry,"general")) for (k = 0; k < lines; k++){
fscanf(fp,"%d %d %lf", &((*I)[k]), &((*cooCol)[k]), &dum);
(*cooVal)[k] = (double) dum;
(*I)[k]--;
(*cooCol)[k]--;
}
quickSort( *I, *cooCol, *cooVal, 0, *n_z-1);
fclose(fp);
return 1;
}
void get_nz_symmetric( int * n_z, char* name)
{
char c;
FILE *fp ;
if ((fp=fopen(name, "r"))==NULL){
printf("Problem in symmetric read pass\n");
exit(1);
}
while((c=getc(fp))=='%') while( (c=getc(fp))!='\n') ;
ungetc(c, fp);
int k, i, j, n, m, lines;
double x;
fscanf(fp,"%d %d %d", &n, &m, &lines);
for (k = 0; k < lines; k++){
fscanf(fp,"%d %d %lf", &i, &j, &x);
(*n_z)++;
if(i!=j) (*n_z)++;
}
}
void csr_transform(double ** A, int n, int m, int n_z, double *csrValA, int *csrRowPtrA, int *csrColIndA)
{
int i,j,k=0;
for (i = 0; i < n; i++){
csrRowPtrA[i]=k;
for (j = 0; j < m; j++){
if (A[i][j]!=0.0){
csrValA[k]=A[i][j];
csrColIndA[k]= j;
k++;
}
}
}
csrRowPtrA[i]=k;
if (k!=n_z) printf("Error at non zeroes: %d\n", k-n_z);
return;
}
void quickSort( int *a, int * b, double * c, int l, int r)
{
int j;
if( l < r )
{ // divide and conquer
j = partition( a, b, c, l, r);
quickSort( a, b, c, l, j-1);
quickSort( a, b, c, j+1, r);
}
}
int partition( int *a, int * b, double * c, int l, int r)
{
int pivot, i, j, t;
double t1;
pivot = a[l];
i = l; j = r+1;
while(1)
{
do ++i; while( a[i] <= pivot && i <= r );
do --j; while( a[j] > pivot );
if( i >= j ) break;
t = a[i]; a[i] = a[j]; a[j] = t;
t = b[i]; b[i] = b[j]; b[j] = t;
t1 = c[i]; c[i] = c[j]; c[j] = t1;
}
t = a[l]; a[l] = a[j]; a[j] = t;
t = b[l]; b[l] = b[j]; b[j] = t;
t1 = c[l]; c[l] = c[j]; c[j] = t1;
return j;
}
/*
* Some basic functions for mtx reading and formating
*
* Author: Petros Anastasiadis(panastas@cslab.ece.ntua.gr)
*/
/* a quicksort implementation (required for sorted coo impementation) */
void quickSort( int *a, int * b, double * c, int l, int r);
int partition( int *a, int * b, double * c, int l, int r);
/* Read functions for sparce matrices (mtx format) */
void get_nz_symmetric( int * n_z, char* name);
int mtx_read(int ** I, int ** cooCol, double ** cooVal, int * n, int * m, int * n_z, char * name);
void csr_transform(double **, int, int, int, double *, int *, int *);
int read_mtx_coo(char *, int *, int * , double *, int *, int * , int * );
/*
*
* matrix_op.c -- Basic Matrix transform/split operations
*
* Author: Petros Anastasiadis(panastas@cslab.ece.ntua.gr)
*
*/
#include <math.h>
#include <string.h>
#include <stdlib.h>
#include "matrix_op.h"
void vec_init(double *v, size_t n, double val)
{
size_t i;
for (i = 0; i < n; ++i) {
v[i] = val;
}
}
void vec_init_rand(double *v, size_t n, double max)
{
srand48(42); // should only be called once
size_t i;
for (i = 0; i < n; ++i) {
v[i] = max*(double) drand48();
}
}
void vec_init_rand_p(double *v, size_t n, size_t np, double max)
{
srand48(42); // should only be called once
size_t i;
for (i = 0; i < n; ++i) {
v[i] = (double) drand48();
}
for (i = n; i < np; ++i) {
v[i] = 0.0;
}
}
void matrix_init_rand(double **v, size_t n, size_t m, double max)
{
srand48(42); // should only be called once
size_t i,j;
for (i = 0; i < n; ++i)
for (j = 0; j < m; ++j)
v[i][j] = max*(double) drand48();
}
void ser_matrix_init_rand(double *v, size_t n, size_t m, double max)
{
srand48(42); // should only be called once
size_t i,j;
for (i = 0; i < n; ++i)
for (j = 0; j < m; ++j)
v[i*m+j] = max*(double) drand48();
}
void ser_matrix_init_rand_p(double *v, size_t n, size_t m, size_t np, double max)
{
srand48(42); // should only be called once
size_t i,j;
for (i = 0; i < n; ++i)
for (j = 0; j < m; ++j)
v[i*m+j] = max*(double) drand48();
for (i = n*m; i < n*m+np; ++i) v[i] = 0.0;
}
void matrix_col_major(double *M, double *A, size_t n, size_t m)
{
size_t i,j;
for (i = 0; i < n; ++i)
for (j = 0; j < m; ++j)
A[j*n+i] = M[i*m+j];
}
void matrix_row_major(double **M, double *A, size_t n, size_t m)
{
size_t i,j;
for (i = 0; i < n; ++i)
for (j = 0; j < m; ++j)
A[i*n+j] = M[i][j];
}
void regenerate_matrix_coo(double **M, int *I, int *cooCol, double *cooVal, int n, int m, int n_z)
{
int i;
for (i = 0; i < n_z; ++i) M[I[i]][cooCol[i]] = cooVal[i];
}
/*
*
* matrix_op.h -- Basic Matrix transform/split operations
*
* Author: Petros Anastasiadis(panastas@cslab.ece.ntua.gr)
*
*/
void vec_init(double *v, size_t n, double val); /* Initialize n vector to 'val' */
void vec_init_rand(double *v, size_t n, double max); /* Initialize n vector to random values between 0 and 'max' (Constant seed for error checking) */
void vec_init_rand_p(double *v, size_t n, size_t np, double max); /* Initialize n+np vector to random values between 0 and 'max' for n elems and 0.0 for padding */
void matrix_init_rand(double **v, size_t n, size_t m, double max); /* Initialize v[n][n] matrix to random values between 0 and 'max' */
void ser_matrix_init_rand(double *v, size_t n, size_t m, double max); /* Initialize v[n*m] matrix to random values between 0 and 'max' */
void ser_matrix_init_rand_p(double *v, size_t n, size_t m, size_t np, double max); /* Initialize v[n*m+np] matrix to random values between 0 and 'max' for n*m elems and 0.0 for padding */
void matrix_col_major(double *M, double *A, size_t n, size_t m); /* Transform row major M[n*m] to column major A[n*m] */
void matrix_row_major(double **M, double *A, size_t n, size_t m); /* Transform column major M[n][m] to column major A[n*m] */
void regenerate_matrix_coo(double **M, int * I, int * cooCol, double * cooVal, int n, int m, int n_z); /* Generate (sparse?) M[n][m] matrix from given COO format */
#include <stdio.h>
#include <time.h>
#include <stdint.h>
#include <inttypes.h>
double csecond(void) {
struct timespec tms;
if (clock_gettime(CLOCK_REALTIME,&tms)) {
return (0.0);
}
/* seconds, multiplied with 1 million */
int64_t micros = tms.tv_sec * 1000000;
/* Add full microseconds */
micros += tms.tv_nsec/1000;
/* round up if necessary */
if (tms.tv_nsec % 1000 >= 500) {
++micros;
}
return( (double) micros /1000000.0) ;
}
double csecond(void); /* A function to calculate current time. Use: double x = csecond(); ... y = csecond(); time = y-x; */
/*
* util.c -- Some usefull functions for error checking
*
* Author: Petros Anastasiadis(panastas@cslab.ece.ntua.gr)
*/
#include <errno.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>
#include "util.h"
void error(const char * msg)
{
perror(msg);
exit(1);
}
void check_result(double *test, double *orig, size_t n)
{
size_t i_fail = vec_equals(test, orig, n, 0.00001);
if (!i_fail) ; //printf("Checked, ");
else printf("FAILED %ld times", i_fail );
}
void report_results(double timer)
{
printf("t= %lf ms\n",1000.0/NR_ITER*timer);
}
void report_mpi_results(double comm_timer, double comp_timer)
{
printf("comp_t= %lf ms, comm_t= %lf ms\n",1000.0/NR_ITER*comp_timer, 1000.0*comm_timer);
}
int vec_equals(const double *v1, const double *v2, size_t n, double eps)
{
size_t i,k=0;
for (i = 0; i < n; ++i) {
if (fabs(v1[i] - v2[i]) > eps) k++;
}
return k;
}
/*
* util.h -- Some usefull functions for error checking
*
* Author: Petros Anastasiadis(panastas@cslab.ece.ntua.gr)
*/
#include "timer.h"
#define NR_ITER 100
void error(const char * msg); /* A function for error printing and exiting */
void report_results(double time); /* Print timer results */
void report_mpi_results(double comm_timer, double comp_timer);
void check_result(double *test, double *orig, size_t n); /* Check vector result */
int vec_equals(const double *v1, const double *v2, size_t n, double eps); /* Check vector v1[n], v2[n] equality with 'eps' precision */
CC=g++
ICC =icc
DEBUG ?= 1
CFLAGS=-O3 -lm -Wall -mavx -march=ivybridge -mtune=ivybridge -fopenmp
#CFLAGS=-O3 -lm -Wall -mavx2 -mfma -march=haswell -mtune=haswell