/*
 * An OpenMP implementation of the Matrix-Vector multiplication
 * 
 * Author: Petros Anastasiadis(panastas@cslab.ece.ntua.gr) 
 */

#include <errno.h>
#include <limits.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <omp.h>
#include "/users/guest/petyros/Training/External_Functions/matrix_op.h"
#include "/users/guest/petyros/Training/External_Functions/util.h"
#include "/users/guest/petyros/Training/External_Functions/input.h"

int main(int argc, char **argv)
{
	/* Initializations */
	int i, j, k, n, m;
	int *I, *cooCol, n_z, sparse=0;
	double *cooVal, timer;


	/* File Input to COO */
	if (argc < 2) error("Too few Arguments");
	else if ( argc == 2) /* ./Program Input_File */
	{
		if(!mtx_read(&I, &cooCol, &cooVal, &n, &m, &n_z, argv[1])) error("input and/or COO convertion failed");
		sparse = 1;
	}
	else if ( argc == 3) { /*./Program N M */
		n = atoi(argv[1]);
		m = atoi(argv[2]);		
	}
	else error("Too many Arguments");

	double *x 			= (double *) malloc(m * sizeof(*x));
	double *y	= (double *) malloc(n * sizeof(*y));
	double *M 			= (double *) malloc(n * m * sizeof(*M));

	#pragma omp parallel for schedule(static) /* Initialize data for each thread in corresponding socket with first-touch policy */
	for( i=0 ; i<n ; ++i){
		for ( j=0 ; j<m ; ++j) M[i*m+j]=0.0;	
		//printf( "Initialize data Thread=%d i=%d\n", omp_get_thread_num(), i);
	}


	if( !y || !x || !M ) error("memory allocation failed");

	/* Initialize matrices */
	if (sparse) {
		; //regenerate_matrix_coo(M, I, cooCol, cooVal, n, m, n_z); /* Sparse matrices read from .mtx format */
	}
	else ser_matrix_init_rand(M,n,m,1.0); /* Normal matrices generated randomly */

	/* Initialize vectors */
	vec_init_rand(x, m, 1.0);
	vec_init(y, n, 0.0);
	

	/* OpenMP Kernel */
	printf("OpenMP Version(N=%d, M=%d, Threads=%s): ", n, m, getenv("OMP_NUM_THREADS"));
	timer = csecond();
	for (i = 0; i < NR_ITER; ++i){
		register double	yi = 0;
		#pragma omp parallel for private(j,yi) shared(n,m,M,y) schedule(static) /* Each thread computes n/thread_num contiguous elements of y */
		for (k = 0; k < n; ++k) {
			//printf( "Compute Thread=%d i=%d\n", omp_get_thread_num(), k);
        	yi = 0.0;
        	for (j = 0; j < m; ++j) yi += M[k*m+j]*x[j];
        	y[k] = yi;
    	}
	}
	timer = csecond() - timer;
#ifdef _DEBUG_ /* Output y in a file for debug purposes */
        		FILE * fp;
				char * filename = "/users/guest/petyros/Training/Outputs/Debug/OpenMP_aff.out" ;
				if(( fp = fopen( filename, "w")) == NULL)  error("Output file creation failed\n");
        		for (k = 0; k < n; ++k) fprintf(fp, "%lf ", y[k]) ;
				fclose(fp) ;
#endif
	report_results(timer);

	return 0;
}

	
