Commit 67c95221 authored by petros.anastasiadis

Added presentation + guide

parent 0ef3b512
......@@ -3,3 +3,4 @@
**/*.out
**/*.err
**/*.debug
**/*.pptx
......@@ -12,10 +12,10 @@ CPP=g++
MPICC=mpicc
NVCC=nvcc
CFLAGS=-O3 -lm -Wall -mavx -march=ivybridge -mtune=ivybridge -lrt
#CPU_COMPILE= $(CC) $(CFLAGS) -I$(EXT_DIR)
#CPU_COMPILE_OMP = $(CPU_COMPILE) -fopenmp
#MPI_COMPILE= $(MPICC) -I$(EXT_DIR)
#MPI_OMP_COMPILE= $(MPI_COMPILE) -fopenmp
CPU_COMPILE= $(CC) $(CFLAGS) -I$(EXT_DIR)
CPU_COMPILE_OMP = $(CPU_COMPILE) -fopenmp
MPI_COMPILE= $(MPICC) -I$(EXT_DIR)
MPI_OMP_COMPILE= $(MPI_COMPILE) -fopenmp
GPU_MPI_CXX = $(NVCC) -L $(I_MPI_ROOT)/lib64 -lmpi -ccbin mpiicc
LDFLAGS ?=-L $(CUDA_PREFIX)/lib64 -lcudart -lcublas -lcusparse -lm -lrt
GPU_COMPILE = $(NVCC) -I $(CUDA_PREFIX)/include -arch sm_35 -I$(EXT_DIR) $(LDFLAGS)
......@@ -25,11 +25,11 @@ CPU_COMPILE_CUDA = $(CPP) $(CFLAGS) -I$(EXT_DIR) $(LDFLAGS)
#compile with icc
ICC =icc
MPICC=mpiicc
ICFLAGS=-O3 -Wall -axCORE-AVX2,CORE-AVX-I -lrt -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lpthread -lm
CPU_COMPILE= $(ICC) $(ICFLAGS) -I$(EXT_DIR)
CPU_COMPILE_OMP = $(CPU_COMPILE) -qopenmp
MPI_COMPILE= $(MPICC) $(ICFLAGS) -I$(EXT_DIR)
MPI_OMP_COMPILE= $(MPI_COMPILE) -mt_mpi -qopenmp
ICFLAGS=-O3 -Wall -axCORE-AVX2,CORE-AVX-I
#CPU_COMPILE= $(ICC) $(ICFLAGS) -I$(EXT_DIR)
#CPU_COMPILE_OMP = $(CPU_COMPILE) -qopenmp
#MPI_COMPILE= $(MPICC) $(ICFLAGS) -I$(EXT_DIR)
#MPI_OMP_COMPILE= $(MPI_COMPILE) -mt_mpi -qopenmp
ifeq ($(DEBUG), 1)
CPU_COMPILE += -D_DEBUG_
......
......@@ -7,7 +7,7 @@ PROGRAMS= Serial.exe
all: $(PROGRAMS)
Serial.exe: $(OBJECTS) $(SOURCE)
$(CPU_COMPILE) $(SOURCE) -o $@ $(OBJECTS)
$(CPU_COMPILE) $(SOURCE) -o $@ $(OBJECTS) -I$(BLASROOT)/include -L$(BLASROOT)/lib -lopenblas -lpthread -lrt -lm -lgfortran
%.o: $(EXT_DIR)%.c
$(CPU_COMPILE_OBJ) -o $@ $<
......
......@@ -12,8 +12,7 @@
#include "matrix_op.h"
#include "util.h"
#include "input.h"
#include "mkl.h"
#include "mkl_blas.h"
//#include <cblas.h>
int main(int argc, char **argv)
{
......
......@@ -14,10 +14,10 @@
#SBATCH --ntasks=1 # Number of processor cores (i.e. tasks)
#SBATCH --nodes=1 # Number of nodes requested
#SBATCH --ntasks-per-node=1 # Tasks per node
#SBATCH --cpus-per-task=1 # Threads per task
#SBATCH --cpus-per-task=40 # Threads per task
#SBATCH --time=00:40:00 # walltime
#SBATCH --mem=50G # memory per NODE
#SBATCH --partition=taskp # Partition
#SBATCH --partition=fat # Partition
#SBATCH --account=testproj # Accounting project
## LOAD MODULES ##
......@@ -32,7 +32,7 @@ module load cuda
## Change this to the directory of your executable!
gpu_prog="./Serial.exe"
export OPENBLAS_NUM_THREADS=40
for n;
do
srun $gpu_prog $n $n
......
File added
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment