diff --git a/.gitignore b/.gitignore index 92937d98af8f13c2fc9e76ec511209346a7ec9ed..b3362e161148121344637b129d15d4457092c566 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,4 @@ **/*.out **/*.err **/*.debug +**/*.pptx diff --git a/External_Functions/compile.mk b/External_Functions/compile.mk index ac3ddeb6eb78b559bc0d6b397443033557c0ebe1..dc591802170f462c5fe6bb73e823189b3ad1b511 100644 --- a/External_Functions/compile.mk +++ b/External_Functions/compile.mk @@ -12,10 +12,10 @@ CPP=g++ MPICC=mpicc NVCC=nvcc CFLAGS=-O3 -lm -Wall -mavx -march=ivybridge -mtune=ivybridge -lrt -#CPU_COMPILE= $(CC) $(CFLAGS) -I$(EXT_DIR) -#CPU_COMPILE_OMP = $(CPU_COMPILE) -fopenmp -#MPI_COMPILE= $(MPICC) -I$(EXT_DIR) -#MPI_OMP_COMPILE= $(MPI_COMPILE) -fopenmp +CPU_COMPILE= $(CC) $(CFLAGS) -I$(EXT_DIR) +CPU_COMPILE_OMP = $(CPU_COMPILE) -fopenmp +MPI_COMPILE= $(MPICC) -I$(EXT_DIR) +MPI_OMP_COMPILE= $(MPI_COMPILE) -fopenmp GPU_MPI_CXX = $(NVCC) -L $(I_MPI_ROOT)/lib64 -lmpi -ccbin mpiicc LDFLAGS ?=-L $(CUDA_PREFIX)/lib64 -lcudart -lcublas -lcusparse -lm -lrt GPU_COMPILE = $(NVCC) -I $(CUDA_PREFIX)/include -arch sm_35 -I$(EXT_DIR) $(LDFLAGS) @@ -25,11 +25,11 @@ CPU_COMPILE_CUDA = $(CPP) $(CFLAGS) -I$(EXT_DIR) $(LDFLAGS) #compile with icc ICC =icc MPICC=mpiicc -ICFLAGS=-O3 -Wall -axCORE-AVX2,CORE-AVX-I -lrt -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lpthread -lm -CPU_COMPILE= $(ICC) $(ICFLAGS) -I$(EXT_DIR) -CPU_COMPILE_OMP = $(CPU_COMPILE) -qopenmp -MPI_COMPILE= $(MPICC) $(ICFLAGS) -I$(EXT_DIR) -MPI_OMP_COMPILE= $(MPI_COMPILE) -mt_mpi -qopenmp +ICFLAGS=-O3 -Wall -axCORE-AVX2,CORE-AVX-I +#CPU_COMPILE= $(ICC) $(ICFLAGS) -I$(EXT_DIR) +#CPU_COMPILE_OMP = $(CPU_COMPILE) -qopenmp +#MPI_COMPILE= $(MPICC) $(ICFLAGS) -I$(EXT_DIR) +#MPI_OMP_COMPILE= $(MPI_COMPILE) -mt_mpi -qopenmp ifeq ($(DEBUG), 1) CPU_COMPILE += -D_DEBUG_ diff --git a/Serial/Makefile b/Serial/Makefile index e650cf858745d84e7552d25e8bf8c3d143246efd..0cc479f0079c1249d8742291f70edcac68cfbbbb 100644 --- a/Serial/Makefile +++ b/Serial/Makefile @@ -7,7 +7,7 @@ PROGRAMS= Serial.exe all: $(PROGRAMS) Serial.exe: $(OBJECTS) $(SOURCE) - $(CPU_COMPILE) $(SOURCE) -o $@ $(OBJECTS) + $(CPU_COMPILE) $(SOURCE) -o $@ $(OBJECTS) -I$(BLASROOT)/include -L$(BLASROOT)/lib -lopenblas -lpthread -lrt -lm -lgfortran %.o: $(EXT_DIR)%.c $(CPU_COMPILE_OBJ) -o $@ $< diff --git a/Serial/Serial.c b/Serial/Serial.c index f4b06deea10e1dfe2aa2ff22ad12203c39438451..ffb76de7b1576023d1cfca03e4a902cd3f615bc6 100644 --- a/Serial/Serial.c +++ b/Serial/Serial.c @@ -12,8 +12,7 @@ #include "matrix_op.h" #include "util.h" #include "input.h" -#include "mkl.h" -#include "mkl_blas.h" +//#include int main(int argc, char **argv) { diff --git a/Serial/Serial.slurm b/Serial/Serial.slurm index b967290e7458d087c2095a0d967802915d494674..ea755f1caa75f6b09d70cc6b1430bdbf90f8404c 100644 --- a/Serial/Serial.slurm +++ b/Serial/Serial.slurm @@ -14,10 +14,10 @@ #SBATCH --ntasks=1 # Number of processor cores (i.e. tasks) #SBATCH --nodes=1 # Number of nodes requested #SBATCH --ntasks-per-node=1 # Tasks per node -#SBATCH --cpus-per-task=1 # Threads per task +#SBATCH --cpus-per-task=40 # Threads per task #SBATCH --time=00:40:00 # walltime #SBATCH --mem=50G # memory per NODE -#SBATCH --partition=taskp # Partition +#SBATCH --partition=fat # Partition #SBATCH --account=testproj # Accounting project ## LOAD MODULES ## @@ -32,7 +32,7 @@ module load cuda ## Change this to the directory of your executable! gpu_prog="./Serial.exe" - +export OPENBLAS_NUM_THREADS=40 for n; do srun $gpu_prog $n $n diff --git a/User_Guide.pdf b/User_Guide.pdf new file mode 100755 index 0000000000000000000000000000000000000000..2d85feb9f2f8ee542789513a8b9a2d55401f5974 Binary files /dev/null and b/User_Guide.pdf differ diff --git a/presentation.pdf b/presentation.pdf new file mode 100755 index 0000000000000000000000000000000000000000..c8db7419dd0747b7a9c6edf58de68f84803487fe Binary files /dev/null and b/presentation.pdf differ