# Makefile for the CUDA/cuBLAS programs cuBLAS.exe, cuBLAS_MultiGPU.exe and
# cuda_SingleGPU.exe.
# ---- Toolchain and flag configuration ---------------------------------------

# Host compiler for the CPU-side helper objects (a C++ compiler, despite the
# conventional CC name).
CC=g++
# Intel compiler name; not referenced by any rule visible in this file.
ICC =icc

# Debug switch: set to 1 (e.g. `make DEBUG=1`) to compile with -D_DEBUG_.
# The explanation lives on its own lines on purpose: make keeps the blanks
# between a value and a trailing `#` comment, so an inline comment would turn
# the value into "0 " (or "1 ") and an exact comparison against 1 would fail.
DEBUG ?= 0

# Host compile flags: optimised AVX code for Ivy Bridge with OpenMP enabled.
# NOTE(review): -lm is a link-time library option; it is kept here unchanged
# so the expanded command lines stay the same.
CFLAGS=-O3 -lm -Wall -mavx -march=ivybridge -mtune=ivybridge -fopenmp
# Alternative flag sets kept for reference (Haswell with g++, and -x/-ax style
# options that presumably target the Intel compiler -- confirm before use).
#CFLAGS=-O3 -lm -Wall -mavx2 -mfma -march=haswell -mtune=haswell
#CFLAGS=-O3 -Wall -xCORE-AVX-I
#CFLAGS=-O3 -Wall -xCORE-AVX2
#ICFLAGS=-O3 -Wall -qopenmp -axCORE-AVX2,CORE-AVX-I

# Directory holding the shared helper sources and headers.
# It must be passed with -I for the user-defined functions to be found.
EXT_DIR = ../External_Functions/

# Installation roots, taken from the environment by default.
MPI_PREFIX = $(I_MPI_ROOT)
CUDA_PREFIX = $(CUDAROOT)

# CUDA compiler driver, and the MPI-enabled variant of it that links libmpi
# and uses mpiicc as the host compiler (-ccbin).
GPU_CXX = nvcc
GPU_MPI_CXX = $(GPU_CXX) -L $(MPI_PREFIX)/lib64 -lmpi -ccbin mpiicc

# Libraries needed at link time. `?=` lets the environment or the command
# line supply a different set.
LDFLAGS ?=-L $(CUDA_PREFIX)/lib64 -lcudart -lcublas -lcusparse -lm -lrt

# Complete command prefixes for GPU, GPU+MPI and host compilation. They are
# built from GPU_CXX and MPI_PREFIX so that overriding either knob actually
# takes effect; with the defaults they expand to the same text as before.
GPU_COMPILE = $(GPU_CXX) -I $(CUDA_PREFIX)/include -arch sm_35 -I$(EXT_DIR) $(LDFLAGS)
GPU_MPI_COMPILE = $(GPU_MPI_CXX) -I $(CUDA_PREFIX)/include -I $(MPI_PREFIX)/include -arch sm_35 -I$(EXT_DIR) $(LDFLAGS)
CPU_COMPILE = $(CC) $(CFLAGS) -I$(EXT_DIR) $(LDFLAGS)
# Debug build: when DEBUG is 1, add -D_DEBUG_ to every compile command.
# $(strip ...) keeps the test working when the value carries stray blanks,
# for example the ones make retains in front of an inline comment on the
# line that assigns DEBUG.
ifeq ($(strip $(DEBUG)),1)
CPU_COMPILE += -D_DEBUG_
GPU_COMPILE += -D_DEBUG_
GPU_MPI_COMPILE += -D_DEBUG_
endif
# Compile-only (-c) forms of the GPU and host commands, used for object files.
GPU_COMPILE_OBJ = $(GPU_COMPILE) -c
CPU_COMPILE_OBJ = $(CPU_COMPILE) -c

# Executables built by `all`, and the CUDA source file named after each one.
PROGRAMS = cuBLAS.exe \
           cuBLAS_MultiGPU.exe \
           cuda_SingleGPU.exe
SOURCE = cuBLAS.cu \
         cuBLAS_MultiGPU.cu \
         cuda_SingleGPU.cu

# Helper objects that every executable is linked against.
OBJECTS = util.o \
          matrix_op.o \
          timer.o \
          input.o \
          gpu_util.o \
          dmv_gpu.o
# Build every executable listed in PROGRAMS.
# Declared phony so that a stray file named `all` can never make this goal
# look up to date.
.PHONY: all
all: $(PROGRAMS)
# Executables: each one is compiled from its own .cu file and linked with the
# shared helper objects. $^ expands to all prerequisites (objects, then the
# source), and the libraries in LDFLAGS come after every input, which is the
# conventional link order.

# Single-GPU CUDA implementation.
cuda_SingleGPU.exe: $(OBJECTS) cuda_SingleGPU.cu
	$(GPU_COMPILE) -o $@ $^ $(LDFLAGS)

# Multi-GPU cuBLAS implementation; built with the MPI-enabled nvcc command.
cuBLAS_MultiGPU.exe: $(OBJECTS) cuBLAS_MultiGPU.cu
	$(GPU_MPI_COMPILE) -o $@ $^ $(LDFLAGS)

# cuBLAS implementation.
cuBLAS.exe: $(OBJECTS) cuBLAS.cu
	$(GPU_COMPILE) -o $@ $^ $(LDFLAGS)
# GPU helper objects, compiled with nvcc in compile-only mode.
# $< is the single .cu prerequisite and $@ the object being produced.

# gpu_util.cu lives in the shared helper directory EXT_DIR.
gpu_util.o: $(EXT_DIR)gpu_util.cu
	$(GPU_COMPILE_OBJ) -o $@ $<

# dmv_gpu.cu lives next to this makefile.
dmv_gpu.o: dmv_gpu.cu
	$(GPU_COMPILE_OBJ) -o $@ $<
# Host-side helper objects are compiled from the .c sources in EXT_DIR.
# -MMD -MP makes the compiler write a .d file next to each object that lists
# the headers the source really included, so editing a header rebuilds exactly
# the objects that use it. This replaces the former recipe-less `%.o: %.h`
# rule, which added no header dependency at all: a pattern rule without a
# recipe only cancels a matching implicit rule.
%.o: $(EXT_DIR)%.c
	$(CPU_COMPILE_OBJ) -MMD -MP -o $@ $<

# Read the generated dependency files. The leading `-` silently skips .d files
# that do not exist (first build, or objects that were not built by the rule
# above).
-include $(OBJECTS:.o=.d)

# Remove everything this makefile builds, including the generated .d files.
# Declared phony so a file named `clean` cannot disable the target.
.PHONY: clean
clean:
	$(RM) $(PROGRAMS) $(OBJECTS) $(OBJECTS:.o=.d)