# Builds the CUDA programs listed in PROGRAMS. Each program is a single .cu
# driver compiled and linked together with the shared helper objects in
# OBJECTS.
#
# Usage:
#   make            build every program in PROGRAMS
#   make DEBUG=1    same, with -D_DEBUG_ added to every compile command
#   make clean      remove the programs and the helper objects

# Remove a target whose recipe failed, so a half-written file is never
# mistaken for an up-to-date one on the next run.
.DELETE_ON_ERROR:

# ---------------------------------------------------------------------------
# Host toolchain
# ---------------------------------------------------------------------------
CC = g++
ICC = icc

# Set to 1 for debug.
# The comment sits on its own line on purpose: text between a value and a
# trailing '#' becomes part of the value and would defeat the ifeq test below.
DEBUG ?= 0

CFLAGS = -O3 -lm -Wall -mavx -march=ivybridge -mtune=ivybridge -fopenmp
# Alternative flag sets, kept for reference:
#CFLAGS=-O3 -lm -Wall -mavx2 -mfma -march=haswell -mtune=haswell
#CFLAGS=-O3 -Wall -xCORE-AVX-I
#CFLAGS=-O3 -Wall -xCORE-AVX2
#ICFLAGS=-O3 -Wall -qopenmp -axCORE-AVX2,CORE-AVX-I

# ---------------------------------------------------------------------------
# Paths
# ---------------------------------------------------------------------------
# Need to -I this for user-defined functions to work.
# The trailing slash is required: rules below concatenate file names directly
# onto $(EXT_DIR).
EXT_DIR = ../External_Functions/

MPI_PREFIX = $(I_MPI_ROOT)
CUDA_PREFIX = $(CUDAROOT)

# ---------------------------------------------------------------------------
# Device toolchain
# ---------------------------------------------------------------------------
GPU_MPI_CXX = nvcc -L $(MPI_PREFIX)/lib64 -lmpi -ccbin mpiicc
GPU_CXX = nvcc

LDFLAGS ?= -L $(CUDA_PREFIX)/lib64 -lcudart -lcublas -lcusparse -lm -lrt

# Base command lines. $(LDFLAGS) is part of each of them, so it is also passed
# on the compile-only (-c) commands derived further down.
GPU_COMPILE = $(GPU_CXX) -I $(CUDA_PREFIX)/include -arch sm_35 -I$(EXT_DIR) $(LDFLAGS)
GPU_MPI_COMPILE = $(GPU_MPI_CXX) -I $(CUDA_PREFIX)/include -I $(MPI_PREFIX)/include -arch sm_35 -I$(EXT_DIR) $(LDFLAGS)
CPU_COMPILE = $(CC) $(CFLAGS) -I$(EXT_DIR) $(LDFLAGS)

# $(strip ...) tolerates stray whitespace around the value of DEBUG.
ifeq ($(strip $(DEBUG)),1)
CPU_COMPILE += -D_DEBUG_
GPU_COMPILE += -D_DEBUG_
GPU_MPI_COMPILE += -D_DEBUG_
endif

# Compile-only variants used for the helper objects.
CPU_COMPILE_OBJ = $(CPU_COMPILE) -c
GPU_COMPILE_OBJ = $(GPU_COMPILE) -c

# ---------------------------------------------------------------------------
# Inputs and outputs
# ---------------------------------------------------------------------------
SOURCE = cuBLAS.cu cuBLAS_MultiGPU.cu cuda_SingleGPU.cu
OBJECTS = util.o matrix_op.o timer.o input.o gpu_util.o dmv_gpu.o
# One executable per driver source: foo.cu -> foo.exe
PROGRAMS = $(SOURCE:.cu=.exe)

# ---------------------------------------------------------------------------
# Rules
# ---------------------------------------------------------------------------
# 'all' and 'clean' are commands, not files.
.PHONY: all clean

all: $(PROGRAMS)

# Single-GPU programs: plain nvcc, driver source named after the executable.
cuda_SingleGPU.exe cuBLAS.exe: %.exe: $(OBJECTS) %.cu
	$(GPU_COMPILE) -o $@ $(OBJECTS) $(LDFLAGS) $*.cu

# Multi-GPU program: built with the MPI-enabled nvcc command line.
cuBLAS_MultiGPU.exe: $(OBJECTS) cuBLAS_MultiGPU.cu
	$(GPU_MPI_COMPILE) -o $@ $(OBJECTS) $(LDFLAGS) cuBLAS_MultiGPU.cu

# CUDA helper objects. gpu_util.cu lives in EXT_DIR, dmv_gpu.cu next to this
# Makefile.
gpu_util.o: $(EXT_DIR)gpu_util.cu
	$(GPU_COMPILE_OBJ) -o $@ $<

dmv_gpu.o: dmv_gpu.cu
	$(GPU_COMPILE_OBJ) -o $@ $<

# Remaining helper objects are built from the .c sources in EXT_DIR using the
# host compiler.
%.o: $(EXT_DIR)%.c
	$(CPU_COMPILE_OBJ) -o $@ $<

# NOTE: header dependencies are not tracked. A recipe-less '%.o: %.h' pattern
# rule does not add them (it would only cancel an identical implicit rule), so
# no such rule is declared here; run 'make clean' after editing a header.

clean:
	$(RM) $(PROGRAMS) $(OBJECTS)