Commit 5ff71f76 authored by Damian Podareanu's avatar Damian Podareanu
Browse files

MPI implementation of matrix multiplication - tested with OpenMPI

parent 8cd2c62b
# ==================================================================================================
# This file is part of the CodeVault project. The project is licensed under Apache Version 2.0.
# CodeVault is part of the EU-project PRACE-4IP (WP7.3.C).
#
# Author(s):
# Valeriu Codreanu <valeriu.codreanu@surfsara.nl>
#
# ==================================================================================================
# Packages are optional: if they are not present, certain code samples are not compiled
find_package(MPI) # Built-in in CMake
include(${CMAKE_CURRENT_SOURCE_DIR}/../../../cmake/common.cmake)
# ==================================================================================================
# Default dwarf prefix for stand-alone configuration (normally set by the parent CMakeLists).
if ("${DWARF_PREFIX}" STREQUAL "")
set(DWARF_PREFIX 1_dense)
endif()
# C++ compiler settings
# NOTE(review): select_compiler_flags is provided by common.cmake (not visible here) — it maps a
# per-compiler flag list onto ${cxx_flags}. Optimization level is deliberately left to
# CMAKE_BUILD_TYPE rather than hardcoding -O3 here.
find_package(Common)
select_compiler_flags(cxx_flags
GNU "-march=native" # tune for the build machine; not suitable for distributed binaries
CLANG "-march=native" # same caveat as GNU
Intel "-axavx2,avx")
set(CXX_FLAGS ${cxx_flags})
if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
set(CXX_FLAGS "${CXX_FLAGS} -Wall -Wno-comment")
if(APPLE)
# On macOS, route GCC through the Clang integrated assembler so AVX encodings assemble.
set(CXX_FLAGS "${CXX_FLAGS} -Wa,-q")
endif()
endif()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CXX_FLAGS}")
# NOTE(review): CMAKE_CXX_COMPILE_FLAGS is not a variable CMake consumes, so MPI_COMPILE_FLAGS
# likely never reaches the compile line — confirm, and prefer linking MPI::MPI_CXX (or
# target_compile_options/target_link_options) over mutating global flag variables.
set(CMAKE_CXX_COMPILE_FLAGS ${CMAKE_CXX_COMPILE_FLAGS} ${MPI_COMPILE_FLAGS})
set(CMAKE_CXX_LINK_FLAGS ${CMAKE_CXX_LINK_FLAGS} ${MPI_LINK_FLAGS})
# ==================================================================================================
# GEMM with MPI (distributed dense matrix-matrix multiplication).
# Builds only when FindMPI succeeded; otherwise registers a dummy install so packaging still works.
set(NAME ${DWARF_PREFIX}_gemm_mpi)
if (MPI_FOUND)
  add_executable(${NAME} src/gemm_mpi.cpp)
  # PRIVATE: the MPI libraries are an implementation detail of this executable.
  target_link_libraries(${NAME} PRIVATE ${MPI_LIBRARIES})
  install(TARGETS ${NAME} DESTINATION bin)
else()
  message("** Skipping '${NAME}': no MPI")
  dummy_install(${NAME} "MPI")
endif()
unset(NAME)
# ==================================================================================================
=======
README
=======
# 1. Code sample name
gemm
# 2. Description of the code sample package
This example demonstrates an MPI implementation of dense matrix-matrix multiplication (GEMM).
Additional pre-requisites:
* Open MPI. There is a known issue with Intel MPI and MVAPICH that will be fixed in a future release; for now those implementations also work if the program is launched with something like "mpirun_rsh -np 2 node001 node001 ./runnable".
# 3. Release date
6 Dec 2016
# 4. Version history
1.0
# 5. Contributor(s)
Damian Podareanu <damian.podareanu@surfsara.nl>
# 6. Copyright / License of the code sample
Apache 2.0
# 7. Language(s)
C++
# 8. Parallelisation Implementation(s)
MPI
# 9. Level of the code sample complexity
Basic level
# 10. Instructions on how to compile the code
Uses the CodeVault CMake infrastructure, see main README.md
# 11. Instructions on how to run the code
Run the executable. Matrix size is defined in src/gemm_mpi.cpp
# 12. Sample input(s)
Input-data is generated automatically when running the program.
\ No newline at end of file
#include "stdio.h"
#include "mpi.h"
// Problem size: the matrices are size x size. They are statically allocated
// globals so MPI_Bcast/MPI_Gather can move whole contiguous row bands.
const int size = 1000;
float a[size][size];
float b[size][size];
float c[size][size];

// Accumulate C += A * B for the inclusive row band [istart, iend].
// The loops are ordered i-k-j (instead of i-j-k) so the innermost loop
// touches b and c with unit stride; the original j-then-k order walked a
// column of b per iteration, which is cache-hostile for row-major arrays.
void multiply(int istart, int iend)
{
    for (int i = istart; i <= iend; ++i) {
        for (int k = 0; k < size; ++k) {
            const float aik = a[i][k];  // invariant over the j loop
            for (int j = 0; j < size; ++j) {
                c[i][j] += aik * b[k][j];
            }
        }
    }
}
int main(int argc, char* argv[])
{
int rank, nproc;
int istart, iend;
double start, end;
MPI_Init(&argc, &argv);
MPI_Comm_size(MPI_COMM_WORLD, &nproc);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
// MPI_Barrier(MPI_COMM_WORLD);
// start = MPI_Wtime();
if (rank == 0) {
// Initialize buffers.
for (int i = 0; i < size; ++i) {
for (int j = 0; j < size; ++j) {
a[i][j] = (float)i + j;
b[i][j] = (float)i - j;
c[i][j] = 0.0f;
}
}
}
// Broadcast matrices to all workers.
MPI_Bcast(a, size*size, MPI_FLOAT, 0,MPI_COMM_WORLD);
MPI_Bcast(b, size*size, MPI_FLOAT, 0,MPI_COMM_WORLD);
MPI_Bcast(c, size*size, MPI_FLOAT, 0,MPI_COMM_WORLD);
// Partition work by i-for-loop.
istart = (size / nproc) * rank;
iend = (size / nproc) * (rank + 1) - 1;
// Compute matrix multiplication in [istart,iend]
// of i-for-loop.
// C <- C + A x B
multiply(istart, iend);
// Gather computed results.
MPI_Gather(c + (size/nproc*rank),
size*size/nproc,
MPI_FLOAT,
c + (size/nproc*rank),
size*size/nproc,
MPI_FLOAT,
0,
MPI_COMM_WORLD);
if (rank == 0) {
// Compute remaining multiplications
// when size % nproc > 0.
if (size % nproc > 0) {
multiply((size/nproc)*nproc, size-1);
}
}
// MPI_Barrier(MPI_COMM_WORLD);
// end = MPI_Wtime();
MPI_Finalize();
// if (rank == 0) { /* use time on master node */
// float msec_total = 0.0f;
// // Compute and print the performance
// float msec_per_matrix_mul = end-start;
// double flops_per_matrix_mul = 2.0 * (double)size * (double)size * (double)size;
// double giga_flops = (flops_per_matrix_mul * 1.0e-9f) / (msec_per_matrix_mul / 1000.0f);
// printf(
// "Performance= %.2f GFlop/s, Time= %.3f msec, Size= %.0f Ops\n",
// giga_flops,
// msec_per_matrix_mul,
// flops_per_matrix_mul);
// }
return 0;
}
\ No newline at end of file
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment