diff --git a/bhtree_mpi/CMakeLists.txt b/bhtree_mpi/CMakeLists.txt
index ed3a099f173a823febea5e54b25da45a034fe06f..40a086e2b8bf638e7855f76daf13a961a92eee9b 100644
--- a/bhtree_mpi/CMakeLists.txt
+++ b/bhtree_mpi/CMakeLists.txt
@@ -24,9 +24,9 @@ if (MPI_FOUND)
 	elseif ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")
 		set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=native -Wall -Wextra")
 	elseif ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Intel")
-		set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -xHost -std=c++11")
+		set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -xHost -std=c++14")
 	endif()
-	set_target_properties(${NAME} PROPERTIES CXX_STANDARD 11 CXX_STANDARD_REQUIRED YES)
+	set_target_properties(${NAME} PROPERTIES CXX_STANDARD 14 CXX_STANDARD_REQUIRED YES)
 	target_link_libraries(${NAME} ${MPI_LIBRARIES} )
 	install(TARGETS ${NAME} DESTINATION bin)
 	message("** Enabling '${NAME}': with MPI")
diff --git a/bhtree_mpi/src/datastructures/Body.hpp b/bhtree_mpi/src/datastructures/Body.hpp
index ff343ac5cccc5a4419f6774060a01f61f3ae952d..618602da4536d22a2e81312c75add2879fc2232d 100644
--- a/bhtree_mpi/src/datastructures/Body.hpp
+++ b/bhtree_mpi/src/datastructures/Body.hpp
@@ -13,7 +13,7 @@ namespace nbody {
 		std::array<double, 3> dv{};
 	};
 
-	struct Body { //TODO(steinret): figure out, why this is faster without initialization
+	struct Body { //TODO(steinret): ctor
 		std::size_t id{};
 		std::array<double, 3> position{};
 		std::array<double, 3> velocity{};
diff --git a/bhtree_mpi/src/simulation/MpiSimulation.cpp b/bhtree_mpi/src/simulation/MpiSimulation.cpp
index bfffd08e29bf65d5849d6ec6b788748afea5e983..2b052963661a1af82e60402377e5dd6c351dbf93 100644
--- a/bhtree_mpi/src/simulation/MpiSimulation.cpp
+++ b/bhtree_mpi/src/simulation/MpiSimulation.cpp
@@ -5,6 +5,7 @@
 #include
 #include
 #include
+#include <algorithm>
 #include
 #include
 #include "MpiSimulation.hpp"
@@ -66,15 +67,12 @@ namespace nbody {
 	}
 
 	MpiSimulation::~MpiSimulation() {
+		flushSendStore();
 		//cleanup MPI types
 		MPI_Type_free(&this->bodyType);
 		MPI_Type_free(&this->boxType);
 		delete this->tree;
 		this->tree = nullptr;
-		while (!this->sendStores.empty()) {
-			delete[] this->sendStores.back().bodies;
-			this->sendStores.pop_back();
-		}
 	}
 
 	std::size_t MpiSimulation::getNumberOfProcesses() const {
@@ -90,13 +88,14 @@
 	}
 
 	//mpi send wrapper
-	void MpiSimulation::send(std::vector<Body> bodies, int target) { //TODO(steinret): MPI_BSend, remove SendStore
-		std::size_t bodySize = bodies.size();
-		SendStore* store = this->availableSendStore(bodySize);
+	void MpiSimulation::send(const std::vector<Body>& bodies, int target) {
+		const auto bodySize = bodies.size();
 
 		//do unblocking send
-		memcpy(store->bodies, &(bodies[0]), bodySize * sizeof(Body));
-		MPI_Isend(store->bodies, bodySize, this->bodyType, target, 0, MPI_COMM_WORLD, &store->request);
+		auto store = std::make_unique<SendStore>(bodies); //TODO(steinret): ask ponweist if this could be done w/o unique_ptr
+
+		MPI_Isend(store->bodies.data(), bodySize, this->bodyType, target, 0, MPI_COMM_WORLD, &store->request);
+		sendStores.push_back(std::move(store));
 	}
 
 	//mpi recv wrapper
@@ -154,6 +153,7 @@ namespace nbody {
 		} else {
 			this->recv(this->bodies, 0);
 		}
+		flushSendStore();
 	}
 
 	void MpiSimulation::distributeDomains(const std::vector<Body>& localBodies) {
@@ -181,32 +181,11 @@ namespace nbody {
 		}
 	}
 
-	//send stores are needed for unblocking sends, get available one and cleanup unused ones
-	SendStore* MpiSimulation::availableSendStore(std::size_t numElems) {
-		//determine if theere is a available store for non-blocking particle send
-		//cleanup of unused send stores is also done
-		auto it = std::begin(sendStores);
-
-		while (it != std::end(sendStores)) {
-			bool completed;
-
-			int mpiCompleted;
-			MPI_Test(&it->request, &mpiCompleted, MPI_STATUS_IGNORE);
-			completed = mpiCompleted != 0;
-			if (it->size >= numElems && completed) {
-				return &(*it);
-			} else if (completed) {
-				delete[] it->bodies;
-				it = this->sendStores.erase(it);
-			} else {
-				it++;
-			}
-		}
-		SendStore store;
-		store.bodies = new Body[numElems];
-		store.size = numElems;
-
-		this->sendStores.push_back(store);
-		return &(this->sendStores.back());
+	void MpiSimulation::flushSendStore() {
+		std::vector<MPI_Request> requests;
+		std::transform(std::begin(sendStores), std::end(sendStores), std::back_inserter(requests), [](const std::unique_ptr<SendStore>& ss) {return ss->request; });
+		MPI_Waitall(requests.size(), requests.data(), MPI_STATUSES_IGNORE);
+		sendStores.clear();
 	}
 
 	//distribute bodies needed by other processes for their local simlation
@@ -232,6 +211,7 @@ namespace nbody {
 		if (!this->tree->isCorrect()) {
 			std::cerr << "wrong tree\n";
 		}
+		flushSendStore();
 	}
 
 	void MpiSimulation::buildTree() {
diff --git a/bhtree_mpi/src/simulation/MpiSimulation.hpp b/bhtree_mpi/src/simulation/MpiSimulation.hpp
index 01ac3b6493b35128339a548c4ad630937407a37a..cc85929ef59b8f429fe9514e515d09cf24387260 100644
--- a/bhtree_mpi/src/simulation/MpiSimulation.hpp
+++ b/bhtree_mpi/src/simulation/MpiSimulation.hpp
@@ -6,13 +6,14 @@
 #include
 #include
 #include
+#include <memory>
 #include
 
 namespace nbody {
 	struct SendStore {
-		Body* bodies;
-		MPI_Request request;
-		std::size_t size;
+		std::vector<Body> bodies;
+		MPI_Request request{ MPI_REQUEST_NULL };
+		SendStore(const std::vector<Body>& b):bodies(b) {};
 	};
 
 	//MPI simulation
@@ -22,10 +23,10 @@ namespace nbody {
 		MPI_Datatype boxType;
 		std::vector<Box> domains;
 		Box overallDomain;
-		std::vector<SendStore> sendStores;
+		std::vector<std::unique_ptr<SendStore>> sendStores;
 
-		virtual SendStore* availableSendStore(std::size_t numElems);
-		virtual void send(std::vector<Body> bodies, int target);
+		void flushSendStore();
+		virtual void send(const std::vector<Body>& bodies, int target);
 		virtual int recv(std::vector<Body>& bodies, int source);
 	public:
 		MpiSimulation(const std::string& inputFile);