#include <stdio.h>
#include <stdlib.h>

#include <mpi.h>

#include "mpicomm.h"
#include "mpitypes.h"
#include "mesh.h"

#if MPI_VERSION < 3
#pragma message("MPI_VERSION < 3 => Sparse collectives are not supported.")
#endif

// --------------------------------------------- Helper function declarations

// ==========================================================================

// Start broadcasting the run configuration from rank 0.  With MPI >= 3 the
// broadcast is non-blocking and is completed later with
// mpi_broadcast_configuration_finish().
void mpi_broadcast_configuration_start(conf_t *configuration, MPI_Request *request)
{
    MPI_Datatype conf_mpitype;

    mpitype_conf_init(&conf_mpitype);

#if MPI_VERSION >= 3
    MPI_Ibcast(configuration, 1, conf_mpitype, 0, MPI_COMM_WORLD, request);
#else
    MPI_Bcast(configuration, 1, conf_mpitype, 0, MPI_COMM_WORLD);
    *request = MPI_REQUEST_NULL;
#endif

    // Freeing the datatype here is safe: MPI only marks it for deallocation
    // and releases it once any pending communication using it has completed.
    mpitype_conf_free(&conf_mpitype);
}

void mpi_broadcast_configuration_finish(MPI_Request *request)
{
    // Note for case MPI_VERSION < 3:
    // Calling MPI_Wait with MPI_REQUEST_NULL is safe and will return immediately.
    MPI_Wait(request, MPI_STATUS_IGNORE);
}
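// Illustrative usage sketch (not part of the original source): the
// start/finish pair above is intended to let a caller overlap the
// configuration broadcast with work that does not depend on the
// configuration.  The guard macro MPICOMM_USAGE_SKETCHES and the example
// function are hypothetical and serve only as documentation.
#ifdef MPICOMM_USAGE_SKETCHES
static void example_overlapped_configuration_broadcast(conf_t *configuration)
{
    MPI_Request request;

    // Start the broadcast (non-blocking where MPI_VERSION >= 3).
    mpi_broadcast_configuration_start(configuration, &request);

    // ... perform work here that does not touch *configuration ...

    // Wait until the configuration is available on every rank.
    mpi_broadcast_configuration_finish(&request);
}
#endif // MPICOMM_USAGE_SKETCHES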
// Broadcast the domain decomposition (one box per rank) from rank 0.
void mpi_broadcast_decomposition(box_t *decomposition)
{
    int nprocs;
    MPI_Datatype box_mpitype;

    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);

    mpitype_box_init(&box_mpitype);
    MPI_Bcast(decomposition, nprocs, box_mpitype, 0, MPI_COMM_WORLD);
    mpitype_box_free(&box_mpitype);
}

// Create a distributed graph communicator that mirrors the mesh neighborhood;
// the halo volumes are passed as edge weights.
void mpi_create_graph_communicator(const mesh_t *mesh, const conf_t *configuration,
                                   MPI_Comm *graph_comm)
{
    int n_neighbors = mesh->n_neighbors;
    int *buffer = malloc(3 * n_neighbors * sizeof(*buffer));
    int *neighbor_ranks = buffer + 0 * n_neighbors;
    int *recv_weights   = buffer + 1 * n_neighbors;
    int *send_weights   = buffer + 2 * n_neighbors;
    const neighbor_info *nb;
    int allow_reorder = 1;
    int i;

    for(i = 0; i < n_neighbors; i++) {
        nb = &mesh->neighbors[i];
        neighbor_ranks[i] = nb->mpi_rank;
        recv_weights[i] = nb->halo_incoming.domain.volume;
        send_weights[i] = nb->halo_outgoing.domain.volume;
    }

    MPI_Dist_graph_create_adjacent(
        MPI_COMM_WORLD,
        n_neighbors, neighbor_ranks, recv_weights,
        n_neighbors, neighbor_ranks, send_weights,
        MPI_INFO_NULL, allow_reorder, graph_comm);

    free(buffer);
}

// Halo exchange using the sparse (neighborhood) collective
// MPI_Neighbor_alltoallw, which requires a communicator with a process
// topology, e.g. one created by mpi_create_graph_communicator() above.
void mpi_halo_exchange_int_sparse_collective(int_field_t *field)
{
#if MPI_VERSION >= 3
    const mesh_t *mesh = field->mesh;
    int *data = field->data;
    int n_neighbors = mesh->n_neighbors;
    int i;

    int *counts = malloc(n_neighbors * sizeof(*counts));
    MPI_Aint *displs = malloc(n_neighbors * sizeof(*displs));
    MPI_Datatype *types = malloc(2 * n_neighbors * sizeof(*types));
    MPI_Datatype *send_types = types + 0 * n_neighbors;
    MPI_Datatype *recv_types = types + 1 * n_neighbors;

    for(i = 0; i < n_neighbors; i++) {
        counts[i] = 1;
        displs[i] = 0;
        send_types[i] = mesh->neighbors[i].halo_outgoing.transfer_type_int;
        recv_types[i] = mesh->neighbors[i].halo_incoming.transfer_type_int;
    }

    // The field's data array serves as both send and receive buffer; the
    // outgoing and incoming transfer datatypes are expected to describe
    // disjoint regions of the field.
    MPI_Neighbor_alltoallw(
        data, counts, displs, send_types,
        data, counts, displs, recv_types,
        mesh->communicator
    );

    free(counts);
    free(displs);
    free(types);
#else
    fprintf(stderr, "Your MPI implementation does not support sparse collective operations.\n");
    MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
#endif
}

// Halo exchange implemented with a dense MPI_Alltoallw over all ranks;
// non-neighbors get a count of zero.
void mpi_halo_exchange_int_collective(int_field_t *field)
{
    const mesh_t *mesh = field->mesh;
    int *data = field->data;
    int n_neighbors = mesh->n_neighbors;
    int nprocs, i, nb_rank;

    MPI_Comm_size(mesh->communicator, &nprocs);

    int *counts = malloc(nprocs * sizeof(*counts));
    int *displs = malloc(nprocs * sizeof(*displs));
    MPI_Datatype *types = malloc(2 * nprocs * sizeof(*types));
    MPI_Datatype *send_types = types + 0 * nprocs;
    MPI_Datatype *recv_types = types + 1 * nprocs;

    // Default: exchange nothing with ranks that are not neighbors.
    for(i = 0; i < nprocs; i++) {
        counts[i] = 0;
        displs[i] = 0;
        send_types[i] = MPI_BYTE;
        recv_types[i] = MPI_BYTE;
    }

    // Overwrite the entries of actual neighbors with the halo datatypes.
    for(i = 0; i < n_neighbors; i++) {
        nb_rank = mesh->neighbors[i].mpi_rank;
        counts[nb_rank] = 1;
        displs[nb_rank] = 0;
        send_types[nb_rank] = mesh->neighbors[i].halo_outgoing.transfer_type_int;
        recv_types[nb_rank] = mesh->neighbors[i].halo_incoming.transfer_type_int;
    }

    MPI_Alltoallw(
        data, counts, displs, send_types,
        data, counts, displs, recv_types,
        mesh->communicator
    );

    free(counts);
    free(displs);
    free(types);
}

// Halo exchange with standard non-blocking point-to-point communication.
void mpi_halo_exchange_int_p2p_default(int_field_t *field)
{
    const mesh_t *mesh = field->mesh;
    int *data = field->data;
    int n_neighbors = mesh->n_neighbors;
    int i, tag = 0;
    neighbor_info *nb;

    MPI_Request *requests = malloc(2 * n_neighbors * sizeof(*requests));
    MPI_Request *recv_requests = requests + 1 * n_neighbors;
    MPI_Request *send_requests = requests + 0 * n_neighbors;

    // Post non-blocking receives for all neighbors
    for(i = 0; i < n_neighbors; i++) {
        nb = &mesh->neighbors[i];
        MPI_Irecv(
            data, 1, nb->halo_incoming.transfer_type_int,
            nb->mpi_rank, tag, mesh->communicator, &recv_requests[i]
        );
    }

    // Post non-blocking sends for all neighbors
    for(i = 0; i < n_neighbors; i++) {
        nb = &mesh->neighbors[i];
        MPI_Isend(
            data, 1, nb->halo_outgoing.transfer_type_int,
            nb->mpi_rank, tag, mesh->communicator, &send_requests[i]
        );
    }

    // Finish operations
    MPI_Waitall(2 * n_neighbors, requests, MPI_STATUSES_IGNORE);

    free(requests);
}

// Halo exchange with non-blocking synchronous sends (MPI_Issend).
void mpi_halo_exchange_int_p2p_synchronous(int_field_t *field)
{
    const mesh_t *mesh = field->mesh;
    int *data = field->data;
    int n_neighbors = mesh->n_neighbors;
    int i, tag = 0;
    neighbor_info *nb;

    MPI_Request *requests = malloc(2 * n_neighbors * sizeof(*requests));
    MPI_Request *recv_requests = requests + 1 * n_neighbors;
    MPI_Request *send_requests = requests + 0 * n_neighbors;

    // Post non-blocking receives for all neighbors
    for(i = 0; i < n_neighbors; i++) {
        nb = &mesh->neighbors[i];
        MPI_Irecv(
            data, 1, nb->halo_incoming.transfer_type_int,
            nb->mpi_rank, tag, mesh->communicator, &recv_requests[i]
        );
    }

    // Post non-blocking synchronous sends for all neighbors
    for(i = 0; i < n_neighbors; i++) {
        nb = &mesh->neighbors[i];
        MPI_Issend(
            data, 1, nb->halo_outgoing.transfer_type_int,
            nb->mpi_rank, tag, mesh->communicator, &send_requests[i]
        );
    }

    // Finish operations
    MPI_Waitall(2 * n_neighbors, requests, MPI_STATUSES_IGNORE);

    free(requests);
}

// Halo exchange with ready sends (MPI_Irsend); the barrier guarantees that
// every matching receive has been posted before the sends start.
void mpi_halo_exchange_int_p2p_ready(int_field_t *field)
{
    const mesh_t *mesh = field->mesh;
    int *data = field->data;
    int n_neighbors = mesh->n_neighbors;
    int i, tag = 0;
    neighbor_info *nb;

    MPI_Request *requests = malloc(2 * n_neighbors * sizeof(*requests));
    MPI_Request *recv_requests = requests + 1 * n_neighbors;
    MPI_Request *send_requests = requests + 0 * n_neighbors;

    // Post non-blocking receives for all neighbors
    for(i = 0; i < n_neighbors; i++) {
        nb = &mesh->neighbors[i];
        MPI_Irecv(
            data, 1, nb->halo_incoming.transfer_type_int,
            nb->mpi_rank, tag, mesh->communicator, &recv_requests[i]
        );
    }

    // Be sure that all receives for the current communication round have been posted
    MPI_Barrier(mesh->communicator);

    // Post non-blocking ready sends for all neighbors
    for(i = 0; i < n_neighbors; i++) {
        nb = &mesh->neighbors[i];
        MPI_Irsend(
            data, 1, nb->halo_outgoing.transfer_type_int,
            nb->mpi_rank, tag, mesh->communicator, &send_requests[i]
        );
    }

    // Finish operations
    MPI_Waitall(2 * n_neighbors, requests, MPI_STATUSES_IGNORE);

    free(requests);
}

// --------------------------------------------------------- Helper functions
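// Illustrative usage sketch (not part of the original source and not one of
// the file's helper functions): creating the graph communicator and running
// one halo-exchange round.  The example function, the guard macro
// MPICOMM_USAGE_SKETCHES, and the assumption that the graph communicator is
// stored in mesh->communicator (and that field lives on this mesh) are
// hypothetical.
#ifdef MPICOMM_USAGE_SKETCHES
static void example_halo_exchange_round(mesh_t *mesh, const conf_t *configuration,
                                        int_field_t *field)
{
    // The sparse (neighborhood) collective needs a communicator with a
    // process topology; the dense collective and the point-to-point variants
    // work on any communicator.
    mpi_create_graph_communicator(mesh, configuration, &mesh->communicator);

    // Any one of the exchange variants fills the halo cells of the field;
    // they differ only in the MPI mechanism used.
#if MPI_VERSION >= 3
    mpi_halo_exchange_int_sparse_collective(field);   // MPI_Neighbor_alltoallw
#else
    mpi_halo_exchange_int_p2p_default(field);         // MPI_Irecv / MPI_Isend
#endif
}
#endif // MPICOMM_USAGE_SKETCHES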