#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <mpi.h>

#include "world.h"

void create_cart_comm(const conf_t *c, MPI_Comm *cart_comm);
void world_init_io_type(world_t *world);
void world_free_io_type(world_t *world);
void world_init_neighborhood(world_t *world, MPI_Comm cart_comm,
    int nprocs[], int proc_coord[], const conf_t *c);
void world_free_neighborhood(world_t *world);
void world_init_persistent_requests(world_t *world, const conf_t *c);
void world_free_persistent_requests(world_t *world);

// --------------------------------------------------------------------------

// Create a 2D Cartesian communicator and report whether MPI reordered the ranks.
void create_cart_comm(const conf_t *c, MPI_Comm *cart_comm)
{
    const int periods[] = {0, 0};   // non-periodic boundaries
    const int allow_reorder = 1;
    int comm_world_rank, new_rank;
    int local_ranks_different, ranks_reordered;

    if(debug_enabled(c))
        printf("Creating Cartesian communicator...\n");

    MPI_Cart_create(MPI_COMM_WORLD, 2, c->nprocs, periods, allow_reorder, cart_comm);

    MPI_Comm_rank(MPI_COMM_WORLD, &comm_world_rank);
    MPI_Comm_rank(*cart_comm, &new_rank);
    local_ranks_different = comm_world_rank != new_rank;
    MPI_Allreduce(
        &local_ranks_different, &ranks_reordered, 1,
        MPI_INT, MPI_LOR, MPI_COMM_WORLD
    );

    if(debug_enabled(c))
        printf("INFO: MPI reordered ranks: %s\n", ranks_reordered ? "YES" : "NO");
}

// Initialize the local tile: domain decomposition, cell storage, I/O type,
// neighborhood and (optionally) persistent requests.
void world_init(world_t *world, size_t *global_size, const conf_t *c)
{
    int dim, lo, hi;
    int nprocs[2], periods[2], proc_coord[2];
    size_t storage_size;
    MPI_Comm cart_comm;

    create_cart_comm(c, &cart_comm);
    MPI_Cart_get(cart_comm, 2, nprocs, periods, proc_coord);

    // Block decomposition: each process owns [lo, hi) in every dimension.
    for(dim = 0; dim < 2; dim++) {
        lo = (proc_coord[dim]+0) * global_size[dim] / nprocs[dim];
        hi = (proc_coord[dim]+1) * global_size[dim] / nprocs[dim];
        world->global_size[dim] = global_size[dim];
        world->local_size[dim]  = hi - lo;
        world->local_start[dim] = lo;
    }

    storage_size = world_get_storage_size(world);
    world->cells_prev = malloc(storage_size);
    world->cells_next = malloc(storage_size);
    memset(world->cells_prev, ' ', storage_size);
    memset(world->cells_next, ' ', storage_size);

    world_init_io_type(world);
    world_init_neighborhood(world, cart_comm, nprocs, proc_coord, c);
    world_init_persistent_requests(world, c);

    MPI_Comm_free(&cart_comm);

    if(trace_enabled(c)) {
        int rank;
        MPI_Comm_rank(MPI_COMM_WORLD, &rank);
        printf(
            "%03d: Local tile: [%ld %ld) x [%ld %ld)\n", rank,
            world->local_start[0], world->local_start[0]+world->local_size[0],
            world->local_start[1], world->local_start[1]+world->local_size[1]
        );
    }
}

void world_free(world_t *world)
{
    world_free_io_type(world);
    world_free_neighborhood(world);
    world_free_persistent_requests(world);

    free(world->cells_prev);
    free(world->cells_next);
    world->cells_prev = NULL;
    world->cells_next = NULL;
}

// Subarray type describing the tile interior (without the halo) for file I/O.
void world_init_io_type(world_t *world)
{
    const int nx = world->local_size[0], ny = world->local_size[1];
    const int sizes[] = {nx+2, ny+2}, subsizes[] = {nx, ny}, starts[] = {1, 1};

    MPI_Type_create_subarray(2,
        sizes, subsizes, starts,
        MPI_ORDER_FORTRAN, MPI_CHAR, &world->transfer.io_type
    );
    MPI_Type_commit(&world->transfer.io_type);
}

void world_free_io_type(world_t *world)
{
    MPI_Type_free(&world->transfer.io_type);
}

// Build send/receive subarray types and the neighbor list for the 8-point
// halo exchange, plus the communicator used for transmission.
void world_init_neighborhood(world_t *world, MPI_Comm cart_comm,
    int nprocs[], int proc_coord[], const conf_t *c)
{
    const int px = proc_coord[0], py = proc_coord[1];
    const int npx = nprocs[0], npy = nprocs[1];
    const int nx = world->local_size[0], ny = world->local_size[1];

    struct halo_info_s {
        int proc_coord[2];
        int subsizes[2];
        int send_starts[2];
        int recv_starts[2];
    };

    const struct halo_info_s halo[] = {
        // Target proc   | Subsize  | Send start | Recv start
        { {px-1, py-1},   { 1,  1},   {  1,  1},   {   0,    0} },  // left upper
        { {px,   py-1},   {nx,  1},   {  1,  1},   {   1,    0} },  // upper
        { {px+1, py-1},   { 1,  1},   { nx,  1},   {nx+1,    0} },  // right upper
        { {px-1, py  },   { 1, ny},   {  1,  1},   {   0,    1} },  // left
        { {px+1, py  },   { 1, ny},   { nx,  1},   {nx+1,    1} },  // right
        { {px-1, py+1},   { 1,  1},   {  1, ny},   {   0, ny+1} },  // left lower
        { {px,   py+1},   {nx,  1},   {  1, ny},   {   1, ny+1} },  // lower
        { {px+1, py+1},   { 1,  1},   { nx, ny},   {nx+1, ny+1} },  // right lower
    };

    size_t i, n;
    const int sizes[] = {nx+2, ny+2};
    int *neighbor_ranks = world->transfer.neighbor_ranks;
    int weights[8];
    MPI_Datatype *send_types = world->transfer.send_types;
    MPI_Datatype *recv_types = world->transfer.recv_types;

    n = 0;
    for(i = 0; i < 8; i++) {
        int x = halo[i].proc_coord[0];
        int y = halo[i].proc_coord[1];

        // Bounds check (valid neighbor?)
        if(x >= 0 && x < npx && y >= 0 && y < npy) {
            int neighbor_rank;

            // Create send and receive types
            MPI_Type_create_subarray(2,
                sizes, halo[i].subsizes, halo[i].send_starts,
                MPI_ORDER_FORTRAN, MPI_CHAR, &send_types[n]
            );
            MPI_Type_commit(&send_types[n]);

            MPI_Type_create_subarray(2,
                sizes, halo[i].subsizes, halo[i].recv_starts,
                MPI_ORDER_FORTRAN, MPI_CHAR, &recv_types[n]
            );
            MPI_Type_commit(&recv_types[n]);

            // Get rank of neighbor
            MPI_Cart_rank(cart_comm, halo[i].proc_coord, &neighbor_rank);
            neighbor_ranks[n] = neighbor_rank;

            weights[n] = halo[i].subsizes[0] * halo[i].subsizes[1];
            n++;
        }
    }
    world->transfer.n_neighbors = n;

    if(c->transmission_mode == SPARSE_COLLECTIVE) {
        const int allow_reorder = 0;

        if(debug_enabled(c))
            printf("Creating MPI distributed graph communicator...\n");

        MPI_Dist_graph_create_adjacent(cart_comm,
            n, neighbor_ranks, weights,
            n, neighbor_ranks, weights,
            MPI_INFO_NULL, allow_reorder, &world->transfer.comm
        );
    } else {
        MPI_Comm_dup(cart_comm, &world->transfer.comm);
    }
}

void world_free_neighborhood(world_t *world)
{
    int i;
    const int n = world->transfer.n_neighbors;
    MPI_Datatype *send_types = world->transfer.send_types;
    MPI_Datatype *recv_types = world->transfer.recv_types;

    for(i = 0; i < n; i++) {
        MPI_Type_free(&send_types[i]);
        MPI_Type_free(&recv_types[i]);
    }

    MPI_Comm_free(&world->transfer.comm);
}

// Pre-build persistent send/receive requests when PERSISTENT_REQUEST mode is
// selected; otherwise mark all request slots as unused.
void world_init_persistent_requests(world_t *world, const conf_t *c)
{
    size_t i;
    MPI_Request *requests = world->transfer.persistent_requests;

    if(c->transmission_mode == PERSISTENT_REQUEST) {
        const size_t n_neighbors = world->transfer.n_neighbors;
        const int tag = 0;
        int *neighbor_ranks = world->transfer.neighbor_ranks;
        MPI_Datatype *send_types = world->transfer.send_types,
                     *recv_types = world->transfer.recv_types;
        MPI_Comm comm = world->transfer.comm;

        if(debug_enabled(c))
            printf("Initializing persistent requests...\n");

        for(i = 0; i < n_neighbors; i++) {
            MPI_Send_init(
                world->cells_prev, 1, send_types[i],
                neighbor_ranks[i], tag, comm, &requests[2*i]
            );
            MPI_Recv_init(
                world->cells_prev, 1, recv_types[i],
                neighbor_ranks[i], tag, comm, &requests[2*i+1]
            );
        }
    } else {
        for(i = 0; i < 16; i++) {
            requests[i] = MPI_REQUEST_NULL;
        }
    }
}

void world_free_persistent_requests(world_t *world)
{
    const size_t n_neighbors = world->transfer.n_neighbors;
    size_t i;
    MPI_Request *requests = world->transfer.persistent_requests;

    for(i = 0; i < 2*n_neighbors; i++) {
        if(requests[i] != MPI_REQUEST_NULL) {
            MPI_Request_free(&requests[i]);
        }
    }
}

// Local tile plus a one-cell halo on each side.
size_t world_get_storage_size(const world_t *world)
{
    const size_t nx = world->local_size[0], ny = world->local_size[1];
    return (nx+2)*(ny+2)*sizeof(char);
}
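
/*
 * Illustrative usage sketch (disabled, not part of the build): shows how the
 * init/free pairs in this file are intended to nest around an MPI run. It
 * assumes world.h declares world_init and world_free as used above; the
 * conf_parse() helper and the field values it would fill in are hypothetical
 * placeholders for however conf_t is actually populated in this project.
 */
#if 0
int main(int argc, char **argv)
{
    conf_t conf;
    world_t world;
    size_t global_size[2] = {256, 256};

    MPI_Init(&argc, &argv);
    conf_parse(&conf, argc, argv);   /* hypothetical: fill conf_t from the CLI */

    /* Sets up the Cartesian decomposition, halo datatypes, neighborhood
     * communicator and, depending on transmission_mode, persistent requests. */
    world_init(&world, global_size, &conf);

    /* ... time stepping and halo exchange would go here ... */

    world_free(&world);
    MPI_Finalize();
    return 0;
}
#endif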