// world.c
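// Per-rank world setup for the MPI grid code: Cartesian decomposition of the
// global grid, halo-exchange datatypes and communicators, and persistent
// requests.
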
void create_cart_comm(const conf_t *c, MPI_Comm *cart_comm);

void world_init_io_type(world_t *world);
void world_free_io_type(world_t *world);

void world_init_neighborhood(world_t *world, MPI_Comm cart_comm, int nprocs[], int proc_coord[], const conf_t *c);
void world_free_neighborhood(world_t *world);

void world_init_persistent_requests(world_t *world, const conf_t *c);
void world_free_persistent_requests(world_t *world);

// --------------------------------------------------------------------------

void create_cart_comm(const conf_t *c, MPI_Comm *cart_comm)
{
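   // Build a 2D, non-periodic Cartesian communicator over MPI_COMM_WORLD
   // (MPI is allowed to reorder ranks), then use an allreduce to report
   // whether any rank actually got a new number.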
   const int periods[] = {0, 0}; // non-periodic boundaries

   const int allow_reorder = 1;
   int comm_world_rank, new_rank;
   int local_ranks_different, ranks_reordered;

   if(debug_enabled(c)) printf("Creating Cartesian communicator...\n");
   MPI_Cart_create(MPI_COMM_WORLD, 2, c->nprocs, periods, allow_reorder, cart_comm);

   MPI_Comm_rank(MPI_COMM_WORLD, &comm_world_rank);
   MPI_Comm_rank(*cart_comm, &new_rank);

   local_ranks_different = comm_world_rank != new_rank;
   MPI_Allreduce(
      &local_ranks_different, &ranks_reordered, 1, MPI_INT,
      MPI_LOR, MPI_COMM_WORLD
   );

   if(debug_enabled(c)) printf(
       "INFO: MPI reordered ranks: %s\n", ranks_reordered ? "YES" : "NO"
   );
}

void world_init(world_t *world, size_t *global_size, const conf_t *c)
{
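   // Block-decompose the global grid over the 2D process grid (this rank owns
   // the half-open tile [lo, hi) per dimension), allocate and blank both cell
   // generations, and set up the I/O datatype, the neighborhood transfer
   // types/communicator, and the persistent requests.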
   int dim, lo, hi;
   int nprocs[2], periods[2], proc_coord[2];
   size_t storage_size;
   MPI_Comm cart_comm;

   create_cart_comm(c, &cart_comm);
   MPI_Cart_get(cart_comm, 2, nprocs, periods, proc_coord);

   for(dim = 0; dim < 2; dim++) {
      lo = (proc_coord[dim]+0) * global_size[dim] / nprocs[dim];
      hi = (proc_coord[dim]+1) * global_size[dim] / nprocs[dim];

      world->global_size[dim] = global_size[dim];
      world->local_size[dim] = hi - lo;
      world->local_start[dim] = lo;
   }

   storage_size = world_get_storage_size(world);
   world->cells_prev = malloc(storage_size);
   world->cells_next = malloc(storage_size);
   memset(world->cells_prev, ' ', storage_size);
   memset(world->cells_next, ' ', storage_size);

   world_init_io_type(world);
   world_init_neighborhood(world, cart_comm, nprocs, proc_coord, c);
   world_init_persistent_requests(world, c);
   MPI_Comm_free(&cart_comm);

   if(trace_enabled(c)) {
      int rank;
      MPI_Comm_rank(MPI_COMM_WORLD, &rank);
      printf(
         "%03d: Local tile: [%ld %ld) x [%ld %ld)\n", rank,
         world->local_start[0], world->local_start[0]+world->local_size[0],
         world->local_start[1], world->local_start[1]+world->local_size[1]
      );
   }
}

void world_free(world_t *world)
{
   world_free_io_type(world);
   world_free_neighborhood(world);
   world_free_persistent_requests(world);

   free(world->cells_prev);
   free(world->cells_next);
   world->cells_prev = NULL;
   world->cells_next = NULL;
}

void world_init_io_type(world_t *world)
{
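   // Subarray covering only the nx x ny interior of the (nx+2) x (ny+2)
   // local buffer, i.e. the cells without the one-element ghost border;
   // used for I/O of the local tile (hence the type's name).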
   const int nx = world->local_size[0],
             ny = world->local_size[1];

   const int sizes[] = {nx+2, ny+2},
             subsizes[] = {nx, ny},
             starts[] = {1, 1};

   MPI_Type_create_subarray(2,
      sizes, subsizes, starts,
      MPI_ORDER_FORTRAN, MPI_CHAR, &world->transfer.io_type
   );
   MPI_Type_commit(&world->transfer.io_type);
}

void world_free_io_type(world_t *world)
{
   MPI_Type_free(&world->transfer.io_type);
}

void world_init_neighborhood(world_t *world, MPI_Comm cart_comm, int nprocs[], int proc_coord[], const conf_t *c)
{
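   // For every Moore neighbor that actually exists (boundaries are
   // non-periodic), commit matching send/receive subarray types, record its
   // rank in the Cartesian communicator, and weight it by its halo size.
   // In SPARSE_COLLECTIVE mode this adjacency also becomes a distributed
   // graph communicator; otherwise the Cartesian communicator is duplicated.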
   const int px = proc_coord[0],
             py = proc_coord[1];

   const int npx = nprocs[0],
             npy = nprocs[1];

   const int nx = world->local_size[0],
             ny = world->local_size[1];

   struct halo_info_s {
      int proc_coord[2];
      int subsizes[2];
      int send_starts[2];
      int recv_starts[2];
   };

   const struct halo_info_s halo[] = {
      // Target Proc | Subsize | Send start | Recv start
      { {px-1, py-1},  { 1,  1}, { 1,  1},    {   0,    0} }, // left upper
      { {px,   py-1},  {nx,  1}, { 1,  1},    {   1,    0} }, // upper
      { {px+1, py-1},  { 1,  1}, {nx,  1},    {nx+1,    0} }, // right upper
      { {px-1, py  },  { 1, ny}, { 1,  1},    {   0,    1} }, // left
      { {px+1, py  },  { 1, ny}, {nx,  1},    {nx+1,    1} }, // right
      { {px-1, py+1},  { 1,  1}, { 1, ny},    {   0, ny+1} }, // left lower
      { {px,   py+1},  {nx,  1}, { 1, ny},    {   1, ny+1} }, // lower
      { {px+1, py+1},  { 1,  1}, {nx, ny},    {nx+1, ny+1} }, // right lower
   };

   size_t i, n;
   const int sizes[] = {nx+2, ny+2};
   int *neighbor_ranks = world->transfer.neighbor_ranks;
   int weights[8];

   MPI_Datatype *send_types = world->transfer.send_types;
   MPI_Datatype *recv_types = world->transfer.recv_types;

   n = 0;
   for(i = 0; i < 8; i++) {
      int x = halo[i].proc_coord[0];
      int y = halo[i].proc_coord[1];

      // Bounds check (Valid neighbor?)
      if(x >= 0 && x < npx && y >= 0 && y < npy) {
         int neighbor_rank;

         // Create send and receive types
         MPI_Type_create_subarray(2,
            sizes, halo[i].subsizes, halo[i].send_starts,
            MPI_ORDER_FORTRAN, MPI_CHAR, &send_types[n]
         );
         MPI_Type_commit(&send_types[n]);
         MPI_Type_create_subarray(2,
            sizes, halo[i].subsizes, halo[i].recv_starts,
            MPI_ORDER_FORTRAN, MPI_CHAR, &recv_types[n]
         );
         MPI_Type_commit(&recv_types[n]);

         // Get rank of neighbor
         MPI_Cart_rank(cart_comm, halo[i].proc_coord, &neighbor_rank);
         neighbor_ranks[n] = neighbor_rank;
         weights[n] = halo[i].subsizes[0] * halo[i].subsizes[1];
         n++;
      }
   }
   world->transfer.n_neighbors = n;

   if(c->transmission_mode == SPARSE_COLLECTIVE) {
      // Assumed value, mirroring create_cart_comm(); the declaration was missing here.
      const int allow_reorder = 1;

      if(debug_enabled(c)) printf("Creating MPI distributed graph communicator...\n");
      MPI_Dist_graph_create_adjacent(cart_comm,
         n, neighbor_ranks, weights,
         n, neighbor_ranks, weights,
         MPI_INFO_NULL, allow_reorder, &world->transfer.comm
      );
   } else {
      MPI_Comm_dup(cart_comm, &world->transfer.comm);
   }
}

void world_free_neighborhood(world_t *world)
{
   int i;
   const int n = world->transfer.n_neighbors;
   MPI_Datatype *send_types = world->transfer.send_types;
   MPI_Datatype *recv_types = world->transfer.recv_types;

   for(i = 0; i < n; i++) {
      MPI_Type_free(&send_types[i]);
      MPI_Type_free(&recv_types[i]);
   }
   MPI_Comm_free(&world->transfer.comm);
}

void world_init_persistent_requests(world_t *world, const conf_t *c)
{
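   // In PERSISTENT_REQUEST mode, pre-build one persistent send and one
   // persistent receive per neighbor (both addressing cells_prev through the
   // subarray types); otherwise mark all 16 slots (send + recv for up to
   // 8 neighbors) as MPI_REQUEST_NULL.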
   size_t i;
   MPI_Request *requests = world->transfer.persistent_requests;

   if(c->transmission_mode == PERSISTENT_REQUEST) {
      const size_t n_neighbors = world->transfer.n_neighbors;
      const int tag = 0;

      int *neighbor_ranks = world->transfer.neighbor_ranks;
      MPI_Datatype *send_types = world->transfer.send_types,
                   *recv_types = world->transfer.recv_types;

      MPI_Comm comm = world->transfer.comm;

      if(debug_enabled(c)) printf("Initializing persistent requests...\n");

      for(i = 0; i < n_neighbors; i++) {
         MPI_Send_init(
            world->cells_prev, 1, send_types[i], neighbor_ranks[i],
            tag, comm, &requests[2*i]
         );
         MPI_Recv_init(
            world->cells_prev, 1, recv_types[i], neighbor_ranks[i],
            tag, comm, &requests[2*i+1]
         );
      }
   } else {
      for(i = 0; i < 16; i++) {
         requests[i] = MPI_REQUEST_NULL;
      }
   }
}

void world_free_persistent_requests(world_t *world)
{
   const size_t n_neighbors = world->transfer.n_neighbors;
   size_t i;

   MPI_Request *requests = world->transfer.persistent_requests;

   for(i = 0; i < 2*n_neighbors; i++) {
      if(requests[i] != MPI_REQUEST_NULL) {
         MPI_Request_free(&requests[i]);
      }
   }
}
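
// A minimal sketch (not from the original file) of how the pieces built above
// are presumably consumed per timestep elsewhere in the program: in
// PERSISTENT_REQUEST mode via MPI_Startall/MPI_Waitall on the request pairs;
// in SPARSE_COLLECTIVE mode presumably via MPI_Neighbor_alltoallw on the
// distributed graph communicator (counts of 1 and zero displacements, since
// the subarray types already encode the offsets). The function name below is
// hypothetical.
//
//    static void world_exchange_halo_persistent(world_t *world)
//    {
//       MPI_Request *reqs = world->transfer.persistent_requests;
//       const int n = 2 * (int)world->transfer.n_neighbors;
//
//       MPI_Startall(n, reqs);                      // start all sends and receives
//       MPI_Waitall(n, reqs, MPI_STATUSES_IGNORE);  // block until the halos have arrived
//    }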

size_t world_get_storage_size(const world_t *world)
{
   const size_t nx = world->local_size[0],