// simulation.c

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <mpi.h>

#include "simulation.h"

// Cell states as they are stored (one char per cell) in the cell buffers.
// Cells holding any other character are left untouched by wireworld_step().

// Electron head: turns into an electron tail in the next generation.
#define ELECTRON_HEAD '@'
// Electron tail: turns into plain wire in the next generation.
#define ELECTRON_TAIL '~'
// Wire: turns into an electron head if 1 or 2 of its 8 neighbors are heads.
#define WIRE '#'

// Simulation drivers; do_simulation() picks one of them depending on the
// configured transmission mode and communication/computation mode.

// Drivers exchanging data through MPI neighborhood collectives
// (blocking MPI_Neighbor_alltoallw / non-blocking MPI_Ineighbor_alltoallw).
void do_simulation_blocking_sparse(world_t *world, size_t n_generations);
void do_simulation_nonblocking_sparse(world_t *world, size_t n_generations);

// Drivers exchanging data through non-blocking point-to-point MPI_Isend/MPI_Irecv.
void do_simulation_p2p_overlap(world_t *world, size_t n_generations);
void do_simulation_p2p_no_overlap(world_t *world, size_t n_generations);

// Drivers exchanging data through the persistent requests stored in
// world->transfer.persistent_requests.
void do_simulation_persistent_request_overlap(world_t *world, size_t n_generations);
void do_simulation_persistent_request_no_overlap(world_t *world, size_t n_generations);

// Cell-update passes over the local grid: all local cells, only the cells
// one step away from the local edge, or only the outermost ring of local cells.
void wireworld_step_complete(world_t *world);
void wireworld_step_interior(world_t *world);
void wireworld_step_boundary(world_t *world);

// Updates a rectangle of bx columns by by rows whose first cell has the
// linear index i_start (indices include the halo columns/rows).
void wireworld_step(world_t *world, size_t i_start, size_t bx, size_t by);

// --------------------------------------------------------------------------

// Advances the world by n_generations using the strategy selected in c.
//
// The strategy key is the bitwise OR of c->transmission_mode and
// c->communication_computation_mode. A combination without a matching driver
// prints a message to stderr and aborts all ranks of MPI_COMM_WORLD.
void do_simulation(world_t *world, size_t n_generations, const conf_t *c)
{
   const int selected = c->transmission_mode | c->communication_computation_mode;

   // Neighborhood collectives.
   if(selected == (SPARSE_COLLECTIVE | NO_OVERLAP)) {
      do_simulation_blocking_sparse(world, n_generations);
      return;
   }
   if(selected == (SPARSE_COLLECTIVE | OVERLAP)) {
      do_simulation_nonblocking_sparse(world, n_generations);
      return;
   }

   // Non-blocking point-to-point messages.
   if(selected == (POINT_TO_POINT | NO_OVERLAP)) {
      do_simulation_p2p_no_overlap(world, n_generations);
      return;
   }
   if(selected == (POINT_TO_POINT | OVERLAP)) {
      do_simulation_p2p_overlap(world, n_generations);
      return;
   }

   // Persistent requests.
   if(selected == (PERSISTENT_REQUEST | NO_OVERLAP)) {
      do_simulation_persistent_request_no_overlap(world, n_generations);
      return;
   }
   if(selected == (PERSISTENT_REQUEST | OVERLAP)) {
      do_simulation_persistent_request_overlap(world, n_generations);
      return;
   }

   // No driver exists for this combination.
   fprintf(stderr, "Not yet implemented.\n");
   MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
}

// Runs n_generations steps. Each step swaps the two cell buffers, performs a
// blocking neighborhood all-to-all exchange on world->cells_prev and then
// updates every local cell.
void do_simulation_blocking_sparse(world_t *world, size_t n_generations)
{
   // One element of each per-neighbor datatype, every one of them addressed
   // relative to the start of the cell buffer (displacement 0).
   const int one_per_neighbor[] = {1, 1, 1, 1, 1, 1, 1, 1};
   const MPI_Aint zero_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0};

   const MPI_Datatype *outgoing = world->transfer.send_types;
   const MPI_Datatype *incoming = world->transfer.recv_types;

   MPI_Comm neighborhood = world->transfer.comm;

   size_t remaining = n_generations;

   while(remaining > 0) {
      // The buffer written in the last step becomes the input of this one.
      char *latest = world->cells_next;
      world->cells_next = world->cells_prev;
      world->cells_prev = latest;

      // Send and receive datatypes both refer to the same buffer.
      MPI_Neighbor_alltoallw(
         latest, one_per_neighbor, zero_offsets, outgoing,
         latest, one_per_neighbor, zero_offsets, incoming,
         neighborhood
      );
      wireworld_step_complete(world);

      remaining--;
   }
}

// Runs n_generations steps. Each step swaps the two cell buffers, starts a
// non-blocking neighborhood all-to-all exchange on world->cells_prev, updates
// the interior cells while the exchange is in flight, waits for the exchange
// and finally updates the boundary cells.
void do_simulation_nonblocking_sparse(world_t *world, size_t n_generations)
{
   // One element of each per-neighbor datatype, every one of them addressed
   // relative to the start of the cell buffer (displacement 0).
   const int one_per_neighbor[] = {1, 1, 1, 1, 1, 1, 1, 1};
   const MPI_Aint zero_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0};

   const MPI_Datatype *outgoing = world->transfer.send_types;
   const MPI_Datatype *incoming = world->transfer.recv_types;

   MPI_Comm neighborhood = world->transfer.comm;

   MPI_Request exchange;
   size_t remaining = n_generations;

   while(remaining > 0) {
      // The buffer written in the last step becomes the input of this one.
      char *latest = world->cells_next;
      world->cells_next = world->cells_prev;
      world->cells_prev = latest;

      MPI_Ineighbor_alltoallw(
         latest, one_per_neighbor, zero_offsets, outgoing,
         latest, one_per_neighbor, zero_offsets, incoming,
         neighborhood, &exchange
      );

      // Interior cells are updated while the exchange is still pending.
      wireworld_step_interior(world);

      // Boundary cells are updated only after the exchange has completed.
      MPI_Wait(&exchange, MPI_STATUS_IGNORE);
      wireworld_step_boundary(world);

      remaining--;
   }
}

// Runs n_generations steps using non-blocking point-to-point messages without
// overlapping communication and computation.
//
// Each step swaps the two cell buffers, posts one send and one receive per
// neighbor on world->cells_prev, waits for all of them and then updates every
// local cell.
//
// Aborts all ranks of MPI_COMM_WORLD if world->transfer.n_neighbors needs more
// requests than the fixed-size request array can hold.
void do_simulation_p2p_no_overlap(world_t *world, size_t n_generations)
{
   const int tag = 0;
   const size_t n_neighbors = world->transfer.n_neighbors;

   const int *neighbor_ranks = world->transfer.neighbor_ranks;

   const MPI_Datatype *send_types = world->transfer.send_types,
                      *recv_types = world->transfer.recv_types;

   MPI_Comm comm = world->transfer.comm;

   size_t g, i;
   char *tmp;

   // Two requests (send + receive) per neighbor.
   MPI_Request requests[16];
   const size_t max_neighbors = (sizeof requests / sizeof requests[0]) / 2;

   // Guard against writing past the end of the request array.
   if(n_neighbors > max_neighbors) {
      fprintf(stderr, "Too many neighbors for point-to-point exchange: %zu (max. %zu).\n",
              n_neighbors, max_neighbors);
      MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
      return;
   }

   // Safe narrowing: n_neighbors is bounded by max_neighbors here.
   const int n_requests = (int)(2*n_neighbors);

   for(g = 0; g < n_generations; g++) {
      // The buffer written in the last step becomes the input of this one.
      tmp = world->cells_prev;
      world->cells_prev = world->cells_next;
      world->cells_next = tmp;

      // requests[2*i] is the send to neighbor i, requests[2*i+1] the receive from it.
      for(i = 0; i < n_neighbors; i++) {
         MPI_Isend(
            world->cells_prev, 1, send_types[i], neighbor_ranks[i],
            tag, comm, &requests[2*i]
         );
         MPI_Irecv(
            world->cells_prev, 1, recv_types[i], neighbor_ranks[i],
            tag, comm, &requests[2*i+1]
         );
      }
      MPI_Waitall(n_requests, requests, MPI_STATUSES_IGNORE);

      wireworld_step_complete(world);
   }
}

// Runs n_generations steps using non-blocking point-to-point messages while
// overlapping communication and computation.
//
// Each step swaps the two cell buffers, posts one send and one receive per
// neighbor on world->cells_prev, updates the interior cells while the
// messages are in flight, waits for all messages and then updates the
// boundary cells.
//
// Aborts all ranks of MPI_COMM_WORLD if world->transfer.n_neighbors needs more
// requests than the fixed-size request array can hold.
void do_simulation_p2p_overlap(world_t *world, size_t n_generations)
{
   const int tag = 0;
   const size_t n_neighbors = world->transfer.n_neighbors;

   const int *neighbor_ranks = world->transfer.neighbor_ranks;

   const MPI_Datatype *send_types = world->transfer.send_types,
                      *recv_types = world->transfer.recv_types;

   MPI_Comm comm = world->transfer.comm;

   size_t g, i;
   char *tmp;

   // Two requests (send + receive) per neighbor.
   MPI_Request requests[16];
   const size_t max_neighbors = (sizeof requests / sizeof requests[0]) / 2;

   // Guard against writing past the end of the request array.
   if(n_neighbors > max_neighbors) {
      fprintf(stderr, "Too many neighbors for point-to-point exchange: %zu (max. %zu).\n",
              n_neighbors, max_neighbors);
      MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
      return;
   }

   // Safe narrowing: n_neighbors is bounded by max_neighbors here.
   const int n_requests = (int)(2*n_neighbors);

   for(g = 0; g < n_generations; g++) {
      // The buffer written in the last step becomes the input of this one.
      tmp = world->cells_prev;
      world->cells_prev = world->cells_next;
      world->cells_next = tmp;

      // requests[2*i] is the send to neighbor i, requests[2*i+1] the receive from it.
      for(i = 0; i < n_neighbors; i++) {
         MPI_Isend(
            world->cells_prev, 1, send_types[i], neighbor_ranks[i],
            tag, comm, &requests[2*i]
         );
         MPI_Irecv(
            world->cells_prev, 1, recv_types[i], neighbor_ranks[i],
            tag, comm, &requests[2*i+1]
         );
      }

      // Interior cells are updated while the messages are still pending.
      wireworld_step_interior(world);

      MPI_Waitall(n_requests, requests, MPI_STATUSES_IGNORE);

      // Boundary cells are updated only after all messages have completed.
      wireworld_step_boundary(world);
   }
}

// Runs n_generations steps using the persistent requests stored in
// world->transfer.persistent_requests, without overlapping communication and
// computation.
//
// Each step exchanges the contents of the two cell buffers, starts all
// 2*n_neighbors persistent requests, waits for them and then updates every
// local cell.
//
// Aborts all ranks of MPI_COMM_WORLD if the scratch buffer used for the
// exchange of the buffer contents cannot be allocated.
void do_simulation_persistent_request_no_overlap(world_t *world, size_t n_generations)
{
   const size_t n_neighbors = world->transfer.n_neighbors;
   const size_t sz = world_get_storage_size(world);
   const int n_requests = (int)(2*n_neighbors);

   size_t g;
   char *tmp;
   MPI_Request *requests = world->transfer.persistent_requests;

   // Nothing to simulate: skip the scratch allocation entirely.
   if(n_generations == 0) {
      return;
   }

   // Request at least one byte so that NULL unambiguously means failure.
   tmp = malloc(sz > 0 ? sz : 1);
   if(tmp == NULL) {
      fprintf(stderr, "Out of memory: cannot allocate %zu bytes for buffer swap.\n", sz);
      MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
      return;
   }

   for(g = 0; g < n_generations; g++) {

      // Persistent send/receive requests have been initialized with pointer to world->cells_prev.
      // Swapping pointers here will therefore not work and swapping memory is necessary.
      //
      // Possible Optimization:
      // ----------------------
      // Alternately using two sets of persistent requests (one for world->cells_prev and world->cells_next).
      memcpy(tmp, world->cells_prev, sz);
      memcpy(world->cells_prev, world->cells_next, sz);
      memcpy(world->cells_next, tmp, sz);

      MPI_Startall(n_requests, requests);
      MPI_Waitall(n_requests, requests, MPI_STATUSES_IGNORE);

      wireworld_step_complete(world);
   }
   free(tmp);
}

// Runs n_generations steps using the persistent requests stored in
// world->transfer.persistent_requests, overlapping communication and
// computation.
//
// Each step exchanges the contents of the two cell buffers, starts all
// 2*n_neighbors persistent requests, updates the interior cells while the
// requests are active, waits for them and then updates the boundary cells.
//
// Aborts all ranks of MPI_COMM_WORLD if the scratch buffer used for the
// exchange of the buffer contents cannot be allocated.
void do_simulation_persistent_request_overlap(world_t *world, size_t n_generations)
{
   const size_t n_neighbors = world->transfer.n_neighbors;
   const size_t sz = world_get_storage_size(world);
   const int n_requests = (int)(2*n_neighbors);

   size_t g;
   char *tmp;

   MPI_Request *requests = world->transfer.persistent_requests;

   // Nothing to simulate: skip the scratch allocation entirely.
   if(n_generations == 0) {
      return;
   }

   // Request at least one byte so that NULL unambiguously means failure.
   tmp = malloc(sz > 0 ? sz : 1);
   if(tmp == NULL) {
      fprintf(stderr, "Out of memory: cannot allocate %zu bytes for buffer swap.\n", sz);
      MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
      return;
   }

   for(g = 0; g < n_generations; g++) {

      // The buffer contents are exchanged (instead of swapping the two
      // pointers) so that world->cells_prev keeps its address across steps.
      // NOTE(review): presumably the persistent requests were created with
      // that address -- confirm where the requests are initialized.
      memcpy(tmp, world->cells_prev, sz);
      memcpy(world->cells_prev, world->cells_next, sz);
      memcpy(world->cells_next, tmp, sz);

      MPI_Startall(n_requests, requests);

      // Interior cells are updated while the requests are still active.
      wireworld_step_interior(world);

      MPI_Waitall(n_requests, requests, MPI_STATUSES_IGNORE);

      // Boundary cells are updated only after all requests have completed.
      wireworld_step_boundary(world);
   }
   free(tmp);
}

// Updates every local cell (all local_size[0] x local_size[1] cells) in one pass.
void wireworld_step_complete(world_t *world)
{
   const size_t width  = world->local_size[0];
   const size_t height = world->local_size[1];

   // A stored row is two cells wider than the local grid (halo cells).
   const size_t row_stride = width + 2;

   // First local cell: skip the upper halo row and the left halo column.
   const size_t first_local = row_stride + 1;

   wireworld_step(world, first_local, width, height);
}

// Updates the local cells that are at least one cell away from the edge of
// the local grid, i.e. the (nx-2) x (ny-2) rectangle whose 8-neighborhoods
// contain no halo cells.
//
// Does nothing when the local grid is too small to have such cells; without
// this check nx-2 or ny-2 would wrap around for sizes below 2.
void wireworld_step_interior(world_t *world)
{
   const size_t nx = world->local_size[0],
                ny = world->local_size[1];

   const size_t DOWN = nx+2; // (+2 ... for halo cells)

   const size_t i_leftupper  = 1  +    DOWN;

   // Grids narrower or lower than 3 cells consist of boundary cells only.
   if(nx < 3 || ny < 3) {
      return;
   }

   // One column right of and one row below the first local cell.
   wireworld_step(world, i_leftupper+1+DOWN, nx-2, ny-2);
}

// Updates the outermost ring of local cells: the upper and lower rows over
// their full width, then the left and right columns between them.
//
// Degenerate local sizes are handled explicitly: an empty grid is skipped,
// a single row or column is updated once, and the side columns are skipped
// when there are no rows between the upper and lower row (ny-2 would
// otherwise wrap around for ny < 2).
void wireworld_step_boundary(world_t *world)
{
   const size_t nx = world->local_size[0],
                ny = world->local_size[1];

   const size_t DOWN = nx+2; // (+2 ... for halo cells)

   const size_t i_leftupper  = 1  +    DOWN,
                i_rightupper = nx +    DOWN,
                i_leftlower  = 1  + ny*DOWN;

   // No local cells at all.
   if(nx == 0 || ny == 0) {
      return;
   }

   wireworld_step(world, i_leftupper, nx, 1); // upper

   // With a single row, the lower row is the upper row.
   if(ny > 1) {
      wireworld_step(world, i_leftlower, nx, 1); // lower
   }

   // Side columns only cover the rows strictly between upper and lower row.
   if(ny > 2) {
      wireworld_step(world, i_leftupper+DOWN, 1, ny-2); // left

      // With a single column, the right column is the left column.
      if(nx > 1) {
         wireworld_step(world, i_rightupper+DOWN, 1, ny-2); // right
      }
   }
}

// Computes the next state of a rectangle of cells.
//
// world   : cells are read from world->cells_prev and written to world->cells_next.
// i_start : linear index of the rectangle's first cell (rows are
//           world->local_size[0]+2 cells long, i.e. including halo cells).
// bx, by  : number of columns and rows of the rectangle.
//
// Cells that are neither electron head, electron tail nor wire are not
// written at all.
void wireworld_step(world_t *world, size_t i_start, size_t bx, size_t by)
{
   const size_t stride = world->local_size[0] + 2; // (+2 ... for halo cells)

   const char *src = world->cells_prev;
   char *dst = world->cells_next;

   for(size_t row = 0; row < by; row++) {
      const size_t row_begin = i_start + row*stride;

      for(size_t col = 0; col < bx; col++) {
         const size_t c = row_begin + col;
         const char state = src[c];

         if(state == ELECTRON_HEAD) {
            // Electron heads become electron tails.
            dst[c] = ELECTRON_TAIL;
         } else if(state == ELECTRON_TAIL) {
            // Electron tails become copper.
            dst[c] = WIRE;
         } else if(state == WIRE) {
            // Copper becomes an electron head if exactly 1 or 2 of the
            // 8 surrounding cells are electron heads; otherwise it stays copper.
            const size_t above = c - stride;
            const size_t below = c + stride;
            int heads = 0;

            heads += (src[above-1] == ELECTRON_HEAD);
            heads += (src[above  ] == ELECTRON_HEAD);
            heads += (src[above+1] == ELECTRON_HEAD);
            heads += (src[c-1    ] == ELECTRON_HEAD);
            heads += (src[c+1    ] == ELECTRON_HEAD);
            heads += (src[below-1] == ELECTRON_HEAD);
            heads += (src[below  ] == ELECTRON_HEAD);
            heads += (src[below+1] == ELECTRON_HEAD);

            dst[c] = (heads == 1 || heads == 2) ? ELECTRON_HEAD : WIRE;
         }
      }
   }
}