#include #include #include #include "simulation.h" #define ELECTRON_HEAD '@' #define ELECTRON_TAIL '~' #define WIRE '#' void do_simulation_blocking_sparse(world_t *world, size_t n_generations); void do_simulation_nonblocking_sparse(world_t *world, size_t n_generations); void do_simulation_p2p_overlap(world_t *world, size_t n_generations); void do_simulation_p2p_no_overlap(world_t *world, size_t n_generations); void do_simulation_persistent_request_overlap(world_t *world, size_t n_generations); void do_simulation_persistent_request_no_overlap(world_t *world, size_t n_generations); void wireworld_step_complete(world_t *world); void wireworld_step_interior(world_t *world); void wireworld_step_boundary(world_t *world); void wireworld_step(world_t *world, size_t i_start, size_t bx, size_t by); // -------------------------------------------------------------------------- void do_simulation(world_t *world, size_t n_generations, const conf_t *c) { const int mode = c->transmission_mode | c->communication_computation_mode; switch(mode) { case SPARSE_COLLECTIVE | NO_OVERLAP: do_simulation_blocking_sparse(world, n_generations); break; case SPARSE_COLLECTIVE | OVERLAP: do_simulation_nonblocking_sparse(world, n_generations); break; case POINT_TO_POINT | NO_OVERLAP: do_simulation_p2p_no_overlap(world, n_generations); break; case POINT_TO_POINT | OVERLAP: do_simulation_p2p_overlap(world, n_generations); break; case PERSISTENT_REQUEST | NO_OVERLAP: do_simulation_persistent_request_no_overlap(world, n_generations); break; case PERSISTENT_REQUEST | OVERLAP: do_simulation_persistent_request_overlap(world, n_generations); break; default: fprintf(stderr, "Not yet implemented.\n"); MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE); } } void do_simulation_blocking_sparse(world_t *world, size_t n_generations) { size_t g; char *tmp; const int counts[] = {1, 1, 1, 1, 1, 1, 1, 1}; const MPI_Aint displs[] = {0, 0, 0, 0, 0, 0, 0, 0}; const MPI_Datatype *send_types = world->transfer.send_types, *recv_types = world->transfer.recv_types; MPI_Comm comm = world->transfer.comm; for(g = 0; g < n_generations; g++) { tmp = world->cells_prev; world->cells_prev = world->cells_next; world->cells_next = tmp; MPI_Neighbor_alltoallw( world->cells_prev, counts, displs, send_types, world->cells_prev, counts, displs, recv_types, comm ); wireworld_step_complete(world); } } void do_simulation_nonblocking_sparse(world_t *world, size_t n_generations) { size_t g; char *tmp; const int counts[] = {1, 1, 1, 1, 1, 1, 1, 1}; const MPI_Aint displs[] = {0, 0, 0, 0, 0, 0, 0, 0}; const MPI_Datatype *send_types = world->transfer.send_types, *recv_types = world->transfer.recv_types; MPI_Comm comm = world->transfer.comm; MPI_Request request; for(g = 0; g < n_generations; g++) { tmp = world->cells_prev; world->cells_prev = world->cells_next; world->cells_next = tmp; MPI_Ineighbor_alltoallw( world->cells_prev, counts, displs, send_types, world->cells_prev, counts, displs, recv_types, comm, &request ); wireworld_step_interior(world); MPI_Wait(&request, MPI_STATUS_IGNORE); wireworld_step_boundary(world); } } void do_simulation_p2p_no_overlap(world_t *world, size_t n_generations) { const int tag = 0; const size_t n_neighbors = world->transfer.n_neighbors; const int *neighbor_ranks = world->transfer.neighbor_ranks; const MPI_Datatype *send_types = world->transfer.send_types, *recv_types = world->transfer.recv_types; MPI_Comm comm = world->transfer.comm; size_t g, i; char *tmp; MPI_Request requests[16]; for(g = 0; g < n_generations; g++) { tmp = world->cells_prev; world->cells_prev = world->cells_next; world->cells_next = tmp; for(i = 0; i < n_neighbors; i++) { MPI_Isend( world->cells_prev, 1, send_types[i], neighbor_ranks[i], tag, comm, &requests[2*i] ); MPI_Irecv( world->cells_prev, 1, recv_types[i], neighbor_ranks[i], tag, comm, &requests[2*i+1] ); } MPI_Waitall(2*n_neighbors, requests, MPI_STATUSES_IGNORE); wireworld_step_complete(world); } } void do_simulation_p2p_overlap(world_t *world, size_t n_generations) { const int tag = 0; const size_t n_neighbors = world->transfer.n_neighbors; const int *neighbor_ranks = world->transfer.neighbor_ranks; const MPI_Datatype *send_types = world->transfer.send_types, *recv_types = world->transfer.recv_types; MPI_Comm comm = world->transfer.comm; size_t g, i; char *tmp; MPI_Request requests[16]; for(g = 0; g < n_generations; g++) { tmp = world->cells_prev; world->cells_prev = world->cells_next; world->cells_next = tmp; for(i = 0; i < n_neighbors; i++) { MPI_Isend( world->cells_prev, 1, send_types[i], neighbor_ranks[i], tag, comm, &requests[2*i] ); MPI_Irecv( world->cells_prev, 1, recv_types[i], neighbor_ranks[i], tag, comm, &requests[2*i+1] ); } wireworld_step_interior(world); MPI_Waitall(2*n_neighbors, requests, MPI_STATUSES_IGNORE); wireworld_step_boundary(world); } } void do_simulation_persistent_request_no_overlap(world_t *world, size_t n_generations) { const size_t n_neighbors = world->transfer.n_neighbors; const size_t sz = world_get_storage_size(world); size_t g; MPI_Request *requests = world->transfer.persistent_requests; for(g = 0; g < n_generations; g++) { // Persistent send/receive requests have been initialized with pointer to world->cell_prev. // Swapping pointers here will therefore not work and copying memory is necessary. // // Possible Optimization: // ---------------------- // Alternately using two sets of persistent requests (one for world->cells_prev and world->cells_next). memcpy(world->cells_prev, world->cells_next, sz); MPI_Startall(2*n_neighbors, requests); MPI_Waitall(2*n_neighbors, requests, MPI_STATUSES_IGNORE); wireworld_step_complete(world); } } void do_simulation_persistent_request_overlap(world_t *world, size_t n_generations) { const size_t n_neighbors = world->transfer.n_neighbors; const size_t sz = world_get_storage_size(world); size_t g; MPI_Request *requests = world->transfer.persistent_requests; for(g = 0; g < n_generations; g++) { // See above note in the '_no_overlap' funciton. memcpy(world->cells_prev, world->cells_next, sz); MPI_Startall(2*n_neighbors, requests); wireworld_step_interior(world); MPI_Waitall(2*n_neighbors, requests, MPI_STATUSES_IGNORE); wireworld_step_boundary(world); } } void wireworld_step_complete(world_t *world) { const size_t nx = world->local_size[0], ny = world->local_size[1]; const size_t DOWN = nx+2; // (+2 ... for halo cells) const size_t i_leftupper = 1 + DOWN; wireworld_step(world, i_leftupper, nx, ny); } void wireworld_step_interior(world_t *world) { const size_t nx = world->local_size[0], ny = world->local_size[1]; const size_t DOWN = nx+2; // (+2 ... for halo cells) const size_t i_leftupper = 1 + DOWN; wireworld_step(world, i_leftupper+1+DOWN, nx-2, ny-2); } void wireworld_step_boundary(world_t *world) { const size_t nx = world->local_size[0], ny = world->local_size[1]; const size_t DOWN = nx+2; // (+2 ... for halo cells) const size_t i_leftupper = 1 + DOWN, i_rightupper = nx + DOWN, i_leftlower = 1 + ny*DOWN; wireworld_step(world, i_leftupper, nx, 1); // upper wireworld_step(world, i_leftlower, nx, 1); // lower wireworld_step(world, i_leftupper+DOWN, 1, ny-2); // left wireworld_step(world, i_rightupper+DOWN, 1, ny-2); // right } void wireworld_step(world_t *world, size_t i_start, size_t bx, size_t by) { const size_t L = -1, R = 1; const size_t D = world->local_size[0]+2; // (+2 ... for halo cells) const size_t U = -D; size_t x, y, i; int nheads; char *prev = world->cells_prev; char *next = world->cells_next; for(y = 0; y < by; y++) { i = i_start; for(x = 0; x < bx; x++) { switch(prev[i]) { // Electron heads become electron tails. case ELECTRON_HEAD: next[i] = ELECTRON_TAIL; break; // Electron tails become copper. case ELECTRON_TAIL: next[i] = WIRE; break; // New electron head replacing copper, // if 1 or 2 electron heads are in neighborhood. case WIRE: nheads = (prev[i+L+U] == ELECTRON_HEAD) + (prev[i +U] == ELECTRON_HEAD) + (prev[i+R+U] == ELECTRON_HEAD) + (prev[i+L ] == ELECTRON_HEAD) + (prev[i+R ] == ELECTRON_HEAD) + (prev[i+L+D] == ELECTRON_HEAD) + (prev[i +D] == ELECTRON_HEAD) + (prev[i+R+D] == ELECTRON_HEAD); if(nheads == 1 || nheads == 2) { next[i] = ELECTRON_HEAD; } else { next[i] = WIRE; } break; default: break; } i++; } i_start += D; } }