Commit be1aa9f4 authored by Thomas Ponweiser's avatar Thomas Ponweiser
Browse files

implemented various simulation modes (overlapping/non-overlapping...

implemented various simulation modes (overlapping/non-overlapping communication and computation, using sparse collective / p2p / persistent request communication)
parent 653d505d
...@@ -8,8 +8,8 @@ ...@@ -8,8 +8,8 @@
const char *verbosity_levels[] = {"OFF", "INFO", "DEBUG", "TRACE"}; const char *verbosity_levels[] = {"OFF", "INFO", "DEBUG", "TRACE"};
const char *communication_computation_modes[] = { const char *communication_computation_modes[] = {
"Blocking (No overlap of communication and computation)", "No overlap of communication and computation",
"Non-blocking (Overlapping communication and computation)" "Overlapping communication and computation"
}; };
const char *transmission_modes[] = { const char *transmission_modes[] = {
...@@ -117,7 +117,7 @@ void conf_print(const conf_t *c, FILE *f) ...@@ -117,7 +117,7 @@ void conf_print(const conf_t *c, FILE *f)
fprintf(f, " * Verbosity level: %s (%d)\n", verbosity_levels[i], c->verbosity_level); fprintf(f, " * Verbosity level: %s (%d)\n", verbosity_levels[i], c->verbosity_level);
fprintf(f, " * Input file: %s%s\n", c->file_basename, FILE_EXT); fprintf(f, " * Input file: %s%s\n", c->file_basename, FILE_EXT);
fprintf(f, " * Transmission mode: %s\n", transmission_modes[c->transmission_mode]); fprintf(f, " * Transmission mode: %s\n", transmission_modes[c->transmission_mode]);
fprintf(f, " * Communication mode: %s\n", communication_computation_modes[c->communication_computation_mode]); fprintf(f, " * Overlap mode: %s communication and computation\n", c->communication_computation_mode == NO_OVERLAP ? "No overlap of" : "Overlapping");
fprintf(f, " * Grid of processes: %d x %d\n", c->nprocs[0], c->nprocs[1]); fprintf(f, " * Grid of processes: %d x %d\n", c->nprocs[0], c->nprocs[1]);
fprintf(f, " * Number of iterations: %d\n", c->n_iterations); fprintf(f, " * Number of iterations: %d\n", c->n_iterations);
fprintf(f, " * Generations per iteration: %ld\n", c->n_generations_per_iteration); fprintf(f, " * Generations per iteration: %ld\n", c->n_generations_per_iteration);
......
...@@ -21,7 +21,7 @@ enum transmission_mode_enum ...@@ -21,7 +21,7 @@ enum transmission_mode_enum
enum communication_computation_mode_enum enum communication_computation_mode_enum
{ {
NO_OVERLAP = 0, NO_OVERLAP = 0,
OVERLAP = 1 OVERLAP = 4
}; };
#define FILE_BASENAME_SZ 1024 #define FILE_BASENAME_SZ 1024
......
...@@ -23,6 +23,13 @@ int main(int argc, char* argv[]) ...@@ -23,6 +23,13 @@ int main(int argc, char* argv[])
MPI_Init(&argc, &argv); MPI_Init(&argc, &argv);
// We want the program to stop on I/O errors
// -> Change the default I/O error handler from MPI_ERRORS_RETURN to MPI_ERRORS_ARE_FATAL
// Notes:
// * An individual I/O error handler can be associated to each file handle.
// * The default I/O error handler is associated to the null file handle, i.e. MPI_FILE_NULL.
MPI_File_set_errhandler(MPI_FILE_NULL, MPI_ERRORS_ARE_FATAL);
// Parse command line arguments // Parse command line arguments
conf_init_from_args(&config, argc, argv); conf_init_from_args(&config, argc, argv);
if(info_enabled(&config)) conf_print(&config, stdout); if(info_enabled(&config)) conf_print(&config, stdout);
...@@ -67,16 +74,17 @@ void read_input(const conf_t *c, world_t *world) { ...@@ -67,16 +74,17 @@ void read_input(const conf_t *c, world_t *world) {
"Global size: %ld x %ld\n\n", "Global size: %ld x %ld\n\n",
header_length, global_sizes[0], global_sizes[1] header_length, global_sizes[0], global_sizes[1]
); );
// Initialize cells (determine local tile, allocate memory)
MPI_Cart_create(MPI_COMM_WORLD, 2, c->nprocs, periods, allow_reorder, &cart_comm);
world_init(world, cart_comm, global_sizes, c);
MPI_Comm_free(&cart_comm);
if(debug_enabled(c)) printf( if(debug_enabled(c)) printf(
"%03d: Local tile: [%ld %ld) x [%ld %ld)\n", rank, "%03d: Local tile: [%ld %ld) x [%ld %ld)\n", rank,
world->local_start[0], world->local_start[0]+world->local_size[0], world->local_start[0], world->local_start[0]+world->local_size[0],
world->local_start[1], world->local_start[1]+world->local_size[1] world->local_start[1], world->local_start[1]+world->local_size[1]
); );
// Initialize cells (determine local tile, allocate memory)
MPI_Cart_create(MPI_COMM_WORLD, 2, c->nprocs, periods, allow_reorder, &cart_comm);
world_init(world, cart_comm, global_sizes);
// Collectively read cell data // Collectively read cell data
file_read_world(file, world, header_length); file_read_world(file, world, header_length);
MPI_File_close(&file); MPI_File_close(&file);
...@@ -90,7 +98,7 @@ void iterate(const conf_t *c, world_t *world) ...@@ -90,7 +98,7 @@ void iterate(const conf_t *c, world_t *world)
size_t i, g; size_t i, g;
double total_time, sim_time = 0, io_time = 0; double total_time, sim_time = 0, io_time = 0;
char output_filename[FILE_NAME_SZ+8]; char output_filename[FILE_NAME_SZ+10];
MPI_File file; MPI_File file;
size_t header_length; size_t header_length;
...@@ -105,13 +113,13 @@ void iterate(const conf_t *c, world_t *world) ...@@ -105,13 +113,13 @@ void iterate(const conf_t *c, world_t *world)
// Run n_gen generations // Run n_gen generations
sim_time -= MPI_Wtime(); sim_time -= MPI_Wtime();
do_simulation(world, n_gen); g += n_gen; do_simulation(world, n_gen, c); g += n_gen;
sim_time += MPI_Wtime(); sim_time += MPI_Wtime();
// Determine output filename // Determine output filename
snprintf( snprintf(
output_filename, sizeof(output_filename), output_filename, sizeof(output_filename),
"%s+%07ld%s", "%s+%09ld%s",
c->file_basename, g, FILE_EXT c->file_basename, g, FILE_EXT
); );
...@@ -127,7 +135,7 @@ void iterate(const conf_t *c, world_t *world) ...@@ -127,7 +135,7 @@ void iterate(const conf_t *c, world_t *world)
MPI_File_close(&file); MPI_File_close(&file);
io_time += MPI_Wtime(); io_time += MPI_Wtime();
if(info_enabled(c)) printf("Done iteration %-4d - Generation %-7ld - written '%s'.\n", i, g, output_filename); if(info_enabled(c)) printf("Generation %-9ld - written '%s'.\n", g, output_filename);
} }
total_time += MPI_Wtime(); total_time += MPI_Wtime();
......
#include <stdlib.h>
#include <string.h>
#include <mpi.h> #include <mpi.h>
#include "simulation.h" #include "simulation.h"
...@@ -6,26 +8,98 @@ ...@@ -6,26 +8,98 @@
#define ELECTRON_TAIL '~' #define ELECTRON_TAIL '~'
#define WIRE '#' #define WIRE '#'
void do_simulation_blocking_sparse(world_t *world, size_t n_generations);
void do_simulation_nonblocking_sparse(world_t *world, size_t n_generations);
void do_simulation_p2p_overlap(world_t *world, size_t n_generations);
void do_simulation_p2p_no_overlap(world_t *world, size_t n_generations);
void do_simulation_persistent_request_overlap(world_t *world, size_t n_generations);
void do_simulation_persistent_request_no_overlap(world_t *world, size_t n_generations);
void wireworld_step_complete(world_t *world);
void wireworld_step_interior(world_t *world);
void wireworld_step_boundary(world_t *world);
void wireworld_step(world_t *world, size_t i_start, size_t bx, size_t by); void wireworld_step(world_t *world, size_t i_start, size_t bx, size_t by);
void do_simulation(world_t *world, size_t n_generations) // --------------------------------------------------------------------------
void do_simulation(world_t *world, size_t n_generations, const conf_t *c)
{ {
const size_t nx = world->local_size[0]; const int mode = c->transmission_mode | c->communication_computation_mode;
const size_t ny = world->local_size[1];
const size_t DOWN = nx+2; // (+2 ... for halo cells) switch(mode) {
case SPARSE_COLLECTIVE | NO_OVERLAP:
do_simulation_blocking_sparse(world, n_generations);
break;
const size_t i_leftupper = 1 + DOWN; case SPARSE_COLLECTIVE | OVERLAP:
const size_t i_rightupper = nx + DOWN; do_simulation_nonblocking_sparse(world, n_generations);
const size_t i_leftlower = 1 + ny*DOWN; break;
case POINT_TO_POINT | NO_OVERLAP:
do_simulation_p2p_no_overlap(world, n_generations);
break;
case POINT_TO_POINT | OVERLAP:
do_simulation_p2p_overlap(world, n_generations);
break;
case PERSISTENT_REQUEST | NO_OVERLAP:
do_simulation_persistent_request_no_overlap(world, n_generations);
break;
case PERSISTENT_REQUEST | OVERLAP:
do_simulation_persistent_request_overlap(world, n_generations);
break;
default:
fprintf(stderr, "Not yet implemented.\n");
MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
}
}
void do_simulation_blocking_sparse(world_t *world, size_t n_generations)
{
size_t g; size_t g;
char *tmp; char *tmp;
const int counts[] = {1, 1, 1, 1, 1, 1, 1, 1}; const int counts[] = {1, 1, 1, 1, 1, 1, 1, 1};
const MPI_Aint displs[] = {0, 0, 0, 0, 0, 0, 0, 0}; const MPI_Aint displs[] = {0, 0, 0, 0, 0, 0, 0, 0};
const transfer_t *transfer = &world->transfer;
const MPI_Datatype *send_types = world->transfer.send_types,
*recv_types = world->transfer.recv_types;
MPI_Comm comm = world->transfer.comm;
for(g = 0; g < n_generations; g++) {
tmp = world->cells_prev;
world->cells_prev = world->cells_next;
world->cells_next = tmp;
MPI_Neighbor_alltoallw(
world->cells_prev, counts, displs, send_types,
world->cells_prev, counts, displs, recv_types,
comm
);
wireworld_step_complete(world);
}
}
void do_simulation_nonblocking_sparse(world_t *world, size_t n_generations)
{
size_t g;
char *tmp;
const int counts[] = {1, 1, 1, 1, 1, 1, 1, 1};
const MPI_Aint displs[] = {0, 0, 0, 0, 0, 0, 0, 0};
const MPI_Datatype *send_types = world->transfer.send_types,
*recv_types = world->transfer.recv_types;
MPI_Comm comm = world->transfer.comm;
MPI_Request request; MPI_Request request;
for(g = 0; g < n_generations; g++) { for(g = 0; g < n_generations; g++) {
...@@ -33,35 +107,190 @@ void do_simulation(world_t *world, size_t n_generations) ...@@ -33,35 +107,190 @@ void do_simulation(world_t *world, size_t n_generations)
world->cells_prev = world->cells_next; world->cells_prev = world->cells_next;
world->cells_next = tmp; world->cells_next = tmp;
// Start halo exchange
MPI_Ineighbor_alltoallw( MPI_Ineighbor_alltoallw(
world->cells_prev, counts, displs, transfer->send_types, world->cells_prev, counts, displs, send_types,
world->cells_prev, counts, displs, transfer->recv_types, world->cells_prev, counts, displs, recv_types,
transfer->graph_comm, &request comm, &request
); );
// Compute inner region wireworld_step_interior(world);
wireworld_step(world, i_leftupper+1+DOWN, nx-2, ny-2);
// Finish halo exchange
MPI_Wait(&request, MPI_STATUS_IGNORE); MPI_Wait(&request, MPI_STATUS_IGNORE);
wireworld_step_boundary(world);
}
}
// Compute boundary regions void do_simulation_p2p_no_overlap(world_t *world, size_t n_generations)
wireworld_step(world, i_leftupper, nx, 1); // upper {
wireworld_step(world, i_leftlower, nx, 1); // lower const int tag = 0;
wireworld_step(world, i_leftupper+DOWN, 1, ny-2); // left const size_t n_neighbors = world->transfer.n_neighbors;
wireworld_step(world, i_rightupper+DOWN, 1, ny-2); // right
/* Blocking variant: const int *neighbor_ranks = world->transfer.neighbor_ranks;
MPI_Neighbor_alltoallw( const MPI_Datatype *send_types = world->transfer.send_types,
world->cells_prev, counts, displs, transfer->send_types, *recv_types = world->transfer.recv_types;
world->cells_prev, counts, displs, transfer->recv_types,
transfer->graph_comm MPI_Comm comm = world->transfer.comm;
);
wireworld_step(world, i_leftupper, nx, ny); size_t g, i;
*/ char *tmp;
MPI_Request requests[16];
for(g = 0; g < n_generations; g++) {
tmp = world->cells_prev;
world->cells_prev = world->cells_next;
world->cells_next = tmp;
for(i = 0; i < n_neighbors; i++) {
MPI_Isend(
world->cells_prev, 1, send_types[i], neighbor_ranks[i],
tag, comm, &requests[2*i]
);
MPI_Irecv(
world->cells_prev, 1, recv_types[i], neighbor_ranks[i],
tag, comm, &requests[2*i+1]
);
}
MPI_Waitall(2*n_neighbors, requests, MPI_STATUSES_IGNORE);
wireworld_step_complete(world);
}
}
/*
 * Advances the local tile by n_generations using non-blocking point-to-point
 * halo exchange, overlapping communication with computation.
 *
 * Per generation:
 *   1. world->cells_prev and world->cells_next are swapped (pointer swap).
 *   2. For every neighbor, one MPI_Isend and one MPI_Irecv are posted on
 *      world->cells_prev with the neighbor's send/receive datatype.
 *   3. The interior is updated while the messages are in flight.
 *   4. After all requests have completed, the boundary is updated.
 *
 * The request array lives on the stack; the MPI job is aborted if
 * world->transfer.n_neighbors would not fit into it.
 */
void do_simulation_p2p_overlap(world_t *world, size_t n_generations)
{
    /* Capacity of the on-stack request array: one send + one receive per neighbor. */
    enum { MAX_NEIGHBORS = 8, MAX_REQUESTS = 2 * MAX_NEIGHBORS };

    const int tag = 0;
    const size_t n_neighbors = world->transfer.n_neighbors;
    const int *neighbor_ranks = world->transfer.neighbor_ranks;
    const MPI_Datatype *send_types = world->transfer.send_types,
                       *recv_types = world->transfer.recv_types;
    MPI_Comm comm = world->transfer.comm;

    size_t g, i;
    int n_requests;
    char *tmp;
    MPI_Request requests[MAX_REQUESTS];

    /* Writing requests[2*i+1] for i >= MAX_NEIGHBORS would overflow the array. */
    if(n_neighbors > MAX_NEIGHBORS) {
        fprintf(stderr, "Too many neighbors (%zu), at most %d are supported.\n",
                n_neighbors, (int)MAX_NEIGHBORS);
        MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
        return;
    }
    n_requests = (int)(2*n_neighbors);

    for(g = 0; g < n_generations; g++) {
        /* The result of the previous generation becomes the input of this one. */
        tmp = world->cells_prev;
        world->cells_prev = world->cells_next;
        world->cells_next = tmp;

        /* Start halo exchange: even slots hold sends, odd slots hold receives. */
        for(i = 0; i < n_neighbors; i++) {
            MPI_Isend(
                world->cells_prev, 1, send_types[i], neighbor_ranks[i],
                tag, comm, &requests[2*i]
            );
            MPI_Irecv(
                world->cells_prev, 1, recv_types[i], neighbor_ranks[i],
                tag, comm, &requests[2*i+1]
            );
        }

        /* Update the interior while the exchange is still in progress. */
        wireworld_step_interior(world);

        /* Finish halo exchange before updating the boundary. */
        MPI_Waitall(n_requests, requests, MPI_STATUSES_IGNORE);
        wireworld_step_boundary(world);
    }
}
/*
 * Advances the local tile by n_generations using persistent MPI requests,
 * without overlapping communication and computation.
 *
 * Per generation: the contents of world->cells_prev and world->cells_next are
 * exchanged, all 2*n_neighbors persistent requests are started and completed,
 * and then the whole tile is updated in one pass.
 *
 * Aborts the MPI job if the scratch buffer needed for the exchange cannot be
 * allocated.
 */
void do_simulation_persistent_request_no_overlap(world_t *world, size_t n_generations)
{
    const size_t n_neighbors = world->transfer.n_neighbors;
    const size_t sz = world_get_storage_size(world);
    const int n_requests = (int)(2*n_neighbors);

    size_t g;
    char *tmp = malloc(sz);
    MPI_Request *requests = world->transfer.persistent_requests;

    /* Without the scratch buffer the memcpy-based swap below cannot work. */
    if(tmp == NULL) {
        fprintf(stderr, "Out of memory (failed to allocate %zu bytes).\n", sz);
        MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
        return;
    }

    for(g = 0; g < n_generations; g++) {
        // Persistent send/receive requests have been initialized with a pointer to world->cells_prev.
        // Swapping pointers here will therefore not work and swapping memory is necessary.
        //
        // Possible Optimization:
        // ----------------------
        // Alternately using two sets of persistent requests (one for world->cells_prev and world->cells_next).
        memcpy(tmp, world->cells_prev, sz);
        memcpy(world->cells_prev, world->cells_next, sz);
        memcpy(world->cells_next, tmp, sz);

        /* Run the complete halo exchange, then update the whole tile. */
        MPI_Startall(n_requests, requests);
        MPI_Waitall(n_requests, requests, MPI_STATUSES_IGNORE);

        wireworld_step_complete(world);
    }

    free(tmp);
}
/*
 * Advances the local tile by n_generations using persistent MPI requests,
 * overlapping communication with computation.
 *
 * Per generation: the contents of world->cells_prev and world->cells_next are
 * exchanged, all 2*n_neighbors persistent requests are started, the interior
 * is updated while they are in flight, and the boundary is updated once they
 * have completed.
 *
 * Aborts the MPI job if the scratch buffer needed for the exchange cannot be
 * allocated.
 */
void do_simulation_persistent_request_overlap(world_t *world, size_t n_generations)
{
    const size_t n_neighbors = world->transfer.n_neighbors;
    const size_t sz = world_get_storage_size(world);
    const int n_requests = (int)(2*n_neighbors);

    size_t g;
    char *tmp = malloc(sz);
    MPI_Request *requests = world->transfer.persistent_requests;

    /* Without the scratch buffer the memcpy-based swap below cannot work. */
    if(tmp == NULL) {
        fprintf(stderr, "Out of memory (failed to allocate %zu bytes).\n", sz);
        MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
        return;
    }

    for(g = 0; g < n_generations; g++) {
        // The buffer contents are swapped by copying rather than by swapping
        // the two pointers (original author's note: the persistent requests
        // are bound to the memory behind world->cells_prev).
        memcpy(tmp, world->cells_prev, sz);
        memcpy(world->cells_prev, world->cells_next, sz);
        memcpy(world->cells_next, tmp, sz);

        /* Start halo exchange and update the interior in the meantime. */
        MPI_Startall(n_requests, requests);
        wireworld_step_interior(world);

        /* Finish halo exchange before updating the boundary. */
        MPI_Waitall(n_requests, requests, MPI_STATUSES_IGNORE);
        wireworld_step_boundary(world);
    }

    free(tmp);
}
/*
 * Updates the whole local tile in a single wireworld_step() call.
 *
 * The block starts at linear index 1 + (nx+2), i.e. one row and one column
 * past the start of the storage whose rows are nx+2 cells long (the +2
 * accounts for halo cells), and spans nx x ny cells.
 */
void wireworld_step_complete(world_t *world)
{
    const size_t width = world->local_size[0];
    const size_t height = world->local_size[1];
    const size_t row_stride = width + 2; // (+2 ... for halo cells)

    wireworld_step(world, 1 + row_stride, width, height);
}
/*
 * Updates the interior of the local tile, i.e. the tile without its
 * outermost ring of cells, in a single wireworld_step() call.
 *
 * The block starts one row and one column inside the tile's upper-left cell
 * and spans (nx-2) x (ny-2) cells. For tiles with fewer than two cells in a
 * dimension the extent is clamped to 0; the unsigned subtraction would
 * otherwise wrap around to a huge value.
 */
void wireworld_step_interior(world_t *world)
{
    const size_t nx = world->local_size[0],
                 ny = world->local_size[1];
    const size_t DOWN = nx+2; // (+2 ... for halo cells)
    const size_t i_leftupper = 1 + DOWN;

    /* size_t is unsigned: guard nx-2 / ny-2 against wrap-around. */
    const size_t inner_nx = nx >= 2 ? nx-2 : 0,
                 inner_ny = ny >= 2 ? ny-2 : 0;

    wireworld_step(world, i_leftupper+1+DOWN, inner_nx, inner_ny);
}
/*
 * Updates the outermost ring of cells of the local tile using four
 * wireworld_step() calls: the full upper and lower rows (nx x 1 each) and
 * the left and right columns between them (1 x (ny-2) each).
 *
 * For tiles with fewer than two rows the column height is clamped to 0; the
 * unsigned subtraction ny-2 would otherwise wrap around to a huge value.
 */
void wireworld_step_boundary(world_t *world)
{
    const size_t nx = world->local_size[0],
                 ny = world->local_size[1];
    const size_t DOWN = nx+2; // (+2 ... for halo cells)
    const size_t i_leftupper = 1 + DOWN,
                 i_rightupper = nx + DOWN,
                 i_leftlower = 1 + ny*DOWN;

    /* size_t is unsigned: guard ny-2 against wrap-around. */
    const size_t inner_ny = ny >= 2 ? ny-2 : 0;

    wireworld_step(world, i_leftupper, nx, 1); // upper
    wireworld_step(world, i_leftlower, nx, 1); // lower
    wireworld_step(world, i_leftupper+DOWN, 1, inner_ny); // left
    wireworld_step(world, i_rightupper+DOWN, 1, inner_ny); // right
}
void wireworld_step(world_t *world, size_t i_start, size_t bx, size_t by) void wireworld_step(world_t *world, size_t i_start, size_t bx, size_t by)
......
...@@ -2,7 +2,8 @@ ...@@ -2,7 +2,8 @@
#define _AUTOMATON_H_ #define _AUTOMATON_H_
#include "world.h" #include "world.h"
#include "configuration.h"
void do_simulation(world_t *world, size_t n_generations); void do_simulation(world_t *world, size_t n_generations, const conf_t *c);
#endif #endif
...@@ -6,10 +6,13 @@ ...@@ -6,10 +6,13 @@
void world_init_io_type(world_t *world); void world_init_io_type(world_t *world);
void world_free_io_type(world_t *world); void world_free_io_type(world_t *world);
void world_init_neighborhood(world_t *world, MPI_Comm cart_comm, int nprocs[], int proc_coord[]); void world_init_neighborhood(world_t *world, MPI_Comm cart_comm, int nprocs[], int proc_coord[], const conf_t *c);
void world_free_neighborhood(world_t *world); void world_free_neighborhood(world_t *world);
void world_init(world_t *world, MPI_Comm cart_comm, size_t *global_size) void world_init_persistent_requests(world_t *world, const conf_t *c);
void world_free_persistent_requests(world_t *world);
void world_init(world_t *world, MPI_Comm cart_comm, size_t *global_size, const conf_t *c)
{ {
int dim, lo, hi; int dim, lo, hi;
int nprocs[2], periods[2], proc_coord[2]; int nprocs[2], periods[2], proc_coord[2];
...@@ -34,13 +37,15 @@ void world_init(world_t *world, MPI_Comm cart_comm, size_t *global_size) ...@@ -34,13 +37,15 @@ void world_init(world_t *world, MPI_Comm cart_comm, size_t *global_size)
memset(world->cells_next, ' ', storage_size); memset(world->cells_next, ' ', storage_size);
world_init_io_type(world); world_init_io_type(world);
world_init_neighborhood(world, cart_comm, nprocs, proc_coord); world_init_neighborhood(world, cart_comm, nprocs, proc_coord, c);
world_init_persistent_requests(world, c);
} }
void world_free(world_t *world) void world_free(world_t *world)
{ {
world_free_io_type(world); world_free_io_type(world);
world_free_neighborhood(world); world_free_neighborhood(world);
world_free_persistent_requests(world);
free(world->cells_prev); free(world->cells_prev);
free(world->cells_next); free(world->cells_next);
...@@ -49,10 +54,12 @@ void world_free(world_t *world) ...@@ -49,10 +54,12 @@ void world_free(world_t *world)
} }
void world_init_io_type(world_t *world) { void world_init_io_type(world_t *world) {
const int nx = world->local_size[0], ny = world->local_size[1]; const int nx = world->local_size[0],
const int sizes[] = {nx+2, ny+2}; ny = world->local_size[1];
const int subsizes[] = {nx, ny};
const int starts[] = {1, 1}; const int sizes[] = {nx+2, ny+2},
subsizes[] = {nx, ny},
starts[] = {1, 1};
MPI_Type_create_subarray(2, MPI_Type_create_subarray(2,
sizes, subsizes, starts, sizes, subsizes, starts,
...@@ -66,7 +73,7 @@ void world_free_io_type(world_t *world) ...@@ -66,7 +73,7 @@ void world_free_io_type(world_t *world)
MPI_Type_free(&world->transfer.io_type); MPI_Type_free(&world->transfer.io_type);
} }
void world_init_neighborhood(world_t *world, MPI_Comm cart_comm, int nprocs[], int proc_coord[]) void world_init_neighborhood(world_t *world, MPI_Comm cart_comm, int nprocs[], int proc_coord[], const conf_t *c)
{ {
const int px = proc_coord[0], const int px = proc_coord[0],
py = proc_coord[1]; py = proc_coord[1];
...@@ -98,7 +105,7 @@ void world_init_neighborhood(world_t *world, MPI_Comm cart_comm, int nprocs[], i ...@@ -98,7 +105,7 @@ void world_init_neighborhood(world_t *world, MPI_Comm cart_comm, int nprocs[], i
size_t i, n; size_t i, n;
const int sizes[] = {nx+2, ny+2}; const int sizes[] = {nx+2, ny+2};
int neighbor_ranks[8]; int *neighbor_ranks = world->transfer.neighbor_ranks;
int weights[8]; int weights[8];