Commit be1aa9f4 authored by Thomas Ponweiser's avatar Thomas Ponweiser
Browse files

implemented various simulation modes (overlapping/non-overlapping...

implemented various simulation modes (overlapping/non-overlapping communication and computation, using sparse collective / p2p / persistent request communication)
parent 653d505d
......@@ -8,8 +8,8 @@
const char *verbosity_levels[] = {"OFF", "INFO", "DEBUG", "TRACE"};
const char *communication_computation_modes[] = {
"Blocking (No overlap of communication and computation)",
"Non-blocking (Overlapping communication and computation)"
"No overlap of communication and computation",
"Overlapping communication and computation"
};
const char *transmission_modes[] = {
......@@ -117,7 +117,7 @@ void conf_print(const conf_t *c, FILE *f)
fprintf(f, " * Verbosity level: %s (%d)\n", verbosity_levels[i], c->verbosity_level);
fprintf(f, " * Input file: %s%s\n", c->file_basename, FILE_EXT);
fprintf(f, " * Transmission mode: %s\n", transmission_modes[c->transmission_mode]);
fprintf(f, " * Communication mode: %s\n", communication_computation_modes[c->communication_computation_mode]);
fprintf(f, " * Overlap mode: %s communication and computation\n", c->communication_computation_mode == NO_OVERLAP ? "No overlap of" : "Overlapping");
fprintf(f, " * Grid of processes: %d x %d\n", c->nprocs[0], c->nprocs[1]);
fprintf(f, " * Number of iterations: %d\n", c->n_iterations);
fprintf(f, " * Generations per iteration: %ld\n", c->n_generations_per_iteration);
......
......@@ -21,7 +21,7 @@ enum transmission_mode_enum
enum communication_computation_mode_enum
{
NO_OVERLAP = 0,
OVERLAP = 1
OVERLAP = 4
};
#define FILE_BASENAME_SZ 1024
......
......@@ -23,6 +23,13 @@ int main(int argc, char* argv[])
MPI_Init(&argc, &argv);
// We want the program to stop on I/O errors
// -> Change the default I/O error handler from MPI_ERRORS_RETURN to MPI_ERRORS_ARE_FATAL
// Notes:
// * An individual I/O error handler can be associated to each file handle.
// * The default I/O error handler is associated to the null file handle, i.e. MPI_FILE_NULL.
MPI_File_set_errhandler(MPI_FILE_NULL, MPI_ERRORS_ARE_FATAL);
// Parse command line arguments
conf_init_from_args(&config, argc, argv);
if(info_enabled(&config)) conf_print(&config, stdout);
......@@ -67,16 +74,17 @@ void read_input(const conf_t *c, world_t *world) {
"Global size: %ld x %ld\n\n",
header_length, global_sizes[0], global_sizes[1]
);
// Initialize cells (determine local tile, allocate memory)
MPI_Cart_create(MPI_COMM_WORLD, 2, c->nprocs, periods, allow_reorder, &cart_comm);
world_init(world, cart_comm, global_sizes, c);
MPI_Comm_free(&cart_comm);
if(debug_enabled(c)) printf(
"%03d: Local tile: [%ld %ld) x [%ld %ld)\n", rank,
world->local_start[0], world->local_start[0]+world->local_size[0],
world->local_start[1], world->local_start[1]+world->local_size[1]
);
// Initialize cells (determine local tile, allocate memory)
MPI_Cart_create(MPI_COMM_WORLD, 2, c->nprocs, periods, allow_reorder, &cart_comm);
world_init(world, cart_comm, global_sizes);
// Collectively read cell data
file_read_world(file, world, header_length);
MPI_File_close(&file);
......@@ -90,7 +98,7 @@ void iterate(const conf_t *c, world_t *world)
size_t i, g;
double total_time, sim_time = 0, io_time = 0;
char output_filename[FILE_NAME_SZ+8];
char output_filename[FILE_NAME_SZ+10];
MPI_File file;
size_t header_length;
......@@ -105,13 +113,13 @@ void iterate(const conf_t *c, world_t *world)
// Run n_gen generations
sim_time -= MPI_Wtime();
do_simulation(world, n_gen); g += n_gen;
do_simulation(world, n_gen, c); g += n_gen;
sim_time += MPI_Wtime();
// Determine output filename
snprintf(
output_filename, sizeof(output_filename),
"%s+%07ld%s",
"%s+%09ld%s",
c->file_basename, g, FILE_EXT
);
......@@ -127,7 +135,7 @@ void iterate(const conf_t *c, world_t *world)
MPI_File_close(&file);
io_time += MPI_Wtime();
if(info_enabled(c)) printf("Done iteration %-4d - Generation %-7ld - written '%s'.\n", i, g, output_filename);
if(info_enabled(c)) printf("Generation %-9ld - written '%s'.\n", g, output_filename);
}
total_time += MPI_Wtime();
......
#include <stdlib.h>
#include <string.h>
#include <mpi.h>
#include "simulation.h"
......@@ -6,26 +8,98 @@
#define ELECTRON_TAIL '~'
#define WIRE '#'
void do_simulation_blocking_sparse(world_t *world, size_t n_generations);
void do_simulation_nonblocking_sparse(world_t *world, size_t n_generations);
void do_simulation_p2p_overlap(world_t *world, size_t n_generations);
void do_simulation_p2p_no_overlap(world_t *world, size_t n_generations);
void do_simulation_persistent_request_overlap(world_t *world, size_t n_generations);
void do_simulation_persistent_request_no_overlap(world_t *world, size_t n_generations);
void wireworld_step_complete(world_t *world);
void wireworld_step_interior(world_t *world);
void wireworld_step_boundary(world_t *world);
void wireworld_step(world_t *world, size_t i_start, size_t bx, size_t by);
void do_simulation(world_t *world, size_t n_generations)
// --------------------------------------------------------------------------
/*
 * Runs n_generations simulation steps by dispatching to the routine that
 * matches the configured transmission mode and overlap mode.
 *
 * world          world state, handed through to the selected routine
 * n_generations  number of generations to simulate
 * c              configuration; its transmission_mode and
 *                communication_computation_mode select the routine
 *
 * Prints a message and aborts the MPI job if the combination of modes has
 * no implementation.
 */
void do_simulation(world_t *world, size_t n_generations, const conf_t *c)
{
    // Both mode values are OR-ed into a single selector, so the case labels
    // below are only distinct if the two enums never share bits.
    // NOTE(review): confirm this against the enum definitions.
    const int mode = c->transmission_mode | c->communication_computation_mode;

    switch(mode) {
    case SPARSE_COLLECTIVE | NO_OVERLAP:
        do_simulation_blocking_sparse(world, n_generations);
        break;
    case SPARSE_COLLECTIVE | OVERLAP:
        do_simulation_nonblocking_sparse(world, n_generations);
        break;
    case POINT_TO_POINT | NO_OVERLAP:
        do_simulation_p2p_no_overlap(world, n_generations);
        break;
    case POINT_TO_POINT | OVERLAP:
        do_simulation_p2p_overlap(world, n_generations);
        break;
    case PERSISTENT_REQUEST | NO_OVERLAP:
        do_simulation_persistent_request_no_overlap(world, n_generations);
        break;
    case PERSISTENT_REQUEST | OVERLAP:
        do_simulation_persistent_request_overlap(world, n_generations);
        break;
    default:
        fprintf(stderr, "Not yet implemented.\n");
        MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
    }
}
/*
 * Simulates n_generations steps without overlapping communication and
 * computation, using a blocking neighborhood collective.
 *
 * Each generation swaps the two cell buffers, exchanges data with all
 * neighbors through MPI_Neighbor_alltoallw on world->cells_prev, and then
 * updates the complete local tile.
 *
 * world          world state; cells_prev / cells_next are swapped in place
 * n_generations  number of generations to simulate
 */
void do_simulation_blocking_sparse(world_t *world, size_t n_generations)
{
    // One element of each (derived) datatype per neighbor, all relative to
    // the start of the cell buffer.
    const int counts[] = {1, 1, 1, 1, 1, 1, 1, 1};
    const MPI_Aint displs[] = {0, 0, 0, 0, 0, 0, 0, 0};

    const MPI_Datatype *send_types = world->transfer.send_types,
                       *recv_types = world->transfer.recv_types;
    MPI_Comm comm = world->transfer.comm;
    size_t g;

    for(g = 0; g < n_generations; g++) {
        // The result of the previous step becomes the input of this one.
        char *tmp = world->cells_prev;
        world->cells_prev = world->cells_next;
        world->cells_next = tmp;

        MPI_Neighbor_alltoallw(
            world->cells_prev, counts, displs, send_types,
            world->cells_prev, counts, displs, recv_types,
            comm
        );

        wireworld_step_complete(world);
    }
}
void do_simulation_nonblocking_sparse(world_t *world, size_t n_generations)
{
size_t g;
char *tmp;
const int counts[] = {1, 1, 1, 1, 1, 1, 1, 1};
const MPI_Aint displs[] = {0, 0, 0, 0, 0, 0, 0, 0};
const MPI_Datatype *send_types = world->transfer.send_types,
*recv_types = world->transfer.recv_types;
MPI_Comm comm = world->transfer.comm;
MPI_Request request;
for(g = 0; g < n_generations; g++) {
......@@ -33,35 +107,190 @@ void do_simulation(world_t *world, size_t n_generations)
world->cells_prev = world->cells_next;
world->cells_next = tmp;
// Start halo exchange
MPI_Ineighbor_alltoallw(
world->cells_prev, counts, displs, transfer->send_types,
world->cells_prev, counts, displs, transfer->recv_types,
transfer->graph_comm, &request
world->cells_prev, counts, displs, send_types,
world->cells_prev, counts, displs, recv_types,
comm, &request
);
// Compute inner region
wireworld_step(world, i_leftupper+1+DOWN, nx-2, ny-2);
// Finish halo exchange
wireworld_step_interior(world);
MPI_Wait(&request, MPI_STATUS_IGNORE);
wireworld_step_boundary(world);
}
}
// Compute boundary regions
wireworld_step(world, i_leftupper, nx, 1); // upper
wireworld_step(world, i_leftlower, nx, 1); // lower
wireworld_step(world, i_leftupper+DOWN, 1, ny-2); // left
wireworld_step(world, i_rightupper+DOWN, 1, ny-2); // right
/*
 * Simulates n_generations steps using non-blocking point-to-point messages,
 * without overlapping communication and computation.
 *
 * Each generation swaps the two cell buffers, posts one send and one receive
 * per neighbor on world->cells_prev, waits for all of them, and only then
 * updates the complete local tile.
 *
 * world          world state; cells_prev / cells_next are swapped in place
 * n_generations  number of generations to simulate
 */
void do_simulation_p2p_no_overlap(world_t *world, size_t n_generations)
{
    const int tag = 0;
    const size_t n_neighbors = world->transfer.n_neighbors;
    const int *neighbor_ranks = world->transfer.neighbor_ranks;

    const MPI_Datatype *send_types = world->transfer.send_types,
                       *recv_types = world->transfer.recv_types;
    MPI_Comm comm = world->transfer.comm;

    size_t g, i;
    char *tmp;

    // Two requests (send + receive) per neighbor.
    // NOTE(review): 16 assumes at most 8 neighbors - confirm against the
    // size of the neighbor arrays in the transfer struct.
    MPI_Request requests[16];

    for(g = 0; g < n_generations; g++) {
        // The result of the previous step becomes the input of this one.
        tmp = world->cells_prev;
        world->cells_prev = world->cells_next;
        world->cells_next = tmp;

        for(i = 0; i < n_neighbors; i++) {
            MPI_Isend(
                world->cells_prev, 1, send_types[i], neighbor_ranks[i],
                tag, comm, &requests[2*i]
            );
            MPI_Irecv(
                world->cells_prev, 1, recv_types[i], neighbor_ranks[i],
                tag, comm, &requests[2*i+1]
            );
        }

        // Complete the whole exchange before computing anything.
        MPI_Waitall(2*n_neighbors, requests, MPI_STATUSES_IGNORE);
        wireworld_step_complete(world);
    }
}
/*
 * Simulates n_generations steps using non-blocking point-to-point messages,
 * overlapping communication with computation.
 *
 * Per generation: swap the cell buffers, post one send and one receive per
 * neighbor on world->cells_prev, update the interior while the messages are
 * in flight, wait for all requests, then update the boundary.
 */
void do_simulation_p2p_overlap(world_t *world, size_t n_generations)
{
    const int msg_tag = 0;
    const size_t n_peers = world->transfer.n_neighbors;
    const int *peer_ranks = world->transfer.neighbor_ranks;
    const MPI_Datatype *out_types = world->transfer.send_types;
    const MPI_Datatype *in_types = world->transfer.recv_types;
    MPI_Comm comm = world->transfer.comm;

    MPI_Request pending[16];
    size_t gen, k;

    for(gen = 0; gen < n_generations; gen++) {
        // Exchange the roles of the two buffers.
        char *latest = world->cells_next;
        world->cells_next = world->cells_prev;
        world->cells_prev = latest;

        // Slot 2k holds the send request, slot 2k+1 the receive request.
        for(k = 0; k < n_peers; k++) {
            MPI_Request *slot = &pending[2*k];

            MPI_Isend(
                world->cells_prev, 1, out_types[k], peer_ranks[k],
                msg_tag, comm, slot
            );
            MPI_Irecv(
                world->cells_prev, 1, in_types[k], peer_ranks[k],
                msg_tag, comm, slot + 1
            );
        }

        wireworld_step_interior(world);

        MPI_Waitall(2*n_peers, pending, MPI_STATUSES_IGNORE);

        wireworld_step_boundary(world);
    }
}
/*
 * Simulates n_generations steps using persistent send/receive requests,
 * without overlapping communication and computation.
 *
 * Per generation: swap the contents of the two cell buffers, start all
 * persistent requests, wait for them, then update the complete local tile.
 *
 * world          world state; buffer contents are swapped, pointers are not
 * n_generations  number of generations to simulate
 *
 * Aborts the MPI job if the temporary swap buffer cannot be allocated.
 */
void do_simulation_persistent_request_no_overlap(world_t *world, size_t n_generations)
{
    const size_t n_neighbors = world->transfer.n_neighbors;
    const size_t sz = world_get_storage_size(world);
    MPI_Request *requests = world->transfer.persistent_requests;
    size_t g;
    char *tmp = malloc(sz);

    if(tmp == NULL) {
        fprintf(stderr, "Out of memory: cannot allocate swap buffer.\n");
        MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
        return;
    }

    for(g = 0; g < n_generations; g++) {
        // Persistent send/receive requests are bound to a fixed buffer address
        // (presumably world->cells_prev as it was at initialization time).
        // Swapping pointers here will therefore not work and swapping memory is necessary.
        //
        // Possible optimization:
        // Alternately use two sets of persistent requests
        // (one for world->cells_prev and one for world->cells_next).
        memcpy(tmp, world->cells_prev, sz);
        memcpy(world->cells_prev, world->cells_next, sz);
        memcpy(world->cells_next, tmp, sz);

        MPI_Startall((int)(2*n_neighbors), requests);
        MPI_Waitall((int)(2*n_neighbors), requests, MPI_STATUSES_IGNORE);

        wireworld_step_complete(world);
    }

    free(tmp);
}
/*
 * Simulates n_generations steps using persistent send/receive requests,
 * overlapping communication with computation.
 *
 * Per generation: swap the contents of the two cell buffers, start all
 * persistent requests, update the interior while they are in flight, wait
 * for them, then update the boundary.
 *
 * world          world state; buffer contents are swapped, pointers are not
 * n_generations  number of generations to simulate
 *
 * Aborts the MPI job if the temporary swap buffer cannot be allocated.
 */
void do_simulation_persistent_request_overlap(world_t *world, size_t n_generations)
{
    const size_t n_neighbors = world->transfer.n_neighbors;
    const size_t sz = world_get_storage_size(world);
    MPI_Request *requests = world->transfer.persistent_requests;
    size_t g;
    char *tmp = malloc(sz);

    if(tmp == NULL) {
        fprintf(stderr, "Out of memory: cannot allocate swap buffer.\n");
        MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
        return;
    }

    for(g = 0; g < n_generations; g++) {
        // Persistent requests are bound to a fixed buffer address, so the
        // buffer contents are swapped instead of the buffer pointers.
        memcpy(tmp, world->cells_prev, sz);
        memcpy(world->cells_prev, world->cells_next, sz);
        memcpy(world->cells_next, tmp, sz);

        MPI_Startall((int)(2*n_neighbors), requests);

        // Overlap: compute the interior while the exchange is in progress.
        wireworld_step_interior(world);

        MPI_Waitall((int)(2*n_neighbors), requests, MPI_STATUSES_IGNORE);

        wireworld_step_boundary(world);
    }

    free(tmp);
}
/*
 * Updates the whole local tile in a single wireworld_step call.
 */
void wireworld_step_complete(world_t *world)
{
    const size_t width = world->local_size[0];
    const size_t height = world->local_size[1];

    // A stored row is two cells wider than the tile (halo cells).
    const size_t row_stride = width + 2;

    // Start one row down and one cell in from the start of the storage.
    wireworld_step(world, row_stride + 1, width, height);
}
/*
 * Updates the interior of the local tile, i.e. the tile without its outermost
 * ring of cells (the region of extent (nx-2) x (ny-2)).
 *
 * Does nothing when the tile is too small to have an interior.
 */
void wireworld_step_interior(world_t *world)
{
    const size_t nx = world->local_size[0],
                 ny = world->local_size[1];

    const size_t DOWN = nx+2; // (+2 ... for halo cells)
    const size_t i_leftupper = 1 + DOWN;

    // Tiles narrower than 3 cells have no interior. Without this guard,
    // nx-2 or ny-2 would wrap around to a huge value (size_t is unsigned).
    if(nx < 3 || ny < 3) {
        return;
    }

    wireworld_step(world, i_leftupper+1+DOWN, nx-2, ny-2);
}
/*
 * Updates the outermost ring of the local tile: the upper and lower rows over
 * the full tile width, and the left and right columns between them.
 *
 * Does nothing for an empty tile; the columns are skipped when there are no
 * rows between the upper and the lower row.
 */
void wireworld_step_boundary(world_t *world)
{
    const size_t nx = world->local_size[0],
                 ny = world->local_size[1];

    const size_t DOWN = nx+2; // (+2 ... for halo cells)

    const size_t i_leftupper = 1 + DOWN,
                 i_rightupper = nx + DOWN,
                 i_leftlower = 1 + ny*DOWN;

    // An empty tile has no boundary cells to update.
    if(nx == 0 || ny == 0) {
        return;
    }

    wireworld_step(world, i_leftupper, nx, 1); // upper
    wireworld_step(world, i_leftlower, nx, 1); // lower

    // The columns span the ny-2 rows strictly between upper and lower row.
    // Guarded because ny-2 would wrap around for ny < 2 (size_t is unsigned).
    if(ny > 2) {
        wireworld_step(world, i_leftupper+DOWN,  1, ny-2); // left
        wireworld_step(world, i_rightupper+DOWN, 1, ny-2); // right
    }
}
void wireworld_step(world_t *world, size_t i_start, size_t bx, size_t by)
......
......@@ -2,7 +2,8 @@
#define _AUTOMATON_H_
#include "world.h"
#include "configuration.h"
void do_simulation(world_t *world, size_t n_generations);
void do_simulation(world_t *world, size_t n_generations, const conf_t *c);
#endif
......@@ -6,10 +6,13 @@
void world_init_io_type(world_t *world);
void world_free_io_type(world_t *world);
void world_init_neighborhood(world_t *world, MPI_Comm cart_comm, int nprocs[], int proc_coord[]);
void world_init_neighborhood(world_t *world, MPI_Comm cart_comm, int nprocs[], int proc_coord[], const conf_t *c);
void world_free_neighborhood(world_t *world);
void world_init(world_t *world, MPI_Comm cart_comm, size_t *global_size)
void world_init_persistent_requests(world_t *world, const conf_t *c);
void world_free_persistent_requests(world_t *world);
void world_init(world_t *world, MPI_Comm cart_comm, size_t *global_size, const conf_t *c)
{
int dim, lo, hi;
int nprocs[2], periods[2], proc_coord[2];
......@@ -34,13 +37,15 @@ void world_init(world_t *world, MPI_Comm cart_comm, size_t *global_size)
memset(world->cells_next, ' ', storage_size);
world_init_io_type(world);
world_init_neighborhood(world, cart_comm, nprocs, proc_coord);
world_init_neighborhood(world, cart_comm, nprocs, proc_coord, c);
world_init_persistent_requests(world, c);
}
void world_free(world_t *world)
{
world_free_io_type(world);
world_free_neighborhood(world);
world_free_persistent_requests(world);
free(world->cells_prev);
free(world->cells_next);
......@@ -49,10 +54,12 @@ void world_free(world_t *world)
}
void world_init_io_type(world_t *world) {
const int nx = world->local_size[0], ny = world->local_size[1];
const int sizes[] = {nx+2, ny+2};
const int subsizes[] = {nx, ny};
const int starts[] = {1, 1};
const int nx = world->local_size[0],
ny = world->local_size[1];
const int sizes[] = {nx+2, ny+2},
subsizes[] = {nx, ny},
starts[] = {1, 1};
MPI_Type_create_subarray(2,
sizes, subsizes, starts,
......@@ -66,7 +73,7 @@ void world_free_io_type(world_t *world)
MPI_Type_free(&world->transfer.io_type);
}
void world_init_neighborhood(world_t *world, MPI_Comm cart_comm, int nprocs[], int proc_coord[])
void world_init_neighborhood(world_t *world, MPI_Comm cart_comm, int nprocs[], int proc_coord[], const conf_t *c)
{
const int px = proc_coord[0],
py = proc_coord[1];
......@@ -98,7 +105,7 @@ void world_init_neighborhood(world_t *world, MPI_Comm cart_comm, int nprocs[], i
size_t i, n;
const int sizes[] = {nx+2, ny+2};
int neighbor_ranks[8];
int *neighbor_ranks = world->transfer.neighbor_ranks;
int weights[8];
MPI_Datatype *send_types = world->transfer.send_types;
......@@ -134,14 +141,15 @@ void world_init_neighborhood(world_t *world, MPI_Comm cart_comm, int nprocs[], i
}
world->transfer.n_neighbors = n;
// Create graph communicator
{
if(c->transmission_mode == SPARSE_COLLECTIVE) {
const int allow_reorder = 0;
MPI_Dist_graph_create_adjacent(cart_comm,
n, neighbor_ranks, weights,
n, neighbor_ranks, weights,
MPI_INFO_NULL, allow_reorder, &world->transfer.graph_comm
MPI_INFO_NULL, allow_reorder, &world->transfer.comm
);
} else {
MPI_Comm_dup(cart_comm, &world->transfer.comm);
}
}
......@@ -156,12 +164,59 @@ void world_free_neighborhood(world_t *world)
MPI_Type_free(&send_types[i]);
MPI_Type_free(&recv_types[i]);
}
MPI_Comm_free(&world->transfer.graph_comm);
MPI_Comm_free(&world->transfer.comm);
}
/*
 * Prepares world->transfer.persistent_requests.
 *
 * In PERSISTENT_REQUEST transmission mode, one persistent send and one
 * persistent receive on world->cells_prev are created per neighbor
 * (send in slot 2k, receive in slot 2k+1). In every other mode the first
 * 16 slots are set to MPI_REQUEST_NULL.
 */
void world_init_persistent_requests(world_t *world, const conf_t *c)
{
    MPI_Request *reqs = world->transfer.persistent_requests;
    size_t k;

    if(c->transmission_mode != PERSISTENT_REQUEST) {
        // Nothing to communicate with persistent requests: mark slots unused.
        for(k = 0; k < 16; k++) {
            reqs[k] = MPI_REQUEST_NULL;
        }
        return;
    }

    {
        const size_t n_peers = world->transfer.n_neighbors;
        const int msg_tag = 0;
        int *peer_ranks = world->transfer.neighbor_ranks;
        MPI_Datatype *out_types = world->transfer.send_types;
        MPI_Datatype *in_types = world->transfer.recv_types;
        MPI_Comm comm = world->transfer.comm;

        for(k = 0; k < n_peers; k++) {
            MPI_Request *slot = &reqs[2*k];

            MPI_Send_init(
                world->cells_prev, 1, out_types[k], peer_ranks[k],
                msg_tag, comm, slot
            );
            MPI_Recv_init(
                world->cells_prev, 1, in_types[k], peer_ranks[k],
                msg_tag, comm, slot + 1
            );
        }
    }
}
/*
 * Frees every non-null request among the first 2*n_neighbors entries of
 * world->transfer.persistent_requests.
 */
void world_free_persistent_requests(world_t *world)
{
    MPI_Request *reqs = world->transfer.persistent_requests;
    const size_t n_slots = 2 * world->transfer.n_neighbors;
    size_t k = 0;

    while(k < n_slots) {
        MPI_Request *slot = &reqs[k++];

        // Slots that were never initialized as persistent requests are skipped.
        if(*slot == MPI_REQUEST_NULL) {
            continue;
        }
        MPI_Request_free(slot);
    }
}
/*
 * Returns the number of bytes of one cell buffer of the local tile:
 * (local_size[0] + 2) * (local_size[1] + 2) cells of type char.
 * The "+2" per dimension presumably accounts for the halo cells.
 */
size_t world_get_storage_size(const world_t *world)
{
    // size_t (not int) so the product is computed without int overflow.
    const size_t nx = world->local_size[0],
                 ny = world->local_size[1];

    return (nx+2)*(ny+2)*sizeof(char);
}
#ifndef _CELLS_H_
#define _CELLS_H_
#ifndef _WORLD_H_
#define _WORLD_H_
#include <stddef.h>
#include <mpi.h>
#include "configuration.h"
typedef struct
{
size_t n_neighbors;
MPI_Comm graph_comm;
int neighbor_ranks[8];
MPI_Datatype send_types[8];
MPI_Datatype recv_types[8];