Commit 1113fac6 authored by Thomas Ponweiser's avatar Thomas Ponweiser
Browse files

Added initial halo_exchange sample (unstructured grids) and...

Added initial halo_exchange sample (unstructured grids) and dynamic_sparse_data_exchange (n-body methods)
parent b151ab27
#include <math.h>
#include <stdlib.h>
#include <stdio.h>
#include "box.h"
// --------------------------------------------- Helper function declarations
int min(int a, int b);
int max(int a, int b);
double random_split_ratio();
void box_heap_insert(box_t *heap, int *heap_size, const box_t *box);
void box_heap_take_first(box_t *heap, int *heap_size, box_t *first);
// ==========================================================================
void box_set_volume(box_t *box)
{
int *extents = box->extents;
long volume = 1;
int i;
for(i = 0; i < N_DIMS; i++) {
volume *= extents[i];
}
box->volume = volume;
}
void box_intersect(const box_t *a, const box_t *b, box_t *result)
{
const int *ac = a->coords, *ae = a->extents;
const int *bc = b->coords, *be = b->extents;
int i, coordinate, extent;
for(i = 0; i < N_DIMS; i++) {
coordinate = max(ac[i], bc[i]);
extent = min(ac[i] + ae[i], bc[i] + be[i]) - coordinate;
result->coords[i] = coordinate;
result->extents[i] = extent;
}
box_set_volume(result);
}
int box_is_empty(const box_t *b)
{
const int *extents = b->extents;
return extents[X] <= 0 || extents[Y] <= 0 || extents[Z] <= 0;
}
void box_split(const box_t *original, box_t *a, box_t *b)
{
const int *extents = original->extents;
static int split_dim = 0;
int i, j;
int max_extent = -1;
for(i = 0; i < N_DIMS; i++) {
j = (split_dim + i) % N_DIMS;
if(max_extent < extents[j]) {
max_extent = extents[j];
split_dim = j;
}
}
int ea = (int)round(max_extent * random_split_ratio());
int eb = max_extent - ea;
*a = *original;
*b = *original;
a->extents[split_dim] = ea;
b->coords[split_dim] += ea;
b->extents[split_dim] = eb;
box_set_volume(a);
box_set_volume(b);
}
void box_decompose(const box_t *original, int n_boxes, box_t *boxes)
{
box_t largest_box, tmp_box, part1, part2;
int i, j, n, child1, child2;
boxes[0] = *original;
n = 1;
while(n < n_boxes) {
// take out box with maximum volume and split in two
box_heap_take_first(boxes, &n, &largest_box);
box_split(&largest_box, &part1, &part2);
// insert boxes
box_heap_insert(boxes, &n, &part1);
box_heap_insert(boxes, &n, &part2);
}
}
void box_grow(box_t *box, int extra_width)
{
int *coords = box->coords;
int *extents = box->extents;
int i;
for(i = 0; i < N_DIMS; i++) {
coords[i] -= extra_width;
extents[i] += 2 * extra_width;
}
box_set_volume(box);
}
// --------------------------------------------------------- Helper functions
int min(int a, int b)
{
return b < a ? b : a;
}
int max(int a, int b)
{
return b > a ? b : a;
}
double random_split_ratio()
{
const double jitter = 0.085;
double ratio = 0.5 - jitter + 2 * jitter * random() / RAND_MAX;
return ratio;
}
void box_heap_insert(box_t *heap, int *heap_size, const box_t *box)
{
int current = *heap_size;
int parent = (current - 1) / 2;
box_t tmp = *box;
while(current > 0 && heap[parent].volume < tmp.volume) {
heap[current] = heap[parent];
current = parent;
parent = (current - 1) / 2;
}
heap[current] = tmp;
(*heap_size)++;
}
void box_heap_take_first(box_t *heap, int *heap_size, box_t *first)
{
int current = 0;
int child = current * 2 + 1;
int n = *heap_size - 1;
box_t *tail = &heap[n];
*first = heap[current];
*heap_size = n;
while(child < n) {
if(child+1 < n && heap[child+1].volume > heap[child].volume) child++;
if(tail->volume >= heap[child].volume) break;
heap[current] = heap[child];
current = child;
child = current * 2 + 1;
}
heap[current] = *tail;
}
char *box_to_string(const box_t *box, char *buf, int size)
{
const int *c = box->coords;
const int *e = box->extents;
snprintf(buf, size, "x: [%5d,%5d); y: [%5d,%5d); z: [%5d,%5d); cells: %ld",
c[X], c[X]+e[X], c[Y], c[Y]+e[Y], c[Z], c[Z]+e[Z], box->volume);
return buf;
}
#ifndef BOX_H
#define BOX_H
enum dimension_aliases { X, Y, Z, N_DIMS };
typedef struct
{
int coords[N_DIMS];
int extents[N_DIMS];
long volume;
} box_t;
// Meta-Information for MPI-Type creation
#define BOX_T_N_INT_MEMBERS 2 * N_DIMS
#define BOX_T_FIRST_INT_MEMBER coords[0]
#define BOX_T_N_LONG_MEMBERS 1
#define BOX_T_FIRST_LONG_MEMBER volume
void box_set_volume(box_t *b);
void box_intersect(const box_t *a, const box_t *b, box_t *intersection);
int box_is_empty(const box_t *b);
void box_split(const box_t *box, box_t *a, box_t *b);
void box_decompose(const box_t *original, int n_boxes, box_t *boxes);
void box_grow(box_t *box, int extra_width);
char *box_to_string(const box_t *box, char *buf, int size);
#endif
#include <getopt.h>
#include <math.h>
#include <mpi.h>
#include <stdlib.h>
#include <stdio.h>
#include "configuration.h"
#include "box.h"
// --------------------------------------------- Helper function declarations
long parse_long(const char *str);
void set_ncells_per_proc(conf_t *c, double ncells_per_proc);
void set_ncells_total(conf_t *c, double ncells_total);
void set_global_domain(conf_t *c, int cube_edge_length);
const char *verbosity_levels[] = {"OFF", "INFO", "DEBUG", "TRACE"};
// ==========================================================================
void conf_init(conf_t *c)
{
// Default number of cells (mesh elements) per proces: 1 * 2^14 = 16k
double ncells_per_proc_avg = 0x1p14;
c->verbosity_level = INFO;
c->debug_rank = -1;
#if MPI_VERSION >= 3
c->transfer_mode = SPARSE_COLLECTIVE;
#else
#pragma message("MPI_VERSION < 3 => Default transfer mode for halo exchange will be collective.")
c->transfer_mode = COLLECTIVE;
#endif
c->n_iterations = 100;
// Default values:
set_ncells_per_proc(c, ncells_per_proc_avg);
}
const char* conf_set_from_args(conf_t *c, int argc, char* argv[])
{
struct option long_options[] = {
{"verbose", required_argument, NULL, 'v'},
{"debug-rank", required_argument, NULL, 'g'},
{"ncells-per-proc", required_argument, NULL, 'n'},
{"ncells", required_argument, NULL, 'N'},
{"edge-length", required_argument, NULL, 'e'},
{"graph", no_argument, &c->transfer_mode, SPARSE_COLLECTIVE},
{"collective", no_argument, &c->transfer_mode, COLLECTIVE},
{"p2p", no_argument, &c->transfer_mode, P2P_DEFAULT},
{"p2p-sync", no_argument, &c->transfer_mode, P2P_SYNCHRONOUS},
{"p2p-ready", no_argument, &c->transfer_mode, P2P_READY},
{"iterations", required_argument, NULL, 'i'},
{NULL, 0, NULL, 0}
};
int option;
while((option = getopt_long(argc, argv, "v:g:n:N:e:i:", long_options, NULL)) >= 0) {
switch(option) {
case 'v': c->verbosity_level = atoi(optarg); break;
case 'g': c->debug_rank = atoi(optarg); break;
case 'n': set_ncells_per_proc(c, parse_long(optarg)); break;
case 'N': set_ncells_total(c, parse_long(optarg)); break;
case 'e': set_global_domain(c, atoi(optarg)); break;
case 'i': c->n_iterations = parse_long(optarg); break;
}
}
return NULL;
}
void conf_print(const conf_t *c, FILE *f)
{
char *transfer_mode_strs[] = {
"Sparse collective - MPI_Neighbor_alltoallw",
"Collective - MPI_Alltoallw",
"Point-to-point - MPI_Isend / MPI_Irecv",
"Point-to-point; synchronous mode - MPI_Issend / MPI_Irecv",
"Point-to-point; ready mode - MPI_Irsend / MPI_Irecv"
};
char buf[128];
int i = c->verbosity_level;
if(i < 0) i = 0; else if(i > 3) i = 3;
fprintf(f, "Configuration:\n");
fprintf(f, " * Verbosity level: %s (%d)\n", verbosity_levels[i], c->verbosity_level);
fprintf(f, " * Mesh domain (cube): %s\n", box_to_string(&c->global_domain, buf, 128));
fprintf(f, " * Halo transfer mode: %s\n", transfer_mode_strs[c->transfer_mode]);
fprintf(f, " * Number of iterations: %d\n", c->n_iterations);
fprintf(f, "\n");
}
int log_enabled(const conf_t *c, int lvl)
{
static int rank = -1;
if(rank < 0) {
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
}
return c->verbosity_level >= lvl && (rank == 0 || rank == c->debug_rank);
}
int info_enabled(const conf_t *c)
{
return log_enabled(c, INFO);
}
int debug_enabled(const conf_t *c)
{
return log_enabled(c, DEBUG);
}
int trace_enabled(const conf_t *c)
{
return log_enabled(c, TRACE);
}
// --------------------------------------------------------- Helper functions
void set_ncells_per_proc(conf_t *c, double ncells_per_proc)
{
int nprocs;
MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
set_ncells_total(c, ncells_per_proc * nprocs);
}
void set_ncells_total(conf_t *c, double ncells_total)
{
double edge_length_exact = cbrt(ncells_total);
set_global_domain(c, (int)round(edge_length_exact));
}
void set_global_domain(conf_t *c, int cube_edge_length)
{
box_t *cube = &c->global_domain;
int i;
for(i = 0; i < N_DIMS; i++) {
cube->coords[i] = 0;
cube->extents[i] = cube_edge_length;
}
box_set_volume(cube);
}
long parse_long(const char *str)
{
char* p;
long result = strtol(str, &p, 10);
if(*p == 'k') {
result <<= 10;
} else if(*p == 'M') {
result <<= 20;
}
return result;
}
#ifndef CONFIGURATION_H
#define CONFIGURATION_H
#include <stdio.h>
#include "box.h"
enum verbosity_level_enum
{
OFF = 0,
INFO = 1,
DEBUG = 2,
TRACE = 3,
};
enum transfer_mode_enum
{
SPARSE_COLLECTIVE = 0,
COLLECTIVE = 1,
P2P_DEFAULT = 2,
P2P_SYNCHRONOUS = 3,
P2P_READY = 4,
};
typedef struct
{
int verbosity_level;
int debug_rank;
int transfer_mode;
int n_iterations;
box_t global_domain;
} conf_t;
// Meta-information for MPI-Datatype creation (see mpitypes.c)
#define CONF_T_N_INT_MEMBERS 4
#define CONF_T_FIRST_INT_MEMBER verbosity_level
#define CONF_T_N_BOX_MEMBERS 1
#define CONF_T_FIRST_BOX_MEMBER global_domain
void conf_init(conf_t *c);
const char* conf_set_from_args(conf_t *c, int argc, char* argv[]);
void conf_print(const conf_t *c, FILE *file);
int info_enabled(const conf_t *c);
int warn_enabled(const conf_t *c);
int debug_enabled(const conf_t *c);
int trace_enabled(const conf_t *c);
#endif
#include <assert.h>
#include <stdlib.h>
#include "field.h"
#include "mesh.h"
void int_field_init(int_field_t *field, const conf_t *configuration, const mesh_t *mesh)
{
field->configuration = configuration;
field->mesh = mesh;
field->data = malloc(mesh->extended_local_domain.volume * sizeof(int));
}
void int_field_free(int_field_t *field)
{
free(field->data);
field->data = NULL;
}
void int_field_set_constant(int_field_t *field, int value)
{
int i, n;
int *data;
n = field->mesh->extended_local_domain.volume;
data = field->data;
for(i = 0; i < n; i++) data[i] = value;
}
int int_field_check_value(const int_field_t *field, const box_t *domain, int expected_value)
{
const mesh_t* mesh = field->mesh;
const int *c = domain->coords;
const int *e = domain->extents;
const int *data = field->data;
int x, y, z, idx;;
for(z = c[Z]; z < c[Z] + e[Z]; z++) {
for(y = c[Y]; y < c[Y] + e[Y]; y++) {
for(x = c[X]; x < c[X] + e[X]; x++) {
idx = mesh_idx(mesh, x, y, z);
if(data[idx] != expected_value) return 0;
}
}
}
return 1;
}
void int_field_halo_exchange(int_field_t *field)
{
int mode = field->configuration->transfer_mode;
switch(mode) {
case SPARSE_COLLECTIVE:
mpi_halo_exchange_int_sparse_collective(field); break;
case COLLECTIVE:
mpi_halo_exchange_int_collective(field); break;
case P2P_DEFAULT:
mpi_halo_exchange_int_p2p_default(field); break;
case P2P_SYNCHRONOUS:
mpi_halo_exchange_int_p2p_synchronous(field); break;
case P2P_READY:
mpi_halo_exchange_int_p2p_ready(field); break;
}
}
#ifndef FIELD_H
#define FIELD_H
#include "configuration.h"
#include "mesh.h"
typedef struct
{
const conf_t *configuration;
const mesh_t *mesh;
int *data;
} int_field_t;
void int_field_init(int_field_t *field, const conf_t *configuration, const mesh_t *mesh);
void int_field_free(int_field_t *field);
void int_field_set_constant(int_field_t *field, int value);
int int_field_check_value(const int_field_t *field, const box_t *domain, int expected_value);
void int_field_halo_exchange(int_field_t *field);
#endif
#include <limits.h>
#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include "configuration.h"
#include "mpicomm.h"
#include "mesh.h"
#include "field.h"
#ifdef DEBUG_ATTACH
#include <unistd.h>
void wait_for_debugger();
#endif
void print_decomposition(const conf_t *configuraiton, const box_t *decomposition, FILE *f);
void print_adjacency_matrix(const mesh_t *mesh, const conf_t *configuraiton, FILE *f);
void halo_exchange_demo(const mesh_t *mesh, const conf_t *configuration);
int main(int argc, char *argv[])
{
int mpi_rank, nprocs;
const char* error;
mesh_t mesh;
MPI_Init(&argc, &argv);
MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
MPI_Comm_rank(MPI_COMM_WORLD, &mpi_rank);
// initialize random number generator
srand((mpi_rank + 17) * (int)time(NULL));
conf_t configuration;
conf_init(&configuration);
if(mpi_rank == 0) {
// parse comand line args on root process only
error = conf_set_from_args(&configuration, argc, argv);
if(error) {
fprintf(stderr, "%s\n", error);
MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
}
}
// Broadcast configuration (nonblocking)
MPI_Request req_bcast_conf;
mpi_broadcast_configuration_start(&configuration, &req_bcast_conf);
// Create and broadcast decomposition
box_t *decomposition = malloc(nprocs * sizeof(*decomposition));
if(mpi_rank == 0) {
const box_t *cube = &configuration.global_domain;
box_decompose(cube, nprocs, decomposition);
}
mpi_broadcast_decomposition(decomposition);
// Finish nonblocking configuration broadcast
mpi_broadcast_configuration_finish(&req_bcast_conf);
if(info_enabled(&configuration)) {
conf_print(&configuration, stdout);
print_decomposition(&configuration, decomposition, stdout);
}
#ifdef DEBUG_ATTACH
if(mpi_rank == configuration.debug_rank) {
wait_for_debugger();
}
#endif
mesh_init(&mesh, &configuration, decomposition);
print_adjacency_matrix(&mesh, &configuration, stdout);
halo_exchange_demo(&mesh, &configuration);
mesh_free(&mesh);
free(decomposition);
MPI_Finalize();
return EXIT_SUCCESS;
}
// --------------------------------------------------------------------------
void halo_exchange_demo(const mesh_t *mesh, const conf_t *configuration)
{
int mpi_rank, i, n;
int_field_t field;
const neighbor_info *nb;