// Wireworld simulation drivers: MPI halo-exchange variants and the cell-update kernel.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <mpi.h>
#include "simulation.h"
// Character codes of the three active Wireworld cell states as stored in the
// cell buffers. In wireworld_step() a head becomes a tail, a tail becomes
// wire, and a wire cell becomes a head when 1 or 2 of its 8 neighbours are
// heads. Any other character is left untouched by the update.
#define ELECTRON_HEAD '@'
#define ELECTRON_TAIL '~'
#define WIRE '#'
// Per-mode simulation drivers; do_simulation() picks one of them based on the
// configured transmission mode and communication/computation mode.
void do_simulation_blocking_sparse(world_t *world, size_t n_generations);
void do_simulation_nonblocking_sparse(world_t *world, size_t n_generations);
void do_simulation_p2p_overlap(world_t *world, size_t n_generations);
void do_simulation_p2p_no_overlap(world_t *world, size_t n_generations);
void do_simulation_persistent_request_overlap(world_t *world, size_t n_generations);
void do_simulation_persistent_request_no_overlap(world_t *world, size_t n_generations);
// Cell-update helpers: the whole local grid, only the cells that do not touch
// the halo, only the outermost ring of local cells, and the rectangular kernel
// (start index, width bx, height by) that the other three delegate to.
void wireworld_step_complete(world_t *world);
void wireworld_step_interior(world_t *world);
void wireworld_step_boundary(world_t *world);
void wireworld_step(world_t *world, size_t i_start, size_t bx, size_t by);
// --------------------------------------------------------------------------
// Runs the simulation for n_generations generations with the driver that
// matches the configuration.
//
// world:         simulation state handed through to the selected driver.
// n_generations: number of generations to compute.
// c:             configuration; the driver key is the bitwise OR of
//                c->transmission_mode and c->communication_computation_mode.
//
// A key without a matching driver prints a message to stderr and aborts
// MPI_COMM_WORLD with EXIT_FAILURE.
void do_simulation(world_t *world, size_t n_generations, const conf_t *c)
{
    const int mode = c->transmission_mode | c->communication_computation_mode;

    switch(mode) {
    case SPARSE_COLLECTIVE | NO_OVERLAP:
        do_simulation_blocking_sparse(world, n_generations);
        break;
    case SPARSE_COLLECTIVE | OVERLAP:
        do_simulation_nonblocking_sparse(world, n_generations);
        break;
    case POINT_TO_POINT | NO_OVERLAP:
        do_simulation_p2p_no_overlap(world, n_generations);
        break;
    case POINT_TO_POINT | OVERLAP:
        do_simulation_p2p_overlap(world, n_generations);
        break;
    case PERSISTENT_REQUEST | NO_OVERLAP:
        do_simulation_persistent_request_no_overlap(world, n_generations);
        break;
    case PERSISTENT_REQUEST | OVERLAP:
        do_simulation_persistent_request_overlap(world, n_generations);
        break;
    default:
        fprintf(stderr, "Not yet implemented.\n");
        MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
        break;
    }
}
// Driver using the blocking neighbourhood collective: for every generation the
// two cell buffers trade roles, the neighbour exchange runs to completion on
// the new "previous" buffer, and then the whole local grid is updated.
//
// world:         simulation state; its cells_prev/cells_next pointers are
//                swapped once per generation.
// n_generations: number of generations to compute.
void do_simulation_blocking_sparse(world_t *world, size_t n_generations)
{
    // One element of every per-neighbour datatype, each placed at
    // displacement 0 from the buffer address (eight entries).
    const int counts[] = {1, 1, 1, 1, 1, 1, 1, 1};
    const MPI_Aint displs[] = {0, 0, 0, 0, 0, 0, 0, 0};
    const MPI_Datatype *send_types = world->transfer.send_types;
    const MPI_Datatype *recv_types = world->transfer.recv_types;
    MPI_Comm comm = world->transfer.comm;
    size_t remaining = n_generations;

    while(remaining-- > 0) {
        // The freshly computed generation becomes the input of this one.
        char *const latest = world->cells_next;
        world->cells_next = world->cells_prev;
        world->cells_prev = latest;

        // The same buffer is passed for sending and receiving; which cells
        // are touched is decided by the send/recv datatypes.
        MPI_Neighbor_alltoallw(
            latest, counts, displs, send_types,
            latest, counts, displs, recv_types,
            comm
        );

        wireworld_step_complete(world);
    }
}
// Driver using the non-blocking neighbourhood collective: the exchange is
// started, the cells that do not depend on the halo are updated while it is in
// flight, and the outer ring is updated once the exchange has completed.
//
// world:         simulation state; its cells_prev/cells_next pointers are
//                swapped once per generation.
// n_generations: number of generations to compute.
void do_simulation_nonblocking_sparse(world_t *world, size_t n_generations)
{
    // One element of every per-neighbour datatype, each placed at
    // displacement 0 from the buffer address (eight entries).
    const int counts[] = {1, 1, 1, 1, 1, 1, 1, 1};
    const MPI_Aint displs[] = {0, 0, 0, 0, 0, 0, 0, 0};
    const MPI_Datatype *send_types = world->transfer.send_types;
    const MPI_Datatype *recv_types = world->transfer.recv_types;
    MPI_Comm comm = world->transfer.comm;
    MPI_Request pending;
    size_t remaining = n_generations;

    while(remaining-- > 0) {
        // The freshly computed generation becomes the input of this one.
        char *const latest = world->cells_next;
        world->cells_next = world->cells_prev;
        world->cells_prev = latest;

        MPI_Ineighbor_alltoallw(
            latest, counts, displs, send_types,
            latest, counts, displs, recv_types,
            comm, &pending
        );

        // Interior cells are updated before waiting for the exchange.
        wireworld_step_interior(world);

        MPI_Wait(&pending, MPI_STATUS_IGNORE);
        wireworld_step_boundary(world);
    }
}
// Driver using non-blocking point-to-point messages without overlap: every
// generation posts one send and one receive per neighbour, waits for all of
// them, and only then updates the whole local grid.
//
// world:         simulation state; its cells_prev/cells_next pointers are
//                swapped once per generation.
// n_generations: number of generations to compute.
//
// The request array lives on the stack with room for 8 neighbours. A larger
// neighbour count prints a message to stderr and aborts MPI_COMM_WORLD instead
// of writing past the end of the array.
void do_simulation_p2p_no_overlap(world_t *world, size_t n_generations)
{
    enum { P2P_MAX_REQUESTS = 16 }; // one send + one receive for up to 8 neighbours
    const int tag = 0;
    const size_t n_neighbors = world->transfer.n_neighbors;
    const int *neighbor_ranks = world->transfer.neighbor_ranks;
    const MPI_Datatype *send_types = world->transfer.send_types,
                       *recv_types = world->transfer.recv_types;
    MPI_Comm comm = world->transfer.comm;
    size_t g, i;
    char *tmp;
    MPI_Request requests[P2P_MAX_REQUESTS];

    if(n_neighbors > P2P_MAX_REQUESTS / 2) {
        fprintf(stderr, "Unsupported neighbor count %zu (at most %d supported).\n",
                n_neighbors, P2P_MAX_REQUESTS / 2);
        MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
        return;
    }

    for(g = 0; g < n_generations; g++) {
        // The freshly computed generation becomes the input of this one.
        tmp = world->cells_prev;
        world->cells_prev = world->cells_next;
        world->cells_next = tmp;

        // Requests are stored pairwise: slot 2*i is the send to neighbour i,
        // slot 2*i+1 the receive from it.
        for(i = 0; i < n_neighbors; i++) {
            MPI_Isend(
                world->cells_prev, 1, send_types[i], neighbor_ranks[i],
                tag, comm, &requests[2*i]
            );
            MPI_Irecv(
                world->cells_prev, 1, recv_types[i], neighbor_ranks[i],
                tag, comm, &requests[2*i+1]
            );
        }

        // The guard above bounds the count by P2P_MAX_REQUESTS, so the
        // narrowing to int is safe.
        MPI_Waitall((int)(2*n_neighbors), requests, MPI_STATUSES_IGNORE);
        wireworld_step_complete(world);
    }
}
// Driver using non-blocking point-to-point messages with overlap: every
// generation posts one send and one receive per neighbour, updates the cells
// that do not depend on the halo while the messages are in flight, waits for
// all requests, and then updates the outer ring of local cells.
//
// world:         simulation state; its cells_prev/cells_next pointers are
//                swapped once per generation.
// n_generations: number of generations to compute.
//
// The request array lives on the stack with room for 8 neighbours. A larger
// neighbour count prints a message to stderr and aborts MPI_COMM_WORLD instead
// of writing past the end of the array.
void do_simulation_p2p_overlap(world_t *world, size_t n_generations)
{
    enum { P2P_MAX_REQUESTS = 16 }; // one send + one receive for up to 8 neighbours
    const int tag = 0;
    const size_t n_neighbors = world->transfer.n_neighbors;
    const int *neighbor_ranks = world->transfer.neighbor_ranks;
    const MPI_Datatype *send_types = world->transfer.send_types,
                       *recv_types = world->transfer.recv_types;
    MPI_Comm comm = world->transfer.comm;
    size_t g, i;
    char *tmp;
    MPI_Request requests[P2P_MAX_REQUESTS];

    if(n_neighbors > P2P_MAX_REQUESTS / 2) {
        fprintf(stderr, "Unsupported neighbor count %zu (at most %d supported).\n",
                n_neighbors, P2P_MAX_REQUESTS / 2);
        MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
        return;
    }

    for(g = 0; g < n_generations; g++) {
        // The freshly computed generation becomes the input of this one.
        tmp = world->cells_prev;
        world->cells_prev = world->cells_next;
        world->cells_next = tmp;

        // Requests are stored pairwise: slot 2*i is the send to neighbour i,
        // slot 2*i+1 the receive from it.
        for(i = 0; i < n_neighbors; i++) {
            MPI_Isend(
                world->cells_prev, 1, send_types[i], neighbor_ranks[i],
                tag, comm, &requests[2*i]
            );
            MPI_Irecv(
                world->cells_prev, 1, recv_types[i], neighbor_ranks[i],
                tag, comm, &requests[2*i+1]
            );
        }

        // Interior cells are updated before waiting for the messages.
        wireworld_step_interior(world);

        // The guard above bounds the count by P2P_MAX_REQUESTS, so the
        // narrowing to int is safe.
        MPI_Waitall((int)(2*n_neighbors), requests, MPI_STATUSES_IGNORE);
        wireworld_step_boundary(world);
    }
}
// Driver using persistent requests without overlap: every generation copies
// the newest cells into cells_prev, starts all persistent requests, waits for
// them, and then updates the whole local grid.
//
// world:         simulation state; provides the persistent request array
//                (two requests per neighbour) and both cell buffers.
// n_generations: number of generations to compute.
void do_simulation_persistent_request_no_overlap(world_t *world, size_t n_generations)
{
    const size_t n_requests = 2 * world->transfer.n_neighbors;
    const size_t n_bytes = world_get_storage_size(world);
    MPI_Request *const pending = world->transfer.persistent_requests;

    for(size_t gen = 0; gen < n_generations; ++gen) {
        // The persistent send/receive requests were initialized with the
        // address held in world->cells_prev, so exchanging the two buffer
        // pointers would not work here; the data has to be copied instead.
        //
        // Possible optimization: keep two sets of persistent requests (one
        // bound to world->cells_prev, one to world->cells_next) and alternate
        // between them.
        memcpy(world->cells_prev, world->cells_next, n_bytes);

        MPI_Startall(n_requests, pending);
        MPI_Waitall(n_requests, pending, MPI_STATUSES_IGNORE);

        wireworld_step_complete(world);
    }
}
// Driver using persistent requests with overlap: every generation copies the
// newest cells into cells_prev, starts all persistent requests, updates the
// cells that do not depend on the halo while the requests are active, waits
// for them, and then updates the outer ring of local cells.
//
// world:         simulation state; provides the persistent request array
//                (two requests per neighbour) and both cell buffers.
// n_generations: number of generations to compute.
void do_simulation_persistent_request_overlap(world_t *world, size_t n_generations)
{
    const size_t n_neighbors = world->transfer.n_neighbors;
    const size_t sz = world_get_storage_size(world);
    size_t g;
    MPI_Request *requests = world->transfer.persistent_requests;

    for(g = 0; g < n_generations; g++) {
        // The persistent requests were initialized with the address held in
        // world->cells_prev, so the buffers cannot simply be swapped; the
        // newest generation is copied into cells_prev instead.
        memcpy(world->cells_prev, world->cells_next, sz);

        MPI_Startall(2*n_neighbors, requests);

        // Interior cells are updated before waiting for the requests.
        wireworld_step_interior(world);

        MPI_Waitall(2*n_neighbors, requests, MPI_STATUSES_IGNORE);
        wireworld_step_boundary(world);
    }
}
// Updates every local cell: the nx-by-ny rectangle whose first cell sits one
// row and one column into the buffer, i.e. just inside the halo.
void wireworld_step_complete(world_t *world)
{
    const size_t width = world->local_size[0];
    const size_t height = world->local_size[1];
    // Each stored row carries one halo cell on either side.
    const size_t row_stride = width + 2;
    const size_t first_local = row_stride + 1;

    wireworld_step(world, first_local, width, height);
}
// Updates the local cells that have no halo cell in their neighbourhood: the
// (nx-2)-by-(ny-2) rectangle starting at the second local row and column.
//
// Does nothing when either local dimension is smaller than 3, because such a
// grid has no interior. Without the check, nx-2 or ny-2 would wrap around in
// size_t for dimensions below 2 and the kernel would run far out of bounds.
void wireworld_step_interior(world_t *world)
{
    const size_t nx = world->local_size[0],
                 ny = world->local_size[1];
    const size_t DOWN = nx+2; // (+2 ... for halo cells)
    const size_t i_leftupper = 1 + DOWN;

    if(nx < 3 || ny < 3) {
        return;
    }
    wireworld_step(world, i_leftupper+1+DOWN, nx-2, ny-2);
}
// Updates the outermost ring of local cells, i.e. the cells whose
// neighbourhood includes halo cells: the first and last local row in full,
// plus the first and last local column between those two rows.
//
// Degenerate grids are handled explicitly:
//  - an empty grid (nx == 0 or ny == 0) is left untouched;
//  - with a single row the lower pass is skipped (it would repeat the upper);
//  - the column passes only run when there are rows between the upper and the
//    lower one, which also keeps ny-2 from wrapping around in size_t;
//  - with a single column the right pass is skipped (it would repeat the left).
void wireworld_step_boundary(world_t *world)
{
    const size_t nx = world->local_size[0],
                 ny = world->local_size[1];
    const size_t DOWN = nx+2; // (+2 ... for halo cells)
    const size_t i_leftupper  = 1 + DOWN,     // first local row, first local column
                 i_rightupper = nx + DOWN,    // first local row, last local column
                 i_leftlower  = 1 + ny*DOWN;  // last local row, first local column

    if(nx == 0 || ny == 0) {
        return;
    }

    wireworld_step(world, i_leftupper, nx, 1); // upper
    if(ny > 1) {
        wireworld_step(world, i_leftlower, nx, 1); // lower
    }
    if(ny > 2) {
        wireworld_step(world, i_leftupper+DOWN, 1, ny-2); // left
        if(nx > 1) {
            wireworld_step(world, i_rightupper+DOWN, 1, ny-2); // right
        }
    }
}
// Applies one Wireworld update to a rectangle of cells, reading from
// world->cells_prev and writing to world->cells_next.
//
// world:   simulation state; rows are local_size[0]+2 cells apart in memory.
// i_start: buffer index of the rectangle's first cell (first row, first column).
// bx, by:  width and height of the rectangle in cells.
//
// Rules per cell:
//  - an electron head becomes an electron tail;
//  - an electron tail becomes wire;
//  - wire becomes an electron head when 1 or 2 of its 8 neighbours are
//    electron heads, otherwise it stays wire;
//  - any other value leaves the target cell unwritten.
void wireworld_step(world_t *world, size_t i_start, size_t bx, size_t by)
{
    const size_t stride = world->local_size[0] + 2; // (+2 ... for halo cells)
    const char *src = world->cells_prev;
    char *dst = world->cells_next;
    size_t row_begin = i_start;

    for(size_t row = 0; row < by; row++, row_begin += stride) {
        for(size_t col = 0; col < bx; col++) {
            const size_t idx = row_begin + col;
            const char state = src[idx];

            if(state == ELECTRON_HEAD) {
                dst[idx] = ELECTRON_TAIL;
            } else if(state == ELECTRON_TAIL) {
                dst[idx] = WIRE;
            } else if(state == WIRE) {
                // Row pointers centred on the current column; offsets -1, 0
                // and +1 address the left, middle and right neighbour.
                const char *mid = src + idx;
                const char *above = mid - stride;
                const char *below = mid + stride;
                int heads = 0;

                heads += (above[-1] == ELECTRON_HEAD);
                heads += (above[0] == ELECTRON_HEAD);
                heads += (above[1] == ELECTRON_HEAD);
                heads += (mid[-1] == ELECTRON_HEAD);
                heads += (mid[1] == ELECTRON_HEAD);
                heads += (below[-1] == ELECTRON_HEAD);
                heads += (below[0] == ELECTRON_HEAD);
                heads += (below[1] == ELECTRON_HEAD);

                dst[idx] = (heads == 1 || heads == 2) ? ELECTRON_HEAD : WIRE;
            }
        }
    }
}