from __future__ import print_function
from mpi4py import MPI
import numpy as np

comm = MPI.COMM_WORLD
size = comm.Get_size()
rank = comm.Get_rank()

if size != 2:
    raise RuntimeError("Please run with two MPI tasks")

data = np.zeros((8, 8), int)
if rank == 0:
    for i in range(8):
        data[i, :] = np.arange(1, 9) + (i + 1) * 10

if rank == 0:
    print("Original data:")
    print(data)

# Create the custom datatype
# Note: the default numpy integer is 64 bits
columntype = MPI.INT64_T.Create_vector(8, 1, 8)
columntype.Commit()

# mpi4py requires that the input and output buffers are contiguous
# in memory. Thus, in order to send/receive the second column we need
# a contiguous view starting from the second element in memory, which
# can be obtained by flattening the array into 1D with ravel.
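# (For a C-contiguous array ravel() returns a view rather than a copy, so
# the Recv below writes directly into the original data array.)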
if rank == 0:
    comm.Send((data.ravel()[1:], 1, columntype), dest=1)
elif rank == 1:
    comm.Recv((data.ravel()[1:], 1, columntype), source=0)

if rank == 1:
    print("Received data:")
    print(data)
from __future__ import print_function
from mpi4py import MPI
import numpy as np

comm = MPI.COMM_WORLD
size = comm.Get_size()
rank = comm.Get_rank()

if size != 2:
    raise RuntimeError("Please run with two MPI tasks")

data = np.zeros((8, 8), int)
if rank == 0:
    for i in range(8):
        data[i, :] = np.arange(1, 9) + (i + 1) * 10

if rank == 0:
    print("Original data:")
    print(data)

# Create the custom datatype
# Note: the default numpy integer is 64 bits
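# Block lengths 1, 2, 3, 4 at displacements 0, 17, 34, 51 (in elements)
# select a staircase pattern in the 8x8 array: one element at (0,0), two
# starting at (2,1), three at (4,2) and four at (6,3).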
counts = np.arange(4, dtype=int) + 1
# Displacements are given in units of the element type (not bytes)
displacements = np.arange(4, dtype=int) * (2 * data.shape[1] + 1)
indexedtype = MPI.INT64_T.Create_indexed(counts, displacements)
indexedtype.Commit()

if rank == 0:
    comm.Send((data, 1, indexedtype), dest=1)
elif rank == 1:
    comm.Recv((data, 1, indexedtype), source=0)

if rank == 1:
    print("Received data:")
    print(data)
Copyright (C) 2018 CSC - IT Center for Science Ltd.
Licensed under the terms of the GNU General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
Code is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
Copy of the GNU General Public License can be obtained from
<http://www.gnu.org/licenses/>.
Two dimensional heat equation
=============================
This folder contains a code that solves the two-dimensional heat equation
with MPI parallelization. The code features non-blocking point-to-point
communication, user-defined datatypes, collective communication,
and parallel I/O with MPI I/O.
The heat (or diffusion) equation is
<!-- Equation
\frac{\partial u}{\partial t} = \alpha \nabla^2 u
-->
![img](http://quicklatex.com/cache3/d2/ql_b3f6b8bdc3a8862c73c5a97862afb9d2_l3.png)
where **u(x, y, t)** is the temperature field that varies in space and time,
and α is the thermal diffusivity constant. The two-dimensional Laplacian can be
discretized with finite differences as
<!-- Equation
\begin{align*}
\nabla^2 u &= \frac{u(i-1,j)-2u(i,j)+u(i+1,j)}{(\Delta x)^2} \\
&+ \frac{u(i,j-1)-2u(i,j)+u(i,j+1)}{(\Delta y)^2}
\end{align*}
-->
![img](http://quicklatex.com/cache3/2d/ql_59f49ed64dbbe76704e0679b8ad7c22d_l3.png)
Given an initial condition (u(t=0) = u0), one can follow the time dependence of
the temperature field with an explicit time evolution method:
<!-- Equation
u^{m+1}(i,j) = u^m(i,j) + \Delta t \alpha \nabla^2 u^m(i,j)
-->
![img](http://quicklatex.com/cache3/9e/ql_9eb7ce5f3d5eccd6cfc1ff5638bf199e_l3.png)
Note: the algorithm is stable only when
<!-- Equation
\Delta t < \frac{1}{2 \alpha} \frac{(\Delta x)^2 (\Delta y)^2}{(\Delta x)^2 + (\Delta y)^2}
-->
![img](http://quicklatex.com/cache3/d1/ql_0e7107049c9183d11dbb1e81174280d1_l3.png)
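For reference, the update and the largest stable time step look as follows in
code. This is a minimal serial sketch with illustrative names; the actual
parallel implementations are in the C and Fortran sources in this folder.

```c
/* Minimal serial sketch of one explicit Euler step on an (nx+2) x (ny+2)
 * array with one ghost cell layer on each side. Names are illustrative;
 * the parallel version lives in core.c / core.F90. */
void evolve_serial(double *curr, const double *prev, int nx, int ny,
                   double dx, double dy, double a)
{
    double dx2 = dx * dx;
    double dy2 = dy * dy;
    double dt = dx2 * dy2 / (2.0 * a * (dx2 + dy2));  /* largest stable step */
    int width = ny + 2;                               /* row length incl. ghosts */

    for (int i = 1; i <= nx; i++) {
        for (int j = 1; j <= ny; j++) {
            int ic = i * width + j;
            curr[ic] = prev[ic] + a * dt *
                ((prev[ic - width] - 2.0 * prev[ic] + prev[ic + width]) / dx2 +
                 (prev[ic - 1] - 2.0 * prev[ic] + prev[ic + 1]) / dy2);
        }
    }
}
```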
The two-dimensional grid is decomposed along both dimensions, and the
communication of boundary data is overlapped with computation. Restart files
are written and read with MPI I/O.
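Schematically, one time step posts the halo exchange, updates the interior
points that do not depend on the ghost layers while the messages are in
flight, completes the exchange, and only then updates the edge points. This
mirrors the structure of the main loop in main.c and main.F90:

```c
exchange_init(&previous, &parallelization);   /* post MPI_Isend / MPI_Irecv   */
evolve_interior(&current, &previous, a, dt);  /* points independent of halos  */
exchange_finalize(&parallelization);          /* MPI_Waitall on all requests  */
evolve_edges(&current, &previous, a, dt);     /* halo-dependent border points */
swap_fields(&current, &previous);
```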
Compilation instructions
------------------------
Building and running the example requires the
[libpng](http://www.libpng.org/pub/png/libpng.html) library and a working MPI
environment. The Python version needs mpi4py and matplotlib.
Move to the proper subfolder (C or Fortran) and modify the top of the **Makefile**
according to your environment (compiler commands and flags). The code can then
be built simply with **make**.
How to run
----------
The number of MPI ranks has to be a factor of the grid dimension (the default
dimension is 2000). The default initial temperature field is a disk. The initial
temperature field can also be read from a file; the provided **bottle.dat**
illustrates what happens to a cold soda bottle in a sauna.
* Running with defaults: `mpirun -np 4 ./heat_mpi`
* Initial field from a file: `mpirun -np 4 ./heat_mpi bottle.dat`
* Initial field from a file, given number of time steps:
  `mpirun -np 4 ./heat_mpi bottle.dat 1000`
* Default pattern with given dimensions and time steps:
  `mpirun -np 4 ./heat_mpi 800 800 1000`
The program produces a series of heat_XXXX.png files that show the
time development of the temperature field.
CC=mpicc
CCFLAGS=-O3 -Wall
LDFLAGS=
LIBS=-lpng -lm
EXE=heat_mpi
OBJS=core.o setup.o utilities.o io.o main.o
OBJS_PNG=pngwriter.o
all: $(EXE)
pngwriter.o: pngwriter.c pngwriter.h
core.o: core.c heat.h
utilities.o: utilities.c heat.h
setup.o: setup.c heat.h
io.o: io.c heat.h
main.o: main.c heat.h
$(OBJS_PNG): C_COMPILER := $(CC)
$(OBJS): C_COMPILER := $(CC)
$(EXE): $(OBJS) $(OBJS_PNG)
$(CC) $(CCFLAGS) $(OBJS) $(OBJS_PNG) -o $@ $(LDFLAGS) $(LIBS)
%.o: %.c
$(C_COMPILER) $(CCFLAGS) -c $< -o $@
.PHONY: clean
clean:
-/bin/rm -f $(EXE) a.out *.o *.png *~
/* Main solver routines for heat equation solver */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <mpi.h>
#include "heat.h"
/* Exchange the boundary values */
void exchange_init(field *temperature, parallel_data *parallel)
{
int ind, width;
width = temperature->ny + 2;
// Send to the up, receive from down
ind = idx(1, 0, width);
MPI_Isend(&temperature->data[ind], 1, parallel->rowtype,
parallel->nup, 11, parallel->comm, &parallel->requests[0]);
ind = idx(temperature->nx + 1, 0, width);
MPI_Irecv(&temperature->data[ind], 1, parallel->rowtype,
parallel->ndown, 11, parallel->comm, &parallel->requests[1]);
// Send to the down, receive from up
ind = idx(temperature->nx, 0, width);
MPI_Isend(&temperature->data[ind], 1, parallel->rowtype,
parallel->ndown, 12, parallel->comm, &parallel->requests[2]);
ind = idx(0, 0, width);
MPI_Irecv(&temperature->data[ind], 1, parallel->rowtype,
parallel->nup, 12, parallel->comm, &parallel->requests[3]);
// Send to the left, receive from right
ind = idx(0, 1, width);
MPI_Isend(&temperature->data[ind], 1, parallel->columntype,
parallel->nleft, 13, parallel->comm, &parallel->requests[4]);
ind = idx(0, temperature->ny + 1, width);
MPI_Irecv(&temperature->data[ind], 1, parallel->columntype,
parallel->nright, 13, parallel->comm, &parallel->requests[5]);
// Send to the right, receive from left
ind = idx(0, temperature->ny, width);
MPI_Isend(&temperature->data[ind], 1, parallel->columntype,
parallel->nright, 14, parallel->comm, &parallel->requests[7]);
ind = 0;
MPI_Irecv(&temperature->data[ind], 1, parallel->columntype,
parallel->nleft, 14, parallel->comm, &parallel->requests[6]);
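/* The request slots are filled in no particular order; exchange_finalize()
waits on all eight of them. */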
}
/* complete the non-blocking communication */
void exchange_finalize(parallel_data *parallel)
{
MPI_Waitall(8, &parallel->requests[0], MPI_STATUSES_IGNORE);
}
/* Update the temperature values using five-point stencil */
void evolve_interior(field *curr, field *prev, double a, double dt)
{
int i, j;
int ic, iu, id, il, ir; // indexes for center, up, down, left, right
int width;
width = curr->ny + 2;
double dx2, dy2;
/* Determine the temperature field at next time step
* As we have fixed boundary conditions, the outermost gridpoints
* are not updated. */
dx2 = prev->dx * prev->dx;
dy2 = prev->dy * prev->dy;
for (i = 2; i < curr->nx; i++) {
for (j = 2; j < curr->ny; j++) {
ic = idx(i, j, width);
iu = idx(i+1, j, width);
id = idx(i-1, j, width);
ir = idx(i, j+1, width);
il = idx(i, j-1, width);
curr->data[ic] = prev->data[ic] + a * dt *
((prev->data[iu] -
2.0 * prev->data[ic] +
prev->data[id]) / dx2 +
(prev->data[ir] -
2.0 * prev->data[ic] +
prev->data[il]) / dy2);
}
}
}
/* Update the temperature values using five-point stencil */
/* update only the border-dependent regions of the field */
void evolve_edges(field *curr, field *prev, double a, double dt)
{
int i, j;
int ic, iu, id, il, ir; // indexes for center, up, down, left, right
int width;
width = curr->ny + 2;
double dx2, dy2;
/* Determine the temperature field at next time step
* As we have fixed boundary conditions, the outermost gridpoints
* are not updated. */
dx2 = prev->dx * prev->dx;
dy2 = prev->dy * prev->dy;
i = 1;
for (j = 1; j < curr->ny + 1; j++) {
ic = idx(i, j, width);
iu = idx(i+1, j, width);
id = idx(i-1, j, width);
ir = idx(i, j+1, width);
il = idx(i, j-1, width);
curr->data[ic] = prev->data[ic] + a * dt *
((prev->data[iu] -
2.0 * prev->data[ic] +
prev->data[id]) / dx2 +
(prev->data[ir] -
2.0 * prev->data[ic] +
prev->data[il]) / dy2);
}
i = curr -> nx;
for (j = 1; j < curr->ny + 1; j++) {
ic = idx(i, j, width);
iu = idx(i+1, j, width);
id = idx(i-1, j, width);
ir = idx(i, j+1, width);
il = idx(i, j-1, width);
curr->data[ic] = prev->data[ic] + a * dt *
((prev->data[iu] -
2.0 * prev->data[ic] +
prev->data[id]) / dx2 +
(prev->data[ir] -
2.0 * prev->data[ic] +
prev->data[il]) / dy2);
}
j = 1;
for (i = 1; i < curr->nx + 1; i++) {
ic = idx(i, j, width);
iu = idx(i+1, j, width);
id = idx(i-1, j, width);
ir = idx(i, j+1, width);
il = idx(i, j-1, width);
curr->data[ic] = prev->data[ic] + a * dt *
((prev->data[iu] -
2.0 * prev->data[ic] +
prev->data[id]) / dx2 +
(prev->data[ir] -
2.0 * prev->data[ic] +
prev->data[il]) / dy2);
}
j = curr -> ny;
for (i = 1; i < curr->nx + 1; i++) {
ic = idx(i, j, width);
iu = idx(i+1, j, width);
id = idx(i-1, j, width);
ir = idx(i, j+1, width);
il = idx(i, j-1, width);
curr->data[ic] = prev->data[ic] + a * dt *
((prev->data[iu] -
2.0 * prev->data[ic] +
prev->data[id]) / dx2 +
(prev->data[ir] -
2.0 * prev->data[ic] +
prev->data[il]) / dy2);
}
}
#ifndef __HEAT_H__
#define __HEAT_H__
/* Datatype for temperature field */
typedef struct {
/* nx and ny are the true dimensions of the field. The array data
* contains also ghost layers, so it will have dimensions nx+2 x ny+2 */
int nx; /* Local dimensions of the field */
int ny;
int nx_full; /* Global dimensions of the field */
int ny_full; /* Global dimensions of the field */
double dx;
double dy;
double *data;
} field;
/* Datatype for basic parallelization information */
typedef struct {
int size; /* Number of MPI tasks */
int rank;
int nup, ndown, nleft, nright; /* Ranks of neighbouring MPI tasks */
MPI_Comm comm; /* Cartesian communicator */
MPI_Request requests[8]; /* Requests for non-blocking communication */
MPI_Datatype rowtype; /* MPI Datatype for communication of rows */
MPI_Datatype columntype; /* MPI Datatype for communication of columns */
MPI_Datatype subarraytype; /* MPI Datatype for communication in text I/O */
MPI_Datatype restarttype; /* MPI Datatype for communication in restart I/O */
MPI_Datatype filetype; /* MPI Datatype for file view in restart I/O */
} parallel_data;
/* We use here fixed grid spacing */
#define DX 0.01
#define DY 0.01
/* file name for restart checkpoints*/
#define CHECKPOINT "HEAT_RESTART.dat"
/* Inline function for indexing the 2D arrays */
static inline int idx(int i, int j, int width)
{
return i * width + j;
}
/* Function prototypes */
double *malloc_2d(int nx, int ny);
void free_2d(double *array);
void set_field_dimensions(field *temperature, int nx, int ny,
parallel_data *parallel);
void parallel_setup(parallel_data *parallel, int nx, int ny);
void initialize(int argc, char *argv[], field *temperature1,
field *temperature2, int *nsteps, parallel_data *parallel,
int *iter0);
void generate_field(field *temperature, parallel_data *parallel);
void exchange_init(field *temperature, parallel_data *parallel);
void exchange_finalize(parallel_data *parallel);
void evolve_interior(field *curr, field *prev, double a, double dt);
void evolve_edges(field *curr, field *prev, double a, double dt);
void write_field(field *temperature, int iter, parallel_data *parallel);
void read_field(field *temperature1, field *temperature2,
char *filename, parallel_data *parallel);
void write_restart(field *temperature, parallel_data *parallel, int iter);
void read_restart(field *temperature, parallel_data *parallel, int *iter);
void copy_field(field *temperature1, field *temperature2);
void swap_fields(field *temperature1, field *temperature2);
void allocate_field(field *temperature);
void deallocate_field(field *temperature);
void finalize(field *temperature1, field *temperature2,
parallel_data *parallel);
#endif /* __HEAT_H__ */
/* I/O related functions for heat equation solver */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <mpi.h>
#include "heat.h"
#include "pngwriter.h"
/* Output routine that prints out a picture of the temperature
* distribution. */
void write_field(field *temperature, int iter, parallel_data *parallel)
{
char filename[64];
/* The actual write routine takes only the actual data
* (without ghost layers) so we need array for that. */
int height, width;
double *full_data;
int coords[2];
int ix, jy;
int i, p;
height = temperature->nx_full;
width = temperature->ny_full;
if (parallel->rank == 0) {
/* Copy the inner data */
full_data = malloc_2d(height, width);
for (i = 0; i < temperature->nx; i++)
memcpy(&full_data[idx(i, 0, width)],
&temperature->data[idx(i+1, 1, temperature->ny + 2)],
temperature->ny * sizeof(double));
/* Receive data from other ranks */
for (p = 1; p < parallel->size; p++) {
MPI_Cart_coords(parallel->comm, p, 2, coords);
ix = coords[0] * temperature->nx;
jy = coords[1] * temperature->ny;
MPI_Recv(&full_data[idx(ix, jy, width)], 1,
parallel->subarraytype, p, 22,
parallel->comm, MPI_STATUS_IGNORE);
}
/* Write out the data to a png file */
sprintf(filename, "%s_%04d.png", "heat", iter);
save_png(full_data, height, width, filename, 'c');
free_2d(full_data);
} else {
/* Send data */
MPI_Ssend(temperature->data, 1,
parallel->subarraytype, 0,
22, parallel->comm);
}
}
/* Read the initial temperature distribution from a file and
* initialize the temperature fields temperature1 and
* temperature2 to the same initial state. */
void read_field(field *temperature1, field *temperature2, char *filename,
parallel_data *parallel)
{
FILE *fp;
int nx, ny, i, j;
double *full_data;
int coords[2];
int ix, jy, p;
int count;
fp = fopen(filename, "r");
/* Read the header */
count = fscanf(fp, "# %d %d \n", &nx, &ny);
if (count < 2) {
fprintf(stderr, "Error while reading the input file!\n");
MPI_Abort(MPI_COMM_WORLD, -1);
}
parallel_setup(parallel, nx, ny);
set_field_dimensions(temperature1, nx, ny, parallel);
set_field_dimensions(temperature2, nx, ny, parallel);
/* Allocate arrays (including ghost layers) */
temperature1->data =
malloc_2d(temperature1->nx + 2, temperature1->ny + 2);
temperature2->data =
malloc_2d(temperature2->nx + 2, temperature2->ny + 2);
if (parallel->rank == 0) {
/* Full array */
full_data = malloc_2d(nx, ny);
/* Read the actual data */
for (i = 0; i < nx; i++) {
for (j = 0; j < ny; j++) {
count = fscanf(fp, "%lf", &full_data[idx(i, j, ny)]);
}
}
/* Copy to own local array */
for (i = 0; i < temperature1->nx; i++) {
memcpy(&temperature1->data[idx(i+1, 1, temperature1->ny + 2)],
&full_data[idx(i, 0, ny)], temperature1->ny * sizeof(double));
}
/* Send to other processes */
for (p = 1; p < parallel->size; p++) {
MPI_Cart_coords(parallel->comm, p, 2, coords);
ix = coords[0] * temperature1->nx;
jy = coords[1] * temperature1->ny;
MPI_Send(&full_data[idx(ix, jy, ny)], 1, parallel->subarraytype,
p, 44, parallel->comm);
}
} else {
/* Receive data */
MPI_Recv(temperature1->data, 1,
parallel->subarraytype, 0,
44, parallel->comm, MPI_STATUS_IGNORE);
}
/* Set the boundary values */
for (i = 0; i < temperature1->nx + 1; i++) {
temperature1->data[idx(i, 0, temperature1->ny + 2)] =
temperature1->data[idx(i, 1, temperature1->ny + 2)];
temperature1->data[idx(i, temperature1->ny + 1, temperature1->ny + 2)] =
temperature1->data[idx(i, temperature1->ny, temperature1->ny + 2)];
}
for (j = 0; j < temperature1->ny + 2; j++) {
temperature1->data[idx(0, j, temperature1->ny + 2)] =
temperature1->data[idx(1, j, temperature1->ny + 2)];
temperature1->data[idx(temperature1->nx + 1, j, temperature1->ny + 2)] =
temperature1->data[idx(temperature1->nx, j, temperature1->ny + 2)];
}
copy_field(temperature1, temperature2);
if (parallel->rank == 0) {
free_2d(full_data);
}
fclose(fp);
}
/* Write a restart checkpoint that contains field dimensions, current
* iteration number and temperature field. */
void write_restart(field *temperature, parallel_data *parallel, int iter)
{
MPI_File fp;
MPI_Offset disp;
// open the file and write the dimensions
MPI_File_open(MPI_COMM_WORLD, CHECKPOINT,
MPI_MODE_CREATE | MPI_MODE_WRONLY, MPI_INFO_NULL, &fp);
if (parallel->rank == 0) {
MPI_File_write(fp, &temperature->nx_full, 1, MPI_INT,
MPI_STATUS_IGNORE);
MPI_File_write(fp, &temperature->ny_full, 1, MPI_INT,
MPI_STATUS_IGNORE);
MPI_File_write(fp, &iter, 1, MPI_INT, MPI_STATUS_IGNORE);
}
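/* For simplicity, all tasks skip the three integers written at the
beginning of the file by rank 0 */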
disp = 3 * sizeof(int);
MPI_File_set_view(fp, 0, MPI_DOUBLE, parallel->filetype, "native",
MPI_INFO_NULL);
MPI_File_write_at_all(fp, disp, temperature->data,
1, parallel->restarttype, MPI_STATUS_IGNORE);
MPI_File_close(&fp);
}
/* Read a restart checkpoint that contains field dimensions, current
* iteration number and temperature field. */
void read_restart(field *temperature, parallel_data *parallel, int *iter)
{
MPI_File fp;
MPI_Offset disp;
int nx, ny;
// open the file and write the dimensions
MPI_File_open(MPI_COMM_WORLD, CHECKPOINT, MPI_MODE_RDONLY,
MPI_INFO_NULL, &fp);
// read grid size and current iteration
MPI_File_read_all(fp, &nx, 1, MPI_INT, MPI_STATUS_IGNORE);
MPI_File_read_all(fp, &ny, 1, MPI_INT, MPI_STATUS_IGNORE);
MPI_File_read_all(fp, iter, 1, MPI_INT, MPI_STATUS_IGNORE);
// set correct dimensions to MPI metadata
parallel_setup(parallel, nx, ny);
// set local dimensions and allocate memory for the data
set_field_dimensions(temperature, nx, ny, parallel);
allocate_field(temperature);
disp = 3 * sizeof(int);
MPI_File_set_view(fp, 0, MPI_DOUBLE, parallel->filetype, "native",
MPI_INFO_NULL);
MPI_File_read_at_all(fp, disp, temperature->data,
1, parallel->restarttype, MPI_STATUS_IGNORE);
MPI_File_close(&fp);
}
/* Heat equation solver in 2D. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <mpi.h>
#include "heat.h"
int main(int argc, char **argv)
{
double a = 0.5; //!< Diffusion constant
field current, previous; //!< Current and previous temperature fields
double dt; //!< Time step
int nsteps; //!< Number of time steps
int image_interval = 500; //!< Image output interval
int restart_interval = 200; //!< Checkpoint output interval
parallel_data parallelization; //!< Parallelization info
int iter, iter0; //!< Iteration counter
double dx2, dy2; //!< delta x and y squared
double start_clock; //!< Time stamps
MPI_Init(&argc, &argv);
initialize(argc, argv, &current, &previous, &nsteps,
&parallelization, &iter0);
/* Output the initial field */
write_field(&current, iter0, &parallelization);
iter0++;
/* Largest stable time step */
dx2 = current.dx * current.dx;
dy2 = current.dy * current.dy;
dt = dx2 * dy2 / (2.0 * a * (dx2 + dy2));
/* Get the start time stamp */
start_clock = MPI_Wtime();
/* Time evolve */
for (iter = iter0; iter < iter0 + nsteps; iter++) {
exchange_init(&previous, &parallelization);
evolve_interior(&current, &previous, a, dt);
exchange_finalize(&parallelization);
evolve_edges(&current, &previous, a, dt);
if (iter % image_interval == 0) {
write_field(&current, iter, &parallelization);
}
/* write a checkpoint now and then for easy restarting */
if (iter % restart_interval == 0) {
write_restart(&current, &parallelization, iter);
}
/* Swap current field so that it will be used
as previous for next iteration step */
swap_fields(&current, &previous);
}
/* Determine the CPU time used for the iteration */
if (parallelization.rank == 0) {
printf("Iteration took %.3f seconds.\n", (MPI_Wtime() - start_clock));
printf("Reference value at 5,5: %f\n",
previous.data[idx(5, 5, current.ny + 2)]);
}
write_field(&current, iter, &parallelization);
finalize(&current, &previous, &parallelization);
MPI_Finalize();
return 0;
}
#include <png.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include "pngwriter.h"
/* Datatype for RGB pixel */
typedef struct {
uint8_t red;
uint8_t green;
uint8_t blue;
} pixel_t;
void cmap(double value, const double scaling, const double offset,
pixel_t *pix);
static int heat_colormap[256][3] = {
{59, 76, 192}, {59, 76, 192}, {60, 78, 194}, {61, 80, 195},
{62, 81, 197}, {64, 83, 198}, {65, 85, 200}, {66, 87, 201},
{67, 88, 203}, {68, 90, 204}, {69, 92, 206}, {71, 93, 207},
{72, 95, 209}, {73, 97, 210}, {74, 99, 211}, {75, 100, 213},
{77, 102, 214}, {78, 104, 215}, {79, 105, 217}, {80, 107, 218},
{82, 109, 219}, {83, 110, 221}, {84, 112, 222}, {85, 114, 223},
{87, 115, 224}, {88, 117, 225}, {89, 119, 227}, {90, 120, 228},
{92, 122, 229}, {93, 124, 230}, {94, 125, 231}, {96, 127, 232},
{97, 129, 233}, {98, 130, 234}, {100, 132, 235}, {101, 133, 236},
{102, 135, 237}, {103, 137, 238}, {105, 138, 239}, {106, 140, 240},
{107, 141, 240}, {109, 143, 241}, {110, 144, 242}, {111, 146, 243},
{113, 147, 244}, {114, 149, 244}, {116, 150, 245}, {117, 152, 246},
{118, 153, 246}, {120, 155, 247}, {121, 156, 248}, {122, 157, 248},
{124, 159, 249}, {125, 160, 249}, {127, 162, 250}, {128, 163, 250},
{129, 164, 251}, {131, 166, 251}, {132, 167, 252}, {133, 168, 252},
{135, 170, 252}, {136, 171, 253}, {138, 172, 253}, {139, 174, 253},
{140, 175, 254}, {142, 176, 254}, {143, 177, 254}, {145, 179, 254},
{146, 180, 254}, {147, 181, 255}, {149, 182, 255}, {150, 183, 255},
{152, 185, 255}, {153, 186, 255}, {154, 187, 255}, {156, 188, 255},
{157, 189, 255}, {158, 190, 255}, {160, 191, 255}, {161, 192, 255},
{163, 193, 255}, {164, 194, 254}, {165, 195, 254}, {167, 196, 254},
{168, 197, 254}, {169, 198, 254}, {171, 199, 253}, {172, 200, 253},
{173, 201, 253}, {175, 202, 252}, {176, 203, 252}, {177, 203, 252},
{179, 204, 251}, {180, 205, 251}, {181, 206, 250}, {183, 207, 250},
{184, 207, 249}, {185, 208, 249}, {186, 209, 248}, {188, 209, 247},
{189, 210, 247}, {190, 211, 246}, {191, 211, 246}, {193, 212, 245},
{194, 213, 244}, {195, 213, 243}, {196, 214, 243}, {198, 214, 242},
{199, 215, 241}, {200, 215, 240}, {201, 216, 239}, {202, 216, 239},
{204, 217, 238}, {205, 217, 237}, {206, 217, 236}, {207, 218, 235},
{208, 218, 234}, {209, 218, 233}, {210, 219, 232}, {211, 219, 231},
{212, 219, 230}, {214, 220, 229}, {215, 220, 228}, {216, 220, 227},
{217, 220, 225}, {218, 220, 224}, {219, 220, 223}, {220, 221, 222},
{221, 221, 221}, {222, 220, 219}, {223, 220, 218}, {224, 219, 216},
{225, 219, 215}, {226, 218, 214}, {227, 218, 212}, {228, 217, 211},
{229, 216, 209}, {230, 216, 208}, {231, 215, 206}, {232, 215, 205},
{233, 214, 203}, {233, 213, 202}, {234, 212, 200}, {235, 212, 199},
{236, 211, 197}, {237, 210, 196}, {237, 209, 194}, {238, 208, 193},
{239, 208, 191}, {239, 207, 190}, {240, 206, 188}, {240, 205, 187},
{241, 204, 185}, {242, 203, 183}, {242, 202, 182}, {243, 201, 180},
{243, 200, 179}, {243, 199, 177}, {244, 198, 176}, {244, 197, 174},
{245, 196, 173}, {245, 195, 171}, {245, 194, 169}, {246, 193, 168},
{246, 192, 166}, {246, 190, 165}, {246, 189, 163}, {247, 188, 161},
{247, 187, 160}, {247, 186, 158}, {247, 184, 157}, {247, 183, 155},
{247, 182, 153}, {247, 181, 152}, {247, 179, 150}, {247, 178, 149},
{247, 177, 147}, {247, 175, 146}, {247, 174, 144}, {247, 172, 142},
{247, 171, 141}, {247, 170, 139}, {247, 168, 138}, {247, 167, 136},
{247, 165, 135}, {246, 164, 133}, {246, 162, 131}, {246, 161, 130},
{246, 159, 128}, {245, 158, 127}, {245, 156, 125}, {245, 155, 124},
{244, 153, 122}, {244, 151, 121}, {243, 150, 119}, {243, 148, 117},
{242, 147, 116}, {242, 145, 114}, {241, 143, 113}, {241, 142, 111},
{240, 140, 110}, {240, 138, 108}, {239, 136, 107}, {239, 135, 105},
{238, 133, 104}, {237, 131, 102}, {237, 129, 101}, {236, 128, 99},
{235, 126, 98}, {235, 124, 96}, {234, 122, 95}, {233, 120, 94},
{232, 118, 92}, {231, 117, 91}, {230, 115, 89}, {230, 113, 88},
{229, 111, 86}, {228, 109, 85}, {227, 107, 84}, {226, 105, 82},
{225, 103, 81}, {224, 101, 79}, {223, 99, 78}, {222, 97, 77},
{221, 95, 75}, {220, 93, 74}, {219, 91, 73}, {218, 89, 71},
{217, 87, 70}, {215, 85, 69}, {214, 82, 67}, {213, 80, 66},
{212, 78, 65}, {211, 76, 64}, {210, 74, 62}, {208, 71, 61},
{207, 69, 60}, {206, 67, 59}, {204, 64, 57}, {203, 62, 56},
{202, 59, 55}, {200, 57, 54}, {199, 54, 53}, {198, 52, 51},
{196, 49, 50}, {195, 46, 49}, {193, 43, 48}, {192, 40, 47},
{191, 37, 46}, {189, 34, 44}, {188, 30, 43}, {186, 26, 42},
{185, 22, 41}, {183, 17, 40}, {182, 11, 39}, {180, 4, 38}
};
/*
* Save the two dimensional array as a png image
* Arguments:
* double *data - pointer to an array of height * width values
* int height - number of ROWS to be written
* int width - number of COLUMNS to be written
* char *fname - name of the picture
* char lang - either 'c' or 'f' denoting the memory
* layout. That is, if 'f' is given, then rows
* and columns are swapped.
*/
int save_png(double *data, const int height, const int width,
const char *fname, const char lang)
{
FILE *fp;
png_structp pngstruct_ptr = NULL;
png_infop pnginfo_ptr = NULL;
png_byte **row_pointers = NULL;
int i, j;
/* Default return status is failure */
int status = -1;
int pixel_size = 3;
int depth = 8;
/* Open the file and initialize the png library.
* Note that in error cases we jump to clean up
* parts in the end of this function using goto. */
fp = fopen(fname, "wb");
if (fp == NULL) {
goto fopen_failed;
}
pngstruct_ptr =
png_create_write_struct(PNG_LIBPNG_VER_STRING, NULL, NULL, NULL);
if (pngstruct_ptr == NULL) {
goto pngstruct_create_failed;
}
pnginfo_ptr = png_create_info_struct(pngstruct_ptr);
if (pnginfo_ptr == NULL) {
goto pnginfo_create_failed;
}
if (setjmp(png_jmpbuf(pngstruct_ptr))) {
goto setjmp_failed;
}
png_set_IHDR(pngstruct_ptr, pnginfo_ptr, (size_t) width,
(size_t) height, depth, PNG_COLOR_TYPE_RGB,
PNG_INTERLACE_NONE,
PNG_COMPRESSION_TYPE_DEFAULT, PNG_FILTER_TYPE_DEFAULT);
row_pointers = png_malloc(pngstruct_ptr, height * sizeof(png_byte *));
for (i = 0; i < height; i++) {
png_byte *row = png_malloc(pngstruct_ptr,
sizeof(uint8_t) * width * pixel_size);
row_pointers[i] = row;
/* Branch according to the memory layout */
if (lang == 'c' || lang == 'C') {
for (j = 0; j < width; j++) {
pixel_t pixel;
/* Scale the values so that values between 0 and
* 100 degrees are mapped to values between 0 and 255 */
cmap(data[j + i * width], 2.55, 0.0, &pixel);
*row++ = pixel.red;
*row++ = pixel.green;
*row++ = pixel.blue;
}
} else if (lang == 'f' || lang == 'F') {
for (j = 0; j < width; j++) {
pixel_t pixel;
cmap(data[i + j * height], 2.55, 0.0, &pixel);
*row++ = pixel.red;
*row++ = pixel.green;
*row++ = pixel.blue;
}
} else {
fprintf(stderr, "Unknown memory order %c for pngwriter!\n", lang);
exit(EXIT_FAILURE);
}
}
png_init_io(pngstruct_ptr, fp);
png_set_rows(pngstruct_ptr, pnginfo_ptr, row_pointers);
png_write_png(pngstruct_ptr, pnginfo_ptr,
PNG_TRANSFORM_IDENTITY, NULL);
status = 0;
for (i = 0; i < height; i++) {
png_free(pngstruct_ptr, row_pointers[i]);
}
png_free(pngstruct_ptr, row_pointers);
/* Cleanup with labels */
setjmp_failed:
pnginfo_create_failed:
png_destroy_write_struct(&pngstruct_ptr, &pnginfo_ptr);
pngstruct_create_failed:
fclose(fp);
fopen_failed:
return status;
}
/*
* This routine sets the RGB values for the pixel_t structure using
* the colormap data heat_colormap. If the scaled value falls outside
* the range 0 to 255, blue or red is used instead.
*/
void cmap(double value, const double scaling, const double offset,
pixel_t *pix)
{
int ival;
ival = (int)(value * scaling + offset);
if (ival < 0) { /* Colder than colorscale, substitute blue */
pix->red = 0;
pix->green = 0;
pix->blue = 255;
} else if (ival > 255) {
pix->red = 255; /* Hotter than colormap, substitute red */
pix->green = 0;
pix->blue = 0;
} else {
pix->red = heat_colormap[ival][0];
pix->green = heat_colormap[ival][1];
pix->blue = heat_colormap[ival][2];
}
}
#ifndef PNGWRITER_H_
#define PNGWRITER_H_
int save_png(double *data, const int nx, const int ny, const char *fname,
const char lang);
#endif
/* Setup routines for heat equation solver */
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <assert.h>
#include <mpi.h>
#include "heat.h"
#include "pngwriter.h"
#define NSTEPS 500 // Default number of iteration steps
/* Initialize the heat equation solver */
void initialize(int argc, char *argv[], field *current,
field *previous, int *nsteps, parallel_data *parallel,
int *iter0)
{
/*
* Following combinations of command line arguments are possible:
* No arguments: use default field dimensions and number of time steps
* One argument: read initial field from a given file
* Two arguments: initial field from file and number of time steps
* Three arguments: field dimensions (rows,cols) and number of time steps
*/
int rows = 2000; //!< Field dimensions with default values
int cols = 2000;
char input_file[64]; //!< Name of the optional input file
int read_file = 0;
*nsteps = NSTEPS;
*iter0 = 0;
switch (argc) {
case 1:
/* Use default values */
break;
case 2:
/* Read initial field from a file */
strncpy(input_file, argv[1], 64);
read_file = 1;
break;
case 3:
/* Read initial field from a file */
strncpy(input_file, argv[1], 64);
read_file = 1;
/* Number of time steps */
*nsteps = atoi(argv[2]);
break;
case 4:
/* Field dimensions */
rows = atoi(argv[1]);
cols = atoi(argv[2]);
/* Number of time steps */
*nsteps = atoi(argv[3]);
break;
default:
printf("Unsupported number of command line arguments\n");
exit(-1);
}
// Check if checkpoint exists
if (!access(CHECKPOINT, F_OK)) {
read_restart(current, parallel, iter0);
set_field_dimensions(previous, current->nx_full, current->ny_full,
parallel);
allocate_field(previous);
if (parallel->rank == 0)
printf("Restarting from an earlier checkpoint saved"
" at iteration %d.\n", *iter0);
copy_field(current, previous);
} else if (read_file) {
read_field(current, previous, input_file, parallel);
} else {
parallel_setup(parallel, rows, cols);
set_field_dimensions(current, rows, cols, parallel);
set_field_dimensions(previous, rows, cols, parallel);
generate_field(current, parallel);
allocate_field(previous);
copy_field(current, previous);
}
}
/* Generate initial temperature field. Pattern is disc with a radius
* of nx_full / 6 in the center of the grid.
* Boundary conditions are (different) constant temperatures outside the grid */
void generate_field(field *temperature, parallel_data *parallel)
{
int i, j, ind, width;
double radius;
int dx, dy;
int dims[2], coords[2], periods[2];
/* Allocate the temperature array, note that
* we have to allocate also the ghost layers */
temperature->data =
malloc_2d(temperature->nx + 2, temperature->ny + 2);
MPI_Cart_get(parallel->comm, 2, dims, periods, coords);
/* Radius of the source disc */
radius = temperature->nx_full / 6.0;
width = temperature->ny + 2;
for (i = 0; i < temperature->nx + 2; i++) {
for (j = 0; j < temperature->ny + 2; j++) {
/* Distance of point i, j from the origin */
dx = i + coords[0] * temperature->nx -
temperature->nx_full / 2 + 1;
dy = j + coords[1] * temperature->ny -
temperature->ny_full / 2 + 1;
ind = idx(i, j, width);
if (dx * dx + dy * dy < radius * radius) {
temperature->data[ind] = 5.0;
} else {
temperature->data[ind] = 65.0;
}
}
}
/* Boundary conditions */
// Left boundary
if (coords[1] == 0) {
for (i = 0; i < temperature->nx + 2; i++) {
ind = idx(i, 0, width);
temperature->data[ind] = 20.0;
}
}
// Right boundary
if (coords[1] == dims[1] - 1) {
for (i = 0; i < temperature->nx + 2; i++) {
ind = idx(i, temperature->ny + 1, width);
temperature->data[ind] = 70.0;
}
}
// Upper boundary
if (coords[0] == 0) {
for (j = 0; j < temperature->ny + 2; j++) {
ind = idx(0, j, width);
temperature->data[ind] = 85.0;
}
}
// Lower boundary
if (coords[0] == dims[0] - 1) {
for (j = 0; j < temperature->ny + 2; j++) {
ind = idx(temperature->nx + 1, j, width);
temperature->data[ind] = 5.0;
}
}
}
/* Set dimensions of the field. Note that the nx is the size of the first
* dimension and ny the second. */
void set_field_dimensions(field *temperature, int nx, int ny,
parallel_data *parallel)
{
int nx_local, ny_local;
int dims[2], coords[2], periods[2];
MPI_Cart_get(parallel->comm, 2, dims, periods, coords);
nx_local = nx / dims[0];
ny_local = ny / dims[1];
temperature->dx = DX;
temperature->dy = DY;
temperature->nx = nx_local;
temperature->ny = ny_local;
temperature->nx_full = nx;
temperature->ny_full = ny;
}
void parallel_setup(parallel_data *parallel, int nx, int ny)
{
int nx_local;
int ny_local;
int world_size;
int dims[2] = {0, 0};
int periods[2] = { 0, 0 };
/* Set grid dimensions */
MPI_Comm_size(MPI_COMM_WORLD, &world_size);
MPI_Dims_create(world_size, 2, dims);
nx_local = nx / dims[0];
ny_local = ny / dims[1];
if (nx_local * dims[0] != nx) {
printf("Cannot divide grid evenly to processors in x-direction "
"%d x %d != %d\n", nx_local, dims[0], nx);
MPI_Abort(MPI_COMM_WORLD, -2);
}
if (ny_local * dims[1] != ny) {
printf("Cannot divide grid evenly to processors in y-direction "
"%d x %d != %d\n", ny_local, dims[1], ny);
MPI_Abort(MPI_COMM_WORLD, -2);
}
/* Create cartesian communicator */
MPI_Cart_create(MPI_COMM_WORLD, 2, dims, periods, 1, &parallel->comm);
MPI_Cart_shift(parallel->comm, 0, 1, &parallel->nup, &parallel->ndown);
MPI_Cart_shift(parallel->comm, 1, 1, &parallel->nleft,
&parallel->nright);
MPI_Comm_size(parallel->comm, &parallel->size);
MPI_Comm_rank(parallel->comm, &parallel->rank);
if (parallel->rank == 0) {
printf("Using domain decomposition %d x %d\n", dims[0], dims[1]);
printf("Local domain size %d x %d\n", nx_local, ny_local);
}
/* Create datatypes for halo exchange */
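/* In the row-major C layout a full row of the local array (ny_local + 2
 * doubles, ghost cells included) is contiguous, while a column is a
 * strided vector with stride ny_local + 2. */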
MPI_Type_vector(nx_local + 2, 1, ny_local + 2, MPI_DOUBLE,
&parallel->columntype);
MPI_Type_contiguous(ny_local + 2, MPI_DOUBLE, &parallel->rowtype);
MPI_Type_commit(&parallel->columntype);
MPI_Type_commit(&parallel->rowtype);
/* Create datatype for subblock needed in text I/O
* Rank 0 uses datatype for receiving data into full array while
* other ranks use datatype for sending the inner part of array */
int sizes[2] = {nx_local + 2, ny_local + 2};
int subsizes[2] = { nx_local, ny_local };
int offsets[2] = {1, 1};
if (parallel->rank == 0) {
sizes[0] = nx;
sizes[1] = ny;
offsets[0] = 0;
offsets[1] = 0;
}
MPI_Type_create_subarray(2, sizes, subsizes, offsets, MPI_ORDER_C,
MPI_DOUBLE, &parallel->subarraytype);
MPI_Type_commit(&parallel->subarraytype);
/* Create datatypes for restart I/O
* For boundary ranks also the ghost layer (boundary condition)
* is written */
int coords[2];
MPI_Cart_coords(parallel->comm, parallel->rank, 2, coords);
sizes[0] = nx + 2;
sizes[1] = ny + 2;
offsets[0] = 1 + coords[0] * nx_local;
offsets[1] = 1 + coords[1] * ny_local;
if (coords[0] == 0) {
offsets[0] -= 1;
subsizes[0] += 1;
}
if (coords[0] == dims[0] - 1) {
subsizes[0] += 1;
}
if (coords[1] == 0) {
offsets[1] -= 1;
subsizes[1] += 1;
}
if (coords[1] == dims[1] - 1) {
subsizes[1] += 1;
}
MPI_Type_create_subarray(2, sizes, subsizes, offsets, MPI_ORDER_C,
MPI_DOUBLE, &parallel->filetype);
MPI_Type_commit(&parallel->filetype);
sizes[0] = nx_local + 2;
sizes[1] = ny_local + 2;
offsets[0] = 1;
offsets[1] = 1;
if (coords[0] == 0) {
offsets[0] = 0;
}
if (coords[1] == 0) {
offsets[1] = 0;
}
MPI_Type_create_subarray(2, sizes, subsizes, offsets, MPI_ORDER_C,
MPI_DOUBLE, &parallel->restarttype);
MPI_Type_commit(&parallel->restarttype);
}
/* Deallocate the 2D arrays of temperature fields */
void finalize(field *temperature1, field *temperature2,
parallel_data *parallel)
{
free_2d(temperature1->data);
free_2d(temperature2->data);
MPI_Type_free(&parallel->rowtype);
MPI_Type_free(&parallel->columntype);
MPI_Type_free(&parallel->subarraytype);
MPI_Type_free(&parallel->restarttype);
MPI_Type_free(&parallel->filetype);
}
/* Utility functions for heat equation solver
* NOTE: This file does not need to be edited! */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <mpi.h>
#include "heat.h"
/* Utility routine for allocating a two dimensional array */
double *malloc_2d(int nx, int ny)
{
double *array;
array = (double *) malloc(nx * ny * sizeof(double));
return array;
}
/* Utility routine for deallocating a two dimensional array */
void free_2d(double *array)
{
free(array);
}
/* Copy data on temperature1 into temperature2 */
void copy_field(field *temperature1, field *temperature2)
{
assert(temperature1->nx == temperature2->nx);
assert(temperature1->ny == temperature2->ny);
memcpy(temperature2->data, temperature1->data,
(temperature1->nx + 2) * (temperature1->ny + 2) * sizeof(double));
}
/* Swap the data of fields temperature1 and temperature2 */
void swap_fields(field *temperature1, field *temperature2)
{
double *tmp;
tmp = temperature1->data;
temperature1->data = temperature2->data;
temperature2->data = tmp;
}
/* Allocate memory for a temperature field and initialise it to zero */
void allocate_field(field *temperature)
{
// Allocate also ghost layers
temperature->data =
malloc_2d(temperature->nx + 2, temperature->ny + 2);
// Initialize to zero
memset(temperature->data, 0.0,
(temperature->nx + 2) * (temperature->ny + 2) * sizeof(double));
}
FC=mpif90
CC=gcc
FCFLAGS=-O3 -Wall
CCFLAGS=-O3 -Wall
LDFLAGS=
LIBS=-lpng
EXE=heat_mpi
OBJS=main.o heat_mod.o core.o setup.o utilities.o io.o pngwriter_mod.o
OBJS_PNG=pngwriter.o
all: $(EXE)
pngwriter.o: pngwriter.c pngwriter.h
core.o: core.F90 heat_mod.o
utilities.o: utilities.F90 heat_mod.o
io.o: io.F90 heat_mod.o pngwriter_mod.o
setup.o: setup.F90 heat_mod.o utilities.o io.o
pngwriter_mod.o: pngwriter_mod.F90 heat_mod.o
main.o: main.F90 heat_mod.o core.o io.o setup.o utilities.o
$(EXE): $(OBJS) $(OBJS_PNG)
$(FC) $(FCFLAGS) $(OBJS) $(OBJS_PNG) -o $@ $(LDFLAGS) $(LIBS)
%.o: %.F90
$(FC) $(FCFLAGS) -c $< -o $@
%.o: %.c
$(CC) $(CCFLAGS) -c $< -o $@
.PHONY: clean
clean:
-/bin/rm -f $(EXE) a.out *.o *.mod *.png *~
! Main solver routines for heat equation solver
module core
use heat
contains
! Exchange the boundary data between MPI tasks
subroutine exchange_init(field0, parallel)
use mpi
implicit none
type(field), intent(inout) :: field0
type(parallel_data), intent(in) :: parallel
integer :: ierr
! Send to left, receive from right
call mpi_isend(field0%data(0, 1), 1, parallel%columntype, &
& parallel%nleft, 11, parallel%comm, parallel%requests(1), ierr)
call mpi_irecv(field0%data(0, field0%ny + 1), 1, parallel%columntype, &
& parallel%nright, 11, &
& parallel%comm, parallel%requests(2), ierr)
! Send to right, receive from left
call mpi_isend(field0%data(0, field0%ny), 1, parallel%columntype, &
& parallel%nright, 12, parallel%comm, parallel%requests(3), ierr)
call mpi_irecv(field0%data(0, 0), 1, parallel%columntype, &
& parallel%nleft, 12, &
& parallel%comm, parallel%requests(4), ierr)
! Send to up receive from down
call mpi_isend(field0%data(1, 0), 1, parallel%rowtype, &
& parallel%nup, 13, parallel%comm, parallel%requests(5), ierr)
call mpi_irecv(field0%data(field0%nx+1, 0), 1, parallel%rowtype, &
& parallel%ndown, 13, parallel%comm, parallel%requests(6), ierr)
! Send to the down, receive from up
call mpi_isend(field0%data(field0%nx, 0), 1, parallel%rowtype, &
& parallel%ndown, 14, parallel%comm, parallel%requests(7), ierr)
call mpi_irecv(field0%data(0, 0), 1, parallel%rowtype, &
& parallel%nup, 14, parallel%comm, parallel%requests(8), ierr)
end subroutine exchange_init
! Finalize the non-blocking communication
subroutine exchange_finalize(parallel)
use mpi
implicit none
type(parallel_data), intent(inout) :: parallel
integer :: ierr
call mpi_waitall(8, parallel%requests, mpi_statuses_ignore, ierr)
end subroutine exchange_finalize
! Compute one time step of temperature evolution
! Arguments:
! curr (type(field)): current temperature values
! prev (type(field)): values from previous time step
! a (real(dp)): update equation constant
! dt (real(dp)): time step value
subroutine evolve_interior(curr, prev, a, dt)
implicit none
type(field), intent(inout) :: curr, prev
real(dp) :: a, dt
integer :: i, j, nx, ny
nx = curr%nx
ny = curr%ny
do j = 2, ny - 1
do i = 2, nx - 1
curr%data(i, j) = prev%data(i, j) + a * dt * &
& ((prev%data(i-1, j) - 2.0 * prev%data(i, j) + &
& prev%data(i+1, j)) / curr%dx**2 + &
& (prev%data(i, j-1) - 2.0 * prev%data(i, j) + &
& prev%data(i, j+1)) / curr%dy**2)
end do
end do
end subroutine evolve_interior
! Compute one time step of temperature evolution
! Arguments:
! curr (type(field)): current temperature values
! prev (type(field)): values from previous time step
! a (real(dp)): update equation constant
! dt (real(dp)): time step value
! Update only the border-dependent part
subroutine evolve_edges(curr, prev, a, dt)
implicit none
type(field), intent(inout) :: curr, prev
real(dp) :: a, dt
integer :: i, j, nx, ny
nx = curr%nx
ny = curr%ny
j = 1
do i = 1, nx
curr%data(i, j) = prev%data(i, j) + a * dt * &
& ((prev%data(i-1, j) - 2.0 * prev%data(i, j) + &
& prev%data(i+1, j)) / curr%dx**2 + &
& (prev%data(i, j-1) - 2.0 * prev%data(i, j) + &
& prev%data(i, j+1)) / curr%dy**2)
end do
j = ny
do i = 1, nx
curr%data(i, j) = prev%data(i, j) + a * dt * &
& ((prev%data(i-1, j) - 2.0 * prev%data(i, j) + &
& prev%data(i+1, j)) / curr%dx**2 + &
& (prev%data(i, j-1) - 2.0 * prev%data(i, j) + &
& prev%data(i, j+1)) / curr%dy**2)
end do
i = 1
do j = 1, ny
curr%data(i, j) = prev%data(i, j) + a * dt * &
& ((prev%data(i-1, j) - 2.0 * prev%data(i, j) + &
& prev%data(i+1, j)) / curr%dx**2 + &
& (prev%data(i, j-1) - 2.0 * prev%data(i, j) + &
& prev%data(i, j+1)) / curr%dy**2)
end do
i = nx
do j = 1, ny
curr%data(i, j) = prev%data(i, j) + a * dt * &
& ((prev%data(i-1, j) - 2.0 * prev%data(i, j) + &
& prev%data(i+1, j)) / curr%dx**2 + &
& (prev%data(i, j-1) - 2.0 * prev%data(i, j) + &
& prev%data(i, j+1)) / curr%dy**2)
end do
end subroutine evolve_edges
end module core
! Field metadata for heat equation solver
module heat
use iso_fortran_env, only : REAL64
implicit none
integer, parameter :: dp = REAL64
real(dp), parameter :: DX = 0.01, DY = 0.01 ! Fixed grid spacing
type :: field
integer :: nx ! local dimension of the field
integer :: ny
integer :: nx_full ! global dimension of the field
integer :: ny_full
real(dp) :: dx
real(dp) :: dy
real(dp), dimension(:,:), allocatable :: data
end type field
type :: parallel_data
integer :: size
integer :: rank
integer :: nup, ndown, nleft, nright ! Ranks of neighbouring MPI tasks
integer :: comm
integer :: requests(8) ! Non-blocking communication handles
integer :: rowtype ! MPI Datatype for communication of
! rows
integer :: columntype ! MPI Datatype for communication of
! columns
integer :: subarraytype ! MPI Datatype for text I/O
integer :: restarttype ! MPI Datatype for restart I/O
integer :: filetype ! MPI Datatype for file view in
! restart I/O
end type parallel_data
contains
! Initialize the field type metadata
! Arguments:
! field0 (type(field)): input field
! nx, ny, dx, dy: field dimensions and spatial step size
subroutine set_field_dimensions(field0, nx, ny, parallel)
implicit none
type(field), intent(out) :: field0
integer, intent(in) :: nx, ny
type(parallel_data), intent(in) :: parallel
integer :: nx_local, ny_local
integer, dimension(2) :: dims, coords
logical :: periods(2)
integer :: ierr
call mpi_cart_get(parallel%comm, 2, dims, periods, coords, ierr)
nx_local = nx / dims(1)
ny_local = ny / dims(2)
field0%dx = DX
field0%dy = DY
field0%nx = nx_local
field0%ny = ny_local
field0%nx_full = nx
field0%ny_full = ny
end subroutine set_field_dimensions
subroutine parallel_setup(parallel, nx, ny)
use mpi
implicit none
type(parallel_data), intent(out) :: parallel
integer, intent(in), optional :: nx, ny
integer :: nx_local, ny_local
integer :: world_size
integer :: dims(2) = (/0, 0/)
logical :: periods(2) = (/.false., .false./)
integer :: coords(2)
integer, dimension(2) :: sizes, subsizes, offsets
integer :: ierr
call mpi_comm_size(MPI_COMM_WORLD, world_size, ierr)
! Set grid dimensions
call mpi_dims_create(world_size, 2, dims, ierr)
nx_local = nx / dims(1)
ny_local = ny / dims(2)
! Ensure that the grid is divisible to the MPI tasks
if (nx_local * dims(1) /= nx) then
write(*,*) 'Cannot divide grid evenly to processors in x-direction', &
& nx_local, dims(1), nx
call mpi_abort(MPI_COMM_WORLD, -2, ierr)
end if
if (ny_local * dims(2) /= ny) then
write(*,*) 'Cannot divide grid evenly to processors in y-direction', &
& ny_local, dims(2), ny
call mpi_abort(MPI_COMM_WORLD, -2, ierr)
end if
! Create cartesian communicator
call mpi_cart_create(MPI_COMM_WORLD, 2, dims, periods, .true., parallel%comm, ierr)
call mpi_cart_shift(parallel%comm, 0, 1, parallel%nup, parallel%ndown, ierr)
call mpi_cart_shift(parallel%comm, 1, 1, parallel%nleft, &
& parallel%nright, ierr)
call mpi_comm_size(parallel%comm, parallel%size, ierr)
call mpi_comm_rank(parallel%comm, parallel%rank, ierr)
if (parallel%rank == 0) then
write(*,'(A, I3, A3, I3)') 'Using domain decomposition', dims(1), 'x', &
& dims(2)
write(*,'(A, I5, A3, I5)') 'Local domain size', nx_local, 'x', ny_local
end if
! Create datatypes for halo exchange
call mpi_type_vector(ny_local + 2, 1, nx_local + 2, MPI_DOUBLE_PRECISION, &
& parallel%rowtype, ierr)
call mpi_type_contiguous(nx_local + 2, MPI_DOUBLE_PRECISION, &
& parallel%columntype, ierr)
call mpi_type_commit(parallel%rowtype, ierr)
call mpi_type_commit(parallel%columntype, ierr)
! Create datatype for subblock needed in I/O
! Rank 0 uses datatype for receiving data into full array while
! other ranks use datatype for sending the inner part of array
sizes(1) = nx_local + 2
sizes(2) = ny_local + 2
subsizes(1) = nx_local
subsizes(2) = ny_local
offsets(1) = 1
offsets(2) = 1
if (parallel%rank == 0) then
sizes(1) = nx
sizes(2) = ny
offsets(1) = 0
offsets(2) = 0
end if
call mpi_type_create_subarray(2, sizes, subsizes, offsets, &
& MPI_ORDER_FORTRAN, MPI_DOUBLE_PRECISION, parallel%subarraytype, &
& ierr)
call mpi_type_commit(parallel%subarraytype, ierr)
! Create datatypes for restart I/O
! For boundary ranks also the ghost layer (boundary condition)
! is written
call mpi_cart_coords(parallel%comm, parallel%rank, 2, coords, ierr);
sizes(1) = nx + 2
sizes(2) = ny + 2
offsets(1) = 1 + coords(1) * nx_local
offsets(2) = 1 + coords(2) * ny_local
if (coords(1) == 0) then
offsets(1) = offsets(1) - 1
subsizes(1) = subsizes(1) + 1
end if
if (coords(1) == dims(1) - 1) then
subsizes(1) = subsizes(1) + 1;
end if
if (coords(2) == 0) then
offsets(2) = offsets(2) - 1
subsizes(2) = subsizes(2) + 1
end if
if (coords(2) == dims(2) - 1) then
subsizes(2) = subsizes(2) + 1
end if
call mpi_type_create_subarray(2, sizes, subsizes, offsets, &
& MPI_ORDER_FORTRAN, MPI_DOUBLE_PRECISION, parallel%filetype, ierr)
call mpi_type_commit(parallel%filetype, ierr)
sizes(1) = nx_local + 2
sizes(2) = ny_local + 2
offsets(1) = 1
offsets(2) = 1
if (coords(1) == 0) then
offsets(1) = 0
end if
if (coords(2) == 0) then
offsets(2) = 0
end if
call mpi_type_create_subarray(2, sizes, subsizes, offsets, &
& MPI_ORDER_FORTRAN, MPI_DOUBLE_PRECISION, parallel%restarttype, ierr)
call mpi_type_commit(parallel%restarttype, ierr)
end subroutine parallel_setup
end module heat
! I/O routines for heat equation solver
module io
use heat
use mpi
contains
! Output routine, saves the temperature distribution as a png image
! Arguments:
! curr (type(field)): variable with the temperature data
! iter (integer): index of the time step
subroutine write_field(curr, iter, parallel)
use pngwriter
implicit none
type(field), intent(in) :: curr
integer, intent(in) :: iter
type(parallel_data), intent(in) :: parallel
character(len=85) :: filename
integer :: stat
real(dp), dimension(:,:), allocatable, target :: full_data
integer :: coords(2)
integer :: ix, jy
integer :: p, ierr
if (parallel%rank == 0) then
allocate(full_data(curr%nx_full, curr%ny_full))
! Copy rank #0 data to the global array
full_data(1:curr%nx, 1:curr%ny) = curr%data(1:curr%nx, 1:curr%ny)
! Receive data from other ranks
do p = 1, parallel%size - 1
call mpi_cart_coords(parallel%comm, p, 2, coords, ierr)
ix = coords(1) * curr%nx + 1
jy = coords(2) * curr%ny + 1
call mpi_recv(full_data(ix, jy), 1, parallel%subarraytype, p, 22, &
& parallel%comm, MPI_STATUS_IGNORE, ierr)
end do
write(filename,'(A5,I4.4,A4,A)') 'heat_', iter, '.png'
stat = save_png(full_data, curr%nx_full, curr%ny_full, filename)
deallocate(full_data)
else
! Send data
call mpi_ssend(curr%data, 1, parallel%subarraytype, 0, 22, &
& parallel%comm, ierr)
end if
end subroutine write_field
! Reads the temperature distribution from an input file
! Arguments:
! field0 (type(field)): field variable that will store the
! read data
! filename (char): name of the input file
! Note that this version assumes the input data to be in C memory layout
subroutine read_field(field0, filename, parallel)
implicit none
type(field), intent(out) :: field0
character(len=85), intent(in) :: filename
type(parallel_data), intent(out) :: parallel
integer :: nx, ny, i, p, ierr
character(len=2) :: dummy
real(dp), dimension(:,:), allocatable :: full_data
integer :: coords(2)
integer :: ix, jy
open(10, file=filename)
! Read the header
read(10, *) dummy, nx, ny
call parallel_setup(parallel, nx, ny)
call set_field_dimensions(field0, nx, ny, parallel)
! The arrays for temperature field contain also a halo region
allocate(field0%data(0:field0%nx+1, 0:field0%ny+1))
if (parallel%rank == 0) then
allocate(full_data(nx, ny))
! Read the data
do i = 1, nx
read(10, *) full_data(i, 1:ny)
end do
! Copy own local part
field0%data(1:field0%nx, 1:field0%ny) = full_data(1:field0%nx, 1:field0%ny)
! Send to other process
do p=1, parallel%size - 1
call mpi_cart_coords(parallel%comm, p, 2, coords, ierr)
ix = coords(1) * field0%nx + 1
jy = coords(2) * field0%ny + 1
call mpi_send(full_data(ix, jy), 1, parallel%subarraytype, p, 44, &
& parallel%comm, ierr)
end do
else
! Receive data
call mpi_recv(field0%data, 1, parallel%subarraytype, 0, 44, &
& parallel%comm, MPI_STATUS_IGNORE, ierr)
end if
! Set the boundary values
field0%data(1:field0%nx, 0) = field0%data(1:field0%nx, 1)
field0%data(1:field0%nx, field0%ny+1) = field0%data(1:field0%nx, field0%ny)
field0%data(0, 0:field0%ny+1) = field0%data(1, 0:field0%ny+1)
field0%data(field0%nx+1, 0:field0%ny+1) = field0%data(field0%nx, 0:field0%ny+1)
close(10)
if (parallel%rank == 0) then
deallocate(full_data)
end if
end subroutine read_field
! Write a restart checkpoint that contains field dimensions, current
! iteration number and temperature field.
subroutine write_restart(temp, parallel, iter)
implicit none
type(field), intent(in) :: temp
type(parallel_data), intent(in) :: parallel
integer, intent(in) :: iter
integer :: fp
integer(kind=MPI_OFFSET_KIND) :: disp
integer :: ierr
call mpi_file_open(MPI_COMM_WORLD, "HEAT_RESTART.dat", &
& MPI_MODE_CREATE + MPI_MODE_WRONLY, &
& MPI_INFO_NULL, fp, ierr)
! write the restart header by the rank#0
if (parallel%rank == 0) then
disp = 0
call mpi_file_write_at(fp, disp, temp%nx_full, 1, MPI_INTEGER, MPI_STATUS_IGNORE, ierr)
disp = disp + sizeof(temp%nx_full)
call mpi_file_write_at(fp, disp, temp%ny_full, 1, MPI_INTEGER, MPI_STATUS_IGNORE, ierr)
disp = disp + sizeof(temp%ny_full)
call mpi_file_write_at(fp, disp, iter, 1, MPI_INTEGER, MPI_STATUS_IGNORE, ierr)
end if
! For simplicity, skip three integers at the beginning of file in all tasks
disp = 3 * sizeof(temp%nx_full)
call mpi_file_set_view(fp, 0, MPI_DOUBLE_PRECISION, parallel%filetype, &
& 'native', MPI_INFO_NULL, ierr);
call mpi_file_write_at_all(fp, disp, temp%data, &
& 1, parallel%restarttype, MPI_STATUS_IGNORE, ierr)
call mpi_file_close(fp, ierr)
end subroutine write_restart
! Read a restart checkpoint that contains field dimensions, current
! iteration number and temperature field.
subroutine read_restart(temp, parallel, iter)
implicit none
type(field), intent(inout) :: temp
type(parallel_data), intent(inout) :: parallel
integer, intent(out) :: iter
integer :: rows, cols
integer :: fp
integer(kind=MPI_OFFSET_KIND) :: disp
integer :: ierr
call mpi_file_open(MPI_COMM_WORLD, "HEAT_RESTART.dat", MPI_MODE_RDONLY, &
& MPI_INFO_NULL, fp, ierr)
! read in the restart header (dimensions, number of preceding steps) by the
! rank#0 and broadcast it to other ranks
if (parallel%rank == 0) then
disp = 0
call mpi_file_read_at(fp, disp, rows, 1, MPI_INTEGER, MPI_STATUS_IGNORE, ierr)
disp = disp + sizeof(rows)
call mpi_file_read_at(fp, disp, cols, 1, MPI_INTEGER, MPI_STATUS_IGNORE, ierr)
disp = disp + sizeof(cols)
call mpi_file_read_at(fp, disp, iter, 1, MPI_INTEGER, MPI_STATUS_IGNORE, ierr)
end if
call mpi_bcast(rows, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
call mpi_bcast(cols, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
call mpi_bcast(iter, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
! set correct dimensions to MPI metadata
call parallel_setup(parallel, rows, cols)
! set local dimensions and allocate memory for the data
call set_field_dimensions(temp, rows, cols, parallel)
allocate(temp%data(0:temp%nx+1, 0:temp%ny+1))
temp%data(:,:) = 0.0
! read in restart data
disp = 3*sizeof(rows)
call mpi_file_set_view(fp, 0, MPI_DOUBLE_PRECISION, parallel%filetype, &
& 'native', MPI_INFO_NULL, ierr);
call mpi_file_read_at_all(fp, disp, temp%data, &
& 1, parallel%restarttype, MPI_STATUS_IGNORE, ierr)
call mpi_file_close(fp, ierr)
end subroutine read_restart
end module io
! Heat equation solver in 2D.
program heat_solve
use heat
use core
use io
use setup
use utilities
use mpi
implicit none
real(dp), parameter :: a = 0.5 ! Diffusion constant
type(field) :: current, previous ! Current and previous temperature fields
real(dp) :: dt ! Time step
integer :: nsteps ! Number of time steps
integer, parameter :: image_interval = 500 ! Image output interval
integer, parameter :: checkpoint_interval = 200 ! restart interval
type(parallel_data) :: parallelization
integer :: ierr
integer :: iter, iter0
real(kind=dp) :: start, stop ! Timers
call mpi_init(ierr)
call initialize(current, previous, nsteps, parallelization, iter0)
! Draw the picture of the initial state
call write_field(current, iter0, parallelization)
iter0 = iter0 + 1
! Largest stable time step
dt = current%dx**2 * current%dy**2 / &
& (2.0 * a * (current%dx**2 + current%dy**2))
! Main iteration loop, save a picture every
! image_interval steps
start = mpi_wtime()
do iter = iter0, iter0 + nsteps
call exchange_init(previous, parallelization)
call evolve_interior(current, previous, a, dt)
call exchange_finalize(parallelization)
call evolve_edges(current,previous, a, dt)
if (mod(iter, image_interval) == 0) then
call write_field(current, iter, parallelization)
end if
! Write a checkpoint now and then for easy restarting
if (mod(iter, checkpoint_interval) == 0) then
call write_restart(current, parallelization, iter)
end if
call swap_fields(current, previous)
end do
stop = mpi_wtime()
if (parallelization % rank == 0) then
write(*,'(A,F7.3,A)') 'Iteration took ', stop - start, ' seconds.'
write(*,'(A,G0)') 'Reference value at 5,5: ', previous % data(5,5)
end if
call finalize(current, previous)
call mpi_finalize(ierr)
end program heat_solve