from __future__ import print_function
from mpi4py import MPI
import numpy as np

comm = MPI.COMM_WORLD
size = comm.Get_size()
rank = comm.Get_rank()

if size != 2:
    raise RuntimeError("Please run with two MPI tasks")

data = np.zeros((8, 8), int)
if rank == 0:
    for i in range(8):
        data[i, :] = np.arange(1, 9) + (i + 1) * 10

if rank == 0:
    print("Original data:")
    print(data)

# Create the custom datatype
# Note: the default numpy integer is 64 bits
columntype = MPI.INT64_T.Create_vector(8, 1, 8)
columntype.Commit()

# mpi4py requires that the input and output buffers are contiguous
# in memory. Thus, in order to send/receive the second column we need
# a contiguous view starting from the second element in memory, which
# can be obtained by flattening the array into 1D with ravel.
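# (For a C-contiguous array ravel() returns a view rather than a copy, so
# the Recv below writes directly into the original data array.)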
if rank == 0:
    comm.Send((data.ravel()[1:], 1, columntype), dest=1)
elif rank == 1:
    comm.Recv((data.ravel()[1:], 1, columntype), source=0)

if rank == 1:
    print("Received data:")
    print(data)
from __future__ import print_function
from mpi4py import MPI
import numpy as np

comm = MPI.COMM_WORLD
size = comm.Get_size()
rank = comm.Get_rank()

if size != 2:
    raise RuntimeError("Please run with two MPI tasks")

data = np.zeros((8, 8), int)
if rank == 0:
    for i in range(8):
        data[i, :] = np.arange(1, 9) + (i + 1) * 10

if rank == 0:
    print("Original data:")
    print(data)

# Create the custom datatype
# Note: the default numpy integer is 64 bits
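# Block lengths 1, 2, 3, 4 at displacements 0, 17, 34, 51 (in elements)
# select a staircase pattern in the 8x8 array: one element at (0,0), two
# starting at (2,1), three at (4,2) and four at (6,3).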
counts = np.arange(4, dtype=int) + 1
# Displacements are given in units of the element type (not bytes)
displacements = np.arange(4, dtype=int) * (2 * data.shape[1] + 1)
indexedtype = MPI.INT64_T.Create_indexed(counts, displacements)
indexedtype.Commit()

if rank == 0:
    comm.Send((data, 1, indexedtype), dest=1)
elif rank == 1:
    comm.Recv((data, 1, indexedtype), source=0)

if rank == 1:
    print("Received data:")
    print(data)
Copyright (C) 2018 CSC - IT Center for Science Ltd.
Licensed under the terms of the GNU General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
Code is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
Copy of the GNU General Public License can be obtained from
<http://www.gnu.org/licenses/>.
Two dimensional heat equation
=============================
This folder contains a code that solves the two-dimensional heat equation
with MPI parallelization. The code features non-blocking point-to-point
communication, user-defined datatypes, collective communication,
and parallel I/O with MPI I/O.
The heat (or diffusion) equation is
<!-- Equation
\frac{\partial u}{\partial t} = \alpha \nabla^2 u
-->
![img](http://quicklatex.com/cache3/d2/ql_b3f6b8bdc3a8862c73c5a97862afb9d2_l3.png)
where **u(x, y, t)** is the temperature field that varies in space and time,
and α is the thermal diffusivity constant. The two-dimensional Laplacian can be
discretized with finite differences as
<!-- Equation
\begin{align*}
\nabla^2 u &= \frac{u(i-1,j)-2u(i,j)+u(i+1,j)}{(\Delta x)^2} \\
&+ \frac{u(i,j-1)-2u(i,j)+u(i,j+1)}{(\Delta y)^2}
\end{align*}
-->
![img](http://quicklatex.com/cache3/2d/ql_59f49ed64dbbe76704e0679b8ad7c22d_l3.png)
Given an initial condition (u(t=0) = u0), one can follow the time dependence of
the temperature field with an explicit time evolution method:
<!-- Equation
u^{m+1}(i,j) = u^m(i,j) + \Delta t \alpha \nabla^2 u^m(i,j)
-->
![img](http://quicklatex.com/cache3/9e/ql_9eb7ce5f3d5eccd6cfc1ff5638bf199e_l3.png)
Note: the algorithm is stable only when
<!-- Equation
\Delta t < \frac{1}{2 \alpha} \frac{(\Delta x)^2 (\Delta y)^2}{(\Delta x)^2 + (\Delta y)^2}
-->
![img](http://quicklatex.com/cache3/d1/ql_0e7107049c9183d11dbb1e81174280d1_l3.png)
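For reference, the update and the largest stable time step look as follows in
code. This is a minimal serial sketch with illustrative names; the actual
parallel implementations are in the C and Fortran sources in this folder.

```c
/* Minimal serial sketch of one explicit Euler step on an (nx+2) x (ny+2)
 * array with one ghost cell layer on each side. Names are illustrative;
 * the parallel version lives in core.c / core.F90. */
void evolve_serial(double *curr, const double *prev, int nx, int ny,
                   double dx, double dy, double a)
{
    double dx2 = dx * dx;
    double dy2 = dy * dy;
    double dt = dx2 * dy2 / (2.0 * a * (dx2 + dy2));  /* largest stable step */
    int width = ny + 2;                               /* row length incl. ghosts */

    for (int i = 1; i <= nx; i++) {
        for (int j = 1; j <= ny; j++) {
            int ic = i * width + j;
            curr[ic] = prev[ic] + a * dt *
                ((prev[ic - width] - 2.0 * prev[ic] + prev[ic + width]) / dx2 +
                 (prev[ic - 1] - 2.0 * prev[ic] + prev[ic + 1]) / dy2);
        }
    }
}
```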
The two-dimensional grid is decomposed along both dimensions, and the
communication of boundary data is overlapped with computation. Restart files
are written and read with MPI I/O.
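Schematically, one time step posts the halo exchange, updates the interior
points that do not depend on the ghost layers while the messages are in
flight, completes the exchange, and only then updates the edge points. This
mirrors the structure of the main loop in main.c and main.F90:

```c
exchange_init(&previous, &parallelization);   /* post MPI_Isend / MPI_Irecv   */
evolve_interior(&current, &previous, a, dt);  /* points independent of halos  */
exchange_finalize(&parallelization);          /* MPI_Waitall on all requests  */
evolve_edges(&current, &previous, a, dt);     /* halo-dependent border points */
swap_fields(&current, &previous);
```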
Compilation instructions
------------------------
Building and running the example requires the
[libpng](http://www.libpng.org/pub/png/libpng.html) library and a working MPI
environment. The Python version needs mpi4py and matplotlib.
Move to the proper subfolder (C or Fortran) and modify the top of the **Makefile**
according to your environment (compiler commands and flags). The code can then
be built simply with **make**.
How to run
----------
The number of MPI ranks has to be a factor of the grid dimension (the default
dimension is 2000). The default initial temperature field is a disk. The initial
temperature field can also be read from a file; the provided **bottle.dat**
illustrates what happens to a cold soda bottle in a sauna.
* Running with defaults: `mpirun -np 4 ./heat_mpi`
* Initial field from a file: `mpirun -np 4 ./heat_mpi bottle.dat`
* Initial field from a file, given number of time steps:
  `mpirun -np 4 ./heat_mpi bottle.dat 1000`
* Default pattern with given dimensions and time steps:
  `mpirun -np 4 ./heat_mpi 800 800 1000`
The program produces a series of heat_XXXX.png files that show the
time development of the temperature field.
CC=mpicc
CCFLAGS=-O3 -Wall
LDFLAGS=
LIBS=-lpng -lm
EXE=heat_mpi
OBJS=core.o setup.o utilities.o io.o main.o
OBJS_PNG=pngwriter.o
all: $(EXE)
pngwriter.o: pngwriter.c pngwriter.h
core.o: core.c heat.h
utilities.o: utilities.c heat.h
setup.o: setup.c heat.h
io.o: io.c heat.h
main.o: main.c heat.h
$(OBJS_PNG): C_COMPILER := $(CC)
$(OBJS): C_COMPILER := $(CC)
$(EXE): $(OBJS) $(OBJS_PNG)
$(CC) $(CCFLAGS) $(OBJS) $(OBJS_PNG) -o $@ $(LDFLAGS) $(LIBS)
%.o: %.c
$(C_COMPILER) $(CCFLAGS) -c $< -o $@
.PHONY: clean
clean:
-/bin/rm -f $(EXE) a.out *.o *.png *~
/* Main solver routines for heat equation solver */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <mpi.h>
#include "heat.h"
/* Exchange the boundary values */
void exchange_init(field *temperature, parallel_data *parallel)
{
int ind, width;
width = temperature->ny + 2;
// Send to the up, receive from down
ind = idx(1, 0, width);
MPI_Isend(&temperature->data[ind], 1, parallel->rowtype,
parallel->nup, 11, parallel->comm, &parallel->requests[0]);
ind = idx(temperature->nx + 1, 0, width);
MPI_Irecv(&temperature->data[ind], 1, parallel->rowtype,
parallel->ndown, 11, parallel->comm, &parallel->requests[1]);
// Send to the down, receive from up
ind = idx(temperature->nx, 0, width);
MPI_Isend(&temperature->data[ind], 1, parallel->rowtype,
parallel->ndown, 12, parallel->comm, &parallel->requests[2]);
ind = idx(0, 0, width);
MPI_Irecv(&temperature->data[ind], 1, parallel->rowtype,
parallel->nup, 12, parallel->comm, &parallel->requests[3]);
// Send to the left, receive from right
ind = idx(0, 1, width);
MPI_Isend(&temperature->data[ind], 1, parallel->columntype,
parallel->nleft, 13, parallel->comm, &parallel->requests[4]);
ind = idx(0, temperature->ny + 1, width);
MPI_Irecv(&temperature->data[ind], 1, parallel->columntype,
parallel->nright, 13, parallel->comm, &parallel->requests[5]);
// Send to the right, receive from left
ind = idx(0, temperature->ny, width);
MPI_Isend(&temperature->data[ind], 1, parallel->columntype,
parallel->nright, 14, parallel->comm, &parallel->requests[7]);
ind = 0;
MPI_Irecv(&temperature->data[ind], 1, parallel->columntype,
parallel->nleft, 14, parallel->comm, &parallel->requests[6]);
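/* The request slots are filled in no particular order; exchange_finalize()
waits on all eight of them. */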
}
/* complete the non-blocking communication */
void exchange_finalize(parallel_data *parallel)
{
MPI_Waitall(8, &parallel->requests[0], MPI_STATUSES_IGNORE);
}
/* Update the temperature values using five-point stencil */
void evolve_interior(field *curr, field *prev, double a, double dt)
{
int i, j;
int ic, iu, id, il, ir; // indexes for center, up, down, left, right
int width;
width = curr->ny + 2;
double dx2, dy2;
/* Determine the temperature field at next time step
* As we have fixed boundary conditions, the outermost gridpoints
* are not updated. */
dx2 = prev->dx * prev->dx;
dy2 = prev->dy * prev->dy;
for (i = 2; i < curr->nx; i++) {
for (j = 2; j < curr->ny; j++) {
ic = idx(i, j, width);
iu = idx(i+1, j, width);
id = idx(i-1, j, width);
ir = idx(i, j+1, width);
il = idx(i, j-1, width);
curr->data[ic] = prev->data[ic] + a * dt *
((prev->data[iu] -
2.0 * prev->data[ic] +
prev->data[id]) / dx2 +
(prev->data[ir] -
2.0 * prev->data[ic] +
prev->data[il]) / dy2);
}
}
}
/* Update the temperature values using five-point stencil */
/* update only the border-dependent regions of the field */
void evolve_edges(field *curr, field *prev, double a, double dt)
{
int i, j;
int ic, iu, id, il, ir; // indexes for center, up, down, left, right
int width;
width = curr->ny + 2;
double dx2, dy2;
/* Determine the temperature field at next time step
* As we have fixed boundary conditions, the outermost gridpoints
* are not updated. */
dx2 = prev->dx * prev->dx;
dy2 = prev->dy * prev->dy;
i = 1;
for (j = 1; j < curr->ny + 1; j++) {
ic = idx(i, j, width);
iu = idx(i+1, j, width);
id = idx(i-1, j, width);
ir = idx(i, j+1, width);
il = idx(i, j-1, width);
curr->data[ic] = prev->data[ic] + a * dt *
((prev->data[iu] -
2.0 * prev->data[ic] +
prev->data[id]) / dx2 +
(prev->data[ir] -
2.0 * prev->data[ic] +
prev->data[il]) / dy2);
}
i = curr -> nx;
for (j = 1; j < curr->ny + 1; j++) {
ic = idx(i, j, width);
iu = idx(i+1, j, width);
id = idx(i-1, j, width);
ir = idx(i, j+1, width);
il = idx(i, j-1, width);
curr->data[ic] = prev->data[ic] + a * dt *
((prev->data[iu] -
2.0 * prev->data[ic] +
prev->data[id]) / dx2 +
(prev->data[ir] -
2.0 * prev->data[ic] +
prev->data[il]) / dy2);
}
j = 1;
for (i = 1; i < curr->nx + 1; i++) {
ic = idx(i, j, width);
iu = idx(i+1, j, width);
id = idx(i-1, j, width);
ir = idx(i, j+1, width);
il = idx(i, j-1, width);
curr->data[ic] = prev->data[ic] + a * dt *
((prev->data[iu] -
2.0 * prev->data[ic] +
prev->data[id]) / dx2 +
(prev->data[ir] -
2.0 * prev->data[ic] +
prev->data[il]) / dy2);
}
j = curr -> ny;
for (i = 1; i < curr->nx + 1; i++) {
ic = idx(i, j, width);
iu = idx(i+1, j, width);
id = idx(i-1, j, width);
ir = idx(i, j+1, width);
il = idx(i, j-1, width);
curr->data[ic] = prev->data[ic] + a * dt *
((prev->data[iu] -
2.0 * prev->data[ic] +
prev->data[id]) / dx2 +
(prev->data[ir] -
2.0 * prev->data[ic] +
prev->data[il]) / dy2);
}
}
#ifndef __HEAT_H__
#define __HEAT_H__
/* Datatype for temperature field */
typedef struct {
/* nx and ny are the true dimensions of the field. The array data
* contains also ghost layers, so it will have dimensions nx+2 x ny+2 */
int nx; /* Local dimensions of the field */
int ny;
int nx_full; /* Global dimensions of the field */
int ny_full; /* Global dimensions of the field */
double dx;
double dy;
double *data;
} field;
/* Datatype for basic parallelization information */
typedef struct {
int size; /* Number of MPI tasks */
int rank;
int nup, ndown, nleft, nright; /* Ranks of neighbouring MPI tasks */
MPI_Comm comm; /* Cartesian communicator */
MPI_Request requests[8]; /* Requests for non-blocking communication */
MPI_Datatype rowtype; /* MPI Datatype for communication of rows */
MPI_Datatype columntype; /* MPI Datatype for communication of columns */
MPI_Datatype subarraytype; /* MPI Datatype for communication in text I/O */
MPI_Datatype restarttype; /* MPI Datatype for communication in restart I/O */
MPI_Datatype filetype; /* MPI Datatype for file view in restart I/O */
} parallel_data;
/* We use here fixed grid spacing */
#define DX 0.01
#define DY 0.01
/* file name for restart checkpoints*/
#define CHECKPOINT "HEAT_RESTART.dat"
/* Inline function for indexing the 2D arrays */
static inline int idx(int i, int j, int width)
{
return i * width + j;
}
/* Function prototypes */
double *malloc_2d(int nx, int ny);
void free_2d(double *array);
void set_field_dimensions(field *temperature, int nx, int ny,
parallel_data *parallel);
void parallel_setup(parallel_data *parallel, int nx, int ny);
void initialize(int argc, char *argv[], field *temperature1,
field *temperature2, int *nsteps, parallel_data *parallel,
int *iter0);
void generate_field(field *temperature, parallel_data *parallel);
void exchange_init(field *temperature, parallel_data *parallel);
void exchange_finalize(parallel_data *parallel);
void evolve_interior(field *curr, field *prev, double a, double dt);
void evolve_edges(field *curr, field *prev, double a, double dt);
void write_field(field *temperature, int iter, parallel_data *parallel);
void read_field(field *temperature1, field *temperature2,
char *filename, parallel_data *parallel);
void write_restart(field *temperature, parallel_data *parallel, int iter);
void read_restart(field *temperature, parallel_data *parallel, int *iter);
void copy_field(field *temperature1, field *temperature2);
void swap_fields(field *temperature1, field *temperature2);
void allocate_field(field *temperature);
void deallocate_field(field *temperature);
void finalize(field *temperature1, field *temperature2,
parallel_data *parallel);
#endif /* __HEAT_H__ */
/* I/O related functions for heat equation solver */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <mpi.h>
#include "heat.h"
#include "pngwriter.h"
/* Output routine that prints out a picture of the temperature
* distribution. */
void write_field(field *temperature, int iter, parallel_data *parallel)
{
char filename[64];
/* The actual write routine takes only the actual data
* (without ghost layers) so we need array for that. */
int height, width;
double *full_data;
int coords[2];
int ix, jy;
int i, p;
height = temperature->nx_full;
width = temperature->ny_full;
if (parallel->rank == 0) {
/* Copy the inner data */
full_data = malloc_2d(height, width);
for (i = 0; i < temperature->nx; i++)
memcpy(&full_data[idx(i, 0, width)],
&temperature->data[idx(i+1, 1, temperature->ny + 2)],
temperature->ny * sizeof(double));
/* Receive data from other ranks */
for (p = 1; p < parallel->size; p++) {
MPI_Cart_coords(parallel->comm, p, 2, coords);
ix = coords[0] * temperature->nx;
jy = coords[1] * temperature->ny;
MPI_Recv(&full_data[idx(ix, jy, width)], 1,
parallel->subarraytype, p, 22,
parallel->comm, MPI_STATUS_IGNORE);
}
/* Write out the data to a png file */
sprintf(filename, "%s_%04d.png", "heat", iter);
save_png(full_data, height, width, filename, 'c');
free_2d(full_data);
} else {
/* Send data */
MPI_Ssend(temperature->data, 1,
parallel->subarraytype, 0,
22, parallel->comm);
}
}
/* Read the initial temperature distribution from a file and
* initialize the temperature fields temperature1 and
* temperature2 to the same initial state. */
void read_field(field *temperature1, field *temperature2, char *filename,
parallel_data *parallel)
{
FILE *fp;
int nx, ny, i, j;
double *full_data;
int coords[2];
int ix, jy, p;
int count;
fp = fopen(filename, "r");
/* Read the header */
count = fscanf(fp, "# %d %d \n", &nx, &ny);
if (count < 2) {
fprintf(stderr, "Error while reading the input file!\n");
MPI_Abort(MPI_COMM_WORLD, -1);
}
parallel_setup(parallel, nx, ny);
set_field_dimensions(temperature1, nx, ny, parallel);
set_field_dimensions(temperature2, nx, ny, parallel);
/* Allocate arrays (including ghost layers) */
temperature1->data =
malloc_2d(temperature1->nx + 2, temperature1->ny + 2);
temperature2->data =
malloc_2d(temperature2->nx + 2, temperature2->ny + 2);
if (parallel->rank == 0) {
/* Full array */
full_data = malloc_2d(nx, ny);
/* Read the actual data */
for (i = 0; i < nx; i++) {
for (j = 0; j < ny; j++) {
count = fscanf(fp, "%lf", &full_data[idx(i, j, ny)]);
}
}
/* Copy to own local array */
for (i = 0; i < temperature1->nx; i++) {
memcpy(&temperature1->data[idx(i+1, 1, temperature1->ny + 2)],
&full_data[idx(i, 0, ny)], temperature1->ny * sizeof(double));
}
/* Send to other processes */
for (p = 1; p < parallel->size; p++) {
MPI_Cart_coords(parallel->comm, p, 2, coords);
ix = coords[0] * temperature1->nx;
jy = coords[1] * temperature1->ny;
MPI_Send(&full_data[idx(ix, jy, ny)], 1, parallel->subarraytype,
p, 44, parallel->comm);
}
} else {
/* Receive data */
MPI_Recv(temperature1->data, 1,
parallel->subarraytype, 0,
44, parallel->comm, MPI_STATUS_IGNORE);
}
/* Set the boundary values */
for (i = 0; i < temperature1->nx + 1; i++) {
temperature1->data[idx(i, 0, temperature1->ny + 2)] =
temperature1->data[idx(i, 1, temperature1->ny + 2)];
temperature1->data[idx(i, temperature1->ny + 1, temperature1->ny + 2)] =
temperature1->data[idx(i, temperature1->ny, temperature1->ny + 2)];
}
for (j = 0; j < temperature1->ny + 2; j++) {
temperature1->data[idx(0, j, temperature1->ny + 2)] =
temperature1->data[idx(1, j, temperature1->ny + 2)];
temperature1->data[idx(temperature1->nx + 1, j, temperature1->ny + 2)] =
temperature1->data[idx(temperature1->nx, j, temperature1->ny + 2)];
}
copy_field(temperature1, temperature2);
if (parallel->rank == 0) {
free_2d(full_data);
}
fclose(fp);
}
/* Write a restart checkpoint that contains field dimensions, current
* iteration number and temperature field. */
void write_restart(field *temperature, parallel_data *parallel, int iter)
{
MPI_File fp;
MPI_Offset disp;
// open the file and write the dimensions
MPI_File_open(MPI_COMM_WORLD, CHECKPOINT,
MPI_MODE_CREATE | MPI_MODE_WRONLY, MPI_INFO_NULL, &fp);
if (parallel->rank == 0) {
MPI_File_write(fp, &temperature->nx_full, 1, MPI_INT,
MPI_STATUS_IGNORE);
MPI_File_write(fp, &temperature->ny_full, 1, MPI_INT,
MPI_STATUS_IGNORE);
MPI_File_write(fp, &iter, 1, MPI_INT, MPI_STATUS_IGNORE);
}
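/* For simplicity, all tasks skip the three integers written at the
beginning of the file by rank 0 */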
disp = 3 * sizeof(int);
MPI_File_set_view(fp, 0, MPI_DOUBLE, parallel->filetype, "native",
MPI_INFO_NULL);
MPI_File_write_at_all(fp, disp, temperature->data,
1, parallel->restarttype, MPI_STATUS_IGNORE);
MPI_File_close(&fp);
}
/* Read a restart checkpoint that contains field dimensions, current
* iteration number and temperature field. */
void read_restart(field *temperature, parallel_data *parallel, int *iter)
{
MPI_File fp;
MPI_Offset disp;
int nx, ny;
// open the file and write the dimensions
MPI_File_open(MPI_COMM_WORLD, CHECKPOINT, MPI_MODE_RDONLY,
MPI_INFO_NULL, &fp);
// read grid size and current iteration
MPI_File_read_all(fp, &nx, 1, MPI_INT, MPI_STATUS_IGNORE);
MPI_File_read_all(fp, &ny, 1, MPI_INT, MPI_STATUS_IGNORE);
MPI_File_read_all(fp, iter, 1, MPI_INT, MPI_STATUS_IGNORE);
// set correct dimensions to MPI metadata
parallel_setup(parallel, nx, ny);
// set local dimensions and allocate memory for the data
set_field_dimensions(temperature, nx, ny, parallel);
allocate_field(temperature);
disp = 3 * sizeof(int);
MPI_File_set_view(fp, 0, MPI_DOUBLE, parallel->filetype, "native",
MPI_INFO_NULL);
MPI_File_read_at_all(fp, disp, temperature->data,
1, parallel->restarttype, MPI_STATUS_IGNORE);
MPI_File_close(&fp);
}
/* Heat equation solver in 2D. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <mpi.h>
#include "heat.h"
int main(int argc, char **argv)
{
double a = 0.5; //!< Diffusion constant
field current, previous; //!< Current and previous temperature fields
double dt; //!< Time step
int nsteps; //!< Number of time steps
int image_interval = 500; //!< Image output interval
int restart_interval = 200; //!< Checkpoint output interval
parallel_data parallelization; //!< Parallelization info
int iter, iter0; //!< Iteration counter
double dx2, dy2; //!< delta x and y squared
double start_clock; //!< Time stamps
MPI_Init(&argc, &argv);
initialize(argc, argv, &current, &previous, &nsteps,
&parallelization, &iter0);
/* Output the initial field */
write_field(&current, iter0, &parallelization);
iter0++;
/* Largest stable time step */
dx2 = current.dx * current.dx;
dy2 = current.dy * current.dy;
dt = dx2 * dy2 / (2.0 * a * (dx2 + dy2));
/* Get the start time stamp */
start_clock = MPI_Wtime();
/* Time evolve */
for (iter = iter0; iter < iter0 + nsteps; iter++) {
exchange_init(&previous, &parallelization);
evolve_interior(&current, &previous, a, dt);
exchange_finalize(&parallelization);
evolve_edges(&current, &previous, a, dt);
if (iter % image_interval == 0) {
write_field(&current, iter, &parallelization);
}
/* write a checkpoint now and then for easy restarting */
if (iter % restart_interval == 0) {
write_restart(&current, &parallelization, iter);
}
/* Swap current field so that it will be used
as previous for next iteration step */
swap_fields(&current, &previous);
}
/* Determine the CPU time used for the iteration */
if (parallelization.rank == 0) {
printf("Iteration took %.3f seconds.\n", (MPI_Wtime() - start_clock));
printf("Reference value at 5,5: %f\n",
previous.data[idx(5, 5, current.ny + 2)]);
}
write_field(&current, iter, &parallelization);
finalize(&current, &previous, &parallelization);
MPI_Finalize();
return 0;
}
#include <png.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include "pngwriter.h"
/* Datatype for RGB pixel */
typedef struct {
uint8_t red;
uint8_t green;
uint8_t blue;
} pixel_t;
void cmap(double value, const double scaling, const double offset,
pixel_t *pix);
static int heat_colormap[256][3] = {
{59, 76, 192}, {59, 76, 192}, {60, 78, 194}, {61, 80, 195},
{62, 81, 197}, {64, 83, 198}, {65, 85, 200}, {66, 87, 201},
{67, 88, 203}, {68, 90, 204}, {69, 92, 206}, {71, 93, 207},
{72, 95, 209}, {73, 97, 210}, {74, 99, 211}, {75, 100, 213},
{77, 102, 214}, {78, 104, 215}, {79, 105, 217}, {80, 107, 218},
{82, 109, 219}, {83, 110, 221}, {84, 112, 222}, {85, 114, 223},
{87, 115, 224}, {88, 117, 225}, {89, 119, 227}, {90, 120, 228},
{92, 122, 229}, {93, 124, 230}, {94, 125, 231}, {96, 127, 232},
{97, 129, 233}, {98, 130, 234}, {100, 132, 235}, {101, 133, 236},
{102, 135, 237}, {103, 137, 238}, {105, 138, 239}, {106, 140, 240},
{107, 141, 240}, {109, 143, 241}, {110, 144, 242}, {111, 146, 243},
{113, 147, 244}, {114, 149, 244}, {116, 150, 245}, {117, 152, 246},
{118, 153, 246}, {120, 155, 247}, {121, 156, 248}, {122, 157, 248},
{124, 159, 249}, {125, 160, 249}, {127, 162, 250}, {128, 163, 250},
{129, 164, 251}, {131, 166, 251}, {132, 167, 252}, {133, 168, 252},
{135, 170, 252}, {136, 171, 253}, {138, 172, 253}, {139, 174, 253},
{140, 175, 254}, {142, 176, 254}, {143, 177, 254}, {145, 179, 254},
{146, 180, 254}, {147, 181, 255}, {149, 182, 255}, {150, 183, 255},
{152, 185, 255}, {153, 186, 255}, {154, 187, 255}, {156, 188, 255},
{157, 189, 255}, {158, 190, 255}, {160, 191, 255}, {161, 192, 255},
{163, 193, 255}, {164, 194, 254}, {165, 195, 254}, {167, 196, 254},
{168, 197, 254}, {169, 198, 254}, {171, 199, 253}, {172, 200, 253},
{173, 201, 253}, {175, 202, 252}, {176, 203, 252}, {177, 203, 252},
{179, 204, 251}, {180, 205, 251}, {181, 206, 250}, {183, 207, 250},
{184, 207, 249}, {185, 208, 249}, {186, 209, 248}, {188, 209, 247},
{189, 210, 247}, {190, 211, 246}, {191, 211, 246}, {193, 212, 245},
{194, 213, 244}, {195, 213, 243}, {196, 214, 243}, {198, 214, 242},
{199, 215, 241}, {200, 215, 240}, {201, 216, 239}, {202, 216, 239},
{204, 217, 238}, {205, 217, 237}, {206, 217, 236}, {207, 218, 235},
{208, 218, 234}, {209, 218, 233}, {210, 219, 232}, {211, 219, 231},
{212, 219, 230}, {214, 220, 229}, {215, 220, 228}, {216, 220, 227},
{217, 220, 225}, {218, 220, 224}, {219, 220, 223}, {220, 221, 222},
{221, 221, 221}, {222, 220, 219}, {223, 220, 218}, {224, 219, 216},
{225, 219, 215}, {226, 218, 214}, {227, 218, 212}, {228, 217, 211},
{229, 216, 209}, {230, 216, 208}, {231, 215, 206}, {232, 215, 205},
{233, 214, 203}, {233, 213, 202}, {234, 212, 200}, {235, 212, 199},
{236, 211, 197}, {237, 210, 196}, {237, 209, 194}, {238, 208, 193},
{239, 208, 191}, {239, 207, 190}, {240, 206, 188}, {240, 205, 187},
{241, 204, 185}, {242, 203, 183}, {242, 202, 182}, {243, 201, 180},
{243, 200, 179}, {243, 199, 177}, {244, 198, 176}, {244, 197, 174},
{245, 196, 173}, {245, 195, 171}, {245, 194, 169}, {246, 193, 168},
{246, 192, 166}, {246, 190, 165}, {246, 189, 163}, {247, 188, 161},
{247, 187, 160}, {247, 186, 158}, {247, 184, 157}, {247, 183, 155},
{247, 182, 153}, {247, 181, 152}, {247, 179, 150}, {247, 178, 149},
{247, 177, 147}, {247, 175, 146}, {247, 174, 144}, {247, 172, 142},
{247, 171, 141}, {247, 170, 139}, {247, 168, 138}, {247, 167, 136},
{247, 165, 135}, {246, 164, 133}, {246, 162, 131}, {246, 161, 130},
{246, 159, 128}, {245, 158, 127}, {245, 156, 125}, {245, 155, 124},
{244, 153, 122}, {244, 151, 121}, {243, 150, 119}, {243, 148, 117},
{242, 147, 116}, {242, 145, 114}, {241, 143, 113}, {241, 142, 111},
{240, 140, 110}, {240, 138, 108}, {239, 136, 107}, {239, 135, 105},
{238, 133, 104}, {237, 131, 102}, {237, 129, 101}, {236, 128, 99},
{235, 126, 98}, {235, 124, 96}, {234, 122, 95}, {233, 120, 94},
{232, 118, 92}, {231, 117, 91}, {230, 115, 89}, {230, 113, 88},
{229, 111, 86}, {228, 109, 85}, {227, 107, 84}, {226, 105, 82},
{225, 103, 81}, {224, 101, 79}, {223, 99, 78}, {222, 97, 77},
{221, 95, 75}, {220, 93, 74}, {219, 91, 73}, {218, 89, 71},
{217, 87, 70}, {215, 85, 69}, {214, 82, 67}, {213, 80, 66},
{212, 78, 65}, {211, 76, 64}, {210, 74, 62}, {208, 71, 61},
{207, 69, 60}, {206, 67, 59}, {204, 64, 57}, {203, 62, 56},
{202, 59, 55}, {200, 57, 54}, {199, 54, 53}, {198, 52, 51},
{196, 49, 50}, {195, 46, 49}, {193, 43, 48}, {192, 40, 47},
{191, 37, 46}, {189, 34, 44}, {188, 30, 43}, {186, 26, 42},
{185, 22, 41}, {183, 17, 40}, {182, 11, 39}, {180, 4, 38}
};
/*
* Save the two dimensional array as a png image
* Arguments:
* double *data - pointer to an array of height * width values
* int height - number of ROWS to be written
* int width - number of COLUMNS to be written
* char *fname - name of the picture
* char lang - either 'c' or 'f' denoting the memory
* layout. That is, if 'f' is given, then rows
* and columns are swapped.
*/
int save_png(double *data, const int height, const int width,
const char *fname, const char lang)
{
FILE *fp;
png_structp pngstruct_ptr = NULL;
png_infop pnginfo_ptr = NULL;
png_byte **row_pointers = NULL;
int i, j;
/* Default return status is failure */
int status = -1;
int pixel_size = 3;
int depth = 8;
/* Open the file and initialize the png library.
* Note that in error cases we jump to clean up
* parts in the end of this function using goto. */
fp = fopen(fname, "wb");
if (fp == NULL) {
goto fopen_failed;
}
pngstruct_ptr =
png_create_write_struct(PNG_LIBPNG_VER_STRING, NULL, NULL, NULL);
if (pngstruct_ptr == NULL) {
goto pngstruct_create_failed;
}
pnginfo_ptr = png_create_info_struct(pngstruct_ptr);
if (pnginfo_ptr == NULL) {
goto pnginfo_create_failed;
}
if (setjmp(png_jmpbuf(pngstruct_ptr))) {
goto setjmp_failed;
}
png_set_IHDR(pngstruct_ptr, pnginfo_ptr, (size_t) width,
(size_t) height, depth, PNG_COLOR_TYPE_RGB,
PNG_INTERLACE_NONE,
PNG_COMPRESSION_TYPE_DEFAULT, PNG_FILTER_TYPE_DEFAULT);
row_pointers = png_malloc(pngstruct_ptr, height * sizeof(png_byte *));
for (i = 0; i < height; i++) {
png_byte *row = png_malloc(pngstruct_ptr,
sizeof(uint8_t) * width * pixel_size);
row_pointers[i] = row;
/* Branch according to the memory layout */
if (lang == 'c' || lang == 'C') {
for (j = 0; j < width; j++) {
pixel_t pixel;
/* Scale the values so that values between 0 and
* 100 degrees are mapped to values between 0 and 255 */
cmap(data[j + i * width], 2.55, 0.0, &pixel);
*row++ = pixel.red;
*row++ = pixel.green;
*row++ = pixel.blue;
}
} else if (lang == 'f' || lang == 'F') {
for (j = 0; j < width; j++) {
pixel_t pixel;
cmap(data[i + j * height], 2.55, 0.0, &pixel);
*row++ = pixel.red;
*row++ = pixel.green;
*row++ = pixel.blue;
}
} else {
fprintf(stderr, "Unknown memory order %c for pngwriter!\n", lang);
exit(EXIT_FAILURE);
}
}
png_init_io(pngstruct_ptr, fp);
png_set_rows(pngstruct_ptr, pnginfo_ptr, row_pointers);
png_write_png(pngstruct_ptr, pnginfo_ptr,
PNG_TRANSFORM_IDENTITY, NULL);
status = 0;
for (i = 0; i < height; i++) {
png_free(pngstruct_ptr, row_pointers[i]);
}
png_free(pngstruct_ptr, row_pointers);
/* Cleanup with labels */
setjmp_failed:
pnginfo_create_failed:
png_destroy_write_struct(&pngstruct_ptr, &pnginfo_ptr);
pngstruct_create_failed:
fclose(fp);
fopen_failed:
return status;
}
/*
* This routine sets the RGB values for the pixel_t structure using
* the colormap data heat_colormap. If the scaled value falls outside
* the range 0 to 255, blue or red is used instead.
*/
void cmap(double value, const double scaling, const double offset,
pixel_t *pix)
{
int ival;
ival = (int)(value * scaling + offset);
if (ival < 0) { /* Colder than colorscale, substitute blue */
pix->red = 0;
pix->green = 0;
pix->blue = 255;
} else if (ival > 255) {
pix->red = 255; /* Hotter than colormap, substitute red */
pix->green = 0;
pix->blue = 0;
} else {
pix->red = heat_colormap[ival][0];
pix->green = heat_colormap[ival][1];
pix->blue = heat_colormap[ival][2];
}
}
#ifndef PNGWRITER_H_
#define PNGWRITER_H_
int save_png(double *data, const int nx, const int ny, const char *fname,
const char lang);
#endif
/* Setup routines for heat equation solver */
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <assert.h>
#include <mpi.h>
#include "heat.h"
#include "pngwriter.h"
#define NSTEPS 500 // Default number of iteration steps
/* Initialize the heat equation solver */
void initialize(int argc, char *argv[], field *current,
field *previous, int *nsteps, parallel_data *parallel,
int *iter0)
{
/*
* Following combinations of command line arguments are possible:
* No arguments: use default field dimensions and number of time steps
* One argument: read initial field from a given file
* Two arguments: initial field from file and number of time steps
* Three arguments: field dimensions (rows,cols) and number of time steps
*/
int rows = 2000; //!< Field dimensions with default values
int cols = 2000;
char input_file[64]; //!< Name of the optional input file
int read_file = 0;
*nsteps = NSTEPS;
*iter0 = 0;
switch (argc) {
case 1:
/* Use default values */
break;
case 2:
/* Read initial field from a file */
strncpy(input_file, argv[1], 64);
read_file = 1;
break;
case 3:
/* Read initial field from a file */
strncpy(input_file, argv[1], 64);
read_file = 1;
/* Number of time steps */
*nsteps = atoi(argv[2]);
break;
case 4:
/* Field dimensions */
rows = atoi(argv[1]);
cols = atoi(argv[2]);
/* Number of time steps */
*nsteps = atoi(argv[3]);
break;
default:
printf("Unsupported number of command line arguments\n");
exit(-1);
}
// Check if checkpoint exists
if (!access(CHECKPOINT, F_OK)) {
read_restart(current, parallel, iter0);
set_field_dimensions(previous, current->nx_full, current->ny_full,
parallel);
allocate_field(previous);
if (parallel->rank == 0)
printf("Restarting from an earlier checkpoint saved"
" at iteration %d.\n", *iter0);
copy_field(current, previous);
} else if (read_file) {
read_field(current, previous, input_file, parallel);
} else {
parallel_setup(parallel, rows, cols);
set_field_dimensions(current, rows, cols, parallel);
set_field_dimensions(previous, rows, cols, parallel);
generate_field(current, parallel);
allocate_field(previous);
copy_field(current, previous);
}
}
/* Generate initial temperature field. Pattern is disc with a radius
* of nx_full / 6 in the center of the grid.
* Boundary conditions are (different) constant temperatures outside the grid */
void generate_field(field *temperature, parallel_data *parallel)
{
int i, j, ind, width;
double radius;
int dx, dy;
int dims[2], coords[2], periods[2];
/* Allocate the temperature array, note that
* we have to allocate also the ghost layers */
temperature->data =
malloc_2d(temperature->nx + 2, temperature->ny + 2);
MPI_Cart_get(parallel->comm, 2, dims, periods, coords);
/* Radius of the source disc */
radius = temperature->nx_full / 6.0;
width = temperature->ny + 2;
for (i = 0; i < temperature->nx + 2; i++) {
for (j = 0; j < temperature->ny + 2; j++) {
/* Distance of point i, j from the origin */
dx = i + coords[0] * temperature->nx -
temperature->nx_full / 2 + 1;
dy = j + coords[1] * temperature->ny -
temperature->ny_full / 2 + 1;
ind = idx(i, j, width);
if (dx * dx + dy * dy < radius * radius) {
temperature->data[ind] = 5.0;
} else {
temperature->data[ind] = 65.0;
}
}
}
/* Boundary conditions */
// Left boundary
if (coords[1] == 0) {
for (i = 0; i < temperature->nx + 2; i++) {
ind = idx(i, 0, width);
temperature->data[ind] = 20.0;
}
}
// Right boundary
if (coords[1] == dims[1] - 1) {
for (i = 0; i < temperature->nx + 2; i++) {
ind = idx(i, temperature->ny + 1, width);
temperature->data[ind] = 70.0;
}
}
// Upper boundary
if (coords[0] == 0) {
for (j = 0; j < temperature->ny + 2; j++) {
ind = idx(0, j, width);
temperature->data[ind] = 85.0;
}
}
// Lower boundary
if (coords[0] == dims[0] - 1) {
for (j = 0; j < temperature->ny + 2; j++) {
ind = idx(temperature->nx + 1, j, width);
temperature->data[ind] = 5.0;
}
}
}
/* Set dimensions of the field. Note that the nx is the size of the first
* dimension and ny the second. */
void set_field_dimensions(field *temperature, int nx, int ny,
parallel_data *parallel)
{
int nx_local, ny_local;
int dims[2], coords[2], periods[2];
MPI_Cart_get(parallel->comm, 2, dims, periods, coords);
nx_local = nx / dims[0];
ny_local = ny / dims[1];
temperature->dx = DX;
temperature->dy = DY;
temperature->nx = nx_local;
temperature->ny = ny_local;
temperature->nx_full = nx;
temperature->ny_full = ny;
}
void parallel_setup(parallel_data *parallel, int nx, int ny)
{
int nx_local;
int ny_local;
int world_size;
int dims[2] = {0, 0};
int periods[2] = { 0, 0 };
/* Set grid dimensions */
MPI_Comm_size(MPI_COMM_WORLD, &world_size);
MPI_Dims_create(world_size, 2, dims);
nx_local = nx / dims[0];
ny_local = ny / dims[1];
if (nx_local * dims[0] != nx) {
printf("Cannot divide grid evenly to processors in x-direction "
"%d x %d != %d\n", nx_local, dims[0], nx);
MPI_Abort(MPI_COMM_WORLD, -2);
}
if (ny_local * dims[1] != ny) {
printf("Cannot divide grid evenly to processors in y-direction "
"%d x %d != %d\n", ny_local, dims[1], ny);
MPI_Abort(MPI_COMM_WORLD, -2);
}
/* Create cartesian communicator */
MPI_Cart_create(MPI_COMM_WORLD, 2, dims, periods, 1, &parallel->comm);
MPI_Cart_shift(parallel->comm, 0, 1, &parallel->nup, &parallel->ndown);
MPI_Cart_shift(parallel->comm, 1, 1, &parallel->nleft,
&parallel->nright);
MPI_Comm_size(parallel->comm, &parallel->size);
MPI_Comm_rank(parallel->comm, &parallel->rank);
if (parallel->rank == 0) {
printf("Using domain decomposition %d x %d\n", dims[0], dims[1]);
printf("Local domain size %d x %d\n", nx_local, ny_local);
}
/* Create datatypes for halo exchange */
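/* In the row-major C layout a full row of the local array (ny_local + 2
 * doubles, ghost cells included) is contiguous, while a column is a
 * strided vector with stride ny_local + 2. */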
MPI_Type_vector(nx_local + 2, 1, ny_local + 2, MPI_DOUBLE,
&parallel->columntype);
MPI_Type_contiguous(ny_local + 2, MPI_DOUBLE, &parallel->rowtype);
MPI_Type_commit(&parallel->columntype);
MPI_Type_commit(&parallel->rowtype);
/* Create datatype for subblock needed in text I/O
* Rank 0 uses datatype for receiving data into full array while
* other ranks use datatype for sending the inner part of array */
int sizes[2] = {nx_local + 2, ny_local + 2};
int subsizes[2] = { nx_local, ny_local };
int offsets[2] = {1, 1};
if (parallel->rank == 0) {
sizes[0] = nx;
sizes[1] = ny;
offsets[0] = 0;
offsets[1] = 0;
}
MPI_Type_create_subarray(2, sizes, subsizes, offsets, MPI_ORDER_C,
MPI_DOUBLE, &parallel->subarraytype);
MPI_Type_commit(&parallel->subarraytype);
/* Create datatypes for restart I/O
* For boundary ranks also the ghost layer (boundary condition)
* is written */
int coords[2];
MPI_Cart_coords(parallel->comm, parallel->rank, 2, coords);
sizes[0] = nx + 2;
sizes[1] = ny + 2;
offsets[0] = 1 + coords[0] * nx_local;
offsets[1] = 1 + coords[1] * ny_local;
if (coords[0] == 0) {
offsets[0] -= 1;
subsizes[0] += 1;
}
if (coords[0] == dims[0] - 1) {
subsizes[0] += 1;
}
if (coords[1] == 0) {
offsets[1] -= 1;
subsizes[1] += 1;
}
if (coords[1] == dims[1] - 1) {
subsizes[1] += 1;
}
MPI_Type_create_subarray(2, sizes, subsizes, offsets, MPI_ORDER_C,
MPI_DOUBLE, &parallel->filetype);
MPI_Type_commit(&parallel->filetype);
sizes[0] = nx_local + 2;
sizes[1] = ny_local + 2;
offsets[0] = 1;
offsets[1] = 1;
if (coords[0] == 0) {
offsets[0] = 0;
}
if (coords[1] == 0) {
offsets[1] = 0;
}
MPI_Type_create_subarray(2, sizes, subsizes, offsets, MPI_ORDER_C,
MPI_DOUBLE, &parallel->restarttype);
MPI_Type_commit(&parallel->restarttype);
}
/* Deallocate the 2D arrays of temperature fields */
void finalize(field *temperature1, field *temperature2,
parallel_data *parallel)
{
free_2d(temperature1->data);
free_2d(temperature2->data);
MPI_Type_free(&parallel->rowtype);
MPI_Type_free(&parallel->columntype);
MPI_Type_free(&parallel->subarraytype);
MPI_Type_free(&parallel->restarttype);
MPI_Type_free(&parallel->filetype);
}
/* Utility functions for heat equation solver
* NOTE: This file does not need to be edited! */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <mpi.h>
#include "heat.h"
/* Utility routine for allocating a two dimensional array */
double *malloc_2d(int nx, int ny)
{
double *array;
array = (double *) malloc(nx * ny * sizeof(double));
return array;
}
/* Utility routine for deallocating a two dimensional array */
void free_2d(double *array)
{
free(array);
}
/* Copy data on temperature1 into temperature2 */
void copy_field(field *temperature1, field *temperature2)
{
assert(temperature1->nx == temperature2->nx);
assert(temperature1->ny == temperature2->ny);
memcpy(temperature2->data, temperature1->data,
(temperature1->nx + 2) * (temperature1->ny + 2) * sizeof(double));
}
/* Swap the data of fields temperature1 and temperature2 */
void swap_fields(field *temperature1, field *temperature2)
{
double *tmp;
tmp = temperature1->data;
temperature1->data = temperature2->data;
temperature2->data = tmp;
}
/* Allocate memory for a temperature field and initialise it to zero */
void allocate_field(field *temperature)
{
// Allocate also ghost layers
temperature->data =
malloc_2d(temperature->nx + 2, temperature->ny + 2);
// Initialize to zero
memset(temperature->data, 0.0,
(temperature->nx + 2) * (temperature->ny + 2) * sizeof(double));
}
FC=mpif90
CC=gcc
FCFLAGS=-O3 -Wall
CCFLAGS=-O3 -Wall
LDFLAGS=
LIBS=-lpng
EXE=heat_mpi
OBJS=main.o heat_mod.o core.o setup.o utilities.o io.o pngwriter_mod.o
OBJS_PNG=pngwriter.o
all: $(EXE)
pngwriter.o: pngwriter.c pngwriter.h
core.o: core.F90 heat_mod.o
utilities.o: utilities.F90 heat_mod.o
io.o: io.F90 heat_mod.o pngwriter_mod.o
setup.o: setup.F90 heat_mod.o utilities.o io.o
pngwriter_mod.o: pngwriter_mod.F90 heat_mod.o
main.o: main.F90 heat_mod.o core.o io.o setup.o utilities.o
$(EXE): $(OBJS) $(OBJS_PNG)
$(FC) $(FCFLAGS) $(OBJS) $(OBJS_PNG) -o $@ $(LDFLAGS) $(LIBS)
%.o: %.F90
$(FC) $(FCFLAGS) -c $< -o $@
%.o: %.c
$(CC) $(CCFLAGS) -c $< -o $@
.PHONY: clean
clean:
-/bin/rm -f $(EXE) a.out *.o *.mod *.png *~
! Main solver routines for heat equation solver
module core
use heat
contains
! Exchange the boundary data between MPI tasks
subroutine exchange_init(field0, parallel)
use mpi
implicit none
type(field), intent(inout) :: field0
type(parallel_data), intent(in) :: parallel
integer :: ierr
! Send to left, receive from right
call mpi_isend(field0%data(0, 1), 1, parallel%columntype, &
& parallel%nleft, 11, parallel%comm, parallel%requests(1), ierr)
call mpi_irecv(field0%data(0, field0%ny + 1), 1, parallel%columntype, &
& parallel%nright, 11, &
& parallel%comm, parallel%requests(2), ierr)
! Send to right, receive from left
call mpi_isend(field0%data(0, field0%ny), 1, parallel%columntype, &
& parallel%nright, 12, parallel%comm, parallel%requests(3), ierr)
call mpi_irecv(field0%data(0, 0), 1, parallel%columntype, &
& parallel%nleft, 12, &
& parallel%comm, parallel%requests(4), ierr)
! Send to up receive from down
call mpi_isend(field0%data(1, 0), 1, parallel%rowtype, &
& parallel%nup, 13, parallel%comm, parallel%requests(5), ierr)
call mpi_irecv(field0%data(field0%nx+1, 0), 1, parallel%rowtype, &
& parallel%ndown, 13, parallel%comm, parallel%requests(6), ierr)
! Send to the down, receive from up
call mpi_isend(field0%data(field0%nx, 0), 1, parallel%rowtype, &
& parallel%ndown, 14, parallel%comm, parallel%requests(7), ierr)
call mpi_irecv(field0%data(0, 0), 1, parallel%rowtype, &
& parallel%nup, 14, parallel%comm, parallel%requests(8), ierr)
end subroutine exchange_init
! Finalize the non-blocking communication
subroutine exchange_finalize(parallel)
use mpi
implicit none
type(parallel_data), intent(inout) :: parallel
integer :: ierr
call mpi_waitall(8, parallel%requests, mpi_statuses_ignore, ierr)
end subroutine exchange_finalize
! Compute one time step of temperature evolution
! Arguments:
! curr (type(field)): current temperature values
! prev (type(field)): values from previous time step
! a (real(dp)): update equation constant
! dt (real(dp)): time step value
subroutine evolve_interior(curr, prev, a, dt)
implicit none
type(field), intent(inout) :: curr, prev
real(dp) :: a, dt
integer :: i, j, nx, ny
nx = curr%nx
ny = curr%ny
do j = 2, ny - 1
do i = 2, nx - 1
curr%data(i, j) = prev%data(i, j) + a * dt * &
& ((prev%data(i-1, j) - 2.0 * prev%data(i, j) + &
& prev%data(i+1, j)) / curr%dx**2 + &
& (prev%data(i, j-1) - 2.0 * prev%data(i, j) + &
& prev%data(i, j+1)) / curr%dy**2)
end do
end do
end subroutine evolve_interior
! Compute one time step of temperature evolution
! Arguments:
! curr (type(field)): current temperature values
! prev (type(field)): values from previous time step
! a (real(dp)): update equation constant
! dt (real(dp)): time step value
! Update only the border-dependent part
subroutine evolve_edges(curr, prev, a, dt)
implicit none
type(field), intent(inout) :: curr, prev
real(dp) :: a, dt
integer :: i, j, nx, ny
nx = curr%nx
ny = curr%ny
j = 1
do i = 1, nx
curr%data(i, j) = prev%data(i, j) + a * dt * &
& ((prev%data(i-1, j) - 2.0 * prev%data(i, j) + &
& prev%data(i+1, j)) / curr%dx**2 + &
& (prev%data(i, j-1) - 2.0 * prev%data(i, j) + &
& prev%data(i, j+1)) / curr%dy**2)
end do
j = ny
do i = 1, nx
curr%data(i, j) = prev%data(i, j) + a * dt * &
& ((prev%data(i-1, j) - 2.0 * prev%data(i, j) + &
& prev%data(i+1, j)) / curr%dx**2 + &
& (prev%data(i, j-1) - 2.0 * prev%data(i, j) + &
& prev%data(i, j+1)) / curr%dy**2)
end do
i = 1
do j = 1, ny
curr%data(i, j) = prev%data(i, j) + a * dt * &
& ((prev%data(i-1, j) - 2.0 * prev%data(i, j) + &
& prev%data(i+1, j)) / curr%dx**2 + &
& (prev%data(i, j-1) - 2.0 * prev%data(i, j) + &
& prev%data(i, j+1)) / curr%dy**2)
end do
i = nx
do j = 1, ny
curr%data(i, j) = prev%data(i, j) + a * dt * &
& ((prev%data(i-1, j) - 2.0 * prev%data(i, j) + &
& prev%data(i+1, j)) / curr%dx**2 + &
& (prev%data(i, j-1) - 2.0 * prev%data(i, j) + &
& prev%data(i, j+1)) / curr%dy**2)
end do
end subroutine evolve_edges
end module core
! Field metadata for heat equation solver
module heat
use iso_fortran_env, only : REAL64
implicit none
integer, parameter :: dp = REAL64
real(dp), parameter :: DX = 0.01, DY = 0.01 ! Fixed grid spacing
type :: field
integer :: nx ! local dimension of the field
integer :: ny
integer :: nx_full ! global dimension of the field
integer :: ny_full
real(dp) :: dx
real(dp) :: dy
real(dp), dimension(:,:), allocatable :: data
end type field
type :: parallel_data
integer :: size
integer :: rank
integer :: nup, ndown, nleft, nright ! Ranks of neighbouring MPI tasks
integer :: comm
integer :: requests(8) ! Non-blocking communication handles
integer :: rowtype ! MPI Datatype for communication of
! rows
integer :: columntype ! MPI Datatype for communication of
! columns
integer :: subarraytype ! MPI Datatype for text I/O
integer :: restarttype ! MPI Datatype for restart I/O
integer :: filetype ! MPI Datatype for file view in
! restart I/O
end type parallel_data
contains
! Initialize the field type metadata
! Arguments:
! field0 (type(field)): input field
! nx, ny, dx, dy: field dimensions and spatial step size
subroutine set_field_dimensions(field0, nx, ny, parallel)
implicit none
type(field), intent(out) :: field0
integer, intent(in) :: nx, ny
type(parallel_data), intent(in) :: parallel
integer :: nx_local, ny_local
integer, dimension(2) :: dims, coords
logical :: periods(2)
integer :: ierr
call mpi_cart_get(parallel%comm, 2, dims, periods, coords, ierr)
nx_local = nx / dims(1)
ny_local = ny / dims(2)
field0%dx = DX
field0%dy = DY
field0%nx = nx_local
field0%ny = ny_local
field0%nx_full = nx
field0%ny_full = ny
end subroutine set_field_dimensions
subroutine parallel_setup(parallel, nx, ny)
use mpi
implicit none
type(parallel_data), intent(out) :: parallel
integer, intent(in), optional :: nx, ny
integer :: nx_local, ny_local
integer :: world_size
integer :: dims(2) = (/0, 0/)
logical :: periods(2) = (/.false., .false./)
integer :: coords(2)
integer, dimension(2) :: sizes, subsizes, offsets
integer :: ierr
call mpi_comm_size(MPI_COMM_WORLD, world_size, ierr)
! Set grid dimensions
call mpi_dims_create(world_size, 2, dims, ierr)
nx_local = nx / dims(1)
ny_local = ny / dims(2)
! Ensure that the grid is divisible to the MPI tasks
if (nx_local * dims(1) /= nx) then
write(*,*) 'Cannot divide grid evenly to processors in x-direction', &
& nx_local, dims(1), nx
call mpi_abort(MPI_COMM_WORLD, -2, ierr)
end if
if (ny_local * dims(2) /= ny) then
write(*,*) 'Cannot divide grid evenly to processors in y-direction', &
& ny_local, dims(2), ny
call mpi_abort(MPI_COMM_WORLD, -2, ierr)
end if
! Create cartesian communicator
call mpi_cart_create(MPI_COMM_WORLD, 2, dims, periods, .true., parallel%comm, ierr)
call mpi_cart_shift(parallel%comm, 0, 1, parallel%nup, parallel%ndown, ierr)
call mpi_cart_shift(parallel%comm, 1, 1, parallel%nleft, &
& parallel%nright, ierr)
call mpi_comm_size(parallel%comm, parallel%size, ierr)
call mpi_comm_rank(parallel%comm, parallel%rank, ierr)
if (parallel%rank == 0) then
write(*,'(A, I3, A3, I3)') 'Using domain decomposition', dims(1), 'x', &
& dims(2)
write(*,'(A, I5, A3, I5)') 'Local domain size', nx_local, 'x', ny_local
end if
! Create datatypes for halo exchange
call mpi_type_vector(ny_local + 2, 1, nx_local + 2, MPI_DOUBLE_PRECISION, &
& parallel%rowtype, ierr)
call mpi_type_contiguous(nx_local + 2, MPI_DOUBLE_PRECISION, &
& parallel%columntype, ierr)
call mpi_type_commit(parallel%rowtype, ierr)
call mpi_type_commit(parallel%columntype, ierr)
! Create datatype for subblock needed in I/O
! Rank 0 uses datatype for receiving data into full array while
! other ranks use datatype for sending the inner part of array
sizes(1) = nx_local + 2
sizes(2) = ny_local + 2
subsizes(1) = nx_local
subsizes(2) = ny_local
offsets(1) = 1
offsets(2) = 1
if (parallel%rank == 0) then
sizes(1) = nx
sizes(2) = ny
offsets(1) = 0
offsets(2) = 0
end if
call mpi_type_create_subarray(2, sizes, subsizes, offsets, &
& MPI_ORDER_FORTRAN, MPI_DOUBLE_PRECISION, parallel%subarraytype, &
& ierr)
call mpi_type_commit(parallel%subarraytype, ierr)
! Create datatypes for restart I/O
! For boundary ranks also the ghost layer (boundary condition)
! is written
call mpi_cart_coords(parallel%comm, parallel%rank, 2, coords, ierr);
sizes(1) = nx + 2
sizes(2) = ny + 2
offsets(1) = 1 + coords(1) * nx_local
offsets(2) = 1 + coords(2) * ny_local
if (coords(1) == 0) then
offsets(1) = offsets(1) - 1
subsizes(1) = subsizes(1) + 1
end if
if (coords(1) == dims(1) - 1) then
subsizes(1) = subsizes(1) + 1;
end if
if (coords(2) == 0) then
offsets(2) = offsets(2) - 1
subsizes(2) = subsizes(2) + 1
end if
if (coords(2) == dims(2) - 1) then
subsizes(2) = subsizes(2) + 1
end if
call mpi_type_create_subarray(2, sizes, subsizes, offsets, &
& MPI_ORDER_FORTRAN, MPI_DOUBLE_PRECISION, parallel%filetype, ierr)
call mpi_type_commit(parallel%filetype, ierr)
sizes(1) = nx_local + 2
sizes(2) = ny_local + 2
offsets(1) = 1
offsets(2) = 1
if (coords(1) == 0) then
offsets(1) = 0
end if
if (coords(2) == 0) then
offsets(2) = 0
end if
call mpi_type_create_subarray(2, sizes, subsizes, offsets, &
& MPI_ORDER_FORTRAN, MPI_DOUBLE_PRECISION, parallel%restarttype, ierr)
call mpi_type_commit(parallel%restarttype, ierr)
end subroutine parallel_setup
end module heat
! I/O routines for heat equation solver
module io
use heat
use mpi
contains
! Output routine, saves the temperature distribution as a png image
! Arguments:
! curr (type(field)): variable with the temperature data
! iter (integer): index of the time step
subroutine write_field(curr, iter, parallel)
use pngwriter
implicit none
type(field), intent(in) :: curr
integer, intent(in) :: iter
type(parallel_data), intent(in) :: parallel
character(len=85) :: filename
integer :: stat
real(dp), dimension(:,:), allocatable, target :: full_data
integer :: coords(2)
integer :: ix, jy
integer :: p, ierr
if (parallel%rank == 0) then
allocate(full_data(curr%nx_full, curr%ny_full))
! Copy rank #0 data to the global array
full_data(1:curr%nx, 1:curr%ny) = curr%data(1:curr%nx, 1:curr%ny)
! Receive data from other ranks
do p = 1, parallel%size - 1
call mpi_cart_coords(parallel%comm, p, 2, coords, ierr)
ix = coords(1) * curr%nx + 1
jy = coords(2) * curr%ny + 1
call mpi_recv(full_data(ix, jy), 1, parallel%subarraytype, p, 22, &
& parallel%comm, MPI_STATUS_IGNORE, ierr)
end do
write(filename,'(A5,I4.4,A4,A)') 'heat_', iter, '.png'
stat = save_png(full_data, curr%nx_full, curr%ny_full, filename)
deallocate(full_data)
else
! Send data
call mpi_ssend(curr%data, 1, parallel%subarraytype, 0, 22, &
& parallel%comm, ierr)
end if
end subroutine write_field
! Reads the temperature distribution from an input file
! Arguments:
! field0 (type(field)): field variable that will store the
! read data
! filename (char): name of the input file
! Note that this version assumes the input data to be in C memory layout
subroutine read_field(field0, filename, parallel)
implicit none
type(field), intent(out) :: field0
character(len=85), intent(in) :: filename
type(parallel_data), intent(out) :: parallel
integer :: nx, ny, i, p, ierr
character(len=2) :: dummy
real(dp), dimension(:,:), allocatable :: full_data
integer :: coords(2)
integer :: ix, jy
open(10, file=filename)
! Read the header
read(10, *) dummy, nx, ny
call parallel_setup(parallel, nx, ny)
call set_field_dimensions(field0, nx, ny, parallel)
! The arrays for temperature field contain also a halo region
allocate(field0%data(0:field0%nx+1, 0:field0%ny+1))
if (parallel%rank == 0) then
allocate(full_data(nx, ny))
! Read the data
do i = 1, nx
read(10, *) full_data(i, 1:ny)
end do
! Copy own local part
field0%data(1:field0%nx, 1:field0%ny) = full_data(1:field0%nx, 1:field0%ny)
! Send to other process
do p=1, parallel%size - 1
call mpi_cart_coords(parallel%comm, p, 2, coords, ierr)
ix = coords(1) * field0%nx + 1
jy = coords(2) * field0%ny + 1
call mpi_send(full_data(ix, jy), 1, parallel%subarraytype, p, 44, &
& parallel%comm, ierr)
end do
else
! Receive data
call mpi_recv(field0%data, 1, parallel%subarraytype, 0, 44, &
& parallel%comm, MPI_STATUS_IGNORE, ierr)
end if
! Set the boundary values
field0%data(1:field0%nx, 0) = field0%data(1:field0%nx, 1)
field0%data(1:field0%nx, field0%ny+1) = field0%data(1:field0%nx, field0%ny)
field0%data(0, 0:field0%ny+1) = field0%data(1, 0:field0%ny+1)
field0%data(field0%nx+1, 0:field0%ny+1) = field0%data(field0%nx, 0:field0%ny+1)
close(10)
if (parallel%rank == 0) then
deallocate(full_data)
end if
end subroutine read_field
! Write a restart checkpoint that contains field dimensions, current
! iteration number and temperature field.
subroutine write_restart(temp, parallel, iter)
implicit none
type(field), intent(in) :: temp
type(parallel_data), intent(in) :: parallel
integer, intent(in) :: iter
integer :: fp
integer(kind=MPI_OFFSET_KIND) :: disp
integer :: ierr
call mpi_file_open(MPI_COMM_WORLD, "HEAT_RESTART.dat", &
& MPI_MODE_CREATE + MPI_MODE_WRONLY, &
& MPI_INFO_NULL, fp, ierr)
! write the restart header by the rank#0
if (parallel%rank == 0) then
disp = 0
call mpi_file_write_at(fp, disp, temp%nx_full, 1, MPI_INTEGER, MPI_STATUS_IGNORE, ierr)
disp = disp + sizeof(temp%nx_full)
call mpi_file_write_at(fp, disp, temp%ny_full, 1, MPI_INTEGER, MPI_STATUS_IGNORE, ierr)
disp = disp + sizeof(temp%ny_full)
call mpi_file_write_at(fp, disp, iter, 1, MPI_INTEGER, MPI_STATUS_IGNORE, ierr)
end if
! For simplicity, skip three integers at the beginning of file in all tasks
disp = 3 * sizeof(temp%nx_full)
call mpi_file_set_view(fp, 0, MPI_DOUBLE_PRECISION, parallel%filetype, &
& 'native', MPI_INFO_NULL, ierr);
call mpi_file_write_at_all(fp, disp, temp%data, &
& 1, parallel%restarttype, MPI_STATUS_IGNORE, ierr)
call mpi_file_close(fp, ierr)
end subroutine write_restart
! Read a restart checkpoint that contains field dimensions, current
! iteration number and temperature field.
subroutine read_restart(temp, parallel, iter)
implicit none
type(field), intent(inout) :: temp
type(parallel_data), intent(inout) :: parallel
integer, intent(out) :: iter
integer :: rows, cols
integer :: fp
integer(kind=MPI_OFFSET_KIND) :: disp
integer :: ierr
call mpi_file_open(MPI_COMM_WORLD, "HEAT_RESTART.dat", MPI_MODE_RDONLY, &
& MPI_INFO_NULL, fp, ierr)
! read in the restart header (dimensions, number of preceding steps) by the
! rank#0 and broadcast it to other ranks
if (parallel%rank == 0) then
disp = 0
call mpi_file_read_at(fp, disp, rows, 1, MPI_INTEGER, MPI_STATUS_IGNORE, ierr)
disp = disp + sizeof(rows)
call mpi_file_read_at(fp, disp, cols, 1, MPI_INTEGER, MPI_STATUS_IGNORE, ierr)
disp = disp + sizeof(cols)
call mpi_file_read_at(fp, disp, iter, 1, MPI_INTEGER, MPI_STATUS_IGNORE, ierr)
end if
call mpi_bcast(rows, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
call mpi_bcast(cols, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
call mpi_bcast(iter, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
! set correct dimensions to MPI metadata
call parallel_setup(parallel, rows, cols)
! set local dimensions and allocate memory for the data
call set_field_dimensions(temp, rows, cols, parallel)
allocate(temp%data(0:temp%nx+1, 0:temp%ny+1))
temp%data(:,:) = 0.0
! read in restart data
disp = 3*sizeof(rows)
call mpi_file_set_view(fp, 0, MPI_DOUBLE_PRECISION, parallel%filetype, &
& 'native', MPI_INFO_NULL, ierr);
call mpi_file_read_at_all(fp, disp, temp%data, &
& 1, parallel%restarttype, MPI_STATUS_IGNORE, ierr)
call mpi_file_close(fp, ierr)
end subroutine read_restart
end module io
! Heat equation solver in 2D.
program heat_solve
use heat
use core
use io
use setup
use utilities
use mpi
implicit none
real(dp), parameter :: a = 0.5 ! Diffusion constant
type(field) :: current, previous ! Current and previous temperature fields
real(dp) :: dt ! Time step
integer :: nsteps ! Number of time steps
integer, parameter :: image_interval = 500 ! Image output interval
integer, parameter :: checkpoint_interval = 200 ! restart interval
type(parallel_data) :: parallelization
integer :: ierr
integer :: iter, iter0
real(kind=dp) :: start, stop ! Timers
call mpi_init(ierr)
call initialize(current, previous, nsteps, parallelization, iter0)
! Draw the picture of the initial state
call write_field(current, iter0, parallelization)
iter0 = iter0 + 1
! Largest stable time step
dt = current%dx**2 * current%dy**2 / &
& (2.0 * a * (current%dx**2 + current%dy**2))
! Main iteration loop, save a picture every
! image_interval steps
start = mpi_wtime()
do iter = iter0, iter0 + nsteps
call exchange_init(previous, parallelization)
call evolve_interior(current, previous, a, dt)
call exchange_finalize(parallelization)
call evolve_edges(current,previous, a, dt)
if (mod(iter, image_interval) == 0) then
call write_field(current, iter, parallelization)
end if
! Write a checkpoint now and then for easy restarting
if (mod(iter, checkpoint_interval) == 0) then
call write_restart(current, parallelization, iter)
end if
call swap_fields(current, previous)
end do
stop = mpi_wtime()
if (parallelization % rank == 0) then
write(*,'(A,F7.3,A)') 'Iteration took ', stop - start, ' seconds.'
write(*,'(A,G0)') 'Reference value at 5,5: ', previous % data(5,5)
end if
call finalize(current, previous)
call mpi_finalize(ierr)
end program heat_solve