! PNG writer for heat equation solver
module pngwriter
use heat
contains
function save_png(data, nx, ny, fname) result(stat)
use, intrinsic :: ISO_C_BINDING
implicit none
real(dp), dimension(:,:), intent(in) :: data
integer, intent(in) :: nx, ny
character(len=*), intent(in) :: fname
integer :: stat
! Interface for save_png C-function
interface
! The C function definition is
! int save_png(double *data, const int nx, const int ny,
!              const char *fname, const char order)
function save_png_c(data, nx, ny, fname, order) &
& bind(C,name="save_png") result(stat)
use, intrinsic :: ISO_C_BINDING
implicit none
real(kind=C_DOUBLE) :: data(*)
integer(kind=C_INT), value, intent(IN) :: nx, ny
character(kind=C_CHAR), intent(IN) :: fname(*)
character(kind=C_CHAR), value, intent(IN) :: order
integer(kind=C_INT) :: stat
end function save_png_c
end interface
stat = save_png_c(data, nx, ny, trim(fname) // C_NULL_CHAR, 'f')
if (stat /= 0) then
write(*,*) 'save_png returned error!'
end if
end function save_png
end module pngwriter
! Setup routines for heat equation solver
module setup
use heat
contains
subroutine initialize(previous, current, nsteps, parallel, iter0)
use utilities
use io
implicit none
type(field), intent(out) :: previous, current
integer, intent(out) :: nsteps, iter0
type(parallel_data), intent(out) :: parallel
integer :: rows, cols
logical :: using_input_file, restart
character(len=85) :: input_file, arg ! Input file name and command line arguments
! Default values for grid size and time steps
rows = 2000
cols = 2000
nsteps = 500
using_input_file = .false.
iter0 = 0
! Read in the command line arguments and
! set up the needed variables
select case(command_argument_count())
case(0) ! No arguments -> default values
case(1) ! One argument -> input file name
using_input_file = .true.
call get_command_argument(1, input_file)
case(2) ! Two arguments -> input file name and number of steps
using_input_file = .true.
call get_command_argument(1, input_file)
call get_command_argument(2, arg)
read(arg, *) nsteps
case(3) ! Three arguments -> rows, cols and nsteps
call get_command_argument(1, arg)
read(arg, *) rows
call get_command_argument(2, arg)
read(arg, *) cols
call get_command_argument(3, arg)
read(arg, *) nsteps
case default
call usage()
stop
end select
! Check if checkpoint exists
inquire (file="HEAT_RESTART.dat", exist=restart)
if (restart) then
call read_restart(previous, parallel, iter0)
if (parallel % rank == 0) then
write(*,'(a)') 'Restarting from an earlier checkpoint'
write(*,'(a,i0)') ' saved at iteration ', iter0
end if
call copy_fields(previous, current)
! Initialize the fields according to the command line arguments
else if (using_input_file) then
call read_field(previous, input_file, parallel)
call copy_fields(previous, current)
else
call parallel_setup(parallel, rows, cols)
call set_field_dimensions(previous, rows, cols, parallel)
call set_field_dimensions(current, rows, cols, parallel)
call generate_field(previous, parallel)
call copy_fields(previous, current)
end if
end subroutine initialize
! Generate the initial temperature field. The pattern is a disc with a radius
! of nx_full / 6 in the center of the grid.
! Boundary conditions are (different) constant temperatures outside the grid
subroutine generate_field(field0, parallel)
use heat
implicit none
type(field), intent(inout) :: field0
type(parallel_data), intent(in) :: parallel
real(dp) :: radius2, ds2
integer :: i, j
integer, dimension(2) :: dims, coords
logical :: periods(2)
integer :: ierr
! The field's data array also contains a halo region
allocate(field0%data(0:field0%nx+1, 0:field0%ny+1))
call mpi_cart_get(parallel%comm, 2, dims, periods, coords, ierr)
! Square of the disk radius
radius2 = (field0%nx_full / 6.0)**2
do j = 0, field0%ny + 1
do i = 0, field0%nx + 1
ds2 = (i + coords(1) * field0%nx - field0%nx_full / 2.0 + 1)**2 + &
(j + coords(2) * field0%ny - field0%ny_full / 2.0 + 1)**2
if (ds2 < radius2) then
field0%data(i,j) = 5.0
else
field0%data(i,j) = 65.0
end if
end do
end do
! Left boundary
if (coords(2) == 0) then
field0%data(:,0) = 20.0_dp
end if
! Upper boundary
if (coords(1) == 0) then
field0%data(0,:) = 85.0_dp
end if
! Right boundary
if (coords(2) == dims(2) - 1) then
field0%data(:,field0%ny+1) = 70.0_dp
end if
! Lower boundary
if (coords(1) == dims(1) - 1) then
field0%data(field0%nx+1,:) = 5.0_dp
end if
end subroutine generate_field
! Clean-up routine for the field type
! Arguments:
!   field0, field1 (type(field)): field variables to be deallocated
subroutine finalize(field0, field1)
use heat
implicit none
type(field), intent(inout) :: field0, field1
deallocate(field0%data)
deallocate(field1%data)
end subroutine finalize
! Helper routine that prints out a simple usage message if the
! user gives more than three arguments
subroutine usage()
implicit none
character(len=256) :: buf
call get_command_argument(0, buf)
write (*,'(A)') 'Usage:'
write (*,'(A, " (default values will be used)")') trim(buf)
write (*,'(A, " <filename>")') trim(buf)
write (*,'(A, " <filename> <nsteps>")') trim(buf)
write (*,'(A, " <rows> <cols> <nsteps>")') trim(buf)
end subroutine usage
end module setup
! Utility routines for heat equation solver
! NOTE: This file does not need to be edited!
module utilities
use heat
contains
! Swap the data fields of two variables of type field
! Arguments:
! curr, prev (type(field)): the two variables that are swapped
subroutine swap_fields(curr, prev)
implicit none
type(field), intent(inout) :: curr, prev
real(dp), allocatable, dimension(:,:) :: tmp
call move_alloc(curr%data, tmp)
call move_alloc(prev%data, curr%data)
call move_alloc(tmp, prev%data)
end subroutine swap_fields
! Copy the data from one field to another
! Arguments:
! from_field (type(field)): variable to copy from
! to_field (type(field)): variable to copy to
subroutine copy_fields(from_field, to_field)
implicit none
type(field), intent(in) :: from_field
type(field), intent(out) :: to_field
! Consistency checks
if (.not.allocated(from_field%data)) then
write (*,*) "Can not copy from a field without allocated data"
stop
end if
if (.not.allocated(to_field%data)) then
! Target is not initialized, allocate memory
allocate(to_field%data(lbound(from_field%data, 1):ubound(from_field%data, 1), &
& lbound(from_field%data, 2):ubound(from_field%data, 2)))
else if (any(shape(from_field%data) /= shape(to_field%data))) then
write (*,*) "Wrong field data sizes in copy routine"
print *, shape(from_field%data), shape(to_field%data)
stop
end if
to_field%data = from_field%data
to_field%nx = from_field%nx
to_field%ny = from_field%ny
to_field%dx = from_field%dx
to_field%dy = from_field%dy
end subroutine copy_fields
end module utilities
import numpy as np
# Time evolution for the inner part of the grid
def evolve_inner(u, u_previous, a, dt, dx2, dy2):
"""Explicit time evolution.
u: new temperature field
u_previous: previous field
a: diffusion constant
dt: time step
dx2: grid spacing squared, i.e. dx^2
dy2: -- "" -- , i.e. dy^2"""
u[2:-2, 2:-2] = u_previous[2:-2, 2:-2] + a * dt * ( \
(u_previous[3:-1, 2:-2] - 2*u_previous[2:-2, 2:-2] + \
u_previous[1:-3, 2:-2]) / dx2 + \
(u_previous[2:-2, 3:-1] - 2*u_previous[2:-2, 2:-2] + \
u_previous[2:-2, 1:-3]) / dy2 )
# Time evolution for the edges of the grid
def evolve_edges(u, u_previous, a, dt, dx2, dy2):
"""Explicit time evolution.
u: new temperature field
u_previous: previous field
a: diffusion constant
dt: time step
dx2: grid spacing squared, i.e. dx^2
dy2: -- "" -- , i.e. dy^2"""
u[1, 1:-1] = u_previous[1, 1:-1] + a * dt * ( \
(u_previous[2, 1:-1] - 2*u_previous[1, 1:-1] + \
u_previous[0, 1:-1]) / dx2 + \
(u_previous[1, 2:] - 2*u_previous[1, 1:-1] + \
u_previous[1, :-2]) / dy2 )
u[-2, 1:-1] = u_previous[-2, 1:-1] + a * dt * ( \
(u_previous[-1, 1:-1] - 2*u_previous[-2, 1:-1] + \
u_previous[-3, 1:-1]) / dx2 + \
(u_previous[-2, 2:] - 2*u_previous[-2, 1:-1] + \
u_previous[-2, :-2]) / dy2 )
u[1:-1, 1] = u_previous[1:-1, 1] + a * dt * ( \
(u_previous[2:, 1] - 2*u_previous[1:-1, 1] + \
u_previous[:-2, 1]) / dx2 + \
(u_previous[1:-1, 2] - 2*u_previous[1:-1, 1] + \
u_previous[1:-1, 0]) / dy2 )
u[1:-1, -2] = u_previous[1:-1, -2] + a * dt * ( \
(u_previous[2:, -2] - 2*u_previous[1:-1, -2] + \
u_previous[:-2, -2]) / dx2 + \
(u_previous[1:-1, -1] - 2*u_previous[1:-1, -2] + \
u_previous[1:-1, -3]) / dy2 )
# cython: profile=True
import numpy as np
cimport numpy as cnp
import cython
# Time evolution for the inner part of the grid
@cython.boundscheck(False)
@cython.wraparound(False)
@cython.cdivision(True)
cpdef evolve_inner(cnp.ndarray[cnp.double_t, ndim=2]u,
cnp.ndarray[cnp.double_t, ndim=2]u_previous,
double a, double dt, double dx2, double dy2):
"""Explicit time evolution.
u: new temperature field
u_previous: previous field
a: diffusion constant
dt: time step
dx2: grid spacing squared, i.e. dx^2
dy2: grid spacing squared, i.e. dy^2"""
cdef int nx = u.shape[0] - 2
cdef int ny = u.shape[1] - 2
cdef int i,j
# Multiplication is more efficient than division
cdef double dx2inv = 1. / dx2
cdef double dy2inv = 1. / dy2
for i in range(2, nx):
for j in range(2, ny):
u[i, j] = u_previous[i, j] + a * dt * ( \
(u_previous[i+1, j] - 2*u_previous[i, j] + \
u_previous[i-1, j]) * dx2inv + \
(u_previous[i, j+1] - 2*u_previous[i, j] + \
u_previous[i, j-1]) * dy2inv )
# Time evolution for the edges of the grid
@cython.boundscheck(False)
@cython.wraparound(False)
@cython.cdivision(True)
cpdef evolve_edges(cnp.ndarray[cnp.double_t, ndim=2]u,
cnp.ndarray[cnp.double_t, ndim=2]u_previous,
double a, double dt, double dx2, double dy2):
"""Explicit time evolution.
u: new temperature field
u_previous: previous field
a: diffusion constant
dt: time step
dx2: grid spacing squared, i.e. dx^2
dy2: -- "" -- , i.e. dy^2"""
cdef int nx = u.shape[0] - 2
cdef int ny = u.shape[1] - 2
cdef int i,j
# Multiplication is more efficient than division
cdef double dx2inv = 1. / dx2
cdef double dy2inv = 1. / dy2
j = 1
for i in range(1, nx+1):
u[i, j] = u_previous[i, j] + a * dt * ( \
(u_previous[i+1, j] - 2*u_previous[i, j] + \
u_previous[i-1, j]) * dx2inv + \
(u_previous[i, j+1] - 2*u_previous[i, j] + \
u_previous[i, j-1]) * dy2inv )
j = ny
for i in range(1, nx+1):
u[i, j] = u_previous[i, j] + a * dt * ( \
(u_previous[i+1, j] - 2*u_previous[i, j] + \
u_previous[i-1, j]) * dx2inv + \
(u_previous[i, j+1] - 2*u_previous[i, j] + \
u_previous[i, j-1]) * dy2inv )
i = 1
for j in range(1, ny+1):
u[i, j] = u_previous[i, j] + a * dt * ( \
(u_previous[i+1, j] - 2*u_previous[i, j] + \
u_previous[i-1, j]) * dx2inv + \
(u_previous[i, j+1] - 2*u_previous[i, j] + \
u_previous[i, j-1]) * dy2inv )
i = nx
for j in range(1, ny+1):
u[i, j] = u_previous[i, j] + a * dt * ( \
(u_previous[i+1, j] - 2*u_previous[i, j] + \
u_previous[i-1, j]) * dx2inv + \
(u_previous[i, j+1] - 2*u_previous[i, j] + \
u_previous[i, j-1]) * dy2inv )
from mpi4py.MPI import Request
import numpy as np
# Non-blocking halo exchange for the distributed temperature field
def exchange_init(u, parallel):
# Send up, receive from down
parallel.requests[0] = parallel.comm.Isend((u[1,:], 1, parallel.rowtype),
dest=parallel.nup)
parallel.requests[1] = parallel.comm.Irecv((u[-1,:], 1, parallel.rowtype),
source=parallel.ndown)
# Send down, receive from up
parallel.requests[2] = parallel.comm.Isend((u[-2,:], 1, parallel.rowtype),
dest=parallel.ndown)
parallel.requests[3] = parallel.comm.Irecv((u[0,:], 1, parallel.rowtype),
source=parallel.nup)
# Send to the left, receive from right
parallel.requests[4] = parallel.comm.Isend((u.ravel()[1:], 1,
parallel.columntype),
dest=parallel.nleft)
idx = u.shape[1] - 1 # ny + 1
parallel.requests[5] = parallel.comm.Irecv((u.ravel()[idx:], 1,
parallel.columntype),
source=parallel.nright)
# Send to the right, receive from left
idx = u.shape[1] - 2 # ny
parallel.requests[6] = parallel.comm.Isend((u.ravel()[idx:], 1,
parallel.columntype),
dest=parallel.nright)
parallel.requests[7] = parallel.comm.Irecv((u, 1, parallel.columntype),
source=parallel.nleft)
def exchange_finalize(parallel):
# MPI.Request.Waitall(parallel.requests)
Request.Waitall(parallel.requests)
from __future__ import print_function
import numpy as np
import time
from heat_setup import initialize
from evolve import evolve_inner, evolve_edges
from exchange import exchange_init, exchange_finalize
from heat_io import write_field, write_restart
# Basic parameters
a = 0.5 # Diffusion constant
image_interval = 100 # Write frequency for png files
restart_interval = 200 # Write frequency for restart files
# Grid spacings
dx = 0.01
dy = 0.01
dx2 = dx**2
dy2 = dy**2
# For stability, this is the largest possible time step:
dt = dx2*dy2 / ( 2*a*(dx2+dy2) )
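# (The explicit scheme is stable when a*dt*(1/dx2 + 1/dy2) <= 1/2;
#  the expression above saturates that bound.)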
def main():
# Initialize
field, field0, parallel, iter0, timesteps = initialize()
write_field(field, parallel, iter0)
t0 = time.time()
for iter in range(iter0, iter0 + timesteps):
exchange_init(field0, parallel)
evolve_inner(field, field0, a, dt, dx2, dy2)
exchange_finalize(parallel)
evolve_edges(field, field0, a, dt, dx2, dy2)
if iter % image_interval == 0:
write_field(field, parallel, iter)
if iter % restart_interval == 0:
write_restart(field, parallel, iter)
field, field0 = field0, field
t1 = time.time()
if parallel.rank == 0:
print("Running time: {0}".format(t1-t0))
if __name__ == '__main__':
main()
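# Typical launch under an MPI launcher (exact command depends on the installation),
# assuming this driver script is saved as, e.g., heat.py:
#   mpiexec -n 4 python heat.py [input_file [nsteps]]
#   mpiexec -n 4 python heat.py nx ny nsteps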
from __future__ import print_function
import numpy as np
from mpi4py import MPI
from parallel import Parallel
try:
import h5py
use_hdf5 = True
except ImportError:
use_hdf5 = False
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
# Set the colormap
plt.rcParams['image.cmap'] = 'BrBG'
def write_field(field, parallel, step):
nx, ny = [i - 2 for i in field.shape]
nx_full = parallel.dims[0] * nx
ny_full = parallel.dims[1] * ny
if parallel.rank == 0:
field_full = np.zeros((nx_full, ny_full), float)
field_full[:nx, :ny] = field[1:-1, 1:-1]
# Receive data from other ranks
for p in range(1, parallel.size):
coords = parallel.comm.Get_coords(p)
idx = coords[0] * nx * ny_full + coords[1] * ny
parallel.comm.Recv((field_full.ravel()[idx:], 1,
parallel.subarraytype), source=p)
# Write out the data
plt.figure()
plt.gca().clear()
plt.imshow(field_full)
plt.axis('off')
plt.savefig('heat_{0:04d}.png'.format(step))
else:
# Send the data
parallel.comm.Ssend((field, 1, parallel.subarraytype), dest=0)
def read_field(filename):
# Read the initial temperature field from file
rank = MPI.COMM_WORLD.Get_rank()
if rank == 0:
with open(filename) as f:
line = f.readline().split()
shape = [int(n) for n in line[1:3]]
else:
shape = None
nx, ny = MPI.COMM_WORLD.bcast(shape, root=0)
parallel = Parallel(nx, ny)
nx_local = nx // parallel.dims[0]
ny_local = ny // parallel.dims[1]
field = np.zeros((nx_local + 2, ny_local + 2), dtype=float)
if parallel.rank == 0:
field_full = np.loadtxt(filename)
field[1:-1, 1:-1] = field_full[:nx_local, :ny_local]
# Send the data to other ranks
for p in range(1, parallel.size):
coords = parallel.comm.Get_coords(p)
idx = coords[0] * nx_local * ny + coords[1] * ny_local
parallel.comm.Send((field_full.ravel()[idx:], 1,
parallel.subarraytype), dest=p)
else:
parallel.comm.Recv((field, 1, parallel.subarraytype),
source=0)
# fo = np.empty((nx_local, ny_local), dtype=float)
# parallel.comm.Recv(fo, source=0)
# import pickle
# pickle.dump(fo, open('fo_f{0}.pckl'.format(parallel.rank), 'w'))
# Set the boundary values
field[0, :] = field[1, :]
field[-1, :] = field[-2, :]
field[:, 0] = field[:, 1]
field[:,-1] = field[:,-2]
field0 = field.copy()
return field, field0, parallel
def write_restart(field, parallel, iter):
nx, ny = [i - 2 for i in field.shape]
nx_full = parallel.dims[0] * nx
ny_full = parallel.dims[1] * ny
mode = MPI.MODE_CREATE | MPI.MODE_WRONLY
fh = MPI.File.Open(parallel.comm, "HEAT_RESTART.dat", mode)
if parallel.rank == 0:
fh.Write(np.array((nx_full, ny_full, iter)))
disp = 3 * 8 # header: three 8-byte integers (NumPy's default integer size)
fh.Set_view(disp, MPI.DOUBLE, parallel.filetype)
fh.Write_all((field, 1, parallel.restarttype))
fh.Close()
def read_restart():
mode = MPI.MODE_RDONLY
fh = MPI.File.Open(MPI.COMM_WORLD, "HEAT_RESTART.dat", mode)
buf = np.zeros(3, int)
fh.Read_all(buf)
nx, ny, iter = buf[0], buf[1], buf[2]
parallel = Parallel(nx, ny)
nx_local = nx // parallel.dims[0]
ny_local = ny // parallel.dims[1]
if parallel.rank == 0:
print("Restarting from iteration ", iter)
field = np.zeros((nx_local + 2, ny_local + 2), dtype=float)
disp = 3 * 8 # header: three 8-byte integers (NumPy's default integer size)
fh.Set_view(disp, MPI.DOUBLE, parallel.filetype)
fh.Read_all((field, 1, parallel.restarttype))
fh.Close()
field0 = field.copy()
return field, field0, parallel, iter
import numpy as np
import sys
from os.path import isfile
from parallel import Parallel
from heat_io import read_restart, read_field
def initialize():
timesteps = 500 # Number of time-steps to evolve system
nx = 1000
ny = 1000
if isfile('HEAT_RESTART.dat'):
field, field0, parallel, iter0 = read_restart()
else:
if len(sys.argv) == 2:
filename = sys.argv[1]
field, field0, parallel = read_field(filename)
elif len(sys.argv) == 3:
filename = sys.argv[1]
timesteps = int(sys.argv[2])
field, field0, parallel = read_field(filename)
elif len(sys.argv) == 4:
nx = int(sys.argv[1])
ny = int(sys.argv[2])
timesteps = int(sys.argv[3])
field, field0, parallel = generate_field(nx, ny)
else:
field, field0, parallel = generate_field(nx, ny)
iter0 = 0
return field, field0, parallel, iter0, timesteps
def generate_field(nx, ny):
parallel = Parallel(nx, ny)
nx_local = nx // parallel.dims[0]
ny_local = ny // parallel.dims[1]
field = np.zeros((nx_local + 2, ny_local + 2), dtype=float)
# Square of the disk radius
radius2 = (nx / 6.0)**2
X, Y = np.meshgrid(np.arange(nx_local + 2), np.arange(ny_local + 2),
indexing='ij')
ds2 = (X + parallel.coords[0] * nx_local - nx / 2.0 )**2 + \
(Y + parallel.coords[1] * ny_local - ny / 2.0 )**2
field[:] = np.where(ds2 < radius2, 5.0, 65.0)
# Boundaries
if parallel.coords[0] == 0:
field[0,:] = 85.0
if parallel.coords[0] == parallel.dims[0] - 1:
field[-1,:] = 5.0
if parallel.coords[1] == 0:
field[:,0] = 20.0
if parallel.coords[1] == parallel.dims[1] - 1:
field[:,-1] = 70.0
field0 = field.copy()
return field, field0, parallel
from mpi4py import MPI
class Parallel(object):
def __init__(self, nx, ny):
self.requests = []
world = MPI.COMM_WORLD
world_size = world.Get_size()
self.dims = MPI.Compute_dims(world_size, (0,0))
nx_local = nx // self.dims[0]
ny_local = ny // self.dims[1]
if nx_local * self.dims[0] != nx:
print("Cannot divide grid evenly to processors in x-direction!")
print("{0} x {1} != {2}".format(nx_local, self.dims[0], nx))
raise RuntimeError('Invalid domain decomposition')
if ny_local * self.dims[1] != ny:
print("Cannot divide grid evenly to processors in y-direction!")
print("{0} x {1} != {2}".format(ny_local, self.dims[1], ny))
raise RuntimeError('Invalid domain decomposition')
self.comm = world.Create_cart(self.dims, periods=None, reorder=True)
self.nup, self.ndown = self.comm.Shift(0, 1)
self.nleft, self.nright = self.comm.Shift(1, 1)
self.size = self.comm.Get_size()
self.rank = self.comm.Get_rank()
self.coords = self.comm.coords
if self.rank == 0:
print("Using domain decomposition {0} x {1}".format(self.dims[0],
self.dims[1]))
print("Local domain size {0} x {1}".format(nx_local, ny_local))
# Maybe there is a more elegant way to initialize the requests array?
self.requests = [0, ] * 8
# Datatypes for halo exchange
self.rowtype = MPI.DOUBLE.Create_contiguous(ny_local + 2)
self.rowtype.Commit()
self.columntype = MPI.DOUBLE.Create_vector(nx_local + 2, 1,
ny_local + 2)
self.columntype.Commit()
# Datatypes for subblock needed in text I/O
# Rank 0 uses a datatype for receiving data into the full array, while
# the other ranks use a datatype for sending the inner part of the array
subsizes = [nx_local, ny_local]
if self.rank == 0:
sizes = [nx, ny]
offsets = [0, 0]
else:
sizes = [nx_local + 2, ny_local + 2]
offsets = [1, 1]
self.subarraytype = MPI.DOUBLE.Create_subarray(sizes, subsizes, offsets)
self.subarraytype.Commit()
# Datatypes for restart I/O
sizes = [nx + 2, ny + 2]
offsets = [1 + self.coords[0] * nx_local, 1 + self.coords[1] * ny_local]
if self.coords[0] == 0:
offsets[0] -= 1
subsizes[0] += 1
if self.coords[0] == (self.dims[0] - 1):
subsizes[0] += 1
if self.coords[1] == 0:
offsets[1] -= 1
subsizes[1] += 1
if self.coords[1] == (self.dims[1] - 1):
subsizes[1] += 1
self.filetype = MPI.DOUBLE.Create_subarray(sizes, subsizes, offsets)
self.filetype.Commit()
sizes = [nx_local + 2, ny_local + 2]
offsets = [1, 1]
if self.coords[0] == 0:
offsets[0] = 0
if self.coords[1] == 0:
offsets[1] = 0
self.restarttype = MPI.DOUBLE.Create_subarray(sizes, subsizes, offsets)
self.restarttype.Commit()
from distutils.core import setup, Extension
from Cython.Build import cythonize
setup(
ext_modules=cythonize("evolve.pyx"),
)
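# The extension is typically built in place with:
#   python setup.py build_ext --inplace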
Copyright (C) 2018 CSC - IT Center for Science Ltd.
Licensed under the terms of the GNU General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
Code is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
Copy of the GNU General Public License can be obtained from
<http://www.gnu.org/licenses/>.
## Parallel Hello World
a) Write a simple parallel program that prints out something
(e.g. “Hello world!”) from multiple processes. Include the MPI headers
(C), use the MPI module (Fortran), or import mpi4py (Python), and
call the appropriate initialization and finalization routines.
b) Modify the program so that each process also prints out its rank,
and so that the process with rank 0 prints out the total number of MPI
processes as well.
program hello
use mpi
implicit none
integer :: rc, myid, ntasks
call MPI_INIT(rc)
call MPI_COMM_SIZE(MPI_COMM_WORLD, ntasks, rc)
call MPI_COMM_RANK(MPI_COMM_WORLD, myid, rc)
if(myid == 0) then
write(*,*) 'In total there are ',ntasks, 'tasks'
endif
write(*,*) 'Hello from ',myid
call MPI_FINALIZE(rc)
end program hello
#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>
int main(int argc, char *argv[])
{
int i, myid, ntasks;
MPI_Init(&argc, &argv);
MPI_Comm_size(MPI_COMM_WORLD, &ntasks);
MPI_Comm_rank(MPI_COMM_WORLD, &myid);
if (myid == 0) {
printf("In total there are %i tasks\n", ntasks);
}
printf("Hello from %i\n", myid);
MPI_Finalize();
return 0;
}
from __future__ import print_function
from mpi4py import MPI
comm = MPI.COMM_WORLD
size = comm.Get_size()
rank = comm.Get_rank()
if rank == 0:
print("In total there are {0} tasks".format(size))
print("Hello from", rank)
## Message chain
Write a simple program where every process sends data to the next
one. Let ntasks be the number of tasks and myid the rank of the
current process. Your program should work as follows:
- Every task with a rank less than ntasks-1 sends a message to task
myid+1. For example, task 0 sends a message to task 1.
- The message content is an integer array where each element is
initialized to myid.
- The message tag is the receiver’s id number.
- The sender prints out the number of elements it sends and the tag number.
- All tasks with rank ≥ 1 receive messages.
- Each receiver prints out their myid, and the first element in the
received array.
a) Implement the program described above using `MPI_Send` and
`MPI_Recv`. You may start from scratch or use as a starting point
[c/skeleton.c](c/skeleton.c), [fortran/skeleton.F90](fortran/skeleton.F90) or
[python/skeleton.py](python/skeleton.py). Investigate the timings with different
numbers of MPI tasks (e.g. 2, 4, 8, 16, ...), and pay attention especially to
rank 0. Can you explain the behaviour?
b) Rewrite the program using `MPI_Sendrecv` instead of `MPI_Send` and
`MPI_Recv`. Investigate again timings with different numbers of MPI
tasks (e.g. 2, 4, 8, 16, ...). Can you explain the differences to case a)?
c) Try to simplify the code by employing `MPI_PROC_NULL` to treat the
special cases of the first and last task in the chain.
d) Rewrite the program using non-blocking communication (`MPI_Isend` and
`MPI_Irecv`).
e) Rewrite the program using persistent communication operations
(`MPI_Send_init`, `MPI_Recv_init`, etc.).
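As a hint for parts b) and c), here is a minimal sketch (not the provided skeleton; the message length and variable names are illustrative) that combines `MPI_Sendrecv` with `MPI_PROC_NULL`. Communication with `MPI_PROC_NULL` is a no-op, so the first and last tasks of the chain need no special-case code.

```c
#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>

int main(int argc, char *argv[])
{
    int i, myid, ntasks, source, destination;
    const int size = 100;            /* illustrative message length */
    int *message, *receiveBuffer;
    MPI_Status status;

    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &ntasks);
    MPI_Comm_rank(MPI_COMM_WORLD, &myid);

    message = malloc(sizeof(int) * size);
    receiveBuffer = malloc(sizeof(int) * size);
    for (i = 0; i < size; i++) {
        message[i] = myid;
    }

    /* MPI_PROC_NULL turns the sends/receives at the ends of the chain
       into no-ops, so no special-casing of the first/last task is needed */
    destination = (myid < ntasks - 1) ? myid + 1 : MPI_PROC_NULL;
    source = (myid > 0) ? myid - 1 : MPI_PROC_NULL;

    /* Send with tag myid + 1 (the receiver's id), receive with tag myid */
    MPI_Sendrecv(message, size, MPI_INT, destination, myid + 1,
                 receiveBuffer, size, MPI_INT, source, myid,
                 MPI_COMM_WORLD, &status);

    if (destination != MPI_PROC_NULL) {
        printf("Sender: %d. Sent elements: %d. Tag: %d. Receiver: %d\n",
               myid, size, myid + 1, destination);
    }
    if (source != MPI_PROC_NULL) {
        printf("Receiver: %d. First element: %d.\n", myid, receiveBuffer[0]);
    }

    free(message);
    free(receiveBuffer);
    MPI_Finalize();
    return 0;
}
```

Because each task posts its send and receive in a single `MPI_Sendrecv` call, the chain does not serialize the way a naive `MPI_Send`/`MPI_Recv` sequence can when the messages are large enough for the sends to block.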
#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>
int main(int argc, char *argv[])
{
int i, myid, ntasks;
int size = 10000000;
int *message;
int *receiveBuffer;
MPI_Status status;
double t0, t1;
MPI_Init(&argc, &argv);
MPI_Comm_size(MPI_COMM_WORLD, &ntasks);
MPI_Comm_rank(MPI_COMM_WORLD, &myid);
/* Allocate message buffers */
message = malloc(sizeof(int) * size);
receiveBuffer = malloc(sizeof(int) * size);
/* Initialize message */
for (i = 0; i < size; i++) {
message[i] = myid;
}
/* Start measuring the time spent in communication */
MPI_Barrier(MPI_COMM_WORLD);
t0 = MPI_Wtime();
/* TODO start */
/* Send and receive messages as defined in exercise */
if (myid < ntasks - 1) {
printf("Sender: %d. Sent elements: %d. Tag: %d. Receiver: %d\n",
myid, size, myid + 1, myid + 1);
}
if (myid > 0) {
printf("Receiver: %d. first element %d.\n",
myid, receiveBuffer[0]);
}
/* TODO end */
/* Finalize measuring the time and print it out */
t1 = MPI_Wtime();
MPI_Barrier(MPI_COMM_WORLD);
fflush(stdout);
if (myid == 0) {
printf("\n");
printf("Timings:\n");
printf("Time elapsed in rank %2d: %6.3f\n", myid, t1 - t0);
}
MPI_Barrier(MPI_COMM_WORLD);
fflush(stdout);
if (myid == ntasks - 1) {
printf("Time elapsed in rank %2d: %6.3f\n", myid, t1 - t0);
}
free(message);
free(receiveBuffer);
MPI_Finalize();
return 0;
}