#include <png.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include "pngwriter.h"
/* Datatype for RGB pixel */
typedef struct {
uint8_t red;
uint8_t green;
uint8_t blue;
} pixel_t;
void cmap(double value, const double scaling, const double offset,
          pixel_t *pix);
static int heat_colormap[256][3] = {
{59, 76, 192}, {59, 76, 192}, {60, 78, 194}, {61, 80, 195},
{62, 81, 197}, {64, 83, 198}, {65, 85, 200}, {66, 87, 201},
{67, 88, 203}, {68, 90, 204}, {69, 92, 206}, {71, 93, 207},
{72, 95, 209}, {73, 97, 210}, {74, 99, 211}, {75, 100, 213},
{77, 102, 214}, {78, 104, 215}, {79, 105, 217}, {80, 107, 218},
{82, 109, 219}, {83, 110, 221}, {84, 112, 222}, {85, 114, 223},
{87, 115, 224}, {88, 117, 225}, {89, 119, 227}, {90, 120, 228},
{92, 122, 229}, {93, 124, 230}, {94, 125, 231}, {96, 127, 232},
{97, 129, 233}, {98, 130, 234}, {100, 132, 235}, {101, 133, 236},
{102, 135, 237}, {103, 137, 238}, {105, 138, 239}, {106, 140, 240},
{107, 141, 240}, {109, 143, 241}, {110, 144, 242}, {111, 146, 243},
{113, 147, 244}, {114, 149, 244}, {116, 150, 245}, {117, 152, 246},
{118, 153, 246}, {120, 155, 247}, {121, 156, 248}, {122, 157, 248},
{124, 159, 249}, {125, 160, 249}, {127, 162, 250}, {128, 163, 250},
{129, 164, 251}, {131, 166, 251}, {132, 167, 252}, {133, 168, 252},
{135, 170, 252}, {136, 171, 253}, {138, 172, 253}, {139, 174, 253},
{140, 175, 254}, {142, 176, 254}, {143, 177, 254}, {145, 179, 254},
{146, 180, 254}, {147, 181, 255}, {149, 182, 255}, {150, 183, 255},
{152, 185, 255}, {153, 186, 255}, {154, 187, 255}, {156, 188, 255},
{157, 189, 255}, {158, 190, 255}, {160, 191, 255}, {161, 192, 255},
{163, 193, 255}, {164, 194, 254}, {165, 195, 254}, {167, 196, 254},
{168, 197, 254}, {169, 198, 254}, {171, 199, 253}, {172, 200, 253},
{173, 201, 253}, {175, 202, 252}, {176, 203, 252}, {177, 203, 252},
{179, 204, 251}, {180, 205, 251}, {181, 206, 250}, {183, 207, 250},
{184, 207, 249}, {185, 208, 249}, {186, 209, 248}, {188, 209, 247},
{189, 210, 247}, {190, 211, 246}, {191, 211, 246}, {193, 212, 245},
{194, 213, 244}, {195, 213, 243}, {196, 214, 243}, {198, 214, 242},
{199, 215, 241}, {200, 215, 240}, {201, 216, 239}, {202, 216, 239},
{204, 217, 238}, {205, 217, 237}, {206, 217, 236}, {207, 218, 235},
{208, 218, 234}, {209, 218, 233}, {210, 219, 232}, {211, 219, 231},
{212, 219, 230}, {214, 220, 229}, {215, 220, 228}, {216, 220, 227},
{217, 220, 225}, {218, 220, 224}, {219, 220, 223}, {220, 221, 222},
{221, 221, 221}, {222, 220, 219}, {223, 220, 218}, {224, 219, 216},
{225, 219, 215}, {226, 218, 214}, {227, 218, 212}, {228, 217, 211},
{229, 216, 209}, {230, 216, 208}, {231, 215, 206}, {232, 215, 205},
{233, 214, 203}, {233, 213, 202}, {234, 212, 200}, {235, 212, 199},
{236, 211, 197}, {237, 210, 196}, {237, 209, 194}, {238, 208, 193},
{239, 208, 191}, {239, 207, 190}, {240, 206, 188}, {240, 205, 187},
{241, 204, 185}, {242, 203, 183}, {242, 202, 182}, {243, 201, 180},
{243, 200, 179}, {243, 199, 177}, {244, 198, 176}, {244, 197, 174},
{245, 196, 173}, {245, 195, 171}, {245, 194, 169}, {246, 193, 168},
{246, 192, 166}, {246, 190, 165}, {246, 189, 163}, {247, 188, 161},
{247, 187, 160}, {247, 186, 158}, {247, 184, 157}, {247, 183, 155},
{247, 182, 153}, {247, 181, 152}, {247, 179, 150}, {247, 178, 149},
{247, 177, 147}, {247, 175, 146}, {247, 174, 144}, {247, 172, 142},
{247, 171, 141}, {247, 170, 139}, {247, 168, 138}, {247, 167, 136},
{247, 165, 135}, {246, 164, 133}, {246, 162, 131}, {246, 161, 130},
{246, 159, 128}, {245, 158, 127}, {245, 156, 125}, {245, 155, 124},
{244, 153, 122}, {244, 151, 121}, {243, 150, 119}, {243, 148, 117},
{242, 147, 116}, {242, 145, 114}, {241, 143, 113}, {241, 142, 111},
{240, 140, 110}, {240, 138, 108}, {239, 136, 107}, {239, 135, 105},
{238, 133, 104}, {237, 131, 102}, {237, 129, 101}, {236, 128, 99},
{235, 126, 98}, {235, 124, 96}, {234, 122, 95}, {233, 120, 94},
{232, 118, 92}, {231, 117, 91}, {230, 115, 89}, {230, 113, 88},
{229, 111, 86}, {228, 109, 85}, {227, 107, 84}, {226, 105, 82},
{225, 103, 81}, {224, 101, 79}, {223, 99, 78}, {222, 97, 77},
{221, 95, 75}, {220, 93, 74}, {219, 91, 73}, {218, 89, 71},
{217, 87, 70}, {215, 85, 69}, {214, 82, 67}, {213, 80, 66},
{212, 78, 65}, {211, 76, 64}, {210, 74, 62}, {208, 71, 61},
{207, 69, 60}, {206, 67, 59}, {204, 64, 57}, {203, 62, 56},
{202, 59, 55}, {200, 57, 54}, {199, 54, 53}, {198, 52, 51},
{196, 49, 50}, {195, 46, 49}, {193, 43, 48}, {192, 40, 47},
{191, 37, 46}, {189, 34, 44}, {188, 30, 43}, {186, 26, 42},
{185, 22, 41}, {183, 17, 40}, {182, 11, 39}, {180, 4, 38}
};
/*
 * Save the two dimensional array as a png image
 * Arguments:
 *   double *data - pointer to an array of height * width values
 *   int height   - number of rows to be written
 *   int width    - number of columns to be written
 *   char *fname  - name of the picture
 *   char lang    - either 'c' or 'f' denoting the memory
 *                  layout. That is, if 'f' is given, then rows
 *                  and columns are swapped.
 */
int save_png(double *data, const int height, const int width,
const char *fname, const char lang)
{
FILE *fp;
png_structp pngstruct_ptr = NULL;
png_infop pnginfo_ptr = NULL;
png_byte **row_pointers = NULL;
int i, j;
/* Default return status is failure */
int status = -1;
int pixel_size = 3;
int depth = 8;
    /* Open the file and initialize the png library.
     * Note that in error cases we jump to the cleanup
     * labels at the end of this function using goto. */
fp = fopen(fname, "wb");
if (fp == NULL) {
goto fopen_failed;
}
pngstruct_ptr =
png_create_write_struct(PNG_LIBPNG_VER_STRING, NULL, NULL, NULL);
if (pngstruct_ptr == NULL) {
goto pngstruct_create_failed;
}
pnginfo_ptr = png_create_info_struct(pngstruct_ptr);
if (pnginfo_ptr == NULL) {
goto pnginfo_create_failed;
}
if (setjmp(png_jmpbuf(pngstruct_ptr))) {
goto setjmp_failed;
}
png_set_IHDR(pngstruct_ptr, pnginfo_ptr, (size_t) width,
(size_t) height, depth, PNG_COLOR_TYPE_RGB,
PNG_INTERLACE_NONE,
PNG_COMPRESSION_TYPE_DEFAULT, PNG_FILTER_TYPE_DEFAULT);
row_pointers = png_malloc(pngstruct_ptr, height * sizeof(png_byte *));
for (i = 0; i < height; i++) {
png_byte *row = png_malloc(pngstruct_ptr,
sizeof(uint8_t) * width * pixel_size);
row_pointers[i] = row;
/* Branch according to the memory layout */
if (lang == 'c' || lang == 'C') {
for (j = 0; j < width; j++) {
pixel_t pixel;
/* Scale the values so that values between 0 and
* 100 degrees are mapped to values between 0 and 255 */
cmap(data[j + i * width], 2.55, 0.0, &pixel);
*row++ = pixel.red;
*row++ = pixel.green;
*row++ = pixel.blue;
}
} else if (lang == 'f' || lang == 'F') {
for (j = 0; j < width; j++) {
pixel_t pixel;
cmap(data[i + j * height], 2.55, 0.0, &pixel);
*row++ = pixel.red;
*row++ = pixel.green;
*row++ = pixel.blue;
}
} else {
fprintf(stderr, "Unknown memory order %c for pngwriter!\n", lang);
exit(EXIT_FAILURE);
}
}
png_init_io(pngstruct_ptr, fp);
png_set_rows(pngstruct_ptr, pnginfo_ptr, row_pointers);
png_write_png(pngstruct_ptr, pnginfo_ptr,
PNG_TRANSFORM_IDENTITY, NULL);
status = 0;
for (i = 0; i < height; i++) {
png_free(pngstruct_ptr, row_pointers[i]);
}
png_free(pngstruct_ptr, row_pointers);
/* Cleanup with labels */
setjmp_failed:
pnginfo_create_failed:
png_destroy_write_struct(&pngstruct_ptr, &pnginfo_ptr);
pngstruct_create_failed:
fclose(fp);
fopen_failed:
return status;
}
/*
 * This routine sets the RGB values of the pixel_t structure using
 * the colormap data in heat_colormap. If the scaled value falls outside
 * the valid range [0, 255], blue (too cold) or red (too hot) is
 * substituted instead.
 */
void cmap(double value, const double scaling, const double offset,
pixel_t *pix)
{
int ival;
ival = (int)(value * scaling + offset);
if (ival < 0) { /* Colder than colorscale, substitute blue */
pix->red = 0;
pix->green = 0;
pix->blue = 255;
} else if (ival > 255) {
pix->red = 255; /* Hotter than colormap, substitute red */
pix->green = 0;
pix->blue = 0;
} else {
pix->red = heat_colormap[ival][0];
pix->green = heat_colormap[ival][1];
pix->blue = heat_colormap[ival][2];
}
}
#ifndef PNGWRITER_H_
#define PNGWRITER_H_
int save_png(double *data, const int nx, const int ny, const char *fname,
const char lang);
#endif
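/*
 * A minimal, hypothetical call of the interface declared above: for a
 * C-ordered buffer with `rows` rows and `cols` columns one would write,
 * for example,
 *
 *     save_png(data, rows, cols, "field.png", 'c');
 *
 * (variable and file names here are illustrative only).
 */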
! PNG writer for heat equation solver
module pngwriter
use heat
contains
function save_png(data, nx, ny, fname) result(stat)
use, intrinsic :: ISO_C_BINDING
implicit none
real(dp), dimension(:,:), intent(in) :: data
integer, intent(in) :: nx, ny
character(len=*), intent(in) :: fname
integer :: stat
! Interface for save_png C-function
interface
       ! The C-function prototype is
       ! int save_png(double *data, const int nx, const int ny,
       !              const char *fname, const char lang)
function save_png_c(data, nx, ny, fname, order) &
& bind(C,name="save_png") result(stat)
use, intrinsic :: ISO_C_BINDING
implicit none
real(kind=C_DOUBLE) :: data(*)
integer(kind=C_INT), value, intent(IN) :: nx, ny
character(kind=C_CHAR), intent(IN) :: fname(*)
character(kind=C_CHAR), value, intent(IN) :: order
integer(kind=C_INT) :: stat
end function save_png_c
end interface
stat = save_png_c(data, nx, ny, trim(fname) // C_NULL_CHAR, 'f')
if (stat /= 0) then
write(*,*) 'save_png returned error!'
end if
end function save_png
end module pngwriter
! Setup routines for heat equation solver
module setup
use heat
contains
subroutine initialize(previous, current, nsteps, parallel, iter0)
use utilities
use io
implicit none
type(field), intent(out) :: previous, current
integer, intent(out) :: nsteps, iter0
type(parallel_data), intent(out) :: parallel
integer :: rows, cols
logical :: using_input_file, restart
character(len=85) :: input_file, arg ! Input file name and command line arguments
! Default values for grid size and time steps
rows = 2000
cols = 2000
nsteps = 500
using_input_file = .false.
iter0 = 0
! Read in the command line arguments and
! set up the needed variables
select case(command_argument_count())
case(0) ! No arguments -> default values
case(1) ! One argument -> input file name
using_input_file = .true.
call get_command_argument(1, input_file)
case(2) ! Two arguments -> input file name and number of steps
using_input_file = .true.
call get_command_argument(1, input_file)
call get_command_argument(2, arg)
read(arg, *) nsteps
case(3) ! Three arguments -> rows, cols and nsteps
call get_command_argument(1, arg)
read(arg, *) rows
call get_command_argument(2, arg)
read(arg, *) cols
call get_command_argument(3, arg)
read(arg, *) nsteps
case default
call usage()
stop
end select
! Check if checkpoint exists
inquire (file="HEAT_RESTART.dat", exist=restart)
if (restart) then
call read_restart(previous, parallel, iter0)
if (parallel % rank == 0) then
write(*,'(a)') 'Restarting from an earlier checkpoint'
write(*,'(a,i0)') ' saved at iteration ', iter0
end if
call copy_fields(previous, current)
    ! Initialize the fields according to the command line arguments
else if (using_input_file) then
call read_field(previous, input_file, parallel)
call copy_fields(previous, current)
else
call parallel_setup(parallel, rows, cols)
call set_field_dimensions(previous, rows, cols, parallel)
call set_field_dimensions(current, rows, cols, parallel)
call generate_field(previous, parallel)
call copy_fields(previous, current)
end if
end subroutine initialize
  ! Generate the initial temperature field. The pattern is a disc with a
  ! radius of nx_full / 6 in the center of the grid.
  ! Boundary conditions are (different) constant temperatures outside the grid.
subroutine generate_field(field0, parallel)
use heat
implicit none
type(field), intent(inout) :: field0
type(parallel_data), intent(in) :: parallel
real(dp) :: radius2, ds2
integer :: i, j
integer, dimension(2) :: dims, coords
logical :: periods(2)
integer :: ierr
    ! The field arrays also contain a halo region
allocate(field0%data(0:field0%nx+1, 0:field0%ny+1))
call mpi_cart_get(parallel%comm, 2, dims, periods, coords, ierr)
! Square of the disk radius
radius2 = (field0%nx_full / 6.0)**2
do j = 0, field0%ny + 1
do i = 0, field0%nx + 1
ds2 = (i + coords(1) * field0%nx - field0%nx_full / 2.0 + 1)**2 + &
(j + coords(2) * field0%ny - field0%ny_full / 2.0 + 1)**2
if (ds2 < radius2) then
field0%data(i,j) = 5.0
else
field0%data(i,j) = 65.0
end if
end do
end do
! Left boundary
if (coords(2) == 0) then
field0%data(:,0) = 20.0_dp
end if
! Upper boundary
if (coords(1) == 0) then
field0%data(0,:) = 85.0_dp
end if
! Right boundary
if (coords(2) == dims(2) - 1) then
field0%data(:,field0%ny+1) = 70.0_dp
end if
! Lower boundary
if (coords(1) == dims(1) - 1) then
field0%data(field0%nx+1,:) = 5.0_dp
end if
end subroutine generate_field
  ! Clean up routine for the field type
  ! Arguments:
  !   field0, field1 (type(field)): field variables to be cleared
subroutine finalize(field0, field1)
use heat
implicit none
type(field), intent(inout) :: field0, field1
deallocate(field0%data)
deallocate(field1%data)
end subroutine finalize
! Helper routine that prints out a simple usage if
! user gives more than three arguments
subroutine usage()
implicit none
character(len=256) :: buf
call get_command_argument(0, buf)
write (*,'(A)') 'Usage:'
write (*,'(A, " (default values will be used)")') trim(buf)
write (*,'(A, " <filename>")') trim(buf)
write (*,'(A, " <filename> <nsteps>")') trim(buf)
write (*,'(A, " <rows> <cols> <nsteps>")') trim(buf)
end subroutine usage
end module setup
! Utility routines for heat equation solver
! NOTE: This file does not need to be edited!
module utilities
use heat
contains
! Swap the data fields of two variables of type field
! Arguments:
! curr, prev (type(field)): the two variables that are swapped
subroutine swap_fields(curr, prev)
implicit none
type(field), intent(inout) :: curr, prev
real(dp), allocatable, dimension(:,:) :: tmp
call move_alloc(curr%data, tmp)
call move_alloc(prev%data, curr%data)
call move_alloc(tmp, prev%data)
end subroutine swap_fields
! Copy the data from one field to another
! Arguments:
! from_field (type(field)): variable to copy from
! to_field (type(field)): variable to copy to
subroutine copy_fields(from_field, to_field)
implicit none
type(field), intent(in) :: from_field
type(field), intent(out) :: to_field
! Consistency checks
if (.not.allocated(from_field%data)) then
write (*,*) "Can not copy from a field without allocated data"
stop
end if
if (.not.allocated(to_field%data)) then
       ! Target is not initialized, allocate memory
allocate(to_field%data(lbound(from_field%data, 1):ubound(from_field%data, 1), &
& lbound(from_field%data, 2):ubound(from_field%data, 2)))
else if (any(shape(from_field%data) /= shape(to_field%data))) then
write (*,*) "Wrong field data sizes in copy routine"
print *, shape(from_field%data), shape(to_field%data)
stop
end if
to_field%data = from_field%data
to_field%nx = from_field%nx
to_field%ny = from_field%ny
to_field%dx = from_field%dx
to_field%dy = from_field%dy
end subroutine copy_fields
end module utilities
# Python implementation

The Python implementation utilizes NumPy and mpi4py for computation and
communication, as well as matplotlib for outputting images. The main program
is implemented in [heat.py](heat.py), and the whole program can be executed as

    mpirun -np xxx python heat.py
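Here `xxx` is the number of MPI processes. Judging from the argument handling
in [heat_setup.py](heat_setup.py), the program also accepts an input file
and/or the grid size and the number of time steps, so example runs (with a
hypothetical input file name) could look like

    mpirun -np 4 python heat.py                  # defaults: 2000 x 2000 grid, 500 steps
    mpirun -np 4 python heat.py field.dat        # initial field read from a file
    mpirun -np 4 python heat.py field.dat 1000   # input file and number of steps
    mpirun -np 4 python heat.py 800 800 1000     # rows, columns and number of steps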
## Using Cython kernel

By default, the code uses pure NumPy for evaluating the time evolution
step. In addition, a Cython version of the main computational kernels
is provided in [evolve.pyx](evolve.pyx). The Cython kernels can be built
into a Python extension in the current directory with

    python setup.py build_ext --inplace

After building the Cython kernels, the main program can be executed as before.
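The build places the compiled module next to [evolve.py](evolve.py), and Python
prefers an extension module over a plain source file of the same name. As an
optional sanity check (not part of the solver itself), one can verify which
kernel is being imported:

    import evolve
    # a compiled Cython kernel reports a .so/.pyd file here,
    # the pure NumPy fallback reports evolve.py
    print(evolve.__file__)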
import numpy as np
# Time evolution for the inner part of the grid
def evolve_inner(u, u_previous, a, dt, dx2, dy2):
"""Explicit time evolution.
u: new temperature field
u_previous: previous field
a: diffusion constant
dt: time step
dx2: grid spacing squared, i.e. dx^2
dy2: -- "" -- , i.e. dy^2"""
u[2:-2, 2:-2] = u_previous[2:-2, 2:-2] + a * dt * ( \
(u_previous[3:-1, 2:-2] - 2*u_previous[2:-2, 2:-2] + \
u_previous[1:-3, 2:-2]) / dx2 + \
(u_previous[2:-2, 3:-1] - 2*u_previous[2:-2, 2:-2] + \
u_previous[2:-2, 1:-3]) / dy2 )
# Time evolution for the edges of the grid
def evolve_edges(u, u_previous, a, dt, dx2, dy2):
"""Explicit time evolution.
u: new temperature field
u_previous: previous field
a: diffusion constant
dt: time step
dx2: grid spacing squared, i.e. dx^2
dy2: -- "" -- , i.e. dy^2"""
u[1, 1:-1] = u_previous[1, 1:-1] + a * dt * ( \
(u_previous[2, 1:-1] - 2*u_previous[1, 1:-1] + \
u_previous[0, 1:-1]) / dx2 + \
(u_previous[1, 2:] - 2*u_previous[1, 1:-1] + \
u_previous[1, :-2]) / dy2 )
u[-2, 1:-1] = u_previous[-2, 1:-1] + a * dt * ( \
(u_previous[-1, 1:-1] - 2*u_previous[-2, 1:-1] + \
u_previous[-3, 1:-1]) / dx2 + \
(u_previous[-2, 2:] - 2*u_previous[-2, 1:-1] + \
u_previous[-2, :-2]) / dy2 )
u[1:-1, 1] = u_previous[1:-1, 1] + a * dt * ( \
(u_previous[2:, 1] - 2*u_previous[1:-1, 1] + \
u_previous[:-2, 1]) / dx2 + \
(u_previous[1:-1, 2] - 2*u_previous[1:-1, 1] + \
u_previous[1:-1, 0]) / dy2 )
u[1:-1, -2] = u_previous[1:-1, -2] + a * dt * ( \
(u_previous[2:, -2] - 2*u_previous[1:-1, -2] + \
u_previous[:-2, -2]) / dx2 + \
(u_previous[1:-1, -1] - 2*u_previous[1:-1, -2] + \
u_previous[1:-1, -3]) / dy2 )
# cython: profile=True
import numpy as np
cimport numpy as cnp
import cython
# Time evolution for the inner part of the grid
@cython.boundscheck(False)
@cython.wraparound(False)
@cython.cdivision(True)
cpdef evolve_inner(cnp.ndarray[cnp.double_t, ndim=2]u,
cnp.ndarray[cnp.double_t, ndim=2]u_previous,
double a, double dt, double dx2, double dy2):
"""Explicit time evolution.
u: new temperature field
u_previous: previous field
a: diffusion constant
dt: time step. """
cdef int nx = u.shape[0] - 2
cdef int ny = u.shape[1] - 2
cdef int i,j
# Multiplication is more efficient than division
cdef double dx2inv = 1. / dx2
cdef double dy2inv = 1. / dy2
for i in range(2, nx):
for j in range(2, ny):
u[i, j] = u_previous[i, j] + a * dt * ( \
(u_previous[i+1, j] - 2*u_previous[i, j] + \
u_previous[i-1, j]) * dx2inv + \
(u_previous[i, j+1] - 2*u_previous[i, j] + \
u_previous[i, j-1]) * dy2inv )
# Time evolution for the edges of the grid
@cython.boundscheck(False)
@cython.wraparound(False)
@cython.cdivision(True)
cpdef evolve_edges(cnp.ndarray[cnp.double_t, ndim=2]u,
cnp.ndarray[cnp.double_t, ndim=2]u_previous,
double a, double dt, double dx2, double dy2):
"""Explicit time evolution.
u: new temperature field
u_previous: previous field
a: diffusion constant
dt: time step
dx2: grid spacing squared, i.e. dx^2
dy2: -- "" -- , i.e. dy^2"""
cdef int nx = u.shape[0] - 2
cdef int ny = u.shape[1] - 2
cdef int i,j
# Multiplication is more efficient than division
cdef double dx2inv = 1. / dx2
cdef double dy2inv = 1. / dy2
j = 1
for i in range(1, nx+1):
u[i, j] = u_previous[i, j] + a * dt * ( \
(u_previous[i+1, j] - 2*u_previous[i, j] + \
u_previous[i-1, j]) * dx2inv + \
(u_previous[i, j+1] - 2*u_previous[i, j] + \
u_previous[i, j-1]) * dy2inv )
j = ny
for i in range(1, nx+1):
u[i, j] = u_previous[i, j] + a * dt * ( \
(u_previous[i+1, j] - 2*u_previous[i, j] + \
u_previous[i-1, j]) * dx2inv + \
(u_previous[i, j+1] - 2*u_previous[i, j] + \
u_previous[i, j-1]) * dy2inv )
i = 1
for j in range(1, ny+1):
u[i, j] = u_previous[i, j] + a * dt * ( \
(u_previous[i+1, j] - 2*u_previous[i, j] + \
u_previous[i-1, j]) * dx2inv + \
(u_previous[i, j+1] - 2*u_previous[i, j] + \
u_previous[i, j-1]) * dy2inv )
i = nx
for j in range(1, ny+1):
u[i, j] = u_previous[i, j] + a * dt * ( \
(u_previous[i+1, j] - 2*u_previous[i, j] + \
u_previous[i-1, j]) * dx2inv + \
(u_previous[i, j+1] - 2*u_previous[i, j] + \
u_previous[i, j-1]) * dy2inv )
from mpi4py.MPI import Request
import numpy as np
# Nonblocking halo exchange with the neighbouring processes
def exchange_init(u, parallel):
    # Send to the up neighbour, receive from the down neighbour
parallel.requests[0] = parallel.comm.Isend((u[1,:], 1, parallel.rowtype),
dest=parallel.nup)
parallel.requests[1] = parallel.comm.Irecv((u[-1,:], 1, parallel.rowtype),
source=parallel.ndown)
    # Send to the down neighbour, receive from the up neighbour
parallel.requests[2] = parallel.comm.Isend((u[-2,:], 1, parallel.rowtype),
dest=parallel.ndown)
parallel.requests[3] = parallel.comm.Irecv((u[0,:], 1, parallel.rowtype),
source=parallel.nup)
# Send to the left, receive from right
parallel.requests[4] = parallel.comm.Isend((u.ravel()[1:], 1,
parallel.columntype),
dest=parallel.nleft)
idx = u.shape[1] - 1 # ny + 1
parallel.requests[5] = parallel.comm.Irecv((u.ravel()[idx:], 1,
parallel.columntype),
source=parallel.nright)
# Send to the right, receive from left
idx = u.shape[1] - 2 # ny
parallel.requests[6] = parallel.comm.Isend((u.ravel()[idx:], 1,
parallel.columntype),
dest=parallel.nright)
parallel.requests[7] = parallel.comm.Irecv((u, 1, parallel.columntype),
source=parallel.nleft)
def exchange_finalize(parallel):
# MPI.Request.Waitall(parallel.requests)
Request.Waitall(parallel.requests)
from __future__ import print_function
import numpy as np
import time
from heat_setup import initialize
from evolve import evolve_inner, evolve_edges
from exchange import exchange_init, exchange_finalize
from heat_io import write_field, write_restart
# Basic parameters
a = 0.5 # Diffusion constant
image_interval = 100 # Write frequency for png files
restart_interval = 200  # Write frequency for restart files
# Grid spacings
dx = 0.01
dy = 0.01
dx2 = dx**2
dy2 = dy**2
# For stability, this is the largest interval possible
# for the size of the time-step:
dt = dx2*dy2 / ( 2*a*(dx2+dy2) )
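# (For this explicit scheme, stability requires a*dt*(1/dx2 + 1/dy2) <= 1/2,
#  which rearranges to the expression above.)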
def main():
# Initialize
field, field0, parallel, iter0, timesteps = initialize()
write_field(field, parallel, iter0)
t0 = time.time()
for iter in range(iter0, iter0 + timesteps):
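        # Overlap communication and computation: start the nonblocking halo
        # exchange, update the interior points that do not need halo data,
        # wait for the exchange to finish, then update the edge points.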
exchange_init(field0, parallel)
evolve_inner(field, field0, a, dt, dx2, dy2)
exchange_finalize(parallel)
evolve_edges(field, field0, a, dt, dx2, dy2)
if iter % image_interval == 0:
write_field(field, parallel, iter)
if iter % restart_interval == 0:
write_restart(field, parallel, iter)
field, field0 = field0, field
t1 = time.time()
if parallel.rank == 0:
print("Running time: {0}".format(t1-t0))
if __name__ == '__main__':
main()
from __future__ import print_function
import numpy as np
from mpi4py import MPI
from parallel import Parallel
try:
import h5py
use_hdf5 = True
except ImportError:
use_hdf5 = False
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
# Set the colormap
plt.rcParams['image.cmap'] = 'BrBG'
def write_field(field, parallel, step):
nx, ny = [i - 2 for i in field.shape]
nx_full = parallel.dims[0] * nx
ny_full = parallel.dims[1] * ny
if parallel.rank == 0:
field_full = np.zeros((nx_full, ny_full), float)
field_full[:nx, :ny] = field[1:-1, 1:-1]
# Receive data from other ranks
for p in range(1, parallel.size):
coords = parallel.comm.Get_coords(p)
idx = coords[0] * nx * ny_full + coords[1] * ny
parallel.comm.Recv((field_full.ravel()[idx:], 1,
parallel.subarraytype), source=p)
# Write out the data
plt.figure()
plt.gca().clear()
plt.imshow(field_full)
plt.axis('off')
plt.savefig('heat_{0:04d}.png'.format(step))
else:
# Send the data
parallel.comm.Ssend((field, 1, parallel.subarraytype), dest=0)
def read_field(filename):
# Read the initial temperature field from file
rank = MPI.COMM_WORLD.Get_rank()
if rank == 0:
with open(filename) as f:
line = f.readline().split()
shape = [int(n) for n in line[1:3]]
else:
shape = None
nx, ny = MPI.COMM_WORLD.bcast(shape, root=0)
parallel = Parallel(nx, ny)
    nx_local = nx // parallel.dims[0]
    ny_local = ny // parallel.dims[1]
field = np.zeros((nx_local + 2, ny_local + 2), dtype=float)
if parallel.rank == 0:
field_full = np.loadtxt(filename)
field[1:-1, 1:-1] = field_full[:nx_local, :ny_local]
# Send the data to other ranks
for p in range(1, parallel.size):
coords = parallel.comm.Get_coords(p)
idx = coords[0] * nx_local * ny + coords[1] * ny_local
parallel.comm.Send((field_full.ravel()[idx:], 1,
parallel.subarraytype), dest=p)
else:
parallel.comm.Recv((field, 1, parallel.subarraytype),
source=0)
# fo = np.empty((nx_local, ny_local), dtype=float)
# parallel.comm.Recv(fo, source=0)
# import pickle
# pickle.dump(fo, open('fo_f{0}.pckl'.format(parallel.rank), 'w'))
# Set the boundary values
field[0, :] = field[1, :]
field[-1, :] = field[-2, :]
field[:, 0] = field[:, 1]
field[:,-1] = field[:,-2]
field0 = field.copy()
return field, field0, parallel
def write_restart(field, parallel, iter):
nx, ny = [i - 2 for i in field.shape]
nx_full = parallel.dims[0] * nx
ny_full = parallel.dims[1] * ny
mode = MPI.MODE_CREATE | MPI.MODE_WRONLY
fh = MPI.File.Open(parallel.comm, "HEAT_RESTART.dat", mode)
if parallel.rank == 0:
fh.Write(np.array((nx_full, ny_full, iter)))
    disp = 3 * 8  # header holds three 8-byte integers
fh.Set_view(disp, MPI.DOUBLE, parallel.filetype)
fh.Write_all((field, 1, parallel.restarttype))
fh.Close()
def read_restart():
mode = MPI.MODE_RDONLY
fh = MPI.File.Open(MPI.COMM_WORLD, "HEAT_RESTART.dat", mode)
buf = np.zeros(3, int)
fh.Read_all(buf)
nx, ny, iter = buf[0], buf[1], buf[2]
parallel = Parallel(nx, ny)
    nx_local = nx // parallel.dims[0]
    ny_local = ny // parallel.dims[1]
if parallel.rank == 0:
print("Restarting from iteration ", iter)
field = np.zeros((nx_local + 2, ny_local + 2), dtype=float)
    disp = 3 * 8  # header holds three 8-byte integers
fh.Set_view(disp, MPI.DOUBLE, parallel.filetype)
fh.Read_all((field, 1, parallel.restarttype))
fh.Close()
field0 = field.copy()
return field, field0, parallel, iter
import numpy as np
import sys
from os.path import isfile
from parallel import Parallel
from heat_io import read_restart, read_field
def initialize():
timesteps = 500 # Number of time-steps to evolve system
nx = 2000
ny = 2000
if isfile('HEAT_RESTART.dat'):
field, field0, parallel, iter0 = read_restart()
else:
if len(sys.argv) == 2:
filename = sys.argv[1]
field, field0, parallel = read_field(filename)
elif len(sys.argv) == 3:
filename = sys.argv[1]
timesteps = int(sys.argv[2])
field, field0, parallel = read_field(filename)
elif len(sys.argv) == 4:
nx = int(sys.argv[1])
ny = int(sys.argv[2])
timesteps = int(sys.argv[3])
field, field0, parallel = generate_field(nx, ny)
else:
field, field0, parallel = generate_field(nx, ny)
iter0 = 0
    return field, field0, parallel, iter0, timesteps
def generate_field(nx, ny):
parallel = Parallel(nx, ny)
    nx_local = nx // parallel.dims[0]
    ny_local = ny // parallel.dims[1]
field = np.zeros((nx_local + 2, ny_local + 2), dtype=float)
# Square of the disk radius
radius2 = (nx / 6.0)**2
X, Y = np.meshgrid(np.arange(nx_local + 2), np.arange(ny_local + 2),
indexing='ij')
ds2 = (X + parallel.coords[0] * nx_local - nx / 2.0 )**2 + \
(Y + parallel.coords[1] * ny_local - ny / 2.0 )**2
field[:] = np.where(ds2 < radius2, 5.0, 65.0)
# Boundaries
if parallel.coords[0] == 0:
field[0,:] = 85.0
if parallel.coords[0] == parallel.dims[0] - 1:
field[-1,:] = 5.0
if parallel.coords[1] == 0:
field[:,0] = 20.0
if parallel.coords[1] == parallel.dims[1] - 1:
field[:,-1] = 70.0
field0 = field.copy()
return field, field0, parallel
from mpi4py import MPI
class Parallel(object):
def __init__(self, nx, ny):
self.requests = []
world = MPI.COMM_WORLD
world_size = world.Get_size()
self.dims = MPI.Compute_dims(world_size, (0,0))
        nx_local = nx // self.dims[0]
        ny_local = ny // self.dims[1]
if nx_local * self.dims[0] != nx:
print("Cannot divide grid evenly to processors in x-direction!")
print("{0} x {1} != {2}".format(nx_local, self.dims[0], nx))
raise RuntimeError('Invalid domain decomposition')
if ny_local * self.dims[1] != ny:
print("Cannot divide grid evenly to processors in y-direction!")
print("{0} x {1} != {2}".format(ny_local, self.dims[1], ny))
raise RuntimeError('Invalid domain decomposition')
self.comm = world.Create_cart(self.dims, periods=None, reorder=True)
self.nup, self.ndown = self.comm.Shift(0, 1)
self.nleft, self.nright = self.comm.Shift(1, 1)
self.size = self.comm.Get_size()
self.rank = self.comm.Get_rank()
self.coords = self.comm.coords
if self.rank == 0:
print("Using domain decomposition {0} x {1}".format(self.dims[0],
self.dims[1]))
print("Local domain size {0} x {1}".format(nx_local, ny_local))
# Maybe there is more elegant way to initialize the requests array?
self.requests = [0, ] * 8
# Datatypes for halo exchange
self.rowtype = MPI.DOUBLE.Create_contiguous(ny_local + 2)
self.rowtype.Commit()
self.columntype = MPI.DOUBLE.Create_vector(nx_local + 2, 1,
ny_local + 2)
self.columntype.Commit()
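        # rowtype covers one full local row (ny_local + 2 doubles), while
        # columntype picks one double from each of the nx_local + 2 rows,
        # i.e. one full column of the row-major local array.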
# Datatypes for subblock needed in text I/O
# Rank 0 uses datatype for receiving data into full array while
# other ranks use datatype for sending the inner part of array
subsizes = [nx_local, ny_local]
if self.rank == 0:
sizes = [nx, ny]
offsets = [0, 0]
else:
sizes = [nx_local + 2, ny_local + 2]
offsets = [1, 1]
self.subarraytype = MPI.DOUBLE.Create_subarray(sizes, subsizes, offsets)
self.subarraytype.Commit()
# Datatypes for restart I/O
sizes = [nx + 2, ny + 2]
offsets = [1 + self.coords[0] * nx_local, 1 + self.coords[1] * ny_local]
if self.coords[0] == 0:
offsets[0] -= 1
subsizes[0] += 1
if self.coords[0] == (self.dims[0] - 1):
subsizes[0] += 1
if self.coords[1] == 0:
offsets[1] -= 1
subsizes[1] += 1
if self.coords[1] == (self.dims[1] - 1):
subsizes[1] += 1
self.filetype = MPI.DOUBLE.Create_subarray(sizes, subsizes, offsets)
self.filetype.Commit()
sizes = [nx_local + 2, ny_local + 2]
offsets = [1, 1]
if self.coords[0] == 0:
offsets[0] = 0
if self.coords[1] == 0:
offsets[1] = 0
self.restarttype = MPI.DOUBLE.Create_subarray(sizes, subsizes, offsets)
self.restarttype.Commit()
from distutils.core import setup, Extension
from Cython.Build import cythonize
setup(
ext_modules=cythonize("evolve.pyx"),
)
Copyright (C) 2018 CSC - IT Center for Science Ltd.
Licensed under the terms of the GNU General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
Code is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
Copy of the GNU General Public License can be obtained from
<http://www.gnu.org/licenses/>.
## Parallel Hello World
a) Write a simple parallel program that prints out something
(e.g. “Hello world!”) from multiple processes. Include the MPI headers
(C), use the MPI module (Fortran), or import mpi4py (Python) and
call appropriate initialization and finalization routines.
b) Modify the program so that each process also prints out its rank, and
so that the process with rank 0 also prints out the total number of MPI
processes.
program hello
use mpi
implicit none
integer :: rc, myid, ntasks
call MPI_INIT(rc)
call MPI_COMM_SIZE(MPI_COMM_WORLD, ntasks, rc)
call MPI_COMM_RANK(MPI_COMM_WORLD, myid, rc)
if(myid == 0) then
write(*,*) 'In total there are ',ntasks, 'tasks'
endif
write(*,*) 'Hello from ',myid
call MPI_FINALIZE(rc)
end program hello
#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>
int main(int argc, char *argv[])
{
    int myid, ntasks;
MPI_Init(&argc, &argv);
MPI_Comm_size(MPI_COMM_WORLD, &ntasks);
MPI_Comm_rank(MPI_COMM_WORLD, &myid);
if (myid == 0) {
printf("In total there are %i tasks\n", ntasks);
}
printf("Hello from %i\n", myid);
MPI_Finalize();
return 0;
}
from __future__ import print_function
from mpi4py import MPI
comm = MPI.COMM_WORLD
size = comm.Get_size()
rank = comm.Get_rank()
if rank == 0:
print("In total there are {0} tasks".format(size))
print("Hello from", rank)