#include <png.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include "pngwriter.h"
/* Datatype for RGB pixel */
typedef struct {
uint8_t red;
uint8_t green;
uint8_t blue;
} pixel_t;
void cmap(double value, const double scaling, const double offset,
          pixel_t *pix);
static int heat_colormap[256][3] = {
{59, 76, 192}, {59, 76, 192}, {60, 78, 194}, {61, 80, 195},
{62, 81, 197}, {64, 83, 198}, {65, 85, 200}, {66, 87, 201},
{67, 88, 203}, {68, 90, 204}, {69, 92, 206}, {71, 93, 207},
{72, 95, 209}, {73, 97, 210}, {74, 99, 211}, {75, 100, 213},
{77, 102, 214}, {78, 104, 215}, {79, 105, 217}, {80, 107, 218},
{82, 109, 219}, {83, 110, 221}, {84, 112, 222}, {85, 114, 223},
{87, 115, 224}, {88, 117, 225}, {89, 119, 227}, {90, 120, 228},
{92, 122, 229}, {93, 124, 230}, {94, 125, 231}, {96, 127, 232},
{97, 129, 233}, {98, 130, 234}, {100, 132, 235}, {101, 133, 236},
{102, 135, 237}, {103, 137, 238}, {105, 138, 239}, {106, 140, 240},
{107, 141, 240}, {109, 143, 241}, {110, 144, 242}, {111, 146, 243},
{113, 147, 244}, {114, 149, 244}, {116, 150, 245}, {117, 152, 246},
{118, 153, 246}, {120, 155, 247}, {121, 156, 248}, {122, 157, 248},
{124, 159, 249}, {125, 160, 249}, {127, 162, 250}, {128, 163, 250},
{129, 164, 251}, {131, 166, 251}, {132, 167, 252}, {133, 168, 252},
{135, 170, 252}, {136, 171, 253}, {138, 172, 253}, {139, 174, 253},
{140, 175, 254}, {142, 176, 254}, {143, 177, 254}, {145, 179, 254},
{146, 180, 254}, {147, 181, 255}, {149, 182, 255}, {150, 183, 255},
{152, 185, 255}, {153, 186, 255}, {154, 187, 255}, {156, 188, 255},
{157, 189, 255}, {158, 190, 255}, {160, 191, 255}, {161, 192, 255},
{163, 193, 255}, {164, 194, 254}, {165, 195, 254}, {167, 196, 254},
{168, 197, 254}, {169, 198, 254}, {171, 199, 253}, {172, 200, 253},
{173, 201, 253}, {175, 202, 252}, {176, 203, 252}, {177, 203, 252},
{179, 204, 251}, {180, 205, 251}, {181, 206, 250}, {183, 207, 250},
{184, 207, 249}, {185, 208, 249}, {186, 209, 248}, {188, 209, 247},
{189, 210, 247}, {190, 211, 246}, {191, 211, 246}, {193, 212, 245},
{194, 213, 244}, {195, 213, 243}, {196, 214, 243}, {198, 214, 242},
{199, 215, 241}, {200, 215, 240}, {201, 216, 239}, {202, 216, 239},
{204, 217, 238}, {205, 217, 237}, {206, 217, 236}, {207, 218, 235},
{208, 218, 234}, {209, 218, 233}, {210, 219, 232}, {211, 219, 231},
{212, 219, 230}, {214, 220, 229}, {215, 220, 228}, {216, 220, 227},
{217, 220, 225}, {218, 220, 224}, {219, 220, 223}, {220, 221, 222},
{221, 221, 221}, {222, 220, 219}, {223, 220, 218}, {224, 219, 216},
{225, 219, 215}, {226, 218, 214}, {227, 218, 212}, {228, 217, 211},
{229, 216, 209}, {230, 216, 208}, {231, 215, 206}, {232, 215, 205},
{233, 214, 203}, {233, 213, 202}, {234, 212, 200}, {235, 212, 199},
{236, 211, 197}, {237, 210, 196}, {237, 209, 194}, {238, 208, 193},
{239, 208, 191}, {239, 207, 190}, {240, 206, 188}, {240, 205, 187},
{241, 204, 185}, {242, 203, 183}, {242, 202, 182}, {243, 201, 180},
{243, 200, 179}, {243, 199, 177}, {244, 198, 176}, {244, 197, 174},
{245, 196, 173}, {245, 195, 171}, {245, 194, 169}, {246, 193, 168},
{246, 192, 166}, {246, 190, 165}, {246, 189, 163}, {247, 188, 161},
{247, 187, 160}, {247, 186, 158}, {247, 184, 157}, {247, 183, 155},
{247, 182, 153}, {247, 181, 152}, {247, 179, 150}, {247, 178, 149},
{247, 177, 147}, {247, 175, 146}, {247, 174, 144}, {247, 172, 142},
{247, 171, 141}, {247, 170, 139}, {247, 168, 138}, {247, 167, 136},
{247, 165, 135}, {246, 164, 133}, {246, 162, 131}, {246, 161, 130},
{246, 159, 128}, {245, 158, 127}, {245, 156, 125}, {245, 155, 124},
{244, 153, 122}, {244, 151, 121}, {243, 150, 119}, {243, 148, 117},
{242, 147, 116}, {242, 145, 114}, {241, 143, 113}, {241, 142, 111},
{240, 140, 110}, {240, 138, 108}, {239, 136, 107}, {239, 135, 105},
{238, 133, 104}, {237, 131, 102}, {237, 129, 101}, {236, 128, 99},
{235, 126, 98}, {235, 124, 96}, {234, 122, 95}, {233, 120, 94},
{232, 118, 92}, {231, 117, 91}, {230, 115, 89}, {230, 113, 88},
{229, 111, 86}, {228, 109, 85}, {227, 107, 84}, {226, 105, 82},
{225, 103, 81}, {224, 101, 79}, {223, 99, 78}, {222, 97, 77},
{221, 95, 75}, {220, 93, 74}, {219, 91, 73}, {218, 89, 71},
{217, 87, 70}, {215, 85, 69}, {214, 82, 67}, {213, 80, 66},
{212, 78, 65}, {211, 76, 64}, {210, 74, 62}, {208, 71, 61},
{207, 69, 60}, {206, 67, 59}, {204, 64, 57}, {203, 62, 56},
{202, 59, 55}, {200, 57, 54}, {199, 54, 53}, {198, 52, 51},
{196, 49, 50}, {195, 46, 49}, {193, 43, 48}, {192, 40, 47},
{191, 37, 46}, {189, 34, 44}, {188, 30, 43}, {186, 26, 42},
{185, 22, 41}, {183, 17, 40}, {182, 11, 39}, {180, 4, 38}
};
/*
 * Save the two dimensional array as a png image
 * Arguments:
 *   double *data - pointer to an array of height * width values
 *   int height   - number of rows to be written
 *   int width    - number of columns to be written
 *   char *fname  - name of the picture
 *   char lang    - either 'c' or 'f' denoting the memory
 *                  layout. That is, if 'f' is given, then rows
 *                  and columns are swapped.
 */
int save_png(double *data, const int height, const int width,
const char *fname, const char lang)
{
FILE *fp;
png_structp pngstruct_ptr = NULL;
png_infop pnginfo_ptr = NULL;
png_byte **row_pointers = NULL;
int i, j;
/* Default return status is failure */
int status = -1;
int pixel_size = 3;
int depth = 8;
    /* Open the file and initialize the png library.
     * Note that in error cases we jump to the cleanup
     * labels at the end of this function using goto. */
fp = fopen(fname, "wb");
if (fp == NULL) {
goto fopen_failed;
}
pngstruct_ptr =
png_create_write_struct(PNG_LIBPNG_VER_STRING, NULL, NULL, NULL);
if (pngstruct_ptr == NULL) {
goto pngstruct_create_failed;
}
pnginfo_ptr = png_create_info_struct(pngstruct_ptr);
if (pnginfo_ptr == NULL) {
goto pnginfo_create_failed;
}
if (setjmp(png_jmpbuf(pngstruct_ptr))) {
goto setjmp_failed;
}
png_set_IHDR(pngstruct_ptr, pnginfo_ptr, (size_t) width,
(size_t) height, depth, PNG_COLOR_TYPE_RGB,
PNG_INTERLACE_NONE,
PNG_COMPRESSION_TYPE_DEFAULT, PNG_FILTER_TYPE_DEFAULT);
row_pointers = png_malloc(pngstruct_ptr, height * sizeof(png_byte *));
for (i = 0; i < height; i++) {
png_byte *row = png_malloc(pngstruct_ptr,
sizeof(uint8_t) * width * pixel_size);
row_pointers[i] = row;
/* Branch according to the memory layout */
if (lang == 'c' || lang == 'C') {
for (j = 0; j < width; j++) {
pixel_t pixel;
/* Scale the values so that values between 0 and
* 100 degrees are mapped to values between 0 and 255 */
cmap(data[j + i * width], 2.55, 0.0, &pixel);
*row++ = pixel.red;
*row++ = pixel.green;
*row++ = pixel.blue;
}
} else if (lang == 'f' || lang == 'F') {
for (j = 0; j < width; j++) {
pixel_t pixel;
cmap(data[i + j * height], 2.55, 0.0, &pixel);
*row++ = pixel.red;
*row++ = pixel.green;
*row++ = pixel.blue;
}
} else {
fprintf(stderr, "Unknown memory order %c for pngwriter!\n", lang);
exit(EXIT_FAILURE);
}
}
png_init_io(pngstruct_ptr, fp);
png_set_rows(pngstruct_ptr, pnginfo_ptr, row_pointers);
png_write_png(pngstruct_ptr, pnginfo_ptr,
PNG_TRANSFORM_IDENTITY, NULL);
status = 0;
for (i = 0; i < height; i++) {
png_free(pngstruct_ptr, row_pointers[i]);
}
png_free(pngstruct_ptr, row_pointers);
/* Cleanup with labels */
setjmp_failed:
pnginfo_create_failed:
png_destroy_write_struct(&pngstruct_ptr, &pnginfo_ptr);
pngstruct_create_failed:
fclose(fp);
fopen_failed:
return status;
}
/*
 * This routine sets the RGB values of the pixel_t structure using
 * the colormap data in heat_colormap. If the scaled value falls outside
 * the valid range [0, 255], blue (too cold) or red (too hot) is
 * substituted instead.
 */
void cmap(double value, const double scaling, const double offset,
pixel_t *pix)
{
int ival;
ival = (int)(value * scaling + offset);
if (ival < 0) { /* Colder than colorscale, substitute blue */
pix->red = 0;
pix->green = 0;
pix->blue = 255;
} else if (ival > 255) {
pix->red = 255; /* Hotter than colormap, substitute red */
pix->green = 0;
pix->blue = 0;
} else {
pix->red = heat_colormap[ival][0];
pix->green = heat_colormap[ival][1];
pix->blue = heat_colormap[ival][2];
}
}
#ifndef PNGWRITER_H_
#define PNGWRITER_H_
int save_png(double *data, const int nx, const int ny, const char *fname,
const char lang);
#endif
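/*
 * A minimal, hypothetical call of the interface declared above: for a
 * C-ordered buffer with `rows` rows and `cols` columns one would write,
 * for example,
 *
 *     save_png(data, rows, cols, "field.png", 'c');
 *
 * (variable and file names here are illustrative only).
 */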
! PNG writer for heat equation solver
module pngwriter
use heat
contains
function save_png(data, nx, ny, fname) result(stat)
use, intrinsic :: ISO_C_BINDING
implicit none
real(dp), dimension(:,:), intent(in) :: data
integer, intent(in) :: nx, ny
character(len=*), intent(in) :: fname
integer :: stat
! Interface for save_png C-function
interface
       ! The C-function prototype is
       ! int save_png(double *data, const int nx, const int ny,
       !              const char *fname, const char lang)
function save_png_c(data, nx, ny, fname, order) &
& bind(C,name="save_png") result(stat)
use, intrinsic :: ISO_C_BINDING
implicit none
real(kind=C_DOUBLE) :: data(*)
integer(kind=C_INT), value, intent(IN) :: nx, ny
character(kind=C_CHAR), intent(IN) :: fname(*)
character(kind=C_CHAR), value, intent(IN) :: order
integer(kind=C_INT) :: stat
end function save_png_c
end interface
stat = save_png_c(data, nx, ny, trim(fname) // C_NULL_CHAR, 'f')
if (stat /= 0) then
write(*,*) 'save_png returned error!'
end if
end function save_png
end module pngwriter
! Setup routines for heat equation solver
module setup
use heat
contains
subroutine initialize(previous, current, nsteps, parallel, iter0)
use utilities
use io
implicit none
type(field), intent(out) :: previous, current
integer, intent(out) :: nsteps, iter0
type(parallel_data), intent(out) :: parallel
integer :: rows, cols
logical :: using_input_file, restart
character(len=85) :: input_file, arg ! Input file name and command line arguments
! Default values for grid size and time steps
rows = 2000
cols = 2000
nsteps = 500
using_input_file = .false.
iter0 = 0
! Read in the command line arguments and
! set up the needed variables
select case(command_argument_count())
case(0) ! No arguments -> default values
case(1) ! One argument -> input file name
using_input_file = .true.
call get_command_argument(1, input_file)
case(2) ! Two arguments -> input file name and number of steps
using_input_file = .true.
call get_command_argument(1, input_file)
call get_command_argument(2, arg)
read(arg, *) nsteps
case(3) ! Three arguments -> rows, cols and nsteps
call get_command_argument(1, arg)
read(arg, *) rows
call get_command_argument(2, arg)
read(arg, *) cols
call get_command_argument(3, arg)
read(arg, *) nsteps
case default
call usage()
stop
end select
! Check if checkpoint exists
inquire (file="HEAT_RESTART.dat", exist=restart)
if (restart) then
call read_restart(previous, parallel, iter0)
if (parallel % rank == 0) then
write(*,'(a)') 'Restarting from an earlier checkpoint'
write(*,'(a,i0)') ' saved at iteration ', iter0
end if
call copy_fields(previous, current)
    ! Initialize the fields according to the command line arguments
else if (using_input_file) then
call read_field(previous, input_file, parallel)
call copy_fields(previous, current)
else
call parallel_setup(parallel, rows, cols)
call set_field_dimensions(previous, rows, cols, parallel)
call set_field_dimensions(current, rows, cols, parallel)
call generate_field(previous, parallel)
call copy_fields(previous, current)
end if
end subroutine initialize
  ! Generate the initial temperature field. The pattern is a disc with a
  ! radius of nx_full / 6 in the center of the grid.
  ! Boundary conditions are (different) constant temperatures outside the grid.
subroutine generate_field(field0, parallel)
use heat
implicit none
type(field), intent(inout) :: field0
type(parallel_data), intent(in) :: parallel
real(dp) :: radius2, ds2
integer :: i, j
integer, dimension(2) :: dims, coords
logical :: periods(2)
integer :: ierr
    ! The field arrays also contain a halo region
allocate(field0%data(0:field0%nx+1, 0:field0%ny+1))
call mpi_cart_get(parallel%comm, 2, dims, periods, coords, ierr)
! Square of the disk radius
radius2 = (field0%nx_full / 6.0)**2
do j = 0, field0%ny + 1
do i = 0, field0%nx + 1
ds2 = (i + coords(1) * field0%nx - field0%nx_full / 2.0 + 1)**2 + &
(j + coords(2) * field0%ny - field0%ny_full / 2.0 + 1)**2
if (ds2 < radius2) then
field0%data(i,j) = 5.0
else
field0%data(i,j) = 65.0
end if
end do
end do
! Left boundary
if (coords(2) == 0) then
field0%data(:,0) = 20.0_dp
end if
! Upper boundary
if (coords(1) == 0) then
field0%data(0,:) = 85.0_dp
end if
! Right boundary
if (coords(2) == dims(2) - 1) then
field0%data(:,field0%ny+1) = 70.0_dp
end if
! Lower boundary
if (coords(1) == dims(1) - 1) then
field0%data(field0%nx+1,:) = 5.0_dp
end if
end subroutine generate_field
  ! Clean up routine for the field type
  ! Arguments:
  !   field0, field1 (type(field)): field variables to be cleared
subroutine finalize(field0, field1)
use heat
implicit none
type(field), intent(inout) :: field0, field1
deallocate(field0%data)
deallocate(field1%data)
end subroutine finalize
! Helper routine that prints out a simple usage if
! user gives more than three arguments
subroutine usage()
implicit none
character(len=256) :: buf
call get_command_argument(0, buf)
write (*,'(A)') 'Usage:'
write (*,'(A, " (default values will be used)")') trim(buf)
write (*,'(A, " <filename>")') trim(buf)
write (*,'(A, " <filename> <nsteps>")') trim(buf)
write (*,'(A, " <rows> <cols> <nsteps>")') trim(buf)
end subroutine usage
end module setup
! Utility routines for heat equation solver
! NOTE: This file does not need to be edited!
module utilities
use heat
contains
! Swap the data fields of two variables of type field
! Arguments:
! curr, prev (type(field)): the two variables that are swapped
subroutine swap_fields(curr, prev)
implicit none
type(field), intent(inout) :: curr, prev
real(dp), allocatable, dimension(:,:) :: tmp
call move_alloc(curr%data, tmp)
call move_alloc(prev%data, curr%data)
call move_alloc(tmp, prev%data)
end subroutine swap_fields
! Copy the data from one field to another
! Arguments:
! from_field (type(field)): variable to copy from
! to_field (type(field)): variable to copy to
subroutine copy_fields(from_field, to_field)
implicit none
type(field), intent(in) :: from_field
type(field), intent(out) :: to_field
! Consistency checks
if (.not.allocated(from_field%data)) then
write (*,*) "Can not copy from a field without allocated data"
stop
end if
if (.not.allocated(to_field%data)) then
       ! Target is not initialized, allocate memory
allocate(to_field%data(lbound(from_field%data, 1):ubound(from_field%data, 1), &
& lbound(from_field%data, 2):ubound(from_field%data, 2)))
else if (any(shape(from_field%data) /= shape(to_field%data))) then
write (*,*) "Wrong field data sizes in copy routine"
print *, shape(from_field%data), shape(to_field%data)
stop
end if
to_field%data = from_field%data
to_field%nx = from_field%nx
to_field%ny = from_field%ny
to_field%dx = from_field%dx
to_field%dy = from_field%dy
end subroutine copy_fields
end module utilities
# Python implementation

The Python implementation utilizes NumPy and mpi4py for computation and
communication, as well as matplotlib for outputting images. The main program
is implemented in [heat.py](heat.py), and the whole program can be executed as

    mpirun -np xxx python heat.py
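Here `xxx` is the number of MPI processes. Judging from the argument handling
in [heat_setup.py](heat_setup.py), the program also accepts an input file
and/or the grid size and the number of time steps, so example runs (with a
hypothetical input file name) could look like

    mpirun -np 4 python heat.py                  # defaults: 2000 x 2000 grid, 500 steps
    mpirun -np 4 python heat.py field.dat        # initial field read from a file
    mpirun -np 4 python heat.py field.dat 1000   # input file and number of steps
    mpirun -np 4 python heat.py 800 800 1000     # rows, columns and number of steps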
## Using Cython kernel

By default, the code uses pure NumPy for evaluating the time evolution
step. In addition, a Cython version of the main computational kernels
is provided in [evolve.pyx](evolve.pyx). The Cython kernels can be built
into a Python extension in the current directory with

    python setup.py build_ext --inplace

After building the Cython kernels, the main program can be executed as before.
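The build places the compiled module next to [evolve.py](evolve.py), and Python
prefers an extension module over a plain source file of the same name. As an
optional sanity check (not part of the solver itself), one can verify which
kernel is being imported:

    import evolve
    # a compiled Cython kernel reports a .so/.pyd file here,
    # the pure NumPy fallback reports evolve.py
    print(evolve.__file__)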
import numpy as np
# Time evolution for the inner part of the grid
def evolve_inner(u, u_previous, a, dt, dx2, dy2):
"""Explicit time evolution.
u: new temperature field
u_previous: previous field
a: diffusion constant
dt: time step
dx2: grid spacing squared, i.e. dx^2
dy2: -- "" -- , i.e. dy^2"""
u[2:-2, 2:-2] = u_previous[2:-2, 2:-2] + a * dt * ( \
(u_previous[3:-1, 2:-2] - 2*u_previous[2:-2, 2:-2] + \
u_previous[1:-3, 2:-2]) / dx2 + \
(u_previous[2:-2, 3:-1] - 2*u_previous[2:-2, 2:-2] + \
u_previous[2:-2, 1:-3]) / dy2 )
# Time evolution for the edges of the grid
def evolve_edges(u, u_previous, a, dt, dx2, dy2):
"""Explicit time evolution.
u: new temperature field
u_previous: previous field
a: diffusion constant
dt: time step
dx2: grid spacing squared, i.e. dx^2
dy2: -- "" -- , i.e. dy^2"""
u[1, 1:-1] = u_previous[1, 1:-1] + a * dt * ( \
(u_previous[2, 1:-1] - 2*u_previous[1, 1:-1] + \
u_previous[0, 1:-1]) / dx2 + \
(u_previous[1, 2:] - 2*u_previous[1, 1:-1] + \
u_previous[1, :-2]) / dy2 )
u[-2, 1:-1] = u_previous[-2, 1:-1] + a * dt * ( \
(u_previous[-1, 1:-1] - 2*u_previous[-2, 1:-1] + \
u_previous[-3, 1:-1]) / dx2 + \
(u_previous[-2, 2:] - 2*u_previous[-2, 1:-1] + \
u_previous[-2, :-2]) / dy2 )
u[1:-1, 1] = u_previous[1:-1, 1] + a * dt * ( \
(u_previous[2:, 1] - 2*u_previous[1:-1, 1] + \
u_previous[:-2, 1]) / dx2 + \
(u_previous[1:-1, 2] - 2*u_previous[1:-1, 1] + \
u_previous[1:-1, 0]) / dy2 )
u[1:-1, -2] = u_previous[1:-1, -2] + a * dt * ( \
(u_previous[2:, -2] - 2*u_previous[1:-1, -2] + \
u_previous[:-2, -2]) / dx2 + \
(u_previous[1:-1, -1] - 2*u_previous[1:-1, -2] + \
u_previous[1:-1, -3]) / dy2 )
# cython: profile=True
import numpy as np
cimport numpy as cnp
import cython
# Time evolution for the inner part of the grid
@cython.boundscheck(False)
@cython.wraparound(False)
@cython.cdivision(True)
cpdef evolve_inner(cnp.ndarray[cnp.double_t, ndim=2]u,
cnp.ndarray[cnp.double_t, ndim=2]u_previous,
double a, double dt, double dx2, double dy2):
"""Explicit time evolution.
u: new temperature field
u_previous: previous field
a: diffusion constant
dt: time step. """
cdef int nx = u.shape[0] - 2
cdef int ny = u.shape[1] - 2
cdef int i,j
# Multiplication is more efficient than division
cdef double dx2inv = 1. / dx2
cdef double dy2inv = 1. / dy2
for i in range(2, nx):
for j in range(2, ny):
u[i, j] = u_previous[i, j] + a * dt * ( \
(u_previous[i+1, j] - 2*u_previous[i, j] + \
u_previous[i-1, j]) * dx2inv + \
(u_previous[i, j+1] - 2*u_previous[i, j] + \
u_previous[i, j-1]) * dy2inv )
# Time evolution for the edges of the grid
@cython.boundscheck(False)
@cython.wraparound(False)
@cython.cdivision(True)
cpdef evolve_edges(cnp.ndarray[cnp.double_t, ndim=2]u,
cnp.ndarray[cnp.double_t, ndim=2]u_previous,
double a, double dt, double dx2, double dy2):
"""Explicit time evolution.
u: new temperature field
u_previous: previous field
a: diffusion constant
dt: time step
dx2: grid spacing squared, i.e. dx^2
dy2: -- "" -- , i.e. dy^2"""
cdef int nx = u.shape[0] - 2
cdef int ny = u.shape[1] - 2
cdef int i,j
# Multiplication is more efficient than division
cdef double dx2inv = 1. / dx2
cdef double dy2inv = 1. / dy2
j = 1
for i in range(1, nx+1):
u[i, j] = u_previous[i, j] + a * dt * ( \
(u_previous[i+1, j] - 2*u_previous[i, j] + \
u_previous[i-1, j]) * dx2inv + \
(u_previous[i, j+1] - 2*u_previous[i, j] + \
u_previous[i, j-1]) * dy2inv )
j = ny
for i in range(1, nx+1):
u[i, j] = u_previous[i, j] + a * dt * ( \
(u_previous[i+1, j] - 2*u_previous[i, j] + \
u_previous[i-1, j]) * dx2inv + \
(u_previous[i, j+1] - 2*u_previous[i, j] + \
u_previous[i, j-1]) * dy2inv )
i = 1
for j in range(1, ny+1):
u[i, j] = u_previous[i, j] + a * dt * ( \
(u_previous[i+1, j] - 2*u_previous[i, j] + \
u_previous[i-1, j]) * dx2inv + \
(u_previous[i, j+1] - 2*u_previous[i, j] + \
u_previous[i, j-1]) * dy2inv )
i = nx
for j in range(1, ny+1):
u[i, j] = u_previous[i, j] + a * dt * ( \
(u_previous[i+1, j] - 2*u_previous[i, j] + \
u_previous[i-1, j]) * dx2inv + \
(u_previous[i, j+1] - 2*u_previous[i, j] + \
u_previous[i, j-1]) * dy2inv )
from mpi4py.MPI import Request
import numpy as np
# Nonblocking halo exchange with the neighbouring processes
def exchange_init(u, parallel):
    # Send to the up neighbour, receive from the down neighbour
parallel.requests[0] = parallel.comm.Isend((u[1,:], 1, parallel.rowtype),
dest=parallel.nup)
parallel.requests[1] = parallel.comm.Irecv((u[-1,:], 1, parallel.rowtype),
source=parallel.ndown)
    # Send to the down neighbour, receive from the up neighbour
parallel.requests[2] = parallel.comm.Isend((u[-2,:], 1, parallel.rowtype),
dest=parallel.ndown)
parallel.requests[3] = parallel.comm.Irecv((u[0,:], 1, parallel.rowtype),
source=parallel.nup)
# Send to the left, receive from right
parallel.requests[4] = parallel.comm.Isend((u.ravel()[1:], 1,
parallel.columntype),
dest=parallel.nleft)
idx = u.shape[1] - 1 # ny + 1
parallel.requests[5] = parallel.comm.Irecv((u.ravel()[idx:], 1,
parallel.columntype),
source=parallel.nright)
# Send to the right, receive from left
idx = u.shape[1] - 2 # ny
parallel.requests[6] = parallel.comm.Isend((u.ravel()[idx:], 1,
parallel.columntype),
dest=parallel.nright)
parallel.requests[7] = parallel.comm.Irecv((u, 1, parallel.columntype),
source=parallel.nleft)
def exchange_finalize(parallel):
# MPI.Request.Waitall(parallel.requests)
Request.Waitall(parallel.requests)
from __future__ import print_function
import numpy as np
import time
from heat_setup import initialize
from evolve import evolve_inner, evolve_edges
from exchange import exchange_init, exchange_finalize
from heat_io import write_field, write_restart
# Basic parameters
a = 0.5 # Diffusion constant
image_interval = 100 # Write frequency for png files
restart_interval = 200  # Write frequency for restart files
# Grid spacings
dx = 0.01
dy = 0.01
dx2 = dx**2
dy2 = dy**2
# For stability, this is the largest interval possible
# for the size of the time-step:
dt = dx2*dy2 / ( 2*a*(dx2+dy2) )
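# (For this explicit scheme, stability requires a*dt*(1/dx2 + 1/dy2) <= 1/2,
#  which rearranges to the expression above.)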
def main():
# Initialize
field, field0, parallel, iter0, timesteps = initialize()
write_field(field, parallel, iter0)
t0 = time.time()
for iter in range(iter0, iter0 + timesteps):
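        # Overlap communication and computation: start the nonblocking halo
        # exchange, update the interior points that do not need halo data,
        # wait for the exchange to finish, then update the edge points.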
exchange_init(field0, parallel)
evolve_inner(field, field0, a, dt, dx2, dy2)
exchange_finalize(parallel)
evolve_edges(field, field0, a, dt, dx2, dy2)
if iter % image_interval == 0:
write_field(field, parallel, iter)
if iter % restart_interval == 0:
write_restart(field, parallel, iter)
field, field0 = field0, field
t1 = time.time()
if parallel.rank == 0:
print("Running time: {0}".format(t1-t0))
if __name__ == '__main__':
main()
from __future__ import print_function
import numpy as np
from mpi4py import MPI
from parallel import Parallel
try:
import h5py
use_hdf5 = True
except ImportError:
use_hdf5 = False
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
# Set the colormap
plt.rcParams['image.cmap'] = 'BrBG'
def write_field(field, parallel, step):
nx, ny = [i - 2 for i in field.shape]
nx_full = parallel.dims[0] * nx
ny_full = parallel.dims[1] * ny
if parallel.rank == 0:
field_full = np.zeros((nx_full, ny_full), float)
field_full[:nx, :ny] = field[1:-1, 1:-1]
# Receive data from other ranks
for p in range(1, parallel.size):
coords = parallel.comm.Get_coords(p)
idx = coords[0] * nx * ny_full + coords[1] * ny
parallel.comm.Recv((field_full.ravel()[idx:], 1,
parallel.subarraytype), source=p)
# Write out the data
plt.figure()
plt.gca().clear()
plt.imshow(field_full)
plt.axis('off')
plt.savefig('heat_{0:04d}.png'.format(step))
else:
# Send the data
parallel.comm.Ssend((field, 1, parallel.subarraytype), dest=0)
def read_field(filename):
# Read the initial temperature field from file
rank = MPI.COMM_WORLD.Get_rank()
if rank == 0:
with open(filename) as f:
line = f.readline().split()
shape = [int(n) for n in line[1:3]]
else:
shape = None
nx, ny = MPI.COMM_WORLD.bcast(shape, root=0)
parallel = Parallel(nx, ny)
    nx_local = nx // parallel.dims[0]
    ny_local = ny // parallel.dims[1]
field = np.zeros((nx_local + 2, ny_local + 2), dtype=float)
if parallel.rank == 0:
field_full = np.loadtxt(filename)
field[1:-1, 1:-1] = field_full[:nx_local, :ny_local]
# Send the data to other ranks
for p in range(1, parallel.size):
coords = parallel.comm.Get_coords(p)
idx = coords[0] * nx_local * ny + coords[1] * ny_local
parallel.comm.Send((field_full.ravel()[idx:], 1,
parallel.subarraytype), dest=p)
else:
parallel.comm.Recv((field, 1, parallel.subarraytype),
source=0)
# fo = np.empty((nx_local, ny_local), dtype=float)
# parallel.comm.Recv(fo, source=0)
# import pickle
# pickle.dump(fo, open('fo_f{0}.pckl'.format(parallel.rank), 'w'))
# Set the boundary values
field[0, :] = field[1, :]
field[-1, :] = field[-2, :]
field[:, 0] = field[:, 1]
field[:,-1] = field[:,-2]
field0 = field.copy()
return field, field0, parallel
def write_restart(field, parallel, iter):
nx, ny = [i - 2 for i in field.shape]
nx_full = parallel.dims[0] * nx
ny_full = parallel.dims[1] * ny
mode = MPI.MODE_CREATE | MPI.MODE_WRONLY
fh = MPI.File.Open(parallel.comm, "HEAT_RESTART.dat", mode)
if parallel.rank == 0:
fh.Write(np.array((nx_full, ny_full, iter)))
    disp = 3 * 8  # header holds three 8-byte integers
fh.Set_view(disp, MPI.DOUBLE, parallel.filetype)
fh.Write_all((field, 1, parallel.restarttype))
fh.Close()
def read_restart():
mode = MPI.MODE_RDONLY
fh = MPI.File.Open(MPI.COMM_WORLD, "HEAT_RESTART.dat", mode)
buf = np.zeros(3, int)
fh.Read_all(buf)
nx, ny, iter = buf[0], buf[1], buf[2]
parallel = Parallel(nx, ny)
    nx_local = nx // parallel.dims[0]
    ny_local = ny // parallel.dims[1]
if parallel.rank == 0:
print("Restarting from iteration ", iter)
field = np.zeros((nx_local + 2, ny_local + 2), dtype=float)
    disp = 3 * 8  # header holds three 8-byte integers
fh.Set_view(disp, MPI.DOUBLE, parallel.filetype)
fh.Read_all((field, 1, parallel.restarttype))
fh.Close()
field0 = field.copy()
return field, field0, parallel, iter
import numpy as np
import sys
from os.path import isfile
from parallel import Parallel
from heat_io import read_restart, read_field
def initialize():
timesteps = 500 # Number of time-steps to evolve system
nx = 2000
ny = 2000
if isfile('HEAT_RESTART.dat'):
field, field0, parallel, iter0 = read_restart()
else:
if len(sys.argv) == 2:
filename = sys.argv[1]
field, field0, parallel = read_field(filename)
elif len(sys.argv) == 3:
filename = sys.argv[1]
timesteps = int(sys.argv[2])
field, field0, parallel = read_field(filename)
elif len(sys.argv) == 4:
nx = int(sys.argv[1])
ny = int(sys.argv[2])
timesteps = int(sys.argv[3])
field, field0, parallel = generate_field(nx, ny)
else:
field, field0, parallel = generate_field(nx, ny)
iter0 = 0
    return field, field0, parallel, iter0, timesteps
def generate_field(nx, ny):
parallel = Parallel(nx, ny)
    nx_local = nx // parallel.dims[0]
    ny_local = ny // parallel.dims[1]
field = np.zeros((nx_local + 2, ny_local + 2), dtype=float)
# Square of the disk radius
radius2 = (nx / 6.0)**2
X, Y = np.meshgrid(np.arange(nx_local + 2), np.arange(ny_local + 2),
indexing='ij')
ds2 = (X + parallel.coords[0] * nx_local - nx / 2.0 )**2 + \
(Y + parallel.coords[1] * ny_local - ny / 2.0 )**2
field[:] = np.where(ds2 < radius2, 5.0, 65.0)
# Boundaries
if parallel.coords[0] == 0:
field[0,:] = 85.0
if parallel.coords[0] == parallel.dims[0] - 1:
field[-1,:] = 5.0
if parallel.coords[1] == 0:
field[:,0] = 20.0
if parallel.coords[1] == parallel.dims[1] - 1:
field[:,-1] = 70.0
field0 = field.copy()
return field, field0, parallel
from mpi4py import MPI
class Parallel(object):
def __init__(self, nx, ny):
self.requests = []
world = MPI.COMM_WORLD
world_size = world.Get_size()
self.dims = MPI.Compute_dims(world_size, (0,0))
        nx_local = nx // self.dims[0]
        ny_local = ny // self.dims[1]
if nx_local * self.dims[0] != nx:
print("Cannot divide grid evenly to processors in x-direction!")
print("{0} x {1} != {2}".format(nx_local, self.dims[0], nx))
raise RuntimeError('Invalid domain decomposition')
if ny_local * self.dims[1] != ny:
print("Cannot divide grid evenly to processors in y-direction!")
print("{0} x {1} != {2}".format(ny_local, self.dims[1], ny))
raise RuntimeError('Invalid domain decomposition')
self.comm = world.Create_cart(self.dims, periods=None, reorder=True)
self.nup, self.ndown = self.comm.Shift(0, 1)
self.nleft, self.nright = self.comm.Shift(1, 1)
self.size = self.comm.Get_size()
self.rank = self.comm.Get_rank()
self.coords = self.comm.coords
if self.rank == 0:
print("Using domain decomposition {0} x {1}".format(self.dims[0],
self.dims[1]))
print("Local domain size {0} x {1}".format(nx_local, ny_local))
# Maybe there is more elegant way to initialize the requests array?
self.requests = [0, ] * 8
# Datatypes for halo exchange
self.rowtype = MPI.DOUBLE.Create_contiguous(ny_local + 2)
self.rowtype.Commit()
self.columntype = MPI.DOUBLE.Create_vector(nx_local + 2, 1,
ny_local + 2)
self.columntype.Commit()
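        # rowtype covers one full local row (ny_local + 2 doubles), while
        # columntype picks one double from each of the nx_local + 2 rows,
        # i.e. one full column of the row-major local array.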
# Datatypes for subblock needed in text I/O
# Rank 0 uses datatype for receiving data into full array while
# other ranks use datatype for sending the inner part of array
subsizes = [nx_local, ny_local]
if self.rank == 0:
sizes = [nx, ny]
offsets = [0, 0]
else:
sizes = [nx_local + 2, ny_local + 2]
offsets = [1, 1]
self.subarraytype = MPI.DOUBLE.Create_subarray(sizes, subsizes, offsets)
self.subarraytype.Commit()
# Datatypes for restart I/O
sizes = [nx + 2, ny + 2]
offsets = [1 + self.coords[0] * nx_local, 1 + self.coords[1] * ny_local]
if self.coords[0] == 0:
offsets[0] -= 1
subsizes[0] += 1
if self.coords[0] == (self.dims[0] - 1):
subsizes[0] += 1
if self.coords[1] == 0:
offsets[1] -= 1
subsizes[1] += 1
if self.coords[1] == (self.dims[1] - 1):
subsizes[1] += 1
self.filetype = MPI.DOUBLE.Create_subarray(sizes, subsizes, offsets)
self.filetype.Commit()
sizes = [nx_local + 2, ny_local + 2]
offsets = [1, 1]
if self.coords[0] == 0:
offsets[0] = 0
if self.coords[1] == 0:
offsets[1] = 0
self.restarttype = MPI.DOUBLE.Create_subarray(sizes, subsizes, offsets)
self.restarttype.Commit()
from distutils.core import setup, Extension
from Cython.Build import cythonize
setup(
ext_modules=cythonize("evolve.pyx"),
)
Copyright (C) 2018 CSC - IT Center for Science Ltd.
Licensed under the terms of the GNU General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
Code is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
Copy of the GNU General Public License can be obtained from
<http://www.gnu.org/licenses/>.
## Parallel Hello World
a) Write a simple parallel program that prints out something
(e.g. “Hello world!”) from multiple processes. Include the MPI headers
(C), use the MPI module (Fortran), or import mpi4py (Python) and
call appropriate initialization and finalization routines.
b) Modify the program so that each process also prints out its rank, and
so that the process with rank 0 also prints out the total number of MPI
processes.
program hello
use mpi
implicit none
integer :: rc, myid, ntasks
call MPI_INIT(rc)
call MPI_COMM_SIZE(MPI_COMM_WORLD, ntasks, rc)
call MPI_COMM_RANK(MPI_COMM_WORLD, myid, rc)
if(myid == 0) then
write(*,*) 'In total there are ',ntasks, 'tasks'
endif
write(*,*) 'Hello from ',myid
call MPI_FINALIZE(rc)
end program hello
#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>
int main(int argc, char *argv[])
{
    int myid, ntasks;
MPI_Init(&argc, &argv);
MPI_Comm_size(MPI_COMM_WORLD, &ntasks);
MPI_Comm_rank(MPI_COMM_WORLD, &myid);
if (myid == 0) {
printf("In total there are %i tasks\n", ntasks);
}
printf("Hello from %i\n", myid);
MPI_Finalize();
return 0;
}
from __future__ import print_function
from mpi4py import MPI
comm = MPI.COMM_WORLD
size = comm.Get_size()
rank = comm.Get_rank()
if rank == 0:
print("In total there are {0} tasks".format(size))
print("Hello from", rank)