! PNG writer for heat equation solver
module pngwriter
use heat
contains
function save_png(data, nx, ny, fname) result(stat)
use, intrinsic :: ISO_C_BINDING
implicit none
real(dp), dimension(:,:), intent(in) :: data
integer, intent(in) :: nx, ny
character(len=*), intent(in) :: fname
integer :: stat
! Interface for save_png C-function
interface
! The C function definition is
! int save_png(double *data, const int nx, const int ny,
!              const char *fname, const char order)
function save_png_c(data, nx, ny, fname, order) &
& bind(C,name="save_png") result(stat)
use, intrinsic :: ISO_C_BINDING
implicit none
real(kind=C_DOUBLE) :: data(*)
integer(kind=C_INT), value, intent(IN) :: nx, ny
character(kind=C_CHAR), intent(IN) :: fname(*)
character(kind=C_CHAR), value, intent(IN) :: order
integer(kind=C_INT) :: stat
end function save_png_c
end interface
stat = save_png_c(data, nx, ny, trim(fname) // C_NULL_CHAR, 'f')
if (stat /= 0) then
write(*,*) 'save_png returned error!'
end if
end function save_png
end module pngwriter
! Setup routines for heat equation solver
module setup
use heat
contains
subroutine initialize(previous, current, nsteps, parallel, iter0)
use utilities
use io
implicit none
type(field), intent(out) :: previous, current
integer, intent(out) :: nsteps, iter0
type(parallel_data), intent(out) :: parallel
integer :: rows, cols
logical :: using_input_file, restart
character(len=85) :: input_file, arg ! Input file name and command line arguments
! Default values for grid size and time steps
rows = 2000
cols = 2000
nsteps = 500
using_input_file = .false.
iter0 = 0
! Read in the command line arguments and
! set up the needed variables
select case(command_argument_count())
case(0) ! No arguments -> default values
case(1) ! One argument -> input file name
using_input_file = .true.
call get_command_argument(1, input_file)
case(2) ! Two arguments -> input file name and number of steps
using_input_file = .true.
call get_command_argument(1, input_file)
call get_command_argument(2, arg)
read(arg, *) nsteps
case(3) ! Three arguments -> rows, cols and nsteps
call get_command_argument(1, arg)
read(arg, *) rows
call get_command_argument(2, arg)
read(arg, *) cols
call get_command_argument(3, arg)
read(arg, *) nsteps
case default
call usage()
stop
end select
! Check if checkpoint exists
inquire (file="HEAT_RESTART.dat", exist=restart)
if (restart) then
call read_restart(previous, parallel, iter0)
if (parallel % rank == 0) then
write(*,'(a)') 'Restarting from an earlier checkpoint'
write(*,'(a,i0)') ' saved at iteration ', iter0
end if
call copy_fields(previous, current)
! Initialize the fields according to the command line arguments
else if (using_input_file) then
call read_field(previous, input_file, parallel)
call copy_fields(previous, current)
else
call parallel_setup(parallel, rows, cols)
call set_field_dimensions(previous, rows, cols, parallel)
call set_field_dimensions(current, rows, cols, parallel)
call generate_field(previous, parallel)
call copy_fields(previous, current)
end if
end subroutine initialize
! Generate the initial temperature field. The pattern is a disc with a radius
! of nx_full / 6 in the center of the grid.
! Boundary conditions are (different) constant temperatures outside the grid
subroutine generate_field(field0, parallel)
use heat
implicit none
type(field), intent(inout) :: field0
type(parallel_data), intent(in) :: parallel
real(dp) :: radius2, ds2
integer :: i, j
integer, dimension(2) :: dims, coords
logical :: periods(2)
integer :: ierr
! The field's data array also contains a halo region
allocate(field0%data(0:field0%nx+1, 0:field0%ny+1))
call mpi_cart_get(parallel%comm, 2, dims, periods, coords, ierr)
! Square of the disk radius
radius2 = (field0%nx_full / 6.0)**2
do j = 0, field0%ny + 1
do i = 0, field0%nx + 1
ds2 = (i + coords(1) * field0%nx - field0%nx_full / 2.0 + 1)**2 + &
(j + coords(2) * field0%ny - field0%ny_full / 2.0 + 1)**2
if (ds2 < radius2) then
field0%data(i,j) = 5.0
else
field0%data(i,j) = 65.0
end if
end do
end do
! Left boundary
if (coords(2) == 0) then
field0%data(:,0) = 20.0_dp
end if
! Upper boundary
if (coords(1) == 0) then
field0%data(0,:) = 85.0_dp
end if
! Right boundary
if (coords(2) == dims(2) - 1) then
field0%data(:,field0%ny+1) = 70.0_dp
end if
! Lower boundary
if (coords(1) == dims(1) - 1) then
field0%data(field0%nx+1,:) = 5.0_dp
end if
end subroutine generate_field
! Clean-up routine for the field type
! Arguments:
!   field0, field1 (type(field)): field variables to be deallocated
subroutine finalize(field0, field1)
use heat
implicit none
type(field), intent(inout) :: field0, field1
deallocate(field0%data)
deallocate(field1%data)
end subroutine finalize
! Helper routine that prints out a simple usage message if the
! user gives more than three arguments
subroutine usage()
implicit none
character(len=256) :: buf
call get_command_argument(0, buf)
write (*,'(A)') 'Usage:'
write (*,'(A, " (default values will be used)")') trim(buf)
write (*,'(A, " <filename>")') trim(buf)
write (*,'(A, " <filename> <nsteps>")') trim(buf)
write (*,'(A, " <rows> <cols> <nsteps>")') trim(buf)
end subroutine usage
end module setup
! Utility routines for heat equation solver
! NOTE: This file does not need to be edited!
module utilities
use heat
contains
! Swap the data fields of two variables of type field
! Arguments:
! curr, prev (type(field)): the two variables that are swapped
subroutine swap_fields(curr, prev)
implicit none
type(field), intent(inout) :: curr, prev
real(dp), allocatable, dimension(:,:) :: tmp
call move_alloc(curr%data, tmp)
call move_alloc(prev%data, curr%data)
call move_alloc(tmp, prev%data)
end subroutine swap_fields
! Copy the data from one field to another
! Arguments:
! from_field (type(field)): variable to copy from
! to_field (type(field)): variable to copy to
subroutine copy_fields(from_field, to_field)
implicit none
type(field), intent(in) :: from_field
type(field), intent(out) :: to_field
! Consistency checks
if (.not.allocated(from_field%data)) then
write (*,*) "Can not copy from a field without allocated data"
stop
end if
if (.not.allocated(to_field%data)) then
! Target is not initialized, allocate memory
allocate(to_field%data(lbound(from_field%data, 1):ubound(from_field%data, 1), &
& lbound(from_field%data, 2):ubound(from_field%data, 2)))
else if (any(shape(from_field%data) /= shape(to_field%data))) then
write (*,*) "Wrong field data sizes in copy routine"
print *, shape(from_field%data), shape(to_field%data)
stop
end if
to_field%data = from_field%data
to_field%nx = from_field%nx
to_field%ny = from_field%ny
to_field%dx = from_field%dx
to_field%dy = from_field%dy
end subroutine copy_fields
end module utilities
import numpy as np
# Time evolution for the inner part of the grid
def evolve_inner(u, u_previous, a, dt, dx2, dy2):
"""Explicit time evolution.
u: new temperature field
u_previous: previous field
a: diffusion constant
dt: time step
dx2: grid spacing squared, i.e. dx^2
dy2: -- "" -- , i.e. dy^2"""
u[2:-2, 2:-2] = u_previous[2:-2, 2:-2] + a * dt * ( \
(u_previous[3:-1, 2:-2] - 2*u_previous[2:-2, 2:-2] + \
u_previous[1:-3, 2:-2]) / dx2 + \
(u_previous[2:-2, 3:-1] - 2*u_previous[2:-2, 2:-2] + \
u_previous[2:-2, 1:-3]) / dy2 )
# Time evolution for the edges of the grid
def evolve_edges(u, u_previous, a, dt, dx2, dy2):
"""Explicit time evolution.
u: new temperature field
u_previous: previous field
a: diffusion constant
dt: time step
dx2: grid spacing squared, i.e. dx^2
dy2: -- "" -- , i.e. dy^2"""
u[1, 1:-1] = u_previous[1, 1:-1] + a * dt * ( \
(u_previous[2, 1:-1] - 2*u_previous[1, 1:-1] + \
u_previous[0, 1:-1]) / dx2 + \
(u_previous[1, 2:] - 2*u_previous[1, 1:-1] + \
u_previous[1, :-2]) / dy2 )
u[-2, 1:-1] = u_previous[-2, 1:-1] + a * dt * ( \
(u_previous[-1, 1:-1] - 2*u_previous[-2, 1:-1] + \
u_previous[-3, 1:-1]) / dx2 + \
(u_previous[-2, 2:] - 2*u_previous[-2, 1:-1] + \
u_previous[-2, :-2]) / dy2 )
u[1:-1, 1] = u_previous[1:-1, 1] + a * dt * ( \
(u_previous[2:, 1] - 2*u_previous[1:-1, 1] + \
u_previous[:-2, 1]) / dx2 + \
(u_previous[1:-1, 2] - 2*u_previous[1:-1, 1] + \
u_previous[1:-1, 0]) / dy2 )
u[1:-1, -2] = u_previous[1:-1, -2] + a * dt * ( \
(u_previous[2:, -2] - 2*u_previous[1:-1, -2] + \
u_previous[:-2, -2]) / dx2 + \
(u_previous[1:-1, -1] - 2*u_previous[1:-1, -2] + \
u_previous[1:-1, -3]) / dy2 )
# cython: profile=True
import numpy as np
cimport numpy as cnp
import cython
# Time evolution for the inner part of the grid
@cython.boundscheck(False)
@cython.wraparound(False)
@cython.cdivision(True)
cpdef evolve_inner(cnp.ndarray[cnp.double_t, ndim=2]u,
cnp.ndarray[cnp.double_t, ndim=2]u_previous,
double a, double dt, double dx2, double dy2):
"""Explicit time evolution.
u: new temperature field
u_previous: previous field
a: diffusion constant
dt: time step
dx2: grid spacing squared, i.e. dx^2
dy2: grid spacing squared, i.e. dy^2"""
cdef int nx = u.shape[0] - 2
cdef int ny = u.shape[1] - 2
cdef int i,j
# Multiplication is more efficient than division
cdef double dx2inv = 1. / dx2
cdef double dy2inv = 1. / dy2
for i in range(2, nx):
for j in range(2, ny):
u[i, j] = u_previous[i, j] + a * dt * ( \
(u_previous[i+1, j] - 2*u_previous[i, j] + \
u_previous[i-1, j]) * dx2inv + \
(u_previous[i, j+1] - 2*u_previous[i, j] + \
u_previous[i, j-1]) * dy2inv )
# Time evolution for the edges of the grid
@cython.boundscheck(False)
@cython.wraparound(False)
@cython.cdivision(True)
cpdef evolve_edges(cnp.ndarray[cnp.double_t, ndim=2]u,
cnp.ndarray[cnp.double_t, ndim=2]u_previous,
double a, double dt, double dx2, double dy2):
"""Explicit time evolution.
u: new temperature field
u_previous: previous field
a: diffusion constant
dt: time step
dx2: grid spacing squared, i.e. dx^2
dy2: -- "" -- , i.e. dy^2"""
cdef int nx = u.shape[0] - 2
cdef int ny = u.shape[1] - 2
cdef int i,j
# Multiplication is more efficient than division
cdef double dx2inv = 1. / dx2
cdef double dy2inv = 1. / dy2
j = 1
for i in range(1, nx+1):
u[i, j] = u_previous[i, j] + a * dt * ( \
(u_previous[i+1, j] - 2*u_previous[i, j] + \
u_previous[i-1, j]) * dx2inv + \
(u_previous[i, j+1] - 2*u_previous[i, j] + \
u_previous[i, j-1]) * dy2inv )
j = ny
for i in range(1, nx+1):
u[i, j] = u_previous[i, j] + a * dt * ( \
(u_previous[i+1, j] - 2*u_previous[i, j] + \
u_previous[i-1, j]) * dx2inv + \
(u_previous[i, j+1] - 2*u_previous[i, j] + \
u_previous[i, j-1]) * dy2inv )
i = 1
for j in range(1, ny+1):
u[i, j] = u_previous[i, j] + a * dt * ( \
(u_previous[i+1, j] - 2*u_previous[i, j] + \
u_previous[i-1, j]) * dx2inv + \
(u_previous[i, j+1] - 2*u_previous[i, j] + \
u_previous[i, j-1]) * dy2inv )
i = nx
for j in range(1, ny+1):
u[i, j] = u_previous[i, j] + a * dt * ( \
(u_previous[i+1, j] - 2*u_previous[i, j] + \
u_previous[i-1, j]) * dx2inv + \
(u_previous[i, j+1] - 2*u_previous[i, j] + \
u_previous[i, j-1]) * dy2inv )
from mpi4py.MPI import Request
import numpy as np
# Non-blocking halo exchange for the distributed temperature field
def exchange_init(u, parallel):
# Send up, receive from down
parallel.requests[0] = parallel.comm.Isend((u[1,:], 1, parallel.rowtype),
dest=parallel.nup)
parallel.requests[1] = parallel.comm.Irecv((u[-1,:], 1, parallel.rowtype),
source=parallel.ndown)
# Send down, receive from up
parallel.requests[2] = parallel.comm.Isend((u[-2,:], 1, parallel.rowtype),
dest=parallel.ndown)
parallel.requests[3] = parallel.comm.Irecv((u[0,:], 1, parallel.rowtype),
source=parallel.nup)
# Send to the left, receive from right
parallel.requests[4] = parallel.comm.Isend((u.ravel()[1:], 1,
parallel.columntype),
dest=parallel.nleft)
idx = u.shape[1] - 1 # ny + 1
parallel.requests[5] = parallel.comm.Irecv((u.ravel()[idx:], 1,
parallel.columntype),
source=parallel.nright)
# Send to the right, receive from left
idx = u.shape[1] - 2 # ny
parallel.requests[6] = parallel.comm.Isend((u.ravel()[idx:], 1,
parallel.columntype),
dest=parallel.nright)
parallel.requests[7] = parallel.comm.Irecv((u, 1, parallel.columntype),
source=parallel.nleft)
def exchange_finalize(parallel):
# MPI.Request.Waitall(parallel.requests)
Request.Waitall(parallel.requests)
from __future__ import print_function
import numpy as np
import time
from heat_setup import initialize
from evolve import evolve_inner, evolve_edges
from exchange import exchange_init, exchange_finalize
from heat_io import write_field, write_restart
# Basic parameters
a = 0.5 # Diffusion constant
image_interval = 100 # Write frequency for png files
restart_interval = 200 # Write frequency for restart files
# Grid spacings
dx = 0.01
dy = 0.01
dx2 = dx**2
dy2 = dy**2
# For stability, this is the largest possible time step:
dt = dx2*dy2 / ( 2*a*(dx2+dy2) )
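# (The explicit scheme is stable when a*dt*(1/dx2 + 1/dy2) <= 1/2;
#  the expression above saturates that bound.)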
def main():
# Initialize
field, field0, parallel, iter0, timesteps = initialize()
write_field(field, parallel, iter0)
t0 = time.time()
for iter in range(iter0, iter0 + timesteps):
exchange_init(field0, parallel)
evolve_inner(field, field0, a, dt, dx2, dy2)
exchange_finalize(parallel)
evolve_edges(field, field0, a, dt, dx2, dy2)
if iter % image_interval == 0:
write_field(field, parallel, iter)
if iter % restart_interval == 0:
write_restart(field, parallel, iter)
field, field0 = field0, field
t1 = time.time()
if parallel.rank == 0:
print("Running time: {0}".format(t1-t0))
if __name__ == '__main__':
main()
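# Typical launch under an MPI launcher (exact command depends on the installation),
# assuming this driver script is saved as, e.g., heat.py:
#   mpiexec -n 4 python heat.py [input_file [nsteps]]
#   mpiexec -n 4 python heat.py nx ny nsteps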
from __future__ import print_function
import numpy as np
from mpi4py import MPI
from parallel import Parallel
try:
import h5py
use_hdf5 = True
except ImportError:
use_hdf5 = False
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
# Set the colormap
plt.rcParams['image.cmap'] = 'BrBG'
def write_field(field, parallel, step):
nx, ny = [i - 2 for i in field.shape]
nx_full = parallel.dims[0] * nx
ny_full = parallel.dims[1] * ny
if parallel.rank == 0:
field_full = np.zeros((nx_full, ny_full), float)
field_full[:nx, :ny] = field[1:-1, 1:-1]
# Receive data from other ranks
for p in range(1, parallel.size):
coords = parallel.comm.Get_coords(p)
idx = coords[0] * nx * ny_full + coords[1] * ny
parallel.comm.Recv((field_full.ravel()[idx:], 1,
parallel.subarraytype), source=p)
# Write out the data
plt.figure()
plt.gca().clear()
plt.imshow(field_full)
plt.axis('off')
plt.savefig('heat_{0:04d}.png'.format(step))
else:
# Send the data
parallel.comm.Ssend((field, 1, parallel.subarraytype), dest=0)
def read_field(filename):
# Read the initial temperature field from file
rank = MPI.COMM_WORLD.Get_rank()
if rank == 0:
with open(filename) as f:
line = f.readline().split()
shape = [int(n) for n in line[1:3]]
else:
shape = None
nx, ny = MPI.COMM_WORLD.bcast(shape, root=0)
parallel = Parallel(nx, ny)
nx_local = nx // parallel.dims[0]
ny_local = ny // parallel.dims[1]
field = np.zeros((nx_local + 2, ny_local + 2), dtype=float)
if parallel.rank == 0:
field_full = np.loadtxt(filename)
field[1:-1, 1:-1] = field_full[:nx_local, :ny_local]
# Send the data to other ranks
for p in range(1, parallel.size):
coords = parallel.comm.Get_coords(p)
idx = coords[0] * nx_local * ny + coords[1] * ny_local
parallel.comm.Send((field_full.ravel()[idx:], 1,
parallel.subarraytype), dest=p)
else:
parallel.comm.Recv((field, 1, parallel.subarraytype),
source=0)
# fo = np.empty((nx_local, ny_local), dtype=float)
# parallel.comm.Recv(fo, source=0)
# import pickle
# pickle.dump(fo, open('fo_f{0}.pckl'.format(parallel.rank), 'w'))
# Set the boundary values
field[0, :] = field[1, :]
field[-1, :] = field[-2, :]
field[:, 0] = field[:, 1]
field[:,-1] = field[:,-2]
field0 = field.copy()
return field, field0, parallel
def write_restart(field, parallel, iter):
nx, ny = [i - 2 for i in field.shape]
nx_full = parallel.dims[0] * nx
ny_full = parallel.dims[1] * ny
mode = MPI.MODE_CREATE | MPI.MODE_WRONLY
fh = MPI.File.Open(parallel.comm, "HEAT_RESTART.dat", mode)
if parallel.rank == 0:
fh.Write(np.array((nx_full, ny_full, iter)))
disp = 3 * 8 # header: three 8-byte integers (NumPy's default integer size)
fh.Set_view(disp, MPI.DOUBLE, parallel.filetype)
fh.Write_all((field, 1, parallel.restarttype))
fh.Close()
def read_restart():
mode = MPI.MODE_RDONLY
fh = MPI.File.Open(MPI.COMM_WORLD, "HEAT_RESTART.dat", mode)
buf = np.zeros(3, int)
fh.Read_all(buf)
nx, ny, iter = buf[0], buf[1], buf[2]
parallel = Parallel(nx, ny)
nx_local = nx // parallel.dims[0]
ny_local = ny // parallel.dims[1]
if parallel.rank == 0:
print("Restarting from iteration ", iter)
field = np.zeros((nx_local + 2, ny_local + 2), dtype=float)
disp = 3 * 8 # header: three 8-byte integers (NumPy's default integer size)
fh.Set_view(disp, MPI.DOUBLE, parallel.filetype)
fh.Read_all((field, 1, parallel.restarttype))
fh.Close()
field0 = field.copy()
return field, field0, parallel, iter
import numpy as np
import sys
from os.path import isfile
from parallel import Parallel
from heat_io import read_restart, read_field
def initialize():
timesteps = 500 # Number of time-steps to evolve system
nx = 1000
ny = 1000
if isfile('HEAT_RESTART.dat'):
field, field0, parallel, iter0 = read_restart()
else:
if len(sys.argv) == 2:
filename = sys.argv[1]
field, field0, parallel = read_field(filename)
elif len(sys.argv) == 3:
filename = sys.argv[1]
timesteps = int(sys.argv[2])
field, field0, parallel = read_field(filename)
elif len(sys.argv) == 4:
nx = int(sys.argv[1])
ny = int(sys.argv[2])
timesteps = int(sys.argv[3])
field, field0, parallel = generate_field(nx, ny)
else:
field, field0, parallel = generate_field(nx, ny)
iter0 = 0
return field, field0, parallel, iter0, timesteps
def generate_field(nx, ny):
parallel = Parallel(nx, ny)
nx_local = nx // parallel.dims[0]
ny_local = ny // parallel.dims[1]
field = np.zeros((nx_local + 2, ny_local + 2), dtype=float)
# Square of the disk radius
radius2 = (nx / 6.0)**2
X, Y = np.meshgrid(np.arange(nx_local + 2), np.arange(ny_local + 2),
indexing='ij')
ds2 = (X + parallel.coords[0] * nx_local - nx / 2.0 )**2 + \
(Y + parallel.coords[1] * ny_local - ny / 2.0 )**2
field[:] = np.where(ds2 < radius2, 5.0, 65.0)
# Boundaries
if parallel.coords[0] == 0:
field[0,:] = 85.0
if parallel.coords[0] == parallel.dims[0] - 1:
field[-1,:] = 5.0
if parallel.coords[1] == 0:
field[:,0] = 20.0
if parallel.coords[1] == parallel.dims[1] - 1:
field[:,-1] = 70.0
field0 = field.copy()
return field, field0, parallel
from mpi4py import MPI
class Parallel(object):
def __init__(self, nx, ny):
self.requests = []
world = MPI.COMM_WORLD
world_size = world.Get_size()
self.dims = MPI.Compute_dims(world_size, (0,0))
nx_local = nx // self.dims[0]
ny_local = ny // self.dims[1]
if nx_local * self.dims[0] != nx:
print("Cannot divide grid evenly to processors in x-direction!")
print("{0} x {1} != {2}".format(nx_local, self.dims[0], nx))
raise RuntimeError('Invalid domain decomposition')
if ny_local * self.dims[1] != ny:
print("Cannot divide grid evenly to processors in y-direction!")
print("{0} x {1} != {2}".format(ny_local, self.dims[1], ny))
raise RuntimeError('Invalid domain decomposition')
self.comm = world.Create_cart(self.dims, periods=None, reorder=True)
self.nup, self.ndown = self.comm.Shift(0, 1)
self.nleft, self.nright = self.comm.Shift(1, 1)
self.size = self.comm.Get_size()
self.rank = self.comm.Get_rank()
self.coords = self.comm.coords
if self.rank == 0:
print("Using domain decomposition {0} x {1}".format(self.dims[0],
self.dims[1]))
print("Local domain size {0} x {1}".format(nx_local, ny_local))
# Maybe there is a more elegant way to initialize the requests array?
self.requests = [0, ] * 8
# Datatypes for halo exchange
self.rowtype = MPI.DOUBLE.Create_contiguous(ny_local + 2)
self.rowtype.Commit()
self.columntype = MPI.DOUBLE.Create_vector(nx_local + 2, 1,
ny_local + 2)
self.columntype.Commit()
# Datatypes for subblock needed in text I/O
# Rank 0 uses a datatype for receiving data into the full array, while
# the other ranks use a datatype for sending the inner part of the array
subsizes = [nx_local, ny_local]
if self.rank == 0:
sizes = [nx, ny]
offsets = [0, 0]
else:
sizes = [nx_local + 2, ny_local + 2]
offsets = [1, 1]
self.subarraytype = MPI.DOUBLE.Create_subarray(sizes, subsizes, offsets)
self.subarraytype.Commit()
# Datatypes for restart I/O
sizes = [nx + 2, ny + 2]
offsets = [1 + self.coords[0] * nx_local, 1 + self.coords[1] * ny_local]
if self.coords[0] == 0:
offsets[0] -= 1
subsizes[0] += 1
if self.coords[0] == (self.dims[0] - 1):
subsizes[0] += 1
if self.coords[1] == 0:
offsets[1] -= 1
subsizes[1] += 1
if self.coords[1] == (self.dims[1] - 1):
subsizes[1] += 1
self.filetype = MPI.DOUBLE.Create_subarray(sizes, subsizes, offsets)
self.filetype.Commit()
sizes = [nx_local + 2, ny_local + 2]
offsets = [1, 1]
if self.coords[0] == 0:
offsets[0] = 0
if self.coords[1] == 0:
offsets[1] = 0
self.restarttype = MPI.DOUBLE.Create_subarray(sizes, subsizes, offsets)
self.restarttype.Commit()
from distutils.core import setup, Extension
from Cython.Build import cythonize
setup(
ext_modules=cythonize("evolve.pyx"),
)
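# The extension is typically built in place with:
#   python setup.py build_ext --inplace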
Copyright (C) 2018 CSC - IT Center for Science Ltd.
Licensed under the terms of the GNU General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
Code is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
Copy of the GNU General Public License can be obtained from
<http://www.gnu.org/licenses/>.
## Parallel Hello World
a) Write a simple parallel program that prints out something
(e.g. “Hello world!”) from multiple processes. Include the MPI headers
(C), use the MPI module (Fortran), or import mpi4py (Python), and
call the appropriate initialization and finalization routines.
b) Modify the program so that each process also prints out its rank,
and so that the process with rank 0 prints out the total number of MPI
processes as well.
program hello
use mpi
implicit none
integer :: rc, myid, ntasks
call MPI_INIT(rc)
call MPI_COMM_SIZE(MPI_COMM_WORLD, ntasks, rc)
call MPI_COMM_RANK(MPI_COMM_WORLD, myid, rc)
if(myid == 0) then
write(*,*) 'In total there are ',ntasks, 'tasks'
endif
write(*,*) 'Hello from ',myid
call MPI_FINALIZE(rc)
end program hello
#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>
int main(int argc, char *argv[])
{
int i, myid, ntasks;
MPI_Init(&argc, &argv);
MPI_Comm_size(MPI_COMM_WORLD, &ntasks);
MPI_Comm_rank(MPI_COMM_WORLD, &myid);
if (myid == 0) {
printf("In total there are %i tasks\n", ntasks);
}
printf("Hello from %i\n", myid);
MPI_Finalize();
return 0;
}
from __future__ import print_function
from mpi4py import MPI
comm = MPI.COMM_WORLD
size = comm.Get_size()
rank = comm.Get_rank()
if rank == 0:
print("In total there are {0} tasks".format(size))
print("Hello from", rank)
## Message chain
Write a simple program where every process sends data to the next
one. Let ntasks be the number of tasks and myid the rank of the
current process. Your program should work as follows:
- Every task with a rank less than ntasks-1 sends a message to task
myid+1. For example, task 0 sends a message to task 1.
- The message content is an integer array where each element is
initialized to myid.
- The message tag is the receiver’s id number.
- The sender prints out the number of elements it sends and the tag number.
- All tasks with rank ≥ 1 receive messages.
- Each receiver prints out their myid, and the first element in the
received array.
a) Implement the program described above using `MPI_Send` and
`MPI_Recv`. You may start from scratch or use as a starting point
[c/skeleton.c](c/skeleton.c), [fortran/skeleton.F90](fortran/skeleton.F90) or
[python/skeleton.py](python/skeleton.py). Investigate the timings with different
numbers of MPI tasks (e.g. 2, 4, 8, 16, ...), and pay attention especially to
rank 0. Can you explain the behaviour?
b) Rewrite the program using `MPI_Sendrecv` instead of `MPI_Send` and
`MPI_Recv`. Investigate again timings with different numbers of MPI
tasks (e.g. 2, 4, 8, 16, ...). Can you explain the differences to case a)?
c) Try to simplify the code by employing `MPI_PROC_NULL` to treat the
special cases of the first and last task in the chain.
d) Rewrite the program using non-blocking communication (`MPI_Isend` and
`MPI_Irecv`).
e) Rewrite the program using persistent communication operations
(`MPI_Send_init`, `MPI_Recv_init`, etc.).
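As a hint for parts b) and c), here is a minimal sketch (not the provided skeleton; the message length and variable names are illustrative) that combines `MPI_Sendrecv` with `MPI_PROC_NULL`. Communication with `MPI_PROC_NULL` is a no-op, so the first and last tasks of the chain need no special-case code.

```c
#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>

int main(int argc, char *argv[])
{
    int i, myid, ntasks, source, destination;
    const int size = 100;            /* illustrative message length */
    int *message, *receiveBuffer;
    MPI_Status status;

    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &ntasks);
    MPI_Comm_rank(MPI_COMM_WORLD, &myid);

    message = malloc(sizeof(int) * size);
    receiveBuffer = malloc(sizeof(int) * size);
    for (i = 0; i < size; i++) {
        message[i] = myid;
    }

    /* MPI_PROC_NULL turns the sends/receives at the ends of the chain
       into no-ops, so no special-casing of the first/last task is needed */
    destination = (myid < ntasks - 1) ? myid + 1 : MPI_PROC_NULL;
    source = (myid > 0) ? myid - 1 : MPI_PROC_NULL;

    /* Send with tag myid + 1 (the receiver's id), receive with tag myid */
    MPI_Sendrecv(message, size, MPI_INT, destination, myid + 1,
                 receiveBuffer, size, MPI_INT, source, myid,
                 MPI_COMM_WORLD, &status);

    if (destination != MPI_PROC_NULL) {
        printf("Sender: %d. Sent elements: %d. Tag: %d. Receiver: %d\n",
               myid, size, myid + 1, destination);
    }
    if (source != MPI_PROC_NULL) {
        printf("Receiver: %d. First element: %d.\n", myid, receiveBuffer[0]);
    }

    free(message);
    free(receiveBuffer);
    MPI_Finalize();
    return 0;
}
```

Because each task posts its send and receive in a single `MPI_Sendrecv` call, the chain does not serialize the way a naive `MPI_Send`/`MPI_Recv` sequence can when the messages are large enough for the sends to block.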
#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>
int main(int argc, char *argv[])
{
int i, myid, ntasks;
int size = 10000000;
int *message;
int *receiveBuffer;
MPI_Status status;
double t0, t1;
MPI_Init(&argc, &argv);
MPI_Comm_size(MPI_COMM_WORLD, &ntasks);
MPI_Comm_rank(MPI_COMM_WORLD, &myid);
/* Allocate message buffers */
message = malloc(sizeof(int) * size);
receiveBuffer = malloc(sizeof(int) * size);
/* Initialize message */
for (i = 0; i < size; i++) {
message[i] = myid;
}
/* Start measuring the time spent in communication */
MPI_Barrier(MPI_COMM_WORLD);
t0 = MPI_Wtime();
/* TODO start */
/* Send and receive messages as defined in exercise */
if (myid < ntasks - 1) {
printf("Sender: %d. Sent elements: %d. Tag: %d. Receiver: %d\n",
myid, size, myid + 1, myid + 1);
}
if (myid > 0) {
printf("Receiver: %d. first element %d.\n",
myid, receiveBuffer[0]);
}
/* TODO end */
/* Finalize measuring the time and print it out */
t1 = MPI_Wtime();
MPI_Barrier(MPI_COMM_WORLD);
fflush(stdout);
if (myid == 0) {
printf("\n");
printf("Timings:\n");
printf("Time elapsed in rank %2d: %6.3f\n", myid, t1 - t0);
}
MPI_Barrier(MPI_COMM_WORLD);
fflush(stdout);
if (myid == ntasks - 1) {
printf("Time elapsed in rank %2d: %6.3f\n", myid, t1 - t0);
}
free(message);
free(receiveBuffer);
MPI_Finalize();
return 0;
}