From 4076693e8668d74af8685e1561e1c942f1f1e7bd Mon Sep 17 00:00:00 2001
From: "Mads R. B. Kristensen"
Date: Fri, 8 Apr 2016 13:06:02 +0200
Subject: [PATCH 1/6] Added training materials for the Chapel Programming
 Language

---
 README.md                                | 14 +++++++++
 heat_equation/Makefile                   | 20 ++++++++++++
 heat_equation/src/multiple_machines.chpl | 40 ++++++++++++++++++++++++
 heat_equation/src/single_machine.chpl    | 38 ++++++++++++++++++++++
 4 files changed, 112 insertions(+)
 create mode 100644 README.md
 create mode 100644 heat_equation/Makefile
 create mode 100644 heat_equation/src/multiple_machines.chpl
 create mode 100644 heat_equation/src/single_machine.chpl

diff --git a/README.md b/README.md
new file mode 100644
index 0000000..084c70b
--- /dev/null
+++ b/README.md
@@ -0,0 +1,14 @@
+Chapel
+======
+
+Compilation instructions
+------------------------
+There are no specific requirements for building the examples,
+just standard make, a working MPI environment (for the MPI examples) and
+an OpenMP enabled C or Fortran compiler (for the OpenMP examples).
+
+Move to the proper subfolder (C or Fortran) and modify the top of the **Makefile**
+according to your environment (proper compiler commands and compiler flags).
+
+All examples can be built with a simple **make**; **make mpi** builds the MPI
+examples and **make omp** the OpenMP examples.

diff --git a/heat_equation/Makefile b/heat_equation/Makefile
new file mode 100644
index 0000000..0768425
--- /dev/null
+++ b/heat_equation/Makefile
@@ -0,0 +1,20 @@
+# Makefile that builds each src/*.chpl file into a binary in bin/*
+
+CC=chpl
+CFLAGS=-g
+LDFLAGS=
+
+SRC=$(wildcard src/*.chpl)
+PROGRAM=$(addprefix bin/, $(subst .chpl,, $(subst src/,,$(SRC))))
+
+all: mkdir $(PROGRAM)
+
+bin/% : src/%.chpl
+	$(CC) $(CFLAGS) -o $@ $<
+
+.PHONY: clean mkdir
+
+mkdir:
+	mkdir -p bin
+clean:
+	rm -rf bin

diff --git a/heat_equation/src/multiple_machines.chpl b/heat_equation/src/multiple_machines.chpl
new file mode 100644
index 0000000..9418082
--- /dev/null
+++ b/heat_equation/src/multiple_machines.chpl
@@ -0,0 +1,40 @@
+use BlockDist;
+
+config const n = 8;//Size of the domain squared
+config const epsilon = 1.0e-10;//Stop condition in amount of change
+config var iterations = 1000;//Stop condition in number of iterations
+
+//An n+2 by n+2 domain.
+const Grid = {0..n+1, 0..n+1} dmapped Block({1..n, 1..n});
+
+//An n by n domain that represents the interior of 'Grid'
+const Interior = {1..n, 1..n};
+
+var A, T : [Grid] real;//Zero initialized as default
+
+A[..,0] = -273.15; //Left column
+A[..,n+1] = -273.15; //Right column
+A[n+1,..] = -273.15; //Bottom row
+A[0,..] = 40.0; //Top row
+
+do{
+
+    //Since all iterations are independent, we can use 'forall', which allows
+    //the Chapel runtime system to calculate the iterations in parallel
+    forall (i,j) in Interior do//Iterate over all non-border cells
+    {
+        //Assign each cell in 'T' the mean of its neighboring cells in 'A'
+        T[i,j] = (A[i,j] + A[i-1,j] + A[i+1,j] + A[i,j-1] + A[i,j+1]) / 5;
+    }
+
+    //Delta is the total amount of change done in this iteration
+    const delta = + reduce abs(A[Interior] - T[Interior]);
+
+    //Copy back the non-border cells
+    A[Interior] = T[Interior];
+
+    //When 'delta' is smaller than 'epsilon' the calculation has converged
+    iterations -= 1;
+} while (delta > epsilon && iterations > 0);
+
+

diff --git a/heat_equation/src/single_machine.chpl b/heat_equation/src/single_machine.chpl
new file mode 100644
index 0000000..9453d75
--- /dev/null
+++ b/heat_equation/src/single_machine.chpl
@@ -0,0 +1,38 @@
+config const n = 8;//Size of the domain squared
+config const epsilon = 1.0e-10;//Stop condition in amount of change
+config var iterations = 1000;//Stop condition in number of iterations
+
+//An n+2 by n+2 domain.
+const Grid = {0..n+1, 0..n+1};
+
+//An n by n domain that represents the interior of 'Grid'
+const Interior = {1..n, 1..n};
+
+var A, T : [Grid] real;//Zero initialized as default
+
+A[..,0] = -273.15; //Left column
+A[..,n+1] = -273.15; //Right column
+A[n+1,..] = -273.15; //Bottom row
+A[0,..] = 40.0; //Top row
+
+do{
+
+    //Since all iterations are independent, we can use 'forall', which allows
+    //the Chapel runtime system to calculate the iterations in parallel
+    forall (i,j) in Interior do//Iterate over all non-border cells
+    {
+        //Assign each cell in 'T' the mean of its neighboring cells in 'A'
+        T[i,j] = (A[i,j] + A[i-1,j] + A[i+1,j] + A[i,j-1] + A[i,j+1]) / 5;
+    }
+
+    //Delta is the total amount of change done in this iteration
+    const delta = + reduce abs(A[Interior] - T[Interior]);
+
+    //Copy back the non-border cells
+    A[Interior] = T[Interior];
+
+    //When 'delta' is smaller than 'epsilon' the calculation has converged
+    iterations -= 1;
+} while (delta > epsilon && iterations > 0);
+
+
--
GitLab

From d87e74ee4f5ebd817e7d6b5c2a9ec6cf3d908390 Mon Sep 17 00:00:00 2001
From: "Mads R. B. Kristensen"
Date: Fri, 8 Apr 2016 16:22:36 +0200
Subject: [PATCH 2/6] added a sequential implementation of the heat equation

---
 README.md                                | 14 -------
 README.rst                               |  9 +++++
 heat_equation/README.rst                 | 51 ++++++++++++++++++++++++
 heat_equation/src/multiple_machines.chpl |  2 +-
 heat_equation/src/sequential.chpl        | 38 ++++++++++++++++++
 5 files changed, 99 insertions(+), 15 deletions(-)
 delete mode 100644 README.md
 create mode 100644 README.rst
 create mode 100644 heat_equation/README.rst
 create mode 100644 heat_equation/src/sequential.chpl

diff --git a/README.md b/README.md
deleted file mode 100644
index 084c70b..0000000
--- a/README.md
+++ /dev/null
@@ -1,14 +0,0 @@
-Chapel
-======
-
-Compilation instructions
-------------------------
-There are no specific requirements for building the examples,
-just standard make, a working MPI environment (for the MPI examples) and
-an OpenMP enabled C or Fortran compiler (for the OpenMP examples).
-
-Move to the proper subfolder (C or Fortran) and modify the top of the **Makefile**
-according to your environment (proper compiler commands and compiler flags).
-
-All examples can be built with a simple **make**; **make mpi** builds the MPI
-examples and **make omp** the OpenMP examples.
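The convergence measure used in both kernels above, ``+ reduce abs(A[Interior] - T[Interior])``, works because ``A - T`` and ``abs`` are *promoted* elementwise over the interior slice and ``+ reduce`` then sums the results into a scalar. A minimal standalone sketch of the same idiom (illustrative only; the 3 by 3 domain ``D`` is not part of the examples)::

    const D = {1..3, 1..3};
    var A, T : [D] real;
    A = 1.0;
    T = 3.0;

    //'A - T' and 'abs(...)' are applied elementwise; '+ reduce' sums all elements
    const delta = + reduce abs(A - T);
    writeln(delta); //prints 18.0: nine elements, each |1.0 - 3.0| = 2.0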
diff --git a/README.rst b/README.rst
new file mode 100644
index 0000000..b1f7fe1
--- /dev/null
+++ b/README.rst
@@ -0,0 +1,9 @@
+Chapel
+======
+
+Compilation instructions
+------------------------
+In order to compile and run these examples, the only requirement is a working Chapel compiler and make. You can download Chapel at http://chapel.cray.com/.
+
+All examples can be built with a simple **make**.
+

diff --git a/heat_equation/README.rst b/heat_equation/README.rst
new file mode 100644
index 0000000..440a205
--- /dev/null
+++ b/heat_equation/README.rst
@@ -0,0 +1,51 @@
+Heat Equation
+=============
+
+In this example, we solve the heat equation. The idea is to apply a 5-point stencil on a domain iteratively until equilibrium.
+
+Sequential
+----------
+
+`sequential.chpl <src/sequential.chpl>`_ is a sequential implementation of the heat equation written in Chapel. The stencil computation is the most time-consuming part of the code and looks like::
+
+    for (i,j) in Interior do//Iterate over all non-border cells
+    {
+        //Assign each cell in 'T' the mean of its neighboring cells in 'A'
+        T[i,j] = (A[i,j] + A[i-1,j] + A[i+1,j] + A[i,j-1] + A[i,j+1]) / 5;
+    }
+
+Basically, each *interior* element in ``T`` gets the mean of the corresponding element in ``A`` as well as the neighboring elements. Since ``for`` is a sequential language construct in Chapel, a single CPU-core will execute this code.
+
+
+Multi-core
+----------
+
+In order to improve the performance, we can tell Chapel to use threads to execute the stencil operations in parallel (`single_machine.chpl <src/single_machine.chpl>`_). We do that by replacing ``for`` with ``forall``, which tells Chapel to execute each iteration in ``Interior`` in parallel.
+It is our responsibility to make sure that each iteration in the ``forall`` loop is independent in order not to introduce race conditions.
+
+In this case, each iteration is clearly independent since we never read from ``T``::
+
+    forall (i,j) in Interior do//Iterate over all non-border cells
+    {
+        //Assign each cell in 'T' the mean of its neighboring cells in 'A'
+        T[i,j] = (A[i,j] + A[i-1,j] + A[i+1,j] + A[i,j-1] + A[i,j+1]) / 5;
+    }
+
+
+Multiple Machines
+-----------------
+
+In order to improve the performance even further, we can tell Chapel to execute the stencil operation in parallel on multiple machines (`multiple_machines.chpl <src/multiple_machines.chpl>`_).
+We still use the ``forall`` loop construct, but we have to tell Chapel how to distribute ``A`` and ``T`` between the multiple machines. For that, we use the ``dmapped`` language construct when defining the ``Grid`` and ``Interior`` domains::
+
+    //An n+2 by n+2 domain.
+    const Grid = {0..n+1, 0..n+1} dmapped Block({1..n, 1..n});
+
+    //An n by n domain that represents the interior of 'Grid'
+    const Interior = {1..n, 1..n} dmapped Block({1..n, 1..n});
+
+    var A, T : [Grid] real;//Zero initialized as default
+
+We tell Chapel to use the same *block* distribution for the ``Grid`` and ``Interior`` domains such that each index in ``Grid`` has the same location as the corresponding index in ``Interior``. Because they use the same distribution, no communication is needed when accessing the same index. For example, the operation ``A[2,4] + T[2,4]`` can be done locally on the machine that *owns* index ``[2,4]``. However, it also means that an operation such as ``A[2,4] + T[3,4]`` will generally require communication.
+
+In relation to HPC, it is very important to use ``dmapped`` such that you minimize the communication requirements of your application.
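The locality argument above is easy to observe directly. The following standalone sketch (illustrative only; it assumes a multi-locale Chapel build, and the names ``D`` and ``X`` do not appear in the examples) fills a Block-distributed array with the id of the locale that owns each index::

    use BlockDist;

    config const n = 4;

    //Each index of 'D' is owned by exactly one locale
    const D = {1..n, 1..n} dmapped Block({1..n, 1..n});
    var X : [D] int;

    //A forall over a distributed domain runs each iteration on the locale
    //that owns the index, so the write to X[i,j] is always local
    forall (i,j) in D do
        X[i,j] = here.id;

    writeln(X);

Run with, e.g., ``-nl 4``, and the output shows the domain split into contiguous blocks; this is exactly why ``A[2,4] + T[2,4]`` stays local when both arrays share the same distribution.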
diff --git a/heat_equation/src/multiple_machines.chpl b/heat_equation/src/multiple_machines.chpl
index 9418082..aaccf39 100644
--- a/heat_equation/src/multiple_machines.chpl
+++ b/heat_equation/src/multiple_machines.chpl
@@ -8,7 +8,7 @@ config var iterations = 1000;//Stop condition in number of iterations
 const Grid = {0..n+1, 0..n+1} dmapped Block({1..n, 1..n});
 
 //An n by n domain that represents the interior of 'Grid'
-const Interior = {1..n, 1..n};
+const Interior = {1..n, 1..n} dmapped Block({1..n, 1..n});
 
 var A, T : [Grid] real;//Zero initialized as default
 

diff --git a/heat_equation/src/sequential.chpl b/heat_equation/src/sequential.chpl
new file mode 100644
index 0000000..968d491
--- /dev/null
+++ b/heat_equation/src/sequential.chpl
@@ -0,0 +1,38 @@
+config const n = 8;//Size of the domain squared
+config const epsilon = 1.0e-10;//Stop condition in amount of change
+config var iterations = 1000;//Stop condition in number of iterations
+
+//An n+2 by n+2 domain.
+const Grid = {0..n+1, 0..n+1};
+
+//An n by n domain that represents the interior of 'Grid'
+const Interior = {1..n, 1..n};
+
+var A, T : [Grid] real;//Zero initialized as default
+
+A[..,0] = -273.15; //Left column
+A[..,n+1] = -273.15; //Right column
+A[n+1,..] = -273.15; //Bottom row
+A[0,..] = 40.0; //Top row
+
+do{
+
+    //Since 'for' is a sequential language construct, a single task
+    //executes the iterations one after another
+    for (i,j) in Interior do//Iterate over all non-border cells
+    {
+        //Assign each cell in 'T' the mean of its neighboring cells in 'A'
+        T[i,j] = (A[i,j] + A[i-1,j] + A[i+1,j] + A[i,j-1] + A[i,j+1]) / 5;
+    }
+
+    //Delta is the total amount of change done in this iteration
+    const delta = + reduce abs(A[Interior] - T[Interior]);
+
+    //Copy back the non-border cells
+    A[Interior] = T[Interior];
+
+    //When 'delta' is smaller than 'epsilon' the calculation has converged
+    iterations -= 1;
+} while (delta > epsilon && iterations > 0);
+
+
--
GitLab

From 1d8e39635510e8b843cfdba5d0795433eade6c67 Mon Sep 17 00:00:00 2001
From: "Mads R. B. Kristensen"
Date: Tue, 12 Apr 2016 14:11:44 +0200
Subject: [PATCH 3/6] Now using the --size argument in the Heat Equation
 example

---
 heat_equation/src/multiple_machines.chpl | 35 +++++++++++++++++++-----
 heat_equation/src/sequential.chpl        | 30 ++++++++++++++++----
 heat_equation/src/single_machine.chpl    | 30 ++++++++++++++++----
 3 files changed, 78 insertions(+), 17 deletions(-)

diff --git a/heat_equation/src/multiple_machines.chpl b/heat_equation/src/multiple_machines.chpl
index aaccf39..a414ca9 100644
--- a/heat_equation/src/multiple_machines.chpl
+++ b/heat_equation/src/multiple_machines.chpl
@@ -1,8 +1,23 @@
-use BlockDist;
-
-config const n = 8;//Size of the domain squared
+//The format of 'size' is two integers separated with a '*'.
+//The first integer is the domain size squared and the second integer is
+//the number of iterations.
+config const size = "100*10";//Default, 100 by 100 domain and 10 iterations
 config const epsilon = 1.0e-10;//Stop condition in amount of change
-config var iterations = 1000;//Stop condition in number of iterations
+
+//Parse the --size argument into 'n' and 'iterations'
+use Regexp;
+const arg = size.matches(compile("(\\d+)\\*(\\d+)"));
+const n = size.substring(arg[1][1]) : int;
+const iterations = size.substring(arg[2][1]) : int;
+
+//Initialize a Timer object
+use Time;
+var timer : Timer;
+
+//Now, let's implement the heat equation!
+
+//We will use the Block distribution
+use BlockDist;
 
 //An n+2 by n+2 domain.
 const Grid = {0..n+1, 0..n+1} dmapped Block({1..n, 1..n});
@@ -17,8 +32,9 @@ A[..,n+1] = -273.15; //Right column
 A[n+1,..] = -273.15; //Bottom row
 A[0,..] = 40.0; //Top row
 
+timer.start();
+var iter_count = 0;
 do{
-
     //Since all iterations are independent, we can use 'forall', which allows
     //the Chapel runtime system to calculate the iterations in parallel
     forall (i,j) in Interior do//Iterate over all non-border cells
@@ -34,7 +50,12 @@ do{
     //Delta is the total amount of change done in this iteration
     const delta = + reduce abs(A[Interior] - T[Interior]);
     //Copy back the non-border cells
     A[Interior] = T[Interior];
     //When 'delta' is smaller than 'epsilon' the calculation has converged
-    iterations -= 1;
-} while (delta > epsilon && iterations > 0);
+    iter_count += 1;
+} while (delta > epsilon && iter_count < iterations);
+
+timer.stop();
+writeln("Heat Equation (multiple machines) - n: ",n,
+        ", iterations: ", iterations,
+        ", elapsed: ", timer.elapsed(), " seconds");

diff --git a/heat_equation/src/sequential.chpl b/heat_equation/src/sequential.chpl
index 968d491..776e86e 100644
--- a/heat_equation/src/sequential.chpl
+++ b/heat_equation/src/sequential.chpl
@@ -1,6 +1,20 @@
-config const n = 8;//Size of the domain squared
+//The format of 'size' is two integers separated with a '*'.
+//The first integer is the domain size squared and the second integer is
+//the number of iterations.
+config const size = "100*10";//Default, 100 by 100 domain and 10 iterations
 config const epsilon = 1.0e-10;//Stop condition in amount of change
-config var iterations = 1000;//Stop condition in number of iterations
+
+//Parse the --size argument into 'n' and 'iterations'
+use Regexp;
+const arg = size.matches(compile("(\\d+)\\*(\\d+)"));
+const n = size.substring(arg[1][1]) : int;
+const iterations = size.substring(arg[2][1]) : int;
+
+//Initialize a Timer object
+use Time;
+var timer : Timer;
+
+//Now, let's implement the heat equation!
 
 //An n+2 by n+2 domain.
 const Grid = {0..n+1, 0..n+1};
@@ -15,8 +29,9 @@ A[..,n+1] = -273.15; //Right column
 A[n+1,..] = -273.15; //Bottom row
 A[0,..] = 40.0; //Top row
 
+timer.start();
+var iter_count = 0;
 do{
-
     //Since 'for' is a sequential language construct, a single task
     //executes the iterations one after another
     for (i,j) in Interior do//Iterate over all non-border cells
@@ -32,7 +47,12 @@ do{
     //Delta is the total amount of change done in this iteration
     const delta = + reduce abs(A[Interior] - T[Interior]);
     //Copy back the non-border cells
     A[Interior] = T[Interior];
     //When 'delta' is smaller than 'epsilon' the calculation has converged
-    iterations -= 1;
-} while (delta > epsilon && iterations > 0);
+    iter_count += 1;
+} while (delta > epsilon && iter_count < iterations);
+
+timer.stop();
+writeln("Heat Equation (sequential) - n: ",n,
+        ", iterations: ", iterations,
+        ", elapsed: ", timer.elapsed(), " seconds");

diff --git a/heat_equation/src/single_machine.chpl b/heat_equation/src/single_machine.chpl
index 9453d75..e3f147c 100644
--- a/heat_equation/src/single_machine.chpl
+++ b/heat_equation/src/single_machine.chpl
@@ -1,6 +1,20 @@
-config const n = 8;//Size of the domain squared
+//The format of 'size' is two integers separated with a '*'.
+//The first integer is the domain size squared and the second integer is
+//the number of iterations.
+config const size = "100*10";//Default, 100 by 100 domain and 10 iterations
 config const epsilon = 1.0e-10;//Stop condition in amount of change
-config var iterations = 1000;//Stop condition in number of iterations
+
+//Parse the --size argument into 'n' and 'iterations'
+use Regexp;
+const arg = size.matches(compile("(\\d+)\\*(\\d+)"));
+const n = size.substring(arg[1][1]) : int;
+const iterations = size.substring(arg[2][1]) : int;
+
+//Initialize a Timer object
+use Time;
+var timer : Timer;
+
+//Now, let's implement the heat equation!
 
 //An n+2 by n+2 domain.
 const Grid = {0..n+1, 0..n+1};
@@ -15,8 +29,9 @@ A[..,n+1] = -273.15; //Right column
 A[n+1,..] = -273.15; //Bottom row
 A[0,..] = 40.0; //Top row
 
+timer.start();
+var iter_count = 0;
 do{
-
     //Since all iterations are independent, we can use 'forall', which allows
     //the Chapel runtime system to calculate the iterations in parallel
     forall (i,j) in Interior do//Iterate over all non-border cells
@@ -32,7 +47,12 @@ do{
     //Delta is the total amount of change done in this iteration
     const delta = + reduce abs(A[Interior] - T[Interior]);
     //Copy back the non-border cells
     A[Interior] = T[Interior];
     //When 'delta' is smaller than 'epsilon' the calculation has converged
-    iterations -= 1;
-} while (delta > epsilon && iterations > 0);
+    iter_count += 1;
+} while (delta > epsilon && iter_count < iterations);
+
+timer.stop();
+writeln("Heat Equation (single machine) - n: ",n,
+        ", iterations: ", iterations,
+        ", elapsed: ", timer.elapsed(), " seconds");
--
GitLab

From d6c2a7c3306c00f19ddcb0c43a3078e51a30bf48 Mon Sep 17 00:00:00 2001
From: "Mads R. B. Kristensen"
Date: Fri, 15 Apr 2016 14:25:10 +0200
Subject: [PATCH 4/6] Updated the chapel/heat_equation

---
 heat_equation/README.md                  | 85 ++++++++++++++++++++++++
 heat_equation/README.rst                 | 51 --------------
 heat_equation/src/multiple_machines.chpl | 28 ++++++--
 heat_equation/src/sequential.chpl        | 28 ++++++--
 heat_equation/src/single_machine.chpl    | 28 ++++++--
 5 files changed, 151 insertions(+), 69 deletions(-)
 create mode 100644 heat_equation/README.md
 delete mode 100644 heat_equation/README.rst

diff --git a/heat_equation/README.md b/heat_equation/README.md
new file mode 100644
index 0000000..62898aa
--- /dev/null
+++ b/heat_equation/README.md
@@ -0,0 +1,85 @@
+Heat Equation
+=============
+
+In this example, we solve the heat equation. The idea is to apply a 5-point stencil on a domain iteratively until equilibrium.
+
+Sequential
+----------
+
+[sequential.chpl](src/sequential.chpl) is a sequential implementation of the heat equation written in Chapel. The stencil computation is the most time-consuming part of the code and looks like:
+
+```
+for (i,j) in Interior do//Iterate over all non-border cells
+{
+    //Assign each cell in 'T' the mean of its neighboring cells in 'A'
+    T[i,j] = (A[i,j] + A[i-1,j] + A[i+1,j] + A[i,j-1] + A[i,j+1]) / 5;
+}
+```
+
+Basically, each *interior* element in `T` gets the mean of the corresponding element in `A` as well as the neighboring elements. Since `for` is a sequential language construct in Chapel, a single CPU-core will execute this code.
+
+Now, let's run it:
+
+```
+./bin/sequential -nl 1 --size=5000*10
+Heat Equation (sequential) - n: 5000, iterations: 10, elapsed: 381.5 seconds
+```
+
+Multi-core
+----------
+
+In order to improve the performance, we can tell Chapel to use threads to execute the stencil operations in parallel ([single_machine.chpl](src/single_machine.chpl)). We do that by replacing `for` with `forall`, which tells Chapel to execute each iteration in `Interior` in parallel.
+It is our responsibility to make sure that each iteration in the `forall` loop is independent in order not to introduce race conditions.
+
+In this case, each iteration is clearly independent since we never read from `T`:
+
+```
+forall (i,j) in Interior do//Iterate over all non-border cells
+{
+    //Assign each cell in 'T' the mean of its neighboring cells in 'A'
+    T[i,j] = (A[i,j] + A[i-1,j] + A[i+1,j] + A[i,j-1] + A[i,j+1]) / 5;
+}
+```
+
+Now, let's run it (note that `CHPL_RT_NUM_THREADS_PER_LOCALE` tells Chapel the number of threads to use):
+
+```
+export CHPL_RT_NUM_THREADS_PER_LOCALE=16
+./bin/single_machine -nl 1 --size=5000*10
+Heat Equation (single machine) - n: 5000, iterations: 10, elapsed: 25.7052 seconds
+```
+
+Multiple Machines
+-----------------
+
+In order to improve the performance even further, we can tell Chapel to execute the stencil operation in parallel on multiple machines (`multiple_machines.chpl `).
+We still use the `forall` loop construct, but we have to tell Chapel how to distribute `A` and `T` between the multiple machines. For that, we use the `dmapped` language construct when defining the `Grid` and `Interior` domains:
+
+```
+//An n+2 by n+2 domain.
+const Grid = {0..n+1, 0..n+1} dmapped Block({1..n, 1..n});
+
+//An n by n domain that represents the interior of 'Grid'
+const Interior = {1..n, 1..n} dmapped Block({1..n, 1..n});
+
+var A, T : [Grid] real;//Zero initialized as default
+```
+
+We tell Chapel to use the same *block* distribution for the `Grid` and `Interior` domains such that each index in `Grid` has the same location as the corresponding index in `Interior`. Because they use the same distribution, no communication is needed when accessing the same index. For example, the operation `A[2,4] + T[2,4]` can be done locally on the machine that *owns* index `[2,4]`. However, it also means that an operation such as `A[2,4] + T[3,4]` will generally require communication.
+
+Now, let's run it (note that `-nl 8` tells Chapel to use eight locales):
+
+```
+export CHPL_RT_NUM_THREADS_PER_LOCALE=16
+./bin/multiple_machines -nl 8 --size=5000*10
+Heat Equation (multiple machines) - n: 5000, iterations: 10, elapsed: 5.13 seconds
+```
+
+It is very important that all arrays in the calculation use similar `dmapped` layouts. For example, if we do not use `dmapped` when defining `Interior`, we get horrible performance:
+
+```
+export CHPL_RT_NUM_THREADS_PER_LOCALE=16
+./bin/multiple_machines -nl 8 --size=5000*10
+Heat Equation (multiple machines) - n: 5000, iterations: 10, elapsed: 1823.23 seconds
+```
+

diff --git a/heat_equation/README.rst b/heat_equation/README.rst
deleted file mode 100644
index 440a205..0000000
--- a/heat_equation/README.rst
+++ /dev/null
@@ -1,51 +0,0 @@
-Heat Equation
-=============
-
-In this example, we solve the heat equation. The idea is to apply a 5-point stencil on a domain iteratively until equilibrium.
-
-Sequential
-----------
-
-`sequential.chpl <src/sequential.chpl>`_ is a sequential implementation of the heat equation written in Chapel. The stencil computation is the most time-consuming part of the code and looks like::
-
-    for (i,j) in Interior do//Iterate over all non-border cells
-    {
-        //Assign each cell in 'T' the mean of its neighboring cells in 'A'
-        T[i,j] = (A[i,j] + A[i-1,j] + A[i+1,j] + A[i,j-1] + A[i,j+1]) / 5;
-    }
-
-Basically, each *interior* element in ``T`` gets the mean of the corresponding element in ``A`` as well as the neighboring elements. Since ``for`` is a sequential language construct in Chapel, a single CPU-core will execute this code.
-
-
-Multi-core
-----------
-
-In order to improve the performance, we can tell Chapel to use threads to execute the stencil operations in parallel (`single_machine.chpl <src/single_machine.chpl>`_). We do that by replacing ``for`` with ``forall``, which tells Chapel to execute each iteration in ``Interior`` in parallel.
-It is our responsibility to make sure that each iteration in the ``forall`` loop is independent in order not to introduce race conditions.
-
-In this case, each iteration is clearly independent since we never read from ``T``::
-
-    forall (i,j) in Interior do//Iterate over all non-border cells
-    {
-        //Assign each cell in 'T' the mean of its neighboring cells in 'A'
-        T[i,j] = (A[i,j] + A[i-1,j] + A[i+1,j] + A[i,j-1] + A[i,j+1]) / 5;
-    }
-
-
-Multiple Machines
------------------
-
-In order to improve the performance even further, we can tell Chapel to execute the stencil operation in parallel on multiple machines (`multiple_machines.chpl <src/multiple_machines.chpl>`_).
-We still use the ``forall`` loop construct, but we have to tell Chapel how to distribute ``A`` and ``T`` between the multiple machines. For that, we use the ``dmapped`` language construct when defining the ``Grid`` and ``Interior`` domains::
-
-    //An n+2 by n+2 domain.
-    const Grid = {0..n+1, 0..n+1} dmapped Block({1..n, 1..n});
-
-    //An n by n domain that represents the interior of 'Grid'
-    const Interior = {1..n, 1..n} dmapped Block({1..n, 1..n});
-
-    var A, T : [Grid] real;//Zero initialized as default
-
-We tell Chapel to use the same *block* distribution for the ``Grid`` and ``Interior`` domains such that each index in ``Grid`` has the same location as the corresponding index in ``Interior``. Because they use the same distribution, no communication is needed when accessing the same index. For example, the operation ``A[2,4] + T[2,4]`` can be done locally on the machine that *owns* index ``[2,4]``. However, it also means that an operation such as ``A[2,4] + T[3,4]`` will generally require communication.
-
-In relation to HPC, it is very important to use ``dmapped`` such that you minimize the communication requirements of your application.

diff --git a/heat_equation/src/multiple_machines.chpl b/heat_equation/src/multiple_machines.chpl
index a414ca9..0aa625f 100644
--- a/heat_equation/src/multiple_machines.chpl
+++ b/heat_equation/src/multiple_machines.chpl
@@ -2,13 +2,17 @@
 //The first integer is the domain size squared and the second integer is
 //the number of iterations.
 config const size = "100*10";//Default, 100 by 100 domain and 10 iterations
-config const epsilon = 1.0e-10;//Stop condition in amount of change
+
+//Stop condition in amount of change (ignored when 'iterations' is non-zero).
+config const epsilon = 1.0e-10;
 
 //Parse the --size argument into 'n' and 'iterations'
 use Regexp;
 const arg = size.matches(compile("(\\d+)\\*(\\d+)"));
-const n = size.substring(arg[1][1]) : int;
-const iterations = size.substring(arg[2][1]) : int;
+const arg_n = arg[1][1];
+const arg_i = arg[2][1];
+const n = size[arg_n.offset+1..arg_n.offset+arg_n.length] : int;
+const iterations = size[arg_i.offset+1..arg_i.offset+arg_i.length] : int;
 
 //Initialize a Timer object
 use Time;
@@ -49,9 +53,21 @@ do{
     //Copy back the non-border cells
     A[Interior] = T[Interior];
 
-    //When 'delta' is smaller than 'epsilon' the calculation has converged
-    iter_count += 1;
-} while (delta > epsilon && iter_count < iterations);
+    //if 'iterations' is non-zero we stop after a fixed number of iterations;
+    //otherwise we stop when the calculation has converged, i.e. 'delta' is smaller than 'epsilon'.
+    iter_count += 1;
+    var stop = false;
+    if(iterations > 0)
+    {
+        if iter_count >= iterations then
+            stop = true;
+    }
+    else
+    {
+        if delta < epsilon then
+            stop = true;
+    }
+} while (!stop);
 
 timer.stop();
 writeln("Heat Equation (multiple machines) - n: ",n,

diff --git a/heat_equation/src/sequential.chpl b/heat_equation/src/sequential.chpl
index 776e86e..5d110b6 100644
--- a/heat_equation/src/sequential.chpl
+++ b/heat_equation/src/sequential.chpl
@@ -2,13 +2,17 @@
 //The first integer is the domain size squared and the second integer is
 //the number of iterations.
 config const size = "100*10";//Default, 100 by 100 domain and 10 iterations
-config const epsilon = 1.0e-10;//Stop condition in amount of change
+
+//Stop condition in amount of change (ignored when 'iterations' is non-zero).
+config const epsilon = 1.0e-10;
 
 //Parse the --size argument into 'n' and 'iterations'
 use Regexp;
 const arg = size.matches(compile("(\\d+)\\*(\\d+)"));
-const n = size.substring(arg[1][1]) : int;
-const iterations = size.substring(arg[2][1]) : int;
+const arg_n = arg[1][1];
+const arg_i = arg[2][1];
+const n = size[arg_n.offset+1..arg_n.offset+arg_n.length] : int;
+const iterations = size[arg_i.offset+1..arg_i.offset+arg_i.length] : int;
 
 //Initialize a Timer object
 use Time;
@@ -46,9 +50,21 @@ do{
     //Copy back the non-border cells
     A[Interior] = T[Interior];
 
-    //When 'delta' is smaller than 'epsilon' the calculation has converged
-    iter_count += 1;
-} while (delta > epsilon && iter_count < iterations);
+    //if 'iterations' is non-zero we stop after a fixed number of iterations;
+    //otherwise we stop when the calculation has converged, i.e. 'delta' is smaller than 'epsilon'.
+    iter_count += 1;
+    var stop = false;
+    if(iterations > 0)
+    {
+        if iter_count >= iterations then
+            stop = true;
+    }
+    else
+    {
+        if delta < epsilon then
+            stop = true;
+    }
+} while (!stop);
 
 timer.stop();
 writeln("Heat Equation (sequential) - n: ",n,

diff --git a/heat_equation/src/single_machine.chpl b/heat_equation/src/single_machine.chpl
index e3f147c..54af20b 100644
--- a/heat_equation/src/single_machine.chpl
+++ b/heat_equation/src/single_machine.chpl
@@ -2,13 +2,17 @@
 //The first integer is the domain size squared and the second integer is
 //the number of iterations.
 config const size = "100*10";//Default, 100 by 100 domain and 10 iterations
-config const epsilon = 1.0e-10;//Stop condition in amount of change
+
+//Stop condition in amount of change (ignored when 'iterations' is non-zero).
+config const epsilon = 1.0e-10;
 
 //Parse the --size argument into 'n' and 'iterations'
 use Regexp;
 const arg = size.matches(compile("(\\d+)\\*(\\d+)"));
-const n = size.substring(arg[1][1]) : int;
-const iterations = size.substring(arg[2][1]) : int;
+const arg_n = arg[1][1];
+const arg_i = arg[2][1];
+const n = size[arg_n.offset+1..arg_n.offset+arg_n.length] : int;
+const iterations = size[arg_i.offset+1..arg_i.offset+arg_i.length] : int;
 
 //Initialize a Timer object
 use Time;
@@ -46,9 +50,21 @@ do{
     //Copy back the non-border cells
     A[Interior] = T[Interior];
 
-    //When 'delta' is smaller than 'epsilon' the calculation has converged
-    iter_count += 1;
-} while (delta > epsilon && iter_count < iterations);
+    //if 'iterations' is non-zero we stop after a fixed number of iterations;
+    //otherwise we stop when the calculation has converged, i.e. 'delta' is smaller than 'epsilon'.
+    iter_count += 1;
+    var stop = false;
+    if(iterations > 0)
+    {
+        if iter_count >= iterations then
+            stop = true;
+    }
+    else
+    {
+        if delta < epsilon then
+            stop = true;
+    }
+} while (!stop);
 
 timer.stop();
 writeln("Heat Equation (single machine) - n: ",n,
--
GitLab

From 3d22aaeee799c8429b3b674a22a2069154bd3b8d Mon Sep 17 00:00:00 2001
From: "Mads R. B. Kristensen"
Date: Fri, 15 Apr 2016 14:27:15 +0200
Subject: [PATCH 5/6] fixed link typo

---
 heat_equation/README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/heat_equation/README.md b/heat_equation/README.md
index 62898aa..980bc3d 100644
--- a/heat_equation/README.md
+++ b/heat_equation/README.md
@@ -52,7 +52,7 @@ Now, let's run it (note that `CHPL_RT_NUM_THREADS_PER_LOCALE` tells Chapel the n
 Multiple Machines
 -----------------
 
-In order to improve the performance even further, we can tell Chapel to execute the stencil operation in parallel on multiple machines (`multiple_machines.chpl `).
+In order to improve the performance even further, we can tell Chapel to execute the stencil operation in parallel on multiple machines ([multiple_machines.chpl](src/multiple_machines.chpl)).
 We still use the `forall` loop construct, but we have to tell Chapel how to distribute `A` and `T` between the multiple machines. For that, we use the `dmapped` language construct when defining the `Grid` and `Interior` domains:
 
--
GitLab

From b633f9b274a79daefcebf0f22d5dc6e3a9f7409a Mon Sep 17 00:00:00 2001
From: Jussi Enkovaara
Date: Thu, 21 Jun 2018 10:54:48 +0300
Subject: [PATCH 6/6] Removed the duplicate README

---
 README.rst | 9 ---------
 1 file changed, 9 deletions(-)
 delete mode 100644 README.rst

diff --git a/README.rst b/README.rst
deleted file mode 100644
index b1f7fe1..0000000
--- a/README.rst
+++ /dev/null
@@ -1,9 +0,0 @@
-Chapel
-======
-
-Compilation instructions
-------------------------
-In order to compile and run these examples, the only requirement is a working Chapel compiler and make. You can download Chapel at http://chapel.cray.com/.
-
-All examples can be built with a simple **make**.
-
--
GitLab
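The stop logic that [PATCH 4/6] adds to all three programs repeats the same if/else cascade. A hedged sketch of the same decision factored into one function (illustrative only; ``shouldStop`` is not part of the patches)::

    //Stop after a fixed number of sweeps when 'iterations' is non-zero,
    //otherwise stop on convergence
    proc shouldStop(iterCount: int, iterations: int,
                    delta: real, epsilon: real) : bool {
        if iterations > 0 then
            return iterCount >= iterations; //fixed iteration budget
        else
            return delta < epsilon;         //convergence test
    }

    writeln(shouldStop(10, 10, 1.0, 1.0e-10));   //true: the budget of 10 sweeps is spent
    writeln(shouldStop(3, 0, 1.0e-12, 1.0e-10)); //true: converged

With such a helper, the loop tail in each program would reduce to ``} while (!shouldStop(iter_count, iterations, delta, epsilon));``.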