diff --git a/README.md b/README.md
deleted file mode 100644
index 084c70bdb5f31182ae9ebf3a7dba4e0d22563fce..0000000000000000000000000000000000000000
--- a/README.md
+++ /dev/null
@@ -1,14 +0,0 @@
-Chapel
-======
-
-Compilation instructions
-------------------------
-There are no specific requirements for building examples,
-just standard make, working MPI environment (for MPI examples) and
-OpenMP enabled C or Fortran compiler (for OpenMP examples).
-
-Move to proper subfolder (C or Fortran) and modify the top of the **Makefile**
-according to your environment (proper compiler commands and compiler flags).
-
-All examples can be built with simple **make**, **make mpi** builds the MPI
-examples and **make omp** OpenMP examples.
diff --git a/README.rst b/README.rst
new file mode 100644
index 0000000000000000000000000000000000000000..b1f7fe1994d6d4a59f4097bf6c94a603d0bb5826
--- /dev/null
+++ b/README.rst
@@ -0,0 +1,9 @@
+Chapel
+======
+
+Compilation instructions
+------------------------
+In order to compile and run these examples, the only requirements are a working Chapel compiler and **make**. You can download Chapel at http://chapel.cray.com/.
+
+All examples can be built with a simple **make**.
+
diff --git a/heat_equation/README.rst b/heat_equation/README.rst
new file mode 100644
index 0000000000000000000000000000000000000000..440a205f0d0cd9d121261b7a14314b8c4301d8ac
--- /dev/null
+++ b/heat_equation/README.rst
@@ -0,0 +1,51 @@
+Heat Equation
+=============
+
+In this example, we solve the heat equation. The idea is to apply a 5-point stencil to a domain iteratively until equilibrium.
+
+Sequential
+----------
+
+`sequential.chpl <src/sequential.chpl>`_ is a sequential implementation of the heat equation written in Chapel. The stencil computation is the most time-consuming part of the code and looks like::
+
+    for (i,j) in Interior do//Iterate over all non-border cells
+    {
+      //Assign each cell in 'T' the mean of its neighboring cells in 'A'
+      T[i,j] = (A[i,j] + A[i-1,j] + A[i+1,j] + A[i,j-1] + A[i,j+1]) / 5;
+    }
+
+Basically, each *interior* element in ``T`` gets the mean of the corresponding element in ``A`` and its four neighboring elements. Since ``for`` is a sequential language construct in Chapel, a single CPU core will execute this code.
+
+
+Multi-core
+----------
+
+In order to improve the performance, we can tell Chapel to use threads to execute the stencil operations in parallel (`single_machine.chpl <src/single_machine.chpl>`_). We do that by replacing ``for`` with ``forall``, which tells Chapel to execute the iterations over ``Interior`` in parallel.
+It is our responsibility to make sure that each iteration of the ``forall`` loop is independent in order not to introduce race conditions.
+
+In this case, the iterations are clearly independent since we never read from ``T``::
+
+    forall (i,j) in Interior do//Iterate over all non-border cells
+    {
+      //Assign each cell in 'T' the mean of its neighboring cells in 'A'
+      T[i,j] = (A[i,j] + A[i-1,j] + A[i+1,j] + A[i,j-1] + A[i,j+1]) / 5;
+    }
+
+
+Multiple Machines
+-----------------
+
+In order to improve the performance even further, we can tell Chapel to execute the stencil operation in parallel on multiple machines (`multiple_machines.chpl <src/multiple_machines.chpl>`_).
+We still use the ``forall`` loop construct, but we have to tell Chapel how to distribute ``A`` and ``T`` between the machines. For that, we use the ``dmapped`` language construct when defining the ``Grid`` and ``Interior`` domains::
+
+    //A n+2 by n+2 domain.
+    const Grid = {0..n+1, 0..n+1} dmapped Block({1..n, 1..n});
+
+    //A n by n domain that represents the interior of 'Grid'
+    const Interior = {1..n, 1..n} dmapped Block({1..n, 1..n});
+
+    var A, T : [Grid] real;//Zero initialized as default
+
+We tell Chapel to use the same *block* distribution for the ``Grid`` and ``Interior`` domains, such that each index in ``Grid`` is placed on the same machine as the corresponding index in ``Interior``. Because the two domains use the same distribution, no communication is needed when the same index is accessed in both arrays. For example, the operation ``A[2,4] + T[2,4]`` can be done locally on the machine that *owns* index ``[2,4]``. However, it also means that an operation such as ``A[2,4] + T[3,4]`` will generally require communication.
+
+In relation to HPC, it is very important to use ``dmapped`` in a way that minimizes the communication requirements of your application.
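+
+Running the examples
+--------------------
+
+As a rough sketch (the exact commands depend on your Chapel installation), the sequential and multi-core versions can be compiled with ``chpl`` and run directly; ``config`` constants such as ``n`` and ``iterations`` can be overridden on the command line::
+
+    chpl src/sequential.chpl -o sequential
+    ./sequential --n=256 --iterations=2000
+
+Running `multiple_machines.chpl <src/multiple_machines.chpl>`_ on several machines additionally requires a multi-locale Chapel build (``CHPL_COMM`` set to, e.g., ``gasnet``); the number of machines (*locales*) is then given with the ``-nl`` flag::
+
+    chpl src/multiple_machines.chpl -o multiple_machines
+    ./multiple_machines -nl 4 --n=1024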
diff --git a/heat_equation/src/multiple_machines.chpl b/heat_equation/src/multiple_machines.chpl
index 94180825516da77ab66378852e3745a667d13cf1..aaccf3960655587bbbd719b79e217fb55dad7e7c 100644
--- a/heat_equation/src/multiple_machines.chpl
+++ b/heat_equation/src/multiple_machines.chpl
@@ -8,7 +8,7 @@ config var iterations = 1000;//Stop condition in number of iterations
 const Grid = {0..n+1, 0..n+1} dmapped Block({1..n, 1..n});
 
 //A n by n domain that represents the interior of 'Grid'
-const Interior = {1..n, 1..n};
+const Interior = {1..n, 1..n} dmapped Block({1..n, 1..n});
 
 var A, T : [Grid] real;//Zero initialized as default
diff --git a/heat_equation/src/sequential.chpl b/heat_equation/src/sequential.chpl
new file mode 100644
index 0000000000000000000000000000000000000000..968d49179f4fe92eaba9094d1d4ded04e8b0b3a5
--- /dev/null
+++ b/heat_equation/src/sequential.chpl
@@ -0,0 +1,38 @@
+config const n = 8;//Size of the square domain
+config const epsilon = 1.0e-10;//Stop condition in amount of change
+config var iterations = 1000;//Stop condition in number of iterations
+
+//A n+2 by n+2 domain.
+const Grid = {0..n+1, 0..n+1};
+
+//A n by n domain that represents the interior of 'Grid'
+const Interior = {1..n, 1..n};
+
+var A, T : [Grid] real;//Zero initialized as default
+
+A[..,0]   = -273.15; //Left column
+A[..,n+1] = -273.15; //Right column
+A[n+1,..] = -273.15; //Bottom row
+A[0,..]   = 40.0;    //Top row
+
+do{
+
+  //Since 'for' is a sequential construct, a single task executes the
+  //iterations one by one (the multi-core version uses 'forall' instead)
+  for (i,j) in Interior do//Iterate over all non-border cells
+  {
+    //Assign each cell in 'T' the mean of its neighboring cells in 'A'
+    T[i,j] = (A[i,j] + A[i-1,j] + A[i+1,j] + A[i,j-1] + A[i,j+1]) / 5;
+  }
+
+  //Delta is the total amount of change done in this iteration
+  const delta = + reduce abs(A[Interior] - T[Interior]);
+
+  //Copy back the non-border cells
+  A[Interior] = T[Interior];
+
+  iterations -= 1;
+
+//The calculation has converged when 'delta' is smaller than 'epsilon'
+} while (delta > epsilon && iterations > 0);
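+
+//One possible way to report the result (an illustrative addition, not part
+//of the original algorithm): 'writeln' can print scalars and whole arrays
+writeln("Iterations left: ", iterations);
+writeln(A[Interior]);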