diff --git a/README.md b/README.md
index dad80016ae15b6a1b2f1077072a7fd240dc32d86..2724e49d6d1c9993cf31034688bf78731b4ba905 100644
--- a/README.md
+++ b/README.md
@@ -8,9 +8,43 @@ be built and run as:
- gcc -o exe -fopenmp exercise.c
- gfortran -o exe -fopenmp exercise.c
-where gcc/gfortran and -fopenmp should be replaced by proper compiler commands and options if you are not using the GNU compilers.
+where gcc/gfortran and -fopenmp should be replaced by proper compiler commands
+and options if you are not using the GNU compilers.
For more complex cases a Makefile is provided.
## Exercises
-## Examples
\ No newline at end of file
+ - [Hello world](hello-world) Simplest possible OpenMP program
+ (C and Fortran versions). Level: **basic**
+ - [Parallel region and data sharing](data-sharing) The basic data sharing
+ primitives (C and Fortran versions). Level: **basic**
+ - [Work sharing for a simple loop](work-sharing) Simple parallelization of
+ for/do loop (C and Fortran versions). Level: **basic**
+ - [Vector sum and race condition](race-condition) A basic example of race
+ condition and various ways for resolving it (C and Fortran versions).
+ Level: **intemediate**
+ - [Using OpenMP tasks for dynamic parallelization](tasks) Utilising OpenMP
+ task construct for more dynamic parallelization (C and Fortran versions).
+ Level: **intemediate**
+
+
+
+## Examples
+ - [Heat equation](heat-equation) A two dimensional heat equation solver which
+ is parallelized with OpenMP. (C and Fortran versions).
+ Level: **intermediate**
+
+## How to contribute
+
+Any contributions (new exercises and examples, bug fixes, improvements etc.) are
+warmly welcome. In order to contribute, please follow the standard
+Gitlab workflow:
+
+1. Fork the project into your personal space
+2. Create a feature branch
+3. Work on your contributions
+4. Push the commit(s) to your fork
+5. Submit a merge request to the master branch
+
+As a quality assurance, the merge request is reviewed by PRACE staff before it is accepted into main branch.
+
diff --git a/data-sharing/LICENSE.txt b/data-sharing/LICENSE.txt
new file mode 100644
index 0000000000000000000000000000000000000000..026e9d4a227009a7e61053b626b7d7c1bfa443cc
--- /dev/null
+++ b/data-sharing/LICENSE.txt
@@ -0,0 +1,15 @@
+
+Copyright (C) 2018 CSC - IT Center for Science Ltd.
+
+Licensed under the terms of the GNU General Public License as
+published by the Free Software Foundation, either version 3 of the
+License, or (at your option) any later version.
+
+Code is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+Copy of the GNU General Public License can be obtained from
+.
+
diff --git a/data-sharing/README.md b/data-sharing/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..a21d1de1afe39e0b0ef059e7dc669d8a7504a06b
--- /dev/null
+++ b/data-sharing/README.md
@@ -0,0 +1,9 @@
+## Parallel region and data sharing ##
+
+Take a look at the exercise skeleton in [variables.c](variables.c) (or
+[variables.F90](variables.F90) for Fortran). Add an OpenMP parallel
+region aroung the block where the variables `Var1` and `Var2` are
+printed and manipulated. What results do you get when you define the
+variables as **shared**, **private** or **firstprivate**? Explain why
+do you get different results.
+
diff --git a/data-sharing/solution/variables.F90 b/data-sharing/solution/variables.F90
new file mode 100644
index 0000000000000000000000000000000000000000..776adc3d3321273f74410b0756899ab3b0fff6d4
--- /dev/null
+++ b/data-sharing/solution/variables.F90
@@ -0,0 +1,31 @@
+program exer1
+ implicit none
+ integer :: var1, var2
+ var1 = 1
+ var2 = 2
+
+ !$OMP PARALLEL PRIVATE(VAR1, VAR2)
+ print *, 'Region 1: var1=', var1, 'var2=', var2
+ var1 = var1 + 1
+ var2 = var2 + 1
+ !$OMP END PARALLEL
+ print *, 'After region 1: var1=', var1, 'var2=', var2
+ print *
+
+ !$OMP PARALLEL FIRSTPRIVATE(VAR1, VAR2)
+ print *, 'Region 2: var1=', var1, 'var2=', var2
+ var1 = var1 + 1
+ var2 = var2 + 1
+ !$OMP END PARALLEL
+ print *, 'After region 2: var1=', var1, 'var2=', var2
+ print *
+
+ !$OMP PARALLEL
+ print *, 'Region 3: var1=', var1, 'var2=', var2
+ var1 = var1 + 1
+ var2 = var2 + 1
+ !$OMP END PARALLEL
+ print *, 'After region 3: var1=', var1, 'var2=', var2
+ print *
+
+end program exer1
diff --git a/data-sharing/solution/variables.c b/data-sharing/solution/variables.c
new file mode 100644
index 0000000000000000000000000000000000000000..cf23d18d243423762f27059cb9815539e90e25fa
--- /dev/null
+++ b/data-sharing/solution/variables.c
@@ -0,0 +1,33 @@
+#include
+
+int main(void)
+{
+ int var1 = 1, var2 = 2;
+
+ #pragma omp parallel private(var1, var2)
+ {
+ printf("Region 1: var1=%i, var2=%i\n", var1, var2);
+ var1++;
+ var2++;
+ }
+ printf("After region 1: var1=%i, var2=%i\n\n", var1, var2);
+
+ #pragma omp parallel firstprivate(var1, var2)
+ {
+ printf("Region 2: var1=%i, var2=%i\n", var1, var2);
+ var1++;
+ var2++;
+ }
+ printf("After region 2: var1=%i, var2=%i\n\n", var1, var2);
+
+ #pragma omp parallel /* same as omp parallel shared(var1, var2) */
+ {
+ printf("Region 3: var1=%i, var2=%i\n", var1, var2);
+ /* Note that this introduces the data race condition! */
+ var1++;
+ var2++;
+ }
+ printf("After region 3: var1=%i, var2=%i\n\n", var1, var2);
+
+ return 0;
+}
diff --git a/data-sharing/variables.F90 b/data-sharing/variables.F90
new file mode 100644
index 0000000000000000000000000000000000000000..034639f54fcf24ebb154be007e6e8dca7844bd62
--- /dev/null
+++ b/data-sharing/variables.F90
@@ -0,0 +1,16 @@
+program exer1
+ implicit none
+ integer :: var1, var2
+ var1 = 1
+ var2 = 2
+
+ ! TODO:
+ ! Test different data sharing clauses here
+ print *, 'Region 1: var1=', var1, 'var2=', var2
+ var1 = var1 + 1
+ var2 = var2 + 1
+ !end here :)
+ print *, 'After region 1: var1=', var1, 'var2=', var2
+ print *
+
+end program exer1
diff --git a/data-sharing/variables.c b/data-sharing/variables.c
new file mode 100644
index 0000000000000000000000000000000000000000..8221baaf950a9e9c91aa3a58a7f648179e95b71f
--- /dev/null
+++ b/data-sharing/variables.c
@@ -0,0 +1,18 @@
+#include
+
+int main(void)
+{
+ int var1 = 1, var2 = 2;
+
+ /* TODO:
+ * Test the effect of different data sharing clauses here
+ */
+ {
+ printf("Region 1: var1=%i, var2=%i\n", var1, var2);
+ var1++;
+ var2++;
+ }
+ printf("After region 1: var1=%i, var2=%i\n\n", var1, var2);
+
+ return 0;
+}
diff --git a/heat-equation/LICENSE.txt b/heat-equation/LICENSE.txt
new file mode 100644
index 0000000000000000000000000000000000000000..026e9d4a227009a7e61053b626b7d7c1bfa443cc
--- /dev/null
+++ b/heat-equation/LICENSE.txt
@@ -0,0 +1,15 @@
+
+Copyright (C) 2018 CSC - IT Center for Science Ltd.
+
+Licensed under the terms of the GNU General Public License as
+published by the Free Software Foundation, either version 3 of the
+License, or (at your option) any later version.
+
+Code is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+Copy of the GNU General Public License can be obtained from
+.
+
diff --git a/heat-equation/README.md b/heat-equation/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..4c6fc5c37b745187f15dea57c5654acd61262677
--- /dev/null
+++ b/heat-equation/README.md
@@ -0,0 +1,72 @@
+Two dimensional heat equation
+=============================
+This folder contains a code which solves two dimensional heat equation
+with OpenMP parallelization. A coarse grained parallelization is used, where
+the threads are launched in the start of the program and kept alive throughout
+the whole execution.
+
+Heat (or diffusion) equation is
+
+
+![img](http://quicklatex.com/cache3/d2/ql_b3f6b8bdc3a8862c73c5a97862afb9d2_l3.png)
+
+where **u(x, y, t)** is the temperature field that varies in space and time,
+and α is thermal diffusivity constant. The two dimensional Laplacian can be
+discretized with finite differences as
+
+
+![img](http://quicklatex.com/cache3/2d/ql_59f49ed64dbbe76704e0679b8ad7c22d_l3.png)
+
+Given an initial condition (u(t=0) = u0) one can follow the time dependence of
+the temperature field with explicit time evolution method:
+
+
+![img](http://quicklatex.com/cache3/9e/ql_9eb7ce5f3d5eccd6cfc1ff5638bf199e_l3.png)
+
+Note: Algorithm is stable only when
+
+
+![img](http://quicklatex.com/cache3/d1/ql_0e7107049c9183d11dbb1e81174280d1_l3.png)
+
+The two dimensional grid is decomposed along both dimensions, and the
+communication of boundary data is overlapped with computation. Restart files
+are written and read with MPI I/O.
+
+Compilation instructions
+------------------------
+For building and running the example one needs to have the
+[libpng](http://www.libpng.org/pub/png/libpng.html) library installed.
+
+Move to proper subfolder (C or Fortran) and modify the top of the **Makefile**
+according to your environment (proper compiler commands and compiler flags).
+Code can be build simple with **make**
+
+How to run
+----------
+The code can be run with arbitrary number of OpenMP threads. The default
+initial temperature field is a disk in a 2000 x 2000 grid. Initial
+temperature field can be read also from a file, the provided **bottle.dat**
+illustrates what happens to a cold soda bottle in sauna.
+
+
+ * Running with defaults: OMP_NUM_THREADS=4 ./heat_omp
+ * Initial field from a file: OMP_NUM_THREADS=4 ./heat_omp bottle.dat
+ * Initial field from a file, given number of time steps:
+ OMP_NUM_THREADS=4./heat_omp bottle.dat 1000
+ * Default pattern with given dimensions and time steps:
+ OMP_NUM_THREADS=4 ./heat_omp 800 800 1000
+
+ The program produces a series of heat_XXXX.png files which show the
+ time development of the temperature field
+
diff --git a/heat-equation/c/solution/Makefile b/heat-equation/c/solution/Makefile
new file mode 100644
index 0000000000000000000000000000000000000000..c772c962c5a4aac27df0cd8be3580b4071afddfc
--- /dev/null
+++ b/heat-equation/c/solution/Makefile
@@ -0,0 +1,46 @@
+ifeq ($(COMP),)
+COMP=cray
+endif
+
+COMMONDIR=../../../common
+
+ifeq ($(COMP),cray)
+CC=cc
+CCFLAGS=-O3 -I/appl/opt/libpng/include -I$(COMMONDIR)
+LDFLAGS=-L/appl/opt/libpng/lib
+LIBS=-lpng -lz
+endif
+
+ifeq ($(COMP),gnu)
+CC=gcc
+CCFLAGS=-fopenmp -O3 -Wall -I$(COMMONDIR)
+LDFLAGS=
+LIBS=-lpng
+endif
+
+EXE=heat_omp
+OBJS=core.o setup.o utilities.o io.o main.o
+OBJS_PNG=$(COMMONDIR)/pngwriter.o
+
+
+all: $(EXE)
+
+$(COMMONDIR)/pngwriter.o: $(COMMONDIR)/pngwriter.c $(COMMONDIR)/pngwriter.h
+core.o: core.c heat.h
+utilities.o: utilities.c heat.h
+setup.o: setup.c heat.h
+io.o: io.c heat.h
+main.o: main.c heat.h
+
+$(OBJS_PNG): C_COMPILER := $(CC)
+$(OBJS): C_COMPILER := $(CC)
+
+$(EXE): $(OBJS) $(OBJS_PNG)
+ $(CC) $(CCFLAGS) $(OBJS) $(OBJS_PNG) -o $@ $(LDFLAGS) $(LIBS)
+
+%.o: %.c
+ $(C_COMPILER) $(CCFLAGS) -c $< -o $@
+
+.PHONY: clean
+clean:
+ -/bin/rm -f $(EXE) a.out *.o *.png *~
diff --git a/heat-equation/c/solution/core.c b/heat-equation/c/solution/core.c
new file mode 100644
index 0000000000000000000000000000000000000000..06f55947d027c6403fecdb187c55cfa67108f49c
--- /dev/null
+++ b/heat-equation/c/solution/core.c
@@ -0,0 +1,35 @@
+/* Main solver routines for heat equation solver */
+
+#include
+#include
+#include
+#include
+
+#include "heat.h"
+
+/* Update the temperature values using five-point stencil */
+void evolve(field *curr, field *prev, double a, double dt)
+{
+ int i, j;
+ double dx2, dy2;
+
+ /* Determine the temperature field at next time step
+ * As we have fixed boundary conditions, the outermost gridpoints
+ * are not updated. */
+ dx2 = prev->dx * prev->dx;
+ dy2 = prev->dy * prev->dy;
+ #pragma omp for private(i, j)
+ for (i = 1; i < curr->nx + 1; i++) {
+ for (j = 1; j < curr->ny + 1; j++) {
+ curr->data[i][j] = prev->data[i][j] + a * dt *
+ ((prev->data[i + 1][j] -
+ 2.0 * prev->data[i][j] +
+ prev->data[i - 1][j]) / dx2 +
+ (prev->data[i][j + 1] -
+ 2.0 * prev->data[i][j] +
+ prev->data[i][j - 1]) / dy2);
+ }
+ }
+}
+
+
diff --git a/heat-equation/c/solution/heat.h b/heat-equation/c/solution/heat.h
new file mode 100644
index 0000000000000000000000000000000000000000..d6a9e30d55c1d99eca083758d2fc188097ad20f7
--- /dev/null
+++ b/heat-equation/c/solution/heat.h
@@ -0,0 +1,46 @@
+#ifndef __HEAT_H__
+#define __HEAT_H__
+
+/* Datatype for temperature field */
+typedef struct {
+ /* nx and ny are the true dimensions of the field. The array data
+ * contains also ghost layers, so it will have dimensions nx+2 x ny+2 */
+ int nx;
+ int ny;
+ double dx;
+ double dy;
+ double **data;
+} field;
+
+/* We use here fixed grid spacing */
+#define DX 0.01
+#define DY 0.01
+
+
+/* Function prototypes */
+double **malloc_2d(int nx, int ny);
+
+void free_2d(double **array);
+
+void set_field_dimensions(field *temperature, int nx, int ny);
+
+void initialize(int argc, char *argv[], field *temperature1,
+ field *temperature2, int *nsteps);
+
+void generate_field(field *temperature);
+
+void evolve(field *curr, field *prev, double a, double dt);
+
+void write_field(field *temperature, int iter);
+
+void read_field(field *temperature1, field *temperature2,
+ char *filename);
+
+void copy_field(field *temperature1, field *temperature2);
+
+void swap_fields(field *temperature1, field *temperature2);
+
+void finalize(field *temperature1, field *temperature2);
+
+#endif /* __HEAT_H__ */
+
diff --git a/heat-equation/c/solution/io.c b/heat-equation/c/solution/io.c
new file mode 100644
index 0000000000000000000000000000000000000000..2399510cab7463257e7af33cc30860fcf22966f0
--- /dev/null
+++ b/heat-equation/c/solution/io.c
@@ -0,0 +1,83 @@
+/* I/O related functions for heat equation solver */
+
+#include
+#include
+#include
+#include
+
+#include "heat.h"
+#include "pngwriter.h"
+
+/* Output routine that prints out a picture of the temperature
+ * distribution. */
+void write_field(field *temperature, int iter)
+{
+ char filename[64];
+
+ /* The actual write routine takes only the actual data
+ * (without ghost layers) so we need array for that. */
+ int height, width;
+ double **full_data;
+
+ int i;
+
+ height = temperature->nx;
+ width = temperature->ny;
+
+ /* Copy the inner data */
+ full_data = malloc_2d(height, width);
+ for (i = 0; i < temperature->nx; i++)
+ memcpy(full_data[i], &temperature->data[i + 1][1],
+ temperature->ny * sizeof(double));
+
+ sprintf(filename, "%s_%04d.png", "heat", iter);
+ save_png(full_data[0], height, width, filename, 'c');
+
+ free_2d(full_data);
+}
+
+/* Read the initial temperature distribution from a file and
+ * initialize the temperature fields temperature1 and
+ * temperature2 to the same initial state. */
+void read_field(field *temperature1, field *temperature2, char *filename)
+{
+ FILE *fp;
+ int nx, ny, i, j;
+ int count;
+
+ fp = fopen(filename, "r");
+ /* Read the header */
+ count = fscanf(fp, "# %d %d \n", &nx, &ny);
+ if (count < 2) {
+ fprintf(stderr, "Error while reading the input file!\n");
+ exit(EXIT_FAILURE);
+ }
+
+ set_field_dimensions(temperature1, nx, ny);
+ set_field_dimensions(temperature2, nx, ny);
+
+ /* Allocate arrays (including ghost layers */
+ temperature1->data = malloc_2d(nx + 2, ny + 2);
+ temperature2->data = malloc_2d(nx + 2, ny + 2);
+
+ /* Read the actual data */
+ for (i = 1; i < nx + 1; i++) {
+ for (j = 1; j < ny + 1; j++) {
+ fscanf(fp, "%lf", &temperature1->data[i][j]);
+ }
+ }
+
+ /* Set the boundary values */
+ for (i = 1; i < nx + 1; i++) {
+ temperature1->data[i][0] = temperature1->data[i][1];
+ temperature1->data[i][ny + 1] = temperature1->data[i][ny];
+ }
+ for (j = 0; j < ny + 2; j++) {
+ temperature1->data[0][j] = temperature1->data[1][j];
+ temperature1->data[nx + 1][j] = temperature1->data[nx][j];
+ }
+
+ copy_field(temperature1, temperature2);
+
+ fclose(fp);
+}
diff --git a/heat-equation/c/solution/main.c b/heat-equation/c/solution/main.c
new file mode 100644
index 0000000000000000000000000000000000000000..959a4ea72cdd1e8e3ff53ec17aa40a13951321da
--- /dev/null
+++ b/heat-equation/c/solution/main.c
@@ -0,0 +1,66 @@
+/* Heat equation solver in 2D. */
+
+#include
+#include
+#include
+#include
+
+#include "heat.h"
+
+
+int main(int argc, char **argv)
+{
+ double a = 0.5; //!< Diffusion constant
+ field current, previous; //!< Current and previous temperature fields
+
+ double dt; //!< Time step
+ int nsteps; //!< Number of time steps
+
+ int image_interval = 1000; //!< Image output interval
+
+ int iter; //!< Iteration counter
+
+ double dx2, dy2; //!< delta x and y squared
+
+ clock_t start_clock; //!< Time stamps
+
+ #pragma omp parallel private(iter)
+ {
+ initialize(argc, argv, ¤t, &previous, &nsteps);
+
+ #pragma omp single
+ {
+ /* Output the initial field */
+ write_field(¤t, 0);
+
+ /* Largest stable time step */
+ dx2 = current.dx * current.dx;
+ dy2 = current.dy * current.dy;
+ dt = dx2 * dy2 / (2.0 * a * (dx2 + dy2));
+
+ /* Get the start time stamp */
+ start_clock = clock();
+ }
+
+ /* Time evolve */
+ for (iter = 1; iter < nsteps; iter++) {
+ evolve(¤t, &previous, a, dt);
+ if (iter % image_interval == 0) {
+ #pragma omp single
+ write_field(¤t, iter);
+ }
+ /* Swap current field so that it will be used
+ as previous for next iteration step */
+ #pragma omp single
+ swap_fields(¤t, &previous);
+ }
+ } /* End of parallel region */
+
+ /* Determine the CPU time used for the iteration */
+ printf("Iteration took %.3f seconds.\n", (double)(clock() - start_clock) /
+ (double)CLOCKS_PER_SEC);
+ printf("Reference value at 5,5: %f\n", previous.data[5][5]);
+
+ finalize(¤t, &previous);
+ return 0;
+}
diff --git a/heat-equation/c/solution/setup.c b/heat-equation/c/solution/setup.c
new file mode 100644
index 0000000000000000000000000000000000000000..2f3a6c6a6948e6160e0c875e9160131ccccd1775
--- /dev/null
+++ b/heat-equation/c/solution/setup.c
@@ -0,0 +1,145 @@
+/* Setup routines for heat equation solver */
+
+#include
+#include
+#include
+#include
+
+#include "heat.h"
+#include "pngwriter.h"
+
+#define NSTEPS 500 // Default number of iteration steps
+
+/* Initialize the heat equation solver */
+void initialize(int argc, char *argv[], field *current,
+ field *previous, int *nsteps)
+{
+ /*
+ * Following combinations of command line arguments are possible:
+ * No arguments: use default field dimensions and number of time steps
+ * One argument: read initial field from a given file
+ * Two arguments: initial field from file and number of time steps
+ * Three arguments: field dimensions (rows,cols) and number of time steps
+ */
+
+
+ int rows = 1024; //!< Field dimensions with default values
+ int cols = 1024;
+
+ char input_file[64]; //!< Name of the optional input file
+
+ int read_file = 0;
+
+ *nsteps = NSTEPS;
+
+ #pragma omp single
+ {
+ switch (argc) {
+ case 1:
+ /* Use default values */
+ break;
+ case 2:
+ /* Read initial field from a file */
+ strncpy(input_file, argv[1], 64);
+ read_file = 1;
+ break;
+ case 3:
+ /* Read initial field from a file */
+ strncpy(input_file, argv[1], 64);
+ read_file = 1;
+
+ /* Number of time steps */
+ *nsteps = atoi(argv[2]);
+ break;
+ case 4:
+ /* Field dimensions */
+ rows = atoi(argv[1]);
+ cols = atoi(argv[2]);
+ /* Number of time steps */
+ *nsteps = atoi(argv[3]);
+ break;
+ default:
+ printf("Unsupported number of command line arguments\n");
+ exit(-1);
+ }
+ }
+
+ if (read_file)
+ #pragma omp single
+ read_field(current, previous, input_file);
+ else {
+ #pragma omp single
+ {
+ set_field_dimensions(current, rows, cols);
+ set_field_dimensions(previous, rows, cols);
+ }
+ generate_field(current);
+ generate_field(previous);
+ }
+}
+
+/* Generate initial temperature field. Pattern is disc with a radius
+ * of nx_full / 6 in the center of the grid.
+ * Boundary conditions are (different) constant temperatures outside the grid */
+void generate_field(field *temperature)
+{
+ int i, j;
+ double radius;
+ int dx, dy;
+
+ /* Allocate the temperature array, note that
+ * we have to allocate also the ghost layers */
+ #pragma omp single
+ {
+ temperature->data =
+ malloc_2d(temperature->nx + 2, temperature->ny + 2);
+ }
+
+ /* Radius of the source disc */
+ radius = temperature->nx / 6.0;
+ #pragma omp for private(i, j, dx, dy)
+ for (i = 0; i < temperature->nx + 2; i++) {
+ for (j = 0; j < temperature->ny + 2; j++) {
+ /* Distances of point i, j from the origin */
+ dx = i - temperature->nx / 2 + 1;
+ dy = j - temperature->ny / 2 + 1;
+ if (dx * dx + dy * dy < radius * radius) {
+ temperature->data[i][j] = 5.0;
+ } else {
+ temperature->data[i][j] = 65.0;
+ }
+ }
+ }
+
+ /* Boundary conditions */
+ #pragma omp for private(i)
+ for (i = 0; i < temperature->nx + 2; i++) {
+ temperature->data[i][0] = 20.0;
+ temperature->data[i][temperature->ny + 1] = 70.0;
+ }
+
+ #pragma omp for private(j)
+ for (j = 0; j < temperature->ny + 2; j++) {
+ temperature->data[0][j] = 85.0;
+ temperature->data[temperature->nx + 1][j] = 5.0;
+ }
+
+}
+
+/* Set dimensions of the field. Note that the nx is the size of the first
+ * dimension and ny the second. */
+void set_field_dimensions(field *temperature, int nx, int ny)
+{
+ temperature->dx = DX;
+ temperature->dy = DY;
+ temperature->nx = nx;
+ temperature->ny = ny;
+}
+
+/* Deallocate the 2D arrays of temperature fields */
+void finalize(field *temperature1, field *temperature2)
+{
+ free_2d(temperature1->data);
+ free_2d(temperature2->data);
+}
+
diff --git a/heat-equation/c/solution/utilities.c b/heat-equation/c/solution/utilities.c
new file mode 100644
index 0000000000000000000000000000000000000000..da7a4ece46679fb8e35eb5809a5aa819f216739a
--- /dev/null
+++ b/heat-equation/c/solution/utilities.c
@@ -0,0 +1,51 @@
+/* Utility functions for heat equation solver
+ * NOTE: This file does not need to be edited! */
+
+#include
+#include
+#include
+#include
+
+#include "heat.h"
+
+/* Utility routine for allocating a two dimensional array */
+double **malloc_2d(int nx, int ny)
+{
+ double **array;
+ int i;
+
+ array = (double **) malloc(nx * sizeof(double *));
+ array[0] = (double *) malloc(nx * ny * sizeof(double));
+
+ for (i = 1; i < nx; i++) {
+ array[i] = array[0] + i * ny;
+ }
+
+ return array;
+}
+
+/* Utility routine for deallocating a two dimensional array */
+void free_2d(double **array)
+{
+ free(array[0]);
+ free(array);
+}
+
+
+/* Copy data on temperature1 into temperature2 */
+void copy_field(field *temperature1, field *temperature2)
+{
+ assert(temperature1->nx == temperature2->nx);
+ assert(temperature1->ny == temperature2->ny);
+ memcpy(temperature2->data[0], temperature1->data[0],
+ (temperature1->nx + 2) * (temperature1->ny + 2) * sizeof(double));
+}
+
+/* Swap the data of fields temperature1 and temperature2 */
+void swap_fields(field *temperature1, field *temperature2)
+{
+ double **tmp;
+ tmp = temperature1->data;
+ temperature1->data = temperature2->data;
+ temperature2->data = tmp;
+}
diff --git a/heat-equation/fortran/solution/Makefile b/heat-equation/fortran/solution/Makefile
new file mode 100644
index 0000000000000000000000000000000000000000..313d937126b4ac30a23eab31895ab5e118d7738c
--- /dev/null
+++ b/heat-equation/fortran/solution/Makefile
@@ -0,0 +1,51 @@
+ifeq ($(COMP),)
+COMP=cray
+endif
+
+COMMONDIR=../../../common
+
+ifeq ($(COMP),cray)
+FC=ftn
+CC=cc
+FCFLAGS=-O3
+CCFLAGS=-O3 -I/appl/opt/libpng/include -I$(COMMONDIR)
+LDFLAGS=-L/appl/opt/libpng/lib
+LIBS=-lpng -lz
+endif
+
+ifeq ($(COMP),gnu)
+FC=gfortran
+CC=gcc
+FCFLAGS=-fopenmp -O3 -Wall
+CCFLAGS=-fopenmp -O3 -Wall -I$(COMMONDIR)
+LDFLAGS=
+LIBS=-lpng
+endif
+
+EXE=heat_omp
+OBJS=main.o heat_mod.o core.o setup.o utilities.o io.o pngwriter_mod.o
+OBJS_PNG=$(COMMONDIR)/pngwriter.o
+
+
+all: $(EXE)
+
+$(COMMONDIR)/pngwriter.o: $(COMMONDIR)/pngwriter.c $(COMMONDIR)/pngwriter.h
+core.o: core.F90 heat_mod.o
+utilities.o: utilities.F90 heat_mod.o
+io.o: io.F90 heat_mod.o pngwriter_mod.o
+setup.o: setup.F90 heat_mod.o utilities.o io.o
+pngwriter_mod.o: pngwriter_mod.F90 heat_mod.o
+main.o: main.F90 heat_mod.o core.o io.o setup.o utilities.o
+
+$(EXE): $(OBJS) $(OBJS_PNG)
+ $(FC) $(FCFLAGS) $(OBJS) $(OBJS_PNG) -o $@ $(LDFLAGS) $(LIBS)
+
+%.o: %.F90
+ $(FC) $(FCFLAGS) -c $< -o $@
+
+%.o: %.c
+ $(CC) $(CCFLAGS) -c $< -o $@
+
+.PHONY: clean
+clean:
+ -/bin/rm -f $(EXE) a.out *.o *.mod *.png *~
diff --git a/heat-equation/fortran/solution/core.F90 b/heat-equation/fortran/solution/core.F90
new file mode 100644
index 0000000000000000000000000000000000000000..4cdb1ea01bb57e1473a99d36cb0e7d076bd9e783
--- /dev/null
+++ b/heat-equation/fortran/solution/core.F90
@@ -0,0 +1,37 @@
+! Main solver routines for heat equation solver
+module core
+
+contains
+
+ ! Compute one time step of temperature evolution
+ ! Arguments:
+ ! curr (type(field)): current temperature values
+ ! prev (type(field)): values from previous time step
+ ! a (real(dp)): update equation constant
+ ! dt (real(dp)): time step value
+ subroutine evolve(curr, prev, a, dt)
+
+ use heat
+ implicit none
+
+ type(field), intent(inout) :: curr, prev
+ real(dp) :: a, dt
+ integer :: i, j, nx, ny
+
+ nx = curr%nx
+ ny = curr%ny
+
+ !$OMP DO
+ do j = 1, ny
+ do i = 1, nx
+ curr%data(i, j) = prev%data(i, j) + a * dt * &
+ & ((prev%data(i-1, j) - 2.0 * prev%data(i, j) + &
+ & prev%data(i+1, j)) / curr%dx**2 + &
+ & (prev%data(i, j-1) - 2.0 * prev%data(i, j) + &
+ & prev%data(i, j+1)) / curr%dy**2)
+ end do
+ end do
+ !$OMP END DO
+ end subroutine evolve
+
+end module core
diff --git a/heat-equation/fortran/solution/heat_mod.F90 b/heat-equation/fortran/solution/heat_mod.F90
new file mode 100644
index 0000000000000000000000000000000000000000..f34a16bbc9522e787afb2334a145188444365b71
--- /dev/null
+++ b/heat-equation/fortran/solution/heat_mod.F90
@@ -0,0 +1,35 @@
+! Field metadata for heat equation solver
+module heat
+ use iso_fortran_env, only : REAL64
+ implicit none
+
+ integer, parameter :: dp = REAL64
+ real(dp), parameter :: DX = 0.01, DY = 0.01 ! Fixed grid spacing
+
+ type :: field
+ integer :: nx
+ integer :: ny
+ real(dp) :: dx
+ real(dp) :: dy
+ real(dp), dimension(:,:), allocatable :: data
+ end type field
+
+contains
+ ! Initialize the field type metadata
+ ! Arguments:
+ ! field0 (type(field)): input field
+ ! nx, ny, dx, dy: field dimensions and spatial step size
+ subroutine set_field_dimensions(field0, nx, ny)
+
+ implicit none
+
+ type(field), intent(out) :: field0
+ integer, intent(in) :: nx, ny
+
+ field0%dx = DX
+ field0%dy = DY
+ field0%nx = nx
+ field0%ny = ny
+ end subroutine set_field_dimensions
+
+end module heat
diff --git a/heat-equation/fortran/solution/io.F90 b/heat-equation/fortran/solution/io.F90
new file mode 100644
index 0000000000000000000000000000000000000000..8cf2471ccaf2a4c0aea21b5cc392185793c70a5e
--- /dev/null
+++ b/heat-equation/fortran/solution/io.F90
@@ -0,0 +1,75 @@
+! I/O routines for heat equation solver
+module io
+
+contains
+
+ ! Output routine, saves the temperature distribution as a png image
+ ! Arguments:
+ ! curr (type(field)): variable with the temperature data
+ ! iter (integer): index of the time step
+ subroutine write_field(curr, iter)
+ use heat
+ use pngwriter
+ implicit none
+
+ type(field), intent(in) :: curr
+ integer, intent(in) :: iter
+ character(len=85) :: filename
+
+ ! The actual write routine takes only the actual data
+ ! (without ghost layers) so we need array for that
+ integer :: full_nx, full_ny, stat
+ real(dp), dimension(:,:), allocatable, target :: full_data
+
+ full_nx = curr%nx
+ full_ny = curr%ny
+
+ allocate(full_data(full_nx, full_ny))
+ full_data(1:curr%nx, 1:curr%ny) = curr%data(1:curr%nx, 1:curr%ny)
+
+ write(filename,'(A5,I4.4,A4,A)') 'heat_', iter, '.png'
+ stat = save_png(full_data, full_nx, full_ny, filename)
+ deallocate(full_data)
+ end subroutine write_field
+
+
+ ! Reads the temperature distribution from an input file
+ ! Arguments:
+ ! field0 (type(field)): field variable that will store the
+ ! read data
+ ! filename (char): name of the input file
+ ! Note that this version assumes the input data to be in C memory layout
+ subroutine read_field(field0, filename)
+ use heat
+
+ implicit none
+
+ type(field), intent(out) :: field0
+ character(len=85), intent(in) :: filename
+
+ integer :: nx, ny, i
+ character(len=2) :: dummy
+
+ open(10, file=filename)
+ ! Read the header
+ read(10, *) dummy, nx, ny ! nx is the number of rows
+
+ call set_field_dimensions(field0, nx, ny)
+ ! The arrays for temperature field contain also a halo region
+ allocate(field0%data(0:field0%nx+1, 0:field0%ny+1))
+
+ ! Read the data
+ do i = 1, nx
+ read(10, *) field0%data(i, 1:ny)
+ end do
+
+ ! Set the boundary values
+ field0%data(1:nx, 0 ) = field0%data(1:nx, 1 )
+ field0%data(1:nx, ny+1) = field0%data(1:nx, ny )
+ field0%data(0, 0:ny+1) = field0%data(1, 0:ny+1)
+ field0%data( nx+1, 0:ny+1) = field0%data( nx, 0:ny+1)
+
+ close(10)
+ end subroutine read_field
+
+end module io
diff --git a/heat-equation/fortran/solution/main.F90 b/heat-equation/fortran/solution/main.F90
new file mode 100644
index 0000000000000000000000000000000000000000..21155ba5b6dde8014ab91d97dbb5502ffa51367a
--- /dev/null
+++ b/heat-equation/fortran/solution/main.F90
@@ -0,0 +1,58 @@
+! Heat equation solver in 2D.
+
+program heat_solve
+ use heat
+ use core
+ use io
+ use setup
+ use utilities
+
+ implicit none
+
+ real(dp), parameter :: a = 0.5 ! Diffusion constant
+ type(field) :: current, previous ! Current and previus temperature fields
+
+ real(dp) :: dt ! Time step
+ integer :: nsteps ! Number of time steps
+ integer, parameter :: image_interval = 10 ! Image output interval
+
+ integer :: iter
+
+ real :: start, stop ! Timers
+
+ !$OMP PARALLEL PRIVATE(iter)
+ call initialize(current, previous, nsteps)
+
+ !$OMP SINGLE
+ ! Draw the picture of the initial state
+ call write_field(current, 0)
+
+ ! Largest stable time step
+ dt = current%dx**2 * current%dy**2 / &
+ & (2.0 * a * (current%dx**2 + current%dy**2))
+
+ ! Main iteration loop, save a picture every
+ ! image_interval steps
+
+ call cpu_time(start)
+ !$OMP END SINGLE
+ do iter = 1, nsteps
+ call evolve(current, previous, a, dt)
+ !$OMP SINGLE
+ if (mod(iter, image_interval) == 0) then
+ call write_field(current, iter)
+ end if
+ call swap_fields(current, previous)
+ !$OMP END SINGLE
+ end do
+ !$OMP SINGLE
+ call cpu_time(stop)
+ !$OMP END SINGLE
+ !$OMP END PARALLEL
+
+ write(*,'(A,F7.3,A)') 'Iteration took ', stop - start, ' seconds.'
+ write(*,'(A,G0)') 'Reference value at 5,5: ', previous % data(5,5)
+
+ call finalize(current, previous)
+
+end program heat_solve
diff --git a/heat-equation/fortran/solution/pngwriter_mod.F90 b/heat-equation/fortran/solution/pngwriter_mod.F90
new file mode 100644
index 0000000000000000000000000000000000000000..f6fe31be12005a52f8f688e7947c2b975ade6f01
--- /dev/null
+++ b/heat-equation/fortran/solution/pngwriter_mod.F90
@@ -0,0 +1,41 @@
+! PNG writer for heat equation solver
+module pngwriter
+ use heat
+
+contains
+
+ function save_png(data, nx, ny, fname) result(stat)
+
+ use, intrinsic :: ISO_C_BINDING
+ implicit none
+
+ real(dp), dimension(:,:), intent(in) :: data
+ integer, intent(in) :: nx, ny
+ character(len=*), intent(in) :: fname
+ integer :: stat
+
+ ! Interface for save_png C-function
+ interface
+ ! The C-function definition is
+ ! int save_png(double *data, const int nx, const int ny,
+ ! const char *fname)
+ function save_png_c(data, nx, ny, fname, order) &
+ & bind(C,name="save_png") result(stat)
+ use, intrinsic :: ISO_C_BINDING
+ implicit none
+ real(kind=C_DOUBLE) :: data(*)
+ integer(kind=C_INT), value, intent(IN) :: nx, ny
+ character(kind=C_CHAR), intent(IN) :: fname(*)
+ character(kind=C_CHAR), value, intent(IN) :: order
+ integer(kind=C_INT) :: stat
+ end function save_png_c
+ end interface
+
+ stat = save_png_c(data, nx, ny, trim(fname) // C_NULL_CHAR, 'f')
+ if (stat /= 0) then
+ write(*,*) 'save_png returned error!'
+ end if
+
+ end function save_png
+
+end module pngwriter
diff --git a/heat-equation/fortran/solution/setup.F90 b/heat-equation/fortran/solution/setup.F90
new file mode 100644
index 0000000000000000000000000000000000000000..e14508c00cda2bb68f43f7b992c62b5a1bc9fddf
--- /dev/null
+++ b/heat-equation/fortran/solution/setup.F90
@@ -0,0 +1,158 @@
+! Setup routines for heat equation solver
+module setup
+
+contains
+
+ subroutine initialize(previous, current, nsteps)
+
+ use heat
+ use utilities
+ use io
+
+ implicit none
+
+ type(field), intent(out) :: previous, current
+ integer, intent(out) :: nsteps
+
+ integer :: rows, cols
+ logical :: using_input_file
+ character(len=85) :: input_file, arg ! Input file name and command line arguments
+
+
+ ! Default values for grid size and time steps
+ rows = 200
+ cols = 200
+ nsteps = 500
+ using_input_file = .false.
+
+ ! Read in the command line arguments and
+ ! set up the needed variables
+ select case(command_argument_count())
+ case(0) ! No arguments -> default values
+ case(1) ! One argument -> input file name
+ using_input_file = .true.
+ call get_command_argument(1, input_file)
+ case(2) ! Two arguments -> input file name and number of steps
+ using_input_file = .true.
+ call get_command_argument(1, input_file)
+ call get_command_argument(2, arg)
+ read(arg, *) nsteps
+ case(3) ! Three arguments -> rows, cols and nsteps
+ call get_command_argument(1, arg)
+ read(arg, *) rows
+ call get_command_argument(2, arg)
+ read(arg, *) cols
+ call get_command_argument(3, arg)
+ read(arg, *) nsteps
+ case default
+ call usage()
+ stop
+ end select
+
+ ! Initialize the fields according the command line arguments
+ if (using_input_file) then
+ !$OMP SINGLE
+ call read_field(previous, input_file)
+ call copy_fields(previous, current)
+ !$OMP END SINGLE
+ else
+ !$OMP SINGLE
+ call set_field_dimensions(previous, rows, cols)
+ call set_field_dimensions(current, rows, cols)
+ !$OMP END SINGLE
+ call generate_field(previous)
+ call generate_field(current)
+ end if
+
+ end subroutine initialize
+
+ ! Generate initial the temperature field. Pattern is disc with a radius
+ ! of nx_full / 6 in the center of the grid.
+ ! Boundary conditions are (different) constant temperatures outside the grid
+ subroutine generate_field(field0)
+ use heat
+
+ implicit none
+
+ type(field), intent(inout) :: field0
+
+ real(dp) :: radius2
+ integer :: i, j, ds2
+
+ ! The arrays for field contain also a halo region
+ !$OMP SINGLE
+ allocate(field0%data(0:field0%nx+1, 0:field0%ny+1))
+ !$OMP END SINGLE
+
+ ! Square of the disk radius
+ radius2 = (field0%nx / 6.0_dp)**2
+
+ !$OMP DO
+ do j = 0, field0%ny + 1
+ do i = 0, field0%nx + 1
+ ds2 = int((i - field0%nx / 2.0_dp + 1)**2 + &
+ & (j - field0%ny / 2.0_dp + 1)**2)
+ if (ds2 < radius2) then
+ field0%data(i,j) = 5.0_dp
+ else
+ field0%data(i,j) = 65.0_dp
+ end if
+ end do
+ end do
+ !$OMP END DO
+
+ ! Boundary conditions
+ !$OMP DO
+ do j = 0, field0%nx + 1
+ field0%data(j, 0) = 20.0_dp
+ end do
+ !$OMP END DO NOWAIT
+ !$OMP DO
+ do j = 0, field0%nx + 1
+ field0%data(j, field0%ny + 1) = 70.0_dp
+ end do
+ !$OMP END DO
+ !$OMP DO
+ do j = 0, field0%ny + 1
+ field0%data(0, j) = 85.0_dp
+ end do
+ !$OMP END DO NOWAIT
+ !$OMP DO
+ do j = 0, field0%ny+1
+ field0%data(field0%nx + 1, j) = 5.0_dp
+ end do
+ !$OMP END DO
+
+ end subroutine generate_field
+
+
+ ! Clean up routine for field type
+ ! Arguments:
+ ! field0 (type(field)): field variable to be cleared
+ subroutine finalize(field0, field1)
+ use heat
+
+ implicit none
+
+ type(field), intent(inout) :: field0, field1
+
+ deallocate(field0%data)
+ deallocate(field1%data)
+
+ end subroutine finalize
+
+ ! Helper routine that prints out a simple usage if
+ ! user gives more than three arguments
+ subroutine usage()
+ implicit none
+ character(len=256) :: buf
+
+ call get_command_argument(0, buf)
+ write (*,'(A)') 'Usage:'
+ write (*,'(A, " (default values will be used)")') trim(buf)
+ write (*,'(A, " ")') trim(buf)
+ write (*,'(A, " ")') trim(buf)
+ write (*,'(A, " ")') trim(buf)
+ end subroutine usage
+
+end module setup
diff --git a/heat-equation/fortran/solution/utilities.F90 b/heat-equation/fortran/solution/utilities.F90
new file mode 100644
index 0000000000000000000000000000000000000000..8739f8532d223063d10484d89c012ba8b5e3503d
--- /dev/null
+++ b/heat-equation/fortran/solution/utilities.F90
@@ -0,0 +1,58 @@
+! Utility routines for heat equation solver
+! NOTE: This file does not need to be edited!
+module utilities
+
+contains
+
+ ! Swap the data fields of two variables of type field
+ ! Arguments:
+ ! curr, prev (type(field)): the two variables that are swapped
+ subroutine swap_fields(curr, prev)
+ use heat
+
+ implicit none
+
+ type(field), intent(inout) :: curr, prev
+ real(dp), allocatable, dimension(:,:) :: tmp
+
+ call move_alloc(curr%data, tmp)
+ call move_alloc(prev%data, curr%data)
+ call move_alloc(tmp, prev%data)
+ end subroutine swap_fields
+
+ ! Copy the data from one field to another
+ ! Arguments:
+ ! from_field (type(field)): variable to copy from
+ ! to_field (type(field)): variable to copy to
+ subroutine copy_fields(from_field, to_field)
+
+ use heat
+ implicit none
+
+ type(field), intent(in) :: from_field
+ type(field), intent(out) :: to_field
+
+ ! Consistency checks
+ if (.not.allocated(from_field%data)) then
+ write (*,*) "Can not copy from a field without allocated data"
+ stop
+ end if
+ if (.not.allocated(to_field%data)) then
+ ! Target is not initialize, allocate memory
+ allocate(to_field%data(lbound(from_field%data, 1):ubound(from_field%data, 1), &
+ & lbound(from_field%data, 2):ubound(from_field%data, 2)))
+ else if (any(shape(from_field%data) /= shape(to_field%data))) then
+ write (*,*) "Wrong field data sizes in copy routine"
+ print *, shape(from_field%data), shape(to_field%data)
+ stop
+ end if
+
+ to_field%data = from_field%data
+
+ to_field%nx = from_field%nx
+ to_field%ny = from_field%ny
+ to_field%dx = from_field%dx
+ to_field%dy = from_field%dy
+ end subroutine copy_fields
+
+end module utilities
diff --git a/hello-world/LICENSE.txt b/hello-world/LICENSE.txt
new file mode 100644
index 0000000000000000000000000000000000000000..026e9d4a227009a7e61053b626b7d7c1bfa443cc
--- /dev/null
+++ b/hello-world/LICENSE.txt
@@ -0,0 +1,15 @@
+
+Copyright (C) 2018 CSC - IT Center for Science Ltd.
+
+Licensed under the terms of the GNU General Public License as
+published by the Free Software Foundation, either version 3 of the
+License, or (at your option) any later version.
+
+Code is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+Copy of the GNU General Public License can be obtained from
+.
+
diff --git a/hello-world/README.md b/hello-world/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..8acb6650ce72f3897be839804c549b7fc8af7b2e
--- /dev/null
+++ b/hello-world/README.md
@@ -0,0 +1,11 @@
+## Hello World ##
+
+In this exercise you will test that you are able to compile and run
+OpenMP application. Take a look at a simple example program in
+[hello.c](hello.c) ([hello.F90](hello.F90) for
+Fortran) that has been parallelized using OpenMP. It will first print
+out a hello message (in serial) after which each thread will print out
+an "X" character (in parallel).
+
+Compile this program so that you enable OpenMP, and run it with one,
+two and four threads. Do you get the expected amount of "X"s?
diff --git a/hello-world/hello.F90 b/hello-world/hello.F90
new file mode 100644
index 0000000000000000000000000000000000000000..f382790d1d609143e1cf61939f76604ee314d202
--- /dev/null
+++ b/hello-world/hello.F90
@@ -0,0 +1,9 @@
+program hello
+ implicit none
+
+ print *, 'Hello world!'
+ !$omp parallel
+ print *, 'X'
+ !$omp end parallel
+
+end program hello
diff --git a/hello-world/hello.c b/hello-world/hello.c
new file mode 100644
index 0000000000000000000000000000000000000000..dffe80d6d527dff3b5f6e05012e762391b384903
--- /dev/null
+++ b/hello-world/hello.c
@@ -0,0 +1,12 @@
+#include
+
+int main(int argc, char *argv[])
+{
+ printf("Hello world!\n");
+ #pragma omp parallel
+ {
+ printf("X\n");
+ }
+
+ return 0;
+}
diff --git a/hello-world/solution/hello.F90 b/hello-world/solution/hello.F90
new file mode 100644
index 0000000000000000000000000000000000000000..f382790d1d609143e1cf61939f76604ee314d202
--- /dev/null
+++ b/hello-world/solution/hello.F90
@@ -0,0 +1,9 @@
+program hello
+ implicit none
+
+ print *, 'Hello world!'
+ !$omp parallel
+ print *, 'X'
+ !$omp end parallel
+
+end program hello
diff --git a/hello-world/solution/hello.c b/hello-world/solution/hello.c
new file mode 100644
index 0000000000000000000000000000000000000000..dffe80d6d527dff3b5f6e05012e762391b384903
--- /dev/null
+++ b/hello-world/solution/hello.c
@@ -0,0 +1,12 @@
+#include
+
+int main(int argc, char *argv[])
+{
+ printf("Hello world!\n");
+ #pragma omp parallel
+ {
+ printf("X\n");
+ }
+
+ return 0;
+}
diff --git a/race-condition/LICENSE.txt b/race-condition/LICENSE.txt
new file mode 100644
index 0000000000000000000000000000000000000000..026e9d4a227009a7e61053b626b7d7c1bfa443cc
--- /dev/null
+++ b/race-condition/LICENSE.txt
@@ -0,0 +1,15 @@
+
+Copyright (C) 2018 CSC - IT Center for Science Ltd.
+
+Licensed under the terms of the GNU General Public License as
+published by the Free Software Foundation, either version 3 of the
+License, or (at your option) any later version.
+
+Code is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+Copy of the GNU General Public License can be obtained from
+.
+
diff --git a/race-condition/README.md b/race-condition/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..738bf61a3bd7bb94a951b6d5654e12b083dd2e42
--- /dev/null
+++ b/race-condition/README.md
@@ -0,0 +1,20 @@
+## Vector sum and race condition ##
+
+In [vectorsum.c](vectorsum.c) (or [vectorsum.F90](vectorsum.F90) for
+Fortran) you will find a serial code that sums up all the elements of
+a vector A, initialized as A=1,2,...,N. For reference, from the
+arithmetic sum formula we know that the result should be S=N(N+1)/2.
+
+Try to parallelize the code by using only `omp parallel` or `omp for`
+pragmas. Are you able to get same results with different number of
+threads and in different runs? Explain why the program does not work
+correctly in parallel.
+
+Next, try to use `reduction` clause to compute the sum correctly.
+
+Implement finally an alternative version where each thread computes its
+own part to a private variable and the use a `critical` section after
+the loop to compute the global sum. Which version (`reduction` or `critical`)
+gives better performance?
+
+
diff --git a/race-condition/solution/vectorsum.F90 b/race-condition/solution/vectorsum.F90
new file mode 100644
index 0000000000000000000000000000000000000000..63cf3bdfb8930616363b15f3dab5f70ae1f71cc0
--- /dev/null
+++ b/race-condition/solution/vectorsum.F90
@@ -0,0 +1,63 @@
+program vectorsum
+ use iso_fortran_env, only: int64
+ implicit none
+ integer, parameter :: ik = int64
+ integer(kind=ik), parameter :: nx = 102400_ik
+
+ integer(kind=ik), dimension(nx) :: vecA
+ integer(kind=ik) :: sum, psum, sumex
+ integer(kind=ik) :: i
+
+ ! Initialization of vector
+ do i = 1, nx
+ vecA(i) = i
+ end do
+
+ sumex = nx*(nx+1_ik)/2_ik
+ write(*,*) 'Arithmetic sum formula (exact): ', sumex
+
+ sum = 0
+ ! Sum with data race
+ !$omp parallel do default(shared) private(i)
+ do i = 1, nx
+ sum = sum + vecA(i)
+ end do
+ !$omp end parallel do
+ write(*,*) 'Sum with data race: ', sum
+
+ sum = 0
+ ! Dot product using critical section = SERIAL CODE
+ !$omp parallel do default(shared) private(i)
+ do i = 1, nx
+ !$omp critical
+ sum = sum + vecA(i)
+ !$omp end critical
+ end do
+ !$omp end parallel do
+ write(*,*) 'Sum using critical section: ', sum
+
+ sum = 0
+ ! Dot product using reduction
+ !$omp parallel do default(shared) private(i) reduction(+:sum)
+ do i = 1, nx
+ sum = sum + vecA(i)
+ end do
+ !$omp end parallel do
+ write(*,*) 'Sum using reduction: ', sum
+
+ sum = 0
+ ! Dot product using private variable and critical secion
+ !$omp parallel default(shared) private(i, psum)
+ psum = 0
+ !$omp do
+ do i = 1, nx
+ psum = psum + vecA(i)
+ end do
+ !$omp end do
+ !$omp critical
+ sum = sum + psum
+ !$omp end critical
+ !$omp end parallel
+ write(*,*) 'Sum using private variable and critical section: ', sum
+
+end program vectorsum
diff --git a/race-condition/solution/vectorsum.c b/race-condition/solution/vectorsum.c
new file mode 100644
index 0000000000000000000000000000000000000000..ad8afc3a664457e9250f539d25b071ae2e3475e7
--- /dev/null
+++ b/race-condition/solution/vectorsum.c
@@ -0,0 +1,64 @@
+#include
+
+#define NX 102400
+
+int main(void)
+{
+ long vecA[NX];
+ long sum, psum, sumex;
+ int i;
+
+ /* Initialization of the vectors */
+ for (i = 0; i < NX; i++) {
+ vecA[i] = (long) i + 1;
+ }
+
+ sumex = (long) NX * (NX + 1) / ((long) 2);
+ printf("Arithmetic sum formula (exact): %ld\n",
+ sumex);
+
+ sum = 0.0;
+ /* Version with data race */
+ #pragma omp parallel for default(shared) private(i)
+ for (i = 0; i < NX; i++) {
+ sum += vecA[i];
+ }
+ printf("Sum with data race: %ld\n",
+ sum);
+
+ sum = 0.0;
+ /* Dot product using critical section = SERIAL CODE! */
+ #pragma omp parallel for default(shared) private(i)
+ for (i = 0; i < NX; i++) {
+ #pragma omp critical(dummy)
+ sum += vecA[i];
+ }
+ printf("Sum using critical section: %ld\n",
+ sum);
+
+ sum = 0.0;
+ /* Dot product using private partial sums and critical section */
+ #pragma omp parallel default(shared) private(i, psum)
+ {
+ psum = 0.0;
+ #pragma omp for
+ for (i = 0; i < NX; i++) {
+ psum += vecA[i];
+ }
+ #pragma omp critical(par_sum)
+ sum += psum;
+ }
+ printf("Sum using private variable and critical section: %ld\n",
+ sum);
+
+ sum = 0.0;
+ /* Dot product using reduction */
+ #pragma omp parallel for default(shared) private(i) reduction(+:sum)
+ for (i = 0; i < NX; i++) {
+ sum += vecA[i];
+ }
+ printf("Reduction sum: %ld\n",
+ sum);
+
+ return 0;
+}
diff --git a/race-condition/vectorsum.F90 b/race-condition/vectorsum.F90
new file mode 100644
index 0000000000000000000000000000000000000000..6847733a5d9ad86e46fd31a30e7c6738a1b4cc69
--- /dev/null
+++ b/race-condition/vectorsum.F90
@@ -0,0 +1,25 @@
+program vectorsum
+ use iso_fortran_env, only: int64
+ implicit none
+ integer, parameter :: ik = int64
+ integer(kind=ik), parameter :: nx = 102400_ik
+
+ integer(kind=ik), dimension(nx) :: vecA
+ integer(kind=ik) :: sum, psum, sumex
+ integer(kind=ik) :: i
+
+ ! Initialization of vector
+ do i = 1, nx
+ vecA(i) = i
+ end do
+
+ sumex = nx*(nx+1_ik)/2_ik
+ write(*,*) 'Arithmetic sum formula (exact): ', sumex
+
+ sum = 0
+ ! TODO: Parallelize the computation
+ do i = 1, nx
+ sum = sum + vecA(i)
+ end do
+ write(*,*) 'Sum: ', sum
+end program vectorsum
diff --git a/race-condition/vectorsum.c b/race-condition/vectorsum.c
new file mode 100644
index 0000000000000000000000000000000000000000..a7badfe086ce157b5a3c83d85d2af92bf0ee36e0
--- /dev/null
+++ b/race-condition/vectorsum.c
@@ -0,0 +1,28 @@
+#include
+
+#define NX 102400
+
+int main(void)
+{
+ long vecA[NX];
+ long sum, psum, sumex;
+ int i;
+
+ /* Initialization of the vectors */
+ for (i = 0; i < NX; i++) {
+ vecA[i] = (long) i + 1;
+ }
+
+ sumex = (long) NX * (NX + 1) / ((long) 2);
+ printf("Arithmetic sum formula (exact): %ld\n",
+ sumex);
+
+ sum = 0.0;
+ /* TODO: Parallelize computation */
+ for (i = 0; i < NX; i++) {
+ sum += vecA[i];
+ }
+ printf("Sum: %ld\n", sum);
+
+ return 0;
+}
diff --git a/tasks/LICENSE.txt b/tasks/LICENSE.txt
new file mode 100644
index 0000000000000000000000000000000000000000..026e9d4a227009a7e61053b626b7d7c1bfa443cc
--- /dev/null
+++ b/tasks/LICENSE.txt
@@ -0,0 +1,15 @@
+
+Copyright (C) 2018 CSC - IT Center for Science Ltd.
+
+Licensed under the terms of the GNU General Public License as
+published by the Free Software Foundation, either version 3 of the
+License, or (at your option) any later version.
+
+Code is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+Copy of the GNU General Public License can be obtained from
+.
+
diff --git a/tasks/README.md b/tasks/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..2cfdd4f1526858fe7223553485264c6589a6a3f3
--- /dev/null
+++ b/tasks/README.md
@@ -0,0 +1,14 @@
+## Bonus: Using OpenMP tasks for dynamic parallelization ##
+
+Mandelbrot set of complex numbers can be presented as two dimensional
+fractal image. The computation of the pixel values is relatively time
+consuming, and the computational cost varies for each pixel. Thus,
+simply dividing the two dimensional grid evenly to threads leads into
+load imbalance and suboptimal performance.
+
+Files [tasks/c/mandelbrot.c](c/mandelbrot.c) and
+[tasks/c/mandelbrot.F90](fortran/mandelbrot.F90) contain recursive
+implementation for calculating the Mandelbrot fractal, which can be
+parallelized dynamically with OpenMP tasks. Insert missing pragmas for
+parallelizing the code (look for TODOs in the source code), and
+investigate the scalability with varying number of threads.
diff --git a/tasks/c/Makefile b/tasks/c/Makefile
new file mode 100644
index 0000000000000000000000000000000000000000..abce73f59e7ae8762e051e2f33129328fdbad6f2
--- /dev/null
+++ b/tasks/c/Makefile
@@ -0,0 +1,49 @@
+ifeq ($(COMP),)
+COMP=cray
+endif
+
+COMMONDIR=../../../heat/common
+
+ifeq ($(COMP),cray)
+FC=ftn
+CC=cc
+FCFLAGS=-O3
+CCFLAGS=-O3 -I/appl/opt/libpng/include -I$(COMMONDIR)
+LDFLAGS=-L/appl/opt/libpng/lib
+LIBS=-lpng -lz
+endif
+
+ifeq ($(COMP),gnu)
+FC=gfortran
+CC=gcc
+FCFLAGS=-fopenmp -O3 -Wall
+CCFLAGS=-fopenmp -O3 -Wall -I$(COMMONDIR)
+LDFLAGS=
+LIBS=-lpng
+endif
+
+EXE=mandelbrot
+OBJS=mandelbrot.o
+OBJS_PNG=pngwriter.o
+CORRECT_OBJS_PNG=$(COMMONDIR)/pngwriter.o
+
+all: $(EXE)
+
+$(COMMONDIR)/pngwriter.o: $(COMMONDIR)/pngwriter.c $(COMMONDIR)/pngwriter.h
+mandelbrot.o: mandelbrot.c
+
+pngwriter.o: pngwriter.c pngwriter.h
+ $(CC) $(CCFLAGS) -c -o $@ $<
+
+$(EXE): $(OBJS) $(OBJS_PNG)
+ $(CC) $(CCFLAGS) $(OBJS) $(OBJS_PNG) -o $@ $(LDFLAGS) $(LIBS)
+
+%.o: %.F90
+ $(FC) $(FCFLAGS) -c $< -o $@
+
+%.o: %.c
+ $(CC) $(CCFLAGS) -c $< -o $@
+
+.PHONY: clean
+clean:
+ -/bin/rm -f $(EXE) a.out *.o *.mod *.png *~ mandelbrot.png
diff --git a/tasks/c/mandelbrot.c b/tasks/c/mandelbrot.c
new file mode 100644
index 0000000000000000000000000000000000000000..fbf154f884f49812ad40470864ca6145a00bdffb
--- /dev/null
+++ b/tasks/c/mandelbrot.c
@@ -0,0 +1,132 @@
+/*
+ * This example is based on the code of Andrew V. Adinetz
+ * https://github.com/canonizer/mandelbrot-dyn
+ * Licensed under The MIT License
+ */
+
+#include
+#include
+#include
+#include
+
+#include "pngwriter.h"
+
+// Maximum number of iterations
+const int MAX_ITER_COUNT = 512;
+// Marker for different iteration counts
+const int DIFF_ITER_COUNT = -1;
+// Maximum recursion depth
+const int MAX_DEPTH = 6;
+// Region size below which do per-pixel
+const int MIN_SIZE = 32;
+// Subdivision factor along each axis
+const int SUBDIV = 4;
+
+
+// |z|^2 of a complex number z
+float abs2(complex v)
+{
+ return creal(v) * creal(v) + cimag(v) * cimag(v);
+}
+
+// The kernel to count per-pixel values of the portion of the Mandelbrot set
+// Does not need to be edited
+int kernel(int w, int h, complex cmin, complex cmax,
+ int x, int y)
+{
+ complex dc = cmax - cmin;
+ float fx = (float)x / w;
+ float fy = (float)y / h;
+ complex c = cmin + fx * creal(dc) + fy * cimag(dc) * I;
+ int iteration = 0;
+ complex z = c;
+ while (iteration < MAX_ITER_COUNT && abs2(z) < 2 * 2) {
+ z = z * z + c;
+ iteration++;
+ }
+ return iteration;
+}
+
+
+/* Computes the Mandelbrot image recursively
+ * At each call, the image is divided into smaller blocks (by a factor of
+ * subdiv), and the function is called recursively with arguments corresponding
+ * to subblock. When maximum recursion depth is reached or size of block
+ * is smaller than predefined minimum, one starts to calculate actual pixel
+ * values
+ *
+ * - - - - - - - - ----- -----
+ * | | | | | |
+ * | | ----- -----
+ * | | --> --> ...
+ * | | ----- -----
+ * | | | | | |
+ * | | ----- -----
+ * ---------------
+ */
+void mandelbrot_block(int *iter_counts, int w, int h, complex cmin,
+ complex cmax, int x0, int y0, int d, int depth)
+{
+
+// TODO Parallelize the recursive function call
+// with OpenMP tasks
+
+ int block_size = d / SUBDIV;
+ if (depth + 1 < MAX_DEPTH && block_size > MIN_SIZE) {
+ // Subdivide recursively
+ for (int i = 0; i < SUBDIV; i++) {
+ for (int j = 0; j < SUBDIV; j++) {
+ mandelbrot_block(iter_counts, w, h, cmin, cmax,
+ x0 + i * block_size, y0 + j * block_size,
+ d / SUBDIV, depth + 1);
+ }
+ }
+ } else {
+ // Last recursion level reached, calculate the values
+ for (int i = x0; i < x0 + d; i++) {
+ for (int j = y0; j < y0 + d; j++) {
+ iter_counts[j * w + i] = kernel(w, h, cmin, cmax, i, j);
+ }
+ }
+ }
+}
+
+
+int main(int argc, char **argv)
+{
+ // Picture size, should be power of two
+ const int w = 512;
+ const int h = w;
+ int *iter_counts;
+
+ complex cmin, cmax;
+
+ int pic_bytes = w * h * sizeof(int);
+ iter_counts = (int *)malloc(pic_bytes);
+
+ cmin = -1.5 + -1.0 * I;
+ cmax = 0.5 + 1.0 * I;
+
+ double t1 = omp_get_wtime();
+
+// TODO create parallel region. How many threads should be calling
+// mandelbrot_block in this uppermost level?
+
+ {
+ mandelbrot_block(iter_counts, w, h, cmin, cmax,
+ 0, 0, w, 1);
+ }
+ double t2 = omp_get_wtime();
+
+ // Save the image to a PNG file
+ save_png(iter_counts, w, h, "mandelbrot.png");
+
+ double walltime = t2 - t1;
+ // Print the timings
+ printf("Mandelbrot set computed in %.3lf s, at %.3lf Mpix/s\n",
+ walltime, h * w * 1e-6 / walltime);
+
+ free(iter_counts);
+ return 0;
+}
+
diff --git a/tasks/c/pngwriter.c b/tasks/c/pngwriter.c
new file mode 100644
index 0000000000000000000000000000000000000000..94e96345ed1f435cfc923bf0fa1ad59e3dadf9e1
--- /dev/null
+++ b/tasks/c/pngwriter.c
@@ -0,0 +1,202 @@
+#include
+#include
+#include
+#include
+#include
+#include "pngwriter.h"
+
+/* Datatype for RGB pixel */
+typedef struct {
+ uint8_t red;
+ uint8_t green;
+ uint8_t blue;
+} pixel_t;
+
+void cmap(const int value, const int max_val, pixel_t *pix);
+
+/* Heat colormap from black to white */
+// *INDENT-OFF*
+static int heat_colormap[256][3] = {
+ { 0, 0, 0}, { 35, 0, 0}, { 52, 0, 0}, { 60, 0, 0},
+ { 63, 1, 0}, { 64, 2, 0}, { 68, 5, 0}, { 69, 6, 0},
+ { 72, 8, 0}, { 74, 10, 0}, { 77, 12, 0}, { 78, 14, 0},
+ { 81, 16, 0}, { 83, 17, 0}, { 85, 19, 0}, { 86, 20, 0},
+ { 89, 22, 0}, { 91, 24, 0}, { 92, 25, 0}, { 94, 26, 0},
+ { 95, 28, 0}, { 98, 30, 0}, {100, 31, 0}, {102, 33, 0},
+ {103, 34, 0}, {105, 35, 0}, {106, 36, 0}, {108, 38, 0},
+ {109, 39, 0}, {111, 40, 0}, {112, 42, 0}, {114, 43, 0},
+ {115, 44, 0}, {117, 45, 0}, {119, 47, 0}, {119, 47, 0},
+ {120, 48, 0}, {122, 49, 0}, {123, 51, 0}, {125, 52, 0},
+ {125, 52, 0}, {126, 53, 0}, {128, 54, 0}, {129, 56, 0},
+ {129, 56, 0}, {131, 57, 0}, {132, 58, 0}, {134, 59, 0},
+ {134, 59, 0}, {136, 61, 0}, {137, 62, 0}, {137, 62, 0},
+ {139, 63, 0}, {139, 63, 0}, {140, 65, 0}, {142, 66, 0},
+ {148, 71, 0}, {149, 72, 0}, {149, 72, 0}, {151, 73, 0},
+ {151, 73, 0}, {153, 75, 0}, {153, 75, 0}, {154, 76, 0},
+ {154, 76, 0}, {154, 76, 0}, {156, 77, 0}, {156, 77, 0},
+ {157, 79, 0}, {157, 79, 0}, {159, 80, 0}, {159, 80, 0},
+ {159, 80, 0}, {160, 81, 0}, {160, 81, 0}, {162, 82, 0},
+ {162, 82, 0}, {163, 84, 0}, {163, 84, 0}, {165, 85, 0},
+ {165, 85, 0}, {166, 86, 0}, {166, 86, 0}, {166, 86, 0},
+ {168, 87, 0}, {168, 87, 0}, {170, 89, 0}, {170, 89, 0},
+ {171, 90, 0}, {171, 90, 0}, {173, 91, 0}, {173, 91, 0},
+ {174, 93, 0}, {174, 93, 0}, {176, 94, 0}, {176, 94, 0},
+ {177, 95, 0}, {177, 95, 0}, {179, 96, 0}, {179, 96, 0},
+ {180, 98, 0}, {182, 99, 0}, {182, 99, 0}, {183, 100, 0},
+ {183, 100, 0}, {185, 102, 0}, {185, 102, 0}, {187, 103, 0},
+ {187, 103, 0}, {188, 104, 0}, {188, 104, 0}, {190, 105, 0},
+ {191, 107, 0}, {191, 107, 0}, {193, 108, 0}, {193, 108, 0},
+ {194, 109, 0}, {196, 110, 0}, {196, 110, 0}, {197, 112, 0},
+ {197, 112, 0}, {199, 113, 0}, {200, 114, 0}, {200, 114, 0},
+ {202, 116, 0}, {202, 116, 0}, {204, 117, 0}, {205, 118, 0},
+ {205, 118, 0}, {207, 119, 0}, {208, 121, 0}, {208, 121, 0},
+ {210, 122, 0}, {211, 123, 0}, {211, 123, 0}, {213, 124, 0},
+ {214, 126, 0}, {214, 126, 0}, {216, 127, 0}, {217, 128, 0},
+ {217, 128, 0}, {219, 130, 0}, {221, 131, 0}, {221, 131, 0},
+ {222, 132, 0}, {224, 133, 0}, {224, 133, 0}, {225, 135, 0},
+ {227, 136, 0}, {227, 136, 0}, {228, 137, 0}, {230, 138, 0},
+ {230, 138, 0}, {231, 140, 0}, {233, 141, 0}, {233, 141, 0},
+ {234, 142, 0}, {236, 144, 0}, {236, 144, 0}, {238, 145, 0},
+ {239, 146, 0}, {241, 147, 0}, {241, 147, 0}, {242, 149, 0},
+ {244, 150, 0}, {244, 150, 0}, {245, 151, 0}, {247, 153, 0},
+ {247, 153, 0}, {248, 154, 0}, {250, 155, 0}, {251, 156, 0},
+ {251, 156, 0}, {253, 158, 0}, {255, 159, 0}, {255, 159, 0},
+ {255, 160, 0}, {255, 161, 0}, {255, 163, 0}, {255, 163, 0},
+ {255, 164, 0}, {255, 165, 0}, {255, 167, 0}, {255, 167, 0},
+ {255, 168, 0}, {255, 169, 0}, {255, 169, 0}, {255, 170, 0},
+ {255, 172, 0}, {255, 173, 0}, {255, 173, 0}, {255, 174, 0},
+ {255, 175, 0}, {255, 177, 0}, {255, 178, 0}, {255, 179, 0},
+ {255, 181, 0}, {255, 181, 0}, {255, 182, 0}, {255, 183, 0},
+ {255, 184, 0}, {255, 187, 7}, {255, 188, 10}, {255, 189, 14},
+ {255, 191, 18}, {255, 192, 21}, {255, 193, 25}, {255, 195, 29},
+ {255, 197, 36}, {255, 198, 40}, {255, 200, 43}, {255, 202, 51},
+ {255, 204, 54}, {255, 206, 61}, {255, 207, 65}, {255, 210, 72},
+ {255, 211, 76}, {255, 214, 83}, {255, 216, 91}, {255, 219, 98},
+ {255, 221, 105}, {255, 223, 109}, {255, 225, 116}, {255, 228, 123},
+ {255, 232, 134}, {255, 234, 142}, {255, 237, 149}, {255, 239, 156},
+ {255, 240, 160}, {255, 243, 167}, {255, 246, 174}, {255, 248, 182},
+ {255, 249, 185}, {255, 252, 193}, {255, 253, 196}, {255, 255, 204},
+ {255, 255, 207}, {255, 255, 211}, {255, 255, 218}, {255, 255, 222},
+ {255, 255, 225}, {255, 255, 229}, {255, 255, 233}, {255, 255, 236},
+ {255, 255, 240}, {255, 255, 244}, {255, 255, 247}, {255, 255, 255}
+};
+// *INDENT-ON*
+/* Save the two dimensional array as a png image
+ * Arguments:
+ * double *data is a pointer to an array of nx*ny values
+ * int nx is the number of COLUMNS to be written
+ * int ny is the number of ROWS to be written
+ * char *fname is the name of the picture
+ * char lang is either 'c' or 'f' denoting the memory
+ * layout. That is, if 'f' is given, then rows
+ * and columns are swapped.
+ */
+int save_png(int *data, const int height, const int width, const char *fname)
+{
+ FILE *fp;
+ png_structp pngstruct_ptr = NULL;
+ png_infop pnginfo_ptr = NULL;
+ png_byte **row_pointers = NULL;
+ int i, j, max_value;
+
+ /* Determine the maximum value */
+ max_value = INT_MIN;
+ for (i = 0; i < height * width; i++) {
+ max_value = data[i] > max_value ? data[i] : max_value;
+ }
+
+ /* Default return status is failure */
+ int status = -1;
+
+ int pixel_size = 3;
+ int depth = 8;
+
+ fp = fopen(fname, "wb");
+ if (fp == NULL) {
+ goto fopen_failed;
+ }
+
+ pngstruct_ptr =
+ png_create_write_struct(PNG_LIBPNG_VER_STRING, NULL, NULL, NULL);
+
+ if (pngstruct_ptr == NULL) {
+ goto pngstruct_create_failed;
+ }
+
+ pnginfo_ptr = png_create_info_struct(pngstruct_ptr);
+
+ if (pnginfo_ptr == NULL) {
+ goto pnginfo_create_failed;
+ }
+
+ if (setjmp(png_jmpbuf(pngstruct_ptr))) {
+ goto setjmp_failed;
+ }
+
+ png_set_IHDR(pngstruct_ptr, pnginfo_ptr, (size_t) width,
+ (size_t) height, depth, PNG_COLOR_TYPE_RGB,
+ PNG_INTERLACE_NONE,
+ PNG_COMPRESSION_TYPE_DEFAULT, PNG_FILTER_TYPE_DEFAULT);
+
+ row_pointers = png_malloc(pngstruct_ptr, height * sizeof(png_byte *));
+
+ for (i = 0; i < height; i++) {
+ png_byte *row = png_malloc(pngstruct_ptr,
+ sizeof(uint8_t) * width * pixel_size);
+ row_pointers[i] = row;
+
+ // Branch according to the memory layout
+ for (j = 0; j < width; j++) {
+ pixel_t pixel;
+ // Scale the values so that values between
+ // 0 and 100 degrees are mapped to values
+ // between 0 and 255
+ cmap(data[j + i * width], max_value, &pixel);
+ *row++ = pixel.red;
+ *row++ = pixel.green;
+ *row++ = pixel.blue;
+ }
+ }
+
+ png_init_io(pngstruct_ptr, fp);
+ png_set_rows(pngstruct_ptr, pnginfo_ptr, row_pointers);
+ png_write_png(pngstruct_ptr, pnginfo_ptr,
+ PNG_TRANSFORM_IDENTITY, NULL);
+
+ status = 0;
+
+ for (i = 0; i < height; i++) {
+ png_free(pngstruct_ptr, row_pointers[i]);
+ }
+ png_free(pngstruct_ptr, row_pointers);
+
+setjmp_failed:
+pnginfo_create_failed:
+ png_destroy_write_struct(&pngstruct_ptr, &pnginfo_ptr);
+pngstruct_create_failed:
+ fclose(fp);
+fopen_failed:
+ return status;
+}
+
+/* This routine sets the RGB values for the pixel_t structure using
+ * the colormap data heat_colormap. If the value is outside the
+ * acceptable png values 0,255 blue or red color is used instead. */
+void cmap(const int value, const int max_value, pixel_t *pix)
+{
+ int ival;
+
+ ival = ((double) value / (double) max_value) * 255;
+ /* Check for wrong values */
+ if (ival > 255) {
+ ival = 255;
+ }
+ if (ival < 0) {
+ ival = 0;
+ }
+
+ /* Pick color from heat_colormap */
+ pix->red = heat_colormap[ival][0];
+ pix->green = heat_colormap[ival][1];
+ pix->blue = heat_colormap[ival][2];
+}
diff --git a/tasks/c/pngwriter.h b/tasks/c/pngwriter.h
new file mode 100644
index 0000000000000000000000000000000000000000..9abe52b7e5e83e93f9f8802c1ededbaf190c26fa
--- /dev/null
+++ b/tasks/c/pngwriter.h
@@ -0,0 +1,6 @@
+#ifndef PNGWRITER_H_
+#define PNGWRITER_H_
+
+int save_png(int *data, const int nx, const int ny, const char *fname);
+
+#endif
diff --git a/tasks/c/solution/Makefile b/tasks/c/solution/Makefile
new file mode 100644
index 0000000000000000000000000000000000000000..abce73f59e7ae8762e051e2f33129328fdbad6f2
--- /dev/null
+++ b/tasks/c/solution/Makefile
@@ -0,0 +1,49 @@
+ifeq ($(COMP),)
+COMP=cray
+endif
+
+COMMONDIR=../../../heat/common
+
+ifeq ($(COMP),cray)
+FC=ftn
+CC=cc
+FCFLAGS=-O3
+CCFLAGS=-O3 -I/appl/opt/libpng/include -I$(COMMONDIR)
+LDFLAGS=-L/appl/opt/libpng/lib
+LIBS=-lpng -lz
+endif
+
+ifeq ($(COMP),gnu)
+FC=gfortran
+CC=gcc
+FCFLAGS=-fopenmp -O3 -Wall
+CCFLAGS=-fopenmp -O3 -Wall -I$(COMMONDIR)
+LDFLAGS=
+LIBS=-lpng
+endif
+
+EXE=mandelbrot
+OBJS=mandelbrot.o
+OBJS_PNG=pngwriter.o
+CORRECT_OBJS_PNG=$(COMMONDIR)/pngwriter.o
+
+all: $(EXE)
+
+$(COMMONDIR)/pngwriter.o: $(COMMONDIR)/pngwriter.c $(COMMONDIR)/pngwriter.h
+mandelbrot.o: mandelbrot.c
+
+pngwriter.o: pngwriter.c pngwriter.h
+ $(CC) $(CCFLAGS) -c -o $@ $<
+
+$(EXE): $(OBJS) $(OBJS_PNG)
+ $(CC) $(CCFLAGS) $(OBJS) $(OBJS_PNG) -o $@ $(LDFLAGS) $(LIBS)
+
+%.o: %.F90
+ $(FC) $(FCFLAGS) -c $< -o $@
+
+%.o: %.c
+ $(CC) $(CCFLAGS) -c $< -o $@
+
+.PHONY: clean
+clean:
+ -/bin/rm -f $(EXE) a.out *.o *.mod *.png *~ mandelbrot.png
diff --git a/tasks/c/solution/mandelbrot.c b/tasks/c/solution/mandelbrot.c
new file mode 100644
index 0000000000000000000000000000000000000000..a778b31da0b3e0e22a3808d11dcf77a70080184d
--- /dev/null
+++ b/tasks/c/solution/mandelbrot.c
@@ -0,0 +1,134 @@
+/*
+ * This example is based on the code of Andrew V. Adinetz
+ * https://github.com/canonizer/mandelbrot-dyn
+ * Licensed under The MIT License
+ */
+
+#include
+#include
+#include
+#include
+
+#include "pngwriter.h"
+
+// Maximum number of iterations
+const int MAX_ITER_COUNT = 512;
+// Marker for different iteration counts
+const int DIFF_ITER_COUNT = -1;
+// Maximum recursion depth
+const int MAX_DEPTH = 6;
+// Region size below which do per-pixel
+const int MIN_SIZE = 32;
+// Subdivision factor along each axis
+const int SUBDIV = 4;
+
+
+// |z|^2 of a complex number z
+float abs2(complex v)
+{
+ return creal(v) * creal(v) + cimag(v) * cimag(v);
+}
+
+// The kernel to count per-pixel values of the portion of the Mandelbrot set
+// Does not need to be edited
+int kernel(int w, int h, complex cmin, complex cmax,
+ int x, int y)
+{
+ complex dc = cmax - cmin;
+ float fx = (float)x / w;
+ float fy = (float)y / h;
+ complex c = cmin + fx * creal(dc) + fy * cimag(dc) * I;
+ int iteration = 0;
+ complex z = c;
+ while (iteration < MAX_ITER_COUNT && abs2(z) < 2 * 2) {
+ z = z * z + c;
+ iteration++;
+ }
+ return iteration;
+}
+
+
+/* Computes the Mandelbrot image recursively
+ * At each call, the image is divided into smaller blocks (by a factor of
+ * subdiv), and the function is called recursively with arguments corresponding
+ * to subblock. When maximum recursion depth is reached or size of block
+ * is smaller than predefined minimum, one starts to calculate actual pixel
+ * values
+ *
+ * - - - - - - - - ----- -----
+ * | | | | | |
+ * | | ----- -----
+ * | | --> --> ...
+ * | | ----- -----
+ * | | | | | |
+ * | | ----- -----
+ * ---------------
+ */
+void mandelbrot_block(int *iter_counts, int w, int h, complex cmin,
+ complex cmax, int x0, int y0, int d, int depth)
+{
+
+// TODO Parallelize the recursive function call
+// with OpenMP tasks
+
+ int block_size = d / SUBDIV;
+ if (depth + 1 < MAX_DEPTH && block_size > MIN_SIZE) {
+ // Subdivide recursively
+ for (int i = 0; i < SUBDIV; i++) {
+ for (int j = 0; j < SUBDIV; j++) {
+ #pragma omp task
+ mandelbrot_block(iter_counts, w, h, cmin, cmax,
+ x0 + i * block_size, y0 + j * block_size,
+ d / SUBDIV, depth + 1);
+ }
+ }
+ } else {
+ // Last recursion level reached, calculate the values
+ for (int i = x0; i < x0 + d; i++) {
+ for (int j = y0; j < y0 + d; j++) {
+ iter_counts[j * w + i] = kernel(w, h, cmin, cmax, i, j);
+ }
+ }
+ }
+}
+
+
+int main(int argc, char **argv)
+{
+ // Picture size, should be power of two
+ const int w = 512;
+ const int h = w;
+ int *iter_counts;
+
+ complex cmin, cmax;
+
+ int pic_bytes = w * h * sizeof(int);
+ iter_counts = (int *)malloc(pic_bytes);
+
+ cmin = -1.5 + -1.0 * I;
+ cmax = 0.5 + 1.0 * I;
+
+ double t1 = omp_get_wtime();
+
+// TODO create parallel region. How many threads should be calling
+// mandelbrot_block in this uppermost level?
+ #pragma omp parallel
+ #pragma omp single
+ {
+ mandelbrot_block(iter_counts, w, h, cmin, cmax,
+ 0, 0, w, 1);
+ }
+ double t2 = omp_get_wtime();
+
+ // Save the image to a PNG file
+ save_png(iter_counts, w, h, "mandelbrot.png");
+
+ double walltime = t2 - t1;
+ // Print the timings
+ printf("Mandelbrot set computed in %.3lf s, at %.3lf Mpix/s\n",
+ walltime, h * w * 1e-6 / walltime);
+
+ free(iter_counts);
+ return 0;
+}
+
diff --git a/tasks/c/solution/pngwriter.c b/tasks/c/solution/pngwriter.c
new file mode 100644
index 0000000000000000000000000000000000000000..94e96345ed1f435cfc923bf0fa1ad59e3dadf9e1
--- /dev/null
+++ b/tasks/c/solution/pngwriter.c
@@ -0,0 +1,202 @@
+#include
+#include
+#include
+#include
+#include
+#include "pngwriter.h"
+
+/* Datatype for RGB pixel */
+typedef struct {
+ uint8_t red;
+ uint8_t green;
+ uint8_t blue;
+} pixel_t;
+
+void cmap(const int value, const int max_val, pixel_t *pix);
+
+/* Heat colormap from black to white */
+// *INDENT-OFF*
+static int heat_colormap[256][3] = {
+ { 0, 0, 0}, { 35, 0, 0}, { 52, 0, 0}, { 60, 0, 0},
+ { 63, 1, 0}, { 64, 2, 0}, { 68, 5, 0}, { 69, 6, 0},
+ { 72, 8, 0}, { 74, 10, 0}, { 77, 12, 0}, { 78, 14, 0},
+ { 81, 16, 0}, { 83, 17, 0}, { 85, 19, 0}, { 86, 20, 0},
+ { 89, 22, 0}, { 91, 24, 0}, { 92, 25, 0}, { 94, 26, 0},
+ { 95, 28, 0}, { 98, 30, 0}, {100, 31, 0}, {102, 33, 0},
+ {103, 34, 0}, {105, 35, 0}, {106, 36, 0}, {108, 38, 0},
+ {109, 39, 0}, {111, 40, 0}, {112, 42, 0}, {114, 43, 0},
+ {115, 44, 0}, {117, 45, 0}, {119, 47, 0}, {119, 47, 0},
+ {120, 48, 0}, {122, 49, 0}, {123, 51, 0}, {125, 52, 0},
+ {125, 52, 0}, {126, 53, 0}, {128, 54, 0}, {129, 56, 0},
+ {129, 56, 0}, {131, 57, 0}, {132, 58, 0}, {134, 59, 0},
+ {134, 59, 0}, {136, 61, 0}, {137, 62, 0}, {137, 62, 0},
+ {139, 63, 0}, {139, 63, 0}, {140, 65, 0}, {142, 66, 0},
+ {148, 71, 0}, {149, 72, 0}, {149, 72, 0}, {151, 73, 0},
+ {151, 73, 0}, {153, 75, 0}, {153, 75, 0}, {154, 76, 0},
+ {154, 76, 0}, {154, 76, 0}, {156, 77, 0}, {156, 77, 0},
+ {157, 79, 0}, {157, 79, 0}, {159, 80, 0}, {159, 80, 0},
+ {159, 80, 0}, {160, 81, 0}, {160, 81, 0}, {162, 82, 0},
+ {162, 82, 0}, {163, 84, 0}, {163, 84, 0}, {165, 85, 0},
+ {165, 85, 0}, {166, 86, 0}, {166, 86, 0}, {166, 86, 0},
+ {168, 87, 0}, {168, 87, 0}, {170, 89, 0}, {170, 89, 0},
+ {171, 90, 0}, {171, 90, 0}, {173, 91, 0}, {173, 91, 0},
+ {174, 93, 0}, {174, 93, 0}, {176, 94, 0}, {176, 94, 0},
+ {177, 95, 0}, {177, 95, 0}, {179, 96, 0}, {179, 96, 0},
+ {180, 98, 0}, {182, 99, 0}, {182, 99, 0}, {183, 100, 0},
+ {183, 100, 0}, {185, 102, 0}, {185, 102, 0}, {187, 103, 0},
+ {187, 103, 0}, {188, 104, 0}, {188, 104, 0}, {190, 105, 0},
+ {191, 107, 0}, {191, 107, 0}, {193, 108, 0}, {193, 108, 0},
+ {194, 109, 0}, {196, 110, 0}, {196, 110, 0}, {197, 112, 0},
+ {197, 112, 0}, {199, 113, 0}, {200, 114, 0}, {200, 114, 0},
+ {202, 116, 0}, {202, 116, 0}, {204, 117, 0}, {205, 118, 0},
+ {205, 118, 0}, {207, 119, 0}, {208, 121, 0}, {208, 121, 0},
+ {210, 122, 0}, {211, 123, 0}, {211, 123, 0}, {213, 124, 0},
+ {214, 126, 0}, {214, 126, 0}, {216, 127, 0}, {217, 128, 0},
+ {217, 128, 0}, {219, 130, 0}, {221, 131, 0}, {221, 131, 0},
+ {222, 132, 0}, {224, 133, 0}, {224, 133, 0}, {225, 135, 0},
+ {227, 136, 0}, {227, 136, 0}, {228, 137, 0}, {230, 138, 0},
+ {230, 138, 0}, {231, 140, 0}, {233, 141, 0}, {233, 141, 0},
+ {234, 142, 0}, {236, 144, 0}, {236, 144, 0}, {238, 145, 0},
+ {239, 146, 0}, {241, 147, 0}, {241, 147, 0}, {242, 149, 0},
+ {244, 150, 0}, {244, 150, 0}, {245, 151, 0}, {247, 153, 0},
+ {247, 153, 0}, {248, 154, 0}, {250, 155, 0}, {251, 156, 0},
+ {251, 156, 0}, {253, 158, 0}, {255, 159, 0}, {255, 159, 0},
+ {255, 160, 0}, {255, 161, 0}, {255, 163, 0}, {255, 163, 0},
+ {255, 164, 0}, {255, 165, 0}, {255, 167, 0}, {255, 167, 0},
+ {255, 168, 0}, {255, 169, 0}, {255, 169, 0}, {255, 170, 0},
+ {255, 172, 0}, {255, 173, 0}, {255, 173, 0}, {255, 174, 0},
+ {255, 175, 0}, {255, 177, 0}, {255, 178, 0}, {255, 179, 0},
+ {255, 181, 0}, {255, 181, 0}, {255, 182, 0}, {255, 183, 0},
+ {255, 184, 0}, {255, 187, 7}, {255, 188, 10}, {255, 189, 14},
+ {255, 191, 18}, {255, 192, 21}, {255, 193, 25}, {255, 195, 29},
+ {255, 197, 36}, {255, 198, 40}, {255, 200, 43}, {255, 202, 51},
+ {255, 204, 54}, {255, 206, 61}, {255, 207, 65}, {255, 210, 72},
+ {255, 211, 76}, {255, 214, 83}, {255, 216, 91}, {255, 219, 98},
+ {255, 221, 105}, {255, 223, 109}, {255, 225, 116}, {255, 228, 123},
+ {255, 232, 134}, {255, 234, 142}, {255, 237, 149}, {255, 239, 156},
+ {255, 240, 160}, {255, 243, 167}, {255, 246, 174}, {255, 248, 182},
+ {255, 249, 185}, {255, 252, 193}, {255, 253, 196}, {255, 255, 204},
+ {255, 255, 207}, {255, 255, 211}, {255, 255, 218}, {255, 255, 222},
+ {255, 255, 225}, {255, 255, 229}, {255, 255, 233}, {255, 255, 236},
+ {255, 255, 240}, {255, 255, 244}, {255, 255, 247}, {255, 255, 255}
+};
+// *INDENT-ON*
+/* Save the two dimensional array as a png image
+ * Arguments:
+ * double *data is a pointer to an array of nx*ny values
+ * int nx is the number of COLUMNS to be written
+ * int ny is the number of ROWS to be written
+ * char *fname is the name of the picture
+ * char lang is either 'c' or 'f' denoting the memory
+ * layout. That is, if 'f' is given, then rows
+ * and columns are swapped.
+ */
+int save_png(int *data, const int height, const int width, const char *fname)
+{
+ FILE *fp;
+ png_structp pngstruct_ptr = NULL;
+ png_infop pnginfo_ptr = NULL;
+ png_byte **row_pointers = NULL;
+ int i, j, max_value;
+
+ /* Determine the maximum value */
+ max_value = INT_MIN;
+ for (i = 0; i < height * width; i++) {
+ max_value = data[i] > max_value ? data[i] : max_value;
+ }
+
+ /* Default return status is failure */
+ int status = -1;
+
+ int pixel_size = 3;
+ int depth = 8;
+
+ fp = fopen(fname, "wb");
+ if (fp == NULL) {
+ goto fopen_failed;
+ }
+
+ pngstruct_ptr =
+ png_create_write_struct(PNG_LIBPNG_VER_STRING, NULL, NULL, NULL);
+
+ if (pngstruct_ptr == NULL) {
+ goto pngstruct_create_failed;
+ }
+
+ pnginfo_ptr = png_create_info_struct(pngstruct_ptr);
+
+ if (pnginfo_ptr == NULL) {
+ goto pnginfo_create_failed;
+ }
+
+ if (setjmp(png_jmpbuf(pngstruct_ptr))) {
+ goto setjmp_failed;
+ }
+
+ png_set_IHDR(pngstruct_ptr, pnginfo_ptr, (size_t) width,
+ (size_t) height, depth, PNG_COLOR_TYPE_RGB,
+ PNG_INTERLACE_NONE,
+ PNG_COMPRESSION_TYPE_DEFAULT, PNG_FILTER_TYPE_DEFAULT);
+
+ row_pointers = png_malloc(pngstruct_ptr, height * sizeof(png_byte *));
+
+ for (i = 0; i < height; i++) {
+ png_byte *row = png_malloc(pngstruct_ptr,
+ sizeof(uint8_t) * width * pixel_size);
+ row_pointers[i] = row;
+
+ // Branch according to the memory layout
+ for (j = 0; j < width; j++) {
+ pixel_t pixel;
+ // Scale the values so that values between
+ // 0 and 100 degrees are mapped to values
+ // between 0 and 255
+ cmap(data[j + i * width], max_value, &pixel);
+ *row++ = pixel.red;
+ *row++ = pixel.green;
+ *row++ = pixel.blue;
+ }
+ }
+
+ png_init_io(pngstruct_ptr, fp);
+ png_set_rows(pngstruct_ptr, pnginfo_ptr, row_pointers);
+ png_write_png(pngstruct_ptr, pnginfo_ptr,
+ PNG_TRANSFORM_IDENTITY, NULL);
+
+ status = 0;
+
+ for (i = 0; i < height; i++) {
+ png_free(pngstruct_ptr, row_pointers[i]);
+ }
+ png_free(pngstruct_ptr, row_pointers);
+
+setjmp_failed:
+pnginfo_create_failed:
+ png_destroy_write_struct(&pngstruct_ptr, &pnginfo_ptr);
+pngstruct_create_failed:
+ fclose(fp);
+fopen_failed:
+ return status;
+}
+
+/* This routine sets the RGB values for the pixel_t structure using
+ * the colormap data heat_colormap. If the value is outside the
+ * acceptable png values 0,255 blue or red color is used instead. */
+void cmap(const int value, const int max_value, pixel_t *pix)
+{
+ int ival;
+
+ ival = ((double) value / (double) max_value) * 255;
+ /* Check for wrong values */
+ if (ival > 255) {
+ ival = 255;
+ }
+ if (ival < 0) {
+ ival = 0;
+ }
+
+ /* Pick color from heat_colormap */
+ pix->red = heat_colormap[ival][0];
+ pix->green = heat_colormap[ival][1];
+ pix->blue = heat_colormap[ival][2];
+}
diff --git a/tasks/c/solution/pngwriter.h b/tasks/c/solution/pngwriter.h
new file mode 100644
index 0000000000000000000000000000000000000000..9abe52b7e5e83e93f9f8802c1ededbaf190c26fa
--- /dev/null
+++ b/tasks/c/solution/pngwriter.h
@@ -0,0 +1,6 @@
+#ifndef PNGWRITER_H_
+#define PNGWRITER_H_
+
+int save_png(int *data, const int nx, const int ny, const char *fname);
+
+#endif
diff --git a/tasks/fortran/Makefile b/tasks/fortran/Makefile
new file mode 100644
index 0000000000000000000000000000000000000000..323942db80f4581b003b06b2500f3a3adfe9e084
--- /dev/null
+++ b/tasks/fortran/Makefile
@@ -0,0 +1,50 @@
+ifeq ($(COMP),)
+COMP=cray
+endif
+
+COMMONDIR=../../../heat/common
+
+ifeq ($(COMP),cray)
+FC=ftn
+CC=cc
+FCFLAGS=-O3
+CCFLAGS=-O3 -I/appl/opt/libpng/include -I$(COMMONDIR)
+LDFLAGS=-L/appl/opt/libpng/lib
+LIBS=-lpng -lz
+endif
+
+ifeq ($(COMP),gnu)
+FC=gfortran
+CC=gcc
+FCFLAGS=-fopenmp -O3 -Wall
+CCFLAGS=-fopenmp -O3 -Wall -I$(COMMONDIR)
+LDFLAGS=
+LIBS=-lpng
+endif
+
+EXE=mandelbrot
+OBJS=mandelbrot.o pngwriter_mod.o
+OBJS_PNG=pngwriter.o
+CORRECT_OBJS_PNG=$(COMMONDIR)/pngwriter.o
+
+all: $(EXE)
+
+$(COMMONDIR)/pngwriter.o: $(COMMONDIR)/pngwriter.c $(COMMONDIR)/pngwriter.h
+pngwriter_mod.o: pngwriter_mod.F90
+mandelbrot.o: mandelbrot.F90 pngwriter_mod.o
+
+pngwriter.o: pngwriter.c pngwriter.h
+ $(CC) $(CCFLAGS) -c -o $@ $<
+
+$(EXE): $(OBJS) $(OBJS_PNG)
+ $(FC) $(FCFLAGS) $(OBJS) $(OBJS_PNG) -o $@ $(LDFLAGS) $(LIBS)
+
+%.o: %.F90
+ $(FC) $(FCFLAGS) -c $< -o $@
+
+%.o: %.c
+ $(CC) $(CCFLAGS) -c $< -o $@
+
+.PHONY: clean
+clean:
+ -/bin/rm -f $(EXE) a.out *.o *.mod *.png *~ mandelbrot.png
diff --git a/tasks/fortran/mandelbrot.F90 b/tasks/fortran/mandelbrot.F90
new file mode 100644
index 0000000000000000000000000000000000000000..e624ca46375cfbbbe4e6f6112c92cd60b2fc3094
--- /dev/null
+++ b/tasks/fortran/mandelbrot.F90
@@ -0,0 +1,129 @@
+program mandelbrot
+
+ use iso_fortran_env, only : REAL64
+ use pngwriter
+ use omp_lib
+
+ implicit none
+
+ integer, parameter :: dp = REAL64
+
+ integer, parameter :: max_iter_count = 512
+ integer, parameter :: max_depth = 6
+ integer, parameter :: min_size = 32
+ integer, parameter :: subdiv = 4
+
+ integer, parameter :: w = 2048
+ integer, parameter :: h = w
+
+ integer, pointer, dimension(:,:) :: iter_counts
+
+ integer :: stat
+
+ complex(dp) :: cmin, cmax
+ real(dp) :: t0, t1
+
+ allocate(iter_counts(w, h))
+
+
+ t0 = omp_get_wtime()
+
+ cmin = (-1.5, -1.0)
+ cmax = (0.5, 1.0)
+
+ ! TODO create parallel region. How many threads should be calling
+ ! mandelbrot_block in this uppermost level?
+
+ call mandelbrot_block(iter_counts, w, h, cmin, cmax, 0, 0, w, 1)
+
+ t1 = omp_get_wtime()
+
+ stat = save_png(iter_counts, h, w, 'mandelbrot.png')
+
+ deallocate(iter_counts)
+
+ write(*,*) 'Mandelbrot set computed in', t1 - t0, 's'
+
+contains
+
+ ! Computes the Mandelbrot image recursively
+ ! At each call, the image is divided into smaller blocks (by a factor of
+ ! subdiv), and the function is called recursively with arguments corresponding
+ ! to subblock. When maximum recursion depth is reached or size of block
+ ! is smaller than predefined minimum, one starts to calculate actual pixel
+ ! values
+ !
+ ! - - - - - - - - ----- -----
+ ! | | | | | |
+ ! | | ----- -----
+ ! | | --> --> ...
+ ! | | ----- -----
+ ! | | | | | |
+ ! | | ----- -----
+ ! ---------------
+ !
+
+ recursive subroutine mandelbrot_block(iter_counts, w, h, cmin, cmax, &
+ x0, y0, d, depth)
+ implicit none
+
+ integer, pointer, dimension(:,:), intent(inout) :: iter_counts
+ integer, intent(in) :: w, h, x0, y0, d, depth
+ complex(dp), intent(in) :: cmin, cmax
+
+ integer :: block_size, i, j
+
+ ! TODO Parallelize the recursive function call
+ ! with OpenMP tasks
+
+ block_size = d / subdiv
+ if ((depth + 1 < max_depth) .and. (block_size > min_size)) then
+ ! Subdivide recursively
+ do i=0, subdiv - 1
+ do j=0, subdiv - 1
+ call mandelbrot_block(iter_counts, w, h, cmin, cmax, &
+ x0 + i*block_size, y0 + j*block_size, &
+ block_size, depth + 1)
+ end do
+ end do
+ else
+ ! Last recursion level reached, calculate the values
+ do j = y0 + 1, y0 + d
+ do i = x0 + 1, x0 + d
+ iter_counts(i, j) = kernel(h, w, cmin, cmax, i-1, j-1)
+ end do
+ end do
+ end if
+
+ end subroutine mandelbrot_block
+
+ ! Calculate iteration count for a pixel.
+ ! This function does not need to be edited
+ integer function kernel(h, w, cmin, cmax, x, y)
+ implicit none
+
+ integer, intent(in) :: h, w, x, y
+ complex(dp), intent(in) :: cmin, cmax
+
+ integer :: iteration
+ complex(dp) :: z, dc, c
+ real(dp) :: fx, fy
+
+ dc = cmax - cmin
+ fx = real(x, dp) / w
+ fy = real(y, dp) / h
+ c = cmin + fx * real(dc) + fy * imag(dc) * (0.0, 1.0)
+
+ z = c
+ iteration = 0
+
+ do while (iteration < max_iter_count .and. abs(z)**2 < 4)
+ z = z**2 + c
+ iteration = iteration + 1
+ end do
+
+ kernel = iteration
+
+ end function kernel
+
+end program mandelbrot
diff --git a/tasks/fortran/pngwriter.c b/tasks/fortran/pngwriter.c
new file mode 100644
index 0000000000000000000000000000000000000000..94e96345ed1f435cfc923bf0fa1ad59e3dadf9e1
--- /dev/null
+++ b/tasks/fortran/pngwriter.c
@@ -0,0 +1,202 @@
+#include
+#include
+#include
+#include
+#include
+#include "pngwriter.h"
+
+/* Datatype for RGB pixel */
+typedef struct {
+ uint8_t red;
+ uint8_t green;
+ uint8_t blue;
+} pixel_t;
+
+void cmap(const int value, const int max_val, pixel_t *pix);
+
+/* Heat colormap from black to white */
+// *INDENT-OFF*
+static int heat_colormap[256][3] = {
+ { 0, 0, 0}, { 35, 0, 0}, { 52, 0, 0}, { 60, 0, 0},
+ { 63, 1, 0}, { 64, 2, 0}, { 68, 5, 0}, { 69, 6, 0},
+ { 72, 8, 0}, { 74, 10, 0}, { 77, 12, 0}, { 78, 14, 0},
+ { 81, 16, 0}, { 83, 17, 0}, { 85, 19, 0}, { 86, 20, 0},
+ { 89, 22, 0}, { 91, 24, 0}, { 92, 25, 0}, { 94, 26, 0},
+ { 95, 28, 0}, { 98, 30, 0}, {100, 31, 0}, {102, 33, 0},
+ {103, 34, 0}, {105, 35, 0}, {106, 36, 0}, {108, 38, 0},
+ {109, 39, 0}, {111, 40, 0}, {112, 42, 0}, {114, 43, 0},
+ {115, 44, 0}, {117, 45, 0}, {119, 47, 0}, {119, 47, 0},
+ {120, 48, 0}, {122, 49, 0}, {123, 51, 0}, {125, 52, 0},
+ {125, 52, 0}, {126, 53, 0}, {128, 54, 0}, {129, 56, 0},
+ {129, 56, 0}, {131, 57, 0}, {132, 58, 0}, {134, 59, 0},
+ {134, 59, 0}, {136, 61, 0}, {137, 62, 0}, {137, 62, 0},
+ {139, 63, 0}, {139, 63, 0}, {140, 65, 0}, {142, 66, 0},
+ {148, 71, 0}, {149, 72, 0}, {149, 72, 0}, {151, 73, 0},
+ {151, 73, 0}, {153, 75, 0}, {153, 75, 0}, {154, 76, 0},
+ {154, 76, 0}, {154, 76, 0}, {156, 77, 0}, {156, 77, 0},
+ {157, 79, 0}, {157, 79, 0}, {159, 80, 0}, {159, 80, 0},
+ {159, 80, 0}, {160, 81, 0}, {160, 81, 0}, {162, 82, 0},
+ {162, 82, 0}, {163, 84, 0}, {163, 84, 0}, {165, 85, 0},
+ {165, 85, 0}, {166, 86, 0}, {166, 86, 0}, {166, 86, 0},
+ {168, 87, 0}, {168, 87, 0}, {170, 89, 0}, {170, 89, 0},
+ {171, 90, 0}, {171, 90, 0}, {173, 91, 0}, {173, 91, 0},
+ {174, 93, 0}, {174, 93, 0}, {176, 94, 0}, {176, 94, 0},
+ {177, 95, 0}, {177, 95, 0}, {179, 96, 0}, {179, 96, 0},
+ {180, 98, 0}, {182, 99, 0}, {182, 99, 0}, {183, 100, 0},
+ {183, 100, 0}, {185, 102, 0}, {185, 102, 0}, {187, 103, 0},
+ {187, 103, 0}, {188, 104, 0}, {188, 104, 0}, {190, 105, 0},
+ {191, 107, 0}, {191, 107, 0}, {193, 108, 0}, {193, 108, 0},
+ {194, 109, 0}, {196, 110, 0}, {196, 110, 0}, {197, 112, 0},
+ {197, 112, 0}, {199, 113, 0}, {200, 114, 0}, {200, 114, 0},
+ {202, 116, 0}, {202, 116, 0}, {204, 117, 0}, {205, 118, 0},
+ {205, 118, 0}, {207, 119, 0}, {208, 121, 0}, {208, 121, 0},
+ {210, 122, 0}, {211, 123, 0}, {211, 123, 0}, {213, 124, 0},
+ {214, 126, 0}, {214, 126, 0}, {216, 127, 0}, {217, 128, 0},
+ {217, 128, 0}, {219, 130, 0}, {221, 131, 0}, {221, 131, 0},
+ {222, 132, 0}, {224, 133, 0}, {224, 133, 0}, {225, 135, 0},
+ {227, 136, 0}, {227, 136, 0}, {228, 137, 0}, {230, 138, 0},
+ {230, 138, 0}, {231, 140, 0}, {233, 141, 0}, {233, 141, 0},
+ {234, 142, 0}, {236, 144, 0}, {236, 144, 0}, {238, 145, 0},
+ {239, 146, 0}, {241, 147, 0}, {241, 147, 0}, {242, 149, 0},
+ {244, 150, 0}, {244, 150, 0}, {245, 151, 0}, {247, 153, 0},
+ {247, 153, 0}, {248, 154, 0}, {250, 155, 0}, {251, 156, 0},
+ {251, 156, 0}, {253, 158, 0}, {255, 159, 0}, {255, 159, 0},
+ {255, 160, 0}, {255, 161, 0}, {255, 163, 0}, {255, 163, 0},
+ {255, 164, 0}, {255, 165, 0}, {255, 167, 0}, {255, 167, 0},
+ {255, 168, 0}, {255, 169, 0}, {255, 169, 0}, {255, 170, 0},
+ {255, 172, 0}, {255, 173, 0}, {255, 173, 0}, {255, 174, 0},
+ {255, 175, 0}, {255, 177, 0}, {255, 178, 0}, {255, 179, 0},
+ {255, 181, 0}, {255, 181, 0}, {255, 182, 0}, {255, 183, 0},
+ {255, 184, 0}, {255, 187, 7}, {255, 188, 10}, {255, 189, 14},
+ {255, 191, 18}, {255, 192, 21}, {255, 193, 25}, {255, 195, 29},
+ {255, 197, 36}, {255, 198, 40}, {255, 200, 43}, {255, 202, 51},
+ {255, 204, 54}, {255, 206, 61}, {255, 207, 65}, {255, 210, 72},
+ {255, 211, 76}, {255, 214, 83}, {255, 216, 91}, {255, 219, 98},
+ {255, 221, 105}, {255, 223, 109}, {255, 225, 116}, {255, 228, 123},
+ {255, 232, 134}, {255, 234, 142}, {255, 237, 149}, {255, 239, 156},
+ {255, 240, 160}, {255, 243, 167}, {255, 246, 174}, {255, 248, 182},
+ {255, 249, 185}, {255, 252, 193}, {255, 253, 196}, {255, 255, 204},
+ {255, 255, 207}, {255, 255, 211}, {255, 255, 218}, {255, 255, 222},
+ {255, 255, 225}, {255, 255, 229}, {255, 255, 233}, {255, 255, 236},
+ {255, 255, 240}, {255, 255, 244}, {255, 255, 247}, {255, 255, 255}
+};
+// *INDENT-ON*
+/* Save the two dimensional array as a png image
+ * Arguments:
+ * double *data is a pointer to an array of nx*ny values
+ * int nx is the number of COLUMNS to be written
+ * int ny is the number of ROWS to be written
+ * char *fname is the name of the picture
+ * char lang is either 'c' or 'f' denoting the memory
+ * layout. That is, if 'f' is given, then rows
+ * and columns are swapped.
+ */
+int save_png(int *data, const int height, const int width, const char *fname)
+{
+ FILE *fp;
+ png_structp pngstruct_ptr = NULL;
+ png_infop pnginfo_ptr = NULL;
+ png_byte **row_pointers = NULL;
+ int i, j, max_value;
+
+ /* Determine the maximum value */
+ max_value = INT_MIN;
+ for (i = 0; i < height * width; i++) {
+ max_value = data[i] > max_value ? data[i] : max_value;
+ }
+
+ /* Default return status is failure */
+ int status = -1;
+
+ int pixel_size = 3;
+ int depth = 8;
+
+ fp = fopen(fname, "wb");
+ if (fp == NULL) {
+ goto fopen_failed;
+ }
+
+ pngstruct_ptr =
+ png_create_write_struct(PNG_LIBPNG_VER_STRING, NULL, NULL, NULL);
+
+ if (pngstruct_ptr == NULL) {
+ goto pngstruct_create_failed;
+ }
+
+ pnginfo_ptr = png_create_info_struct(pngstruct_ptr);
+
+ if (pnginfo_ptr == NULL) {
+ goto pnginfo_create_failed;
+ }
+
+ if (setjmp(png_jmpbuf(pngstruct_ptr))) {
+ goto setjmp_failed;
+ }
+
+ png_set_IHDR(pngstruct_ptr, pnginfo_ptr, (size_t) width,
+ (size_t) height, depth, PNG_COLOR_TYPE_RGB,
+ PNG_INTERLACE_NONE,
+ PNG_COMPRESSION_TYPE_DEFAULT, PNG_FILTER_TYPE_DEFAULT);
+
+ row_pointers = png_malloc(pngstruct_ptr, height * sizeof(png_byte *));
+
+ for (i = 0; i < height; i++) {
+ png_byte *row = png_malloc(pngstruct_ptr,
+ sizeof(uint8_t) * width * pixel_size);
+ row_pointers[i] = row;
+
+ // Branch according to the memory layout
+ for (j = 0; j < width; j++) {
+ pixel_t pixel;
+ // Scale the values so that values between
+ // 0 and 100 degrees are mapped to values
+ // between 0 and 255
+ cmap(data[j + i * width], max_value, &pixel);
+ *row++ = pixel.red;
+ *row++ = pixel.green;
+ *row++ = pixel.blue;
+ }
+ }
+
+ png_init_io(pngstruct_ptr, fp);
+ png_set_rows(pngstruct_ptr, pnginfo_ptr, row_pointers);
+ png_write_png(pngstruct_ptr, pnginfo_ptr,
+ PNG_TRANSFORM_IDENTITY, NULL);
+
+ status = 0;
+
+ for (i = 0; i < height; i++) {
+ png_free(pngstruct_ptr, row_pointers[i]);
+ }
+ png_free(pngstruct_ptr, row_pointers);
+
+setjmp_failed:
+pnginfo_create_failed:
+ png_destroy_write_struct(&pngstruct_ptr, &pnginfo_ptr);
+pngstruct_create_failed:
+ fclose(fp);
+fopen_failed:
+ return status;
+}
+
+/* This routine sets the RGB values for the pixel_t structure using
+ * the colormap data heat_colormap. If the value is outside the
+ * acceptable png values 0,255 blue or red color is used instead. */
+void cmap(const int value, const int max_value, pixel_t *pix)
+{
+ int ival;
+
+ ival = ((double) value / (double) max_value) * 255;
+ /* Check for wrong values */
+ if (ival > 255) {
+ ival = 255;
+ }
+ if (ival < 0) {
+ ival = 0;
+ }
+
+ /* Pick color from heat_colormap */
+ pix->red = heat_colormap[ival][0];
+ pix->green = heat_colormap[ival][1];
+ pix->blue = heat_colormap[ival][2];
+}
diff --git a/tasks/fortran/pngwriter.h b/tasks/fortran/pngwriter.h
new file mode 100644
index 0000000000000000000000000000000000000000..9abe52b7e5e83e93f9f8802c1ededbaf190c26fa
--- /dev/null
+++ b/tasks/fortran/pngwriter.h
@@ -0,0 +1,6 @@
+#ifndef PNGWRITER_H_
+#define PNGWRITER_H_
+
+int save_png(int *data, const int nx, const int ny, const char *fname);
+
+#endif
diff --git a/tasks/fortran/pngwriter_mod.F90 b/tasks/fortran/pngwriter_mod.F90
new file mode 100644
index 0000000000000000000000000000000000000000..5c6054e407869133543b23a8dfd9ecb7acb47d75
--- /dev/null
+++ b/tasks/fortran/pngwriter_mod.F90
@@ -0,0 +1,40 @@
+! PNG writer
+module pngwriter
+
+contains
+
+ function save_png(data, nx, ny, fname) result(stat)
+
+ use, intrinsic :: ISO_C_BINDING
+ implicit none
+
+ integer, dimension(:,:), intent(in) :: data
+ integer, intent(in) :: nx, ny
+ character(len=*), intent(in) :: fname
+ integer :: stat
+
+ ! Interface for save_png C-function
+ interface
+ ! The C-function definition is
+ ! int save_png(double *data, const int nx, const int ny,
+ ! const char *fname)
+ function save_png_c(data, nx, ny, fname, order) &
+ & bind(C,name="save_png") result(stat)
+ use, intrinsic :: ISO_C_BINDING
+ implicit none
+ integer(kind=C_INT) :: data(*)
+ integer(kind=C_INT), value, intent(IN) :: nx, ny
+ character(kind=C_CHAR), intent(IN) :: fname(*)
+ character(kind=C_CHAR), value, intent(IN) :: order
+ integer(kind=C_INT) :: stat
+ end function save_png_c
+ end interface
+
+ stat = save_png_c(data, nx, ny, trim(fname) // C_NULL_CHAR, 'f')
+ if (stat /= 0) then
+ write(*,*) 'save_png returned error!'
+ end if
+
+ end function save_png
+
+end module pngwriter
diff --git a/tasks/fortran/solution/Makefile b/tasks/fortran/solution/Makefile
new file mode 100644
index 0000000000000000000000000000000000000000..323942db80f4581b003b06b2500f3a3adfe9e084
--- /dev/null
+++ b/tasks/fortran/solution/Makefile
@@ -0,0 +1,50 @@
+ifeq ($(COMP),)
+COMP=cray
+endif
+
+COMMONDIR=../../../heat/common
+
+ifeq ($(COMP),cray)
+FC=ftn
+CC=cc
+FCFLAGS=-O3
+CCFLAGS=-O3 -I/appl/opt/libpng/include -I$(COMMONDIR)
+LDFLAGS=-L/appl/opt/libpng/lib
+LIBS=-lpng -lz
+endif
+
+ifeq ($(COMP),gnu)
+FC=gfortran
+CC=gcc
+FCFLAGS=-fopenmp -O3 -Wall
+CCFLAGS=-fopenmp -O3 -Wall -I$(COMMONDIR)
+LDFLAGS=
+LIBS=-lpng
+endif
+
+EXE=mandelbrot
+OBJS=mandelbrot.o pngwriter_mod.o
+OBJS_PNG=pngwriter.o
+CORRECT_OBJS_PNG=$(COMMONDIR)/pngwriter.o
+
+all: $(EXE)
+
+$(COMMONDIR)/pngwriter.o: $(COMMONDIR)/pngwriter.c $(COMMONDIR)/pngwriter.h
+pngwriter_mod.o: pngwriter_mod.F90
+mandelbrot.o: mandelbrot.F90 pngwriter_mod.o
+
+pngwriter.o: pngwriter.c pngwriter.h
+ $(CC) $(CCFLAGS) -c -o $@ $<
+
+$(EXE): $(OBJS) $(OBJS_PNG)
+ $(FC) $(FCFLAGS) $(OBJS) $(OBJS_PNG) -o $@ $(LDFLAGS) $(LIBS)
+
+%.o: %.F90
+ $(FC) $(FCFLAGS) -c $< -o $@
+
+%.o: %.c
+ $(CC) $(CCFLAGS) -c $< -o $@
+
+.PHONY: clean
+clean:
+ -/bin/rm -f $(EXE) a.out *.o *.mod *.png *~ mandelbrot.png
diff --git a/tasks/fortran/solution/mandelbrot.F90 b/tasks/fortran/solution/mandelbrot.F90
new file mode 100644
index 0000000000000000000000000000000000000000..4fab3ebc17ba67d030652df0f85c55ab372279d7
--- /dev/null
+++ b/tasks/fortran/solution/mandelbrot.F90
@@ -0,0 +1,129 @@
+program mandelbrot
+
+ use iso_fortran_env, only : REAL64
+ use pngwriter
+ use omp_lib
+
+ implicit none
+
+ integer, parameter :: dp = REAL64
+
+ integer, parameter :: max_iter_count = 512
+ integer, parameter :: max_depth = 6
+ integer, parameter :: min_size = 32
+ integer, parameter :: subdiv = 4
+
+ integer, parameter :: w = 2048
+ integer, parameter :: h = w
+
+ integer, pointer, dimension(:,:) :: iter_counts
+
+ integer :: stat
+
+ complex(dp) :: cmin, cmax
+ real(dp) :: t0, t1
+
+ allocate(iter_counts(w, h))
+
+
+ t0 = omp_get_wtime()
+
+ cmin = (-1.5, -1.0)
+ cmax = (0.5, 1.0)
+
+ !$omp parallel
+ !$omp single
+ call mandelbrot_block(iter_counts, w, h, cmin, cmax, 0, 0, w, 1)
+ !$omp end single
+ !$omp end parallel
+
+ t1 = omp_get_wtime()
+
+ stat = save_png(iter_counts, h, w, 'mandelbrot.png')
+
+ deallocate(iter_counts)
+
+ write(*,*) 'Mandelbrot set computed in', t1 - t0, 's'
+
+contains
+
+ ! Computes the Mandelbrot image recursively
+ ! At each call, the image is divided into smaller blocks (by a factor of
+ ! subdiv), and the function is called recursively with arguments corresponding
+ ! to subblock. When maximum recursion depth is reached or size of block
+ ! is smaller than predefined minimum, one starts to calculate actual pixel
+ ! values
+ !
+ ! - - - - - - - - ----- -----
+ ! | | | | | |
+ ! | | ----- -----
+ ! | | --> --> ...
+ ! | | ----- -----
+ ! | | | | | |
+ ! | | ----- -----
+ ! ---------------
+ !
+
+ recursive subroutine mandelbrot_block(iter_counts, w, h, cmin, cmax, &
+ x0, y0, d, depth)
+ implicit none
+
+ integer, pointer, dimension(:,:), intent(inout) :: iter_counts
+ integer, intent(in) :: w, h, x0, y0, d, depth
+ complex(dp), intent(in) :: cmin, cmax
+
+ integer :: block_size, i, j
+
+ block_size = d / subdiv
+ if ((depth + 1 < max_depth) .and. (block_size > min_size)) then
+ ! Subdivide recursively
+ do i=0, subdiv - 1
+ do j=0, subdiv - 1
+ !$omp task
+ call mandelbrot_block(iter_counts, w, h, cmin, cmax, &
+ x0 + i*block_size, y0 + j*block_size, &
+ block_size, depth + 1)
+ !$omp end task
+ end do
+ end do
+ else
+ ! Last recursion level reached, calculate the values
+ do j = y0 + 1, y0 + d
+ do i = x0 + 1, x0 + d
+ iter_counts(i, j) = kernel(h, w, cmin, cmax, i-1, j-1)
+ end do
+ end do
+ end if
+
+ end subroutine mandelbrot_block
+
+ ! Calculate iteration count for a pixel.
+ ! This function does not need to be edited
+ integer function kernel(h, w, cmin, cmax, x, y)
+ implicit none
+
+ integer, intent(in) :: h, w, x, y
+ complex(dp), intent(in) :: cmin, cmax
+
+ integer :: iteration
+ complex(dp) :: z, dc, c
+ real(dp) :: fx, fy
+
+ dc = cmax - cmin
+ fx = real(x, dp) / w
+ fy = real(y, dp) / h
+ c = cmin + fx * real(dc) + fy * imag(dc) * (0.0, 1.0)
+
+ z = c
+ iteration = 0
+
+ do while (iteration < max_iter_count .and. abs(z)**2 < 4)
+ z = z**2 + c
+ iteration = iteration + 1
+ end do
+
+ kernel = iteration
+
+ end function kernel
+
+end program mandelbrot
diff --git a/tasks/fortran/solution/pngwriter.c b/tasks/fortran/solution/pngwriter.c
new file mode 100644
index 0000000000000000000000000000000000000000..94e96345ed1f435cfc923bf0fa1ad59e3dadf9e1
--- /dev/null
+++ b/tasks/fortran/solution/pngwriter.c
@@ -0,0 +1,202 @@
+#include
+#include
+#include
+#include
+#include
+#include "pngwriter.h"
+
+/* Datatype for RGB pixel */
+typedef struct {
+ uint8_t red;
+ uint8_t green;
+ uint8_t blue;
+} pixel_t;
+
+void cmap(const int value, const int max_val, pixel_t *pix);
+
+/* Heat colormap from black to white */
+// *INDENT-OFF*
+static int heat_colormap[256][3] = {
+ { 0, 0, 0}, { 35, 0, 0}, { 52, 0, 0}, { 60, 0, 0},
+ { 63, 1, 0}, { 64, 2, 0}, { 68, 5, 0}, { 69, 6, 0},
+ { 72, 8, 0}, { 74, 10, 0}, { 77, 12, 0}, { 78, 14, 0},
+ { 81, 16, 0}, { 83, 17, 0}, { 85, 19, 0}, { 86, 20, 0},
+ { 89, 22, 0}, { 91, 24, 0}, { 92, 25, 0}, { 94, 26, 0},
+ { 95, 28, 0}, { 98, 30, 0}, {100, 31, 0}, {102, 33, 0},
+ {103, 34, 0}, {105, 35, 0}, {106, 36, 0}, {108, 38, 0},
+ {109, 39, 0}, {111, 40, 0}, {112, 42, 0}, {114, 43, 0},
+ {115, 44, 0}, {117, 45, 0}, {119, 47, 0}, {119, 47, 0},
+ {120, 48, 0}, {122, 49, 0}, {123, 51, 0}, {125, 52, 0},
+ {125, 52, 0}, {126, 53, 0}, {128, 54, 0}, {129, 56, 0},
+ {129, 56, 0}, {131, 57, 0}, {132, 58, 0}, {134, 59, 0},
+ {134, 59, 0}, {136, 61, 0}, {137, 62, 0}, {137, 62, 0},
+ {139, 63, 0}, {139, 63, 0}, {140, 65, 0}, {142, 66, 0},
+ {148, 71, 0}, {149, 72, 0}, {149, 72, 0}, {151, 73, 0},
+ {151, 73, 0}, {153, 75, 0}, {153, 75, 0}, {154, 76, 0},
+ {154, 76, 0}, {154, 76, 0}, {156, 77, 0}, {156, 77, 0},
+ {157, 79, 0}, {157, 79, 0}, {159, 80, 0}, {159, 80, 0},
+ {159, 80, 0}, {160, 81, 0}, {160, 81, 0}, {162, 82, 0},
+ {162, 82, 0}, {163, 84, 0}, {163, 84, 0}, {165, 85, 0},
+ {165, 85, 0}, {166, 86, 0}, {166, 86, 0}, {166, 86, 0},
+ {168, 87, 0}, {168, 87, 0}, {170, 89, 0}, {170, 89, 0},
+ {171, 90, 0}, {171, 90, 0}, {173, 91, 0}, {173, 91, 0},
+ {174, 93, 0}, {174, 93, 0}, {176, 94, 0}, {176, 94, 0},
+ {177, 95, 0}, {177, 95, 0}, {179, 96, 0}, {179, 96, 0},
+ {180, 98, 0}, {182, 99, 0}, {182, 99, 0}, {183, 100, 0},
+ {183, 100, 0}, {185, 102, 0}, {185, 102, 0}, {187, 103, 0},
+ {187, 103, 0}, {188, 104, 0}, {188, 104, 0}, {190, 105, 0},
+ {191, 107, 0}, {191, 107, 0}, {193, 108, 0}, {193, 108, 0},
+ {194, 109, 0}, {196, 110, 0}, {196, 110, 0}, {197, 112, 0},
+ {197, 112, 0}, {199, 113, 0}, {200, 114, 0}, {200, 114, 0},
+ {202, 116, 0}, {202, 116, 0}, {204, 117, 0}, {205, 118, 0},
+ {205, 118, 0}, {207, 119, 0}, {208, 121, 0}, {208, 121, 0},
+ {210, 122, 0}, {211, 123, 0}, {211, 123, 0}, {213, 124, 0},
+ {214, 126, 0}, {214, 126, 0}, {216, 127, 0}, {217, 128, 0},
+ {217, 128, 0}, {219, 130, 0}, {221, 131, 0}, {221, 131, 0},
+ {222, 132, 0}, {224, 133, 0}, {224, 133, 0}, {225, 135, 0},
+ {227, 136, 0}, {227, 136, 0}, {228, 137, 0}, {230, 138, 0},
+ {230, 138, 0}, {231, 140, 0}, {233, 141, 0}, {233, 141, 0},
+ {234, 142, 0}, {236, 144, 0}, {236, 144, 0}, {238, 145, 0},
+ {239, 146, 0}, {241, 147, 0}, {241, 147, 0}, {242, 149, 0},
+ {244, 150, 0}, {244, 150, 0}, {245, 151, 0}, {247, 153, 0},
+ {247, 153, 0}, {248, 154, 0}, {250, 155, 0}, {251, 156, 0},
+ {251, 156, 0}, {253, 158, 0}, {255, 159, 0}, {255, 159, 0},
+ {255, 160, 0}, {255, 161, 0}, {255, 163, 0}, {255, 163, 0},
+ {255, 164, 0}, {255, 165, 0}, {255, 167, 0}, {255, 167, 0},
+ {255, 168, 0}, {255, 169, 0}, {255, 169, 0}, {255, 170, 0},
+ {255, 172, 0}, {255, 173, 0}, {255, 173, 0}, {255, 174, 0},
+ {255, 175, 0}, {255, 177, 0}, {255, 178, 0}, {255, 179, 0},
+ {255, 181, 0}, {255, 181, 0}, {255, 182, 0}, {255, 183, 0},
+ {255, 184, 0}, {255, 187, 7}, {255, 188, 10}, {255, 189, 14},
+ {255, 191, 18}, {255, 192, 21}, {255, 193, 25}, {255, 195, 29},
+ {255, 197, 36}, {255, 198, 40}, {255, 200, 43}, {255, 202, 51},
+ {255, 204, 54}, {255, 206, 61}, {255, 207, 65}, {255, 210, 72},
+ {255, 211, 76}, {255, 214, 83}, {255, 216, 91}, {255, 219, 98},
+ {255, 221, 105}, {255, 223, 109}, {255, 225, 116}, {255, 228, 123},
+ {255, 232, 134}, {255, 234, 142}, {255, 237, 149}, {255, 239, 156},
+ {255, 240, 160}, {255, 243, 167}, {255, 246, 174}, {255, 248, 182},
+ {255, 249, 185}, {255, 252, 193}, {255, 253, 196}, {255, 255, 204},
+ {255, 255, 207}, {255, 255, 211}, {255, 255, 218}, {255, 255, 222},
+ {255, 255, 225}, {255, 255, 229}, {255, 255, 233}, {255, 255, 236},
+ {255, 255, 240}, {255, 255, 244}, {255, 255, 247}, {255, 255, 255}
+};
+// *INDENT-ON*
+/* Save the two dimensional array as a png image
+ * Arguments:
+ * double *data is a pointer to an array of nx*ny values
+ * int nx is the number of COLUMNS to be written
+ * int ny is the number of ROWS to be written
+ * char *fname is the name of the picture
+ * char lang is either 'c' or 'f' denoting the memory
+ * layout. That is, if 'f' is given, then rows
+ * and columns are swapped.
+ */
+int save_png(int *data, const int height, const int width, const char *fname)
+{
+ FILE *fp;
+ png_structp pngstruct_ptr = NULL;
+ png_infop pnginfo_ptr = NULL;
+ png_byte **row_pointers = NULL;
+ int i, j, max_value;
+
+ /* Determine the maximum value */
+ max_value = INT_MIN;
+ for (i = 0; i < height * width; i++) {
+ max_value = data[i] > max_value ? data[i] : max_value;
+ }
+
+ /* Default return status is failure */
+ int status = -1;
+
+ int pixel_size = 3;
+ int depth = 8;
+
+ fp = fopen(fname, "wb");
+ if (fp == NULL) {
+ goto fopen_failed;
+ }
+
+ pngstruct_ptr =
+ png_create_write_struct(PNG_LIBPNG_VER_STRING, NULL, NULL, NULL);
+
+ if (pngstruct_ptr == NULL) {
+ goto pngstruct_create_failed;
+ }
+
+ pnginfo_ptr = png_create_info_struct(pngstruct_ptr);
+
+ if (pnginfo_ptr == NULL) {
+ goto pnginfo_create_failed;
+ }
+
+ if (setjmp(png_jmpbuf(pngstruct_ptr))) {
+ goto setjmp_failed;
+ }
+
+ png_set_IHDR(pngstruct_ptr, pnginfo_ptr, (size_t) width,
+ (size_t) height, depth, PNG_COLOR_TYPE_RGB,
+ PNG_INTERLACE_NONE,
+ PNG_COMPRESSION_TYPE_DEFAULT, PNG_FILTER_TYPE_DEFAULT);
+
+ row_pointers = png_malloc(pngstruct_ptr, height * sizeof(png_byte *));
+
+ for (i = 0; i < height; i++) {
+ png_byte *row = png_malloc(pngstruct_ptr,
+ sizeof(uint8_t) * width * pixel_size);
+ row_pointers[i] = row;
+
+ // Branch according to the memory layout
+ for (j = 0; j < width; j++) {
+ pixel_t pixel;
+ // Scale the values so that values between
+ // 0 and 100 degrees are mapped to values
+ // between 0 and 255
+ cmap(data[j + i * width], max_value, &pixel);
+ *row++ = pixel.red;
+ *row++ = pixel.green;
+ *row++ = pixel.blue;
+ }
+ }
+
+ png_init_io(pngstruct_ptr, fp);
+ png_set_rows(pngstruct_ptr, pnginfo_ptr, row_pointers);
+ png_write_png(pngstruct_ptr, pnginfo_ptr,
+ PNG_TRANSFORM_IDENTITY, NULL);
+
+ status = 0;
+
+ for (i = 0; i < height; i++) {
+ png_free(pngstruct_ptr, row_pointers[i]);
+ }
+ png_free(pngstruct_ptr, row_pointers);
+
+setjmp_failed:
+pnginfo_create_failed:
+ png_destroy_write_struct(&pngstruct_ptr, &pnginfo_ptr);
+pngstruct_create_failed:
+ fclose(fp);
+fopen_failed:
+ return status;
+}
+
+/* This routine sets the RGB values for the pixel_t structure using
+ * the colormap data heat_colormap. If the value is outside the
+ * acceptable png values 0,255 blue or red color is used instead. */
+void cmap(const int value, const int max_value, pixel_t *pix)
+{
+ int ival;
+
+ ival = ((double) value / (double) max_value) * 255;
+ /* Check for wrong values */
+ if (ival > 255) {
+ ival = 255;
+ }
+ if (ival < 0) {
+ ival = 0;
+ }
+
+ /* Pick color from heat_colormap */
+ pix->red = heat_colormap[ival][0];
+ pix->green = heat_colormap[ival][1];
+ pix->blue = heat_colormap[ival][2];
+}
diff --git a/tasks/fortran/solution/pngwriter.h b/tasks/fortran/solution/pngwriter.h
new file mode 100644
index 0000000000000000000000000000000000000000..9abe52b7e5e83e93f9f8802c1ededbaf190c26fa
--- /dev/null
+++ b/tasks/fortran/solution/pngwriter.h
@@ -0,0 +1,6 @@
+#ifndef PNGWRITER_H_
+#define PNGWRITER_H_
+
+int save_png(int *data, const int nx, const int ny, const char *fname);
+
+#endif
diff --git a/tasks/fortran/solution/pngwriter_mod.F90 b/tasks/fortran/solution/pngwriter_mod.F90
new file mode 100644
index 0000000000000000000000000000000000000000..5c6054e407869133543b23a8dfd9ecb7acb47d75
--- /dev/null
+++ b/tasks/fortran/solution/pngwriter_mod.F90
@@ -0,0 +1,40 @@
+! PNG writer
+module pngwriter
+
+contains
+
+ function save_png(data, nx, ny, fname) result(stat)
+
+ use, intrinsic :: ISO_C_BINDING
+ implicit none
+
+ integer, dimension(:,:), intent(in) :: data
+ integer, intent(in) :: nx, ny
+ character(len=*), intent(in) :: fname
+ integer :: stat
+
+ ! Interface for save_png C-function
+ interface
+ ! The C-function definition is
+ ! int save_png(double *data, const int nx, const int ny,
+ ! const char *fname)
+ function save_png_c(data, nx, ny, fname, order) &
+ & bind(C,name="save_png") result(stat)
+ use, intrinsic :: ISO_C_BINDING
+ implicit none
+ integer(kind=C_INT) :: data(*)
+ integer(kind=C_INT), value, intent(IN) :: nx, ny
+ character(kind=C_CHAR), intent(IN) :: fname(*)
+ character(kind=C_CHAR), value, intent(IN) :: order
+ integer(kind=C_INT) :: stat
+ end function save_png_c
+ end interface
+
+ stat = save_png_c(data, nx, ny, trim(fname) // C_NULL_CHAR, 'f')
+ if (stat /= 0) then
+ write(*,*) 'save_png returned error!'
+ end if
+
+ end function save_png
+
+end module pngwriter
diff --git a/work-sharing/LICENSE.txt b/work-sharing/LICENSE.txt
new file mode 100644
index 0000000000000000000000000000000000000000..026e9d4a227009a7e61053b626b7d7c1bfa443cc
--- /dev/null
+++ b/work-sharing/LICENSE.txt
@@ -0,0 +1,15 @@
+
+Copyright (C) 2018 CSC - IT Center for Science Ltd.
+
+Licensed under the terms of the GNU General Public License as
+published by the Free Software Foundation, either version 3 of the
+License, or (at your option) any later version.
+
+Code is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+Copy of the GNU General Public License can be obtained from
+.
+
diff --git a/work-sharing/README.md b/work-sharing/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..a64b48428b8ff68e0568b4f32e406b7f7d6a8bff
--- /dev/null
+++ b/work-sharing/README.md
@@ -0,0 +1,6 @@
+## Work sharing for a simple loop ##
+
+In [sum.c](sum.c) (or [sum.F90](sum.F90) for Fortran) is a skeleton
+implementation for a simple summation of two vectors, C=A+B. Add the
+computation loop and add the parallel region with work sharing
+directives so that the vector addition is executed in parallel.
diff --git a/work-sharing/solution/sum.F90 b/work-sharing/solution/sum.F90
new file mode 100644
index 0000000000000000000000000000000000000000..6c61b35fad87787129da5486a24f3147e69e6755
--- /dev/null
+++ b/work-sharing/solution/sum.F90
@@ -0,0 +1,26 @@
+program vectorsum
+ implicit none
+ integer, parameter :: rk = kind(1d0)
+ integer, parameter :: ik = selected_int_kind(9)
+ integer, parameter :: nx = 102400
+
+ real(kind=rk), dimension(nx) :: vecA, vecB, vecC
+ real(kind=rk) :: sum
+ integer(kind=ik) :: i
+
+ ! Initialization of vectors
+ do i = 1, nx
+ vecA(i) = 1.0_rk/(real(nx - i + 1, kind=rk))
+ vecB(i) = vecA(i)**2
+ end do
+
+ !$omp parallel do default(shared) private(i)
+ do i = 1, nx
+ vecC(i) = vecA(i) * vecB(i)
+ end do
+ !$omp end parallel do
+
+ ! Compute the check value
+ write(*,*) 'Reduction sum: ', sum(vecC)
+
+end program vectorsum
diff --git a/work-sharing/solution/sum.c b/work-sharing/solution/sum.c
new file mode 100644
index 0000000000000000000000000000000000000000..957d767a57a23de733360be4a5ed15668407a4c2
--- /dev/null
+++ b/work-sharing/solution/sum.c
@@ -0,0 +1,30 @@
+#include
+
+#define NX 102400
+
+int main(void)
+{
+ double vecA[NX], vecB[NX], vecC[NX];
+ double sum;
+ int i;
+
+ /* Initialization of the vectors */
+ for (i = 0; i < NX; i++) {
+ vecA[i] = 1.0 / ((double)(NX - i));
+ vecB[i] = vecA[i] * vecA[i];
+ }
+
+ #pragma omp parallel for default(shared) private(i)
+ for (i = 0; i < NX; i++) {
+ vecC[i] = vecA[i] * vecB[i];
+ }
+
+ sum = 0.0;
+ /* Compute the check value */
+ for (i = 0; i < NX; i++) {
+ sum += vecC[i];
+ }
+ printf("Reduction sum: %18.16f\n", sum);
+
+ return 0;
+}
diff --git a/work-sharing/sum.F90 b/work-sharing/sum.F90
new file mode 100644
index 0000000000000000000000000000000000000000..f527169555980efb0d28fd10262b560b04c38aa4
--- /dev/null
+++ b/work-sharing/sum.F90
@@ -0,0 +1,24 @@
+program vectorsum
+ implicit none
+ integer, parameter :: rk = kind(1d0)
+ integer, parameter :: ik = selected_int_kind(9)
+ integer, parameter :: nx = 102400
+
+ real(kind=rk), dimension(nx) :: vecA, vecB, vecC
+ real(kind=rk) :: sum
+ integer(kind=ik) :: i
+
+ ! Initialization of vectors
+ do i = 1, nx
+ vecA(i) = 1.0_rk/(real(nx - i + 1, kind=rk))
+ vecB(i) = vecA(i)**2
+ end do
+
+ ! TODO:
+ ! Implement here the parallelized version of vector addition,
+ ! vecC = vecA + vecB
+
+ ! Compute the check value
+ write(*,*) 'Reduction sum: ', sum(vecC)
+
+end program vectorsum
diff --git a/work-sharing/sum.c b/work-sharing/sum.c
new file mode 100644
index 0000000000000000000000000000000000000000..05bea390bef8ea2bbd71b8518035584f07c83b59
--- /dev/null
+++ b/work-sharing/sum.c
@@ -0,0 +1,30 @@
+#include
+
+#define NX 102400
+
+int main(void)
+{
+ double vecA[NX], vecB[NX], vecC[NX];
+ double sum;
+ int i;
+
+ /* Initialization of the vectors */
+ for (i = 0; i < NX; i++) {
+ vecA[i] = 1.0 / ((double)(NX - i));
+ vecB[i] = vecA[i] * vecA[i];
+ }
+
+ /* TODO:
+ * Implement here a parallelized version of vector addition,
+ * vecC = vecA + vecB
+ */
+
+ sum = 0.0;
+ /* Compute the check value */
+ for (i = 0; i < NX; i++) {
+ sum += vecC[i];
+ }
+ printf("Reduction sum: %18.16f\n", sum);
+
+ return 0;
+}