diff --git a/README.md b/README.md index dad80016ae15b6a1b2f1077072a7fd240dc32d86..2724e49d6d1c9993cf31034688bf78731b4ba905 100644 --- a/README.md +++ b/README.md @@ -8,9 +8,43 @@ be built and run as: - gcc -o exe -fopenmp exercise.c - gfortran -o exe -fopenmp exercise.c -where gcc/gfortran and -fopenmp should be replaced by proper compiler commands and options if you are not using the GNU compilers. +where gcc/gfortran and -fopenmp should be replaced by proper compiler commands +and options if you are not using the GNU compilers. For more complex cases a Makefile is provided. ## Exercises -## Examples \ No newline at end of file + - [Hello world](hello-world) Simplest possible OpenMP program + (C and Fortran versions). Level: **basic** + - [Parallel region and data sharing](data-sharing) The basic data sharing + primitives (C and Fortran versions). Level: **basic** + - [Work sharing for a simple loop](work-sharing) Simple parallelization of + for/do loop (C and Fortran versions). Level: **basic** + - [Vector sum and race condition](race-condition) A basic example of race + condition and various ways for resolving it (C and Fortran versions). + Level: **intemediate** + - [Using OpenMP tasks for dynamic parallelization](tasks) Utilising OpenMP + task construct for more dynamic parallelization (C and Fortran versions). + Level: **intemediate** + + + +## Examples + - [Heat equation](heat-equation) A two dimensional heat equation solver which + is parallelized with OpenMP. (C and Fortran versions). + Level: **intermediate** + +## How to contribute + +Any contributions (new exercises and examples, bug fixes, improvements etc.) are +warmly welcome. In order to contribute, please follow the standard +Gitlab workflow: + +1. Fork the project into your personal space +2. Create a feature branch +3. Work on your contributions +4. Push the commit(s) to your fork +5. Submit a merge request to the master branch + +As a quality assurance, the merge request is reviewed by PRACE staff before it is accepted into main branch. + diff --git a/data-sharing/LICENSE.txt b/data-sharing/LICENSE.txt new file mode 100644 index 0000000000000000000000000000000000000000..026e9d4a227009a7e61053b626b7d7c1bfa443cc --- /dev/null +++ b/data-sharing/LICENSE.txt @@ -0,0 +1,15 @@ + +Copyright (C) 2018 CSC - IT Center for Science Ltd. + +Licensed under the terms of the GNU General Public License as +published by the Free Software Foundation, either version 3 of the +License, or (at your option) any later version. + +Code is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +Copy of the GNU General Public License can be obtained from +. + diff --git a/data-sharing/README.md b/data-sharing/README.md new file mode 100644 index 0000000000000000000000000000000000000000..a21d1de1afe39e0b0ef059e7dc669d8a7504a06b --- /dev/null +++ b/data-sharing/README.md @@ -0,0 +1,9 @@ +## Parallel region and data sharing ## + +Take a look at the exercise skeleton in [variables.c](variables.c) (or +[variables.F90](variables.F90) for Fortran). Add an OpenMP parallel +region aroung the block where the variables `Var1` and `Var2` are +printed and manipulated. What results do you get when you define the +variables as **shared**, **private** or **firstprivate**? Explain why +do you get different results. + diff --git a/data-sharing/solution/variables.F90 b/data-sharing/solution/variables.F90 new file mode 100644 index 0000000000000000000000000000000000000000..776adc3d3321273f74410b0756899ab3b0fff6d4 --- /dev/null +++ b/data-sharing/solution/variables.F90 @@ -0,0 +1,31 @@ +program exer1 + implicit none + integer :: var1, var2 + var1 = 1 + var2 = 2 + + !$OMP PARALLEL PRIVATE(VAR1, VAR2) + print *, 'Region 1: var1=', var1, 'var2=', var2 + var1 = var1 + 1 + var2 = var2 + 1 + !$OMP END PARALLEL + print *, 'After region 1: var1=', var1, 'var2=', var2 + print * + + !$OMP PARALLEL FIRSTPRIVATE(VAR1, VAR2) + print *, 'Region 2: var1=', var1, 'var2=', var2 + var1 = var1 + 1 + var2 = var2 + 1 + !$OMP END PARALLEL + print *, 'After region 2: var1=', var1, 'var2=', var2 + print * + + !$OMP PARALLEL + print *, 'Region 3: var1=', var1, 'var2=', var2 + var1 = var1 + 1 + var2 = var2 + 1 + !$OMP END PARALLEL + print *, 'After region 3: var1=', var1, 'var2=', var2 + print * + +end program exer1 diff --git a/data-sharing/solution/variables.c b/data-sharing/solution/variables.c new file mode 100644 index 0000000000000000000000000000000000000000..cf23d18d243423762f27059cb9815539e90e25fa --- /dev/null +++ b/data-sharing/solution/variables.c @@ -0,0 +1,33 @@ +#include + +int main(void) +{ + int var1 = 1, var2 = 2; + + #pragma omp parallel private(var1, var2) + { + printf("Region 1: var1=%i, var2=%i\n", var1, var2); + var1++; + var2++; + } + printf("After region 1: var1=%i, var2=%i\n\n", var1, var2); + + #pragma omp parallel firstprivate(var1, var2) + { + printf("Region 2: var1=%i, var2=%i\n", var1, var2); + var1++; + var2++; + } + printf("After region 2: var1=%i, var2=%i\n\n", var1, var2); + + #pragma omp parallel /* same as omp parallel shared(var1, var2) */ + { + printf("Region 3: var1=%i, var2=%i\n", var1, var2); + /* Note that this introduces the data race condition! */ + var1++; + var2++; + } + printf("After region 3: var1=%i, var2=%i\n\n", var1, var2); + + return 0; +} diff --git a/data-sharing/variables.F90 b/data-sharing/variables.F90 new file mode 100644 index 0000000000000000000000000000000000000000..034639f54fcf24ebb154be007e6e8dca7844bd62 --- /dev/null +++ b/data-sharing/variables.F90 @@ -0,0 +1,16 @@ +program exer1 + implicit none + integer :: var1, var2 + var1 = 1 + var2 = 2 + + ! TODO: + ! Test different data sharing clauses here + print *, 'Region 1: var1=', var1, 'var2=', var2 + var1 = var1 + 1 + var2 = var2 + 1 + !end here :) + print *, 'After region 1: var1=', var1, 'var2=', var2 + print * + +end program exer1 diff --git a/data-sharing/variables.c b/data-sharing/variables.c new file mode 100644 index 0000000000000000000000000000000000000000..8221baaf950a9e9c91aa3a58a7f648179e95b71f --- /dev/null +++ b/data-sharing/variables.c @@ -0,0 +1,18 @@ +#include + +int main(void) +{ + int var1 = 1, var2 = 2; + + /* TODO: + * Test the effect of different data sharing clauses here + */ + { + printf("Region 1: var1=%i, var2=%i\n", var1, var2); + var1++; + var2++; + } + printf("After region 1: var1=%i, var2=%i\n\n", var1, var2); + + return 0; +} diff --git a/heat-equation/LICENSE.txt b/heat-equation/LICENSE.txt new file mode 100644 index 0000000000000000000000000000000000000000..026e9d4a227009a7e61053b626b7d7c1bfa443cc --- /dev/null +++ b/heat-equation/LICENSE.txt @@ -0,0 +1,15 @@ + +Copyright (C) 2018 CSC - IT Center for Science Ltd. + +Licensed under the terms of the GNU General Public License as +published by the Free Software Foundation, either version 3 of the +License, or (at your option) any later version. + +Code is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +Copy of the GNU General Public License can be obtained from +. + diff --git a/heat-equation/README.md b/heat-equation/README.md new file mode 100644 index 0000000000000000000000000000000000000000..4c6fc5c37b745187f15dea57c5654acd61262677 --- /dev/null +++ b/heat-equation/README.md @@ -0,0 +1,72 @@ +Two dimensional heat equation +============================= +This folder contains a code which solves two dimensional heat equation +with OpenMP parallelization. A coarse grained parallelization is used, where +the threads are launched in the start of the program and kept alive throughout +the whole execution. + +Heat (or diffusion) equation is + + +![img](http://quicklatex.com/cache3/d2/ql_b3f6b8bdc3a8862c73c5a97862afb9d2_l3.png) + +where **u(x, y, t)** is the temperature field that varies in space and time, +and α is thermal diffusivity constant. The two dimensional Laplacian can be +discretized with finite differences as + + +![img](http://quicklatex.com/cache3/2d/ql_59f49ed64dbbe76704e0679b8ad7c22d_l3.png) + +Given an initial condition (u(t=0) = u0) one can follow the time dependence of +the temperature field with explicit time evolution method: + + +![img](http://quicklatex.com/cache3/9e/ql_9eb7ce5f3d5eccd6cfc1ff5638bf199e_l3.png) + +Note: Algorithm is stable only when + + +![img](http://quicklatex.com/cache3/d1/ql_0e7107049c9183d11dbb1e81174280d1_l3.png) + +The two dimensional grid is decomposed along both dimensions, and the +communication of boundary data is overlapped with computation. Restart files +are written and read with MPI I/O. + +Compilation instructions +------------------------ +For building and running the example one needs to have the +[libpng](http://www.libpng.org/pub/png/libpng.html) library installed. + +Move to proper subfolder (C or Fortran) and modify the top of the **Makefile** +according to your environment (proper compiler commands and compiler flags). +Code can be build simple with **make** + +How to run +---------- +The code can be run with arbitrary number of OpenMP threads. The default +initial temperature field is a disk in a 2000 x 2000 grid. Initial +temperature field can be read also from a file, the provided **bottle.dat** +illustrates what happens to a cold soda bottle in sauna. + + + * Running with defaults: OMP_NUM_THREADS=4 ./heat_omp + * Initial field from a file: OMP_NUM_THREADS=4 ./heat_omp bottle.dat + * Initial field from a file, given number of time steps: + OMP_NUM_THREADS=4./heat_omp bottle.dat 1000 + * Default pattern with given dimensions and time steps: + OMP_NUM_THREADS=4 ./heat_omp 800 800 1000 + + The program produces a series of heat_XXXX.png files which show the + time development of the temperature field + diff --git a/heat-equation/c/solution/Makefile b/heat-equation/c/solution/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..c772c962c5a4aac27df0cd8be3580b4071afddfc --- /dev/null +++ b/heat-equation/c/solution/Makefile @@ -0,0 +1,46 @@ +ifeq ($(COMP),) +COMP=cray +endif + +COMMONDIR=../../../common + +ifeq ($(COMP),cray) +CC=cc +CCFLAGS=-O3 -I/appl/opt/libpng/include -I$(COMMONDIR) +LDFLAGS=-L/appl/opt/libpng/lib +LIBS=-lpng -lz +endif + +ifeq ($(COMP),gnu) +CC=gcc +CCFLAGS=-fopenmp -O3 -Wall -I$(COMMONDIR) +LDFLAGS= +LIBS=-lpng +endif + +EXE=heat_omp +OBJS=core.o setup.o utilities.o io.o main.o +OBJS_PNG=$(COMMONDIR)/pngwriter.o + + +all: $(EXE) + +$(COMMONDIR)/pngwriter.o: $(COMMONDIR)/pngwriter.c $(COMMONDIR)/pngwriter.h +core.o: core.c heat.h +utilities.o: utilities.c heat.h +setup.o: setup.c heat.h +io.o: io.c heat.h +main.o: main.c heat.h + +$(OBJS_PNG): C_COMPILER := $(CC) +$(OBJS): C_COMPILER := $(CC) + +$(EXE): $(OBJS) $(OBJS_PNG) + $(CC) $(CCFLAGS) $(OBJS) $(OBJS_PNG) -o $@ $(LDFLAGS) $(LIBS) + +%.o: %.c + $(C_COMPILER) $(CCFLAGS) -c $< -o $@ + +.PHONY: clean +clean: + -/bin/rm -f $(EXE) a.out *.o *.png *~ diff --git a/heat-equation/c/solution/core.c b/heat-equation/c/solution/core.c new file mode 100644 index 0000000000000000000000000000000000000000..06f55947d027c6403fecdb187c55cfa67108f49c --- /dev/null +++ b/heat-equation/c/solution/core.c @@ -0,0 +1,35 @@ +/* Main solver routines for heat equation solver */ + +#include +#include +#include +#include + +#include "heat.h" + +/* Update the temperature values using five-point stencil */ +void evolve(field *curr, field *prev, double a, double dt) +{ + int i, j; + double dx2, dy2; + + /* Determine the temperature field at next time step + * As we have fixed boundary conditions, the outermost gridpoints + * are not updated. */ + dx2 = prev->dx * prev->dx; + dy2 = prev->dy * prev->dy; + #pragma omp for private(i, j) + for (i = 1; i < curr->nx + 1; i++) { + for (j = 1; j < curr->ny + 1; j++) { + curr->data[i][j] = prev->data[i][j] + a * dt * + ((prev->data[i + 1][j] - + 2.0 * prev->data[i][j] + + prev->data[i - 1][j]) / dx2 + + (prev->data[i][j + 1] - + 2.0 * prev->data[i][j] + + prev->data[i][j - 1]) / dy2); + } + } +} + + diff --git a/heat-equation/c/solution/heat.h b/heat-equation/c/solution/heat.h new file mode 100644 index 0000000000000000000000000000000000000000..d6a9e30d55c1d99eca083758d2fc188097ad20f7 --- /dev/null +++ b/heat-equation/c/solution/heat.h @@ -0,0 +1,46 @@ +#ifndef __HEAT_H__ +#define __HEAT_H__ + +/* Datatype for temperature field */ +typedef struct { + /* nx and ny are the true dimensions of the field. The array data + * contains also ghost layers, so it will have dimensions nx+2 x ny+2 */ + int nx; + int ny; + double dx; + double dy; + double **data; +} field; + +/* We use here fixed grid spacing */ +#define DX 0.01 +#define DY 0.01 + + +/* Function prototypes */ +double **malloc_2d(int nx, int ny); + +void free_2d(double **array); + +void set_field_dimensions(field *temperature, int nx, int ny); + +void initialize(int argc, char *argv[], field *temperature1, + field *temperature2, int *nsteps); + +void generate_field(field *temperature); + +void evolve(field *curr, field *prev, double a, double dt); + +void write_field(field *temperature, int iter); + +void read_field(field *temperature1, field *temperature2, + char *filename); + +void copy_field(field *temperature1, field *temperature2); + +void swap_fields(field *temperature1, field *temperature2); + +void finalize(field *temperature1, field *temperature2); + +#endif /* __HEAT_H__ */ + diff --git a/heat-equation/c/solution/io.c b/heat-equation/c/solution/io.c new file mode 100644 index 0000000000000000000000000000000000000000..2399510cab7463257e7af33cc30860fcf22966f0 --- /dev/null +++ b/heat-equation/c/solution/io.c @@ -0,0 +1,83 @@ +/* I/O related functions for heat equation solver */ + +#include +#include +#include +#include + +#include "heat.h" +#include "pngwriter.h" + +/* Output routine that prints out a picture of the temperature + * distribution. */ +void write_field(field *temperature, int iter) +{ + char filename[64]; + + /* The actual write routine takes only the actual data + * (without ghost layers) so we need array for that. */ + int height, width; + double **full_data; + + int i; + + height = temperature->nx; + width = temperature->ny; + + /* Copy the inner data */ + full_data = malloc_2d(height, width); + for (i = 0; i < temperature->nx; i++) + memcpy(full_data[i], &temperature->data[i + 1][1], + temperature->ny * sizeof(double)); + + sprintf(filename, "%s_%04d.png", "heat", iter); + save_png(full_data[0], height, width, filename, 'c'); + + free_2d(full_data); +} + +/* Read the initial temperature distribution from a file and + * initialize the temperature fields temperature1 and + * temperature2 to the same initial state. */ +void read_field(field *temperature1, field *temperature2, char *filename) +{ + FILE *fp; + int nx, ny, i, j; + int count; + + fp = fopen(filename, "r"); + /* Read the header */ + count = fscanf(fp, "# %d %d \n", &nx, &ny); + if (count < 2) { + fprintf(stderr, "Error while reading the input file!\n"); + exit(EXIT_FAILURE); + } + + set_field_dimensions(temperature1, nx, ny); + set_field_dimensions(temperature2, nx, ny); + + /* Allocate arrays (including ghost layers */ + temperature1->data = malloc_2d(nx + 2, ny + 2); + temperature2->data = malloc_2d(nx + 2, ny + 2); + + /* Read the actual data */ + for (i = 1; i < nx + 1; i++) { + for (j = 1; j < ny + 1; j++) { + fscanf(fp, "%lf", &temperature1->data[i][j]); + } + } + + /* Set the boundary values */ + for (i = 1; i < nx + 1; i++) { + temperature1->data[i][0] = temperature1->data[i][1]; + temperature1->data[i][ny + 1] = temperature1->data[i][ny]; + } + for (j = 0; j < ny + 2; j++) { + temperature1->data[0][j] = temperature1->data[1][j]; + temperature1->data[nx + 1][j] = temperature1->data[nx][j]; + } + + copy_field(temperature1, temperature2); + + fclose(fp); +} diff --git a/heat-equation/c/solution/main.c b/heat-equation/c/solution/main.c new file mode 100644 index 0000000000000000000000000000000000000000..959a4ea72cdd1e8e3ff53ec17aa40a13951321da --- /dev/null +++ b/heat-equation/c/solution/main.c @@ -0,0 +1,66 @@ +/* Heat equation solver in 2D. */ + +#include +#include +#include +#include + +#include "heat.h" + + +int main(int argc, char **argv) +{ + double a = 0.5; //!< Diffusion constant + field current, previous; //!< Current and previous temperature fields + + double dt; //!< Time step + int nsteps; //!< Number of time steps + + int image_interval = 1000; //!< Image output interval + + int iter; //!< Iteration counter + + double dx2, dy2; //!< delta x and y squared + + clock_t start_clock; //!< Time stamps + + #pragma omp parallel private(iter) + { + initialize(argc, argv, ¤t, &previous, &nsteps); + + #pragma omp single + { + /* Output the initial field */ + write_field(¤t, 0); + + /* Largest stable time step */ + dx2 = current.dx * current.dx; + dy2 = current.dy * current.dy; + dt = dx2 * dy2 / (2.0 * a * (dx2 + dy2)); + + /* Get the start time stamp */ + start_clock = clock(); + } + + /* Time evolve */ + for (iter = 1; iter < nsteps; iter++) { + evolve(¤t, &previous, a, dt); + if (iter % image_interval == 0) { + #pragma omp single + write_field(¤t, iter); + } + /* Swap current field so that it will be used + as previous for next iteration step */ + #pragma omp single + swap_fields(¤t, &previous); + } + } /* End of parallel region */ + + /* Determine the CPU time used for the iteration */ + printf("Iteration took %.3f seconds.\n", (double)(clock() - start_clock) / + (double)CLOCKS_PER_SEC); + printf("Reference value at 5,5: %f\n", previous.data[5][5]); + + finalize(¤t, &previous); + return 0; +} diff --git a/heat-equation/c/solution/setup.c b/heat-equation/c/solution/setup.c new file mode 100644 index 0000000000000000000000000000000000000000..2f3a6c6a6948e6160e0c875e9160131ccccd1775 --- /dev/null +++ b/heat-equation/c/solution/setup.c @@ -0,0 +1,145 @@ +/* Setup routines for heat equation solver */ + +#include +#include +#include +#include + +#include "heat.h" +#include "pngwriter.h" + +#define NSTEPS 500 // Default number of iteration steps + +/* Initialize the heat equation solver */ +void initialize(int argc, char *argv[], field *current, + field *previous, int *nsteps) +{ + /* + * Following combinations of command line arguments are possible: + * No arguments: use default field dimensions and number of time steps + * One argument: read initial field from a given file + * Two arguments: initial field from file and number of time steps + * Three arguments: field dimensions (rows,cols) and number of time steps + */ + + + int rows = 1024; //!< Field dimensions with default values + int cols = 1024; + + char input_file[64]; //!< Name of the optional input file + + int read_file = 0; + + *nsteps = NSTEPS; + + #pragma omp single + { + switch (argc) { + case 1: + /* Use default values */ + break; + case 2: + /* Read initial field from a file */ + strncpy(input_file, argv[1], 64); + read_file = 1; + break; + case 3: + /* Read initial field from a file */ + strncpy(input_file, argv[1], 64); + read_file = 1; + + /* Number of time steps */ + *nsteps = atoi(argv[2]); + break; + case 4: + /* Field dimensions */ + rows = atoi(argv[1]); + cols = atoi(argv[2]); + /* Number of time steps */ + *nsteps = atoi(argv[3]); + break; + default: + printf("Unsupported number of command line arguments\n"); + exit(-1); + } + } + + if (read_file) + #pragma omp single + read_field(current, previous, input_file); + else { + #pragma omp single + { + set_field_dimensions(current, rows, cols); + set_field_dimensions(previous, rows, cols); + } + generate_field(current); + generate_field(previous); + } +} + +/* Generate initial temperature field. Pattern is disc with a radius + * of nx_full / 6 in the center of the grid. + * Boundary conditions are (different) constant temperatures outside the grid */ +void generate_field(field *temperature) +{ + int i, j; + double radius; + int dx, dy; + + /* Allocate the temperature array, note that + * we have to allocate also the ghost layers */ + #pragma omp single + { + temperature->data = + malloc_2d(temperature->nx + 2, temperature->ny + 2); + } + + /* Radius of the source disc */ + radius = temperature->nx / 6.0; + #pragma omp for private(i, j, dx, dy) + for (i = 0; i < temperature->nx + 2; i++) { + for (j = 0; j < temperature->ny + 2; j++) { + /* Distances of point i, j from the origin */ + dx = i - temperature->nx / 2 + 1; + dy = j - temperature->ny / 2 + 1; + if (dx * dx + dy * dy < radius * radius) { + temperature->data[i][j] = 5.0; + } else { + temperature->data[i][j] = 65.0; + } + } + } + + /* Boundary conditions */ + #pragma omp for private(i) + for (i = 0; i < temperature->nx + 2; i++) { + temperature->data[i][0] = 20.0; + temperature->data[i][temperature->ny + 1] = 70.0; + } + + #pragma omp for private(j) + for (j = 0; j < temperature->ny + 2; j++) { + temperature->data[0][j] = 85.0; + temperature->data[temperature->nx + 1][j] = 5.0; + } + +} + +/* Set dimensions of the field. Note that the nx is the size of the first + * dimension and ny the second. */ +void set_field_dimensions(field *temperature, int nx, int ny) +{ + temperature->dx = DX; + temperature->dy = DY; + temperature->nx = nx; + temperature->ny = ny; +} + +/* Deallocate the 2D arrays of temperature fields */ +void finalize(field *temperature1, field *temperature2) +{ + free_2d(temperature1->data); + free_2d(temperature2->data); +} + diff --git a/heat-equation/c/solution/utilities.c b/heat-equation/c/solution/utilities.c new file mode 100644 index 0000000000000000000000000000000000000000..da7a4ece46679fb8e35eb5809a5aa819f216739a --- /dev/null +++ b/heat-equation/c/solution/utilities.c @@ -0,0 +1,51 @@ +/* Utility functions for heat equation solver + * NOTE: This file does not need to be edited! */ + +#include +#include +#include +#include + +#include "heat.h" + +/* Utility routine for allocating a two dimensional array */ +double **malloc_2d(int nx, int ny) +{ + double **array; + int i; + + array = (double **) malloc(nx * sizeof(double *)); + array[0] = (double *) malloc(nx * ny * sizeof(double)); + + for (i = 1; i < nx; i++) { + array[i] = array[0] + i * ny; + } + + return array; +} + +/* Utility routine for deallocating a two dimensional array */ +void free_2d(double **array) +{ + free(array[0]); + free(array); +} + + +/* Copy data on temperature1 into temperature2 */ +void copy_field(field *temperature1, field *temperature2) +{ + assert(temperature1->nx == temperature2->nx); + assert(temperature1->ny == temperature2->ny); + memcpy(temperature2->data[0], temperature1->data[0], + (temperature1->nx + 2) * (temperature1->ny + 2) * sizeof(double)); +} + +/* Swap the data of fields temperature1 and temperature2 */ +void swap_fields(field *temperature1, field *temperature2) +{ + double **tmp; + tmp = temperature1->data; + temperature1->data = temperature2->data; + temperature2->data = tmp; +} diff --git a/heat-equation/fortran/solution/Makefile b/heat-equation/fortran/solution/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..313d937126b4ac30a23eab31895ab5e118d7738c --- /dev/null +++ b/heat-equation/fortran/solution/Makefile @@ -0,0 +1,51 @@ +ifeq ($(COMP),) +COMP=cray +endif + +COMMONDIR=../../../common + +ifeq ($(COMP),cray) +FC=ftn +CC=cc +FCFLAGS=-O3 +CCFLAGS=-O3 -I/appl/opt/libpng/include -I$(COMMONDIR) +LDFLAGS=-L/appl/opt/libpng/lib +LIBS=-lpng -lz +endif + +ifeq ($(COMP),gnu) +FC=gfortran +CC=gcc +FCFLAGS=-fopenmp -O3 -Wall +CCFLAGS=-fopenmp -O3 -Wall -I$(COMMONDIR) +LDFLAGS= +LIBS=-lpng +endif + +EXE=heat_omp +OBJS=main.o heat_mod.o core.o setup.o utilities.o io.o pngwriter_mod.o +OBJS_PNG=$(COMMONDIR)/pngwriter.o + + +all: $(EXE) + +$(COMMONDIR)/pngwriter.o: $(COMMONDIR)/pngwriter.c $(COMMONDIR)/pngwriter.h +core.o: core.F90 heat_mod.o +utilities.o: utilities.F90 heat_mod.o +io.o: io.F90 heat_mod.o pngwriter_mod.o +setup.o: setup.F90 heat_mod.o utilities.o io.o +pngwriter_mod.o: pngwriter_mod.F90 heat_mod.o +main.o: main.F90 heat_mod.o core.o io.o setup.o utilities.o + +$(EXE): $(OBJS) $(OBJS_PNG) + $(FC) $(FCFLAGS) $(OBJS) $(OBJS_PNG) -o $@ $(LDFLAGS) $(LIBS) + +%.o: %.F90 + $(FC) $(FCFLAGS) -c $< -o $@ + +%.o: %.c + $(CC) $(CCFLAGS) -c $< -o $@ + +.PHONY: clean +clean: + -/bin/rm -f $(EXE) a.out *.o *.mod *.png *~ diff --git a/heat-equation/fortran/solution/core.F90 b/heat-equation/fortran/solution/core.F90 new file mode 100644 index 0000000000000000000000000000000000000000..4cdb1ea01bb57e1473a99d36cb0e7d076bd9e783 --- /dev/null +++ b/heat-equation/fortran/solution/core.F90 @@ -0,0 +1,37 @@ +! Main solver routines for heat equation solver +module core + +contains + + ! Compute one time step of temperature evolution + ! Arguments: + ! curr (type(field)): current temperature values + ! prev (type(field)): values from previous time step + ! a (real(dp)): update equation constant + ! dt (real(dp)): time step value + subroutine evolve(curr, prev, a, dt) + + use heat + implicit none + + type(field), intent(inout) :: curr, prev + real(dp) :: a, dt + integer :: i, j, nx, ny + + nx = curr%nx + ny = curr%ny + + !$OMP DO + do j = 1, ny + do i = 1, nx + curr%data(i, j) = prev%data(i, j) + a * dt * & + & ((prev%data(i-1, j) - 2.0 * prev%data(i, j) + & + & prev%data(i+1, j)) / curr%dx**2 + & + & (prev%data(i, j-1) - 2.0 * prev%data(i, j) + & + & prev%data(i, j+1)) / curr%dy**2) + end do + end do + !$OMP END DO + end subroutine evolve + +end module core diff --git a/heat-equation/fortran/solution/heat_mod.F90 b/heat-equation/fortran/solution/heat_mod.F90 new file mode 100644 index 0000000000000000000000000000000000000000..f34a16bbc9522e787afb2334a145188444365b71 --- /dev/null +++ b/heat-equation/fortran/solution/heat_mod.F90 @@ -0,0 +1,35 @@ +! Field metadata for heat equation solver +module heat + use iso_fortran_env, only : REAL64 + implicit none + + integer, parameter :: dp = REAL64 + real(dp), parameter :: DX = 0.01, DY = 0.01 ! Fixed grid spacing + + type :: field + integer :: nx + integer :: ny + real(dp) :: dx + real(dp) :: dy + real(dp), dimension(:,:), allocatable :: data + end type field + +contains + ! Initialize the field type metadata + ! Arguments: + ! field0 (type(field)): input field + ! nx, ny, dx, dy: field dimensions and spatial step size + subroutine set_field_dimensions(field0, nx, ny) + + implicit none + + type(field), intent(out) :: field0 + integer, intent(in) :: nx, ny + + field0%dx = DX + field0%dy = DY + field0%nx = nx + field0%ny = ny + end subroutine set_field_dimensions + +end module heat diff --git a/heat-equation/fortran/solution/io.F90 b/heat-equation/fortran/solution/io.F90 new file mode 100644 index 0000000000000000000000000000000000000000..8cf2471ccaf2a4c0aea21b5cc392185793c70a5e --- /dev/null +++ b/heat-equation/fortran/solution/io.F90 @@ -0,0 +1,75 @@ +! I/O routines for heat equation solver +module io + +contains + + ! Output routine, saves the temperature distribution as a png image + ! Arguments: + ! curr (type(field)): variable with the temperature data + ! iter (integer): index of the time step + subroutine write_field(curr, iter) + use heat + use pngwriter + implicit none + + type(field), intent(in) :: curr + integer, intent(in) :: iter + character(len=85) :: filename + + ! The actual write routine takes only the actual data + ! (without ghost layers) so we need array for that + integer :: full_nx, full_ny, stat + real(dp), dimension(:,:), allocatable, target :: full_data + + full_nx = curr%nx + full_ny = curr%ny + + allocate(full_data(full_nx, full_ny)) + full_data(1:curr%nx, 1:curr%ny) = curr%data(1:curr%nx, 1:curr%ny) + + write(filename,'(A5,I4.4,A4,A)') 'heat_', iter, '.png' + stat = save_png(full_data, full_nx, full_ny, filename) + deallocate(full_data) + end subroutine write_field + + + ! Reads the temperature distribution from an input file + ! Arguments: + ! field0 (type(field)): field variable that will store the + ! read data + ! filename (char): name of the input file + ! Note that this version assumes the input data to be in C memory layout + subroutine read_field(field0, filename) + use heat + + implicit none + + type(field), intent(out) :: field0 + character(len=85), intent(in) :: filename + + integer :: nx, ny, i + character(len=2) :: dummy + + open(10, file=filename) + ! Read the header + read(10, *) dummy, nx, ny ! nx is the number of rows + + call set_field_dimensions(field0, nx, ny) + ! The arrays for temperature field contain also a halo region + allocate(field0%data(0:field0%nx+1, 0:field0%ny+1)) + + ! Read the data + do i = 1, nx + read(10, *) field0%data(i, 1:ny) + end do + + ! Set the boundary values + field0%data(1:nx, 0 ) = field0%data(1:nx, 1 ) + field0%data(1:nx, ny+1) = field0%data(1:nx, ny ) + field0%data(0, 0:ny+1) = field0%data(1, 0:ny+1) + field0%data( nx+1, 0:ny+1) = field0%data( nx, 0:ny+1) + + close(10) + end subroutine read_field + +end module io diff --git a/heat-equation/fortran/solution/main.F90 b/heat-equation/fortran/solution/main.F90 new file mode 100644 index 0000000000000000000000000000000000000000..21155ba5b6dde8014ab91d97dbb5502ffa51367a --- /dev/null +++ b/heat-equation/fortran/solution/main.F90 @@ -0,0 +1,58 @@ +! Heat equation solver in 2D. + +program heat_solve + use heat + use core + use io + use setup + use utilities + + implicit none + + real(dp), parameter :: a = 0.5 ! Diffusion constant + type(field) :: current, previous ! Current and previus temperature fields + + real(dp) :: dt ! Time step + integer :: nsteps ! Number of time steps + integer, parameter :: image_interval = 10 ! Image output interval + + integer :: iter + + real :: start, stop ! Timers + + !$OMP PARALLEL PRIVATE(iter) + call initialize(current, previous, nsteps) + + !$OMP SINGLE + ! Draw the picture of the initial state + call write_field(current, 0) + + ! Largest stable time step + dt = current%dx**2 * current%dy**2 / & + & (2.0 * a * (current%dx**2 + current%dy**2)) + + ! Main iteration loop, save a picture every + ! image_interval steps + + call cpu_time(start) + !$OMP END SINGLE + do iter = 1, nsteps + call evolve(current, previous, a, dt) + !$OMP SINGLE + if (mod(iter, image_interval) == 0) then + call write_field(current, iter) + end if + call swap_fields(current, previous) + !$OMP END SINGLE + end do + !$OMP SINGLE + call cpu_time(stop) + !$OMP END SINGLE + !$OMP END PARALLEL + + write(*,'(A,F7.3,A)') 'Iteration took ', stop - start, ' seconds.' + write(*,'(A,G0)') 'Reference value at 5,5: ', previous % data(5,5) + + call finalize(current, previous) + +end program heat_solve diff --git a/heat-equation/fortran/solution/pngwriter_mod.F90 b/heat-equation/fortran/solution/pngwriter_mod.F90 new file mode 100644 index 0000000000000000000000000000000000000000..f6fe31be12005a52f8f688e7947c2b975ade6f01 --- /dev/null +++ b/heat-equation/fortran/solution/pngwriter_mod.F90 @@ -0,0 +1,41 @@ +! PNG writer for heat equation solver +module pngwriter + use heat + +contains + + function save_png(data, nx, ny, fname) result(stat) + + use, intrinsic :: ISO_C_BINDING + implicit none + + real(dp), dimension(:,:), intent(in) :: data + integer, intent(in) :: nx, ny + character(len=*), intent(in) :: fname + integer :: stat + + ! Interface for save_png C-function + interface + ! The C-function definition is + ! int save_png(double *data, const int nx, const int ny, + ! const char *fname) + function save_png_c(data, nx, ny, fname, order) & + & bind(C,name="save_png") result(stat) + use, intrinsic :: ISO_C_BINDING + implicit none + real(kind=C_DOUBLE) :: data(*) + integer(kind=C_INT), value, intent(IN) :: nx, ny + character(kind=C_CHAR), intent(IN) :: fname(*) + character(kind=C_CHAR), value, intent(IN) :: order + integer(kind=C_INT) :: stat + end function save_png_c + end interface + + stat = save_png_c(data, nx, ny, trim(fname) // C_NULL_CHAR, 'f') + if (stat /= 0) then + write(*,*) 'save_png returned error!' + end if + + end function save_png + +end module pngwriter diff --git a/heat-equation/fortran/solution/setup.F90 b/heat-equation/fortran/solution/setup.F90 new file mode 100644 index 0000000000000000000000000000000000000000..e14508c00cda2bb68f43f7b992c62b5a1bc9fddf --- /dev/null +++ b/heat-equation/fortran/solution/setup.F90 @@ -0,0 +1,158 @@ +! Setup routines for heat equation solver +module setup + +contains + + subroutine initialize(previous, current, nsteps) + + use heat + use utilities + use io + + implicit none + + type(field), intent(out) :: previous, current + integer, intent(out) :: nsteps + + integer :: rows, cols + logical :: using_input_file + character(len=85) :: input_file, arg ! Input file name and command line arguments + + + ! Default values for grid size and time steps + rows = 200 + cols = 200 + nsteps = 500 + using_input_file = .false. + + ! Read in the command line arguments and + ! set up the needed variables + select case(command_argument_count()) + case(0) ! No arguments -> default values + case(1) ! One argument -> input file name + using_input_file = .true. + call get_command_argument(1, input_file) + case(2) ! Two arguments -> input file name and number of steps + using_input_file = .true. + call get_command_argument(1, input_file) + call get_command_argument(2, arg) + read(arg, *) nsteps + case(3) ! Three arguments -> rows, cols and nsteps + call get_command_argument(1, arg) + read(arg, *) rows + call get_command_argument(2, arg) + read(arg, *) cols + call get_command_argument(3, arg) + read(arg, *) nsteps + case default + call usage() + stop + end select + + ! Initialize the fields according the command line arguments + if (using_input_file) then + !$OMP SINGLE + call read_field(previous, input_file) + call copy_fields(previous, current) + !$OMP END SINGLE + else + !$OMP SINGLE + call set_field_dimensions(previous, rows, cols) + call set_field_dimensions(current, rows, cols) + !$OMP END SINGLE + call generate_field(previous) + call generate_field(current) + end if + + end subroutine initialize + + ! Generate initial the temperature field. Pattern is disc with a radius + ! of nx_full / 6 in the center of the grid. + ! Boundary conditions are (different) constant temperatures outside the grid + subroutine generate_field(field0) + use heat + + implicit none + + type(field), intent(inout) :: field0 + + real(dp) :: radius2 + integer :: i, j, ds2 + + ! The arrays for field contain also a halo region + !$OMP SINGLE + allocate(field0%data(0:field0%nx+1, 0:field0%ny+1)) + !$OMP END SINGLE + + ! Square of the disk radius + radius2 = (field0%nx / 6.0_dp)**2 + + !$OMP DO + do j = 0, field0%ny + 1 + do i = 0, field0%nx + 1 + ds2 = int((i - field0%nx / 2.0_dp + 1)**2 + & + & (j - field0%ny / 2.0_dp + 1)**2) + if (ds2 < radius2) then + field0%data(i,j) = 5.0_dp + else + field0%data(i,j) = 65.0_dp + end if + end do + end do + !$OMP END DO + + ! Boundary conditions + !$OMP DO + do j = 0, field0%nx + 1 + field0%data(j, 0) = 20.0_dp + end do + !$OMP END DO NOWAIT + !$OMP DO + do j = 0, field0%nx + 1 + field0%data(j, field0%ny + 1) = 70.0_dp + end do + !$OMP END DO + !$OMP DO + do j = 0, field0%ny + 1 + field0%data(0, j) = 85.0_dp + end do + !$OMP END DO NOWAIT + !$OMP DO + do j = 0, field0%ny+1 + field0%data(field0%nx + 1, j) = 5.0_dp + end do + !$OMP END DO + + end subroutine generate_field + + + ! Clean up routine for field type + ! Arguments: + ! field0 (type(field)): field variable to be cleared + subroutine finalize(field0, field1) + use heat + + implicit none + + type(field), intent(inout) :: field0, field1 + + deallocate(field0%data) + deallocate(field1%data) + + end subroutine finalize + + ! Helper routine that prints out a simple usage if + ! user gives more than three arguments + subroutine usage() + implicit none + character(len=256) :: buf + + call get_command_argument(0, buf) + write (*,'(A)') 'Usage:' + write (*,'(A, " (default values will be used)")') trim(buf) + write (*,'(A, " ")') trim(buf) + write (*,'(A, " ")') trim(buf) + write (*,'(A, " ")') trim(buf) + end subroutine usage + +end module setup diff --git a/heat-equation/fortran/solution/utilities.F90 b/heat-equation/fortran/solution/utilities.F90 new file mode 100644 index 0000000000000000000000000000000000000000..8739f8532d223063d10484d89c012ba8b5e3503d --- /dev/null +++ b/heat-equation/fortran/solution/utilities.F90 @@ -0,0 +1,58 @@ +! Utility routines for heat equation solver +! NOTE: This file does not need to be edited! +module utilities + +contains + + ! Swap the data fields of two variables of type field + ! Arguments: + ! curr, prev (type(field)): the two variables that are swapped + subroutine swap_fields(curr, prev) + use heat + + implicit none + + type(field), intent(inout) :: curr, prev + real(dp), allocatable, dimension(:,:) :: tmp + + call move_alloc(curr%data, tmp) + call move_alloc(prev%data, curr%data) + call move_alloc(tmp, prev%data) + end subroutine swap_fields + + ! Copy the data from one field to another + ! Arguments: + ! from_field (type(field)): variable to copy from + ! to_field (type(field)): variable to copy to + subroutine copy_fields(from_field, to_field) + + use heat + implicit none + + type(field), intent(in) :: from_field + type(field), intent(out) :: to_field + + ! Consistency checks + if (.not.allocated(from_field%data)) then + write (*,*) "Can not copy from a field without allocated data" + stop + end if + if (.not.allocated(to_field%data)) then + ! Target is not initialize, allocate memory + allocate(to_field%data(lbound(from_field%data, 1):ubound(from_field%data, 1), & + & lbound(from_field%data, 2):ubound(from_field%data, 2))) + else if (any(shape(from_field%data) /= shape(to_field%data))) then + write (*,*) "Wrong field data sizes in copy routine" + print *, shape(from_field%data), shape(to_field%data) + stop + end if + + to_field%data = from_field%data + + to_field%nx = from_field%nx + to_field%ny = from_field%ny + to_field%dx = from_field%dx + to_field%dy = from_field%dy + end subroutine copy_fields + +end module utilities diff --git a/hello-world/LICENSE.txt b/hello-world/LICENSE.txt new file mode 100644 index 0000000000000000000000000000000000000000..026e9d4a227009a7e61053b626b7d7c1bfa443cc --- /dev/null +++ b/hello-world/LICENSE.txt @@ -0,0 +1,15 @@ + +Copyright (C) 2018 CSC - IT Center for Science Ltd. + +Licensed under the terms of the GNU General Public License as +published by the Free Software Foundation, either version 3 of the +License, or (at your option) any later version. + +Code is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +Copy of the GNU General Public License can be obtained from +. + diff --git a/hello-world/README.md b/hello-world/README.md new file mode 100644 index 0000000000000000000000000000000000000000..8acb6650ce72f3897be839804c549b7fc8af7b2e --- /dev/null +++ b/hello-world/README.md @@ -0,0 +1,11 @@ +## Hello World ## + +In this exercise you will test that you are able to compile and run +OpenMP application. Take a look at a simple example program in +[hello.c](hello.c) ([hello.F90](hello.F90) for +Fortran) that has been parallelized using OpenMP. It will first print +out a hello message (in serial) after which each thread will print out +an "X" character (in parallel). + +Compile this program so that you enable OpenMP, and run it with one, +two and four threads. Do you get the expected amount of "X"s? diff --git a/hello-world/hello.F90 b/hello-world/hello.F90 new file mode 100644 index 0000000000000000000000000000000000000000..f382790d1d609143e1cf61939f76604ee314d202 --- /dev/null +++ b/hello-world/hello.F90 @@ -0,0 +1,9 @@ +program hello + implicit none + + print *, 'Hello world!' + !$omp parallel + print *, 'X' + !$omp end parallel + +end program hello diff --git a/hello-world/hello.c b/hello-world/hello.c new file mode 100644 index 0000000000000000000000000000000000000000..dffe80d6d527dff3b5f6e05012e762391b384903 --- /dev/null +++ b/hello-world/hello.c @@ -0,0 +1,12 @@ +#include + +int main(int argc, char *argv[]) +{ + printf("Hello world!\n"); + #pragma omp parallel + { + printf("X\n"); + } + + return 0; +} diff --git a/hello-world/solution/hello.F90 b/hello-world/solution/hello.F90 new file mode 100644 index 0000000000000000000000000000000000000000..f382790d1d609143e1cf61939f76604ee314d202 --- /dev/null +++ b/hello-world/solution/hello.F90 @@ -0,0 +1,9 @@ +program hello + implicit none + + print *, 'Hello world!' + !$omp parallel + print *, 'X' + !$omp end parallel + +end program hello diff --git a/hello-world/solution/hello.c b/hello-world/solution/hello.c new file mode 100644 index 0000000000000000000000000000000000000000..dffe80d6d527dff3b5f6e05012e762391b384903 --- /dev/null +++ b/hello-world/solution/hello.c @@ -0,0 +1,12 @@ +#include + +int main(int argc, char *argv[]) +{ + printf("Hello world!\n"); + #pragma omp parallel + { + printf("X\n"); + } + + return 0; +} diff --git a/race-condition/LICENSE.txt b/race-condition/LICENSE.txt new file mode 100644 index 0000000000000000000000000000000000000000..026e9d4a227009a7e61053b626b7d7c1bfa443cc --- /dev/null +++ b/race-condition/LICENSE.txt @@ -0,0 +1,15 @@ + +Copyright (C) 2018 CSC - IT Center for Science Ltd. + +Licensed under the terms of the GNU General Public License as +published by the Free Software Foundation, either version 3 of the +License, or (at your option) any later version. + +Code is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +Copy of the GNU General Public License can be obtained from +. + diff --git a/race-condition/README.md b/race-condition/README.md new file mode 100644 index 0000000000000000000000000000000000000000..738bf61a3bd7bb94a951b6d5654e12b083dd2e42 --- /dev/null +++ b/race-condition/README.md @@ -0,0 +1,20 @@ +## Vector sum and race condition ## + +In [vectorsum.c](vectorsum.c) (or [vectorsum.F90](vectorsum.F90) for +Fortran) you will find a serial code that sums up all the elements of +a vector A, initialized as A=1,2,...,N. For reference, from the +arithmetic sum formula we know that the result should be S=N(N+1)/2. + +Try to parallelize the code by using only `omp parallel` or `omp for` +pragmas. Are you able to get same results with different number of +threads and in different runs? Explain why the program does not work +correctly in parallel. + +Next, try to use `reduction` clause to compute the sum correctly. + +Implement finally an alternative version where each thread computes its +own part to a private variable and the use a `critical` section after +the loop to compute the global sum. Which version (`reduction` or `critical`) +gives better performance? + + diff --git a/race-condition/solution/vectorsum.F90 b/race-condition/solution/vectorsum.F90 new file mode 100644 index 0000000000000000000000000000000000000000..63cf3bdfb8930616363b15f3dab5f70ae1f71cc0 --- /dev/null +++ b/race-condition/solution/vectorsum.F90 @@ -0,0 +1,63 @@ +program vectorsum + use iso_fortran_env, only: int64 + implicit none + integer, parameter :: ik = int64 + integer(kind=ik), parameter :: nx = 102400_ik + + integer(kind=ik), dimension(nx) :: vecA + integer(kind=ik) :: sum, psum, sumex + integer(kind=ik) :: i + + ! Initialization of vector + do i = 1, nx + vecA(i) = i + end do + + sumex = nx*(nx+1_ik)/2_ik + write(*,*) 'Arithmetic sum formula (exact): ', sumex + + sum = 0 + ! Sum with data race + !$omp parallel do default(shared) private(i) + do i = 1, nx + sum = sum + vecA(i) + end do + !$omp end parallel do + write(*,*) 'Sum with data race: ', sum + + sum = 0 + ! Dot product using critical section = SERIAL CODE + !$omp parallel do default(shared) private(i) + do i = 1, nx + !$omp critical + sum = sum + vecA(i) + !$omp end critical + end do + !$omp end parallel do + write(*,*) 'Sum using critical section: ', sum + + sum = 0 + ! Dot product using reduction + !$omp parallel do default(shared) private(i) reduction(+:sum) + do i = 1, nx + sum = sum + vecA(i) + end do + !$omp end parallel do + write(*,*) 'Sum using reduction: ', sum + + sum = 0 + ! Dot product using private variable and critical secion + !$omp parallel default(shared) private(i, psum) + psum = 0 + !$omp do + do i = 1, nx + psum = psum + vecA(i) + end do + !$omp end do + !$omp critical + sum = sum + psum + !$omp end critical + !$omp end parallel + write(*,*) 'Sum using private variable and critical section: ', sum + +end program vectorsum diff --git a/race-condition/solution/vectorsum.c b/race-condition/solution/vectorsum.c new file mode 100644 index 0000000000000000000000000000000000000000..ad8afc3a664457e9250f539d25b071ae2e3475e7 --- /dev/null +++ b/race-condition/solution/vectorsum.c @@ -0,0 +1,64 @@ +#include + +#define NX 102400 + +int main(void) +{ + long vecA[NX]; + long sum, psum, sumex; + int i; + + /* Initialization of the vectors */ + for (i = 0; i < NX; i++) { + vecA[i] = (long) i + 1; + } + + sumex = (long) NX * (NX + 1) / ((long) 2); + printf("Arithmetic sum formula (exact): %ld\n", + sumex); + + sum = 0.0; + /* Version with data race */ + #pragma omp parallel for default(shared) private(i) + for (i = 0; i < NX; i++) { + sum += vecA[i]; + } + printf("Sum with data race: %ld\n", + sum); + + sum = 0.0; + /* Dot product using critical section = SERIAL CODE! */ + #pragma omp parallel for default(shared) private(i) + for (i = 0; i < NX; i++) { + #pragma omp critical(dummy) + sum += vecA[i]; + } + printf("Sum using critical section: %ld\n", + sum); + + sum = 0.0; + /* Dot product using private partial sums and critical section */ + #pragma omp parallel default(shared) private(i, psum) + { + psum = 0.0; + #pragma omp for + for (i = 0; i < NX; i++) { + psum += vecA[i]; + } + #pragma omp critical(par_sum) + sum += psum; + } + printf("Sum using private variable and critical section: %ld\n", + sum); + + sum = 0.0; + /* Dot product using reduction */ + #pragma omp parallel for default(shared) private(i) reduction(+:sum) + for (i = 0; i < NX; i++) { + sum += vecA[i]; + } + printf("Reduction sum: %ld\n", + sum); + + return 0; +} diff --git a/race-condition/vectorsum.F90 b/race-condition/vectorsum.F90 new file mode 100644 index 0000000000000000000000000000000000000000..6847733a5d9ad86e46fd31a30e7c6738a1b4cc69 --- /dev/null +++ b/race-condition/vectorsum.F90 @@ -0,0 +1,25 @@ +program vectorsum + use iso_fortran_env, only: int64 + implicit none + integer, parameter :: ik = int64 + integer(kind=ik), parameter :: nx = 102400_ik + + integer(kind=ik), dimension(nx) :: vecA + integer(kind=ik) :: sum, psum, sumex + integer(kind=ik) :: i + + ! Initialization of vector + do i = 1, nx + vecA(i) = i + end do + + sumex = nx*(nx+1_ik)/2_ik + write(*,*) 'Arithmetic sum formula (exact): ', sumex + + sum = 0 + ! TODO: Parallelize the computation + do i = 1, nx + sum = sum + vecA(i) + end do + write(*,*) 'Sum: ', sum +end program vectorsum diff --git a/race-condition/vectorsum.c b/race-condition/vectorsum.c new file mode 100644 index 0000000000000000000000000000000000000000..a7badfe086ce157b5a3c83d85d2af92bf0ee36e0 --- /dev/null +++ b/race-condition/vectorsum.c @@ -0,0 +1,28 @@ +#include + +#define NX 102400 + +int main(void) +{ + long vecA[NX]; + long sum, psum, sumex; + int i; + + /* Initialization of the vectors */ + for (i = 0; i < NX; i++) { + vecA[i] = (long) i + 1; + } + + sumex = (long) NX * (NX + 1) / ((long) 2); + printf("Arithmetic sum formula (exact): %ld\n", + sumex); + + sum = 0.0; + /* TODO: Parallelize computation */ + for (i = 0; i < NX; i++) { + sum += vecA[i]; + } + printf("Sum: %ld\n", sum); + + return 0; +} diff --git a/tasks/LICENSE.txt b/tasks/LICENSE.txt new file mode 100644 index 0000000000000000000000000000000000000000..026e9d4a227009a7e61053b626b7d7c1bfa443cc --- /dev/null +++ b/tasks/LICENSE.txt @@ -0,0 +1,15 @@ + +Copyright (C) 2018 CSC - IT Center for Science Ltd. + +Licensed under the terms of the GNU General Public License as +published by the Free Software Foundation, either version 3 of the +License, or (at your option) any later version. + +Code is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +Copy of the GNU General Public License can be obtained from +. + diff --git a/tasks/README.md b/tasks/README.md new file mode 100644 index 0000000000000000000000000000000000000000..2cfdd4f1526858fe7223553485264c6589a6a3f3 --- /dev/null +++ b/tasks/README.md @@ -0,0 +1,14 @@ +## Bonus: Using OpenMP tasks for dynamic parallelization ## + +Mandelbrot set of complex numbers can be presented as two dimensional +fractal image. The computation of the pixel values is relatively time +consuming, and the computational cost varies for each pixel. Thus, +simply dividing the two dimensional grid evenly to threads leads into +load imbalance and suboptimal performance. + +Files [tasks/c/mandelbrot.c](c/mandelbrot.c) and +[tasks/c/mandelbrot.F90](fortran/mandelbrot.F90) contain recursive +implementation for calculating the Mandelbrot fractal, which can be +parallelized dynamically with OpenMP tasks. Insert missing pragmas for +parallelizing the code (look for TODOs in the source code), and +investigate the scalability with varying number of threads. diff --git a/tasks/c/Makefile b/tasks/c/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..abce73f59e7ae8762e051e2f33129328fdbad6f2 --- /dev/null +++ b/tasks/c/Makefile @@ -0,0 +1,49 @@ +ifeq ($(COMP),) +COMP=cray +endif + +COMMONDIR=../../../heat/common + +ifeq ($(COMP),cray) +FC=ftn +CC=cc +FCFLAGS=-O3 +CCFLAGS=-O3 -I/appl/opt/libpng/include -I$(COMMONDIR) +LDFLAGS=-L/appl/opt/libpng/lib +LIBS=-lpng -lz +endif + +ifeq ($(COMP),gnu) +FC=gfortran +CC=gcc +FCFLAGS=-fopenmp -O3 -Wall +CCFLAGS=-fopenmp -O3 -Wall -I$(COMMONDIR) +LDFLAGS= +LIBS=-lpng +endif + +EXE=mandelbrot +OBJS=mandelbrot.o +OBJS_PNG=pngwriter.o +CORRECT_OBJS_PNG=$(COMMONDIR)/pngwriter.o + +all: $(EXE) + +$(COMMONDIR)/pngwriter.o: $(COMMONDIR)/pngwriter.c $(COMMONDIR)/pngwriter.h +mandelbrot.o: mandelbrot.c + +pngwriter.o: pngwriter.c pngwriter.h + $(CC) $(CCFLAGS) -c -o $@ $< + +$(EXE): $(OBJS) $(OBJS_PNG) + $(CC) $(CCFLAGS) $(OBJS) $(OBJS_PNG) -o $@ $(LDFLAGS) $(LIBS) + +%.o: %.F90 + $(FC) $(FCFLAGS) -c $< -o $@ + +%.o: %.c + $(CC) $(CCFLAGS) -c $< -o $@ + +.PHONY: clean +clean: + -/bin/rm -f $(EXE) a.out *.o *.mod *.png *~ mandelbrot.png diff --git a/tasks/c/mandelbrot.c b/tasks/c/mandelbrot.c new file mode 100644 index 0000000000000000000000000000000000000000..fbf154f884f49812ad40470864ca6145a00bdffb --- /dev/null +++ b/tasks/c/mandelbrot.c @@ -0,0 +1,132 @@ +/* + * This example is based on the code of Andrew V. Adinetz + * https://github.com/canonizer/mandelbrot-dyn + * Licensed under The MIT License + */ + +#include +#include +#include +#include + +#include "pngwriter.h" + +// Maximum number of iterations +const int MAX_ITER_COUNT = 512; +// Marker for different iteration counts +const int DIFF_ITER_COUNT = -1; +// Maximum recursion depth +const int MAX_DEPTH = 6; +// Region size below which do per-pixel +const int MIN_SIZE = 32; +// Subdivision factor along each axis +const int SUBDIV = 4; + + +// |z|^2 of a complex number z +float abs2(complex v) +{ + return creal(v) * creal(v) + cimag(v) * cimag(v); +} + +// The kernel to count per-pixel values of the portion of the Mandelbrot set +// Does not need to be edited +int kernel(int w, int h, complex cmin, complex cmax, + int x, int y) +{ + complex dc = cmax - cmin; + float fx = (float)x / w; + float fy = (float)y / h; + complex c = cmin + fx * creal(dc) + fy * cimag(dc) * I; + int iteration = 0; + complex z = c; + while (iteration < MAX_ITER_COUNT && abs2(z) < 2 * 2) { + z = z * z + c; + iteration++; + } + return iteration; +} + + +/* Computes the Mandelbrot image recursively + * At each call, the image is divided into smaller blocks (by a factor of + * subdiv), and the function is called recursively with arguments corresponding + * to subblock. When maximum recursion depth is reached or size of block + * is smaller than predefined minimum, one starts to calculate actual pixel + * values + * + * - - - - - - - - ----- ----- + * | | | | | | + * | | ----- ----- + * | | --> --> ... + * | | ----- ----- + * | | | | | | + * | | ----- ----- + * --------------- + */ +void mandelbrot_block(int *iter_counts, int w, int h, complex cmin, + complex cmax, int x0, int y0, int d, int depth) +{ + +// TODO Parallelize the recursive function call +// with OpenMP tasks + + int block_size = d / SUBDIV; + if (depth + 1 < MAX_DEPTH && block_size > MIN_SIZE) { + // Subdivide recursively + for (int i = 0; i < SUBDIV; i++) { + for (int j = 0; j < SUBDIV; j++) { + mandelbrot_block(iter_counts, w, h, cmin, cmax, + x0 + i * block_size, y0 + j * block_size, + d / SUBDIV, depth + 1); + } + } + } else { + // Last recursion level reached, calculate the values + for (int i = x0; i < x0 + d; i++) { + for (int j = y0; j < y0 + d; j++) { + iter_counts[j * w + i] = kernel(w, h, cmin, cmax, i, j); + } + } + } +} + + +int main(int argc, char **argv) +{ + // Picture size, should be power of two + const int w = 512; + const int h = w; + int *iter_counts; + + complex cmin, cmax; + + int pic_bytes = w * h * sizeof(int); + iter_counts = (int *)malloc(pic_bytes); + + cmin = -1.5 + -1.0 * I; + cmax = 0.5 + 1.0 * I; + + double t1 = omp_get_wtime(); + +// TODO create parallel region. How many threads should be calling +// mandelbrot_block in this uppermost level? + + { + mandelbrot_block(iter_counts, w, h, cmin, cmax, + 0, 0, w, 1); + } + double t2 = omp_get_wtime(); + + // Save the image to a PNG file + save_png(iter_counts, w, h, "mandelbrot.png"); + + double walltime = t2 - t1; + // Print the timings + printf("Mandelbrot set computed in %.3lf s, at %.3lf Mpix/s\n", + walltime, h * w * 1e-6 / walltime); + + free(iter_counts); + return 0; +} + diff --git a/tasks/c/pngwriter.c b/tasks/c/pngwriter.c new file mode 100644 index 0000000000000000000000000000000000000000..94e96345ed1f435cfc923bf0fa1ad59e3dadf9e1 --- /dev/null +++ b/tasks/c/pngwriter.c @@ -0,0 +1,202 @@ +#include +#include +#include +#include +#include +#include "pngwriter.h" + +/* Datatype for RGB pixel */ +typedef struct { + uint8_t red; + uint8_t green; + uint8_t blue; +} pixel_t; + +void cmap(const int value, const int max_val, pixel_t *pix); + +/* Heat colormap from black to white */ +// *INDENT-OFF* +static int heat_colormap[256][3] = { + { 0, 0, 0}, { 35, 0, 0}, { 52, 0, 0}, { 60, 0, 0}, + { 63, 1, 0}, { 64, 2, 0}, { 68, 5, 0}, { 69, 6, 0}, + { 72, 8, 0}, { 74, 10, 0}, { 77, 12, 0}, { 78, 14, 0}, + { 81, 16, 0}, { 83, 17, 0}, { 85, 19, 0}, { 86, 20, 0}, + { 89, 22, 0}, { 91, 24, 0}, { 92, 25, 0}, { 94, 26, 0}, + { 95, 28, 0}, { 98, 30, 0}, {100, 31, 0}, {102, 33, 0}, + {103, 34, 0}, {105, 35, 0}, {106, 36, 0}, {108, 38, 0}, + {109, 39, 0}, {111, 40, 0}, {112, 42, 0}, {114, 43, 0}, + {115, 44, 0}, {117, 45, 0}, {119, 47, 0}, {119, 47, 0}, + {120, 48, 0}, {122, 49, 0}, {123, 51, 0}, {125, 52, 0}, + {125, 52, 0}, {126, 53, 0}, {128, 54, 0}, {129, 56, 0}, + {129, 56, 0}, {131, 57, 0}, {132, 58, 0}, {134, 59, 0}, + {134, 59, 0}, {136, 61, 0}, {137, 62, 0}, {137, 62, 0}, + {139, 63, 0}, {139, 63, 0}, {140, 65, 0}, {142, 66, 0}, + {148, 71, 0}, {149, 72, 0}, {149, 72, 0}, {151, 73, 0}, + {151, 73, 0}, {153, 75, 0}, {153, 75, 0}, {154, 76, 0}, + {154, 76, 0}, {154, 76, 0}, {156, 77, 0}, {156, 77, 0}, + {157, 79, 0}, {157, 79, 0}, {159, 80, 0}, {159, 80, 0}, + {159, 80, 0}, {160, 81, 0}, {160, 81, 0}, {162, 82, 0}, + {162, 82, 0}, {163, 84, 0}, {163, 84, 0}, {165, 85, 0}, + {165, 85, 0}, {166, 86, 0}, {166, 86, 0}, {166, 86, 0}, + {168, 87, 0}, {168, 87, 0}, {170, 89, 0}, {170, 89, 0}, + {171, 90, 0}, {171, 90, 0}, {173, 91, 0}, {173, 91, 0}, + {174, 93, 0}, {174, 93, 0}, {176, 94, 0}, {176, 94, 0}, + {177, 95, 0}, {177, 95, 0}, {179, 96, 0}, {179, 96, 0}, + {180, 98, 0}, {182, 99, 0}, {182, 99, 0}, {183, 100, 0}, + {183, 100, 0}, {185, 102, 0}, {185, 102, 0}, {187, 103, 0}, + {187, 103, 0}, {188, 104, 0}, {188, 104, 0}, {190, 105, 0}, + {191, 107, 0}, {191, 107, 0}, {193, 108, 0}, {193, 108, 0}, + {194, 109, 0}, {196, 110, 0}, {196, 110, 0}, {197, 112, 0}, + {197, 112, 0}, {199, 113, 0}, {200, 114, 0}, {200, 114, 0}, + {202, 116, 0}, {202, 116, 0}, {204, 117, 0}, {205, 118, 0}, + {205, 118, 0}, {207, 119, 0}, {208, 121, 0}, {208, 121, 0}, + {210, 122, 0}, {211, 123, 0}, {211, 123, 0}, {213, 124, 0}, + {214, 126, 0}, {214, 126, 0}, {216, 127, 0}, {217, 128, 0}, + {217, 128, 0}, {219, 130, 0}, {221, 131, 0}, {221, 131, 0}, + {222, 132, 0}, {224, 133, 0}, {224, 133, 0}, {225, 135, 0}, + {227, 136, 0}, {227, 136, 0}, {228, 137, 0}, {230, 138, 0}, + {230, 138, 0}, {231, 140, 0}, {233, 141, 0}, {233, 141, 0}, + {234, 142, 0}, {236, 144, 0}, {236, 144, 0}, {238, 145, 0}, + {239, 146, 0}, {241, 147, 0}, {241, 147, 0}, {242, 149, 0}, + {244, 150, 0}, {244, 150, 0}, {245, 151, 0}, {247, 153, 0}, + {247, 153, 0}, {248, 154, 0}, {250, 155, 0}, {251, 156, 0}, + {251, 156, 0}, {253, 158, 0}, {255, 159, 0}, {255, 159, 0}, + {255, 160, 0}, {255, 161, 0}, {255, 163, 0}, {255, 163, 0}, + {255, 164, 0}, {255, 165, 0}, {255, 167, 0}, {255, 167, 0}, + {255, 168, 0}, {255, 169, 0}, {255, 169, 0}, {255, 170, 0}, + {255, 172, 0}, {255, 173, 0}, {255, 173, 0}, {255, 174, 0}, + {255, 175, 0}, {255, 177, 0}, {255, 178, 0}, {255, 179, 0}, + {255, 181, 0}, {255, 181, 0}, {255, 182, 0}, {255, 183, 0}, + {255, 184, 0}, {255, 187, 7}, {255, 188, 10}, {255, 189, 14}, + {255, 191, 18}, {255, 192, 21}, {255, 193, 25}, {255, 195, 29}, + {255, 197, 36}, {255, 198, 40}, {255, 200, 43}, {255, 202, 51}, + {255, 204, 54}, {255, 206, 61}, {255, 207, 65}, {255, 210, 72}, + {255, 211, 76}, {255, 214, 83}, {255, 216, 91}, {255, 219, 98}, + {255, 221, 105}, {255, 223, 109}, {255, 225, 116}, {255, 228, 123}, + {255, 232, 134}, {255, 234, 142}, {255, 237, 149}, {255, 239, 156}, + {255, 240, 160}, {255, 243, 167}, {255, 246, 174}, {255, 248, 182}, + {255, 249, 185}, {255, 252, 193}, {255, 253, 196}, {255, 255, 204}, + {255, 255, 207}, {255, 255, 211}, {255, 255, 218}, {255, 255, 222}, + {255, 255, 225}, {255, 255, 229}, {255, 255, 233}, {255, 255, 236}, + {255, 255, 240}, {255, 255, 244}, {255, 255, 247}, {255, 255, 255} +}; +// *INDENT-ON* +/* Save the two dimensional array as a png image + * Arguments: + * double *data is a pointer to an array of nx*ny values + * int nx is the number of COLUMNS to be written + * int ny is the number of ROWS to be written + * char *fname is the name of the picture + * char lang is either 'c' or 'f' denoting the memory + * layout. That is, if 'f' is given, then rows + * and columns are swapped. + */ +int save_png(int *data, const int height, const int width, const char *fname) +{ + FILE *fp; + png_structp pngstruct_ptr = NULL; + png_infop pnginfo_ptr = NULL; + png_byte **row_pointers = NULL; + int i, j, max_value; + + /* Determine the maximum value */ + max_value = INT_MIN; + for (i = 0; i < height * width; i++) { + max_value = data[i] > max_value ? data[i] : max_value; + } + + /* Default return status is failure */ + int status = -1; + + int pixel_size = 3; + int depth = 8; + + fp = fopen(fname, "wb"); + if (fp == NULL) { + goto fopen_failed; + } + + pngstruct_ptr = + png_create_write_struct(PNG_LIBPNG_VER_STRING, NULL, NULL, NULL); + + if (pngstruct_ptr == NULL) { + goto pngstruct_create_failed; + } + + pnginfo_ptr = png_create_info_struct(pngstruct_ptr); + + if (pnginfo_ptr == NULL) { + goto pnginfo_create_failed; + } + + if (setjmp(png_jmpbuf(pngstruct_ptr))) { + goto setjmp_failed; + } + + png_set_IHDR(pngstruct_ptr, pnginfo_ptr, (size_t) width, + (size_t) height, depth, PNG_COLOR_TYPE_RGB, + PNG_INTERLACE_NONE, + PNG_COMPRESSION_TYPE_DEFAULT, PNG_FILTER_TYPE_DEFAULT); + + row_pointers = png_malloc(pngstruct_ptr, height * sizeof(png_byte *)); + + for (i = 0; i < height; i++) { + png_byte *row = png_malloc(pngstruct_ptr, + sizeof(uint8_t) * width * pixel_size); + row_pointers[i] = row; + + // Branch according to the memory layout + for (j = 0; j < width; j++) { + pixel_t pixel; + // Scale the values so that values between + // 0 and 100 degrees are mapped to values + // between 0 and 255 + cmap(data[j + i * width], max_value, &pixel); + *row++ = pixel.red; + *row++ = pixel.green; + *row++ = pixel.blue; + } + } + + png_init_io(pngstruct_ptr, fp); + png_set_rows(pngstruct_ptr, pnginfo_ptr, row_pointers); + png_write_png(pngstruct_ptr, pnginfo_ptr, + PNG_TRANSFORM_IDENTITY, NULL); + + status = 0; + + for (i = 0; i < height; i++) { + png_free(pngstruct_ptr, row_pointers[i]); + } + png_free(pngstruct_ptr, row_pointers); + +setjmp_failed: +pnginfo_create_failed: + png_destroy_write_struct(&pngstruct_ptr, &pnginfo_ptr); +pngstruct_create_failed: + fclose(fp); +fopen_failed: + return status; +} + +/* This routine sets the RGB values for the pixel_t structure using + * the colormap data heat_colormap. If the value is outside the + * acceptable png values 0,255 blue or red color is used instead. */ +void cmap(const int value, const int max_value, pixel_t *pix) +{ + int ival; + + ival = ((double) value / (double) max_value) * 255; + /* Check for wrong values */ + if (ival > 255) { + ival = 255; + } + if (ival < 0) { + ival = 0; + } + + /* Pick color from heat_colormap */ + pix->red = heat_colormap[ival][0]; + pix->green = heat_colormap[ival][1]; + pix->blue = heat_colormap[ival][2]; +} diff --git a/tasks/c/pngwriter.h b/tasks/c/pngwriter.h new file mode 100644 index 0000000000000000000000000000000000000000..9abe52b7e5e83e93f9f8802c1ededbaf190c26fa --- /dev/null +++ b/tasks/c/pngwriter.h @@ -0,0 +1,6 @@ +#ifndef PNGWRITER_H_ +#define PNGWRITER_H_ + +int save_png(int *data, const int nx, const int ny, const char *fname); + +#endif diff --git a/tasks/c/solution/Makefile b/tasks/c/solution/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..abce73f59e7ae8762e051e2f33129328fdbad6f2 --- /dev/null +++ b/tasks/c/solution/Makefile @@ -0,0 +1,49 @@ +ifeq ($(COMP),) +COMP=cray +endif + +COMMONDIR=../../../heat/common + +ifeq ($(COMP),cray) +FC=ftn +CC=cc +FCFLAGS=-O3 +CCFLAGS=-O3 -I/appl/opt/libpng/include -I$(COMMONDIR) +LDFLAGS=-L/appl/opt/libpng/lib +LIBS=-lpng -lz +endif + +ifeq ($(COMP),gnu) +FC=gfortran +CC=gcc +FCFLAGS=-fopenmp -O3 -Wall +CCFLAGS=-fopenmp -O3 -Wall -I$(COMMONDIR) +LDFLAGS= +LIBS=-lpng +endif + +EXE=mandelbrot +OBJS=mandelbrot.o +OBJS_PNG=pngwriter.o +CORRECT_OBJS_PNG=$(COMMONDIR)/pngwriter.o + +all: $(EXE) + +$(COMMONDIR)/pngwriter.o: $(COMMONDIR)/pngwriter.c $(COMMONDIR)/pngwriter.h +mandelbrot.o: mandelbrot.c + +pngwriter.o: pngwriter.c pngwriter.h + $(CC) $(CCFLAGS) -c -o $@ $< + +$(EXE): $(OBJS) $(OBJS_PNG) + $(CC) $(CCFLAGS) $(OBJS) $(OBJS_PNG) -o $@ $(LDFLAGS) $(LIBS) + +%.o: %.F90 + $(FC) $(FCFLAGS) -c $< -o $@ + +%.o: %.c + $(CC) $(CCFLAGS) -c $< -o $@ + +.PHONY: clean +clean: + -/bin/rm -f $(EXE) a.out *.o *.mod *.png *~ mandelbrot.png diff --git a/tasks/c/solution/mandelbrot.c b/tasks/c/solution/mandelbrot.c new file mode 100644 index 0000000000000000000000000000000000000000..a778b31da0b3e0e22a3808d11dcf77a70080184d --- /dev/null +++ b/tasks/c/solution/mandelbrot.c @@ -0,0 +1,134 @@ +/* + * This example is based on the code of Andrew V. Adinetz + * https://github.com/canonizer/mandelbrot-dyn + * Licensed under The MIT License + */ + +#include +#include +#include +#include + +#include "pngwriter.h" + +// Maximum number of iterations +const int MAX_ITER_COUNT = 512; +// Marker for different iteration counts +const int DIFF_ITER_COUNT = -1; +// Maximum recursion depth +const int MAX_DEPTH = 6; +// Region size below which do per-pixel +const int MIN_SIZE = 32; +// Subdivision factor along each axis +const int SUBDIV = 4; + + +// |z|^2 of a complex number z +float abs2(complex v) +{ + return creal(v) * creal(v) + cimag(v) * cimag(v); +} + +// The kernel to count per-pixel values of the portion of the Mandelbrot set +// Does not need to be edited +int kernel(int w, int h, complex cmin, complex cmax, + int x, int y) +{ + complex dc = cmax - cmin; + float fx = (float)x / w; + float fy = (float)y / h; + complex c = cmin + fx * creal(dc) + fy * cimag(dc) * I; + int iteration = 0; + complex z = c; + while (iteration < MAX_ITER_COUNT && abs2(z) < 2 * 2) { + z = z * z + c; + iteration++; + } + return iteration; +} + + +/* Computes the Mandelbrot image recursively + * At each call, the image is divided into smaller blocks (by a factor of + * subdiv), and the function is called recursively with arguments corresponding + * to subblock. When maximum recursion depth is reached or size of block + * is smaller than predefined minimum, one starts to calculate actual pixel + * values + * + * - - - - - - - - ----- ----- + * | | | | | | + * | | ----- ----- + * | | --> --> ... + * | | ----- ----- + * | | | | | | + * | | ----- ----- + * --------------- + */ +void mandelbrot_block(int *iter_counts, int w, int h, complex cmin, + complex cmax, int x0, int y0, int d, int depth) +{ + +// TODO Parallelize the recursive function call +// with OpenMP tasks + + int block_size = d / SUBDIV; + if (depth + 1 < MAX_DEPTH && block_size > MIN_SIZE) { + // Subdivide recursively + for (int i = 0; i < SUBDIV; i++) { + for (int j = 0; j < SUBDIV; j++) { + #pragma omp task + mandelbrot_block(iter_counts, w, h, cmin, cmax, + x0 + i * block_size, y0 + j * block_size, + d / SUBDIV, depth + 1); + } + } + } else { + // Last recursion level reached, calculate the values + for (int i = x0; i < x0 + d; i++) { + for (int j = y0; j < y0 + d; j++) { + iter_counts[j * w + i] = kernel(w, h, cmin, cmax, i, j); + } + } + } +} + + +int main(int argc, char **argv) +{ + // Picture size, should be power of two + const int w = 512; + const int h = w; + int *iter_counts; + + complex cmin, cmax; + + int pic_bytes = w * h * sizeof(int); + iter_counts = (int *)malloc(pic_bytes); + + cmin = -1.5 + -1.0 * I; + cmax = 0.5 + 1.0 * I; + + double t1 = omp_get_wtime(); + +// TODO create parallel region. How many threads should be calling +// mandelbrot_block in this uppermost level? + #pragma omp parallel + #pragma omp single + { + mandelbrot_block(iter_counts, w, h, cmin, cmax, + 0, 0, w, 1); + } + double t2 = omp_get_wtime(); + + // Save the image to a PNG file + save_png(iter_counts, w, h, "mandelbrot.png"); + + double walltime = t2 - t1; + // Print the timings + printf("Mandelbrot set computed in %.3lf s, at %.3lf Mpix/s\n", + walltime, h * w * 1e-6 / walltime); + + free(iter_counts); + return 0; +} + diff --git a/tasks/c/solution/pngwriter.c b/tasks/c/solution/pngwriter.c new file mode 100644 index 0000000000000000000000000000000000000000..94e96345ed1f435cfc923bf0fa1ad59e3dadf9e1 --- /dev/null +++ b/tasks/c/solution/pngwriter.c @@ -0,0 +1,202 @@ +#include +#include +#include +#include +#include +#include "pngwriter.h" + +/* Datatype for RGB pixel */ +typedef struct { + uint8_t red; + uint8_t green; + uint8_t blue; +} pixel_t; + +void cmap(const int value, const int max_val, pixel_t *pix); + +/* Heat colormap from black to white */ +// *INDENT-OFF* +static int heat_colormap[256][3] = { + { 0, 0, 0}, { 35, 0, 0}, { 52, 0, 0}, { 60, 0, 0}, + { 63, 1, 0}, { 64, 2, 0}, { 68, 5, 0}, { 69, 6, 0}, + { 72, 8, 0}, { 74, 10, 0}, { 77, 12, 0}, { 78, 14, 0}, + { 81, 16, 0}, { 83, 17, 0}, { 85, 19, 0}, { 86, 20, 0}, + { 89, 22, 0}, { 91, 24, 0}, { 92, 25, 0}, { 94, 26, 0}, + { 95, 28, 0}, { 98, 30, 0}, {100, 31, 0}, {102, 33, 0}, + {103, 34, 0}, {105, 35, 0}, {106, 36, 0}, {108, 38, 0}, + {109, 39, 0}, {111, 40, 0}, {112, 42, 0}, {114, 43, 0}, + {115, 44, 0}, {117, 45, 0}, {119, 47, 0}, {119, 47, 0}, + {120, 48, 0}, {122, 49, 0}, {123, 51, 0}, {125, 52, 0}, + {125, 52, 0}, {126, 53, 0}, {128, 54, 0}, {129, 56, 0}, + {129, 56, 0}, {131, 57, 0}, {132, 58, 0}, {134, 59, 0}, + {134, 59, 0}, {136, 61, 0}, {137, 62, 0}, {137, 62, 0}, + {139, 63, 0}, {139, 63, 0}, {140, 65, 0}, {142, 66, 0}, + {148, 71, 0}, {149, 72, 0}, {149, 72, 0}, {151, 73, 0}, + {151, 73, 0}, {153, 75, 0}, {153, 75, 0}, {154, 76, 0}, + {154, 76, 0}, {154, 76, 0}, {156, 77, 0}, {156, 77, 0}, + {157, 79, 0}, {157, 79, 0}, {159, 80, 0}, {159, 80, 0}, + {159, 80, 0}, {160, 81, 0}, {160, 81, 0}, {162, 82, 0}, + {162, 82, 0}, {163, 84, 0}, {163, 84, 0}, {165, 85, 0}, + {165, 85, 0}, {166, 86, 0}, {166, 86, 0}, {166, 86, 0}, + {168, 87, 0}, {168, 87, 0}, {170, 89, 0}, {170, 89, 0}, + {171, 90, 0}, {171, 90, 0}, {173, 91, 0}, {173, 91, 0}, + {174, 93, 0}, {174, 93, 0}, {176, 94, 0}, {176, 94, 0}, + {177, 95, 0}, {177, 95, 0}, {179, 96, 0}, {179, 96, 0}, + {180, 98, 0}, {182, 99, 0}, {182, 99, 0}, {183, 100, 0}, + {183, 100, 0}, {185, 102, 0}, {185, 102, 0}, {187, 103, 0}, + {187, 103, 0}, {188, 104, 0}, {188, 104, 0}, {190, 105, 0}, + {191, 107, 0}, {191, 107, 0}, {193, 108, 0}, {193, 108, 0}, + {194, 109, 0}, {196, 110, 0}, {196, 110, 0}, {197, 112, 0}, + {197, 112, 0}, {199, 113, 0}, {200, 114, 0}, {200, 114, 0}, + {202, 116, 0}, {202, 116, 0}, {204, 117, 0}, {205, 118, 0}, + {205, 118, 0}, {207, 119, 0}, {208, 121, 0}, {208, 121, 0}, + {210, 122, 0}, {211, 123, 0}, {211, 123, 0}, {213, 124, 0}, + {214, 126, 0}, {214, 126, 0}, {216, 127, 0}, {217, 128, 0}, + {217, 128, 0}, {219, 130, 0}, {221, 131, 0}, {221, 131, 0}, + {222, 132, 0}, {224, 133, 0}, {224, 133, 0}, {225, 135, 0}, + {227, 136, 0}, {227, 136, 0}, {228, 137, 0}, {230, 138, 0}, + {230, 138, 0}, {231, 140, 0}, {233, 141, 0}, {233, 141, 0}, + {234, 142, 0}, {236, 144, 0}, {236, 144, 0}, {238, 145, 0}, + {239, 146, 0}, {241, 147, 0}, {241, 147, 0}, {242, 149, 0}, + {244, 150, 0}, {244, 150, 0}, {245, 151, 0}, {247, 153, 0}, + {247, 153, 0}, {248, 154, 0}, {250, 155, 0}, {251, 156, 0}, + {251, 156, 0}, {253, 158, 0}, {255, 159, 0}, {255, 159, 0}, + {255, 160, 0}, {255, 161, 0}, {255, 163, 0}, {255, 163, 0}, + {255, 164, 0}, {255, 165, 0}, {255, 167, 0}, {255, 167, 0}, + {255, 168, 0}, {255, 169, 0}, {255, 169, 0}, {255, 170, 0}, + {255, 172, 0}, {255, 173, 0}, {255, 173, 0}, {255, 174, 0}, + {255, 175, 0}, {255, 177, 0}, {255, 178, 0}, {255, 179, 0}, + {255, 181, 0}, {255, 181, 0}, {255, 182, 0}, {255, 183, 0}, + {255, 184, 0}, {255, 187, 7}, {255, 188, 10}, {255, 189, 14}, + {255, 191, 18}, {255, 192, 21}, {255, 193, 25}, {255, 195, 29}, + {255, 197, 36}, {255, 198, 40}, {255, 200, 43}, {255, 202, 51}, + {255, 204, 54}, {255, 206, 61}, {255, 207, 65}, {255, 210, 72}, + {255, 211, 76}, {255, 214, 83}, {255, 216, 91}, {255, 219, 98}, + {255, 221, 105}, {255, 223, 109}, {255, 225, 116}, {255, 228, 123}, + {255, 232, 134}, {255, 234, 142}, {255, 237, 149}, {255, 239, 156}, + {255, 240, 160}, {255, 243, 167}, {255, 246, 174}, {255, 248, 182}, + {255, 249, 185}, {255, 252, 193}, {255, 253, 196}, {255, 255, 204}, + {255, 255, 207}, {255, 255, 211}, {255, 255, 218}, {255, 255, 222}, + {255, 255, 225}, {255, 255, 229}, {255, 255, 233}, {255, 255, 236}, + {255, 255, 240}, {255, 255, 244}, {255, 255, 247}, {255, 255, 255} +}; +// *INDENT-ON* +/* Save the two dimensional array as a png image + * Arguments: + * double *data is a pointer to an array of nx*ny values + * int nx is the number of COLUMNS to be written + * int ny is the number of ROWS to be written + * char *fname is the name of the picture + * char lang is either 'c' or 'f' denoting the memory + * layout. That is, if 'f' is given, then rows + * and columns are swapped. + */ +int save_png(int *data, const int height, const int width, const char *fname) +{ + FILE *fp; + png_structp pngstruct_ptr = NULL; + png_infop pnginfo_ptr = NULL; + png_byte **row_pointers = NULL; + int i, j, max_value; + + /* Determine the maximum value */ + max_value = INT_MIN; + for (i = 0; i < height * width; i++) { + max_value = data[i] > max_value ? data[i] : max_value; + } + + /* Default return status is failure */ + int status = -1; + + int pixel_size = 3; + int depth = 8; + + fp = fopen(fname, "wb"); + if (fp == NULL) { + goto fopen_failed; + } + + pngstruct_ptr = + png_create_write_struct(PNG_LIBPNG_VER_STRING, NULL, NULL, NULL); + + if (pngstruct_ptr == NULL) { + goto pngstruct_create_failed; + } + + pnginfo_ptr = png_create_info_struct(pngstruct_ptr); + + if (pnginfo_ptr == NULL) { + goto pnginfo_create_failed; + } + + if (setjmp(png_jmpbuf(pngstruct_ptr))) { + goto setjmp_failed; + } + + png_set_IHDR(pngstruct_ptr, pnginfo_ptr, (size_t) width, + (size_t) height, depth, PNG_COLOR_TYPE_RGB, + PNG_INTERLACE_NONE, + PNG_COMPRESSION_TYPE_DEFAULT, PNG_FILTER_TYPE_DEFAULT); + + row_pointers = png_malloc(pngstruct_ptr, height * sizeof(png_byte *)); + + for (i = 0; i < height; i++) { + png_byte *row = png_malloc(pngstruct_ptr, + sizeof(uint8_t) * width * pixel_size); + row_pointers[i] = row; + + // Branch according to the memory layout + for (j = 0; j < width; j++) { + pixel_t pixel; + // Scale the values so that values between + // 0 and 100 degrees are mapped to values + // between 0 and 255 + cmap(data[j + i * width], max_value, &pixel); + *row++ = pixel.red; + *row++ = pixel.green; + *row++ = pixel.blue; + } + } + + png_init_io(pngstruct_ptr, fp); + png_set_rows(pngstruct_ptr, pnginfo_ptr, row_pointers); + png_write_png(pngstruct_ptr, pnginfo_ptr, + PNG_TRANSFORM_IDENTITY, NULL); + + status = 0; + + for (i = 0; i < height; i++) { + png_free(pngstruct_ptr, row_pointers[i]); + } + png_free(pngstruct_ptr, row_pointers); + +setjmp_failed: +pnginfo_create_failed: + png_destroy_write_struct(&pngstruct_ptr, &pnginfo_ptr); +pngstruct_create_failed: + fclose(fp); +fopen_failed: + return status; +} + +/* This routine sets the RGB values for the pixel_t structure using + * the colormap data heat_colormap. If the value is outside the + * acceptable png values 0,255 blue or red color is used instead. */ +void cmap(const int value, const int max_value, pixel_t *pix) +{ + int ival; + + ival = ((double) value / (double) max_value) * 255; + /* Check for wrong values */ + if (ival > 255) { + ival = 255; + } + if (ival < 0) { + ival = 0; + } + + /* Pick color from heat_colormap */ + pix->red = heat_colormap[ival][0]; + pix->green = heat_colormap[ival][1]; + pix->blue = heat_colormap[ival][2]; +} diff --git a/tasks/c/solution/pngwriter.h b/tasks/c/solution/pngwriter.h new file mode 100644 index 0000000000000000000000000000000000000000..9abe52b7e5e83e93f9f8802c1ededbaf190c26fa --- /dev/null +++ b/tasks/c/solution/pngwriter.h @@ -0,0 +1,6 @@ +#ifndef PNGWRITER_H_ +#define PNGWRITER_H_ + +int save_png(int *data, const int nx, const int ny, const char *fname); + +#endif diff --git a/tasks/fortran/Makefile b/tasks/fortran/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..323942db80f4581b003b06b2500f3a3adfe9e084 --- /dev/null +++ b/tasks/fortran/Makefile @@ -0,0 +1,50 @@ +ifeq ($(COMP),) +COMP=cray +endif + +COMMONDIR=../../../heat/common + +ifeq ($(COMP),cray) +FC=ftn +CC=cc +FCFLAGS=-O3 +CCFLAGS=-O3 -I/appl/opt/libpng/include -I$(COMMONDIR) +LDFLAGS=-L/appl/opt/libpng/lib +LIBS=-lpng -lz +endif + +ifeq ($(COMP),gnu) +FC=gfortran +CC=gcc +FCFLAGS=-fopenmp -O3 -Wall +CCFLAGS=-fopenmp -O3 -Wall -I$(COMMONDIR) +LDFLAGS= +LIBS=-lpng +endif + +EXE=mandelbrot +OBJS=mandelbrot.o pngwriter_mod.o +OBJS_PNG=pngwriter.o +CORRECT_OBJS_PNG=$(COMMONDIR)/pngwriter.o + +all: $(EXE) + +$(COMMONDIR)/pngwriter.o: $(COMMONDIR)/pngwriter.c $(COMMONDIR)/pngwriter.h +pngwriter_mod.o: pngwriter_mod.F90 +mandelbrot.o: mandelbrot.F90 pngwriter_mod.o + +pngwriter.o: pngwriter.c pngwriter.h + $(CC) $(CCFLAGS) -c -o $@ $< + +$(EXE): $(OBJS) $(OBJS_PNG) + $(FC) $(FCFLAGS) $(OBJS) $(OBJS_PNG) -o $@ $(LDFLAGS) $(LIBS) + +%.o: %.F90 + $(FC) $(FCFLAGS) -c $< -o $@ + +%.o: %.c + $(CC) $(CCFLAGS) -c $< -o $@ + +.PHONY: clean +clean: + -/bin/rm -f $(EXE) a.out *.o *.mod *.png *~ mandelbrot.png diff --git a/tasks/fortran/mandelbrot.F90 b/tasks/fortran/mandelbrot.F90 new file mode 100644 index 0000000000000000000000000000000000000000..e624ca46375cfbbbe4e6f6112c92cd60b2fc3094 --- /dev/null +++ b/tasks/fortran/mandelbrot.F90 @@ -0,0 +1,129 @@ +program mandelbrot + + use iso_fortran_env, only : REAL64 + use pngwriter + use omp_lib + + implicit none + + integer, parameter :: dp = REAL64 + + integer, parameter :: max_iter_count = 512 + integer, parameter :: max_depth = 6 + integer, parameter :: min_size = 32 + integer, parameter :: subdiv = 4 + + integer, parameter :: w = 2048 + integer, parameter :: h = w + + integer, pointer, dimension(:,:) :: iter_counts + + integer :: stat + + complex(dp) :: cmin, cmax + real(dp) :: t0, t1 + + allocate(iter_counts(w, h)) + + + t0 = omp_get_wtime() + + cmin = (-1.5, -1.0) + cmax = (0.5, 1.0) + + ! TODO create parallel region. How many threads should be calling + ! mandelbrot_block in this uppermost level? + + call mandelbrot_block(iter_counts, w, h, cmin, cmax, 0, 0, w, 1) + + t1 = omp_get_wtime() + + stat = save_png(iter_counts, h, w, 'mandelbrot.png') + + deallocate(iter_counts) + + write(*,*) 'Mandelbrot set computed in', t1 - t0, 's' + +contains + + ! Computes the Mandelbrot image recursively + ! At each call, the image is divided into smaller blocks (by a factor of + ! subdiv), and the function is called recursively with arguments corresponding + ! to subblock. When maximum recursion depth is reached or size of block + ! is smaller than predefined minimum, one starts to calculate actual pixel + ! values + ! + ! - - - - - - - - ----- ----- + ! | | | | | | + ! | | ----- ----- + ! | | --> --> ... + ! | | ----- ----- + ! | | | | | | + ! | | ----- ----- + ! --------------- + ! + + recursive subroutine mandelbrot_block(iter_counts, w, h, cmin, cmax, & + x0, y0, d, depth) + implicit none + + integer, pointer, dimension(:,:), intent(inout) :: iter_counts + integer, intent(in) :: w, h, x0, y0, d, depth + complex(dp), intent(in) :: cmin, cmax + + integer :: block_size, i, j + + ! TODO Parallelize the recursive function call + ! with OpenMP tasks + + block_size = d / subdiv + if ((depth + 1 < max_depth) .and. (block_size > min_size)) then + ! Subdivide recursively + do i=0, subdiv - 1 + do j=0, subdiv - 1 + call mandelbrot_block(iter_counts, w, h, cmin, cmax, & + x0 + i*block_size, y0 + j*block_size, & + block_size, depth + 1) + end do + end do + else + ! Last recursion level reached, calculate the values + do j = y0 + 1, y0 + d + do i = x0 + 1, x0 + d + iter_counts(i, j) = kernel(h, w, cmin, cmax, i-1, j-1) + end do + end do + end if + + end subroutine mandelbrot_block + + ! Calculate iteration count for a pixel. + ! This function does not need to be edited + integer function kernel(h, w, cmin, cmax, x, y) + implicit none + + integer, intent(in) :: h, w, x, y + complex(dp), intent(in) :: cmin, cmax + + integer :: iteration + complex(dp) :: z, dc, c + real(dp) :: fx, fy + + dc = cmax - cmin + fx = real(x, dp) / w + fy = real(y, dp) / h + c = cmin + fx * real(dc) + fy * imag(dc) * (0.0, 1.0) + + z = c + iteration = 0 + + do while (iteration < max_iter_count .and. abs(z)**2 < 4) + z = z**2 + c + iteration = iteration + 1 + end do + + kernel = iteration + + end function kernel + +end program mandelbrot diff --git a/tasks/fortran/pngwriter.c b/tasks/fortran/pngwriter.c new file mode 100644 index 0000000000000000000000000000000000000000..94e96345ed1f435cfc923bf0fa1ad59e3dadf9e1 --- /dev/null +++ b/tasks/fortran/pngwriter.c @@ -0,0 +1,202 @@ +#include +#include +#include +#include +#include +#include "pngwriter.h" + +/* Datatype for RGB pixel */ +typedef struct { + uint8_t red; + uint8_t green; + uint8_t blue; +} pixel_t; + +void cmap(const int value, const int max_val, pixel_t *pix); + +/* Heat colormap from black to white */ +// *INDENT-OFF* +static int heat_colormap[256][3] = { + { 0, 0, 0}, { 35, 0, 0}, { 52, 0, 0}, { 60, 0, 0}, + { 63, 1, 0}, { 64, 2, 0}, { 68, 5, 0}, { 69, 6, 0}, + { 72, 8, 0}, { 74, 10, 0}, { 77, 12, 0}, { 78, 14, 0}, + { 81, 16, 0}, { 83, 17, 0}, { 85, 19, 0}, { 86, 20, 0}, + { 89, 22, 0}, { 91, 24, 0}, { 92, 25, 0}, { 94, 26, 0}, + { 95, 28, 0}, { 98, 30, 0}, {100, 31, 0}, {102, 33, 0}, + {103, 34, 0}, {105, 35, 0}, {106, 36, 0}, {108, 38, 0}, + {109, 39, 0}, {111, 40, 0}, {112, 42, 0}, {114, 43, 0}, + {115, 44, 0}, {117, 45, 0}, {119, 47, 0}, {119, 47, 0}, + {120, 48, 0}, {122, 49, 0}, {123, 51, 0}, {125, 52, 0}, + {125, 52, 0}, {126, 53, 0}, {128, 54, 0}, {129, 56, 0}, + {129, 56, 0}, {131, 57, 0}, {132, 58, 0}, {134, 59, 0}, + {134, 59, 0}, {136, 61, 0}, {137, 62, 0}, {137, 62, 0}, + {139, 63, 0}, {139, 63, 0}, {140, 65, 0}, {142, 66, 0}, + {148, 71, 0}, {149, 72, 0}, {149, 72, 0}, {151, 73, 0}, + {151, 73, 0}, {153, 75, 0}, {153, 75, 0}, {154, 76, 0}, + {154, 76, 0}, {154, 76, 0}, {156, 77, 0}, {156, 77, 0}, + {157, 79, 0}, {157, 79, 0}, {159, 80, 0}, {159, 80, 0}, + {159, 80, 0}, {160, 81, 0}, {160, 81, 0}, {162, 82, 0}, + {162, 82, 0}, {163, 84, 0}, {163, 84, 0}, {165, 85, 0}, + {165, 85, 0}, {166, 86, 0}, {166, 86, 0}, {166, 86, 0}, + {168, 87, 0}, {168, 87, 0}, {170, 89, 0}, {170, 89, 0}, + {171, 90, 0}, {171, 90, 0}, {173, 91, 0}, {173, 91, 0}, + {174, 93, 0}, {174, 93, 0}, {176, 94, 0}, {176, 94, 0}, + {177, 95, 0}, {177, 95, 0}, {179, 96, 0}, {179, 96, 0}, + {180, 98, 0}, {182, 99, 0}, {182, 99, 0}, {183, 100, 0}, + {183, 100, 0}, {185, 102, 0}, {185, 102, 0}, {187, 103, 0}, + {187, 103, 0}, {188, 104, 0}, {188, 104, 0}, {190, 105, 0}, + {191, 107, 0}, {191, 107, 0}, {193, 108, 0}, {193, 108, 0}, + {194, 109, 0}, {196, 110, 0}, {196, 110, 0}, {197, 112, 0}, + {197, 112, 0}, {199, 113, 0}, {200, 114, 0}, {200, 114, 0}, + {202, 116, 0}, {202, 116, 0}, {204, 117, 0}, {205, 118, 0}, + {205, 118, 0}, {207, 119, 0}, {208, 121, 0}, {208, 121, 0}, + {210, 122, 0}, {211, 123, 0}, {211, 123, 0}, {213, 124, 0}, + {214, 126, 0}, {214, 126, 0}, {216, 127, 0}, {217, 128, 0}, + {217, 128, 0}, {219, 130, 0}, {221, 131, 0}, {221, 131, 0}, + {222, 132, 0}, {224, 133, 0}, {224, 133, 0}, {225, 135, 0}, + {227, 136, 0}, {227, 136, 0}, {228, 137, 0}, {230, 138, 0}, + {230, 138, 0}, {231, 140, 0}, {233, 141, 0}, {233, 141, 0}, + {234, 142, 0}, {236, 144, 0}, {236, 144, 0}, {238, 145, 0}, + {239, 146, 0}, {241, 147, 0}, {241, 147, 0}, {242, 149, 0}, + {244, 150, 0}, {244, 150, 0}, {245, 151, 0}, {247, 153, 0}, + {247, 153, 0}, {248, 154, 0}, {250, 155, 0}, {251, 156, 0}, + {251, 156, 0}, {253, 158, 0}, {255, 159, 0}, {255, 159, 0}, + {255, 160, 0}, {255, 161, 0}, {255, 163, 0}, {255, 163, 0}, + {255, 164, 0}, {255, 165, 0}, {255, 167, 0}, {255, 167, 0}, + {255, 168, 0}, {255, 169, 0}, {255, 169, 0}, {255, 170, 0}, + {255, 172, 0}, {255, 173, 0}, {255, 173, 0}, {255, 174, 0}, + {255, 175, 0}, {255, 177, 0}, {255, 178, 0}, {255, 179, 0}, + {255, 181, 0}, {255, 181, 0}, {255, 182, 0}, {255, 183, 0}, + {255, 184, 0}, {255, 187, 7}, {255, 188, 10}, {255, 189, 14}, + {255, 191, 18}, {255, 192, 21}, {255, 193, 25}, {255, 195, 29}, + {255, 197, 36}, {255, 198, 40}, {255, 200, 43}, {255, 202, 51}, + {255, 204, 54}, {255, 206, 61}, {255, 207, 65}, {255, 210, 72}, + {255, 211, 76}, {255, 214, 83}, {255, 216, 91}, {255, 219, 98}, + {255, 221, 105}, {255, 223, 109}, {255, 225, 116}, {255, 228, 123}, + {255, 232, 134}, {255, 234, 142}, {255, 237, 149}, {255, 239, 156}, + {255, 240, 160}, {255, 243, 167}, {255, 246, 174}, {255, 248, 182}, + {255, 249, 185}, {255, 252, 193}, {255, 253, 196}, {255, 255, 204}, + {255, 255, 207}, {255, 255, 211}, {255, 255, 218}, {255, 255, 222}, + {255, 255, 225}, {255, 255, 229}, {255, 255, 233}, {255, 255, 236}, + {255, 255, 240}, {255, 255, 244}, {255, 255, 247}, {255, 255, 255} +}; +// *INDENT-ON* +/* Save the two dimensional array as a png image + * Arguments: + * double *data is a pointer to an array of nx*ny values + * int nx is the number of COLUMNS to be written + * int ny is the number of ROWS to be written + * char *fname is the name of the picture + * char lang is either 'c' or 'f' denoting the memory + * layout. That is, if 'f' is given, then rows + * and columns are swapped. + */ +int save_png(int *data, const int height, const int width, const char *fname) +{ + FILE *fp; + png_structp pngstruct_ptr = NULL; + png_infop pnginfo_ptr = NULL; + png_byte **row_pointers = NULL; + int i, j, max_value; + + /* Determine the maximum value */ + max_value = INT_MIN; + for (i = 0; i < height * width; i++) { + max_value = data[i] > max_value ? data[i] : max_value; + } + + /* Default return status is failure */ + int status = -1; + + int pixel_size = 3; + int depth = 8; + + fp = fopen(fname, "wb"); + if (fp == NULL) { + goto fopen_failed; + } + + pngstruct_ptr = + png_create_write_struct(PNG_LIBPNG_VER_STRING, NULL, NULL, NULL); + + if (pngstruct_ptr == NULL) { + goto pngstruct_create_failed; + } + + pnginfo_ptr = png_create_info_struct(pngstruct_ptr); + + if (pnginfo_ptr == NULL) { + goto pnginfo_create_failed; + } + + if (setjmp(png_jmpbuf(pngstruct_ptr))) { + goto setjmp_failed; + } + + png_set_IHDR(pngstruct_ptr, pnginfo_ptr, (size_t) width, + (size_t) height, depth, PNG_COLOR_TYPE_RGB, + PNG_INTERLACE_NONE, + PNG_COMPRESSION_TYPE_DEFAULT, PNG_FILTER_TYPE_DEFAULT); + + row_pointers = png_malloc(pngstruct_ptr, height * sizeof(png_byte *)); + + for (i = 0; i < height; i++) { + png_byte *row = png_malloc(pngstruct_ptr, + sizeof(uint8_t) * width * pixel_size); + row_pointers[i] = row; + + // Branch according to the memory layout + for (j = 0; j < width; j++) { + pixel_t pixel; + // Scale the values so that values between + // 0 and 100 degrees are mapped to values + // between 0 and 255 + cmap(data[j + i * width], max_value, &pixel); + *row++ = pixel.red; + *row++ = pixel.green; + *row++ = pixel.blue; + } + } + + png_init_io(pngstruct_ptr, fp); + png_set_rows(pngstruct_ptr, pnginfo_ptr, row_pointers); + png_write_png(pngstruct_ptr, pnginfo_ptr, + PNG_TRANSFORM_IDENTITY, NULL); + + status = 0; + + for (i = 0; i < height; i++) { + png_free(pngstruct_ptr, row_pointers[i]); + } + png_free(pngstruct_ptr, row_pointers); + +setjmp_failed: +pnginfo_create_failed: + png_destroy_write_struct(&pngstruct_ptr, &pnginfo_ptr); +pngstruct_create_failed: + fclose(fp); +fopen_failed: + return status; +} + +/* This routine sets the RGB values for the pixel_t structure using + * the colormap data heat_colormap. If the value is outside the + * acceptable png values 0,255 blue or red color is used instead. */ +void cmap(const int value, const int max_value, pixel_t *pix) +{ + int ival; + + ival = ((double) value / (double) max_value) * 255; + /* Check for wrong values */ + if (ival > 255) { + ival = 255; + } + if (ival < 0) { + ival = 0; + } + + /* Pick color from heat_colormap */ + pix->red = heat_colormap[ival][0]; + pix->green = heat_colormap[ival][1]; + pix->blue = heat_colormap[ival][2]; +} diff --git a/tasks/fortran/pngwriter.h b/tasks/fortran/pngwriter.h new file mode 100644 index 0000000000000000000000000000000000000000..9abe52b7e5e83e93f9f8802c1ededbaf190c26fa --- /dev/null +++ b/tasks/fortran/pngwriter.h @@ -0,0 +1,6 @@ +#ifndef PNGWRITER_H_ +#define PNGWRITER_H_ + +int save_png(int *data, const int nx, const int ny, const char *fname); + +#endif diff --git a/tasks/fortran/pngwriter_mod.F90 b/tasks/fortran/pngwriter_mod.F90 new file mode 100644 index 0000000000000000000000000000000000000000..5c6054e407869133543b23a8dfd9ecb7acb47d75 --- /dev/null +++ b/tasks/fortran/pngwriter_mod.F90 @@ -0,0 +1,40 @@ +! PNG writer +module pngwriter + +contains + + function save_png(data, nx, ny, fname) result(stat) + + use, intrinsic :: ISO_C_BINDING + implicit none + + integer, dimension(:,:), intent(in) :: data + integer, intent(in) :: nx, ny + character(len=*), intent(in) :: fname + integer :: stat + + ! Interface for save_png C-function + interface + ! The C-function definition is + ! int save_png(double *data, const int nx, const int ny, + ! const char *fname) + function save_png_c(data, nx, ny, fname, order) & + & bind(C,name="save_png") result(stat) + use, intrinsic :: ISO_C_BINDING + implicit none + integer(kind=C_INT) :: data(*) + integer(kind=C_INT), value, intent(IN) :: nx, ny + character(kind=C_CHAR), intent(IN) :: fname(*) + character(kind=C_CHAR), value, intent(IN) :: order + integer(kind=C_INT) :: stat + end function save_png_c + end interface + + stat = save_png_c(data, nx, ny, trim(fname) // C_NULL_CHAR, 'f') + if (stat /= 0) then + write(*,*) 'save_png returned error!' + end if + + end function save_png + +end module pngwriter diff --git a/tasks/fortran/solution/Makefile b/tasks/fortran/solution/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..323942db80f4581b003b06b2500f3a3adfe9e084 --- /dev/null +++ b/tasks/fortran/solution/Makefile @@ -0,0 +1,50 @@ +ifeq ($(COMP),) +COMP=cray +endif + +COMMONDIR=../../../heat/common + +ifeq ($(COMP),cray) +FC=ftn +CC=cc +FCFLAGS=-O3 +CCFLAGS=-O3 -I/appl/opt/libpng/include -I$(COMMONDIR) +LDFLAGS=-L/appl/opt/libpng/lib +LIBS=-lpng -lz +endif + +ifeq ($(COMP),gnu) +FC=gfortran +CC=gcc +FCFLAGS=-fopenmp -O3 -Wall +CCFLAGS=-fopenmp -O3 -Wall -I$(COMMONDIR) +LDFLAGS= +LIBS=-lpng +endif + +EXE=mandelbrot +OBJS=mandelbrot.o pngwriter_mod.o +OBJS_PNG=pngwriter.o +CORRECT_OBJS_PNG=$(COMMONDIR)/pngwriter.o + +all: $(EXE) + +$(COMMONDIR)/pngwriter.o: $(COMMONDIR)/pngwriter.c $(COMMONDIR)/pngwriter.h +pngwriter_mod.o: pngwriter_mod.F90 +mandelbrot.o: mandelbrot.F90 pngwriter_mod.o + +pngwriter.o: pngwriter.c pngwriter.h + $(CC) $(CCFLAGS) -c -o $@ $< + +$(EXE): $(OBJS) $(OBJS_PNG) + $(FC) $(FCFLAGS) $(OBJS) $(OBJS_PNG) -o $@ $(LDFLAGS) $(LIBS) + +%.o: %.F90 + $(FC) $(FCFLAGS) -c $< -o $@ + +%.o: %.c + $(CC) $(CCFLAGS) -c $< -o $@ + +.PHONY: clean +clean: + -/bin/rm -f $(EXE) a.out *.o *.mod *.png *~ mandelbrot.png diff --git a/tasks/fortran/solution/mandelbrot.F90 b/tasks/fortran/solution/mandelbrot.F90 new file mode 100644 index 0000000000000000000000000000000000000000..4fab3ebc17ba67d030652df0f85c55ab372279d7 --- /dev/null +++ b/tasks/fortran/solution/mandelbrot.F90 @@ -0,0 +1,129 @@ +program mandelbrot + + use iso_fortran_env, only : REAL64 + use pngwriter + use omp_lib + + implicit none + + integer, parameter :: dp = REAL64 + + integer, parameter :: max_iter_count = 512 + integer, parameter :: max_depth = 6 + integer, parameter :: min_size = 32 + integer, parameter :: subdiv = 4 + + integer, parameter :: w = 2048 + integer, parameter :: h = w + + integer, pointer, dimension(:,:) :: iter_counts + + integer :: stat + + complex(dp) :: cmin, cmax + real(dp) :: t0, t1 + + allocate(iter_counts(w, h)) + + + t0 = omp_get_wtime() + + cmin = (-1.5, -1.0) + cmax = (0.5, 1.0) + + !$omp parallel + !$omp single + call mandelbrot_block(iter_counts, w, h, cmin, cmax, 0, 0, w, 1) + !$omp end single + !$omp end parallel + + t1 = omp_get_wtime() + + stat = save_png(iter_counts, h, w, 'mandelbrot.png') + + deallocate(iter_counts) + + write(*,*) 'Mandelbrot set computed in', t1 - t0, 's' + +contains + + ! Computes the Mandelbrot image recursively + ! At each call, the image is divided into smaller blocks (by a factor of + ! subdiv), and the function is called recursively with arguments corresponding + ! to subblock. When maximum recursion depth is reached or size of block + ! is smaller than predefined minimum, one starts to calculate actual pixel + ! values + ! + ! - - - - - - - - ----- ----- + ! | | | | | | + ! | | ----- ----- + ! | | --> --> ... + ! | | ----- ----- + ! | | | | | | + ! | | ----- ----- + ! --------------- + ! + + recursive subroutine mandelbrot_block(iter_counts, w, h, cmin, cmax, & + x0, y0, d, depth) + implicit none + + integer, pointer, dimension(:,:), intent(inout) :: iter_counts + integer, intent(in) :: w, h, x0, y0, d, depth + complex(dp), intent(in) :: cmin, cmax + + integer :: block_size, i, j + + block_size = d / subdiv + if ((depth + 1 < max_depth) .and. (block_size > min_size)) then + ! Subdivide recursively + do i=0, subdiv - 1 + do j=0, subdiv - 1 + !$omp task + call mandelbrot_block(iter_counts, w, h, cmin, cmax, & + x0 + i*block_size, y0 + j*block_size, & + block_size, depth + 1) + !$omp end task + end do + end do + else + ! Last recursion level reached, calculate the values + do j = y0 + 1, y0 + d + do i = x0 + 1, x0 + d + iter_counts(i, j) = kernel(h, w, cmin, cmax, i-1, j-1) + end do + end do + end if + + end subroutine mandelbrot_block + + ! Calculate iteration count for a pixel. + ! This function does not need to be edited + integer function kernel(h, w, cmin, cmax, x, y) + implicit none + + integer, intent(in) :: h, w, x, y + complex(dp), intent(in) :: cmin, cmax + + integer :: iteration + complex(dp) :: z, dc, c + real(dp) :: fx, fy + + dc = cmax - cmin + fx = real(x, dp) / w + fy = real(y, dp) / h + c = cmin + fx * real(dc) + fy * imag(dc) * (0.0, 1.0) + + z = c + iteration = 0 + + do while (iteration < max_iter_count .and. abs(z)**2 < 4) + z = z**2 + c + iteration = iteration + 1 + end do + + kernel = iteration + + end function kernel + +end program mandelbrot diff --git a/tasks/fortran/solution/pngwriter.c b/tasks/fortran/solution/pngwriter.c new file mode 100644 index 0000000000000000000000000000000000000000..94e96345ed1f435cfc923bf0fa1ad59e3dadf9e1 --- /dev/null +++ b/tasks/fortran/solution/pngwriter.c @@ -0,0 +1,202 @@ +#include +#include +#include +#include +#include +#include "pngwriter.h" + +/* Datatype for RGB pixel */ +typedef struct { + uint8_t red; + uint8_t green; + uint8_t blue; +} pixel_t; + +void cmap(const int value, const int max_val, pixel_t *pix); + +/* Heat colormap from black to white */ +// *INDENT-OFF* +static int heat_colormap[256][3] = { + { 0, 0, 0}, { 35, 0, 0}, { 52, 0, 0}, { 60, 0, 0}, + { 63, 1, 0}, { 64, 2, 0}, { 68, 5, 0}, { 69, 6, 0}, + { 72, 8, 0}, { 74, 10, 0}, { 77, 12, 0}, { 78, 14, 0}, + { 81, 16, 0}, { 83, 17, 0}, { 85, 19, 0}, { 86, 20, 0}, + { 89, 22, 0}, { 91, 24, 0}, { 92, 25, 0}, { 94, 26, 0}, + { 95, 28, 0}, { 98, 30, 0}, {100, 31, 0}, {102, 33, 0}, + {103, 34, 0}, {105, 35, 0}, {106, 36, 0}, {108, 38, 0}, + {109, 39, 0}, {111, 40, 0}, {112, 42, 0}, {114, 43, 0}, + {115, 44, 0}, {117, 45, 0}, {119, 47, 0}, {119, 47, 0}, + {120, 48, 0}, {122, 49, 0}, {123, 51, 0}, {125, 52, 0}, + {125, 52, 0}, {126, 53, 0}, {128, 54, 0}, {129, 56, 0}, + {129, 56, 0}, {131, 57, 0}, {132, 58, 0}, {134, 59, 0}, + {134, 59, 0}, {136, 61, 0}, {137, 62, 0}, {137, 62, 0}, + {139, 63, 0}, {139, 63, 0}, {140, 65, 0}, {142, 66, 0}, + {148, 71, 0}, {149, 72, 0}, {149, 72, 0}, {151, 73, 0}, + {151, 73, 0}, {153, 75, 0}, {153, 75, 0}, {154, 76, 0}, + {154, 76, 0}, {154, 76, 0}, {156, 77, 0}, {156, 77, 0}, + {157, 79, 0}, {157, 79, 0}, {159, 80, 0}, {159, 80, 0}, + {159, 80, 0}, {160, 81, 0}, {160, 81, 0}, {162, 82, 0}, + {162, 82, 0}, {163, 84, 0}, {163, 84, 0}, {165, 85, 0}, + {165, 85, 0}, {166, 86, 0}, {166, 86, 0}, {166, 86, 0}, + {168, 87, 0}, {168, 87, 0}, {170, 89, 0}, {170, 89, 0}, + {171, 90, 0}, {171, 90, 0}, {173, 91, 0}, {173, 91, 0}, + {174, 93, 0}, {174, 93, 0}, {176, 94, 0}, {176, 94, 0}, + {177, 95, 0}, {177, 95, 0}, {179, 96, 0}, {179, 96, 0}, + {180, 98, 0}, {182, 99, 0}, {182, 99, 0}, {183, 100, 0}, + {183, 100, 0}, {185, 102, 0}, {185, 102, 0}, {187, 103, 0}, + {187, 103, 0}, {188, 104, 0}, {188, 104, 0}, {190, 105, 0}, + {191, 107, 0}, {191, 107, 0}, {193, 108, 0}, {193, 108, 0}, + {194, 109, 0}, {196, 110, 0}, {196, 110, 0}, {197, 112, 0}, + {197, 112, 0}, {199, 113, 0}, {200, 114, 0}, {200, 114, 0}, + {202, 116, 0}, {202, 116, 0}, {204, 117, 0}, {205, 118, 0}, + {205, 118, 0}, {207, 119, 0}, {208, 121, 0}, {208, 121, 0}, + {210, 122, 0}, {211, 123, 0}, {211, 123, 0}, {213, 124, 0}, + {214, 126, 0}, {214, 126, 0}, {216, 127, 0}, {217, 128, 0}, + {217, 128, 0}, {219, 130, 0}, {221, 131, 0}, {221, 131, 0}, + {222, 132, 0}, {224, 133, 0}, {224, 133, 0}, {225, 135, 0}, + {227, 136, 0}, {227, 136, 0}, {228, 137, 0}, {230, 138, 0}, + {230, 138, 0}, {231, 140, 0}, {233, 141, 0}, {233, 141, 0}, + {234, 142, 0}, {236, 144, 0}, {236, 144, 0}, {238, 145, 0}, + {239, 146, 0}, {241, 147, 0}, {241, 147, 0}, {242, 149, 0}, + {244, 150, 0}, {244, 150, 0}, {245, 151, 0}, {247, 153, 0}, + {247, 153, 0}, {248, 154, 0}, {250, 155, 0}, {251, 156, 0}, + {251, 156, 0}, {253, 158, 0}, {255, 159, 0}, {255, 159, 0}, + {255, 160, 0}, {255, 161, 0}, {255, 163, 0}, {255, 163, 0}, + {255, 164, 0}, {255, 165, 0}, {255, 167, 0}, {255, 167, 0}, + {255, 168, 0}, {255, 169, 0}, {255, 169, 0}, {255, 170, 0}, + {255, 172, 0}, {255, 173, 0}, {255, 173, 0}, {255, 174, 0}, + {255, 175, 0}, {255, 177, 0}, {255, 178, 0}, {255, 179, 0}, + {255, 181, 0}, {255, 181, 0}, {255, 182, 0}, {255, 183, 0}, + {255, 184, 0}, {255, 187, 7}, {255, 188, 10}, {255, 189, 14}, + {255, 191, 18}, {255, 192, 21}, {255, 193, 25}, {255, 195, 29}, + {255, 197, 36}, {255, 198, 40}, {255, 200, 43}, {255, 202, 51}, + {255, 204, 54}, {255, 206, 61}, {255, 207, 65}, {255, 210, 72}, + {255, 211, 76}, {255, 214, 83}, {255, 216, 91}, {255, 219, 98}, + {255, 221, 105}, {255, 223, 109}, {255, 225, 116}, {255, 228, 123}, + {255, 232, 134}, {255, 234, 142}, {255, 237, 149}, {255, 239, 156}, + {255, 240, 160}, {255, 243, 167}, {255, 246, 174}, {255, 248, 182}, + {255, 249, 185}, {255, 252, 193}, {255, 253, 196}, {255, 255, 204}, + {255, 255, 207}, {255, 255, 211}, {255, 255, 218}, {255, 255, 222}, + {255, 255, 225}, {255, 255, 229}, {255, 255, 233}, {255, 255, 236}, + {255, 255, 240}, {255, 255, 244}, {255, 255, 247}, {255, 255, 255} +}; +// *INDENT-ON* +/* Save the two dimensional array as a png image + * Arguments: + * double *data is a pointer to an array of nx*ny values + * int nx is the number of COLUMNS to be written + * int ny is the number of ROWS to be written + * char *fname is the name of the picture + * char lang is either 'c' or 'f' denoting the memory + * layout. That is, if 'f' is given, then rows + * and columns are swapped. + */ +int save_png(int *data, const int height, const int width, const char *fname) +{ + FILE *fp; + png_structp pngstruct_ptr = NULL; + png_infop pnginfo_ptr = NULL; + png_byte **row_pointers = NULL; + int i, j, max_value; + + /* Determine the maximum value */ + max_value = INT_MIN; + for (i = 0; i < height * width; i++) { + max_value = data[i] > max_value ? data[i] : max_value; + } + + /* Default return status is failure */ + int status = -1; + + int pixel_size = 3; + int depth = 8; + + fp = fopen(fname, "wb"); + if (fp == NULL) { + goto fopen_failed; + } + + pngstruct_ptr = + png_create_write_struct(PNG_LIBPNG_VER_STRING, NULL, NULL, NULL); + + if (pngstruct_ptr == NULL) { + goto pngstruct_create_failed; + } + + pnginfo_ptr = png_create_info_struct(pngstruct_ptr); + + if (pnginfo_ptr == NULL) { + goto pnginfo_create_failed; + } + + if (setjmp(png_jmpbuf(pngstruct_ptr))) { + goto setjmp_failed; + } + + png_set_IHDR(pngstruct_ptr, pnginfo_ptr, (size_t) width, + (size_t) height, depth, PNG_COLOR_TYPE_RGB, + PNG_INTERLACE_NONE, + PNG_COMPRESSION_TYPE_DEFAULT, PNG_FILTER_TYPE_DEFAULT); + + row_pointers = png_malloc(pngstruct_ptr, height * sizeof(png_byte *)); + + for (i = 0; i < height; i++) { + png_byte *row = png_malloc(pngstruct_ptr, + sizeof(uint8_t) * width * pixel_size); + row_pointers[i] = row; + + // Branch according to the memory layout + for (j = 0; j < width; j++) { + pixel_t pixel; + // Scale the values so that values between + // 0 and 100 degrees are mapped to values + // between 0 and 255 + cmap(data[j + i * width], max_value, &pixel); + *row++ = pixel.red; + *row++ = pixel.green; + *row++ = pixel.blue; + } + } + + png_init_io(pngstruct_ptr, fp); + png_set_rows(pngstruct_ptr, pnginfo_ptr, row_pointers); + png_write_png(pngstruct_ptr, pnginfo_ptr, + PNG_TRANSFORM_IDENTITY, NULL); + + status = 0; + + for (i = 0; i < height; i++) { + png_free(pngstruct_ptr, row_pointers[i]); + } + png_free(pngstruct_ptr, row_pointers); + +setjmp_failed: +pnginfo_create_failed: + png_destroy_write_struct(&pngstruct_ptr, &pnginfo_ptr); +pngstruct_create_failed: + fclose(fp); +fopen_failed: + return status; +} + +/* This routine sets the RGB values for the pixel_t structure using + * the colormap data heat_colormap. If the value is outside the + * acceptable png values 0,255 blue or red color is used instead. */ +void cmap(const int value, const int max_value, pixel_t *pix) +{ + int ival; + + ival = ((double) value / (double) max_value) * 255; + /* Check for wrong values */ + if (ival > 255) { + ival = 255; + } + if (ival < 0) { + ival = 0; + } + + /* Pick color from heat_colormap */ + pix->red = heat_colormap[ival][0]; + pix->green = heat_colormap[ival][1]; + pix->blue = heat_colormap[ival][2]; +} diff --git a/tasks/fortran/solution/pngwriter.h b/tasks/fortran/solution/pngwriter.h new file mode 100644 index 0000000000000000000000000000000000000000..9abe52b7e5e83e93f9f8802c1ededbaf190c26fa --- /dev/null +++ b/tasks/fortran/solution/pngwriter.h @@ -0,0 +1,6 @@ +#ifndef PNGWRITER_H_ +#define PNGWRITER_H_ + +int save_png(int *data, const int nx, const int ny, const char *fname); + +#endif diff --git a/tasks/fortran/solution/pngwriter_mod.F90 b/tasks/fortran/solution/pngwriter_mod.F90 new file mode 100644 index 0000000000000000000000000000000000000000..5c6054e407869133543b23a8dfd9ecb7acb47d75 --- /dev/null +++ b/tasks/fortran/solution/pngwriter_mod.F90 @@ -0,0 +1,40 @@ +! PNG writer +module pngwriter + +contains + + function save_png(data, nx, ny, fname) result(stat) + + use, intrinsic :: ISO_C_BINDING + implicit none + + integer, dimension(:,:), intent(in) :: data + integer, intent(in) :: nx, ny + character(len=*), intent(in) :: fname + integer :: stat + + ! Interface for save_png C-function + interface + ! The C-function definition is + ! int save_png(double *data, const int nx, const int ny, + ! const char *fname) + function save_png_c(data, nx, ny, fname, order) & + & bind(C,name="save_png") result(stat) + use, intrinsic :: ISO_C_BINDING + implicit none + integer(kind=C_INT) :: data(*) + integer(kind=C_INT), value, intent(IN) :: nx, ny + character(kind=C_CHAR), intent(IN) :: fname(*) + character(kind=C_CHAR), value, intent(IN) :: order + integer(kind=C_INT) :: stat + end function save_png_c + end interface + + stat = save_png_c(data, nx, ny, trim(fname) // C_NULL_CHAR, 'f') + if (stat /= 0) then + write(*,*) 'save_png returned error!' + end if + + end function save_png + +end module pngwriter diff --git a/work-sharing/LICENSE.txt b/work-sharing/LICENSE.txt new file mode 100644 index 0000000000000000000000000000000000000000..026e9d4a227009a7e61053b626b7d7c1bfa443cc --- /dev/null +++ b/work-sharing/LICENSE.txt @@ -0,0 +1,15 @@ + +Copyright (C) 2018 CSC - IT Center for Science Ltd. + +Licensed under the terms of the GNU General Public License as +published by the Free Software Foundation, either version 3 of the +License, or (at your option) any later version. + +Code is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +Copy of the GNU General Public License can be obtained from +. + diff --git a/work-sharing/README.md b/work-sharing/README.md new file mode 100644 index 0000000000000000000000000000000000000000..a64b48428b8ff68e0568b4f32e406b7f7d6a8bff --- /dev/null +++ b/work-sharing/README.md @@ -0,0 +1,6 @@ +## Work sharing for a simple loop ## + +In [sum.c](sum.c) (or [sum.F90](sum.F90) for Fortran) is a skeleton +implementation for a simple summation of two vectors, C=A+B. Add the +computation loop and add the parallel region with work sharing +directives so that the vector addition is executed in parallel. diff --git a/work-sharing/solution/sum.F90 b/work-sharing/solution/sum.F90 new file mode 100644 index 0000000000000000000000000000000000000000..6c61b35fad87787129da5486a24f3147e69e6755 --- /dev/null +++ b/work-sharing/solution/sum.F90 @@ -0,0 +1,26 @@ +program vectorsum + implicit none + integer, parameter :: rk = kind(1d0) + integer, parameter :: ik = selected_int_kind(9) + integer, parameter :: nx = 102400 + + real(kind=rk), dimension(nx) :: vecA, vecB, vecC + real(kind=rk) :: sum + integer(kind=ik) :: i + + ! Initialization of vectors + do i = 1, nx + vecA(i) = 1.0_rk/(real(nx - i + 1, kind=rk)) + vecB(i) = vecA(i)**2 + end do + + !$omp parallel do default(shared) private(i) + do i = 1, nx + vecC(i) = vecA(i) * vecB(i) + end do + !$omp end parallel do + + ! Compute the check value + write(*,*) 'Reduction sum: ', sum(vecC) + +end program vectorsum diff --git a/work-sharing/solution/sum.c b/work-sharing/solution/sum.c new file mode 100644 index 0000000000000000000000000000000000000000..957d767a57a23de733360be4a5ed15668407a4c2 --- /dev/null +++ b/work-sharing/solution/sum.c @@ -0,0 +1,30 @@ +#include + +#define NX 102400 + +int main(void) +{ + double vecA[NX], vecB[NX], vecC[NX]; + double sum; + int i; + + /* Initialization of the vectors */ + for (i = 0; i < NX; i++) { + vecA[i] = 1.0 / ((double)(NX - i)); + vecB[i] = vecA[i] * vecA[i]; + } + + #pragma omp parallel for default(shared) private(i) + for (i = 0; i < NX; i++) { + vecC[i] = vecA[i] * vecB[i]; + } + + sum = 0.0; + /* Compute the check value */ + for (i = 0; i < NX; i++) { + sum += vecC[i]; + } + printf("Reduction sum: %18.16f\n", sum); + + return 0; +} diff --git a/work-sharing/sum.F90 b/work-sharing/sum.F90 new file mode 100644 index 0000000000000000000000000000000000000000..f527169555980efb0d28fd10262b560b04c38aa4 --- /dev/null +++ b/work-sharing/sum.F90 @@ -0,0 +1,24 @@ +program vectorsum + implicit none + integer, parameter :: rk = kind(1d0) + integer, parameter :: ik = selected_int_kind(9) + integer, parameter :: nx = 102400 + + real(kind=rk), dimension(nx) :: vecA, vecB, vecC + real(kind=rk) :: sum + integer(kind=ik) :: i + + ! Initialization of vectors + do i = 1, nx + vecA(i) = 1.0_rk/(real(nx - i + 1, kind=rk)) + vecB(i) = vecA(i)**2 + end do + + ! TODO: + ! Implement here the parallelized version of vector addition, + ! vecC = vecA + vecB + + ! Compute the check value + write(*,*) 'Reduction sum: ', sum(vecC) + +end program vectorsum diff --git a/work-sharing/sum.c b/work-sharing/sum.c new file mode 100644 index 0000000000000000000000000000000000000000..05bea390bef8ea2bbd71b8518035584f07c83b59 --- /dev/null +++ b/work-sharing/sum.c @@ -0,0 +1,30 @@ +#include + +#define NX 102400 + +int main(void) +{ + double vecA[NX], vecB[NX], vecC[NX]; + double sum; + int i; + + /* Initialization of the vectors */ + for (i = 0; i < NX; i++) { + vecA[i] = 1.0 / ((double)(NX - i)); + vecB[i] = vecA[i] * vecA[i]; + } + + /* TODO: + * Implement here a parallelized version of vector addition, + * vecC = vecA + vecB + */ + + sum = 0.0; + /* Compute the check value */ + for (i = 0; i < NX; i++) { + sum += vecC[i]; + } + printf("Reduction sum: %18.16f\n", sum); + + return 0; +}