Commit 541cc903 authored by Mariusz Uchronski's avatar Mariusz Uchronski
Browse files

Added OpenMP (Fortran 90) and C++ threads code samples for gemm.

parent a8581ed3
# ==================================================================================================
# This file is part of the CodeVault project. The project is licensed under Apache Version 2.0.
# CodeVault is part of the EU-project PRACE-4IP (WP7.3.C).
#
# Author(s):
# Mariusz Uchronski <mariusz.uchronski@pwr.edu.pl>
#
# ==================================================================================================
cmake_minimum_required(VERSION 2.8.7 FATAL_ERROR)
include(${CMAKE_CURRENT_SOURCE_DIR}/../../../cmake/common.cmake)

# ==================================================================================================

# Use the default dwarf prefix when the including project did not provide one.
if ("${DWARF_PREFIX}" STREQUAL "")
  set(DWARF_PREFIX dense_linear_algebra)
endif()

find_package(Common)

# Fortran is enabled before looking for OpenMP so that FindOpenMP can detect the
# Fortran-specific flag (OpenMP_Fortran_FLAGS) for the compiler that is actually in use.
enable_language(Fortran)
find_package(OpenMP)

set(NAME ${DWARF_PREFIX}_gemm_openmp_fortran)

if (OPENMP_FOUND)
  # The only source of this target is Fortran, so the OpenMP flag has to go to the Fortran
  # compiler; without it the !$OMP directives in gemm.f90 are treated as plain comments.
  set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} ${OpenMP_Fortran_FLAGS}")
  add_executable(${NAME} src/gemm.f90)
  install(TARGETS ${NAME} DESTINATION bin)
else()
  # Report the real target name and register a placeholder install rule instead.
  message("## Skipping '${NAME}': no OpenMP support found")
  dummy_install(${NAME} "OpenMP")
endif()

unset(NAME)
=======
README
=======
# 1. Code sample name
gemm
# 2. Description of the code sample package
This example demonstrates the use of Fortran 90 with OpenMP for matrix-matrix multiplication
# 3. Release date
15 August 2016
# 4. Version history
1.0
# 6. Copyright / License of the code sample
Apache Version 2.0
# 5. Contributor (s) / Maintainer(s)
Mariusz Uchronski <mariusz.uchronski@pwr.edu.pl>
# 7. Language(s)
Fortran 90
# 8. Parallelisation Implementation(s)
OpenMP
# 9. Level of the code sample complexity
basic
# 10. Instructions on how to compile the code
Uses the CodeVault CMake infrastructure, see main README.md
# 11. Instructions on how to run the code
Just run compiled executable
# 12. Sample input(s)
Randomly generated on sample code execution
# 13. Sample output(s)
Not available
! =================================================================================================
! This file is part of the CodeVault project. The project is licensed under Apache Version 2.0.
! CodeVault is part of the EU-project PRACE-4IP (WP7.3.C).
!
! Author(s):
! Mariusz Uchronski <mariusz.uchronski@pwr.edu.pl>
!
! This example demonstrates the use of Fortran 90 with OpenMP for matrix-matrix multiplication.
! The example is set-up to perform single precision matrix-matrix multiplication.
!
! See [http://openmp.org] for the full OpenMP documentation.
!
! =================================================================================================
! Driver program: allocates three flat, single precision matrices (a is n x m, b is m x k,
! c is n x k), fills a and b with pseudo-random values and multiplies them with
! run_gemm_openmp. The result is computed but not printed.
program gemm_openmp
  implicit none

  ! Fixed seed so that every run multiplies the same input matrices.
  integer, parameter :: seed = 86456
  ! Matrix dimensions.
  integer, parameter :: n = 400
  integer, parameter :: m = 600
  integer, parameter :: k = 800

  real, allocatable :: a(:)
  real, allocatable :: b(:)
  real, allocatable :: c(:)

  allocate(a(n*m))
  allocate(b(m*k))
  allocate(c(n*k))

  ! Seed the generator behind rand() (used by fill_random); the seed constant was
  ! previously declared but never applied.
  call srand(seed)

  call fill_random(a, n, m)
  call fill_random(b, m, k)

  call run_gemm_openmp(a, b, c, n, m, k)

  deallocate(a)
  deallocate(b)
  deallocate(c)
end program gemm_openmp
! Fills the n x m matrix stored in the flat array a with values returned by rand().
!   a    : flat array of n*m elements, overwritten element by element
!   n, m : number of rows and columns
subroutine fill_random(a, n, m)
  implicit none
  integer, intent(in) :: n, m
  real, dimension(n*m), intent(inout) :: a
  integer :: row, col, base

  do row = 1, n
    ! Offset of the first element of this row in the flat array.
    base = (row - 1) * m
    do col = 1, m
      a(base + col) = rand()
    end do
  end do
end subroutine fill_random
! Prints the n x m matrix stored in the flat array a to standard output, one matrix row
! per output line, each element formatted as F8.4.
!   a    : flat array of n*m elements (read only)
!   n, m : number of rows and columns
subroutine print_array(a, n, m)
  implicit none
  integer, intent(in) :: n, m
  real, dimension(n*m), intent(in) :: a
  integer :: row, col, base

  do row = 1, n
    base = (row - 1) * m
    do col = 1, m
      ! The trailing $ in the format keeps the cursor on the current line.
      write (*, '(F8.4,$)') a(base + col)
    end do
    ! Terminate the row.
    write (*, *) ''
  end do
end subroutine print_array
! Computes c = a * b for matrices stored as flat arrays, with the outer (row) loop
! distributed over OpenMP threads.
!   a       : n x m input matrix, element (i,l) stored at a((i-1)*m+l)
!   b       : m x k input matrix, element (l,j) stored at b((l-1)*k+j)
!   c       : n x k output matrix, element (i,j) stored at c((i-1)*k+j); every element
!             is overwritten
!   n, m, k : matrix dimensions
subroutine run_gemm_openmp(a, b, c, n, m, k)
  ! Added for consistency with the other routines of this file.
  implicit none
  integer, intent(in) :: n, m, k
  real, dimension(n*m), intent(in) :: a
  real, dimension(m*k), intent(in) :: b
  real, dimension(n*k), intent(inout) :: c
  integer :: i, j, l
  real :: summ

  ! Each iteration of the i loop writes a distinct row of c, so the rows can be computed
  ! independently; the loop indices and the accumulator are private to each thread.
  !$OMP PARALLEL DO DEFAULT (NONE) &
  !$OMP SHARED(n,m,k,a,b,c) PRIVATE(i,j,l,summ)
  do i=1, n
    do j=1, k
      ! Dot product of row i of a with column j of b.
      summ = 0.0
      do l=1, m
        summ = summ + a((i-1)*m+l) * b((l-1)*k+j)
      end do
      c((i-1)*k+j) = summ
    end do
  end do
  !$OMP END PARALLEL DO
end subroutine run_gemm_openmp
# ==================================================================================================
# This file is part of the CodeVault project. The project is licensed under Apache Version 2.0.
# CodeVault is part of the EU-project PRACE-4IP (WP7.3.C).
#
# Author(s):
# Mariusz Uchronski <mariusz.uchronski@pwr.edu.pl>
#
# ==================================================================================================
cmake_minimum_required(VERSION 2.8.7 FATAL_ERROR)
include(${CMAKE_CURRENT_SOURCE_DIR}/../../../cmake/common.cmake)

# ==================================================================================================

# Use a default prefix when the including project did not provide one.
# NOTE(review): the sibling Fortran sample defaults to 'dense_linear_algebra'; with the
# default below the target is named 'gemm_threads_gemm_threads' - confirm this is intended.
if ("${DWARF_PREFIX}" STREQUAL "")
  set(DWARF_PREFIX gemm_threads)
endif()

find_package(Common)

# The only source of this target is C++ (src/gemm.cpp), so the C++ compiler is what must
# be enabled here.
enable_language(CXX)

set(NAME ${DWARF_PREFIX}_gemm_threads)

# Warnings, C++11 (std::thread, <random>), debug info and pthread support.
set(CXX_FLAGS "${CXX_FLAGS} -Wall -Wno-comment -std=c++0x -g -pthread")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CXX_FLAGS}")

add_executable(${NAME} src/gemm.cpp)
install(TARGETS ${NAME} DESTINATION bin)

unset(NAME)
=======
README
=======
# 1. Code sample name
gemm
# 2. Description of the code sample package
This example demonstrates the use of C++ threads for matrix-matrix multiplication.
# 3. Release date
15 August 2016
# 4. Version history
1.0
# 6. Copyright / License of the code sample
Apache Version 2.0
# 5. Contributor (s) / Maintainer(s)
Mariusz Uchronski <mariusz.uchronski@pwr.edu.pl>
# 7. Language(s)
C++ 11
# 8. Parallelisation Implementation(s)
C++ threads (std::thread)
# 9. Level of the code sample complexity
basic
# 10. Instructions on how to compile the code
Uses the CodeVault CMake infrastructure, see main README.md
# 11. Instructions on how to run the code
Just run compiled executable
# 12. Sample input(s)
Randomly generated on sample code execution
# 13. Sample output(s)
Not available
// =================================================================================================
// This file is part of the CodeVault project. The project is licensed under Apache Version 2.0.
// CodeVault is part of the EU-project PRACE-4IP (WP7.3.C).
//
// Author(s):
// Mariusz Uchronski <mariusz.uchronski@pwr.edu.pl>
//
// This example demonstrates the use of C++ threads for matrix-matrix multiplication.
// The example is set-up to perform single precision matrix-matrix multiplication.
//
// See [http://www.cplusplus.com/reference/thread/thread/] for the full C++ threads documentation.
//
// =================================================================================================
#include <algorithm>
#include <ctime>
#include <functional>
#include <iostream>
#include <random>
#include <thread>
#include <vector>
// Fills the n x m row-major matrix `a` with pseudo-random single precision values drawn
// from a default-constructed std::uniform_real_distribution<float>.
//
//   a    - destination buffer holding at least n*m floats
//   n, m - number of rows and columns
//
// The engine is created and seeded exactly once and then shared by all calls. Seeding a
// fresh engine from the wall clock on every call gave two matrices filled within the same
// second identical value sequences. Because the engine is shared, this function must not
// be called concurrently from several threads.
void fill_random(float *a, const int &n, const int &m)
{
    static std::mt19937 engine(std::random_device{}());
    std::uniform_real_distribution<float> dist;

    for(int i=0; i<n; ++i)
    {
        for(int j=0; j<m; ++j)
        {
            a[i*m+j] = dist(engine);
        }
    }
}
// Worker routine: computes the rows of c = a * b that belong to worker `id`.
//
//   num_threads - total number of workers sharing the multiplication
//   id          - index of this worker, expected in [0, num_threads)
//   n, m, k     - a is n x m, b is m x k, c is n x k (all row-major)
//   a, b        - input matrices (only read)
//   c           - output matrix; this worker writes rows [begin, end) only
//
// Rows are split as evenly as possible: worker `id` handles rows
// [id*n/num_threads, (id+1)*n/num_threads). The ranges of all workers together cover every
// row exactly once, also when n is not a multiple of num_threads (the previous
// n/num_threads split silently dropped the trailing rows in that case). Invalid worker
// parameters result in no work being done.
void gemm_threads(const int &num_threads, const int &id,
                  const int &n, const int &m, const int &k,
                  float *a, float *b, float *c)
{
    if(num_threads <= 0 || id < 0 || id >= num_threads)
    {
        return;
    }

    // 64-bit intermediates keep id*n from overflowing before the division.
    const int begin = static_cast<int>(static_cast<long long>(id) * n / num_threads);
    const int end   = static_cast<int>(static_cast<long long>(id + 1) * n / num_threads);

    for(int i=begin; i<end; ++i)
    {
        for(int j=0; j<k; ++j)
        {
            // Dot product of row i of a with column j of b.
            float sum = 0.0f;
            for(int l=0; l<m; ++l)
            {
                sum = sum + a[i*m+l] * b[l*k+j];
            }
            c[i*k+j] = sum;
        }
    }
}
// Writes the n x m row-major matrix `a` to std::cout: one matrix row per output line,
// every element followed by a single space.
//
//   a    - matrix buffer holding at least n*m floats
//   n, m - number of rows and columns
void print_data(float *a, const int &n, const int &m)
{
    for(int row=0; row<n; ++row)
    {
        // Index of the first element of this row in the flat buffer.
        const int offset = row*m;
        for(int col=0; col<m; ++col)
        {
            std::cout << a[offset+col] << " ";
        }
        // End the line and flush the stream.
        std::cout << std::endl;
    }
}
// Entry point: multiplies two randomly filled single precision matrices
// (a: n x m, b: m x k) into c (n x k) using num_threads C++ threads, each of which runs
// gemm_threads on its own share of the rows. The result is computed but not printed.
// Command line arguments are ignored.
int main(int argc, char *argv[])
{
    (void)argc;
    (void)argv;

    const int n = 200;
    const int m = 300;
    const int k = 400;
    const int num_threads = 4;

    // std::vector owns the matrix storage, so it is released on every exit path without
    // manual delete[] calls.
    std::vector<float> a(n*m);
    std::vector<float> b(m*k);
    std::vector<float> c(n*k);

    fill_random(a.data(), n, m);
    fill_random(b.data(), m, k);

    std::vector<std::thread> threads;
    threads.reserve(num_threads);
    for(int i=0; i<num_threads; ++i)
    {
        threads.emplace_back(gemm_threads, num_threads, i, n, m, k,
                             a.data(), b.data(), c.data());
    }

    // Wait for all workers before the buffers go out of scope.
    for(std::thread &worker : threads)
    {
        worker.join();
    }

    return 0;
}
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment