Commit 7e79c47f authored by Rafal Gandecki's avatar Rafal Gandecki
Browse files

Added OpenMP code samples for gemm and LU decomposition(Doolittle algorithm)

parent a8581ed3
# ==================================================================================================
# This file is part of the CodeVault project. The project is licensed under Apache Version 2.0.
# CodeVault is part of the EU-project PRACE-4IP (WP7.3.C).
#
# Author(s):
# Rafal Gandecki <rafal.gandecki@pwr.edu.pl>
#
# ==================================================================================================
cmake_minimum_required(VERSION 2.8.7 FATAL_ERROR)
include(${CMAKE_CURRENT_SOURCE_DIR}/../../../cmake/common.cmake)
# ==================================================================================================
# Fall back to a default target name when DWARF_PREFIX is empty or unset.
if ("${DWARF_PREFIX}" STREQUAL "")
  set(DWARF_PREFIX gemm_openmp)
endif()
find_package(Common)
find_package(OpenMP)
enable_language (C)
set(NAME ${DWARF_PREFIX})
if (OPENMP_FOUND)
  # Use the flag detected by FindOpenMP for the active C++ compiler instead of
  # hard-coding the GCC spelling (-fopenmp), which other compilers reject.
  set(CXX_FLAGS "${CXX_FLAGS} -Wall -Wno-comment -std=c++0x ${OpenMP_CXX_FLAGS}")
  set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CXX_FLAGS}")
  add_executable(${NAME} src/gemm_openmp.cpp)
  install(TARGETS ${NAME} DESTINATION bin)
else ()
  # Without OpenMP the sample cannot be built; report it and define no target.
  message("## Skipping '${NAME}': no OpenMP support found")
endif()
unset(NAME)
README
=======
# 1. Code sample name
gemm_openmp
# 2. Description of the code sample package
This example demonstrates the use of OpenMP for matrix-matrix multiplication.
# 3. Release date
19 August 2016
# 4. Version history
1.0
# 6. Copyright / License of the code sample
Apache Version 2.0
# 5. Contributor (s) / Maintainer(s)
Rafal Gandecki <rafal.gandecki@pwr.edu.pl>
# 7. Language(s)
C++ 11
# 8. Parallelisation Implementation(s)
OpenMP
# 9. Level of the code sample complexity
basic
# 10. Instructions on how to compile the code
Uses the CodeVault CMake infrastructure, see main README.md
# 11. Instructions on how to run the code
Just run compiled executable
# 12. Sample input(s)
3 arguments:
- matrix A rows
- matrix A cols
- matrix B cols
# 13. Sample output(s)
execution time of the algorithm with and without OpenMP
// =================================================================================================
// This file is part of the CodeVault project. The project is licensed under Apache Version 2.0.
// CodeVault is part of the EU-project PRACE-4IP (WP7.3.C).
//
// Author(s):
// Rafal Gandecki <rafal.gandecki@pwr.edu.pl>
//
// This example demonstrates the use of OpenMP for matrix-matrix multiplication and
// compares execution time of algorithms.
// The example is set-up to perform single precision matrix-matrix multiplication.
// The example takes three input arguments (matrix A rows, matrix A cols, matrix B cols),
// specifying the size of the matrices.
// See [http://www.openmp.org/] for the full OpenMP documentation.
//
// =================================================================================================
#include <omp.h>
#include <random>
#include <iostream>
// Fills the n-by-m row-major matrix A with pseudo-random floats drawn from a
// default-constructed std::uniform_real_distribution<float>.
// The Mersenne Twister engine is seeded from the current time(nullptr) value,
// so every call creates a fresh engine from the wall clock.
//   A : destination buffer holding at least n*m floats
//   n : number of rows
//   m : number of columns (row stride)
void fill_random(float *A, const int &n, const int &m)
{
    std::mt19937 engine(static_cast<unsigned int>(time(nullptr)));
    std::uniform_real_distribution<float> dist;
    for (int row = 0; row < n; ++row)
    {
        float *row_begin = A + row * m;
        for (int col = 0; col < m; ++col)
        {
            row_begin[col] = dist(engine);
        }
    }
}
// Sequential reference matrix product C = A * B on row-major float buffers.
//   A : A_rows x A_cols input
//   B : A_cols x B_rows input
//   C : A_rows x B_rows output, every element is overwritten
// Note: despite its name, B_rows is used as the column count (row stride) of
// both B and C.
void gemm(float *A, float *B, float *C,
          const int &A_rows, const int &A_cols, const int &B_rows)
{
    for (int row = 0; row < A_rows; ++row)
    {
        const float *a_row = A + row * A_cols;
        float *c_row = C + row * B_rows;
        for (int col = 0; col < B_rows; ++col)
        {
            // Dot product of row `row` of A with column `col` of B.
            float acc = 0.0;
            for (int inner = 0; inner < A_cols; ++inner)
            {
                acc += a_row[inner] * B[inner * B_rows + col];
            }
            c_row[col] = acc;
        }
    }
}
// OpenMP matrix product C = A * B on row-major float buffers.
// The rows of C are distributed across threads by the `parallel for`; each
// iteration writes only its own row of C, and A and B are only read.
//   A : A_rows x A_cols input
//   B : A_cols x B_rows input
//   C : A_rows x B_rows output, every element is overwritten
// Note: despite its name, B_rows is used as the column count (row stride) of
// both B and C.
void gemm_OpenMP(float *A, float *B, float *C,
                 const int &A_rows, const int &A_cols, const int &B_rows)
{
    // Variables declared inside the loop body are private to each thread.
    #pragma omp parallel for shared(A, B, C, A_rows, A_cols, B_rows)
    for (int row = 0; row < A_rows; ++row)
    {
        const float *a_row = A + row * A_cols;
        float *c_row = C + row * B_rows;
        for (int col = 0; col < B_rows; ++col)
        {
            float acc = 0.0;
            for (int inner = 0; inner < A_cols; ++inner)
            {
                acc += a_row[inner] * B[inner * B_rows + col];
            }
            c_row[col] = acc;
        }
    }
}
// Benchmark driver: multiplies two random matrices with the OpenMP and the
// sequential gemm and prints the wall-clock time of each.
// Command line: <A rows> <A cols> <B cols>; B has as many rows as A has columns.
// Returns 1 on a wrong argument count or a non-positive dimension, 0 otherwise.
int main(int argc, char **argv)
{
    if (argc != 4)
    {
        std::cout << "Usage: 3 arguments: matrix A rows, matrix A cols and matrix B cols"<< std::endl;
        return 1;
    }

    const int A_rows = atoi(argv[1]);
    const int A_cols = atoi(argv[2]);
    const int B_rows = atoi(argv[2]);
    const int B_cols = atoi(argv[3]);

    // atoi yields 0 for non-numeric text and may yield negative values; either
    // would give an empty benchmark or an invalid array size for new[].
    if (A_rows <= 0 || A_cols <= 0 || B_cols <= 0)
    {
        std::cout << "Error: all matrix dimensions must be positive integers" << std::endl;
        return 1;
    }

    // NOTE(review): the element counts are computed in int, so very large
    // dimensions can overflow; the kernels index with int as well.
    float *A = new float[A_rows*A_cols];
    float *B = new float[B_rows*B_cols];
    float *C = new float[A_rows*B_cols];
    fill_random(A, A_rows, A_cols);
    fill_random(B, B_rows, B_cols);

    // Time the OpenMP version.
    double dtime = omp_get_wtime();
    gemm_OpenMP(A, B, C, A_rows, A_cols, B_cols);
    dtime = omp_get_wtime() - dtime;
    std::cout << "Time with OpenMp: " << dtime << std::endl;

    // Time the sequential version on the same inputs (C is overwritten).
    dtime = omp_get_wtime();
    gemm(A,B,C, A_rows, A_cols, B_cols);
    dtime = omp_get_wtime() - dtime;
    std::cout << "Time without OpenMP: " << dtime << std::endl;

    delete[] A;
    delete[] B;
    delete[] C;
    return 0;
}
# ==================================================================================================
# This file is part of the CodeVault project. The project is licensed under Apache Version 2.0.
# CodeVault is part of the EU-project PRACE-4IP (WP7.3.C).
#
# Author(s):
# Rafal Gandecki <rafal.gandecki@pwr.edu.pl>
#
# ==================================================================================================
cmake_minimum_required(VERSION 2.8.7 FATAL_ERROR)
include(${CMAKE_CURRENT_SOURCE_DIR}/../../../cmake/common.cmake)
# ==================================================================================================
# Fall back to a default target name when DWARF_PREFIX is empty or unset.
if ("${DWARF_PREFIX}" STREQUAL "")
  set(DWARF_PREFIX lud_openmp)
endif()
find_package(Common)
find_package(OpenMP)
enable_language (C)
set(NAME ${DWARF_PREFIX})
if (OPENMP_FOUND)
  # Use the flag detected by FindOpenMP for the active C++ compiler instead of
  # hard-coding the GCC spelling (-fopenmp), which other compilers reject.
  set(CXX_FLAGS "${CXX_FLAGS} -Wall -Wno-comment -std=c++0x ${OpenMP_CXX_FLAGS}")
  set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CXX_FLAGS}")
  add_executable(${NAME} src/lud_openmp.cpp)
  install(TARGETS ${NAME} DESTINATION bin)
else()
  # Without OpenMP the sample cannot be built; report it and define no target.
  message("## Skipping '${NAME}': no OpenMP support found")
endif()
unset(NAME)
README
=======
# 1. Code sample name
lud_openmp
# 2. Description of the code sample package
This example demonstrates the use of OpenMP for LU decomposition (Doolittle algorithm).
# 3. Release date
19 August 2016
# 4. Version history
1.0
# 6. Copyright / License of the code sample
Apache Version 2.0
# 5. Contributor (s) / Maintainer(s)
Rafal Gandecki <rafal.gandecki@pwr.edu.pl>
# 7. Language(s)
C++ 11
# 8. Parallelisation Implementation(s)
OpenMP
# 9. Level of the code sample complexity
basic
# 10. Instructions on how to compile the code
Uses the CodeVault CMake infrastructure, see main README.md
# 11. Instructions on how to run the code
Just run compiled executable
# 12. Sample input(s)
1 argument:
- matrix size
# 13. Sample output(s)
execution time of the algorithms with and without OpenMP
~
~
~
~
~
~
// =================================================================================================
// This file is part of the CodeVault project. The project is licensed under Apache Version 2.0.
// CodeVault is part of the EU-project PRACE-4IP (WP7.3.C).
//
// Author(s):
// Rafal Gandecki <rafal.gandecki@pwr.edu.pl>
//
// This example demonstrates the use of OpenMP for LU decomposition (Doolittle algorithm) and
// compares execution time.
// The example takes a single input argument, specifying the size of the matrices.
//
// See [http://www.openmp.org/] for the full OpenMP documentation.
//
// =================================================================================================
#include <omp.h>
#include <random>
#include <iostream>
// Writes one pseudo-random float into every element of the n-by-m row-major
// matrix A. Values come from a default-constructed
// std::uniform_real_distribution<float> driven by a std::mt19937 engine that is
// seeded with the current time(nullptr) value on each call.
//   A : destination buffer holding at least n*m floats
//   n : number of rows
//   m : number of columns (row stride)
void fill_random(float *A, const int &n, const int &m)
{
    std::mt19937 generator(static_cast<unsigned int>(time(nullptr)));
    std::uniform_real_distribution<float> uniform;
    for (int r = 0; r < n; ++r)
    {
        const int row_offset = r * m;
        for (int c = 0; c < m; ++c)
        {
            A[row_offset + c] = uniform(generator);
        }
    }
}
// Sequential LU decomposition (Doolittle scheme, no pivoting) of the n-by-n
// row-major matrix A into a unit lower-triangular L and an upper-triangular U
// with A = L * U.
//   A : input matrix, only read
//   L : output, ones on the diagonal and zeros above it
//   U : output, zeros below the diagonal
//   n : matrix dimension
// Every element of L and U is written. No pivoting is performed, so a zero
// diagonal entry U[i][i] leads to a division by zero (inf/NaN results).
// L and U are assumed not to alias each other or A.
void lud(float *A, float *L, float *U, const int &n)
{
    for (int i = 0; i < n; i++)
    {
        // Row i of U. Entries left of the diagonal are zero by definition, so
        // they are stored as exact zeros instead of being computed (which
        // would leave floating-point round-off residue below the diagonal).
        for (int j = 0; j < n; j++)
        {
            if (j < i)
            {
                U[i*n+j] = 0;
            }
            else
            {
                float u = A[i*n+j];
                for (int k = 0; k < i; k++)
                {
                    u -= U[k*n+j] * L[i*n+k];
                }
                U[i*n+j] = u;
            }
        }

        // Column i of L, using the pivot U[i][i] computed just above.
        const float pivot = U[i*n+i];
        for (int j = 0; j < n; j++)
        {
            if (j < i)
            {
                L[j*n+i] = 0;
            }
            else if (j == i)
            {
                L[j*n+i] = 1;
            }
            else
            {
                float l = A[j*n+i] / pivot;
                for (int k = 0; k < i; k++)
                {
                    l -= ((U[k*n+i] * L[j*n+k]) / pivot);
                }
                L[j*n+i] = l;
            }
        }
    }
}
// OpenMP LU decomposition (Doolittle scheme, no pivoting) of the n-by-n
// row-major matrix A into a unit lower-triangular L and an upper-triangular U
// with A = L * U.
//   A : input matrix, only read
//   L : output, ones on the diagonal and zeros above it
//   U : output, zeros below the diagonal
//   n : matrix dimension
// Step i needs the rows of U and the columns of L produced by all steps k < i,
// so the outer loop must run in order; parallelising it is a data race. Only
// the independent per-element work inside one step (row i of U, then column i
// of L) is shared between threads. The implicit barrier at the end of each
// `omp for` guarantees row i of U is complete before column i of L reads the
// pivot, and that column i of L is complete before step i+1 starts.
// No pivoting is performed, so a zero diagonal entry U[i][i] leads to a
// division by zero (inf/NaN results). L and U are assumed not to alias each
// other or A.
void lud_OpenMP(float *A, float *L, float *U, const int &n)
{
    #pragma omp parallel
    {
        // Every thread runs the same outer loop with its own private i and
        // meets the work-sharing loops below in the same order.
        for (int i = 0; i < n; i++)
        {
            // Row i of U; entries left of the diagonal are exact zeros.
            #pragma omp for
            for (int j = 0; j < n; j++)
            {
                if (j < i)
                {
                    U[i*n+j] = 0;
                }
                else
                {
                    float u = A[i*n+j];
                    for (int k = 0; k < i; k++)
                    {
                        u -= U[k*n+j] * L[i*n+k];
                    }
                    U[i*n+j] = u;
                }
            }

            // Safe to read: the barrier above completed row i of U.
            const float pivot = U[i*n+i];

            // Column i of L.
            #pragma omp for
            for (int j = 0; j < n; j++)
            {
                if (j < i)
                {
                    L[j*n+i] = 0;
                }
                else if (j == i)
                {
                    L[j*n+i] = 1;
                }
                else
                {
                    float l = A[j*n+i] / pivot;
                    for (int k = 0; k < i; k++)
                    {
                        l -= ((U[k*n+i] * L[j*n+k]) / pivot);
                    }
                    L[j*n+i] = l;
                }
            }
        }
    }
}
// Benchmark driver: LU-decomposes one random n-by-n matrix with the sequential
// and the OpenMP routine and prints the wall-clock time of each.
// Command line: <matrix size>.
// Returns 1 on a wrong argument count or a non-positive size, 0 otherwise.
int main(int argc, char **argv)
{
    if (argc != 2)
    {
        std::cout << "Usage: 1 argument: matrix size" << std::endl;
        return 1;
    }

    const int n = atoi(argv[1]);

    // atoi yields 0 for non-numeric text and may yield a negative value; either
    // would give an empty benchmark or an invalid array size for new[].
    if (n <= 0)
    {
        std::cout << "Error: matrix size must be a positive integer" << std::endl;
        return 1;
    }

    // NOTE(review): n*n is computed in int, so very large sizes can overflow;
    // the decomposition routines index with int as well.
    float *A = new float[n*n];
    float *L = new float[n*n];
    float *U = new float[n*n];
    fill_random(A, n, n);

    // Time the sequential version.
    double dtime = omp_get_wtime();
    lud(A, L, U, n);
    dtime = omp_get_wtime() - dtime;
    std::cout << "Time without OpenMP: " << dtime << std::endl;

    // Time the OpenMP version on the same input (L and U are overwritten).
    dtime = omp_get_wtime();
    lud_OpenMP(A, L, U, n);
    dtime = omp_get_wtime() - dtime;
    std::cout << "Time with OpenMP: " << dtime << std::endl;

    delete[] A;
    delete[] L;
    delete[] U;
    return 0;
}
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment