Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
CodeVault
hpc-kernels
dense_linear_algebra
Commits
7e79c47f
Commit
7e79c47f
authored
Aug 22, 2016
by
Rafal Gandecki
Browse files
Added OpenMP code samples for gemm and LU decomposition(Doolittle algorithm)
parent
a8581ed3
Changes
6
Hide whitespace changes
Inline
Side-by-side
gemm/gemm_openmp/CMakeLists.txt
0 → 100644
View file @
7e79c47f
# ==================================================================================================
# This file is part of the CodeVault project. The project is licensed under Apache Version 2.0.
# CodeVault is part of the EU-project PRACE-4IP (WP7.3.C).
#
# Author(s):
# Rafal Gandecki <rafal.gandecki@pwr.edu.pl>
#
# ==================================================================================================
cmake_minimum_required(VERSION 2.8.7 FATAL_ERROR)

include(${CMAKE_CURRENT_SOURCE_DIR}/../../../cmake/common.cmake)

# ==================================================================================================

# Fall back to a default target name when no prefix was supplied from outside.
if ("${DWARF_PREFIX}" STREQUAL "")
  set(DWARF_PREFIX gemm_openmp)
endif()

find_package(Common)
find_package(OpenMP)
enable_language(C)

set(NAME ${DWARF_PREFIX})

if (OPENMP_FOUND)
  # Use the flag reported by FindOpenMP instead of a hard-coded -fopenmp, so compilers that
  # spell the option differently still build the sample with OpenMP enabled.
  # NOTE(review): assumes the C++ language is already enabled when find_package(OpenMP) runs
  # (presumably by common.cmake or a parent project) - confirm.
  set(CXX_FLAGS "${CXX_FLAGS} -Wall -Wno-comment -std=c++0x ${OpenMP_CXX_FLAGS}")
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CXX_FLAGS}")
  add_executable(${NAME} src/gemm_openmp.cpp)
  install(TARGETS ${NAME} DESTINATION bin)
else ()
  message("## Skipping '${NAME}': no OpenMP support found")
endif()

# Do not leak the helper variable into other scripts processed afterwards.
unset(NAME)
gemm/gemm_openmp/README.md
0 → 100644
View file @
7e79c47f
README
=======
# 1. Code sample name
gemm_openmp
# 2. Description of the code sample package
This example demonstrates the use of OpenMP for matrix-matrix multiplication.
# 3. Release date
19 August 2016
# 4. Version history
1.
0
# 6. Copyright / License of the code sample
Apache Version 2.0
# 5. Contributor (s) / Maintainer(s)
Rafal Gandecki
<rafal.gandecki@pwr.edu.pl>
# 7. Language(s)
C++ 11
# 8. Parallelisation Implementation(s)
OpenMP
# 9. Level of the code sample complexity
basic
# 10. Instructions on how to compile the code
Uses the CodeVault CMake infrastructure, see main README.md
# 11. Instructions on how to run the code
Just run compiled executable
# 12. Sample input(s)
3 arguments:
-
matrix A rows
-
matrix A cols
-
matrix B cols
# 13. Sample output(s)
execution time of the algorithm with and without OpenMP
gemm/gemm_openmp/src/gemm_openmp.cpp
0 → 100644
View file @
7e79c47f
// =================================================================================================
// This file is part of the CodeVault project. The project is licensed under Apache Version 2.0.
// CodeVault is part of the EU-project PRACE-4IP (WP7.3.C).
//
// Author(s):
// Rafal Gandecki <rafal.gandecki@pwr.edu.pl>
//
// This example demonstrates the use of OpenMP for matrix-matrix multiplication and
// compares execution time of algorithms.
// The example is set-up to perform single precision matrix-matrix multiplication.
// The example takes three input arguments (matrix A rows, matrix A cols, matrix B cols),
// specifying the size of the matrices.
// See [http://www.openmp.org/] for the full OpenMP documentation.
//
// =================================================================================================
#include
<omp.h>
#include
<random>
#include
<iostream>
// Fills the first n * m entries of A, addressed as n rows of m floats, with pseudo-random
// numbers.
//
// Every call builds a new std::mt19937 engine seeded with the current value of time(nullptr)
// and draws from a default-constructed std::uniform_real_distribution<float>, so two calls
// made while time() reports the same value produce the same sequence.
//
// A  destination buffer; elements 0 .. n*m - 1 are overwritten
// n  number of rows to fill
// m  number of columns per row (also the row stride)
void fill_random(float *A, const int &n, const int &m) {
    std::mt19937 engine(static_cast<unsigned int>(time(nullptr)));
    std::uniform_real_distribution<float> dist;

    for (int row = 0; row < n; ++row) {
        for (int col = 0; col < m; ++col) {
            const int idx = row * m + col;
            A[idx] = dist(engine);
        }
    }
}
// Serial reference implementation of the matrix product C = A * B on row-major
// single-precision buffers.
//
// A       left operand, read as A_rows x A_cols
// B       right operand, read with a row stride of B_rows
// C       result, written with a row stride of B_rows
// A_rows  number of rows of A and of C
// A_cols  number of columns of A, i.e. the length of every dot product
// B_rows  despite the name, this is used as the number of columns (row stride) of B and C;
//         main() passes its B_cols value here
void gemm(float *A, float *B, float *C, const int &A_rows, const int &A_cols, const int &B_rows) {
    for (int row = 0; row < A_rows; row++) {
        for (int col = 0; col < B_rows; col++) {
            // Dot product of row `row` of A with column `col` of B.
            float acc = 0.0;
            for (int inner = 0; inner < A_cols; inner++) {
                acc += A[row * A_cols + inner] * B[inner * B_rows + col];
            }
            C[row * B_rows + col] = acc;
        }
    }
}
// OpenMP variant of the matrix product C = A * B on row-major single-precision buffers.
//
// The outer loop over the rows of C is split among the threads of a `parallel for` region.
// The three loop counters are private to each thread; the matrices and the dimensions are
// shared. Each element of C is written by exactly one outer iteration.
//
// A       left operand, read as A_rows x A_cols
// B       right operand, read with a row stride of B_rows
// C       result, written with a row stride of B_rows
// A_rows  number of rows of A and of C
// A_cols  number of columns of A, i.e. the length of every dot product
// B_rows  despite the name, this is used as the number of columns (row stride) of B and C;
//         main() passes its B_cols value here
void gemm_OpenMP(float *A, float *B, float *C, const int &A_rows, const int &A_cols, const int &B_rows) {
    int row, col, inner;

#pragma omp parallel for shared(A, B, C, A_rows, A_cols, B_rows) private(row, col, inner)
    for (row = 0; row < A_rows; row++) {
        for (col = 0; col < B_rows; col++) {
            // Dot product of row `row` of A with column `col` of B.
            float acc = 0.0;
            for (inner = 0; inner < A_cols; inner++) {
                acc += A[row * A_cols + inner] * B[inner * B_rows + col];
            }
            C[row * B_rows + col] = acc;
        }
    }
}
// Entry point of the gemm sample: reads the three matrix dimensions, fills A and B with
// random data and prints the wall-clock time of the OpenMP and of the serial multiplication.
//
// argv[1]  rows of A
// argv[2]  columns of A, which is also the row count of B
// argv[3]  columns of B
//
// Returns 0 on success and 1 when the arguments are missing or unusable.
int main(int argc, char **argv) {
    if (argc != 4) {
        std::cout << "Usage: 3 arguments: matrix A rows, matrix A cols and matrix B cols" << std::endl;
        return 1;
    }

    const int A_rows = atoi(argv[1]);
    const int A_cols = atoi(argv[2]);
    const int B_rows = A_cols;  // the inner dimensions of the product must agree
    const int B_cols = atoi(argv[3]);

    // atoi returns 0 for text that is not a number, and a zero or negative dimension must
    // not reach the array allocations below.
    if (A_rows <= 0 || A_cols <= 0 || B_cols <= 0) {
        std::cerr << "Error: all matrix dimensions must be positive integers" << std::endl;
        return 1;
    }

    // Element counts are formed in 64-bit arithmetic so the products cannot overflow. The
    // kernels index the buffers with int, so a count that changes when narrowed to int
    // describes a matrix they cannot address.
    const long long a_count = static_cast<long long>(A_rows) * A_cols;
    const long long b_count = static_cast<long long>(B_rows) * B_cols;
    const long long c_count = static_cast<long long>(A_rows) * B_cols;
    if (a_count != static_cast<int>(a_count) || b_count != static_cast<int>(b_count) ||
        c_count != static_cast<int>(c_count)) {
        std::cerr << "Error: matrix dimensions are too large for this sample" << std::endl;
        return 1;
    }

    float *A = new float[a_count];
    float *B = new float[b_count];
    float *C = new float[c_count];

    fill_random(A, A_rows, A_cols);
    fill_random(B, B_rows, B_cols);

    // Both runs write their result into the same buffer C.
    double dtime = omp_get_wtime();
    gemm_OpenMP(A, B, C, A_rows, A_cols, B_cols);
    dtime = omp_get_wtime() - dtime;
    std::cout << "Time with OpenMp: " << dtime << std::endl;

    dtime = omp_get_wtime();
    gemm(A, B, C, A_rows, A_cols, B_cols);
    dtime = omp_get_wtime() - dtime;
    std::cout << "Time without OpenMP: " << dtime << std::endl;

    delete[] A;
    delete[] B;
    delete[] C;
    return 0;
}
lud/lud_openmp/CMakeLists.txt
0 → 100644
View file @
7e79c47f
# ==================================================================================================
# This file is part of the CodeVault project. The project is licensed under Apache Version 2.0.
# CodeVault is part of the EU-project PRACE-4IP (WP7.3.C).
#
# Author(s):
# Rafal Gandecki <rafal.gandecki@pwr.edu.pl>
#
# ==================================================================================================
cmake_minimum_required(VERSION 2.8.7 FATAL_ERROR)

include(${CMAKE_CURRENT_SOURCE_DIR}/../../../cmake/common.cmake)

# ==================================================================================================

# Fall back to a default target name when no prefix was supplied from outside.
if ("${DWARF_PREFIX}" STREQUAL "")
  set(DWARF_PREFIX lud_openmp)
endif()

find_package(Common)
find_package(OpenMP)
enable_language(C)

set(NAME ${DWARF_PREFIX})

if (OPENMP_FOUND)
  # Use the flag reported by FindOpenMP instead of a hard-coded -fopenmp, so compilers that
  # spell the option differently still build the sample with OpenMP enabled.
  # NOTE(review): assumes the C++ language is already enabled when find_package(OpenMP) runs
  # (presumably by common.cmake or a parent project) - confirm.
  set(CXX_FLAGS "${CXX_FLAGS} -Wall -Wno-comment -std=c++0x ${OpenMP_CXX_FLAGS}")
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CXX_FLAGS}")
  add_executable(${NAME} src/lud_openmp.cpp)
  install(TARGETS ${NAME} DESTINATION bin)
else ()
  message("## Skipping '${NAME}': no OpenMP support found")
endif()

# Do not leak the helper variable into other scripts processed afterwards.
unset(NAME)
lud/lud_openmp/README.md
0 → 100644
View file @
7e79c47f
README
=======
# 1. Code sample name
lud_openmp
# 2. Description of the code sample package
This example demonstrates the use of OpenMP for LU decomposition (Doolittle algorithm).
# 3. Release date
19 August 2016
# 4. Version history
1.
0
# 6. Copyright / License of the code sample
Apache Version 2.0
# 5. Contributor (s) / Maintainer(s)
Rafal Gandecki
<rafal.gandecki@pwr.edu.pl>
# 7. Language(s)
C++ 11
# 8. Parallelisation Implementation(s)
OpenMP
# 9. Level of the code sample complexity
basic
# 10. Instructions on how to compile the code
Uses the CodeVault CMake infrastructure, see main README.md
# 11. Instructions on how to run the code
Just run compiled executable
# 12. Sample input(s)
1 argument:
-
matrix size
# 13. Sample output(s)
execution time of the algorithm with and without OpenMP
~
~
~
~
~
~
lud/lud_openmp/src/lud_openmp.cpp
0 → 100644
View file @
7e79c47f
// =================================================================================================
// This file is part of the CodeVault project. The project is licensed under Apache Version 2.0.
// CodeVault is part of the EU-project PRACE-4IP (WP7.3.C).
//
// Author(s):
// Rafal Gandecki <rafal.gandecki@pwr.edu.pl>
//
// This example demonstrates the use of OpenMP for LU decomposition (Doolittle algorithm) and
// compares execution time.
// The example takes a single input argument, specifying the size of the matrices.
//
// See [http://www.openmp.org/] for the full OpenMP documentation.
//
// =================================================================================================
#include
<omp.h>
#include
<random>
#include
<iostream>
// Writes pseudo-random numbers into the first n * m entries of A, addressed as n rows of
// m floats.
//
// A std::mt19937 engine is created on every call and seeded with the current value of
// time(nullptr); the samples are drawn from a default-constructed
// std::uniform_real_distribution<float>.
//
// A  destination buffer; elements 0 .. n*m - 1 are overwritten
// n  number of rows to fill
// m  number of columns per row (also the row stride)
void fill_random(float *A, const int &n, const int &m) {
    std::mt19937 generator(static_cast<unsigned int>(time(nullptr)));
    std::uniform_real_distribution<float> sample;

    for (int r = 0; r < n; ++r) {
        for (int c = 0; c < m; ++c) {
            const int offset = r * m + c;
            A[offset] = sample(generator);
        }
    }
}
// Serial LU decomposition of the n x n row-major matrix A (Doolittle scheme, no pivoting).
//
// Step `step` first produces row `step` of U and then column `step` of L, using the rows of
// U and the columns of L produced by the earlier steps. L gets a unit diagonal and zeros
// above it.
//
// Things worth knowing about the loops below:
//  - Row `step` of U is evaluated for every column, so the entries left of the diagonal are
//    recomputed from A rather than keeping the 0 stored by an earlier step.
//  - Every multiplier is divided by the pivot U[step][step]; nothing guards against a zero
//    pivot.
//
// A  input matrix, n * n floats, only read
// L  output, n * n floats, every element is written
// U  output, n * n floats, every element is written
// n  matrix dimension
void lud(float *A, float *L, float *U, const int &n) {
    for (int step = 0; step < n; step++) {
        // Row `step` of U; for columns right of the diagonal the mirrored entry below the
        // diagonal is cleared first.
        for (int col = 0; col < n; col++) {
            if (col > step) {
                U[col * n + step] = 0;
            }
            float &u_cell = U[step * n + col];
            u_cell = A[step * n + col];
            for (int t = 0; t < step; t++) {
                u_cell -= U[t * n + col] * L[step * n + t];
            }
        }

        // Column `step` of L: zeros above the diagonal ...
        for (int row = 0; row < step; row++) {
            L[row * n + step] = 0;
        }
        // ... one on the diagonal ...
        L[step * n + step] = 1;
        // ... and the multipliers below it.
        for (int row = step + 1; row < n; row++) {
            float &l_cell = L[row * n + step];
            l_cell = A[row * n + step] / U[step * n + step];
            for (int t = 0; t < step; t++) {
                l_cell -= ((U[t * n + step] * L[row * n + t]) / U[step * n + step]);
            }
        }
    }
}
// OpenMP LU decomposition of the n x n row-major matrix A (Doolittle scheme, no pivoting).
//
// Step i produces row i of U and then column i of L. Both read entries written by the
// earlier steps (U[k][j] and L[i][k] / L[j][k] with k < i), so the steps themselves must run
// in order; distributing the outer loop over threads lets a step read factors that another
// thread has not produced yet. Only the work inside one step is independent, so every thread
// walks the step loop and the two inner loops are shared among the threads with `omp for`.
// The implicit barrier at the end of each `omp for` guarantees that row i of U is complete
// before column i of L is computed, and that column i of L is complete before step i + 1
// starts.
//
// Each element is computed with the same sequence of operations as in the serial lud().
// Nothing guards against a zero pivot U[i][i].
//
// A  input matrix, n * n floats, only read
// L  output, n * n floats, every element is written
// U  output, n * n floats, every element is written
// n  matrix dimension
void lud_OpenMP(float *A, float *L, float *U, const int &n) {
#pragma omp parallel shared(A, L, U, n)
    {
        // Declared inside the parallel region, so every thread owns its step counter.
        for (int i = 0; i < n; i++) {
            // Row i of U. Iteration j writes only U[i][j] and, for j > i, U[j][i]; it reads
            // only rows k < i of U and row i of L, none of which is written in this loop.
#pragma omp for
            for (int j = 0; j < n; j++) {
                if (j > i) {
                    U[j * n + i] = 0;
                }
                U[i * n + j] = A[i * n + j];
                for (int k = 0; k < i; k++) {
                    U[i * n + j] -= U[k * n + j] * L[i * n + k];
                }
            }

            // Column i of L. Iteration j writes only L[j][i]; it reads the pivot U[i][i]
            // finished by the loop above and columns k < i of L from earlier steps.
#pragma omp for
            for (int j = 0; j < n; j++) {
                if (i > j) {
                    L[j * n + i] = 0;
                } else if (j == i) {
                    L[j * n + i] = 1;
                } else {
                    L[j * n + i] = A[j * n + i] / U[i * n + i];
                    for (int k = 0; k < i; k++) {
                        L[j * n + i] -= ((U[k * n + i] * L[j * n + k]) / U[i * n + i]);
                    }
                }
            }
        }
    }
}
// Entry point of the LU sample: reads the matrix size, fills A with random data and prints
// the wall-clock time of the serial and of the OpenMP decomposition.
//
// argv[1]  matrix dimension n (the matrices are n x n)
//
// Returns 0 on success and 1 when the argument is missing or unusable.
int main(int argc, char **argv) {
    if (argc != 2) {
        std::cout << "Usage: 1 argument: matrix size" << std::endl;
        return 1;
    }

    const int n = atoi(argv[1]);

    // atoi returns 0 for text that is not a number, and a zero or negative size must not
    // reach the array allocations below.
    if (n <= 0) {
        std::cerr << "Error: matrix size must be a positive integer" << std::endl;
        return 1;
    }

    // The element count is formed in 64-bit arithmetic so the product cannot overflow. The
    // kernels index the buffers with int, so a count that changes when narrowed to int
    // describes a matrix they cannot address.
    const long long count = static_cast<long long>(n) * n;
    if (count != static_cast<int>(count)) {
        std::cerr << "Error: matrix size is too large for this sample" << std::endl;
        return 1;
    }

    float *A = new float[count];
    float *L = new float[count];
    float *U = new float[count];

    fill_random(A, n, n);

    double dtime = omp_get_wtime();
    lud(A, L, U, n);
    dtime = omp_get_wtime() - dtime;
    std::cout << "Time without OpenMP: " << dtime << std::endl;

    // The second run overwrites the factors stored in L and U by the first one.
    dtime = omp_get_wtime();
    lud_OpenMP(A, L, U, n);
    dtime = omp_get_wtime() - dtime;
    std::cout << "Time with OpenMP: " << dtime << std::endl;

    delete[] A;
    delete[] L;
    delete[] U;
    return 0;
}
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment