Commit 6371d555 authored by Jacob Finkenrath's avatar Jacob Finkenrath
Browse files

5IP Update - update Readme and add scripts

parent 55e1c11c
......@@ -188,3 +188,20 @@ Chip Time (s)
Intel KNL Xeon Phi 9.72E+01
NVIDIA P100 GPU 5.60E+01
**********************************************************************
Prace 5IP - Results (see White Paper for more):
Irene KNL Irene SKL Juwels Marconi-KNL MareNostrum PizDaint Davide Frioul Deep Mont-Blanc 3
1 148,68 219,68 182,49 133,38 186,40 53,73 53.4 151 656,41 206,17
2 79,35 114,22 91,83 186,14 94,63 32,38 113 86.9 432,93 93,48
4 48,07 58,11 46,58 287,17 47,22 19,13 21.4 52.7 277,67 49,95
8 28,42 32,09 25,37 533,49 25,86 12,78 14.8 36.5 189,83 25,19
16 17,08 14,35 11,77 1365,72 11,64 9,20 10.1 27.8 119,14 12,55
32 10,56 7,28 5,43 2441,29 5,59 6,35 6.94 15.6
64 9,01 4,18 2,65 -- 2,65 6,41 -- 11.7
128 5,08 -- 1,39 -- 2,48 5,95
256 -- -- 1,38 -- -- 5,84
512 -- -- 0,89 --
# Intel Xeon Phi (KNL, -xMIC-AVX512) configuration: vector length VVL=8
# with array-of-structures (AoS) data layout.
CFLAGS = $(DEFINES) -DARCH=0 -DVVL=8 -DAoS -O3 -xMIC-AVX512 -std=c99 -fma -align -finline-functions
LDFLAGS = -lm -qopenmp
CC=mpicc
TARGETCC=mpicc
TARGETCFLAGS=-x c -qopenmp $(CFLAGS)
#CPU (Intel Skylake, AVX-512) configuration file
# NOTE(review): header previously said "CRAY XC30", but -axCORE-AVX512 and
# -mtune=skylake-avx512 target Skylake -- label corrected.
# Vector length VVL=4 with array-of-structures-of-arrays (AoSoA) layout.
CFLAGS = $(DEFINES) -std=c99 -O3 -axCORE-AVX512 -mtune=skylake-avx512 -qopenmp -DARCH=0
LDFLAGS = -lm -qopenmp
CC=mpicc
TARGETCC=mpicc
TARGETCFLAGS=-x c -qopenmp $(CFLAGS) -DVVL=4 -DAoSoA
#CPU (ARMv8, Cortex-A72) configuration file
# NOTE(review): header previously said "CRAY XC30", but -march=armv8-a and
# -mcpu=cortex-a72 target ARMv8 -- label corrected.
# Vector length VVL=4 with array-of-structures-of-arrays (AoSoA) layout.
CFLAGS = $(DEFINES) -std=gnu89 -O2 -DARCH=0 -Ofast -march=armv8-a -mcpu=cortex-a72 -fomit-frame-pointer
LDFLAGS = -lm -fopenmp
CC=mpicc
TARGETCC=mpicc
TARGETCFLAGS=-x c -fopenmp $(CFLAGS) -DVVL=4 -DAoSoA
#lattice
nx #NX#
ny #NY#
nz #NZ#
nt #NT#
totnodes #PX# #PY# #PZ# #PT#
#wilson
mass_wilson 0.01
#max iterations
max_cg_iters 1000
#etc
verbose 1
##prepare Kernel E
##
## Render kernel_E.input_template into <folder>/kernel_E.input by replacing
## the lattice-size (#NX#..#NT#) and process-grid (#PX#..#PT#) placeholders.
##
## Usage: prepare_kernelE_input.sh NX NY NZ NT PX PY PZ PT FOLDER
##

# Fill every placeholder in a single sed pass and write the result into
# the target folder.
generate_kernel_input() {
  local nx=$1 ny=$2 nz=$3 nt=$4
  local px=$5 py=$6 pz=$7 pt=$8
  local folder=$9
  echo creating input file in "${folder}"
  # One sed invocation replaces the old eight sed|mv round-trips and no
  # longer leaves a stray kernel_E.input_tmp behind.
  sed -e "s/#NX#/${nx}/g" \
      -e "s/#NY#/${ny}/g" \
      -e "s/#NZ#/${nz}/g" \
      -e "s/#NT#/${nt}/g" \
      -e "s/#PX#/${px}/g" \
      -e "s/#PY#/${py}/g" \
      -e "s/#PZ#/${pz}/g" \
      -e "s/#PT#/${pt}/g" \
      kernel_E.input_template > "${folder}/kernel_E.input"
}

# Run only with the full argument list; otherwise print usage instead of
# producing a garbage input file (the old version plowed ahead regardless).
if [ $# -eq 9 ]; then
  generate_kernel_input "$@"
else
  echo "usage: $0 NX NY NZ NT PX PY PZ PT FOLDER" >&2
fi
##
## Render submit_job_part1.sh.template into an executable sbatch script
## inside <folder>.
##
## Usage: prepare_submit_job_part1.sh TIME NODES NTASK TASKPERNODE OMP PERM SCRIPTNAME FOLDER
##

# Copy the template into <folder>, substitute every placeholder in one sed
# pass, optionally chmod +x the result, and remove the temporary copy.
build_submit_script() {
  local walltime=$1      # wall-clock limit, substituted for #TIME#
  local nodes=$2         # node count, substituted for #NODES#
  local ntasks=$3        # total MPI tasks, substituted for #NTASK#
  local taskspernode=$4  # tasks per node, substituted for #TASKPERNODE#
  local ompthreads=$5    # OpenMP threads, substituted for #OMPTHREADS#
  local perm=$6          # 1 => chmod +x the generated script
  local src=$7           # name of the generated submit script
  local folder=$8        # target directory
  echo Creating submit-script in "${folder}"
  cp submit_job_part1.sh.template "${folder}/."
  cd "${folder}" || return 1
  local wrc
  wrc=$(pwd)
  echo "${wrc}"
  # Single sed pass replaces the old sed|mv chain; '|' delimiter for the
  # slash-containing working directory (same trick as the old 's # # g').
  sed -e "s/#NODES#/${nodes}/g" \
      -e "s/#NTASK#/${ntasks}/g" \
      -e "s/#TASKPERNODE#/${taskspernode}/g" \
      -e "s/#OMPTHREADS#/${ompthreads}/g" \
      -e "s/#TIME#/${walltime}/g" \
      -e "s|#WRC#|${wrc}|g" \
      submit_job_part1.sh.template > "${src}"
  if [ "${perm}" -eq 1 ]; then
    chmod +x "${src}"
  fi
  # Drop only the copy inside <folder>; the top-level template is untouched.
  rm submit_job_part1.sh.template
  cd ..
}

# Run only with the full argument list; otherwise print usage.
if [ $# -eq 8 ]; then
  build_submit_script "$@"
else
  echo "usage: $0 TIME NODES NTASK TASKPERNODE OMP PERM SCRIPTNAME FOLDER" >&2
fi
#!/bin/bash
##
## RUN - Part 1
##
## Driver for Part 1 of the UEABS Benchmarksuite (kernel E).
## Before starting this job-script replace "SUBMIT" with the submission
## command of the local queueing system; additionally, in the script
## submit_job the execution command has to be adjusted to the local machine.
##
## (The shebang now sits on the first line; it used to follow this header,
## where it had no effect.)

## Benchmark binary handed to every generated submit script.
EXE=/gpfs/projects/pr1ehq00/bench/part1/bench
## Set scaling-mode: Strong keeps the global lattice fixed, Weak grows it
## with the process grid.
sca_mode="Strong"
#sca_mode="Weak"
## "Run" submits jobs; "Analysis" only scans existing output files.
mode="Analysis"
#mode="Run"
## sbatch_on=1
exe_perm=1 ## use chmod to allow execution of submit_job_Nx_Gx.sh
## lattice size (size strong 1)
gx=64
gt=8
g=8 ##MPItaskperNODE
openmp=6 ##OMP
## lattice size (size strong 2) - there is no other testcase yet
#gx=64
#gt=128
## lattice size (size weak 1)
#gx=48
#gt=24
## use smaller lattice size of weak scaling mode: like gx=24 gt=24
##
## Global lattice extents; in Weak mode they are rescaled inside the loop.
lt=$gt
lx=$gx
ly=$gx
lz=$gx
#for n in 1 2 4 8 16 32 64 128 256 512 1024 2048 4096 8192 16384; do
for n in 8 16 32 64 128 256 512 1024 2048 4096 8192 16384; do
#for n in 8;do
  ## 4d process grid px*py*pz*pt for every task count n (same mapping as
  ## the old 11-branch if-chain).
  case $n in
    16)    px=8;  py=2;  pz=1;  pt=1 ;;
    32)    px=8;  py=4;  pz=1;  pt=1 ;;
    64)    px=8;  py=4;  pz=1;  pt=2 ;;
    128)   px=8;  py=8;  pz=2;  pt=1 ;;
    256)   px=8;  py=8;  pz=4;  pt=1 ;;
    512)   px=8;  py=8;  pz=8;  pt=1 ;;
    1024)  px=8;  py=8;  pz=8;  pt=2 ;;
    2048)  px=16; py=8;  pz=8;  pt=2 ;;
    4096)  px=16; py=16; pz=8;  pt=2 ;;
    8192)  px=16; py=16; pz=16; pt=2 ;;
    16384) px=16; py=16; pz=16; pt=2 ;;  # NOTE(review): grid is 8192 ranks, same as n=8192 -- confirm
    *)     px=$n; py=1;  pz=1;  pt=1 ;;
  esac
  if [ "$sca_mode" = "Strong" ]; then
    ## per-task local extents (informational only; nothing below reads them)
    lt1=$((gt/pt))
    lx1=$((gx/px))
    ly1=$((gx/py))
    lz1=$((gx/pz))
  else
    ## Weak scaling: keep the local size, grow the global lattice with the grid.
    lt1=$lt
    lx1=$lx
    ly1=$ly
    lz1=$lz
    lt=$((gt*pt))
    lx=$((gx*px))
    ly=$((gx*py))
    lz=$((gx*pz))
  fi
  node=$((n/g))
  name=${sca_mode}_part1_${px}x${py}x${pz}x${pt}_${lx}x${ly}x${lz}x${lt}_${n}
  folder=N${node}_NtaskpN${g}_${lx}x${ly}x${lz}x${lt}
  if [ "$mode" != "Analysis" ]; then
    echo "$name"
    mkdir -p "$folder"   # -p: rerunning the driver must not abort on an existing folder
    submitscript=submit_job_part1_N${n}.sh
    ## Render the sbatch script and the kernel input file, then submit.
    ./prepare_submit_job_part1.sh '01:30:00' ${node} ${n} ${g} ${openmp} ${exe_perm} ${submitscript} ${folder}
    ./prepare_kernelE_input.sh ${lx} ${ly} ${lz} ${lt} ${px} ${py} ${pz} ${pt} "$folder"
    cd "$folder" || continue
    echo sbatch "$submitscript" "$EXE" "$name"
    sbatch ./"$submitscript" "$EXE" "$name"
    sleep 1
    cd ..
  else
    ## Scanning the output and save the data in the per-mode log.
    echo "$name" >> "Part1_${mode}.log"
    grep "sec" "$folder/$name" >> "Part1_${mode}.log"   # was: less ... | grep
  fi
done
##
## RUN - Part 1
##
## Before starting this job-script replace "SUBMIT" with the submission-command of the local queueing system.
## Additionally, in the script submit_job the execution command has to be adjusted to the local machine.
##
##
## Script for Part 1 of the UEABS Benchmarksuite ("Run" variant: submits jobs)
##
#!/bin/bash
# NOTE(review): this shebang is not the first line of the file, so it is
# ineffective when the script is executed directly -- confirm and move it up.
EXE=/gpfs/projects/pr1ehq00/bench/part1/bench
## Set scaling-mode: Strong or Weak
sca_mode="Strong"
#sca_mode="Weak"
## mode="Analysis"
mode="Run"
## sbatch_on=1
exe_perm=1 ## use chmod to allow execution of submit_job_Nx_Gx.sh
## lattice size (size strong 1)
gx=64
gt=8
g=8 ##MPItaskperNODE
openmp=6 ##OMP
## lattice size (size strong 2) - there is no other testcase yet
#gx=64
#gt=128
## lattice size (size weak 1)
#gx=48
#gt=24
## use smaller lattice size of weak scaling mode: like gx=24 gt=24
##
# Global lattice extents; in Weak mode these are rescaled every iteration
# below, so the statement order in the else-branch is significant.
lt=$gt
lx=$gx
ly=$gx
lz=$gx
#for n in 1 2 4 8 16 32 64 128 256 512 1024 2048 4096 8192 16384; do
for n in 8 16 32 64 128 256 512 1024 2048 4096 8192 16384; do
#for n in 8;do
# Default 4d process decomposition px*py*pz*pt for n MPI tasks;
# overridden per task count by the if-chain below.
px=$n
py=1
pz=1
pt=1
if [ $n -eq 16 ];then
py=2
px=8
fi
if [ $n -eq 32 ];then
py=4
px=8
fi
if [ $n -eq 64 ];then
pt=2
py=4
px=8
fi
if [ $n -eq 128 ];then
pz=2
py=8
px=8
fi
if [ $n -eq 256 ];then
pz=4
py=8
px=8
fi
if [ $n -eq 512 ];then
pz=8
py=8
px=8
fi
if [ $n -eq 1024 ];then
pz=8
py=8
px=8
pt=2
fi
if [ $n -eq 2048 ];then
pz=8
py=8
px=16
pt=2
fi
if [ $n -eq 4096 ];then
pz=8
py=16
px=16
pt=2
fi
if [ $n -eq 8192 ];then
pz=16
py=16
px=16
pt=2
fi
# NOTE(review): n=16384 gets the same 16x16x16x2 grid (= 8192 ranks) as
# n=8192 -- looks like a copy-paste oversight, confirm intended layout.
if [ $n -eq 16384 ];then
pz=16
py=16
px=16
pt=2
fi
# Strong scaling: global lattice fixed; lt1..lz1 are the per-task local
# extents (informational only -- nothing below reads them).
if [ $sca_mode = "Strong" ];then
lt1=$((gt/pt))
lx1=$((gx/px))
ly1=$((gx/py))
lz1=$((gx/pz))
else
# Weak scaling: keep the local size and grow the global lattice with the grid.
lt1=$lt
lx1=$lx
ly1=$ly
lz1=$lz
lt=$((gt*pt))
lx=$((gx*px))
ly=$((gx*py))
lz=$((gx*pz))
fi
node=$((n/g))
name=${sca_mode}_part1_${px}x${py}x${pz}x${pt}_${lx}x${ly}x${lz}x${lt}_${n}
folder=N${node}_NtaskpN${g}_${lx}x${ly}x${lz}x${lt}
if [ $mode != "Analysis" ];then
echo $name
mkdir $folder
submitscript=submit_job_part1_N${n}.sh
# Render the sbatch script and the kernel input file, then submit the job.
./prepare_submit_job_part1.sh '01:30:00' ${node} ${n} ${g} ${openmp} ${exe_perm} ${submitscript} ${folder}
./prepare_kernelE_input.sh ${lx} ${ly} ${lz} ${lt} ${px} ${py} ${pz} ${pt} $folder
cd $folder
echo sbatch $submitscript $EXE $name
sbatch ./$submitscript $EXE $name
sleep 1
cd ..
## Scanning the output and save the data in dat_nameif
else
echo $name >> Part1_$mode.log
less $folder/$name | grep "sec" >> Part1_$mode.log
fi
done
#!/bin/bash
## sbatch template for Part 1; the #PLACEHOLDER# tokens (#WRC#, #NODES#,
## #NTASK#, #TASKPERNODE#, #OMPTHREADS#, #TIME#) are substituted by
## prepare_submit_job_part1.sh before submission.
#SBATCH --job-name=TheM1
#SBATCH --workdir=#WRC#
#SBATCH --output=mpi_%j_#NODES#.out
#SBATCH --error=mpi_%j_#NODES#.err
#SBATCH --ntasks=#NTASK#
## walltime filled in by the prepare script (was hard-coded to 00:10:00,
## which made the #TIME# substitution in prepare_submit_job_part1.sh a no-op)
#SBATCH --time=#TIME#
#SBATCH --constraint=highmem
#SBATCH --nodes=#NODES#
#SBATCH --ntasks-per-node=#TASKPERNODE#
#SBATCH --cpus-per-task=1
## (removed a second "--ntasks=#NODES#" directive that conflicted with the
## "--ntasks=#NTASK#" request above)
#SBATCH --exclusive
#set -e
#export OMP_NUM_THREADS=#OMPTHREADS#
#export KMP_AFFINITY=compact,1,0,granularity=fine,verbose
#export KMP_HW_SUBSET=1T
export OMP_NUM_THREADS=#OMPTHREADS#
export KMP_AFFINITY=balanced,granularity=fine
export I_MPI_PIN=1
export I_MPI_PIN_DOMAIN=6
module load intel/2018.4 impi/2018.4
module load hdf5
EXE=$1      # benchmark binary
name=$2     # output file name
echo "mpirun -n #NTASK# $EXE "
mpirun -n #NTASK# "$EXE" > "$name"
#lattice
nx #NX#
ny #NY#
nz #NZ#
nt #NT#
totnodes #PX# #PY# #PZ# #PT#
#wilson
mass_wilson 0.01
#max iterations
max_cg_iters 1000
#etc
verbose 1
##prepare Kernel E
##
## Render kernel_E.input_template into <folder>/kernel_E.input by replacing
## the lattice-size (#NX#..#NT#) and process-grid (#PX#..#PT#) placeholders.
##
## Usage: prepare_kernelE_input.sh NX NY NZ NT PX PY PZ PT FOLDER
##

# Fill every placeholder in a single sed pass and write the result into
# the target folder.
generate_kernel_input() {
  local nx=$1 ny=$2 nz=$3 nt=$4
  local px=$5 py=$6 pz=$7 pt=$8
  local folder=$9
  echo creating input file in "${folder}"
  # One sed invocation replaces the old eight sed|mv round-trips and no
  # longer leaves a stray kernel_E.input_tmp behind.
  sed -e "s/#NX#/${nx}/g" \
      -e "s/#NY#/${ny}/g" \
      -e "s/#NZ#/${nz}/g" \
      -e "s/#NT#/${nt}/g" \
      -e "s/#PX#/${px}/g" \
      -e "s/#PY#/${py}/g" \
      -e "s/#PZ#/${pz}/g" \
      -e "s/#PT#/${pt}/g" \
      kernel_E.input_template > "${folder}/kernel_E.input"
}

# Run only with the full argument list; otherwise print usage instead of
# producing a garbage input file (the old version plowed ahead regardless).
if [ $# -eq 9 ]; then
  generate_kernel_input "$@"
else
  echo "usage: $0 NX NY NZ NT PX PY PZ PT FOLDER" >&2
fi
##
## Render submit_job_part1.sh.template into an executable sbatch script
## inside <folder> (extended variant: also fills #CPUPTASK#, #EXE#, #NAME#).
##
## Usage: prepare_submit_job_part1.sh TIME NODES NTASK TASKPERNODE OMP \
##        CPUPTASK PERM SCRIPTNAME FOLDER EXE NAME
##

# Copy the template into <folder>, substitute every placeholder in one sed
# pass, optionally chmod +x the result, and remove the temporary copy.
render_submit_script() {
  local walltime=$1      # wall-clock limit, substituted for #TIME#
  local nodes=$2         # node count, substituted for #NODES#
  local ntasks=$3        # total MPI tasks, substituted for #NTASK#
  local taskspernode=$4  # tasks per node, substituted for #TASKPERNODE#
  local ompthreads=$5    # OpenMP threads, substituted for #OMPTHREADS#
  local cpuptask=$6      # cpus per task, substituted for #CPUPTASK#
  local perm=$7          # 1 => chmod +x the generated script
  local src=$8           # name of the generated submit script
  local folder=$9        # target directory
  local exe=${10}        # benchmark binary path, substituted for #EXE#
  local name=${11}       # run name, substituted for #NAME#
  echo Creating submit-script in "${folder}"
  cp submit_job_part1.sh.template "${folder}/."
  cd "${folder}" || return 1
  local wrc
  wrc=$(pwd)
  echo "${wrc}"
  # Single sed pass replaces the old sed|mv chain; '|' delimiters for the
  # path-valued replacements (exe and wrc contain slashes).
  sed -e "s/#NODES#/${nodes}/g" \
      -e "s/#NTASK#/${ntasks}/g" \
      -e "s/#TASKPERNODE#/${taskspernode}/g" \
      -e "s/#OMPTHREADS#/${ompthreads}/g" \
      -e "s/#CPUPTASK#/${cpuptask}/g" \
      -e "s/#TIME#/${walltime}/g" \
      -e "s|#EXE#|${exe}|g" \
      -e "s/#NAME#/${name}/g" \
      -e "s|#WRC#|${wrc}|g" \
      submit_job_part1.sh.template > "${src}"
  if [ "${perm}" -eq 1 ]; then
    chmod +x "${src}"
  fi
  # Drop only the copy inside <folder>; the top-level template is untouched.
  rm submit_job_part1.sh.template
  cd ..
}

# Run only with the full argument list; otherwise print usage.
if [ $# -eq 11 ]; then
  render_submit_script "$@"
else
  echo "usage: $0 TIME NODES NTASK TASKPERNODE OMP CPUPTASK PERM SCRIPTNAME FOLDER EXE NAME" >&2
fi
##
## RUN - Part 1
##
## Before starting this job-script replace "SUBMIT" with the submition-command of the local queing system.
## Additional in the script submit_job the execution command has to be adjusted to the local machine.
##
##
## Script for Part 1 of the UEABS Benchmarksuite
##
#!/bin/bash
EXE=/ccc/cont005/home/unicy/finkenrj/run/part1/bench
## Set scaling-mode: Strong or Weak
sca_mode="Strong"
#sca_mode="OneNode"
#sca_mode="Weak"
mode="Analysis"
##mode="Run"
## sbatch_on=1
exe_perm=1 ## use chmod to allow execution of submit_job_Nx_Gx.sh
## lattice size (size strong 1)
gx=64
gy=64
gz=64
gt=8
g=8 ##MPItaskperNODE
openmp=6 ##OMP
cpuptask=6 ## CPUPERTASK
## lattice size (size strong 2) - there is no other testcase yet
#gx=64
#gt=128