Skip to content
d7.5_4IP_1.0.xml 349 KiB
Newer Older
Victor's avatar
Victor committed
<?xml-model href="http://www.le-tex.de/resource/schema/hub/1.2/hub.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
<?xml-model href="http://www.le-tex.de/resource/schema/hub/1.2/hub.rng" type="application/xml" schematypens="http://purl.oclc.org/dsdl/schematron"?>
<hub xmlns="http://docbook.org/ns/docbook" xmlns:css="http://www.w3.org/1996/css" xml:base="file:/home/cameo/git/CodeVault/ueabs_accelerator/doc/d7.5_4IP_1.0.hub.xml" css:version="3.0-variant le-tex_Hub-1.2" xml:lang="de" css:rule-selection-attribute="role" version="5.1-variant le-tex_Hub-1.2"><info><keywordset role="hub"><keyword role="formatting-deviations-only">true</keyword><keyword role="source-type">docx</keyword><keyword role="source-dir-uri">file:/home/cameo/git/CodeVault/ueabs_accelerator/doc/d7.5_4IP_1.0.docx.tmp/</keyword><keyword role="archive-dir-uri">file:/home/cameo/git/CodeVault/ueabs_accelerator/doc/</keyword><keyword role="source-basename">d7.5_4IP_1.0</keyword><keyword role="source-application">Microsoft Macintosh Word</keyword><keyword role="marked-identifiers">true</keyword><keyword role="processed-lists">true</keyword></keywordset><keywordset role="docProps"><keyword role="dc:title">New Template</keyword><keyword role="dc:creator">Dietmar Erwin</keyword><keyword role="cp:lastModifiedBy">Victor Cameo</keyword><keyword role="cp:revision">475</keyword><keyword role="cp:lastPrinted">2017-03-10T17:05:00Z</keyword><keyword role="dcterms:created">2017-03-10T19:54:00Z</keyword><keyword role="dcterms:modified">2017-03-27T09:52:00Z</keyword><keyword role="extendedProps:Template">Normal.dotm</keyword><keyword role="extendedProps:TotalTime">750</keyword><keyword role="extendedProps:Pages">54</keyword><keyword role="extendedProps:Words">13968</keyword><keyword role="extendedProps:Characters">79621</keyword><keyword role="extendedProps:Application">Microsoft Macintosh Word</keyword><keyword role="extendedProps:DocSecurity">0</keyword><keyword role="extendedProps:Lines">663</keyword><keyword role="extendedProps:Paragraphs">186</keyword><keyword role="extendedProps:ScaleCrop">false</keyword><keyword role="extendedProps:Company">Home</keyword><keyword role="extendedProps:LinksUpToDate">false</keyword><keyword 
role="extendedProps:CharactersWithSpaces">93403</keyword><keyword role="extendedProps:SharedDoc">false</keyword><keyword role="extendedProps:HyperlinksChanged">false</keyword><keyword role="extendedProps:AppVersion">15.0000</keyword></keywordset><css:rules><css:rule layout-type="para" native-name="caption" css:font-size="10pt" css:font-family="Times New Roman" xml:lang="en" css:text-align="justify" css:font-weight="bold" name="Caption"/><css:rule layout-type="para" native-name="Heading1" css:font-size="14pt" css:font-family="Arial" xml:lang="en" css:page-break-after="avoid" css:margin-top="12pt" css:margin-bottom="6pt" css:margin-left="0pt" css:text-indent="0pt" remap="h1" css:font-weight="bold" css:text-align="center" css:text-align-last="center" name="Heading10"><tabs><tab horizontal-position="21.6pt"/></tabs></css:rule><css:rule layout-type="para" native-name="Heading2" css:font-size="14pt" css:font-family="Arial" xml:lang="en" css:page-break-after="avoid" css:margin-top="5pt" css:margin-bottom="5pt" css:text-align="justify" remap="h2" css:font-weight="bold" name="Heading2"><css:attic css:margin-left="18pt" css:text-indent="-18pt"/><tabs><tab horizontal-position="28.8pt"/></tabs></css:rule><css:rule layout-type="para" native-name="Heading3" css:font-size="12pt" css:font-family="Arial" xml:lang="en" css:page-break-after="avoid" css:margin-top="5pt" css:margin-bottom="5pt" css:text-align="justify" remap="h2" css:font-weight="bold" name="Heading3"><css:attic css:margin-left="21.6pt" css:text-indent="-21.6pt"/><tabs><tab horizontal-position="28.8pt"/></tabs></css:rule><css:rule layout-type="para" native-name="Heading4" css:font-size="12pt" css:font-family="Arial" xml:lang="en" css:page-break-after="avoid" css:margin-top="5pt" css:margin-bottom="5pt" css:margin-left="61.2pt" css:text-indent="-25.2pt" css:text-align="justify" remap="h2" css:font-weight="normal" css:font-style="italic" name="Heading4"><tabs><tab horizontal-position="28.8pt"/><tab align="left" 
horizontal-position="36pt"/></tabs></css:rule><css:rule layout-type="para" native-name="List Paragraph" css:font-size="12pt" css:font-family="Times New Roman" css:margin-left="36pt" name="ListParagraph"/><css:rule layout-type="para" native-name="Normal PRACE" css:font-size="12pt" css:font-family="Times New Roman" xml:lang="en" css:margin-bottom="6pt" css:text-align="justify" name="NormalPRACE"/><css:rule layout-type="para" native-name="toc 1" css:font-size="11pt" css:font-family="Times New Roman" xml:lang="en" css:margin-top="6pt" css:font-weight="bold" name="TOC1"><tabs><tab align="right" leader="dot" horizontal-position="452.45pt"/></tabs></css:rule><css:rule layout-type="para" native-name="toc 2" css:font-size="10pt" css:font-family="Times New Roman" xml:lang="en" css:margin-top="3pt" css:margin-left="22.1pt" css:font-weight="bold" name="TOC2"><tabs><tab align="left" horizontal-position="44.25pt"/><tab align="right" leader="dot" horizontal-position="453.1pt"/></tabs></css:rule><css:rule layout-type="para" native-name="toc 3" css:font-size="10pt" css:font-family="Times New Roman" xml:lang="en" css:margin-top="2pt" css:margin-left="44.25pt" css:font-style="italic" name="TOC3"><tabs><tab align="left" horizontal-position="66.9pt"/><tab align="right" leader="dot" horizontal-position="453.05pt"/></tabs></css:rule><css:rule layout-type="para" native-name="table of figures" css:font-size="11pt" css:font-family="Times New Roman" xml:lang="en" css:text-align="justify" name="TableofFigures"/><css:rule layout-type="para" native-name="Title" css:font-size="16pt" css:font-family="Times New Roman" css:margin-top="12pt" css:margin-bottom="12pt" css:text-align="center" css:text-align-last="center" remap="h1" css:font-weight="bold" name="Title"/><css:rule layout-type="inline" native-name="annotation reference" css:font-size="9pt" name="CommentReference"/><css:rule layout-type="inline" native-name="Emphasis" css:font-style="italic" name="Emphasis"/><css:rule layout-type="inline" 
native-name="Hyperlink" css:color="#000000" name="Hyperlink"/><css:rule layout-type="inline" native-name="page number" name="PageNumber"/></css:rules></info><para css:text-align="right"><phrase css:font-family="Arial" xml:lang="en"><mediaobject><alt>PRACE_Logo_pos_RGB</alt><imageobject><imagedata fileref="d7.5_4IP_1.0.docx.tmp/word/media/image1.jpeg" css:width="150.11322834645668pt" css:height="102pt"/></imageobject></mediaobject></phrase></para><para css:text-align="center" css:text-align-last="center"><phrase css:font-weight="bold" css:color="#000000" css:font-size="18pt" xml:lang="en">E-Infrastructures</phrase></para><para css:text-align="center" css:text-align-last="center"><phrase css:font-weight="bold" css:color="#000000" css:font-size="18pt" xml:lang="en">H2020-EINFRA-2014-2015</phrase></para><para css:margin-top="6pt" css:text-align="center" css:text-align-last="center"><phrase css:font-weight="bold" css:font-size="16pt" xml:lang="en">EINFRA-4-2014: Pan-European High Performance Computing</phrase></para><para css:margin-top="6pt" css:text-align="center" css:text-align-last="center"><phrase css:font-weight="bold" css:font-size="16pt" xml:lang="en">Infrastructure and Services</phrase></para><para css:text-align="center" css:text-align-last="center"><phrase css:font-weight="bold" css:font-size="16pt" xml:lang="en">PRACE-4IP</phrase></para><para css:text-align="center" css:text-align-last="center"><phrase css:font-weight="bold" css:font-size="16pt" xml:lang="en">PRACE Fourth Implementation Phase Project</phrase><anchor role="start" xml:id="Title"><?latex \label{ref-0001}?></anchor></para><para css:text-align="center" css:text-align-last="center"><phrase css:font-weight="bold" xml:lang="en">Grant Agreement Number: <anchor role="start" xml:id="ReferenceNo"><?latex \label{ref-0002}?></anchor>EINFRA-653838</phrase></para><para css:text-align="center" css:text-align-last="center"><phrase css:font-weight="bold" css:font-size="16pt" xml:lang="en">D7.5</phrase><anchor 
role="start" xml:id="DeliverableNumber"><?latex \label{ref-0003}?></anchor></para><para css:text-align="center" css:text-align-last="center"><phrase css:font-weight="bold" css:font-size="16pt" xml:lang="en">Application performance on accelerators</phrase><anchor role="start" xml:id="DeliverableTitle"><?latex \label{ref-0004}?></anchor></para><para css:text-align="center" css:text-align-last="center"><phrase css:font-weight="bold" css:font-style="italic" css:font-size="16pt" xml:lang="en">Final </phrase> <anchor role="start" xml:id="Status"><?latex \label{ref-0005}?></anchor></para><para><phrase xml:lang="en">Version:</phrase> <tab xml:space="preserve">	</tab><phrase xml:lang="en"><anchor role="start" xml:id="Version"><?latex \label{ref-0006}?></anchor>1.0</phrase></para><para><phrase xml:lang="en">Author(s):</phrase> <tab xml:space="preserve">	</tab><phrase xml:lang="en"><anchor role="start" xml:id="Author"><?latex \label{ref-0007}?></anchor>Victor Cameo Ponz, CINES</phrase></para><para><phrase xml:lang="en">Date:</phrase><tab xml:space="preserve">	</tab><phrase xml:lang="en">24.03.2016</phrase></para><para role="Heading10" css:text-align="left">Project and Deliverable Information Sheet<anchor role="start" xml:id="_Toc478378948"><?latex \label{ref-0008}?></anchor></para><informaltable css:border-collapse="collapse" css:border-top-style="solid" css:border-top-width="0.5pt" css:border-left-style="solid" css:border-left-width="0.5pt" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt"><tgroup cols="3" rowsep="1" colsep="1"><colspec colnum="1" colname="col1" colwidth="38.48805798mm"/><colspec colnum="2" colname="col2" colwidth="47.16639186mm"/><colspec colnum="3" colname="col3" colwidth="78.1402827mm"/><tbody><row css:break-inside="avoid"><entry css:break-inside="avoid" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" 
css:width="109.1pt" colname="col1" morerows="7"><para><phrase css:font-weight="bold" xml:lang="en">PRACE Project</phrase></para></entry><entry css:break-inside="avoid" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-left-style="solid" css:border-left-width="0.5pt" css:width="355.2pt" namest="col2" nameend="col3"><para><phrase css:font-weight="bold" xml:lang="en">Project Ref. №:</phrase> <phrase css:font-weight="bold" xml:lang="en"><link linkend="ReferenceNo"><?latex {\hyperref[ref-0002]{EINFRA-653838}}?></link></phrase></para></entry></row><row css:break-inside="avoid"><entry css:break-inside="avoid" css:border-top-style="solid" css:border-bottom-style="solid" css:border-top-width="0.5pt" css:border-bottom-width="0.5pt" css:border-left-style="solid" css:border-left-width="0.5pt" css:width="355.2pt" namest="col2" nameend="col3"><para><phrase css:font-weight="bold" xml:lang="en">Project Title:</phrase> <phrase css:font-weight="bold" xml:lang="en" css:font-size="16pt"><link linkend="Title"><?latex {\hyperref[ref-0001]{PRACE Fourth Implementation Phase Project}}?></link></phrase></para></entry></row><row css:break-inside="avoid"><entry css:break-inside="avoid" css:border-top-style="solid" css:border-bottom-style="solid" css:border-top-width="0.5pt" css:border-bottom-width="0.5pt" css:border-left-style="solid" css:border-left-width="0.5pt" css:width="355.2pt" namest="col2" nameend="col3"><para><phrase css:font-weight="bold" xml:lang="en">Project Web Site:</phrase>      <phrase role="Hyperlink" xml:lang="en"><link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="http://www.prace-project.eu">http://www.prace-project.eu</link></phrase></para></entry></row><row css:break-inside="avoid"><entry css:break-inside="avoid" css:border-top-style="solid" css:border-bottom-style="solid" css:border-top-width="0.5pt" css:border-bottom-width="0.5pt" css:border-left-style="solid" css:border-left-width="0.5pt" css:width="355.2pt" namest="col2" 
nameend="col3"><para><phrase css:font-weight="bold" xml:lang="en">Deliverable ID:</phrase> <phrase xml:lang="en">&lt;</phrase> <phrase css:font-weight="bold" css:font-size="11pt" xml:lang="en"><link linkend="DeliverableNumber"><?latex {\hyperref[ref-0003]{D7.5}}?></link></phrase><phrase xml:lang="en">&gt;</phrase></para></entry></row><row css:break-inside="avoid"><entry css:break-inside="avoid" css:border-top-style="solid" css:border-bottom-style="solid" css:border-top-width="0.5pt" css:border-bottom-width="0.5pt" css:border-left-style="solid" css:border-left-width="0.5pt" css:width="355.2pt" namest="col2" nameend="col3"><para><phrase css:font-weight="bold" xml:lang="en">Deliverable Nature:</phrase> <phrase xml:lang="en">&lt;DOC_TYPE: Report / Other&gt;</phrase></para></entry></row><row css:break-inside="avoid" css:min-height="13.75pt"><entry css:break-inside="avoid" css:border-top-style="solid" css:border-bottom-style="solid" css:border-top-width="0.5pt" css:border-bottom-width="0.5pt" css:border-left-style="solid" css:border-right-style="solid" css:border-left-width="0.5pt" css:border-right-width="0.5pt" css:width="133.7pt" colname="col2" morerows="1"><para><phrase css:font-weight="bold" xml:lang="en">Dissemination Level:</phrase></para><para><phrase xml:lang="en">PU</phrase></para></entry><entry css:break-inside="avoid" css:border-top-style="solid" css:border-bottom-style="solid" css:border-top-width="0.5pt" css:border-bottom-width="0.5pt" css:border-left-style="solid" css:border-left-width="0.5pt" css:width="221.5pt" colname="col3"><para><phrase css:font-weight="bold" xml:lang="en">Contractual Date of Delivery:</phrase></para><para><phrase xml:lang="en">31 / 03 / 2017</phrase></para></entry></row><row css:break-inside="avoid" css:min-height="13.75pt"><entry css:break-inside="avoid" css:border-top-style="solid" css:border-bottom-style="solid" css:border-top-width="0.5pt" css:border-bottom-width="0.5pt" css:border-left-style="solid" css:border-left-width="0.5pt" 
css:width="221.5pt" colname="col3"><para><phrase css:font-weight="bold" xml:lang="en">Actual Date of Delivery:</phrase></para><para><phrase xml:lang="en">DD / Month / YYYY</phrase></para></entry></row><row css:break-inside="avoid" css:min-height="13.75pt"><entry css:break-inside="avoid" css:border-top-style="solid" css:border-top-width="0.5pt" css:border-left-style="solid" css:border-left-width="0.5pt" css:width="355.2pt" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" namest="col2" nameend="col3"><para><phrase css:font-weight="bold" xml:lang="en">EC Project Officer:</phrase> <phrase role="Emphasis" css:font-weight="bold" css:font-style="normal" xml:lang="en">Leonardo Flores Añover</phrase></para></entry></row></tbody></tgroup></informaltable><para><phrase xml:lang="en">*</phrase> <phrase css:font-size="10pt" xml:lang="en">- The dissemination levels are indicated as follows:</phrase> <phrase css:font-weight="bold" css:font-size="10pt" xml:lang="en">PU</phrase> <phrase css:font-size="10pt" xml:lang="en">– Public,</phrase> <phrase css:font-weight="bold" css:font-size="10pt" xml:lang="en">CO</phrase> <phrase css:font-size="10pt" xml:lang="en">– Confidential, only for members of the consortium (including the Commission Services)</phrase> <phrase css:font-weight="bold" css:font-size="10pt" xml:lang="en">CL</phrase> <phrase css:font-size="10pt" xml:lang="en">– Classified, as referred to in Commission Decision 2001/844/EC.</phrase></para><para role="Heading10" css:text-align="left">Document Control Sheet<anchor role="start" xml:id="_Toc478378949"><?latex \label{ref-0009}?></anchor></para><informaltable css:border-collapse="collapse" css:border-top-style="solid" css:border-top-width="0.5pt" css:border-left-style="solid" css:border-left-width="0.5pt" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt"><tgroup cols="3" rowsep="1" colsep="1"><colspec colnum="1" colname="col1" 
colwidth="38.48805798mm"/><colspec colnum="2" colname="col2" colwidth="47.16639186mm"/><colspec colnum="3" colname="col3" colwidth="78.1402827mm"/><tbody><row css:break-inside="avoid"><entry css:break-inside="avoid" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:width="109.15pt" colname="col1" morerows="5"><para><phrase css:font-weight="bold" xml:lang="en">Document</phrase></para></entry><entry css:break-inside="avoid" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-left-style="solid" css:border-left-width="0.5pt" css:width="355.25pt" namest="col2" nameend="col3"><para><phrase css:font-weight="bold" xml:lang="en">Title:</phrase> <phrase css:font-weight="bold" xml:lang="en"><link linkend="DeliverableTitle"><?latex {\hyperref[ref-0004]{Application performance on accelerators}}?></link></phrase></para></entry></row><row css:break-inside="avoid" css:min-height="15.3pt"><entry css:break-inside="avoid" css:border-top-style="solid" css:border-bottom-style="solid" css:border-top-width="0.5pt" css:border-bottom-width="0.5pt" css:border-left-style="solid" css:border-left-width="0.5pt" css:width="355.25pt" namest="col2" nameend="col3"><para><phrase css:font-weight="bold" xml:lang="en">ID:</phrase>        <phrase css:font-weight="bold" css:font-size="11pt" xml:lang="en"><link linkend="DeliverableNumber"><?latex {\hyperref[ref-0003]{D7.5}}?></link></phrase> </para></entry></row><row css:break-inside="avoid" css:min-height="13.75pt"><entry css:break-inside="avoid" css:border-top-style="solid" css:border-bottom-style="solid" css:border-top-width="0.5pt" css:border-bottom-width="0.5pt" css:border-left-style="solid" css:border-right-style="solid" css:border-left-width="0.5pt" css:border-right-width="0.5pt" css:width="133.7pt" colname="col2"><para><phrase css:font-weight="bold" xml:lang="en">Version:</phrase> <phrase xml:lang="en">&lt;</phrase><phrase xml:lang="en"><link 
linkend="Version"><?latex {\hyperref[ref-0006]{1.0}}?></link></phrase><phrase xml:lang="en">&gt;</phrase></para></entry><entry css:break-inside="avoid" css:border-top-style="solid" css:border-bottom-style="solid" css:border-top-width="0.5pt" css:border-bottom-width="0.5pt" css:border-left-style="solid" css:border-left-width="0.5pt" css:width="221.55pt" colname="col3"><para><phrase css:font-weight="bold" xml:lang="en">Status:</phrase> <phrase css:font-weight="bold" css:font-style="italic" css:font-size="11pt" xml:lang="en"><link linkend="Status"><?latex {\hyperref[ref-0005]{Final}}?></link></phrase></para></entry></row><row css:break-inside="avoid" css:min-height="12.3pt"><entry css:break-inside="avoid" css:border-top-style="solid" css:border-bottom-style="solid" css:border-top-width="0.5pt" css:border-bottom-width="0.5pt" css:border-left-style="solid" css:border-left-width="0.5pt" css:width="355.25pt" namest="col2" nameend="col3"><para><phrase css:font-weight="bold" xml:lang="en">Available at:</phrase> <phrase role="Hyperlink" xml:lang="en"><link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="http://www.prace-project.eu">http://www.prace-project.eu</link></phrase></para></entry></row><row css:break-inside="avoid" css:min-height="13.75pt"><entry css:break-inside="avoid" css:border-top-style="solid" css:border-bottom-style="solid" css:border-top-width="0.5pt" css:border-bottom-width="0.5pt" css:border-left-style="solid" css:border-left-width="0.5pt" css:width="355.25pt" namest="col2" nameend="col3"><para><phrase css:font-weight="bold" xml:lang="en">Software Tool:</phrase> <phrase xml:lang="en">Microsoft Word 2010</phrase></para></entry></row><row css:break-inside="avoid"><entry css:break-inside="avoid" css:border-top-style="solid" css:border-bottom-style="solid" css:border-top-width="0.5pt" css:border-bottom-width="0.5pt" css:border-left-style="solid" css:border-left-width="0.5pt" css:width="355.25pt" namest="col2" nameend="col3"><para><phrase 
css:font-weight="bold" xml:lang="en">File(s):</phrase> <phrase xml:lang="en">d7.5_4IP_1.0.docx</phrase></para></entry></row><row css:break-inside="avoid" css:min-height="15.25pt"><entry css:break-inside="avoid" css:border-top-style="solid" css:border-bottom-style="solid" css:border-top-width="0.5pt" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:width="109.15pt" colname="col1" morerows="3"><para><phrase css:font-weight="bold" xml:lang="en">Authorship</phrase></para></entry><entry css:break-inside="avoid" css:border-top-style="solid" css:border-bottom-style="solid" css:border-top-width="0.5pt" css:border-bottom-width="0.5pt" css:border-left-style="solid" css:border-right-style="solid" css:border-left-width="0.5pt" css:border-right-width="0.5pt" css:width="133.7pt" colname="col2"><para><phrase css:font-weight="bold" xml:lang="en">Written by:</phrase></para></entry><entry css:break-inside="avoid" css:border-top-style="solid" css:border-bottom-style="solid" css:border-top-width="0.5pt" css:border-bottom-width="0.5pt" css:border-left-style="solid" css:border-left-width="0.5pt" css:width="221.55pt" colname="col3"><para><phrase xml:lang="en"><link linkend="Author"><?latex {\hyperref[ref-0007]{Victor Cameo Ponz,}}?></link></phrase><phrase xml:lang="en">CINES</phrase></para></entry></row><row css:break-inside="avoid" css:min-height="13.85pt"><entry css:break-inside="avoid" css:border-top-style="solid" css:border-bottom-style="solid" css:border-top-width="0.5pt" css:border-bottom-width="0.5pt" css:border-left-style="solid" css:border-right-style="solid" css:border-left-width="0.5pt" css:border-right-width="0.5pt" css:width="133.7pt" colname="col2"><para><phrase css:font-weight="bold" xml:lang="en">Contributors:</phrase></para></entry><entry css:break-inside="avoid" css:border-top-style="solid" css:border-bottom-style="solid" css:border-top-width="0.5pt" css:border-bottom-width="0.5pt" css:border-left-style="solid" 
css:border-left-width="0.5pt" css:width="221.55pt" colname="col3"><para><phrase xml:lang="en">Adem Tekin, ITU</phrase></para><para><phrase xml:lang="en">Alan Grey, EPCC</phrase></para><para><phrase xml:lang="en">Andrew Emerson, CINECA</phrase></para><para><phrase xml:lang="en">Andrew Sunderland, STFC</phrase></para><para><phrase xml:lang="fr">Arno Proeme, EPCC</phrase></para><para><phrase xml:lang="fr">Charles Moulinec, STFC</phrase></para><para><phrase xml:lang="fr">Dimitris Dellis, GRNET</phrase></para><para><phrase xml:lang="fr">Fiona Reid, EPCC</phrase></para><para><phrase xml:lang="fr">Gabriel Hautreux, INRIA</phrase></para><para><phrase xml:lang="en">Jacob Finkenrath, CyI</phrase></para><para><phrase xml:lang="en">James Clark, STFC</phrase></para><para><phrase xml:lang="en">Janko Strassburg, BSC</phrase></para><para><phrase xml:lang="en">Jorge Rodriguez, BSC</phrase></para><para><phrase xml:lang="en">Martti Louhivuori, CSC</phrase></para><para><phrase xml:lang="fr">Philippe Segers, GENCI</phrase></para><para><phrase xml:lang="fr">Valeriu Codreanu, SURFSARA</phrase></para></entry></row><row css:break-inside="avoid" css:min-height="13.85pt"><entry css:break-inside="avoid" css:border-top-style="solid" css:border-bottom-style="solid" css:border-top-width="0.5pt" css:border-bottom-width="0.5pt" css:border-left-style="solid" css:border-right-style="solid" css:border-left-width="0.5pt" css:border-right-width="0.5pt" css:width="133.7pt" colname="col2"><para><phrase css:font-weight="bold" xml:lang="en">Reviewed by:</phrase></para></entry><entry css:break-inside="avoid" css:border-top-style="solid" css:border-bottom-style="solid" css:border-top-width="0.5pt" css:border-bottom-width="0.5pt" css:border-left-style="solid" css:border-left-width="0.5pt" css:width="221.55pt" colname="col3"><para><phrase xml:lang="en">Filip Stanek, IT4I</phrase></para><para><phrase xml:lang="en">Thomas Eickermann, FZJ</phrase></para></entry></row><row css:break-inside="avoid" 
css:min-height="13.85pt"><entry css:break-inside="avoid" css:border-top-style="solid" css:border-top-width="0.5pt" css:border-left-style="solid" css:border-right-style="solid" css:border-left-width="0.5pt" css:border-right-width="0.5pt" css:width="133.7pt" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" colname="col2"><para><phrase css:font-weight="bold" xml:lang="en">Approved by:</phrase></para></entry><entry css:break-inside="avoid" css:border-top-style="solid" css:border-top-width="0.5pt" css:border-left-style="solid" css:border-left-width="0.5pt" css:width="221.55pt" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" colname="col3"><para><phrase xml:lang="en">MB/TB</phrase></para></entry></row></tbody></tgroup></informaltable><para role="Heading10" css:text-align="left">Document Status Sheet<anchor role="start" xml:id="_Toc478378950"><?latex \label{ref-0010}?></anchor></para><informaltable css:border-collapse="collapse" css:border-top-style="solid" css:border-top-width="0.5pt" css:border-left-style="solid" css:border-left-width="0.5pt" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt"><tgroup cols="4" rowsep="1" colsep="1"><colspec colnum="1" colname="col1" colwidth="39.10541913mm"/><colspec colnum="2" colname="col2" colwidth="38.94666912mm"/><colspec colnum="3" colname="col3" colwidth="38.99958579mm"/><colspec colnum="4" colname="col4" colwidth="46.7430585mm"/><tbody><row><entry css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:width="110.85pt" colname="col1"><para><phrase css:font-weight="bold" xml:lang="en">Version</phrase></para></entry><entry css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-left-style="solid" css:border-right-style="solid" css:border-left-width="0.5pt" css:border-right-width="0.5pt" css:width="110.4pt" colname="col2"><para><phrase 
css:font-weight="bold" xml:lang="en">Date</phrase></para></entry><entry css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-left-style="solid" css:border-right-style="solid" css:border-left-width="0.5pt" css:border-right-width="0.5pt" css:width="110.6pt" colname="col3"><para><phrase css:font-weight="bold" xml:lang="en">Status</phrase></para></entry><entry css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-left-style="solid" css:border-left-width="0.5pt" css:width="132.55pt" colname="col4"><para><phrase css:font-weight="bold" xml:lang="en">Comments</phrase></para></entry></row><row><entry css:border-top-style="solid" css:border-bottom-style="solid" css:border-top-width="0.5pt" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:width="110.85pt" colname="col1"><para><phrase xml:lang="en">0.1</phrase></para></entry><entry css:border-top-style="solid" css:border-bottom-style="solid" css:border-top-width="0.5pt" css:border-bottom-width="0.5pt" css:border-left-style="solid" css:border-right-style="solid" css:border-left-width="0.5pt" css:border-right-width="0.5pt" css:width="110.4pt" colname="col2"><para><phrase xml:lang="en">13/03/2017</phrase></para></entry><entry css:border-top-style="solid" css:border-bottom-style="solid" css:border-top-width="0.5pt" css:border-bottom-width="0.5pt" css:border-left-style="solid" css:border-right-style="solid" css:border-left-width="0.5pt" css:border-right-width="0.5pt" css:width="110.6pt" colname="col3"><para><phrase xml:lang="en">Draft</phrase></para></entry><entry css:border-top-style="solid" css:border-bottom-style="solid" css:border-top-width="0.5pt" css:border-bottom-width="0.5pt" css:border-left-style="solid" css:border-left-width="0.5pt" css:width="132.55pt" colname="col4"><para><phrase xml:lang="en">First revision</phrase></para></entry></row><row><entry css:border-top-style="solid" css:border-bottom-style="solid" 
css:border-top-width="0.5pt" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:width="110.85pt" colname="col1"><para><phrase xml:lang="en">0.2</phrase></para></entry><entry css:border-top-style="solid" css:border-bottom-style="solid" css:border-top-width="0.5pt" css:border-bottom-width="0.5pt" css:border-left-style="solid" css:border-right-style="solid" css:border-left-width="0.5pt" css:border-right-width="0.5pt" css:width="110.4pt" colname="col2"><para><phrase xml:lang="en">15/03/2017</phrase></para></entry><entry css:border-top-style="solid" css:border-bottom-style="solid" css:border-top-width="0.5pt" css:border-bottom-width="0.5pt" css:border-left-style="solid" css:border-right-style="solid" css:border-left-width="0.5pt" css:border-right-width="0.5pt" css:width="110.6pt" colname="col3"><para><phrase xml:lang="en">Draft</phrase></para></entry><entry css:border-top-style="solid" css:border-bottom-style="solid" css:border-top-width="0.5pt" css:border-bottom-width="0.5pt" css:border-left-style="solid" css:border-left-width="0.5pt" css:width="132.55pt" colname="col4"><para><phrase xml:lang="en">Include remark of the first review + new figures</phrase></para></entry></row><row><entry css:border-top-style="solid" css:border-top-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:width="110.85pt" colname="col1"><para><phrase xml:lang="en">1.0</phrase></para></entry><entry css:border-top-style="solid" css:border-top-width="0.5pt" css:border-left-style="solid" css:border-right-style="solid" css:border-left-width="0.5pt" css:border-right-width="0.5pt" css:width="110.4pt" colname="col2"><para><phrase xml:lang="en">24/03/2017</phrase></para></entry><entry css:border-top-style="solid" css:border-top-width="0.5pt" css:border-left-style="solid" css:border-right-style="solid" css:border-left-width="0.5pt" css:border-right-width="0.5pt" css:width="110.6pt" colname="col3"><para><phrase xml:lang="en">Final 
version</phrase></para></entry><entry css:border-top-style="solid" css:border-top-width="0.5pt" css:border-left-style="solid" css:border-left-width="0.5pt" css:width="132.55pt" colname="col4"><para><phrase xml:lang="en">Improved the application performance section</phrase></para></entry></row></tbody></tgroup></informaltable><para role="Heading10" css:text-align="left" css:page-break-before="always">Document Keywords <anchor role="start" xml:id="_Toc478378951"><?latex \label{ref-0011}?></anchor></para><informaltable css:border-collapse="collapse" css:border-top-style="solid" css:border-top-width="0.5pt" css:border-left-style="solid" css:border-left-width="0.5pt" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt"><tgroup cols="2" rowsep="1" colsep="1"><colspec colnum="1" colname="col1" colwidth="39.10541913mm"/><colspec colnum="2" colname="col2" colwidth="124.68931341mm"/><tbody><row><entry css:border-right-style="solid" css:border-right-width="0.5pt" css:width="110.85pt" colname="col1"><para><phrase css:font-weight="bold" xml:lang="en">Keywords:</phrase></para></entry><entry css:border-left-style="solid" css:border-left-width="0.5pt" css:width="353.55pt" css:vertical-align="middle" colname="col2"><para><phrase xml:lang="en">PRACE, HPC, Research Infrastructure, Accelerators, GPU, Xeon Phi, Benchmark suite</phrase></para></entry></row></tbody></tgroup></informaltable><para css:border-top-style="solid" css:border-top-width="0.5pt" css:border-left-style="solid" css:border-left-width="0.5pt" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:text-align="justify"><phrase css:font-weight="bold" xml:lang="en">Disclaimer</phrase></para><para css:border-top-style="solid" css:border-top-width="0.5pt" css:border-left-style="solid" css:border-left-width="0.5pt" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" 
css:border-right-style="solid" css:border-right-width="0.5pt" css:text-align="justify"><phrase xml:lang="en">This deliverable has been prepared by the responsible Work Package of the Project in accordance with the Consortium Agreement and the Grant Agreement n°</phrase> <phrase xml:lang="en"><link linkend="ReferenceNo"><?latex {\hyperref[ref-0002]{EINFRA-653838}}?></link></phrase><phrase xml:lang="en">. It solely reflects the opinion of the parties to such agreements on a collective basis in the context of the Project and to the extent foreseen in such agreements. Please note that even though all participants to the Project are members of PRACE AISBL, this deliverable has not been approved by the Council of PRACE AISBL and therefore does not emanate from it nor should it be considered to reflect PRACE AISBL’s individual opinion.</phrase></para><informaltable css:border-collapse="collapse" css:border-top-style="solid" css:border-top-width="0.5pt" css:border-left-style="solid" css:border-left-width="0.5pt" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt"><tgroup cols="1" rowsep="1" colsep="1"><colspec colnum="1" colname="col1" colwidth="163.79473254mm"/><tbody><row><entry css:width="464.3pt" css:border-top-style="solid" css:border-top-width="0.5pt" css:border-left-style="solid" css:border-left-width="0.5pt" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" colname="col1"><para><phrase css:font-weight="bold" xml:lang="en">Copyright notices</phrase></para><para css:margin-bottom="6pt" css:text-align="justify"><phrase xml:lang="en">© 2016 PRACE Consortium Partners. All rights reserved. This document is a project document of the PRACE project. 
All contents are reserved by default and may not be disclosed to third parties without the written consent of the PRACE partners, except as mandated by the European Commission contract</phrase> <phrase xml:lang="en"><link linkend="ReferenceNo"><?latex {\hyperref[ref-0002]{EINFRA-653838}}?></link></phrase> <phrase xml:lang="en">for reviewing and dissemination purposes.</phrase> </para><para css:margin-bottom="6pt" css:text-align="justify"><phrase xml:lang="en">All trademarks and other rights on third party products mentioned in this document are acknowledged as owned by the respective holders.</phrase></para></entry></row></tbody></tgroup></informaltable><para role="Heading10" css:page-break-before="always">Table of Contents<anchor role="start" xml:id="_Toc478378952"><?latex \label{ref-0012}?></anchor></para><div role="hub:toc"><para role="TOC1">Project and Deliverable Information Sheet<tab xml:space="preserve">	</tab><link role="page" linkend="_Toc478378948"><?latex \pageref{ref-0008}?></link></para><para role="TOC1">Document Control Sheet<tab xml:space="preserve">	</tab><link role="page" linkend="_Toc478378949"><?latex \pageref{ref-0009}?></link></para><para role="TOC1">Document Status Sheet<tab xml:space="preserve">	</tab><link role="page" linkend="_Toc478378950"><?latex \pageref{ref-0010}?></link></para><para role="TOC1">Document Keywords<tab xml:space="preserve">	</tab><link role="page" linkend="_Toc478378951"><?latex \pageref{ref-0011}?></link></para><para role="TOC1">Table of Contents<tab xml:space="preserve">	</tab><link role="page" linkend="_Toc478378952"><?latex \pageref{ref-0012}?></link></para><para role="TOC1">List of Figures<tab xml:space="preserve">	</tab><link role="page" linkend="_Toc478378953"><?latex \pageref{ref-0013}?></link></para><para role="TOC1">List of Tables<tab xml:space="preserve">	</tab><link role="page" linkend="_Toc478378954"><?latex \pageref{ref-0014}?></link></para><para role="TOC1">References and Applicable Documents<tab 
xml:space="preserve">	</tab><link role="page" linkend="_Toc478378955"><?latex \pageref{ref-0015}?></link></para><para role="TOC1">List of Acronyms and Abbreviations<tab xml:space="preserve">	</tab><link role="page" linkend="_Toc478378956"><?latex \pageref{ref-0036}?></link></para><para role="TOC1">List of Project Partner Acronyms<tab xml:space="preserve">	</tab><link role="page" linkend="_Toc478378957"><?latex \pageref{ref-0037}?></link></para><para role="TOC1">Executive Summary<tab xml:space="preserve">	</tab><link role="page" linkend="_Toc478378958"><?latex \pageref{ref-0038}?></link></para><para role="TOC1">1<tab xml:space="preserve">	</tab>Introduction<tab xml:space="preserve">	</tab><link role="page" linkend="_Toc478378959"><?latex \pageref{ref-0039}?></link></para><para role="TOC1">2<tab xml:space="preserve">	</tab>Targeted architectures<tab xml:space="preserve">	</tab><link role="page" linkend="_Toc478378960"><?latex \pageref{ref-0041}?></link></para><para role="TOC2"><phrase role="hub:identifier">2.1</phrase><tab xml:space="preserve">	</tab>Co-processor description<tab xml:space="preserve">	</tab><link role="page" linkend="_Toc478378961"><?latex \pageref{ref-0042}?></link></para><para role="TOC2"><phrase role="hub:identifier">2.2</phrase><tab xml:space="preserve">	</tab>Systems description<tab xml:space="preserve">	</tab><link role="page" linkend="_Toc478378962"><?latex \pageref{ref-0045}?></link></para><para role="TOC3"><phrase role="hub:identifier"><phrase css:font-style="normal" css:color="#000000">2.2.1</phrase></phrase><tab xml:space="preserve">	</tab>Cartesius K40<tab xml:space="preserve">	</tab><link role="page" linkend="_Toc478378963"><?latex \pageref{ref-0047}?></link></para><para role="TOC3"><phrase role="hub:identifier"><phrase css:font-style="normal" css:color="#000000">2.2.2</phrase></phrase><tab xml:space="preserve">	</tab>MareNostrum KNC<tab xml:space="preserve">	</tab><link role="page" linkend="_Toc478378964"><?latex 
\pageref{ref-0048}?></link></para><para role="TOC3"><phrase role="hub:identifier"><phrase css:font-style="normal" css:color="#000000">2.2.3</phrase></phrase><tab xml:space="preserve">	</tab>Ouessant P100<tab xml:space="preserve">	</tab><link role="page" linkend="_Toc478378965"><?latex \pageref{ref-0049}?></link></para><para role="TOC3"><phrase role="hub:identifier"><phrase css:font-style="normal" css:color="#000000">2.2.4</phrase></phrase><tab xml:space="preserve">	</tab>Frioul KNL<tab xml:space="preserve">	</tab><link role="page" linkend="_Toc478378966"><?latex \pageref{ref-0050}?></link></para><para role="TOC1">3<tab xml:space="preserve">	</tab>Benchmark suite description<tab xml:space="preserve">	</tab><link role="page" linkend="_Toc478378967"><?latex \pageref{ref-0052}?></link></para><para role="TOC2"><phrase role="hub:identifier">3.1</phrase><tab xml:space="preserve">	</tab>Alya<tab xml:space="preserve">	</tab><link role="page" linkend="_Toc478378968"><?latex \pageref{ref-0055}?></link></para><para role="TOC3"><phrase role="hub:identifier"><phrase css:font-style="normal" css:color="#000000">3.1.1</phrase></phrase><tab xml:space="preserve">	</tab>Code description<tab xml:space="preserve">	</tab><link role="page" linkend="_Toc478378969"><?latex \pageref{ref-0056}?></link></para><para role="TOC3"><phrase role="hub:identifier"><phrase css:font-style="normal" css:color="#000000">3.1.2</phrase></phrase><tab xml:space="preserve">	</tab>Test cases description<tab xml:space="preserve">	</tab><link role="page" linkend="_Toc478378970"><?latex \pageref{ref-0057}?></link></para><para role="TOC2"><phrase role="hub:identifier">3.2</phrase><tab xml:space="preserve">	</tab>Code_Saturne<tab xml:space="preserve">	</tab><link role="page" linkend="_Toc478378971"><?latex \pageref{ref-0058}?></link></para><para role="TOC3"><phrase role="hub:identifier"><phrase css:font-style="normal" css:color="#000000">3.2.1</phrase></phrase><tab xml:space="preserve">	</tab>Code description<tab 
xml:space="preserve">	</tab><link role="page" linkend="_Toc478378972"><?latex \pageref{ref-0059}?></link></para><para role="TOC3"><phrase role="hub:identifier"><phrase css:font-style="normal" css:color="#000000">3.2.2</phrase></phrase><tab xml:space="preserve">	</tab>Test cases description<tab xml:space="preserve">	</tab><link role="page" linkend="_Toc478378973"><?latex \pageref{ref-0060}?></link></para><para role="TOC2"><phrase role="hub:identifier">3.3</phrase><tab xml:space="preserve">	</tab>CP2K<tab xml:space="preserve">	</tab><link role="page" linkend="_Toc478378974"><?latex \pageref{ref-0061}?></link></para><para role="TOC3"><phrase role="hub:identifier"><phrase css:font-style="normal" css:color="#000000">3.3.1</phrase></phrase><tab xml:space="preserve">	</tab>Code description<tab xml:space="preserve">	</tab><link role="page" linkend="_Toc478378975"><?latex \pageref{ref-0062}?></link></para><para role="TOC3"><phrase role="hub:identifier"><phrase css:font-style="normal" css:color="#000000">3.3.2</phrase></phrase><tab xml:space="preserve">	</tab>Test cases description<tab xml:space="preserve">	</tab><link role="page" linkend="_Toc478378976"><?latex \pageref{ref-0063}?></link></para><para role="TOC2"><phrase role="hub:identifier">3.4</phrase><tab xml:space="preserve">	</tab>GPAW<tab xml:space="preserve">	</tab><link role="page" linkend="_Toc478378977"><?latex \pageref{ref-0064}?></link></para><para role="TOC3"><phrase role="hub:identifier"><phrase css:font-style="normal" css:color="#000000">3.4.1</phrase></phrase><tab xml:space="preserve">	</tab>Code description<tab xml:space="preserve">	</tab><link role="page" linkend="_Toc478378978"><?latex \pageref{ref-0065}?></link></para><para role="TOC3"><phrase role="hub:identifier"><phrase css:font-style="normal" css:color="#000000">3.4.2</phrase></phrase><tab xml:space="preserve">	</tab>Test cases description<tab xml:space="preserve">	</tab><link role="page" linkend="_Toc478378979"><?latex 
\pageref{ref-0066}?></link></para><para role="TOC2"><phrase role="hub:identifier">3.5</phrase><tab xml:space="preserve">	</tab>GROMACS<tab xml:space="preserve">	</tab><link role="page" linkend="_Toc478378980"><?latex \pageref{ref-0067}?></link></para><para role="TOC3"><phrase role="hub:identifier"><phrase css:font-style="normal" css:color="#000000">3.5.1</phrase></phrase><tab xml:space="preserve">	</tab>Code description<tab xml:space="preserve">	</tab><link role="page" linkend="_Toc478378981"><?latex \pageref{ref-0068}?></link></para><para role="TOC3"><phrase role="hub:identifier"><phrase css:font-style="normal" css:color="#000000">3.5.2</phrase></phrase><tab xml:space="preserve">	</tab>Test cases description<tab xml:space="preserve">	</tab><link role="page" linkend="_Toc478378982"><?latex \pageref{ref-0069}?></link></para><para role="TOC2"><phrase role="hub:identifier">3.6</phrase><tab xml:space="preserve">	</tab>NAMD<tab xml:space="preserve">	</tab><link role="page" linkend="_Toc478378983"><?latex \pageref{ref-0070}?></link></para><para role="TOC3"><phrase role="hub:identifier"><phrase css:font-style="normal" css:color="#000000">3.6.1</phrase></phrase><tab xml:space="preserve">	</tab>Code description<tab xml:space="preserve">	</tab><link role="page" linkend="_Toc478378984"><?latex \pageref{ref-0071}?></link></para><para role="TOC3"><phrase role="hub:identifier"><phrase css:font-style="normal" css:color="#000000">3.6.2</phrase></phrase><tab xml:space="preserve">	</tab>Test cases description<tab xml:space="preserve">	</tab><link role="page" linkend="_Toc478378985"><?latex \pageref{ref-0072}?></link></para><para role="TOC2"><phrase role="hub:identifier">3.7</phrase><tab xml:space="preserve">	</tab>PFARM<tab xml:space="preserve">	</tab><link role="page" linkend="_Toc478378986"><?latex \pageref{ref-0073}?></link></para><para role="TOC3"><phrase role="hub:identifier"><phrase css:font-style="normal" css:color="#000000">3.7.1</phrase></phrase><tab xml:space="preserve">	
</tab>Code description<tab xml:space="preserve">	</tab><link role="page" linkend="_Toc478378987"><?latex \pageref{ref-0074}?></link></para><para role="TOC3"><phrase role="hub:identifier"><phrase css:font-style="normal" css:color="#000000">3.7.2</phrase></phrase><tab xml:space="preserve">	</tab>Test cases description<tab xml:space="preserve">	</tab><link role="page" linkend="_Toc478378988"><?latex \pageref{ref-0075}?></link></para><para role="TOC2"><phrase role="hub:identifier">3.8</phrase><tab xml:space="preserve">	</tab>QCD<tab xml:space="preserve">	</tab><link role="page" linkend="_Toc478378989"><?latex \pageref{ref-0076}?></link></para><para role="TOC3"><phrase role="hub:identifier"><phrase css:font-style="normal" css:color="#000000">3.8.1</phrase></phrase><tab xml:space="preserve">	</tab>Code description<tab xml:space="preserve">	</tab><link role="page" linkend="_Toc478378990"><?latex \pageref{ref-0077}?></link></para><para role="TOC3"><phrase role="hub:identifier"><phrase css:font-style="normal" css:color="#000000">3.8.2</phrase></phrase><tab xml:space="preserve">	</tab>Test cases description<tab xml:space="preserve">	</tab><link role="page" linkend="_Toc478378991"><?latex \pageref{ref-0078}?></link></para><para role="TOC2"><phrase role="hub:identifier">3.9</phrase><tab xml:space="preserve">	</tab>Quantum Espresso<tab xml:space="preserve">	</tab><link role="page" linkend="_Toc478378992"><?latex \pageref{ref-0079}?></link></para><para role="TOC3"><phrase role="hub:identifier"><phrase css:font-style="normal" css:color="#000000">3.9.1</phrase></phrase><tab xml:space="preserve">	</tab>Code description<tab xml:space="preserve">	</tab><link role="page" linkend="_Toc478378993"><?latex \pageref{ref-0080}?></link></para><para role="TOC3"><phrase role="hub:identifier"><phrase css:font-style="normal" css:color="#000000">3.9.2</phrase></phrase><tab xml:space="preserve">	</tab>Test cases description<tab xml:space="preserve">	</tab><link role="page" 
linkend="_Toc478378994"><?latex \pageref{ref-0081}?></link></para><para role="TOC2"><phrase role="hub:identifier">3.10</phrase><tab xml:space="preserve">	</tab>Synthetic benchmarks – SHOC<tab xml:space="preserve">	</tab><link role="page" linkend="_Toc478378995"><?latex \pageref{ref-0082}?></link></para><para role="TOC3"><phrase role="hub:identifier"><phrase css:font-style="normal" css:color="#000000">3.10.1</phrase></phrase><tab xml:space="preserve">	</tab>Code description<tab xml:space="preserve">	</tab><link role="page" linkend="_Toc478378996"><?latex \pageref{ref-0083}?></link></para><para role="TOC3"><phrase role="hub:identifier"><phrase css:font-style="normal" css:color="#000000">3.10.2</phrase></phrase><tab xml:space="preserve">	</tab>Test cases description<tab xml:space="preserve">	</tab><link role="page" linkend="_Toc478378997"><?latex \pageref{ref-0084}?></link></para><para role="TOC2"><phrase role="hub:identifier">3.11</phrase><tab xml:space="preserve">	</tab>SPECFEM3D<tab xml:space="preserve">	</tab><link role="page" linkend="_Toc478378998"><?latex \pageref{ref-0085}?></link></para><para role="TOC3"><phrase role="hub:identifier"><phrase css:font-style="normal" css:color="#000000">3.11.1</phrase></phrase><tab xml:space="preserve">	</tab>Test cases definition<tab xml:space="preserve">	</tab><link role="page" linkend="_Toc478378999"><?latex \pageref{ref-0086}?></link></para><para role="TOC1">4<tab xml:space="preserve">	</tab>Applications performances<tab xml:space="preserve">	</tab><link role="page" linkend="_Toc478379000"><?latex \pageref{ref-0088}?></link></para><para role="TOC2"><phrase role="hub:identifier">4.1</phrase><tab xml:space="preserve">	</tab>Alya<tab xml:space="preserve">	</tab><link role="page" linkend="_Toc478379001"><?latex \pageref{ref-0089}?></link></para><para role="TOC2"><phrase role="hub:identifier">4.2</phrase><tab xml:space="preserve">	</tab>Code_Saturne<tab xml:space="preserve">	</tab><link role="page" 
linkend="_Toc478379002"><?latex \pageref{ref-0094}?></link></para><para role="TOC2"><phrase role="hub:identifier">4.3</phrase><tab xml:space="preserve">	</tab>CP2K<tab xml:space="preserve">	</tab><link role="page" linkend="_Toc478379003"><?latex \pageref{ref-0101}?></link></para><para role="TOC2"><phrase role="hub:identifier">4.4</phrase><tab xml:space="preserve">	</tab>GPAW<tab xml:space="preserve">	</tab><link role="page" linkend="_Toc478379004"><?latex \pageref{ref-0104}?></link></para><para role="TOC2"><phrase role="hub:identifier">4.5</phrase><tab xml:space="preserve">	</tab>GROMACS<tab xml:space="preserve">	</tab><link role="page" linkend="_Toc478379005"><?latex \pageref{ref-0110}?></link></para><para role="TOC2"><phrase role="hub:identifier">4.6</phrase><tab xml:space="preserve">	</tab>NAMD<tab xml:space="preserve">	</tab><link role="page" linkend="_Toc478379006"><?latex \pageref{ref-0113}?></link></para><para role="TOC2"><phrase role="hub:identifier">4.7</phrase><tab xml:space="preserve">	</tab>PFARM<tab xml:space="preserve">	</tab><link role="page" linkend="_Toc478379007"><?latex \pageref{ref-0116}?></link></para><para role="TOC2"><phrase role="hub:identifier">4.8</phrase><tab xml:space="preserve">	</tab>QCD<tab xml:space="preserve">	</tab><link role="page" linkend="_Toc478379008"><?latex \pageref{ref-0123}?></link></para><para role="TOC3"><phrase role="hub:identifier"><phrase css:font-style="normal" css:color="#000000">4.8.1</phrase></phrase><tab xml:space="preserve">	</tab>First implementation<tab xml:space="preserve">	</tab><link role="page" linkend="_Toc478379009"><?latex \pageref{ref-0124}?></link></para><para role="TOC3"><phrase role="hub:identifier"><phrase css:font-style="normal" css:color="#000000">4.8.2</phrase></phrase><tab xml:space="preserve">	</tab>Second implementation<tab xml:space="preserve">	</tab><link role="page" linkend="_Toc478379010"><?latex \pageref{ref-0131}?></link></para><para role="TOC2"><phrase 
role="hub:identifier">4.9</phrase><tab xml:space="preserve">	</tab>Quantum Espresso<tab xml:space="preserve">	</tab><link role="page" linkend="_Toc478379011"><?latex \pageref{ref-0142}?></link></para><para role="TOC2"><phrase role="hub:identifier">4.10</phrase><tab xml:space="preserve">	</tab>Synthetic benchmarks (SHOC)<tab xml:space="preserve">	</tab><link role="page" linkend="_Toc478379012"><?latex \pageref{ref-0152}?></link></para><para role="TOC2"><phrase role="hub:identifier">4.11</phrase><tab xml:space="preserve">	</tab>SPECFEM3D<tab xml:space="preserve">	</tab><link role="page" linkend="_Toc478379013"><?latex \pageref{ref-0155}?></link></para><para role="TOC1">5<tab xml:space="preserve">	</tab>Conclusion and future work<tab xml:space="preserve">	</tab><link role="page" linkend="_Toc478379014"><?latex \pageref{ref-0158}?></link></para></div><para role="Heading10">List of Figures<anchor role="start" xml:id="_Toc478378953"><?latex \label{ref-0013}?></anchor></para><div role="hub:toc"><para role="TableofFigures"><phrase role="Hyperlink"><link linkend="_Toc478379015"><?latex {\hyperref[ref-0090]{Figure 1 Shows the matrix construction part of Alya that is parallelised with OpenMP and benefits significantly from the many cores available on KNL.}}?></link></phrase><tab xml:space="preserve">	</tab><link linkend="_Toc478379015"><?latex {\hyperref[ref-0090]{  }}?></link></para><para role="TableofFigures"><phrase role="Hyperlink"><link linkend="_Toc478379016"><?latex {\hyperref[ref-0091]{Figure 2 Demonstrates the scalability of the code. 
As expected Haswell cores with K80 GPU are high-performing while the KNL port is currently being optimized further.}}?></link></phrase><tab xml:space="preserve">	</tab><link linkend="_Toc478379016"><?latex {\hyperref[ref-0091]{  }}?></link></para><para role="TableofFigures"><phrase role="Hyperlink"><link linkend="_Toc478379017"><?latex {\hyperref[ref-0093]{Figure 3 Best performance is achieved with GPU in combination with powerful CPU cores. Single thread performance has a big impact on the speedup, both threading and vectorization are employed for additional performance.}}?></link></phrase><tab xml:space="preserve">	</tab><link linkend="_Toc478379017"><?latex {\hyperref[ref-0093]{  }}?></link></para><para role="TableofFigures"><phrase role="Hyperlink"><link linkend="_Toc478379018"><?latex {\hyperref[ref-0096]{Figure 4 Code_Saturne's performance on KNL. AMG is used as a solver in V4.2.2.}}?></link></phrase><tab xml:space="preserve">	</tab><link linkend="_Toc478379018"><?latex {\hyperref[ref-0096]{  }}?></link></para><para role="TableofFigures"><phrase role="Hyperlink"><link linkend="_Toc478379019"><?latex {\hyperref[ref-0103]{Figure 5 Test case 1 of CP2K on the ARCHER cluster}}?></link></phrase><tab xml:space="preserve">	</tab><link linkend="_Toc478379019"><?latex {\hyperref[ref-0103]{  }}?></link></para><para role="TableofFigures"><phrase role="Hyperlink"><link linkend="_Toc478379020"><?latex {\hyperref[ref-0109]{Figure 6 Relative performance (to / t) of GPAW is shown for parallel jobs using an increasing number of CPU (blue) or Xeon Phi KNC (red). Single CPU SCF-cycle runtime (to) was used as the baseline for the normalisation. Ideal scaling is shown as a linear dashed line for comparison. 
Case 1 (Carbon Nanotube) is shown with square markers and Case 2 (Copper Filament) is shown with round markers.}}?></link></phrase><tab xml:space="preserve">	</tab><link linkend="_Toc478379020"><?latex {\hyperref[ref-0109]{  }}?></link></para><para role="TableofFigures"><phrase role="Hyperlink"><link linkend="_Toc478379021"><?latex {\hyperref[ref-0111]{Figure 7 Scalability for GROMACS test case GluCL Ion Channel}}?></link></phrase><tab xml:space="preserve">	</tab><link linkend="_Toc478379021"><?latex {\hyperref[ref-0111]{  }}?></link></para><para role="TableofFigures"><phrase role="Hyperlink"><link linkend="_Toc478379022"><?latex {\hyperref[ref-0112]{Figure 8 Scalability for GROMACS test case Lignocellulose}}?></link></phrase><tab xml:space="preserve">	</tab><link linkend="_Toc478379022"><?latex {\hyperref[ref-0112]{  }}?></link></para><para role="TableofFigures"><phrase role="Hyperlink"><link linkend="_Toc478379023"><?latex {\hyperref[ref-0114]{Figure 9 Scalability for NAMD test case STMV.8M}}?></link></phrase><tab xml:space="preserve">	</tab><link linkend="_Toc478379023"><?latex {\hyperref[ref-0114]{  }}?></link></para><para role="TableofFigures"><phrase role="Hyperlink"><link linkend="_Toc478379024"><?latex {\hyperref[ref-0115]{Figure 10 Scalability for NAMD test case STMV.28M}}?></link></phrase><tab xml:space="preserve">	</tab><link linkend="_Toc478379024"><?latex {\hyperref[ref-0115]{  }}?></link></para><para role="TableofFigures"><phrase role="Hyperlink"><link linkend="_Toc478379025"><?latex {\hyperref[ref-0118]{Figure 11 Eigensolver performance on KNL and GPU}}?></link></phrase><tab xml:space="preserve">	</tab><link linkend="_Toc478379025"><?latex {\hyperref[ref-0118]{  }}?></link></para><para role="TableofFigures"><phrase role="Hyperlink"><link linkend="_Toc478379026"><?latex {\hyperref[ref-0126]{Figure 12 Small test case results for QCD, first implementation}}?></link></phrase><tab xml:space="preserve">	</tab><link linkend="_Toc478379026"><?latex 
{\hyperref[ref-0126]{  }}?></link></para><para role="TableofFigures"><phrase role="Hyperlink"><link linkend="_Toc478379027"><?latex {\hyperref[ref-0128]{Figure 13 Large test case results for QCD, first implementation}}?></link></phrase><tab xml:space="preserve">	</tab><link linkend="_Toc478379027"><?latex {\hyperref[ref-0128]{  }}?></link></para><para role="TableofFigures"><phrase role="Hyperlink"><link linkend="_Toc478379028"><?latex {\hyperref[ref-0130]{Figure 14 shows the time taken by the full MILC 64x64x64x8 test cases on traditional CPU, Intel Knights Landing Xeon Phi and NVIDIA P100 (Pascal) GPU architectures.}}?></link></phrase><tab xml:space="preserve">	</tab><link linkend="_Toc478379028"><?latex {\hyperref[ref-0130]{  }}?></link></para><para role="TableofFigures"><phrase role="Hyperlink"><link linkend="_Toc478379029"><?latex {\hyperref[ref-0133]{Figure 15 Result of second implementation of QCD on K40m GPU}}?></link></phrase><tab xml:space="preserve">	</tab><link linkend="_Toc478379029"><?latex {\hyperref[ref-0133]{  }}?></link></para><para role="TableofFigures"><phrase role="Hyperlink"><link linkend="_Toc478379030"><?latex {\hyperref[ref-0135]{Figure 16 Result of second implementation of QCD on P100 GPU}}?></link></phrase><tab xml:space="preserve">	</tab><link linkend="_Toc478379030"><?latex {\hyperref[ref-0135]{  }}?></link></para><para role="TableofFigures"><phrase role="Hyperlink"><link linkend="_Toc478379031"><?latex {\hyperref[ref-0137]{Figure 17 Result of second implementation of QCD on P100 GPU on larger test case}}?></link></phrase><tab xml:space="preserve">	</tab><link linkend="_Toc478379031"><?latex {\hyperref[ref-0137]{  }}?></link></para><para role="TableofFigures"><phrase role="Hyperlink"><link linkend="_Toc478379032"><?latex {\hyperref[ref-0139]{Figure 18 Result of second implementation of QCD on KNC}}?></link></phrase><tab xml:space="preserve">	</tab><link linkend="_Toc478379032"><?latex {\hyperref[ref-0139]{  }}?></link></para><para 
role="TableofFigures"><phrase role="Hyperlink"><link linkend="_Toc478379033"><?latex {\hyperref[ref-0141]{Figure 19 Result of second implementation of QCD on KNL}}?></link></phrase><tab xml:space="preserve">	</tab><link linkend="_Toc478379033"><?latex {\hyperref[ref-0141]{  }}?></link></para><para role="TableofFigures"><phrase role="Hyperlink"><link linkend="_Toc478379034"><?latex {\hyperref[ref-0144]{Figure 20 Scalability of Quantum Espresso on GPU for test case 1}}?></link></phrase><tab xml:space="preserve">	</tab><link linkend="_Toc478379034"><?latex {\hyperref[ref-0144]{  }}?></link></para><para role="TableofFigures"><phrase role="Hyperlink"><link linkend="_Toc478379035"><?latex {\hyperref[ref-0146]{Figure 21 Scalability of Quantum Espresso on GPU for test case 2}}?></link></phrase><tab xml:space="preserve">	</tab><link linkend="_Toc478379035"><?latex {\hyperref[ref-0146]{  }}?></link></para><para role="TableofFigures"><phrase role="Hyperlink"><link linkend="_Toc478379036"><?latex {\hyperref[ref-0148]{Figure 22 Scalability of Quantum Espresso on KNL for test case 1}}?></link></phrase><tab xml:space="preserve">	</tab><link linkend="_Toc478379036"><?latex {\hyperref[ref-0148]{  }}?></link></para><para role="TableofFigures"><phrase role="Hyperlink"><link linkend="_Toc478379037"><?latex {\hyperref[ref-0150]{Figure 23 Quantum Espresso - KNL vs BDW vs BGQ (at scale)}}?></link></phrase><tab xml:space="preserve">	</tab><link linkend="_Toc478379037"><?latex {\hyperref[ref-0150]{  }}?></link></para></div><para role="Heading10">List of Tables<anchor role="start" xml:id="_Toc478378954"><?latex \label{ref-0014}?></anchor></para><div role="hub:toc"><para role="TableofFigures">Table 1  Main co-processors specifications<tab xml:space="preserve">	</tab><link role="page" linkend="_Toc478379038"><?latex \pageref{ref-0044}?></link></para><para role="TableofFigures">Table 2 Codes and corresponding APIs available (in green)<tab xml:space="preserve">	</tab><link role="page" 
linkend="_Toc478379039"><?latex \pageref{ref-0054}?></link></para><para role="TableofFigures">Table 3 Performance of Code_Saturne + PETSc on 1 node of the POWER8 clusters. Comparison between 2 different nodes, using different types of CPU and GPU. PETSc is built on LAPACK. The speedup is computed as the ratio between the time to solution on the CPU for a given number of MPI tasks and the time to solution on the CPU/GPU for the same number of MPI tasks.<tab xml:space="preserve">	</tab><link role="page" linkend="_Toc478379040"><?latex \pageref{ref-0098}?></link></para><para role="TableofFigures">Table 4 Performance of Code_Saturne and PETSc on 1 node of KNL. PETSc is built on the MKL library<tab xml:space="preserve">	</tab><link role="page" linkend="_Toc478379041"><?latex \pageref{ref-0100}?></link></para><para role="TableofFigures">Table 5 GPAW runtimes (in seconds) for the smaller benchmark (Carbon Nanotube) measured on several architectures when using n sockets (i.e. processors or accelerators).<tab xml:space="preserve">	</tab><link role="page" linkend="_Toc478379042"><?latex \pageref{ref-0106}?></link></para><para role="TableofFigures">Table 6 GPAW runtimes (in seconds) for the larger benchmark (Copper Filament) measured on several architectures when using n sockets (i.e. processors or accelerators). *Due to memory limitations on the GPU the grid spacing was increased from 0.22 to 0.28 to have a sparser grid. 
To account for this in the comparison, the K40 and K80 runtimes have been scaled up using a corresponding CPU runtime as a yardstick (scaling factor q=2.1132).<tab xml:space="preserve">	</tab><link role="page" linkend="_Toc478379043"><?latex \pageref{ref-0108}?></link></para><para role="TableofFigures">Table 7 Overall EXDIG runtime performance on various accelerators (runtime, secs)<tab xml:space="preserve">	</tab><link role="page" linkend="_Toc478379044"><?latex \pageref{ref-0120}?></link></para><para role="TableofFigures">Table 8 Overall EXDIG runtime parallel performance using MPI-GPU version<tab xml:space="preserve">	</tab><link role="page" linkend="_Toc478379045"><?latex \pageref{ref-0122}?></link></para><para role="TableofFigures">Table 9 Synthetic benchmarks results on GPU and Xeon Phi<tab xml:space="preserve">	</tab><link role="page" linkend="_Toc478379046"><?latex \pageref{ref-0154}?></link></para><para role="TableofFigures">Table 10 SPECFEM 3D GLOBE results (run time in seconds)<tab xml:space="preserve">	</tab><link role="page" linkend="_Toc478379047"><?latex \pageref{ref-0156}?></link></para></div><para role="Heading10">References and Applicable Documents<anchor role="start" xml:id="_Toc478378955"><?latex \label{ref-0015}?></anchor></para><orderedlist numeration="arabic"><listitem override="[1]"><para css:margin-bottom="3pt"><phrase role="Hyperlink" xml:lang="en"><link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="http://www.prace-ri.eu"><anchor role="start" xml:id="_Ref477156108"><?latex \label{ref-0016}?></anchor>http://www.prace-ri.eu</link></phrase> </para></listitem><listitem override="[2]"><para css:margin-bottom="3pt"><phrase xml:lang="en">The Unified European Application Benchmark Suite –</phrase> <phrase role="Hyperlink" xml:lang="en"><link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="http://www.prace-ri.eu/ueabs/">http://www.prace-ri.eu/ueabs/</link></phrase><anchor role="start" xml:id="_Ref476982133"><?latex 
\label{ref-0017}?></anchor></para></listitem><listitem override="[3]"><para css:margin-bottom="3pt"><phrase xml:lang="en">D7.4 Unified European Applications Benchmark Suite – Mark Bull et al. – 2013</phrase><anchor role="start" xml:id="_Ref476982292"><?latex \label{ref-0018}?></anchor></para></listitem><listitem override="[4]"><para css:margin-bottom="3pt"><phrase role="Hyperlink" xml:lang="en"><link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="http://www.nvidia.com/object/quadro-design-and-manufacturing.html"><anchor role="start" xml:id="_Ref476982100"><?latex \label{ref-0019}?></anchor>http://www.nvidia.com/object/quadro-design-and-manufacturing.html</link></phrase></para></listitem><listitem override="[5]"><para css:margin-bottom="3pt"><phrase role="Hyperlink" xml:lang="en"><link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="https://userinfo.surfsara.nl/systems/cartesius/description">https://userinfo.surfsara.nl/systems/cartesius/description</link></phrase><anchor role="start" xml:id="_Ref476982066"><?latex \label{ref-0020}?></anchor></para></listitem><listitem override="[6]"><para css:margin-bottom="3pt"><phrase xml:lang="en">MareNostrum III User’s Guide Barcelona Supercomputing Center –</phrase> <phrase role="Hyperlink" xml:lang="en"><link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="https://www.bsc.es/support/MareNostrum3-ug.pdf">https://www.bsc.es/support/MareNostrum3-ug.pdf</link></phrase><anchor role="start" xml:id="_Ref476984580"><?latex \label{ref-0021}?></anchor></para></listitem><listitem override="[7]"><para css:margin-bottom="3pt"><phrase role="Hyperlink" xml:lang="en"><link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="http://www.idris.fr/eng/ouessant/">http://www.idris.fr/eng/ouessant/</link></phrase><anchor role="start" xml:id="_Ref476985408"><?latex \label{ref-0022}?></anchor></para></listitem><listitem override="[8]"><para css:margin-bottom="3pt"><phrase xml:lang="en">PFARM reference –</phrase> <phrase 
role="Hyperlink" xml:lang="en"><link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="https://hpcforge.org/plugins/mediawiki/wiki/pracewp8/images/3/34/Pfarm_long_lug.pdf">https://hpcforge.org/plugins/mediawiki/wiki/pracewp8/images/3/34/Pfarm_long_lug.pdf</link></phrase><anchor role="start" xml:id="_Ref476987482"><?latex \label{ref-0023}?></anchor></para></listitem><listitem override="[9]"><para css:margin-bottom="3pt"><phrase xml:lang="en">Solvent-Driven Preferential Association of Lignin with Regions of Crystalline Cellulose in Molecular Dynamics Simulation – Benjamin Lindner et al. – Biomacromolecules, 2013</phrase><anchor role="start" xml:id="_Ref476989175"><?latex \label{ref-0024}?></anchor></para></listitem><listitem override="[10]"><para css:margin-bottom="3pt"><phrase xml:lang="en">NAMD website –</phrase> <phrase role="Hyperlink" xml:lang="en"><link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="http://www.ks.uiuc.edu/Research/namd/">http://www.ks.uiuc.edu/Research/namd/</link></phrase><anchor role="start" xml:id="_Ref476989447"><?latex \label{ref-0025}?></anchor></para></listitem><listitem override="[11]"><para css:margin-bottom="3pt"><phrase xml:lang="en">SHOC source repository –</phrase> <phrase role="Hyperlink" xml:lang="en"><link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="https://github.com/vetter/shoc">https://github.com/vetter/shoc</link></phrase><anchor role="start" xml:id="_Ref477368547"><?latex \label{ref-0026}?></anchor></para></listitem><listitem override="[12]"><para css:margin-bottom="3pt"><phrase xml:lang="en">Parallelizing the QUDA Library for Multi-GPU Calculations in Lattice Quantum Chromodynamics – R. Babich, M. Clark and B. Joo – SC 10 (Supercomputing 2010)</phrase><anchor role="start" xml:id="_Ref477103549"><?latex \label{ref-0027}?></anchor></para></listitem><listitem override="[13]"><para css:margin-bottom="3pt"><phrase xml:lang="en">Lattice QCD on Intel Xeon Phi – B. Joo, D. D. Kalamkar, K. Vaidyanathan, M. 
Smelyanskiy, K. Pamnany, V. W. Lee, P. Dubey, W. Watson III – International Supercomputing Conference (ISC’13), 2013</phrase><anchor role="start" xml:id="_Ref477103568"><?latex \label{ref-0028}?></anchor></para></listitem><listitem override="[14]"><para css:margin-bottom="3pt"><phrase xml:lang="en">Extension of fractional step techniques for incompressible flows: The preconditioned Orthomin(1) for the pressure Schur complement – G. Houzeaux, R. Aubry, and M. Vázquez – Computers &amp; Fluids, 44:297-313, 2011</phrase><anchor role="start" xml:id="_Ref477369174"><?latex \label{ref-0029}?></anchor></para></listitem><listitem override="[15]"><para css:margin-bottom="3pt"><phrase xml:lang="en">MIMD Lattice Computation (MILC) Collaboration –</phrase> <phrase role="Hyperlink" xml:lang="en"><link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="http://physics.indiana.edu/~sg/milc.html">http://physics.indiana.edu/~sg/milc.html</link></phrase><anchor role="start" xml:id="_Ref477371577"><?latex \label{ref-0030}?></anchor></para></listitem><listitem override="[16]"><para css:margin-bottom="3pt"><phrase xml:lang="en">targetDP –</phrase> <phrase role="Hyperlink" xml:lang="en"><link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="https://ccpforge.cse.rl.ac.uk/svn/ludwig/trunk/targetDP/README">https://ccpforge.cse.rl.ac.uk/svn/ludwig/trunk/targetDP/README</link></phrase><anchor role="start" xml:id="_Ref477371673"><?latex \label{ref-0031}?></anchor></para></listitem><listitem override="[17]"><para css:margin-bottom="3pt"><phrase xml:lang="en">QUDA: A library for QCD on GPU –</phrase> <phrase role="Hyperlink" xml:lang="en"><link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="https://lattice.github.io/quda/">https://lattice.github.io/quda/</link></phrase><anchor role="start" xml:id="_Ref477371810"><?latex \label{ref-0032}?></anchor></para></listitem><listitem override="[18]"><para css:margin-bottom="3pt"><phrase xml:lang="en">QPhiX, QCD for Intel Xeon Phi and Xeon 
processors –</phrase> <phrase role="Hyperlink" xml:lang="en"><link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="http://jeffersonlab.github.io/qphix/">http://jeffersonlab.github.io/qphix/</link></phrase><anchor role="start" xml:id="_Ref477371895"><?latex \label{ref-0033}?></anchor></para></listitem><listitem override="[19]"><para css:margin-bottom="3pt"><phrase xml:lang="en">KNC MaxFlops issue (both SP and DP) –</phrase> <phrase role="Hyperlink" xml:lang="en"><link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="https://github.com/vetter/shoc/issues/37">https://github.com/vetter/shoc/issues/37</link></phrase><anchor role="start" xml:id="_Ref477999206"><?latex \label{ref-0034}?></anchor></para></listitem><listitem override="[20]"><para css:margin-bottom="3pt"> <phrase xml:lang="en"> <anchor role="start" xml:id="_Ref477999262"><?latex \label{ref-0035}?></anchor>KNC SpMV issue – https://github.com/vetter/shoc/issues/24, https://github.com/vetter/shoc/issues/23.</phrase></para></listitem></orderedlist><para role="Heading10">List of Acronyms and Abbreviations<anchor role="start" xml:id="_Toc478378956"><?latex \label{ref-0036}?></anchor></para><variablelist><varlistentry><term><phrase css:font-family="Arial" xml:lang="en">aisbl</phrase></term><listitem><para><phrase css:font-family="Arial" xml:lang="en">Association Internationale Sans But Lucratif</phrase> <br/> <tab xml:space="preserve">	</tab><phrase css:font-family="Arial" xml:lang="en">(legal form of the PRACE-RI)</phrase></para></listitem></varlistentry><varlistentry><term><phrase css:font-family="Arial" xml:lang="en">BCO</phrase></term><listitem><para><phrase css:font-family="Arial" xml:lang="en">Benchmark Code Owner</phrase> </para></listitem></varlistentry></variablelist><para><phrase css:font-family="Arial" xml:lang="en">CoE</phrase><tab xml:space="preserve">	</tab><phrase css:font-family="Arial" xml:lang="en">Center of Excellence</phrase> </para><variablelist><varlistentry><term><phrase 
css:font-family="Arial" xml:lang="en">CPU</phrase></term><listitem><para><phrase css:font-family="Arial" xml:lang="en">Central Processing Unit</phrase></para></listitem></varlistentry><varlistentry><term><phrase css:font-family="Arial" xml:lang="en">CUDA</phrase></term><listitem><para><phrase css:font-family="Arial" xml:lang="en">Compute Unified Device Architecture (NVIDIA)</phrase></para></listitem></varlistentry><varlistentry><term><phrase css:font-family="Arial" xml:lang="en">DARPA</phrase></term><listitem><para><phrase css:font-family="Arial" xml:lang="en">Defense Advanced Research Projects Agency</phrase></para></listitem></varlistentry><varlistentry><term><phrase css:font-family="Arial" xml:lang="en">DEISA</phrase></term><listitem><para><phrase css:font-family="Arial" xml:lang="en">Distributed European Infrastructure for Supercomputing Applications EU project by leading national HPC centres</phrase></para></listitem></varlistentry></variablelist><para><phrase css:font-family="Arial" xml:lang="en">DoA</phrase><tab xml:space="preserve">	</tab><phrase css:font-family="Arial" xml:lang="en">Description of Action (formerly known as DoW)</phrase></para><variablelist><varlistentry><term><phrase css:font-family="Arial" xml:lang="en">EC</phrase></term><listitem><para><phrase css:font-family="Arial" xml:lang="en">European Commission</phrase></para></listitem></varlistentry><varlistentry><term><phrase css:font-family="Arial" xml:lang="en">EESI</phrase></term><listitem><para><phrase css:font-family="Arial" xml:lang="en">European Exascale Software Initiative</phrase></para></listitem></varlistentry></variablelist><para><phrase css:font-family="Arial" xml:lang="en">EoI</phrase><tab xml:space="preserve">	</tab><phrase css:font-family="Arial" xml:lang="en">Expression of Interest</phrase></para><variablelist><varlistentry><term><tabs><tab align="left" horizontal-position="70.9pt"/></tabs><phrase css:font-family="Arial" xml:lang="en">ESFRI</phrase></term><listitem><para><phrase 
css:font-family="Arial" xml:lang="en">European Strategy Forum on Research Infrastructures</phrase> </para></listitem></varlistentry><varlistentry><term><phrase css:font-family="Arial" xml:lang="en">GB</phrase></term><listitem><para><phrase css:font-family="Arial" xml:lang="en">Giga (= 2</phrase><superscript css:font-family="Arial" xml:lang="en">30</superscript> <phrase css:font-family="Arial" xml:lang="en">~ 10</phrase><superscript css:font-family="Arial" xml:lang="en">9</superscript><phrase css:font-family="Arial" xml:lang="en">) Bytes (= 8 bits), also GByte</phrase></para></listitem></varlistentry></variablelist><para><phrase css:font-family="Arial" xml:lang="en">Gb/s</phrase><tab xml:space="preserve">	</tab><phrase css:font-family="Arial" xml:lang="en">Giga (= 10</phrase><superscript css:font-family="Arial" xml:lang="en">9</superscript><phrase css:font-family="Arial" xml:lang="en">) bits per second, also Gbit/s</phrase></para><para><phrase css:font-family="Arial" xml:lang="en">GB/s</phrase><tab xml:space="preserve">	</tab><phrase css:font-family="Arial" xml:lang="en">Giga (= 10</phrase><superscript css:font-family="Arial" xml:lang="en">9</superscript><phrase css:font-family="Arial" xml:lang="en">) Bytes (= 8 bits) per second, also GByte/s</phrase></para><variablelist><varlistentry><term><tabs><tab align="left" horizontal-position="70.9pt"/></tabs><phrase css:font-family="Arial" xml:lang="en">GÉANT</phrase></term><listitem><para><phrase css:font-family="Arial" xml:lang="en">Collaboration between National Research and Education Networks to build a multi-gigabit pan-European network. 
The current EC-funded project as of 2015 is GN4.</phrase></para></listitem></varlistentry><varlistentry><term><phrase css:font-family="Arial" xml:lang="en">GFlop/s</phrase></term><listitem><para><phrase css:font-family="Arial" xml:lang="en">Giga (= 10</phrase><superscript css:font-family="Arial" xml:lang="en">9</superscript><phrase css:font-family="Arial" xml:lang="en">) Floating point operations (usually in 64-bit, i.e. DP) per second, also GF/s</phrase></para></listitem></varlistentry></variablelist><para><phrase css:font-family="Arial" xml:lang="en">GHz</phrase><tab xml:space="preserve">	</tab><phrase css:font-family="Arial" xml:lang="en">Giga (= 10</phrase><superscript css:font-family="Arial" xml:lang="en">9</superscript><phrase css:font-family="Arial" xml:lang="en">) Hertz, frequency =10</phrase><superscript css:font-family="Arial" xml:lang="en">9</superscript> <phrase css:font-family="Arial" xml:lang="en">periods or clock cycles per second</phrase></para><variablelist><varlistentry><term><phrase css:font-family="Arial" xml:lang="en">GPU</phrase></term><listitem><para><phrase css:font-family="Arial" xml:lang="en">Graphics Processing Unit</phrase></para></listitem></varlistentry><varlistentry><term><tabs><tab align="left" horizontal-position="70.9pt"/></tabs><phrase css:font-family="Arial" xml:lang="en">HET</phrase></term><listitem><para><phrase css:font-family="Arial" xml:lang="en">High Performance Computing in Europe Taskforce. Taskforce by representatives from European HPC community to shape the European HPC Research Infrastructure. 
Produced the scientific case and valuable groundwork for the PRACE project.</phrase></para></listitem></varlistentry><varlistentry><term><phrase css:font-family="Arial" xml:lang="en">HMM</phrase></term><listitem><para><phrase css:font-family="Arial" xml:lang="en">Hidden Markov Model</phrase></para></listitem></varlistentry><varlistentry><term><phrase css:font-family="Arial" xml:lang="en">HPC</phrase></term><listitem><para><phrase css:font-family="Arial" xml:lang="en">High Performance Computing; Computing at a high performance level at any given time; often used synonymously with Supercomputing</phrase></para></listitem></varlistentry><varlistentry><term><phrase css:font-family="Arial" xml:lang="en">HPL</phrase></term><listitem><para><phrase css:font-family="Arial" xml:lang="en">High Performance LINPACK</phrase> </para></listitem></varlistentry><varlistentry><term><tabs><tab align="left" horizontal-position="70.9pt"/></tabs><phrase css:font-family="Arial" xml:lang="en">ISC</phrase></term><listitem><para><phrase css:font-family="Arial" xml:lang="en">International Supercomputing Conference; European equivalent to the US based SCxx conference. 
Held annually in Germany.</phrase></para></listitem></varlistentry><varlistentry><term><phrase css:font-family="Arial" xml:lang="en">KB</phrase></term><listitem><para><phrase css:font-family="Arial" xml:lang="en">Kilo (= 2</phrase><superscript css:font-family="Arial" xml:lang="en">10</superscript> <phrase css:font-family="Arial" xml:lang="en">~10</phrase><superscript css:font-family="Arial" xml:lang="en">3</superscript><phrase css:font-family="Arial" xml:lang="en">) Bytes (= 8 bits), also KByte</phrase></para></listitem></varlistentry><varlistentry><term><phrase css:font-family="Arial" xml:lang="en">LINPACK</phrase></term><listitem><para><phrase css:font-family="Arial" xml:lang="en">Software library for Linear Algebra</phrase></para></listitem></varlistentry><varlistentry><term><phrase css:font-family="Arial" xml:lang="en">MB</phrase></term><listitem><para><phrase css:font-family="Arial" xml:lang="en">Management Board (highest decision making body of the project)</phrase></para></listitem></varlistentry><varlistentry><term><phrase css:font-family="Arial" xml:lang="en">MB</phrase></term><listitem><para><phrase css:font-family="Arial" xml:lang="en">Mega (= 2</phrase><superscript css:font-family="Arial" xml:lang="en">20</superscript> <phrase css:font-family="Arial" xml:lang="en">~ 10</phrase><superscript css:font-family="Arial" xml:lang="en">6</superscript><phrase css:font-family="Arial" xml:lang="en">) Bytes (= 8 bits), also MByte</phrase></para></listitem></varlistentry></variablelist><para><phrase css:font-family="Arial" xml:lang="en">MB/s</phrase><tab xml:space="preserve">	</tab><phrase css:font-family="Arial" xml:lang="en">Mega (= 10</phrase><superscript css:font-family="Arial" xml:lang="en">6</superscript><phrase css:font-family="Arial" xml:lang="en">) Bytes (= 8 bits) per second, also MByte/s</phrase></para><para><phrase css:font-family="Arial" xml:lang="en">MFlop/s</phrase><tab xml:space="preserve">	</tab><phrase css:font-family="Arial" xml:lang="en">Mega (= 10</phrase><superscript 
css:font-family="Arial" xml:lang="en">6</superscript><phrase css:font-family="Arial" xml:lang="en">) Floating point operations (usually in 64-bit, i.e. DP) per second, also MF/s</phrase></para><para><phrase css:font-family="Arial" xml:lang="en">MooC</phrase><tab xml:space="preserve">	</tab><phrase css:font-family="Arial" xml:lang="en">Massively open online Course</phrase></para><para><phrase css:font-family="Arial" xml:lang="en">MoU</phrase><tab xml:space="preserve">	</tab><phrase css:font-family="Arial" xml:lang="en">Memorandum of Understanding</phrase></para><variablelist><varlistentry><term><phrase css:font-family="Arial" xml:lang="en">MPI</phrase></term><listitem><para><phrase css:font-family="Arial" xml:lang="en">Message Passing Interface</phrase></para></listitem></varlistentry><varlistentry><term><tabs><tab align="left" horizontal-position="70.9pt"/></tabs><phrase css:font-family="Arial" xml:lang="en">NDA</phrase></term><listitem><para><phrase css:font-family="Arial" xml:lang="en">Non-Disclosure Agreement. 
Typically signed between vendors and customers working together on products prior to their general availability or announcement.</phrase></para></listitem></varlistentry><varlistentry><term><tabs><tab align="left" horizontal-position="70.9pt"/></tabs><phrase css:font-family="Arial" xml:lang="en">PA</phrase></term><listitem><para css:margin-top="4pt"><phrase css:font-family="Arial" xml:lang="en">Preparatory Access (to PRACE resources)</phrase></para></listitem></varlistentry><varlistentry><term><tabs><tab align="left" horizontal-position="70.9pt"/></tabs><phrase css:font-family="Arial" xml:lang="en">PATC</phrase></term><listitem><para css:margin-top="4pt"><phrase css:font-family="Arial" xml:lang="en">PRACE Advanced Training Centres</phrase></para></listitem></varlistentry><varlistentry><term><phrase css:font-family="Arial" xml:lang="en">PRACE</phrase></term><listitem><para><phrase css:font-family="Arial" xml:lang="en">Partnership for Advanced Computing in Europe; Project Acronym</phrase></para></listitem></varlistentry><varlistentry><term><phrase css:font-family="Arial" xml:lang="en">PRACE 2</phrase></term><listitem><para><phrase css:font-family="Arial" xml:lang="en">The upcoming next phase of the PRACE Research Infrastructure following the initial five year period.</phrase></para></listitem></varlistentry><varlistentry><term><phrase css:font-family="Arial" xml:lang="en">PRIDE</phrase></term><listitem><para><phrase css:font-family="Arial" xml:lang="en">Project Information and Dissemination Event</phrase></para></listitem></varlistentry><varlistentry><term><phrase css:font-family="Arial" xml:lang="en">RI</phrase></term><listitem><para><phrase css:font-family="Arial" xml:lang="en">Research Infrastructure</phrase></para></listitem></varlistentry><varlistentry><term><phrase css:font-family="Arial" xml:lang="en">TB</phrase></term><listitem><para><phrase css:font-family="Arial" xml:lang="en">Technical Board (group of Work Package 
leaders)</phrase></para></listitem></varlistentry><varlistentry><term><phrase css:font-family="Arial" xml:lang="en">TB</phrase></term><listitem><para><phrase css:font-family="Arial" xml:lang="en">Tera (= 2</phrase><superscript css:font-family="Arial" xml:lang="en">40</superscript> <phrase css:font-family="Arial" xml:lang="en">~ 10</phrase><superscript css:font-family="Arial" xml:lang="en">12</superscript><phrase css:font-family="Arial" xml:lang="en">) Bytes (= 8 bits), also TByte</phrase></para></listitem></varlistentry><varlistentry><term><phrase css:font-family="Arial" xml:lang="en">TCO</phrase></term><listitem><para><phrase css:font-family="Arial" xml:lang="en">Total Cost of Ownership. Includes recurring costs (e.g. personnel, power, cooling, maintenance) in addition to the purchase cost.</phrase></para></listitem></varlistentry><varlistentry><term><phrase css:font-family="Arial" xml:lang="en">TDP</phrase></term><listitem><para><phrase css:font-family="Arial" xml:lang="en">Thermal Design Power</phrase></para></listitem></varlistentry><varlistentry><term><phrase css:font-family="Arial" xml:lang="en">TFlop/s</phrase></term><listitem><para><phrase css:font-family="Arial" xml:lang="en">Tera (= 10</phrase><superscript css:font-family="Arial" xml:lang="en">12</superscript><phrase css:font-family="Arial" xml:lang="en">) Floating-point operations (usually in 64-bit, i.e. DP) per second, also TF/s</phrase></para></listitem></varlistentry><varlistentry><term><phrase css:font-family="Arial" xml:lang="en">Tier-0</phrase></term><listitem><para><phrase css:font-family="Arial" xml:lang="en">Denotes the apex of a conceptual pyramid of HPC systems. In this context the Supercomputing Research Infrastructure would host the Tier-0 systems; national or topical HPC centres would constitute Tier-1</phrase></para></listitem></varlistentry><varlistentry><term><phrase css:font-family="Arial" xml:lang="en">UNICORE</phrase></term><listitem><para><phrase css:font-family="Arial" xml:lang="en">Uniform Interface to Computing Resources. 
Grid software for seamless access to distributed resources.</phrase></para></listitem></varlistentry></variablelist><para css:page-break-before="always"/><para role="Heading10">List of Project Partner Acronyms<anchor role="start" xml:id="_Toc478378957"><?latex \label{ref-0037}?></anchor></para><variablelist><varlistentry><term><tabs><tab align="left" horizontal-position="109.85pt"/></tabs><phrase css:font-family="Arial" css:color="#000000" xml:lang="en">BADW-LRZ</phrase></term><listitem><para><phrase css:font-family="Arial" css:color="#000000" xml:lang="en">Leibniz-Rechenzentrum der Bayerischen Akademie der Wissenschaften, Germany (3</phrase><superscript css:font-family="Arial" css:color="#000000" xml:lang="en">rd</superscript> <phrase css:font-family="Arial" css:color="#000000" xml:lang="en">Party to GCS)</phrase></para></listitem></varlistentry><varlistentry><term><tabs><tab align="left" horizontal-position="109.85pt"/></tabs><phrase css:font-family="Arial" css:color="#000000" xml:lang="en">BILKENT</phrase></term><listitem><para><phrase css:font-family="Arial" css:color="#000000" xml:lang="en">Bilkent University, Turkey (3</phrase><superscript css:font-family="Arial" css:color="#000000" xml:lang="en">rd</superscript> <phrase css:font-family="Arial" css:color="#000000" xml:lang="en">Party to UYBHM)</phrase></para></listitem></varlistentry><varlistentry><term><tabs><tab align="left" horizontal-position="109.85pt"/></tabs><phrase css:font-family="Arial" css:color="#000000" xml:lang="en">BSC</phrase></term><listitem><para><phrase css:font-family="Arial" css:color="#000000" xml:lang="en">Barcelona Supercomputing Center - Centro Nacional de Supercomputacion, Spain</phrase> </para></listitem></varlistentry><varlistentry><term><tabs><tab align="left" horizontal-position="109.85pt"/></tabs><phrase css:font-family="Arial" css:color="#000000" xml:lang="en">CaSToRC</phrase></term><listitem><para><phrase css:font-family="Arial" css:color="#000000" 
xml:lang="en">Computation-based Science and Technology Research Center, Cyprus</phrase></para></listitem></varlistentry><varlistentry><term><tabs><tab align="left" horizontal-position="109.85pt"/></tabs><phrase css:font-family="Arial" css:color="#000000" xml:lang="en">CCSAS</phrase></term><listitem><para><phrase css:font-family="Arial" css:color="#000000" xml:lang="en">Computing Centre of the Slovak Academy of Sciences, Slovakia</phrase></para></listitem></varlistentry><varlistentry><term><tabs><tab align="left" horizontal-position="109.85pt"/></tabs><phrase css:font-family="Arial" css:color="#000000" xml:lang="fr">CEA</phrase></term><listitem><para><phrase css:font-family="Arial" css:color="#000000" xml:lang="fr">Commissariat à l’Energie Atomique et aux Energies Alternatives, France (3</phrase><superscript css:font-family="Arial" css:color="#000000" xml:lang="fr"> rd</superscript> <phrase css:font-family="Arial" css:color="#000000" xml:lang="fr">Party to GENCI)</phrase></para></listitem></varlistentry><varlistentry><term><tabs><tab align="left" horizontal-position="109.85pt"/></tabs><phrase css:font-family="Arial" css:color="#000000" xml:lang="en">CESGA</phrase></term><listitem><para><phrase css:font-family="Arial" css:color="#000000" xml:lang="en">Fundacion Publica Gallega Centro Tecnológico de Supercomputación de Galicia, Spain, (3</phrase><superscript css:font-family="Arial" css:color="#000000" xml:lang="en">rd</superscript> <phrase css:font-family="Arial" css:color="#000000" xml:lang="en">Party to BSC)</phrase></para></listitem></varlistentry><varlistentry><term><tabs><tab align="left" horizontal-position="109.85pt"/></tabs><phrase css:font-family="Arial" css:color="#000000" xml:lang="fr">CINECA</phrase></term><listitem><para><phrase css:font-family="Arial" css:color="#000000" xml:lang="fr">CINECA Consorzio Interuniversitario, Italy</phrase></para></listitem></varlistentry><varlistentry><term><tabs><tab align="left" 
horizontal-position="109.85pt"/></tabs><phrase css:font-family="Arial" css:color="#000000" xml:lang="fr">CINES</phrase></term><listitem><para><phrase css:font-family="Arial" css:color="#000000" xml:lang="fr">Centre Informatique National de l’Enseignement Supérieur, France (3</phrase><superscript css:font-family="Arial" css:color="#000000" xml:lang="fr"> rd</superscript> <phrase css:font-family="Arial" css:color="#000000" xml:lang="fr">Party to GENCI)</phrase></para></listitem></varlistentry><varlistentry><term><tabs><tab align="left" horizontal-position="109.85pt"/></tabs><phrase css:font-family="Arial" css:color="#000000" xml:lang="fr">CNRS</phrase></term><listitem><para><phrase css:font-family="Arial" css:color="#000000" xml:lang="fr">Centre National de la Recherche Scientifique, France (3</phrase><superscript css:font-family="Arial" css:color="#000000" xml:lang="fr"> rd</superscript> <phrase css:font-family="Arial" css:color="#000000" xml:lang="fr">Party to GENCI)</phrase></para></listitem></varlistentry><varlistentry><term><tabs><tab align="left" horizontal-position="109.85pt"/></tabs><phrase css:font-family="Arial" css:color="#000000" xml:lang="en">CSC</phrase></term><listitem><para><phrase css:font-family="Arial" css:color="#000000" xml:lang="en">CSC Scientific Computing Ltd., Finland</phrase></para></listitem></varlistentry><varlistentry><term><tabs><tab align="left" horizontal-position="109.85pt"/></tabs><phrase css:font-family="Arial" css:color="#000000" xml:lang="en">CSIC</phrase></term><listitem><para><phrase css:font-family="Arial" css:color="#000000" xml:lang="en">Spanish Council for Scientific Research (3</phrase><superscript css:font-family="Arial" css:color="#000000" xml:lang="en">rd</superscript> <phrase css:font-family="Arial" css:color="#000000" xml:lang="en">Party to BSC)</phrase></para></listitem></varlistentry><varlistentry><term><tabs><tab align="left" horizontal-position="109.85pt"/></tabs><phrase css:font-family="Arial" css:color="#000000" 
xml:lang="en">CYFRONET</phrase></term><listitem><para><phrase css:font-family="Arial" css:color="#000000" xml:lang="en">Academic Computing Centre CYFRONET AGH, Poland (3rd Party to PSNC)</phrase></para></listitem></varlistentry><varlistentry><term><tabs><tab align="left" horizontal-position="109.85pt"/></tabs><phrase css:font-family="Arial" css:color="#000000" xml:lang="en">EPCC</phrase></term><listitem><para><phrase css:font-family="Arial" css:color="#000000" xml:lang="en">EPCC at The University of Edinburgh, UK</phrase> </para></listitem></varlistentry><varlistentry><term><tabs><tab align="left" horizontal-position="109.85pt"/></tabs><phrase css:font-family="Arial" css:color="#000000" xml:lang="en">ETHZurich (CSCS)</phrase></term><listitem><para><phrase css:font-family="Arial" css:color="#000000" xml:lang="en">Eidgenössische Technische Hochschule Zürich – CSCS, Switzerland</phrase></para></listitem></varlistentry><varlistentry><term><tabs><tab align="left" horizontal-position="109.85pt"/></tabs><phrase css:font-family="Arial" css:color="#000000" xml:lang="en">FIS</phrase></term><listitem><para><phrase css:font-family="Arial" css:color="#000000" xml:lang="en">FACULTY OF INFORMATION STUDIES, Slovenia (3</phrase><superscript css:font-family="Arial" css:color="#000000" xml:lang="en">rd</superscript> <phrase css:font-family="Arial" css:color="#000000" xml:lang="en">Party to ULFME)</phrase></para></listitem></varlistentry><varlistentry><term><tabs><tab align="left" horizontal-position="109.85pt"/></tabs><phrase css:font-family="Arial" css:color="#000000" xml:lang="fr">GCS</phrase></term><listitem><para><phrase css:font-family="Arial" css:color="#000000" xml:lang="fr">Gauss Centre for Supercomputing e.V.</phrase></para></listitem></varlistentry><varlistentry><term><tabs><tab align="left" horizontal-position="109.85pt"/></tabs><phrase css:font-family="Arial" css:color="#000000" xml:lang="fr">GENCI</phrase></term><listitem><para><phrase css:font-family="Arial" 
css:color="#000000" xml:lang="fr">Grand Equipement National de Calcul Intensif, France</phrase></para></listitem></varlistentry><varlistentry><term><tabs><tab align="left" horizontal-position="109.85pt"/></tabs><phrase css:font-family="Arial" css:color="#000000" xml:lang="en">GRNET</phrase></term><listitem><para><phrase css:font-family="Arial" css:color="#000000" xml:lang="en">Greek Research and Technology Network, Greece</phrase></para></listitem></varlistentry><varlistentry><term><tabs><tab align="left" horizontal-position="109.85pt"/></tabs><phrase css:font-family="Arial" css:color="#000000" xml:lang="fr">INRIA</phrase></term><listitem><para><phrase css:font-family="Arial" css:color="#000000" xml:lang="fr">Institut National de Recherche en Informatique et Automatique, France (3</phrase><superscript css:font-family="Arial" css:color="#000000" xml:lang="fr"> rd</superscript> <phrase css:font-family="Arial" css:color="#000000" xml:lang="fr">Party to GENCI)</phrase></para></listitem></varlistentry><varlistentry><term><tabs><tab align="left" horizontal-position="109.85pt"/></tabs><phrase css:font-family="Arial" css:color="#000000" xml:lang="en">IST</phrase></term><listitem><para><phrase css:font-family="Arial" css:color="#000000" xml:lang="en">Instituto Superior Técnico, Portugal (3rd Party to UC-LCA)</phrase></para></listitem></varlistentry><varlistentry><term><tabs><tab align="left" horizontal-position="109.85pt"/></tabs><phrase css:font-family="Arial" css:color="#000000" xml:lang="en">IUCC</phrase></term><listitem><para><phrase css:font-family="Arial" css:color="#000000" xml:lang="en">INTER UNIVERSITY COMPUTATION CENTRE, Israel</phrase></para></listitem></varlistentry><varlistentry><term><tabs><tab align="left" horizontal-position="109.85pt"/></tabs><phrase css:font-family="Arial" css:color="#000000" xml:lang="en">JKU</phrase></term><listitem><para><phrase css:font-family="Arial" css:color="#000000" xml:lang="en">Institut fuer Graphische und Parallele 
Datenverarbeitung der Johannes Kepler Universitaet Linz, Austria</phrase></para></listitem></varlistentry><varlistentry><term><tabs><tab align="left" horizontal-position="109.85pt"/></tabs><phrase css:font-family="Arial" css:color="#000000" xml:lang="en">JUELICH</phrase></term><listitem><para><phrase css:font-family="Arial" css:color="#000000" xml:lang="en">Forschungszentrum Juelich GmbH, Germany</phrase></para></listitem></varlistentry><varlistentry><term><tabs><tab align="left" horizontal-position="109.85pt"/></tabs><phrase css:font-family="Arial" css:color="#000000" xml:lang="en">KTH</phrase></term><listitem><para><phrase css:font-family="Arial" css:color="#000000" xml:lang="en">Royal Institute of Technology, Sweden (3</phrase><superscript css:font-family="Arial" css:color="#000000" xml:lang="en"> rd</superscript> <phrase css:font-family="Arial" css:color="#000000" xml:lang="en">Party to SNIC)</phrase></para></listitem></varlistentry><varlistentry><term><tabs><tab align="left" horizontal-position="109.85pt"/></tabs><phrase css:font-family="Arial" css:color="#000000" xml:lang="en">LiU</phrase></term><listitem><para><phrase css:font-family="Arial" css:color="#000000" xml:lang="en">Linkoping University, Sweden (3</phrase><superscript css:font-family="Arial" css:color="#000000" xml:lang="en"> rd</superscript> <phrase css:font-family="Arial" css:color="#000000" xml:lang="en">Party to SNIC)</phrase></para></listitem></varlistentry><varlistentry><term><tabs><tab align="left" horizontal-position="109.85pt"/></tabs><phrase css:font-family="Arial" css:color="#000000" xml:lang="en">NCSA</phrase></term><listitem><para><phrase css:font-family="Arial" css:color="#000000" xml:lang="en">NATIONAL CENTRE FOR SUPERCOMPUTING APPLICATIONS, Bulgaria</phrase></para></listitem></varlistentry><varlistentry><term><tabs><tab align="left" horizontal-position="109.85pt"/></tabs><phrase css:font-family="Arial" css:color="#000000" xml:lang="en">NIIF</phrase></term><listitem><para><phrase 
css:font-family="Arial" css:color="#000000" xml:lang="en">National Information Infrastructure Development Institute, Hungary</phrase></para></listitem></varlistentry><varlistentry><term><tabs><tab align="left" horizontal-position="109.85pt"/></tabs><phrase css:font-family="Arial" css:color="#000000" xml:lang="en">NTNU</phrase></term><listitem><para><phrase css:font-family="Arial" css:color="#000000" xml:lang="en">The Norwegian University of Science and Technology, Norway (3</phrase><superscript css:font-family="Arial" css:color="#000000" xml:lang="en">rd</superscript> <phrase css:font-family="Arial" css:color="#000000" xml:lang="en">Party to SIGMA)</phrase></para></listitem></varlistentry><varlistentry><term><tabs><tab align="left" horizontal-position="109.85pt"/></tabs><phrase css:font-family="Arial" css:color="#000000" xml:lang="en">NUI-Galway</phrase></term><listitem><para><phrase css:font-family="Arial" css:color="#000000" xml:lang="en">National University of Ireland Galway, Ireland</phrase></para></listitem></varlistentry><varlistentry><term><tabs><tab align="left" horizontal-position="109.85pt"/></tabs><phrase css:font-family="Arial" css:color="#000000" xml:lang="en">PRACE</phrase></term><listitem><para><phrase css:font-family="Arial" css:color="#000000" xml:lang="en">Partnership for Advanced Computing in Europe aisbl, Belgium</phrase></para></listitem></varlistentry><varlistentry><term><tabs><tab align="left" horizontal-position="109.85pt"/></tabs><phrase css:font-family="Arial" css:color="#000000" xml:lang="en">PSNC</phrase></term><listitem><para><phrase css:font-family="Arial" css:color="#000000" xml:lang="en">Poznan Supercomputing and Networking Center, Poland</phrase></para></listitem></varlistentry><varlistentry><term><tabs><tab align="left" horizontal-position="109.85pt"/></tabs><phrase css:font-family="Arial" css:color="#000000" xml:lang="en">RISCSW</phrase></term><listitem><para><phrase css:font-family="Arial" css:color="#000000" xml:lang="en">RISC 
Software GmbH</phrase></para></listitem></varlistentry><varlistentry><term><tabs><tab align="left" horizontal-position="109.85pt"/></tabs><phrase css:font-family="Arial" css:color="#000000" xml:lang="en">RZG</phrase></term><listitem><para><phrase css:font-family="Arial" css:color="#000000" xml:lang="en">Max Planck Gesellschaft zur Förderung der Wissenschaften e.V., Germany (3</phrase><superscript css:font-family="Arial" css:color="#000000" xml:lang="en"> rd</superscript> <phrase css:font-family="Arial" css:color="#000000" xml:lang="en">Party to GCS)</phrase></para></listitem></varlistentry><varlistentry><term><tabs><tab align="left" horizontal-position="109.85pt"/></tabs><phrase css:font-family="Arial" css:color="#000000" xml:lang="en">SIGMA2</phrase></term><listitem><para><phrase css:font-family="Arial" css:color="#000000" xml:lang="en">UNINETT Sigma2 AS, Norway</phrase></para></listitem></varlistentry><varlistentry><term><tabs><tab align="left" horizontal-position="109.85pt"/></tabs><phrase css:font-family="Arial" css:color="#000000" xml:lang="en">SNIC</phrase></term><listitem><para><phrase css:font-family="Arial" css:color="#000000" xml:lang="en">Swedish National Infrastructure for Computing (within the Swedish Science Council), Sweden</phrase></para></listitem></varlistentry><varlistentry><term><tabs><tab align="left" horizontal-position="109.85pt"/></tabs><phrase css:font-family="Arial" css:color="#000000" xml:lang="en">STFC</phrase></term><listitem><para><phrase css:font-family="Arial" css:color="#000000" xml:lang="en">Science and Technology Facilities Council, UK (3</phrase><superscript css:font-family="Arial" css:color="#000000" xml:lang="en">rd</superscript> <phrase css:font-family="Arial" css:color="#000000" xml:lang="en">Party to EPSRC)</phrase></para></listitem></varlistentry><varlistentry><term><tabs><tab align="left" horizontal-position="109.85pt"/></tabs><phrase css:font-family="Arial" css:color="#000000" 
xml:lang="en">SURFsara</phrase></term><listitem><para><phrase css:font-family="Arial" css:color="#000000" xml:lang="en">Dutch national high-performance computing and e-Science support center, part of the SURF cooperative, Netherlands</phrase></para></listitem></varlistentry><varlistentry><term><tabs><tab align="left" horizontal-position="109.85pt"/></tabs><phrase css:font-family="Arial" css:color="#000000" xml:lang="fr">UC-LCA</phrase></term><listitem><para><phrase css:font-family="Arial" css:color="#000000" xml:lang="fr">Universidade de Coimbra, Laboratório de Computação Avançada, Portugal</phrase></para></listitem></varlistentry><varlistentry><term><tabs><tab align="left" horizontal-position="109.85pt"/></tabs><phrase css:font-family="Arial" css:color="#000000" xml:lang="en">UCPH</phrase></term><listitem><para><phrase css:font-family="Arial" css:color="#000000" xml:lang="en">Københavns Universitet, Denmark</phrase></para></listitem></varlistentry><varlistentry><term><tabs><tab align="left" horizontal-position="109.85pt"/></tabs><phrase css:font-family="Arial" css:color="#000000" xml:lang="en">UHEM</phrase></term><listitem><para><phrase css:font-family="Arial" css:color="#000000" xml:lang="en">Istanbul Technical University, Ayazaga Campus, Turkey</phrase></para></listitem></varlistentry><varlistentry><term><tabs><tab align="left" horizontal-position="109.85pt"/></tabs><phrase css:font-family="Arial" css:color="#000000" xml:lang="en">UiO</phrase></term><listitem><para><phrase css:font-family="Arial" css:color="#000000" xml:lang="en">University of Oslo, Norway (3</phrase><superscript css:font-family="Arial" css:color="#000000" xml:lang="en">rd</superscript> <phrase css:font-family="Arial" css:color="#000000" xml:lang="en">Party to SIGMA)</phrase></para></listitem></varlistentry><varlistentry><term><tabs><tab align="left" horizontal-position="109.85pt"/></tabs><phrase css:font-family="Arial" css:color="#000000" 
xml:lang="en">ULFME</phrase></term><listitem><para><phrase css:font-family="Arial" css:color="#000000" xml:lang="en">UNIVERZA V LJUBLJANI, Slovenia</phrase></para></listitem></varlistentry><varlistentry><term><tabs><tab align="left" horizontal-position="109.85pt"/></tabs><phrase css:font-family="Arial" css:color="#000000" xml:lang="en">UmU</phrase></term><listitem><para><phrase css:font-family="Arial" css:color="#000000" xml:lang="en">Umea University, Sweden (3</phrase><superscript css:font-family="Arial" css:color="#000000" xml:lang="en"> rd</superscript> <phrase css:font-family="Arial" css:color="#000000" xml:lang="en">Party to SNIC)</phrase></para></listitem></varlistentry><varlistentry><term><tabs><tab align="left" horizontal-position="109.85pt"/></tabs><phrase css:font-family="Arial" css:color="#000000" xml:lang="en">UnivEvora</phrase></term><listitem><para><phrase css:font-family="Arial" css:color="#000000" xml:lang="en">Universidade de Évora, Portugal (3rd Party to UC-LCA)</phrase></para></listitem></varlistentry><varlistentry><term><tabs><tab align="left" horizontal-position="109.85pt"/></tabs><phrase css:font-family="Arial" css:color="#000000" xml:lang="en">UPC</phrase></term><listitem><para><phrase css:font-family="Arial" css:color="#000000" xml:lang="en">Universitat Politècnica de Catalunya, Spain (3rd Party to BSC)</phrase></para></listitem></varlistentry><varlistentry><term><tabs><tab align="left" horizontal-position="109.85pt"/></tabs><phrase css:font-family="Arial" css:color="#000000" xml:lang="en">UPM/CeSViMa</phrase></term><listitem><para><phrase css:font-family="Arial" css:color="#000000" xml:lang="en">Madrid Supercomputing and Visualization Center, Spain (3</phrase><superscript css:font-family="Arial" css:color="#000000" xml:lang="en">rd</superscript> <phrase css:font-family="Arial" css:color="#000000" xml:lang="en">Party to BSC)</phrase></para></listitem></varlistentry><varlistentry><term><tabs><tab align="left" 
horizontal-position="109.85pt"/></tabs><phrase css:font-family="Arial" css:color="#000000" xml:lang="en">USTUTT-HLRS</phrase></term><listitem><para><phrase css:font-family="Arial" css:color="#000000" xml:lang="en">Universitaet Stuttgart – HLRS, Germany (3rd Party to GCS)</phrase></para></listitem></varlistentry><varlistentry><term><tabs><tab align="left" horizontal-position="109.85pt"/></tabs><phrase css:font-family="Arial" css:color="#000000" xml:lang="en">VSB-TUO</phrase></term><listitem><para><phrase css:font-family="Arial" css:color="#000000" xml:lang="en">VYSOKA SKOLA BANSKA - TECHNICKA UNIVERZITA OSTRAVA, Czech Republic</phrase></para></listitem></varlistentry><varlistentry><term><tabs><tab align="left" horizontal-position="109.85pt"/></tabs><phrase css:font-family="Arial" css:color="#000000" xml:lang="en">WCNS</phrase></term><listitem><para><phrase css:font-family="Arial" css:color="#000000" xml:lang="en">Politechnika Wroclawska, Poland (3rd Party to PSNC)</phrase></para></listitem></varlistentry></variablelist><para role="Title"><phrase xml:lang="en">Executive Summary</phrase><anchor role="start" xml:id="_Toc478378958"><?latex \label{ref-0038}?></anchor></para><para css:margin-bottom="6pt" css:text-align="justify"><phrase xml:lang="en">This document describes an accelerator benchmark suite, a set of 11 codes that includes 1 synthetic benchmark and 10 commonly used applications. The key focus of this task has been exploiting accelerators or co-processors to improve the performance of real applications. 
It aims at providing a set of scalable, currently relevant and publicly available codes and datasets.</phrase></para><para css:margin-bottom="6pt" css:text-align="justify"><phrase xml:lang="en">This work has been undertaken by Task7.2B "Accelerator Benchmarks" in the PRACE Fourth Implementation Phase (PRACE-4IP) project.</phrase></para><para css:margin-bottom="6pt" css:text-align="justify"><phrase xml:lang="en">Most of the selected applications are a subset of the Unified European Applications Benchmark Suite (UEABS)</phrase> <phrase xml:lang="en"><link linkend="_Ref476982133"><?latex {\hyperref[ref-0017]{[2]}}?></link></phrase><phrase xml:lang="en"><link linkend="_Ref476982292"><?latex {\hyperref[ref-0018]{[3]}}?></link></phrase><phrase xml:lang="en">. One application and a synthetic benchmark have been added.</phrase></para><para css:margin-bottom="6pt" css:text-align="justify"><phrase xml:lang="en">As a result, the selected codes are: Alya, Code_Saturne, CP2K, GROMACS, GPAW, NAMD, PFARM, QCD, Quantum Espresso, SHOC and SPECFEM3D.</phrase></para><para css:margin-bottom="6pt" css:text-align="justify"><phrase xml:lang="en">For each code, two or more test case datasets have been selected. These are described in this document, along with a brief introduction to the application codes themselves. For each code, some sample results are presented, from first runs on leading-edge systems and prototypes.</phrase></para><para xmlns:docx2tex="http://transpect.io/docx2tex" role="Heading2" docx2tex:config="headline" css:margin-top="18pt"><phrase role="docx2tex:identifier" css:font-size="14pt">1</phrase>Introduction<anchor xmlns:hub="http://transpect.io/hub" role="start" xml:id="_Toc478378959"><?latex \label{ref-0039}?></anchor></para><para css:margin-bottom="6pt" css:text-align="justify"><phrase xml:lang="en">The work produced within this task is an extension of the UEABS for accelerators. 
This document will cover each code, presenting the code as well as the test cases defined for the benchmarks and the first results that have been recorded on various accelerator systems.</phrase></para><para role="NormalPRACE">Like the UEABS, this suite aims to present results for many scientific fields that can use HPC accelerated resources. Hence, it will help the European scientific communities decide which infrastructures they could buy in the near future. We focus on Intel Xeon Phi coprocessors and NVIDIA GPU cards for benchmarking as they are the two most widespread accelerated resources available now.</para><para css:margin-bottom="6pt" css:text-align="justify"><phrase xml:lang="en">Section</phrase> <phrase xml:lang="en"><link linkend="_Ref476982656"><?latex {\hyperref[ref-0040]{2}}?></link></phrase> <phrase xml:lang="en">will present both types of accelerator systems, Xeon Phi and GPU card along with architecture examples. Section</phrase> <phrase xml:lang="en"><link linkend="_Ref477340653"><?latex {\hyperref[ref-0051]{3}}?></link></phrase> <phrase xml:lang="en">gives a description of each of the selected applications, together with the test case datasets while section</phrase> <phrase xml:lang="en"><link linkend="_Ref477340707"><?latex {\hyperref[ref-0087]{4}}?></link></phrase> <phrase xml:lang="en">presents some sample results. 
Section</phrase> <phrase xml:lang="en"><link linkend="_Ref477340783"><?latex {\hyperref[ref-0157]{5}}?></link></phrase> <phrase xml:lang="en">outlines further work on, and using, the suite.</phrase></para><para xmlns:docx2tex="http://transpect.io/docx2tex" role="Heading2" docx2tex:config="headline" css:margin-top="18pt"><phrase role="docx2tex:identifier" css:font-size="14pt">2</phrase>Targeted architectures<anchor xmlns:hub="http://transpect.io/hub" role="start" xml:id="_Ref476982656"><?latex \label{ref-0040}?></anchor><anchor xmlns:hub="http://transpect.io/hub" role="start" xml:id="_Toc478378960"><?latex \label{ref-0041}?></anchor></para><para><phrase xml:lang="en">This suite is targeting accelerator cards, more specifically the Intel Xeon Phi and NVIDIA GPU architecture. This section will quickly describe them and will present the 4 machines on which the benchmarks ran.</phrase></para><para xmlns:docx2tex="http://transpect.io/docx2tex" role="Heading3" docx2tex:config="headline"><phrase role="docx2tex:identifier">2.1</phrase>Co-processor description<anchor xmlns:hub="http://transpect.io/hub" role="start" xml:id="_Toc478378961"><?latex \label{ref-0042}?></anchor></para><para css:margin-bottom="6pt" css:text-align="justify"><phrase xml:lang="en">Scientific computing using co-processors has gained popularity in recent years. First, the utility of GPUs has been demonstrated and evaluated in several application domains</phrase> <phrase xml:lang="en"><link linkend="_Ref476982100"><?latex {\hyperref[ref-0019]{[4]}}?></link></phrase><phrase xml:lang="en">. As a response to NVIDIA’s supremacy in this field, Intel designed Xeon Phi cards.</phrase></para><para role="NormalPRACE">Architectures and programming models of co-processors may differ from those of CPUs and vary among different co-processor types. 
The main challenges are the high level of parallelism required from software and the fact that code may have to be offloaded to the accelerator card.</para><para role="NormalPRACE"><link linkend="_Ref477772034"><?latex {\hyperref[ref-0043]{Table 1}}?></link> illustrates this fact:</para><informaltable css:border-collapse="collapse" css:margin-left="4.65pt" css:width="449.55pt"><tgroup cols="5"><colspec colnum="1" colname="col1" colwidth="37.67666904mm"/><colspec colnum="2" colname="col2" colwidth="28.3986129mm"/><colspec colnum="3" colname="col3" colwidth="31.60889088mm"/><colspec colnum="4" colname="col4" colwidth="31.09736307mm"/><colspec colnum="5" colname="col5" colwidth="29.8097241mm"/><tbody><row css:min-height="18.05pt"><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:width="106.8pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="none" css:border-right-style="none" css:white-space="nowrap" css:vertical-align="bottom" docx2hub:generated-hideMark="" colname="col1"/><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="170.1pt" css:border-top-style="solid" css:border-top-width="0.5pt" css:border-left-style="solid" css:border-left-width="0.5pt" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:background-color="#E7E6E6" css:white-space="nowrap" css:vertical-align="bottom" docx2hub:generated-hideMark="" namest="col2" nameend="col3"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000" xml:lang="en">Intel Xeon Phi</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="172.65pt" css:border-top-style="solid" css:border-top-width="0.5pt" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" 
css:background-color="#E7E6E6" css:white-space="nowrap" css:vertical-align="bottom" docx2hub:generated-hideMark="" namest="col4" nameend="col5"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000" xml:lang="en">NVIDIA GPU</phrase></para></entry></row><row css:min-height="16pt"><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="106.8pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="none" css:border-right-style="none" css:white-space="nowrap" css:vertical-align="bottom" docx2hub:generated-hideMark="" colname="col1"/><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="80.5pt" css:border-top-style="none" css:border-left-style="solid" css:border-left-width="0.5pt" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:background-color="#E7E6E6" css:white-space="nowrap" css:vertical-align="bottom" docx2hub:generated-hideMark="" colname="col2"><para css:text-align="center" css:text-align-last="center"> <phrase css:font-family="Calibri" css:color="#000000" xml:lang="en">5110P (KNC)</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="89.6pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:background-color="#E7E6E6" css:white-space="nowrap" css:vertical-align="bottom" docx2hub:generated-hideMark="" colname="col3"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000" xml:lang="en">7250 (KNL)</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="88.15pt" css:border-top-style="none" css:border-left-style="none" 
css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:background-color="#E7E6E6" css:white-space="nowrap" css:vertical-align="bottom" docx2hub:generated-hideMark="" colname="col4"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000" xml:lang="en">K40m</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="84.5pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:background-color="#E7E6E6" css:white-space="nowrap" css:vertical-align="bottom" docx2hub:generated-hideMark="" colname="col5"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000" xml:lang="en">P100</phrase></para></entry></row><row css:min-height="28pt"><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="106.8pt" css:border-top-style="solid" css:border-top-width="0.5pt" css:border-left-style="solid" css:border-left-width="0.5pt" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:background-color="#E7E6E6" css:vertical-align="bottom" docx2hub:generated-hideMark="" colname="col1"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000" xml:lang="en">public availability date</phrase> </para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="80.5pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:white-space="nowrap" css:vertical-align="middle" docx2hub:generated-hideMark="" 
colname="col2"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000" xml:lang="en">Nov-12</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="89.6pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:white-space="nowrap" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col3"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000" xml:lang="en">Jun-16</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="88.15pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:white-space="nowrap" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col4"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000" xml:lang="en">Jun-13</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="84.5pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:white-space="nowrap" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col5"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000" xml:lang="en">May-16</phrase></para></entry></row><row css:min-height="28pt"><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="106.8pt" css:border-top-style="none" css:border-left-style="solid" css:border-left-width="0.5pt" 
css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:background-color="#E7E6E6" css:vertical-align="bottom" docx2hub:generated-hideMark="" colname="col1"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000" xml:lang="en">theoretical peak perf</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="80.5pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:white-space="nowrap" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col2"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000" xml:lang="en">1,011 GF/s</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="89.6pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:white-space="nowrap" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col3"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000" xml:lang="en">3,046 GF/s</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="88.15pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:white-space="nowrap" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col4"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000" xml:lang="en">1,430 
GF/s</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="84.5pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:white-space="nowrap" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col5"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000" xml:lang="en">5,300 GF/s</phrase></para></entry></row><row css:min-height="16pt"><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="106.8pt" css:border-top-style="none" css:border-left-style="solid" css:border-left-width="0.5pt" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:background-color="#E7E6E6" css:vertical-align="bottom" docx2hub:generated-hideMark="" colname="col1"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000" xml:lang="en">offload required</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="80.5pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:white-space="nowrap" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col2"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000" xml:lang="en">possible</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="89.6pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" 
css:white-space="nowrap" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col3"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000" xml:lang="en">not possible</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="88.15pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:white-space="nowrap" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col4"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000" xml:lang="en">required</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="84.5pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:white-space="nowrap" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col5"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000" xml:lang="en">required</phrase></para></entry></row><row css:min-height="32pt"><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="106.8pt" css:border-top-style="none" css:border-left-style="solid" css:border-left-width="0.5pt" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:background-color="#E7E6E6" css:vertical-align="bottom" docx2hub:generated-hideMark="" colname="col1"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000" xml:lang="en">max number of thread/cuda cores</phrase></para></entry><entry 
xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="80.5pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:white-space="nowrap" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col2"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000" xml:lang="en">240</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="89.6pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:white-space="nowrap" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col3"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000" xml:lang="en">272</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="88.15pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:white-space="nowrap" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col4"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000" xml:lang="en">2880</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="84.5pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:white-space="nowrap" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col5"><para 
css:page-break-after="avoid" css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000" xml:lang="en">3584</phrase></para></entry></row></tbody></tgroup></informaltable><para role="Caption">Table 1  Main co-processors specifications<anchor role="start" xml:id="_Ref477772034"><?latex \label{ref-0043}?></anchor><anchor role="start" xml:id="_Toc478379038"><?latex \label{ref-0044}?></anchor></para><para xmlns:docx2tex="http://transpect.io/docx2tex" role="Heading3" docx2tex:config="headline"><phrase role="docx2tex:identifier">2.2</phrase>Systems description<anchor xmlns:hub="http://transpect.io/hub" role="start" xml:id="_Toc478378962"><?latex \label{ref-0045}?></anchor></para><para css:margin-bottom="6pt" css:text-align="justify"><phrase xml:lang="en">The benchmark suite has been officially granted access to 4 different machines hosted by PRACE partners. Most results presented in this paper were obtained on these machines but some of the simulations have run on similar ones. This section will cover the specifications of the aforementioned 4 official systems, while the few other ones will be presented along with the results concerned.</phrase></para><para css:margin-bottom="6pt" css:text-align="justify"><phrase xml:lang="en">As can be seen in the previous section, leading-edge architectures have become available only quite recently and some codes could not be run on them yet. Results will be completed in the near future and will be delivered with an update of the benchmark suite. Still, the presented performance figures are a good indicator of the potential efficiency of codes on both Xeon Phi and NVIDIA GPU platforms.</phrase></para><para css:margin-bottom="6pt" css:text-align="justify"><phrase xml:lang="en">As for the future, the PRACE-3IP PCP is in its third and last phase and will be a good candidate to provide access to bigger machines. 
The following suppliers have been awarded a contract: ATOS/Bull SAS (France), E4 Computer Engineering (Italy) and Maxeler Technologies (UK), providing pilots using Xeon Phi, OpenPOWER and FPGA technologies. During this final phase, which started in October 2016, the contractors will have to deploy pilot systems with a compute capability of around 1 PFlop/s, to demonstrate technology readiness of the proposed solution and the progress in terms of energy efficiency, using high frequency monitoring designed for this purpose. These results will be evaluated on a subset of applications from UEABS (NEMO, SPECFEM3D, QuantumEspresso, BQCD). The access to these systems is foreseen to be open to PRACE partners, with a special interest for the 4IP-WP7 task on accelerated Benchmarks.</phrase></para><para xmlns:docx2tex="http://transpect.io/docx2tex" role="Heading4" docx2tex:config="headline"><phrase role="docx2tex:identifier" css:font-family="Arial" css:font-weight="normal" css:font-style="italic" css:text-transform="uppercase" css:font-variant="normal" css:display="inherit" css:color="#000000" css:top="0pt" css:position="relative">2.2.1</phrase>Cartesius K40<anchor xmlns:hub="http://transpect.io/hub" role="start" xml:id="_Ref477768402"><?latex \label{ref-0046}?></anchor><anchor xmlns:hub="http://transpect.io/hub" role="start" xml:id="_Toc478378963"><?latex \label{ref-0047}?></anchor></para><para css:margin-bottom="6pt" css:text-align="justify"><phrase xml:lang="en">The SURFsara institute in The Netherlands granted access to Cartesius which has a GPU island (installed May 2014) with the following specifications</phrase> <phrase xml:lang="en"><link linkend="_Ref476982066"><?latex {\hyperref[ref-0020]{[5]}}?></link></phrase><phrase xml:lang="en">:</phrase></para><itemizedlist mark="bullet"><listitem><para role="NormalPRACE">66 Bullx B515 GPU accelerated nodes</para><orderedlist numeration="loweralpha"><listitem override="o"><para role="NormalPRACE">2x 8-core 2.5 GHz Intel Xeon 
E5-2450 v2 (Ivy Bridge) CPU/node</para></listitem><listitem override="o"><para role="NormalPRACE">2x NVIDIA Tesla K40m GPU/node</para></listitem><listitem override="o"><para role="NormalPRACE">96 GB/node, DDR3-1600 RAM</para></listitem></orderedlist></listitem><listitem><para role="NormalPRACE">Total theoretical peak performance (Ivy Bridge + K40m) 1,056 cores + 132 GPU: 210 TF/s</para></listitem></itemizedlist><para role="NormalPRACE">The interconnect has a fully non-blocking fat-tree topology. Every node has two ConnectX-3 InfiniBand FDR adapters: one per GPU.</para><para xmlns:docx2tex="http://transpect.io/docx2tex" role="Heading4" docx2tex:config="headline"><phrase role="docx2tex:identifier" css:font-family="Arial" css:font-weight="normal" css:font-style="italic" css:text-transform="uppercase" css:font-variant="normal" css:display="inherit" css:color="#000000" css:top="0pt" css:position="relative">2.2.2</phrase>MareNostrum KNC<anchor xmlns:hub="http://transpect.io/hub" role="start" xml:id="_Toc478378964"><?latex \label{ref-0048}?></anchor></para><para css:margin-bottom="6pt" css:text-align="justify"><phrase xml:lang="en">The Barcelona Supercomputing Center (BSC) in Spain granted access to MareNostrum III which features KNC nodes (upgrade June 2013). 
Here's the description of this partition</phrase> <phrase xml:lang="en"><link linkend="_Ref476984580"><?latex {\hyperref[ref-0021]{[6]}}?></link></phrase><phrase xml:lang="en">:</phrase></para><itemizedlist mark="bullet"><listitem><para role="ListParagraph" css:margin-bottom="6pt" css:text-align="justify"><phrase xml:lang="en">42 hybrid nodes containing:</phrase></para><orderedlist numeration="loweralpha"><listitem override="o"><para role="ListParagraph" css:margin-bottom="6pt" css:text-align="justify"><phrase xml:lang="en">1x Sandy-Bridge-EP (2 x 8 cores) host processors E5-2670</phrase> </para></listitem><listitem override="o"><para role="ListParagraph" css:margin-bottom="6pt" css:text-align="justify"><phrase xml:lang="en">8x 8G DDR3–1600 DIMMs (4GB/core), total: 64GB/node</phrase></para></listitem><listitem override="o"><para role="ListParagraph" css:margin-bottom="6pt" css:text-align="justify"><phrase xml:lang="en">2x Xeon Phi 5110P accelerators</phrase></para></listitem></orderedlist></listitem><listitem><para role="ListParagraph" css:margin-bottom="6pt" css:text-align="justify"><phrase xml:lang="en">Interconnection networks:</phrase></para><orderedlist numeration="loweralpha"><listitem override="o"><para role="ListParagraph" css:margin-bottom="6pt" css:text-align="justify"><phrase xml:lang="en">Infiniband Mellanox FDR10: High bandwidth network used by parallel applications communications (MPI)</phrase></para></listitem><listitem override="o"><para role="ListParagraph" css:margin-bottom="6pt" css:text-align="justify"><phrase xml:lang="en">Gigabit Ethernet: 10GbitEthernet network used by the GPFS Filesystem.</phrase></para></listitem></orderedlist></listitem></itemizedlist><para xmlns:docx2tex="http://transpect.io/docx2tex" role="Heading4" docx2tex:config="headline"><phrase role="docx2tex:identifier" css:font-family="Arial" css:font-weight="normal" css:font-style="italic" css:text-transform="uppercase" css:font-variant="normal" css:display="inherit" css:color="#000000" css:top="0pt" 
css:position="relative">2.2.3</phrase>Ouessant P100<anchor role="start" xml:id="_Toc478378965"><?latex \label{ref-0049}?></anchor></para><para css:margin-bottom="6pt" css:text-align="justify"><phrase xml:lang="en">GENCI granted access to the Ouessant prototype at IDRIS in France (installed September 2016). It is composed of 12 IBM Minsky compute nodes, each containing</phrase> <phrase xml:lang="en"><link linkend="_Ref476985408"><?latex {\hyperref[ref-0022]{[7]}}?></link></phrase><phrase xml:lang="en">:</phrase></para><itemizedlist mark="bullet"><listitem><para role="ListParagraph" css:margin-bottom="6pt" css:text-align="justify"><phrase xml:lang="en">Compute nodes</phrase></para><orderedlist numeration="loweralpha"><listitem override="o"><para role="ListParagraph" css:margin-bottom="6pt" css:text-align="justify"><phrase xml:lang="en">POWER8+ sockets, 10 cores, 8 threads per core (or 160 threads per node)</phrase></para></listitem><listitem override="o"><para role="ListParagraph" css:margin-bottom="6pt" css:text-align="justify"><phrase xml:lang="en">128 GB of DDR4 memory (bandwidth &gt; 9 GB/s per core)</phrase></para></listitem><listitem override="o"><para role="ListParagraph" css:margin-bottom="6pt" css:text-align="justify"><phrase xml:lang="en">4 NVIDIA’s new generation Pascal P100 GPU, 16 GB of HBM2 memory</phrase></para></listitem></orderedlist></listitem><listitem><para role="ListParagraph" css:margin-bottom="6pt" css:text-align="justify"><phrase xml:lang="en">Interconnect</phrase></para><orderedlist numeration="loweralpha"><listitem override="o"><para role="ListParagraph" css:margin-bottom="6pt" css:text-align="justify"><phrase xml:lang="en">4 NVLink interconnects (40GB/s of bi-directional bandwidth per interconnect); each GPU card is connected to a CPU with 2 NVLink interconnects and to another GPU with the 2 remaining interconnects</phrase></para></listitem><listitem override="o"><para role="ListParagraph" css:margin-bottom="6pt" 
css:text-align="justify"><phrase xml:lang="en">A Mellanox EDR InfiniBand CAPI interconnect network (1 interconnect per node)</phrase></para></listitem></orderedlist></listitem></itemizedlist><para xmlns:docx2tex="http://transpect.io/docx2tex" role="Heading4" docx2tex:config="headline"><phrase role="docx2tex:identifier" css:font-family="Arial" css:font-weight="normal" css:font-style="italic" css:text-transform="uppercase" css:font-variant="normal" css:display="inherit" css:color="#000000" css:top="0pt" css:position="relative">2.2.4</phrase>Frioul KNL<anchor role="start" xml:id="_Toc478378966"><?latex \label{ref-0050}?></anchor></para><para css:margin-bottom="6pt" css:text-align="justify"><phrase xml:lang="en">GENCI also granted access to the Frioul prototype at CINES in France (installed December 2016). It is composed of 48 Intel KNL compute nodes, each containing:</phrase></para><itemizedlist mark="bullet"><listitem><para role="ListParagraph" css:margin-bottom="6pt" css:text-align="justify"><phrase xml:lang="en">Compute nodes</phrase></para><orderedlist numeration="loweralpha"><listitem override="o"><para role="ListParagraph" css:margin-bottom="6pt" css:text-align="justify"><phrase xml:lang="en">7250 KNL, 68 cores, 4 threads per core</phrase></para></listitem><listitem override="o"><para role="ListParagraph" css:margin-bottom="6pt" css:text-align="justify"><phrase xml:lang="en">192GB of DDR4 memory</phrase></para></listitem><listitem override="o"><para role="ListParagraph" css:margin-bottom="6pt" css:text-align="justify"><phrase xml:lang="en">16GB of MCDRAM</phrase></para></listitem></orderedlist></listitem><listitem><para role="ListParagraph" css:margin-bottom="6pt" css:text-align="justify"><phrase xml:lang="en">Interconnect:</phrase></para><orderedlist numeration="loweralpha"><listitem override="o"><para role="ListParagraph" css:margin-bottom="6pt" css:text-align="justify"><phrase xml:lang="en">A Mellanox EDR 4x 
InfiniBand</phrase></para></listitem></orderedlist></listitem></itemizedlist><para xmlns:docx2tex="http://transpect.io/docx2tex" role="Heading2" docx2tex:config="headline"><phrase role="docx2tex:identifier" css:font-size="14pt">3</phrase>Benchmark suite description<anchor xmlns:hub="http://transpect.io/hub" role="start" xml:id="_Ref477340653"><?latex \label{ref-0051}?></anchor><anchor xmlns:hub="http://transpect.io/hub" role="start" xml:id="_Toc478378967"><?latex \label{ref-0052}?></anchor></para><para css:margin-bottom="6pt" css:text-align="justify"><phrase xml:lang="en">This part will cover each code, presenting the interest for the scientific community as well as the test cases defined for the benchmarks.</phrase></para><para role="NormalPRACE">As an extension to the UEABS, most codes presented in this suite are included in the latter. Exceptions are PFARM, which comes from PRACE-2IP <link linkend="_Ref476987482"><?latex {\hyperref[ref-0023]{[8]}}?></link>, and SHOC <link linkend="_Ref477368547"><?latex {\hyperref[ref-0026]{[11]}}?></link>, a synthetic benchmark suite.</para><para role="NormalPRACE" css:page-break-after="avoid" css:text-align="center" css:text-align-last="center"><inlinemediaobject annotations="object_d16349e8616" css:width="294pt" css:height="193pt"><imageobject><imagedata fileref="d7.5_4IP_1.0.docx.tmp/word/media/image2.emf"/></imageobject></inlinemediaobject><inlinemediaobject role="OLEObject" annotations="object_d16349e8616"><imageobject role="Excel.Sheet.12"><imagedata fileref="embeddings/Microsoft_Excel_Worksheet1.xlsx"/></imageobject></inlinemediaobject></para><para role="Caption" css:text-align="left">Table 2 Codes and corresponding APIs available (in green)<anchor role="start" xml:id="_Ref478378316"><?latex \label{ref-0053}?></anchor><anchor role="start" xml:id="_Toc478379039"><?latex \label{ref-0054}?></anchor></para><para role="NormalPRACE"><link linkend="_Ref478378316"><?latex {\hyperref[ref-0053]{Table 2}}?></link> lists the codes that 
will be presented in the next sections as well as their implementations available. It should be noted that OpenMP can be used with the Intel Xeon Phi architecture while CUDA is used for NVidia GPU cards. OpenCL has been considered as a third alternative that can be used on both architectures. It has been available on the first generation of Xeon Phi (KNC) but has not been ported to the second one (KNL). SHOC is the only code that is impacted, this problem is addressed in section <link linkend="_Ref478378712"><?latex {\hyperref[ref-0151]{4.10}}?></link>.</para><para xmlns:docx2tex="http://transpect.io/docx2tex" role="Heading3" docx2tex:config="headline"><phrase role="docx2tex:identifier">3.1</phrase>Alya<anchor xmlns:hub="http://transpect.io/hub" role="start" xml:id="_Toc478378968"><?latex \label{ref-0055}?></anchor></para><para role="NormalPRACE">Alya is a high performance computational mechanics code that can solve different coupled mechanics problems: incompressible/compressible flows, solid mechanics, chemistry, excitable media, heat transfer and Lagrangian particle transport. It is one single code. There are no particular parallel or individual platform versions. Modules, services and kernels can be compiled individually and used a la carte. The main discretisation technique employed in Alya is based on the variational multiscale finite element method to assemble the governing equations into Algebraic systems. These systems can be solved using solvers like GMRES, Deflated Conjugate Gradient, pipelined CG together with preconditioners like SSOR, Restricted Additive Schwarz, etc. The coupling between physics solved in different computational domains (like fluid-structure interactions) is carried out in a multi-code way, using different instances of the same executable. 
Asynchronous coupling can be achieved in the same way in order to transport Lagrangian particles.</para><para xmlns:docx2tex="http://transpect.io/docx2tex" role="Heading4" docx2tex:config="headline"><phrase role="docx2tex:identifier" css:font-family="Arial" css:font-weight="normal" css:font-style="italic" css:text-transform="uppercase" css:font-variant="normal" css:display="inherit" css:color="#000000" css:top="0pt" css:position="relative">3.1.1</phrase>Code description<anchor xmlns:hub="http://transpect.io/hub" role="start" xml:id="_Toc478378969"><?latex \label{ref-0056}?></anchor></para><para role="NormalPRACE">The code is parallelised with MPI and OpenMP. Two OpenMP strategies are available, without and with a colouring strategy to avoid ATOMICs during the assembly step. A CUDA version is also available for the different solvers. Alya has been also compiled for MIC (Intel Xeon Phi).</para><para role="NormalPRACE">Alya is written in Fortran 1995 and the incompressible fluid module, present in the benchmark suite, is freely available. This module solves the Navier-Stokes equations using an Orthomin(1) <link linkend="_Ref477369174"><?latex {\hyperref[ref-0029]{[14]}}?></link> method for the pressure Schur complement. This method is an algebraic split strategy which converges to the monolithic solution. 
At each linearisation step, the momentum is solved twice and the continuity equation is solved once or twice depending on whether the momentum preserving or the continuity preserving algorithm is selected.</para><para xmlns:docx2tex="http://transpect.io/docx2tex" role="Heading4" docx2tex:config="headline"><phrase role="docx2tex:identifier" css:font-family="Arial" css:font-weight="normal" css:font-style="italic" css:text-transform="uppercase" css:font-variant="normal" css:display="inherit" css:color="#000000" css:top="0pt" css:position="relative">3.1.2</phrase>Test cases description<anchor xmlns:hub="http://transpect.io/hub" role="start" xml:id="_Toc478378970"><?latex \label{ref-0057}?></anchor></para><para role="NormalPRACE"><phrase role="Emphasis">Cavity-hexahedra elements (10M elements)</phrase></para><para role="NormalPRACE">This test is the classical lid-driven cavity. The problem geometry is a cube of dimensions 1x1x1. The fluid properties are density=1.0 and viscosity=0.01. Dirichlet boundary conditions are applied on all sides, with three no-slip walls and one moving wall with velocity equal to 1.0, which corresponds to a Reynolds number of 100. The Reynolds number is low so the regime is laminar and turbulence modelling is not necessary. The domain is discretised into 9800344 hexahedra elements. The solvers are the GMRES method for the momentum equations and the Deflated Conjugate Gradient to solve the continuity equation. This test case can be run using pure MPI parallelisation or the hybrid MPI/OpenMP strategy.</para><para role="NormalPRACE"><phrase role="Emphasis">Cavity-hexahedra elements (30M elements)</phrase></para><para role="NormalPRACE">This is the same cavity test as before but with 30M elements. 
Note that a mesh multiplication strategy enables one to multiply the number of elements by powers of 8, by simply activating the corresponding option in the ker.dat file.</para><para role="NormalPRACE"><phrase role="Emphasis">Cavity-hexahedra elements-GPU version (10M elements)</phrase></para><para role="NormalPRACE">This is the same test as Test case 1, but using the pure MPI parallelisation strategy with acceleration of the algebraic solvers using GPU.</para><para xmlns:docx2tex="http://transpect.io/docx2tex" role="Heading3" docx2tex:config="headline"><phrase role="docx2tex:identifier">3.2</phrase>Code_Saturne<anchor xmlns:hub="http://transpect.io/hub" role="start" xml:id="_Toc478378971"><?latex \label{ref-0058}?></anchor></para><para role="NormalPRACE">Code_Saturne is a CFD software package developed by EDF R&amp;D since 1997 and open-source since 2007. The Navier-Stokes equations are discretised following a finite volume method approach. The code can handle any type of mesh built with any type of cell/grid structure. Incompressible and compressible flows can be simulated, with or without heat transfer, and a range of turbulence models is available. The code can also be coupled with itself or other software to model some multi-physics problems (fluid-structure, fluid-conjugate heat transfer, for instance).</para><para xmlns:docx2tex="http://transpect.io/docx2tex" role="Heading4" docx2tex:config="headline"><phrase role="docx2tex:identifier" css:font-family="Arial" css:font-weight="normal" css:font-style="italic" css:text-transform="uppercase" css:font-variant="normal" css:display="inherit" css:color="#000000" css:top="0pt" css:position="relative">3.2.1</phrase>Code description<anchor xmlns:hub="http://transpect.io/hub" role="start" xml:id="_Toc478378972"><?latex \label{ref-0059}?></anchor></para><para role="NormalPRACE">Parallelism is handled by distributing the domain over the processors (several partitioning tools are available, either internally, i.e. 
SFC Hilbert and Morton, or through external libraries, i.e. METIS Serial, ParMETIS, Scotch Serial, PT-SCOTCH). Communications between subdomains are handled by MPI. Hybrid parallelism using MPI/OpenMP has recently been optimised for improved multicore performance.</para><para role="NormalPRACE">For incompressible simulations, most of the time is spent during the computation of the pressure through Poisson equations. The matrices are very sparse. PETSc has recently been linked to the code to offer alternatives to the internal solvers to compute the pressure. The developer’s version of PETSc supports CUDA and is used in this benchmark suite.</para><para role="NormalPRACE">Code_Saturne is written in C, F95 and Python. It is freely available under the GPL license.</para><para xmlns:docx2tex="http://transpect.io/docx2tex" role="Heading4" docx2tex:config="headline"><phrase role="docx2tex:identifier" css:font-family="Arial" css:font-weight="normal" css:font-style="italic" css:text-transform="uppercase" css:font-variant="normal" css:display="inherit" css:color="#000000" css:top="0pt" css:position="relative">3.2.2</phrase>Test cases description<anchor xmlns:hub="http://transpect.io/hub" role="start" xml:id="_Toc478378973"><?latex \label{ref-0060}?></anchor></para><para role="NormalPRACE">Two test cases are dealt with, the former with a mesh made of hexahedral cells and the latter with a mesh made of tetrahedral cells. Both configurations are meant for incompressible laminar flows. The first test case is run on KNL in order to test the performance of the code always completely filling up a node using 64 MPI tasks and then either 1, 2, 4 OpenMP threads, or 1, 2, 4 extra MPI tasks to investigate the effect of hyper-threading. In this case, the pressure is computed using the code's native Algebraic Multigrid (AMG) algorithm as a solver. The second test case is run on KNL and GPU. 
In this configuration, the pressure equation is solved using the conjugate gradient (CG) algorithm from the PETSc library (the version of PETSc is the developer's version which supports GPU) and tests are run on KNL as well as on CPU+GPU. PETSc is built with the CUSP library and the CUSP format is used.</para><para role="NormalPRACE">Note that computing the pressure using a CG algorithm has always been slower than using the native AMG algorithm, when using Code_Saturne. The second test is then meant to compare the current results obtained on KNL and GPU using CG only, and not to compare CG and AMG time to solution.</para><para role="NormalPRACE"><phrase role="Emphasis">Flow in a 3-D lid-driven cavity (tetrahedral cells)</phrase></para><para role="NormalPRACE">The geometry is very simple, i.e. a cube, but the mesh is built using tetrahedral cells only. The Reynolds number is set to 100, and symmetry boundary conditions are applied in the spanwise direction. The case is modular and the mesh size can easily be varied. The largest mesh has about 13 million cells and is used to get some first comparisons using Code_Saturne linked to the developer's PETSc library, in order to make use of the GPU.</para><para role="NormalPRACE"><phrase role="Emphasis">3-D Taylor-Green vortex flow (hexahedral cells)</phrase></para><para role="NormalPRACE">The Taylor-Green vortex flow is traditionally used to assess the accuracy of CFD code numerical schemes. Periodicity is used in the 3 directions. The total kinetic energy (integral of the velocity) and enstrophy (integral of the vorticity) evolutions as a function of the time are looked at. Code_Saturne is set for 2nd order time and spatial schemes. 
The mesh size is 256<superscript>3</superscript> cells.</para><para xmlns:docx2tex="http://transpect.io/docx2tex" role="Heading3" docx2tex:config="headline"><phrase role="docx2tex:identifier">3.3</phrase>CP2K<anchor xmlns:hub="http://transpect.io/hub" role="start" xml:id="_Toc478378974"><?latex \label{ref-0061}?></anchor></para><para role="NormalPRACE">CP2K is a quantum chemistry and solid state physics software package that can perform atomistic simulations of solid state, liquid, molecular, periodic, material, crystal, and biological systems. It can perform molecular dynamics, metadynamics, Quantum Monte Carlo, Ehrenfest dynamics, vibrational analysis, core level spectroscopy, energy minimisation, and transition state optimisation using NEB or dimer method.</para><para role="NormalPRACE">CP2K provides a general framework for different modelling methods such as density functional theory (DFT) using the mixed Gaussian and plane waves approaches (GPW) and Gaussian and Augmented Plane Waves (GAPW). Supported theory levels include DFTB, LDA, GGA, MP2, RPA, semi-empirical methods (AM1, PM3, PM6, RM1, MNDO, …), and classical force fields (AMBER, CHARMM, …).</para><para xmlns:docx2tex="http://transpect.io/docx2tex" role="Heading4" docx2tex:config="headline"><phrase role="docx2tex:identifier" css:font-family="Arial" css:font-weight="normal" css:font-style="italic" css:text-transform="uppercase" css:font-variant="normal" css:display="inherit" css:color="#000000" css:top="0pt" css:position="relative">3.3.1</phrase>Code description<anchor xmlns:hub="http://transpect.io/hub" role="start" xml:id="_Toc478378975"><?latex \label{ref-0062}?></anchor></para><para role="NormalPRACE">Parallelisation is achieved using a combination of OpenMP-based multi-threading and MPI.</para><para role="NormalPRACE">Offloading for accelerators is implemented through CUDA and OpenCL for GPU and through OpenMP for MIC (Intel Xeon Phi).</para><para role="NormalPRACE">CP2K is written in Fortran 2003 and freely available under the GPL 
license.</para><para xmlns:docx2tex="http://transpect.io/docx2tex" role="Heading4" docx2tex:config="headline"><phrase role="docx2tex:identifier" css:font-family="Arial" css:font-weight="normal" css:font-style="italic" css:text-transform="uppercase" css:font-variant="normal" css:display="inherit" css:color="#000000" css:top="0pt" css:position="relative">3.3.2</phrase>Test cases description<anchor xmlns:hub="http://transpect.io/hub" role="start" xml:id="_Toc478378976"><?latex \label{ref-0063}?></anchor></para><para role="NormalPRACE"><phrase role="Emphasis">LiH-HFX</phrase></para><para role="NormalPRACE">This is a single-point energy calculation for a particular configuration of a 216 atom Lithium Hydride crystal with 432 electrons in a 12.3 Å<superscript>3</superscript> (Angstroms cubed) cell. The calculation is performed using a DFT algorithm with GAPW under the hybrid Hartree-Fock exchange (HFX) approximation. These types of calculations are generally around one hundred times the computational cost of a standard local DFT calculation, although the cost of the latter can be reduced by using the Auxiliary Density Matrix Method (ADMM). Using OpenMP is of particular benefit here as the HFX implementation requires a large amount of memory to store partial integrals. By using several threads, fewer MPI processes share the available memory on the node and thus enough memory is available to avoid recomputing any integrals on-the-fly, improving performance</para><para role="NormalPRACE">This test case is expected to scale efficiently to 1000+ nodes.</para><para role="NormalPRACE"><phrase role="Emphasis">H2O-DFT-LS</phrase></para><para role="NormalPRACE">This is a single-point energy calculation for 2048 water molecules in a 39 Å<superscript>3</superscript> box using linear-scaling DFT. A local-density approximation (LDA) functional is used to compute the Exchange-Correlation energy in combination with a DZVP MOLOPT basis set and a 300 Ry cutoff. 
For large systems, the linear-scaling approach for solving Self-Consistent-Field equations should be much cheaper computationally than using standard DFT, and allow scaling up to 1 million atoms for simple systems. The linear scaling cost results from the fact that the algorithm is based on an iteration on the density matrix. The cubically-scaling orthogonalisation step of standard DFT is avoided and key operations are sparse matrix-matrix multiplications, which have a number of non-zero entries that scale linearly with system size. These are implemented efficiently in CP2K's DBCSR library.</para><para role="NormalPRACE">This test case is expected to scale efficiently to 4000+ nodes.</para><para xmlns:docx2tex="http://transpect.io/docx2tex" role="Heading3" docx2tex:config="headline"><phrase role="docx2tex:identifier">3.4</phrase>GPAW<anchor xmlns:hub="http://transpect.io/hub" role="start" xml:id="_Toc478378977"><?latex \label{ref-0064}?></anchor></para><para role="NormalPRACE">GPAW is a DFT program for ab-initio electronic structure calculations using the projector augmented wave method. It uses a uniform real-space grid representation of the electronic wavefunctions, which allows for excellent computational scalability and systematic convergence properties.</para><para xmlns:docx2tex="http://transpect.io/docx2tex" role="Heading4" docx2tex:config="headline"><phrase role="docx2tex:identifier" css:font-family="Arial" css:font-weight="normal" css:font-style="italic" css:text-transform="uppercase" css:font-variant="normal" css:display="inherit" css:color="#000000" css:top="0pt" css:position="relative">3.4.1</phrase>Code description<anchor xmlns:hub="http://transpect.io/hub" role="start" xml:id="_Toc478378978"><?latex \label{ref-0065}?></anchor></para><para role="NormalPRACE">GPAW is written mostly in Python, but also includes computational kernels written in C as well as leveraging external libraries such as NumPy, BLAS and ScaLAPACK. 
Parallelisation is based on message-passing using MPI with no threading. Development branches for GPU and MICs include support for offloading to accelerators using either CUDA or pyMIC, respectively. GPAW is freely available under the GPL license.</para><para xmlns:docx2tex="http://transpect.io/docx2tex" role="Heading4" docx2tex:config="headline"><phrase role="docx2tex:identifier" css:font-family="Arial" css:font-weight="normal" css:font-style="italic" css:text-transform="uppercase" css:font-variant="normal" css:display="inherit" css:color="#000000" css:top="0pt" css:position="relative">3.4.2</phrase>Test cases description<anchor xmlns:hub="http://transpect.io/hub" role="start" xml:id="_Toc478378979"><?latex \label{ref-0066}?></anchor></para><para role="NormalPRACE"><phrase role="Emphasis">Carbon Nanotube</phrase></para><para role="NormalPRACE">This test case is a ground state calculation for a carbon nanotube in vacuum. By default, it uses a 6-6-10 nanotube with 240 atoms (freely adjustable) and serial LAPACK with an option to use ScaLAPACK.</para><para role="NormalPRACE">This benchmark is aimed at smaller systems, with an intended scaling range of up to 10 nodes.</para><para role="NormalPRACE"><phrase role="Emphasis">Copper Filament</phrase></para><para role="NormalPRACE">This test case is a ground state calculation for a copper filament in vacuum. By default, it uses a 2x2x3 FCC lattice with 71 atoms (freely adjustable) and ScaLAPACK for parallelisation.</para><para role="NormalPRACE">This benchmark is aimed at larger systems, with an intended scaling range of up to 100 nodes. A lower limit on the number of nodes may be imposed by the amount of memory required, which can be adjusted to some extent with the run parameters (e.g. 
lattice size or grid spacing).</para><para xmlns:docx2tex="http://transpect.io/docx2tex" role="Heading3" docx2tex:config="headline"><phrase role="docx2tex:identifier">3.5</phrase>GROMACS<anchor xmlns:hub="http://transpect.io/hub" role="start" xml:id="_Toc478378980"><?latex \label{ref-0067}?></anchor></para><para role="NormalPRACE">GROMACS is a versatile package to perform molecular dynamics, i.e. simulate the Newtonian equations of motion for systems with hundreds to millions of particles.</para><para role="NormalPRACE">It is primarily designed for biochemical molecules like proteins, lipids and nucleic acids that have a lot of complicated bonded interactions, but since GROMACS is extremely fast at calculating the nonbonded interactions (that usually dominate simulations) many groups are also using it for research on non-biological systems, e.g. polymers.</para><para role="NormalPRACE">GROMACS supports all the usual algorithms you expect from a modern molecular dynamics implementation, and some additional features:</para><para role="NormalPRACE">GROMACS provides extremely high performance compared to all other programs. A lot of algorithmic optimisations have been introduced in the code; for instance, the calculation of the virial has been extracted from the innermost loops over pairwise interactions, and we use our own software routines to calculate the inverse square root. In GROMACS 4.6 and up, on almost all common computing platforms, the innermost loops are written in C using intrinsic functions that the compiler transforms to SIMD machine instructions, to utilise the available instruction-level parallelism. 
These kernels are available in both single and double precision, and support all different kinds of SIMD support found in x86-family (and other) processors.</para><para xmlns:docx2tex="http://transpect.io/docx2tex" role="Heading4" docx2tex:config="headline"><phrase role="docx2tex:identifier" css:font-family="Arial" css:font-weight="normal" css:font-style="italic" css:text-transform="uppercase" css:font-variant="normal" css:display="inherit" css:color="#000000" css:top="0pt" css:position="relative">3.5.1</phrase>Code description<anchor xmlns:hub="http://transpect.io/hub" role="start" xml:id="_Toc478378981"><?latex \label{ref-0068}?></anchor></para><para role="NormalPRACE">Parallelisation is achieved using combined OpenMP and MPI.</para><para role="NormalPRACE">Offloading for accelerators is implemented through CUDA for GPU and through OpenMP for MIC (Intel Xeon Phi).</para><para role="NormalPRACE">GROMACS is written in C/C++ and freely available under the GPL license.</para><para xmlns:docx2tex="http://transpect.io/docx2tex" role="Heading4" docx2tex:config="headline"><phrase role="docx2tex:identifier" css:font-family="Arial" css:font-weight="normal" css:font-style="italic" css:text-transform="uppercase" css:font-variant="normal" css:display="inherit" css:color="#000000" css:top="0pt" css:position="relative">3.5.2</phrase>Test cases description<anchor xmlns:hub="http://transpect.io/hub" role="start" xml:id="_Toc478378982"><?latex \label{ref-0069}?></anchor></para><para role="NormalPRACE"><phrase role="Emphasis">GluCl Ion Channel</phrase></para><para role="NormalPRACE">The ion channel system is the membrane protein GluCl, which is a pentameric chloride channel embedded in a lipid bilayer. The GluCl ion channel was embedded in a DOPC membrane and solvated in TIP3P water. This system contains 142k atoms, and is a quite challenging parallelisation case due to the small size. 
However, it is likely one of the most wanted target sizes for biomolecular simulations due to the importance of these proteins for pharmaceutical applications. It is particularly challenging due to a highly inhomogeneous and anisotropic environment in the membrane, which poses hard challenges for load balancing with domain decomposition.</para><para role="NormalPRACE">This test case was used as the “Small” test case in previous 2IP and 3IP PRACE phases. It is included in the package's version 5.0 benchmark cases. It is reported to scale efficiently up to 1000+ cores on x86 based systems.</para><para role="NormalPRACE"><phrase role="Emphasis">Lignocellulose</phrase></para><para role="NormalPRACE">A model of cellulose and lignocellulosic biomass in an aqueous solution <link linkend="_Ref476989175"><?latex {\hyperref[ref-0024]{[9]}}?></link>. This system of 3.3 million atoms is inhomogeneous. This system uses reaction-field electrostatics instead of PME and therefore scales well on x86. This test case was used as the “Large” test case in previous PRACE 2IP and 3IP projects. It is reported in previous PRACE projects to scale efficiently up to 10000+ x86 cores.</para><para xmlns:docx2tex="http://transpect.io/docx2tex" role="Heading3" docx2tex:config="headline"><phrase role="docx2tex:identifier">3.6</phrase>NAMD<anchor xmlns:hub="http://transpect.io/hub" role="start" xml:id="_Toc478378983"><?latex \label{ref-0070}?></anchor></para><para role="NormalPRACE">NAMD is a widely used molecular dynamics application designed to simulate bio-molecular systems on a wide variety of compute platforms. NAMD is developed by the “Theoretical and Computational Biophysics Group” at the University of Illinois at Urbana Champaign. In the design of NAMD particular emphasis has been placed on scalability when utilising a large number of processors. 
The application can read a wide variety of different file formats, for example force fields, protein structures, which are commonly used in bio-molecular science. A NAMD license can be applied for on the developer’s website free of charge. Once the license has been obtained, binaries for a number of platforms and the source can be downloaded from the website. Deployment areas of NAMD include pharmaceutical research by academic and industrial users. NAMD is particularly suitable when the interaction between a number of proteins or between proteins and other chemical substances is of interest. Typical examples are vaccine research and transport processes through cell membrane proteins.</para><para xmlns:docx2tex="http://transpect.io/docx2tex" role="Heading4" docx2tex:config="headline"><phrase role="docx2tex:identifier" css:font-family="Arial" css:font-weight="normal" css:font-style="italic" css:text-transform="uppercase" css:font-variant="normal" css:display="inherit" css:color="#000000" css:top="0pt" css:position="relative">3.6.1</phrase>Code description<anchor xmlns:hub="http://transpect.io/hub" role="start" xml:id="_Toc478378984"><?latex \label{ref-0071}?></anchor></para><para role="NormalPRACE">NAMD is written in C++ and parallelised using Charm++ parallel objects, which are implemented on top of MPI, supporting both pure MPI and hybrid parallelisation <link linkend="_Ref476989447"><?latex {\hyperref[ref-0025]{[10]}}?></link>.</para><para role="NormalPRACE">Offloading for accelerators is implemented for both GPU and MIC (Intel Xeon Phi).</para><para xmlns:docx2tex="http://transpect.io/docx2tex" role="Heading4" docx2tex:config="headline"><phrase role="docx2tex:identifier" css:font-family="Arial" css:font-weight="normal" css:font-style="italic" css:text-transform="uppercase" css:font-variant="normal" css:display="inherit" css:color="#000000" css:top="0pt" css:position="relative">3.6.2</phrase>Test cases description<anchor xmlns:hub="http://transpect.io/hub" 
role="start" xml:id="_Toc478378985"><?latex \label{ref-0072}?></anchor></para><para role="NormalPRACE">The datasets are based on the original "Satellite Tobacco Mosaic Virus (STMV)" dataset from the official NAMD site. The memory optimised build of the package and data sets are used in benchmarking. Data are converted to the appropriate binary format used by the memory optimised build.</para><para role="NormalPRACE"><phrase role="Emphasis">STMV.1M</phrase></para><para role="NormalPRACE">This is the original STMV dataset from the official NAMD site. The system contains roughly 1 million atoms. This data set scales efficiently up to 1000+ x86 Ivy Bridge cores.</para><para role="NormalPRACE"><phrase role="Emphasis">STMV.8M</phrase></para><para role="NormalPRACE">This is a 2x2x2 replication of the original STMV dataset from the official NAMD site. The system contains roughly 8 million atoms. This data set scales efficiently up to 6000 x86 Ivy Bridge cores.</para><para role="NormalPRACE"><phrase role="Emphasis">STMV.28M</phrase></para><para role="NormalPRACE">This is a 3x3x3 replication of the original STMV dataset from the official NAMD site. The system contains roughly 28 million atoms. This data set also scales efficiently up to 6000 x86 Ivy Bridge cores.</para><para xmlns:docx2tex="http://transpect.io/docx2tex" role="Heading3" docx2tex:config="headline"><phrase role="docx2tex:identifier">3.7</phrase>PFARM<anchor xmlns:hub="http://transpect.io/hub" role="start" xml:id="_Toc478378986"><?latex \label{ref-0073}?></anchor></para><para role="NormalPRACE">PFARM is part of a suite of programs based on the ‘R-matrix’ ab-initio approach to the variational solution of the many-electron Schrödinger equation for electron-atom and electron-ion scattering. 
The package has been used to calculate electron collision data for astrophysical applications (such as: the interstellar medium, planetary atmospheres) with, for example, various ions of Fe and Ni and neutral O, plus other applications such as data for plasma modelling and fusion reactor impurities. The code has recently been adapted to form a compatible interface with the UKRmol suite of codes for electron (positron) molecule collisions thus enabling large-scale parallel ‘outer-region’ calculations for molecular systems as well as atomic systems.</para><para xmlns:docx2tex="http://transpect.io/docx2tex" role="Heading4" docx2tex:config="headline"><phrase role="docx2tex:identifier" css:font-family="Arial" css:font-weight="normal" css:font-style="italic" css:text-transform="uppercase" css:font-variant="normal" css:display="inherit" css:color="#000000" css:top="0pt" css:position="relative">3.7.1</phrase>Code description<anchor xmlns:hub="http://transpect.io/hub" role="start" xml:id="_Toc478378987"><?latex \label{ref-0074}?></anchor></para><para role="NormalPRACE">In order to enable efficient computation, the external region calculation takes place in two distinct stages, named EXDIG and EXAS, with intermediate files linking the two. EXDIG is dominated by the assembly of sector Hamiltonian matrices and their subsequent eigensolutions. EXAS uses a combined functional/domain decomposition approach where good load-balancing is essential to maintain efficient parallel performance. Each of the main stages in the calculation is written in Fortran 2003 (or Fortran 2003-compliant Fortran 95), is parallelised using MPI and is designed to take advantage of highly optimised, numerical library routines. Hybrid MPI / OpenMP parallelisation has also been introduced into the code via shared memory enabled numerical library kernels.</para><para role="NormalPRACE">Accelerator-based implementations have been implemented for both EXDIG and EXAS. 
EXAS uses offloading via MAGMA (or MKL) for sector Hamiltonian diagonalisations on Intel Xeon Phi and GPU accelerators. EXDIG uses combined MPI and OpenMP to distribute the scattering energy calculations on CPU efficiently both across and within Intel Xeon Phi co-processors.</para><para xmlns:docx2tex="http://transpect.io/docx2tex" role="Heading4" docx2tex:config="headline"><phrase role="docx2tex:identifier" css:font-family="Arial" css:font-weight="normal" css:font-style="italic" css:text-transform="uppercase" css:font-variant="normal" css:display="inherit" css:color="#000000" css:top="0pt" css:position="relative">3.7.2</phrase>Test cases description<anchor xmlns:hub="http://transpect.io/hub" role="start" xml:id="_Toc478378988"><?latex \label{ref-0075}?></anchor></para><para role="NormalPRACE">External region R-matrix propagations take place over the outer partition of configuration space, including the region where long-range potentials remain important. The radius of this region is determined from the user input and the program decides upon the best strategy for dividing this space into multiple sub-regions (or sectors). Generally, a choice of larger sector lengths requires the application of larger numbers of basis functions (and therefore larger Hamiltonian matrices) in order to maintain accuracy across the sector and vice-versa. Memory limits on the target hardware may determine the final preferred configuration for each test case.</para><para role="NormalPRACE"><phrase role="Emphasis">Iron, FeIII</phrase></para><para role="NormalPRACE">This is an electron-ion scattering case with 1181 channels. Hamiltonian assembly in the coarse region applies 10 Legendre functions leading to Hamiltonian matrix diagonalisations of order 11810. In the ‘fine energy region’ up to 30 Legendre functions may be applied leading to Hamiltonian matrices of up to order 35430. 
The number of sector calculations is likely to range from about 15 to over 30 depending on the user specifications. Several thousand scattering energies are used in the calculation. </para><para role="NormalPRACE"><phrase role="Emphasis">Methane, CH4</phrase></para><para role="NormalPRACE">The dataset is an electron-molecule calculation with 1361 channels. Hamiltonian dimensions are therefore estimated between 13610 and ~40000. A process in the code which splits the constituent channels according to spin can be used to approximately halve the Hamiltonian size (whilst doubling the overall number of Hamiltonian matrices). As eigensolvers generally require O(N³) operations, spin splitting leads to a saving in both memory requirements and operation count. The final radius of the external region required is relatively long, leading to more numerous sector calculations (estimated at between 20 and 30). The calculation will require many thousands of scattering energies.</para><para role="NormalPRACE">In the current model, parallelism in EXDIG is limited to the number of sector calculations, i.e. a maximum of around 30 accelerator nodes. </para><para role="NormalPRACE">Methane is a relatively new dataset which has not been calculated on novel technology platforms at the very large-scale to date, so this is somewhat of a step into the unknown. We are also somewhat reliant on collaborative partners that are not associated with PRACE for continuing to develop and fine tune the accelerator-based EXAS program for this proposed work. 
Access to suitable hardware with throughput suited to development cycles is also a necessity if suitable progress is to be ensured.</para><para xmlns:docx2tex="http://transpect.io/docx2tex" role="Heading3" docx2tex:config="headline"><phrase role="docx2tex:identifier">3.8</phrase>QCD<anchor xmlns:hub="http://transpect.io/hub" role="start" xml:id="_Toc478378989"><?latex \label{ref-0076}?></anchor></para><para role="NormalPRACE">Matter consists of atoms, which in turn consist of nuclei and electrons. The nuclei consist of neutrons and protons, which comprise quarks bound together by gluons.</para><para role="NormalPRACE">The theory of how quarks and gluons interact to form nucleons and other elementary particles is called Quantum Chromo Dynamics (QCD). For most problems of interest, it is not possible to solve QCD analytically, and instead numerical simulations must be performed. Such “Lattice QCD” calculations are very computationally intensive, and occupy a significant percentage of all HPC resources worldwide.</para><para xmlns:docx2tex="http://transpect.io/docx2tex" role="Heading4" docx2tex:config="headline"><phrase role="docx2tex:identifier" css:font-family="Arial" css:font-weight="normal" css:font-style="italic" css:text-transform="uppercase" css:font-variant="normal" css:display="inherit" css:color="#000000" css:top="0pt" css:position="relative">3.8.1</phrase>Code description<anchor xmlns:hub="http://transpect.io/hub" role="start" xml:id="_Toc478378990"><?latex \label{ref-0077}?></anchor></para><para role="NormalPRACE">The QCD benchmark benefits of two different implementations described below.</para><para role="NormalPRACE"><phrase role="Emphasis">First implementation</phrase></para><para role="NormalPRACE">The MILC code is a freely-available suite for performing Lattice QCD simulations, developed over many years by a collaboration of researchers <link linkend="_Ref477371577"><?latex {\hyperref[ref-0030]{[15]}}?></link>.</para><para role="NormalPRACE">The 
benchmark used here is derived from the MILC code (v6), and consists of a full conjugate gradient solution using Wilson fermions. The benchmark is consistent with “QCD kernel E” in the full UEABS, and has been adapted so that it can efficiently use accelerators as well as traditional CPU.</para><para role="NormalPRACE">The implementation for accelerators has been achieved using the “targetDP” programming model <link linkend="_Ref477371673"><?latex {\hyperref[ref-0031]{[16]}}?></link>, a lightweight abstraction layer designed to allow the same application source code to be able to target multiple architectures, e.g. NVIDIA GPU and multicore/manycore CPU, in a performance portable manner. The targetDP syntax maps, at compile time, to either NVIDIA CUDA (for execution on GPU) or OpenMP+vectorisation (for implementation on multi/manycore CPU including Intel Xeon Phi). The base language of the benchmark is C and MPI is used for node-level parallelism.</para><para role="NormalPRACE"><phrase role="Emphasis">Second implementation</phrase></para><para role="NormalPRACE">The QCD Accelerator Benchmark suite Part 2 consists of two kernels, the QUDA <link linkend="_Ref477103549"><?latex {\hyperref[ref-0027]{[12]}}?></link> and the QPhix <link linkend="_Ref477103568"><?latex {\hyperref[ref-0028]{[13]}}?></link> library. The library QUDA is based on CUDA and optimized for running on NVIDIA GPU <link linkend="_Ref477371810"><?latex {\hyperref[ref-0032]{[17]}}?></link>. The QPhix library consists of routines which are optimized to use Intel intrinsic functions of multiple vector lengths, including optimized routines for KNC and KNL <link linkend="_Ref477371895"><?latex {\hyperref[ref-0033]{[18]}}?></link>. 
In both QUDA and QPhix, the benchmark kernel uses the conjugate gradient solvers implemented within the libraries.</para><para xmlns:docx2tex="http://transpect.io/docx2tex" role="Heading4" docx2tex:config="headline"><phrase role="docx2tex:identifier" css:font-family="Arial" css:font-weight="normal" css:font-style="italic" css:text-transform="uppercase" css:font-variant="normal" css:display="inherit" css:color="#000000" css:top="0pt" css:position="relative">3.8.2</phrase>Test cases description<anchor xmlns:hub="http://transpect.io/hub" role="start" xml:id="_Toc478378991"><?latex \label{ref-0078}?></anchor></para><para role="NormalPRACE">Lattice QCD involves discretisation of space-time into a lattice of points, where the extent of the lattice in each of the 3 spatial and 1 temporal dimensions can be chosen. This means that the benchmark is very flexible, where the size of the lattice can be varied with the size of the computing system in use (weak scaling) or can be fixed (strong scaling). For testing on a single node, then 64x64x32x8 is a reasonable size, since this fits on a single Intel Xeon Phi or a single GPU. For larger numbers of nodes, the lattice extents can be increased accordingly, keeping the geometric shape roughly similar. 
Test cases for the second implementation are given by a strong-scaling mode with a lattice size of 32x32x32x96 and 64x64x64x128 and a weak scaling mode with a local lattice size of 48x48x48x24.</para><para xmlns:docx2tex="http://transpect.io/docx2tex" role="Heading3" docx2tex:config="headline"><phrase role="docx2tex:identifier">3.9</phrase>Quantum Espresso<anchor xmlns:hub="http://transpect.io/hub" role="start" xml:id="_Toc478378992"><?latex \label{ref-0079}?></anchor></para><para role="NormalPRACE">QUANTUM ESPRESSO is an integrated suite of computer codes for electronic-structure calculations and materials modelling, based on density-functional theory, plane waves, and pseudopotentials (norm-conserving, ultrasoft, and projector-augmented wave). QUANTUM ESPRESSO stands for <phrase role="Emphasis">opEn Source Package for Research in Electronic Structure, Simulation, and Optimisation</phrase>. It is freely available to researchers around the world under the terms of the GNU General Public License. QUANTUM ESPRESSO builds upon newly restructured electronic-structure codes that have been developed and tested by some of the original authors of novel electronic-structure algorithms and applied in the last twenty years by some of the leading materials modelling groups worldwide. Innovation and efficiency are still its main focus, with special attention paid to massively parallel architectures, and a great effort being devoted to user friendliness. 
QUANTUM ESPRESSO is evolving towards a distribution of independent and inter-operable codes in the spirit of an open-source project, where researchers active in the field of electronic-structure calculations are encouraged to participate in the project by contributing their own codes or by implementing their own ideas into existing codes.</para><para role="NormalPRACE">QUANTUM ESPRESSO is written mostly in Fortran90, and parallelised using MPI and OpenMP and is released under a GPL license.</para><para xmlns:docx2tex="http://transpect.io/docx2tex" role="Heading4" docx2tex:config="headline"><phrase role="docx2tex:identifier" css:font-family="Arial" css:font-weight="normal" css:font-style="italic" css:text-transform="uppercase" css:font-variant="normal" css:display="inherit" css:color="#000000" css:top="0pt" css:position="relative">3.9.1</phrase>Code description<anchor xmlns:hub="http://transpect.io/hub" role="start" xml:id="_Toc478378993"><?latex \label{ref-0080}?></anchor></para><para role="NormalPRACE">During 2011 a GPU-enabled version of Quantum ESPRESSO was publicly released. The code is currently developed and maintained by Filippo Spiga at the High Performance Computing Service - University of Cambridge (United Kingdom) and Ivan Girotto at the International Centre for Theoretical Physics (Italy). The initial work has been supported by the EC-funded PRACE and a SFI (Science Foundation Ireland, grant 08/HEC/I1450). At the time of writing, the project is self-sustained thanks to the dedication of the people involved and thanks to NVIDIA support in providing hardware and expertise in GPU programming.</para><para role="NormalPRACE">The current public version of QE-GPU is 14.10.0 as it is the last version maintained as plug-in working on all QE 5.x versions. QE-GPU utilised phiGEMM (external) for CPU+GPU GEMM computation, MAGMA (external) to accelerate eigen-solvers and explicit CUDA kernel to accelerate compute-intensive routines. 
FFT capabilities on GPU are available only for serial computation due to the hard challenges posed in managing accelerators in the parallel distributed 3D-FFT portion of the code where communication is the dominant element that limits excellent scalability beyond hundreds of MPI ranks.</para><para role="NormalPRACE">A version for Intel Xeon Phi (MIC) accelerators is not currently available.</para><para xmlns:docx2tex="http://transpect.io/docx2tex" role="Heading4" docx2tex:config="headline"><phrase role="docx2tex:identifier" css:font-family="Arial" css:font-weight="normal" css:font-style="italic" css:text-transform="uppercase" css:font-variant="normal" css:display="inherit" css:color="#000000" css:top="0pt" css:position="relative">3.9.2</phrase>Test cases description<anchor xmlns:hub="http://transpect.io/hub" role="start" xml:id="_Toc478378994"><?latex \label{ref-0081}?></anchor></para><para role="NormalPRACE"><phrase role="Emphasis">PW-IRMOF_M11</phrase></para><para role="NormalPRACE">Full SCF calculation of a Zn-based isoreticular metal–organic framework (total 130 atoms) over 1 K point.  Benchmarks run in 2012 demonstrated speedups due to GPU (NVIDIA K20s, with respect to non-accelerated nodes) in the range 1.37 – 1.87, according to node count (maximum number of accelerators=8). Runs with current hardware technology and an updated version of the code are expected to exhibit higher speedups (probably 2-3x) and scale up to a couple hundred nodes.</para><para role="NormalPRACE"><phrase role="Emphasis">PW-SiGe432</phrase></para><para role="NormalPRACE">This is a SCF calculation of a Silicon-Germanium crystal with 430 atoms. 
Being a fairly large system, parallel scalability up to several hundred, perhaps a 1000 nodes is expected, with accelerated speed-ups likely to be of 2-3x.</para><para xmlns:docx2tex="http://transpect.io/docx2tex" role="Heading3" docx2tex:config="headline"><phrase role="docx2tex:identifier">3.10</phrase>Synthetic benchmarks – SHOC<anchor xmlns:hub="http://transpect.io/hub" role="start" xml:id="_Toc478378995"><?latex \label{ref-0082}?></anchor></para><para role="NormalPRACE">The Accelerator Benchmark Suite will also include a series of synthetic benchmarks. For this purpose, we choose the Scalable HeterOgeneous Computing (SHOC) benchmark suite, augmented with a series of benchmark examples developed internally. SHOC is a collection of benchmark programs testing the performance and stability of systems using computing devices with non-traditional architectures for general purpose computing. Its initial focus is on systems containing GPU and multi-core processors, and on the OpenCL programming standard, but CUDA and OpenACC versions were added. Moreover, a subset of the benchmarks is optimised for the Intel Xeon Phi coprocessor. SHOC can be used on clusters as well as individual hosts.</para><para role="NormalPRACE">The SHOC benchmark suite currently contains benchmark programs categorised by complexity.  Some measure low-level 'feeds and speeds' behaviour (Level 0), some measure the performance of a higher-level operation such as a Fast Fourier Transform (FFT) (Level 1), and the others measure real application kernels (Level 2).</para><para role="NormalPRACE">The SHOC benchmark suite has been selected to evaluate the performance of accelerators on synthetic benchmarks, mostly because SHOC provides CUDA/OpenCL/Offload/OpenACC variants of the benchmarks. This allowed us to evaluate NVIDIA GPU (with CUDA/OpenCL/OpenACC), Intel Xeon Phi KNC (with both Offload and OpenCL), but also Intel host CPU (with OpenCL/OpenACC). 
However, on the latest Xeon Phi processor (codenamed KNL) none of these 4 models is supported. Thus, benchmarks on the KNL architecture cannot be run at this point, and there is no news of Intel supporting OpenCL on the KNL. However, there is work in progress on the PGI compiler to support the KNL as a target. This support will be added during 2017. This will allow us to compile and run the OpenACC benchmarks for the KNL. Alternatively, the OpenACC benchmarks will be ported to OpenMP and executed on the KNL.</para><para xmlns:docx2tex="http://transpect.io/docx2tex" role="Heading4" docx2tex:config="headline"><phrase role="docx2tex:identifier" css:font-family="Arial" css:font-weight="normal" css:font-style="italic" css:text-transform="uppercase" css:font-variant="normal" css:display="inherit" css:color="#000000" css:top="0pt" css:position="relative">3.10.1</phrase>Code description<anchor xmlns:hub="http://transpect.io/hub" role="start" xml:id="_Toc478378996"><?latex \label{ref-0083}?></anchor></para><para role="NormalPRACE">All benchmarks are MPI-enabled. Some will report aggregate metrics over all MPI ranks, others will only perform work for specific ranks.</para><para role="NormalPRACE">Offloading for accelerators is implemented through CUDA and OpenCL for GPU and through OpenMP for MIC (Intel Xeon Phi). For selected benchmarks OpenACC implementations are provided for GPU. 
Multi-node parallelisation is achieved using MPI.</para><para role="NormalPRACE">SHOC is written in C++ and is open-source and freely available.</para><para xmlns:docx2tex="http://transpect.io/docx2tex" role="Heading4" docx2tex:config="headline"><phrase role="docx2tex:identifier" css:font-family="Arial" css:font-weight="normal" css:font-style="italic" css:text-transform="uppercase" css:font-variant="normal" css:display="inherit" css:color="#000000" css:top="0pt" css:position="relative">3.10.2</phrase>Test cases description<anchor xmlns:hub="http://transpect.io/hub" role="start" xml:id="_Toc478378997"><?latex \label{ref-0084}?></anchor></para><para role="NormalPRACE">The benchmarks contained in SHOC currently feature 4 different sizes for increasingly large systems. The size convention is as follows:</para><orderedlist numeration="arabic"><listitem override="1."><para role="NormalPRACE">CPU / debugging</para></listitem><listitem override="2."><para role="NormalPRACE">Mobile/integrated GPU</para></listitem><listitem override="3."><para role="NormalPRACE">Discrete GPU (e.g. GeForce or Radeon series)</para></listitem><listitem override="4."><para role="NormalPRACE">HPC-focused or large memory GPU (e.g. Tesla or Firestream Series)</para></listitem></orderedlist><para role="NormalPRACE">In order to go even larger scale, we plan to add a 5th level for massive supercomputers.</para><para xmlns:docx2tex="http://transpect.io/docx2tex" role="Heading3" docx2tex:config="headline"><phrase role="docx2tex:identifier">3.11</phrase>SPECFEM3D<anchor xmlns:hub="http://transpect.io/hub" role="start" xml:id="_Toc478378998"><?latex \label{ref-0085}?></anchor></para><para role="NormalPRACE">The software package SPECFEM3D simulates three-dimensional global and regional seismic wave propagation based upon the spectral-element method (SEM). All SPECFEM3D_GLOBE software is written in Fortran90 with full portability in mind, and conforms strictly to the Fortran95 standard. 
It uses no obsolete or obsolescent features of Fortran77. The package uses parallel programming based upon the Message Passing Interface (MPI).</para><para role="NormalPRACE">The SEM was originally developed in computational fluid dynamics and has been successfully adapted to address problems in seismic wave propagation. It is a continuous Galerkin technique, which can easily be made discontinuous; it is then close to a particular case of the discontinuous Galerkin technique, with optimised efficiency because of its tensorised basis functions. In particular, it can accurately handle very distorted mesh elements. It has very good accuracy and convergence properties. The spectral element approach admits spectral rates of convergence and allows exploiting hp-convergence schemes. It is also very well suited to parallel implementation on very large supercomputers as well as on clusters of GPU accelerating graphics cards. Tensor products inside each element can be optimised to reach very high efficiency, and mesh point and element numbering can be optimised to reduce processor cache misses and improve cache reuse. The SEM can also handle triangular (in 2D) or tetrahedral (3D) elements as well as mixed meshes, although with increased cost and reduced accuracy in these elements, as in the discontinuous Galerkin method.</para><para role="NormalPRACE">In many geological models in the context of seismic wave propagation studies (except for instance for fault dynamic rupture studies, in which very high frequencies of supershear rupture need to be modelled near the fault) a continuous formulation is sufficient because material property contrasts are not drastic and thus conforming mesh doubling bricks can efficiently handle mesh size variations. This is particularly true at the scale of the full earth. 
Effects due to lateral variations in compressional-wave speed, shear-wave speed, density, a 3D crustal model, ellipticity, topography and bathymetry, the oceans, rotation, and self-gravitation are included. The package can accommodate full 21-parameter anisotropy as well as lateral variations in attenuation. Adjoint capabilities and finite-frequency kernel simulations are also included.</para><para xmlns:docx2tex="http://transpect.io/docx2tex" role="Heading4" docx2tex:config="headline"><phrase role="docx2tex:identifier" css:font-family="Arial" css:font-weight="normal" css:font-style="italic" css:text-transform="uppercase" css:font-variant="normal" css:display="inherit" css:color="#000000" css:top="0pt" css:position="relative">3.11.1</phrase>Test cases definition<anchor xmlns:hub="http://transpect.io/hub" role="start" xml:id="_Toc478378999"><?latex \label{ref-0086}?></anchor></para><para role="NormalPRACE">Both test cases will use the same input data. A 3D shear-wave speed model (S362ANI) will be used to benchmark the code.</para><para role="NormalPRACE">Here is an explanation of the simulation parameters that will be used to size the test case:</para><itemizedlist mark="bullet"><listitem><para role="NormalPRACE"><phrase role="Emphasis">NCHUNKS,</phrase> number of faces of the cubed sphere included in the simulation (will always be 6)</para></listitem><listitem><para role="NormalPRACE"><phrase role="Emphasis">NPROC_XI</phrase>, number of slices along one chunk of the cubed sphere (will also represent the number of processors used for 1 chunk)</para></listitem><listitem><para role="NormalPRACE"><phrase role="Emphasis">NEX_XI</phrase>, number of spectral elements along one side of a chunk.</para></listitem><listitem><para role="NormalPRACE"><phrase role="Emphasis">RECORD_LENGTH_IN_MINUTES,</phrase> length of the simulated seismograms. 
The time of the simulation should vary linearly with this parameter.</para></listitem></itemizedlist><para role="NormalPRACE"><phrase role="Emphasis">Small test case</phrase> </para><para role="NormalPRACE">It runs with 24 MPI tasks and has the following mesh characteristics: </para><itemizedlist mark="bullet"><listitem><para role="NormalPRACE">NCHUNKS=6</para></listitem><listitem><para role="NormalPRACE">NPROC_XI=2</para></listitem><listitem><para role="NormalPRACE">NEX_XI =80 </para></listitem><listitem><para role="NormalPRACE">RECORD_LENGTH_IN_MINUTES =2.0</para></listitem></itemizedlist><para role="NormalPRACE"><phrase role="Emphasis">Bigger test case</phrase> </para><para role="NormalPRACE">It runs with 150 MPI tasks and has the following mesh characteristics: </para><itemizedlist mark="bullet"><listitem><para role="NormalPRACE">NCHUNKS=6</para></listitem><listitem><para role="NormalPRACE">NPROC_XI=5</para></listitem><listitem><para role="NormalPRACE">NEX_XI =80 </para></listitem><listitem><para role="NormalPRACE">RECORD_LENGTH_IN_MINUTES =2.0</para></listitem></itemizedlist><para xmlns:docx2tex="http://transpect.io/docx2tex" role="Heading2" docx2tex:config="headline"><phrase role="docx2tex:identifier" css:font-size="14pt">4</phrase>Applications performances<phrase role="CommentReference" css:font-family="Times New Roman" css:font-weight="normal" xml:lang="de"><annotation><info><author><personname><othername role="display-name">Victor Cameo</othername><othername role="initials">VC</othername></personname></author><date>2017-03-19T19:18:00Z</date></info><para role="CommentText"><phrase role="CommentReference" css:font-size="12pt">Faire un tableau récap</phrase></para></annotation></phrase><anchor xmlns:hub="http://transpect.io/hub" role="start" xml:id="_Ref477340707"><?latex \label{ref-0087}?></anchor><anchor xmlns:hub="http://transpect.io/hub" role="start" xml:id="_Toc478379000"><?latex \label{ref-0088}?></anchor></para><para role="NormalPRACE">This section 
presents some sample results on targeted machines.</para><para xmlns:docx2tex="http://transpect.io/docx2tex" role="Heading3" docx2tex:config="headline"><phrase role="docx2tex:identifier">4.1</phrase>Alya<anchor xmlns:hub="http://transpect.io/hub" role="start" xml:id="_Toc478379001"><?latex \label{ref-0089}?></anchor></para><para role="NormalPRACE">Alya has been compiled and run using test case A on three different types of compute nodes:</para><itemizedlist mark="bullet"><listitem><para role="NormalPRACE">BSC MinoTauro Westmere Partition (Intel E5649 12 core 2.53 GHz, 24 GB RAM, Infiniband)</para></listitem><listitem><para role="NormalPRACE">BSC MinoTauro Haswell + K80 Partition (Intel Xeon E5-2630 v3 16 core 2.4 GHz, 128 GB RAM, NVIDIA K80, Infiniband)</para></listitem><listitem><para role="NormalPRACE">KNL 7250 (68 core 1.40 GHz, 16 GB MCDRAM, 96 GB DDR4 RAM, Ethernet)</para></listitem></itemizedlist><para role="NormalPRACE">Alya supports parallelism via different options, mainly MPI for problem decomposition, OpenMP within the matrix construction phase and CUDA parallelism for selected solvers. In general, the best distribution and performance can be achieved by using MPI. Running on KNL it has been proven optimal to use 4 OpenMP threads and 16 MPI processes for a total of 64 processes, each on its own physical core. The Xeon Phi processor shows slightly better performance in Alya configured in Quadrant/Cache when compared to Quadrant/Flat, although the difference is negligible. The application is not optimized for the first generation Xeon Phi KNC and does not support offloading.</para><para role="NormalPRACE">Overall speedups have been compared to a one node CPU run on the Haswell partition of MinoTauro. As the application is heavily optimized for traditional computation the best and almost linear scaling is observed on the CPU only runs. Some calculations benefit from the accelerators, GPU yielding from 3.6x to 6.5x speedup for one to three nodes. 
The KNL runs are limited by the OpenMP scalability and too many MPI tasks on these processors lead to suboptimal scaling. Speedups in this case range from 0.9x to 1.6x and can be further optimized by introducing more threading parallelism. The communication overhead when running with many MPI tasks on KNL is noticeable and further limited by the ethernet connection on multinode runs. High-performance fabrics such as Omni-Path or Infiniband promise to provide significant enhancement for these cases. The results are compared in <link linkend="_Ref478141367"><?latex {\hyperref[ref-0092]{Figure 3}}?></link>.</para><para role="NormalPRACE">It can be seen that the best performance is gained on the most recent standard Xeon CPU in conjunction with GPU. This is expected as Alya has been heavily optimized for traditional HPC scalability using mainly MPI and makes good use of available cores. The addition of GPU enabled solvers provides a noticeable boost to the overall performance. To fully exploit the KNL further optimizations are ongoing and additional OpenMP parallelism will need to be employed.</para><figure><title>Figure 1 Shows the matrix construction part of Alya that is parallelised with OpenMP and benefits significantly from the many cores available on KNL.<anchor role="start" xml:id="_Toc478379015"><?latex \label{ref-0090}?></anchor></title><mediaobject><imageobject><imagedata fileref="d7.5_4IP_1.0.docx.tmp/word/media/image3.png" css:width="453.5pt" css:height="486.45pt"/></imageobject></mediaobject></figure><figure><title>Figure 2 Demonstrates the scalability of the code. 
As expected Haswell cores with K80 GPU are high-performing while the KNL port is currently being optimized further.<anchor role="start" xml:id="_Toc478379016"><?latex \label{ref-0091}?></anchor></title><mediaobject><imageobject><imagedata fileref="d7.5_4IP_1.0.docx.tmp/word/media/image4.png" css:width="453.5pt" css:height="475.7pt"/></imageobject></mediaobject></figure><figure><title>Figure 3 Best performance is achieved with GPU in combination with powerful CPU cores. Single thread performance has a big impact on the speedup, both threading and vectorization are employed for additional performance.<anchor role="start" xml:id="_Ref478141367"><?latex \label{ref-0092}?></anchor><anchor role="start" xml:id="_Toc478379017"><?latex \label{ref-0093}?></anchor></title><mediaobject><imageobject><imagedata fileref="d7.5_4IP_1.0.docx.tmp/word/media/image5.png" css:width="453.5pt" css:height="445.4pt"/></imageobject></mediaobject></figure><para xmlns:docx2tex="http://transpect.io/docx2tex" role="Heading3" docx2tex:config="headline"><phrase role="docx2tex:identifier">4.2</phrase>Code_Saturne<anchor xmlns:hub="http://transpect.io/hub" role="start" xml:id="_Toc478379002"><?latex \label{ref-0094}?></anchor></para><para role="NormalPRACE"><phrase role="Emphasis">Description runtime architecture:</phrase></para><itemizedlist mark="bullet"><listitem><para role="NormalPRACE"><phrase role="PageNumber">KNL: ARCHER (model 7210) - The following environment is used, i.e. ENV_6.0.3. The INTEL compiler's version is 17.0.0.098.</phrase></para></listitem><listitem><para role="NormalPRACE"><phrase role="PageNumber">GPU: 2 POWER8 nodes, i.e. 
S822LC (2x P8 10-cores + 2x K80 (2 G210 per K80)) and S824L (2x P8 12-cores + 2x K40 (1 G180 per K40)) - The compiler is at/8.0, the MPI distribution openmpi/1.8.8 and the CUDA compiler's version is 7.5.</phrase></para></listitem></itemizedlist><para role="NormalPRACE"><phrase role="Emphasis">3-D Taylor-Green vortex flow (hexahedral cells)</phrase></para><para role="NormalPRACE">The first test case has been run on ARCHER KNL and the performance has been investigated for several configurations, each of them using 64 MPI tasks per node and either 1, 2 or 4 hyper-threads (extra MPI tasks) or OpenMP threads have been added for testing. The results are compared to ARCHER CPU, in this case IvyBridge CPU. Up to 8 nodes are used for comparison.</para><figure><title>Figure 4 Code_Saturne's performance on KNL. AMG is used as a solver in V4.2.2.<anchor role="start" xml:id="_Ref477440013"><?latex \label{ref-0095}?></anchor><anchor role="start" xml:id="_Toc478379018"><?latex \label{ref-0096}?></anchor></title><mediaobject><imageobject><imagedata fileref="d7.5_4IP_1.0.docx.tmp/word/media/image6.png" css:width="469.8411811023622pt" css:height="417.8138582677165pt"/></imageobject></mediaobject></figure><para role="NormalPRACE"><link linkend="_Ref477440013"><?latex {\hyperref[ref-0095]{Figure 4}}?></link> shows the CPU time per time step as a function of the number of threads/MPI tasks. For all the cases, the time to solution decreases when the number of threads increases. For the case using MPI only and no hyper-threading (green line) only, a simulation is run on half a node as well to investigate the speedup going from half a node to a node, which is about 2 as seen on the figure. The ellipses help comparing the time to solution per node, and finally, a comparison is carried out with simulations run on ARCHER without KNL, using Ivybridge processors. 
When using 8 nodes, the best configuration for Code_Saturne to run on KNL is for 64 MPI tasks and 2 OpenMP threads per task (blue line on the figure), which is about 15 to 20% faster than running on the Ivybridge nodes, using the same number of nodes.</para><para role="NormalPRACE"><phrase role="Emphasis">Flow in a 3-D lid-driven cavity (tetrahedral cells)</phrase></para><para role="NormalPRACE">The following options are used for PETSc: -</para><itemizedlist mark="bullet"><listitem><para role="NormalPRACE">-CPU: -ksp_type = cg and -pc_type = jacobi</para></listitem><listitem><para role="NormalPRACE">-GPU: -ksp_type = cg and -vec_type = cusp and -mat_type = aijcusp and -pc_type = jacobi</para></listitem></itemizedlist><para role="NormalPRACE" css:page-break-after="avoid" css:text-align="center" css:text-align-last="center"><mediaobject css:width="449pt" css:height="106pt"><imageobject><imagedata fileref="d7.5_4IP_1.0.docx.tmp/word/media/image7.emf"/></imageobject></mediaobject></para><para role="Caption">Table 3 Performance of Code_Saturne + PETSc on 1 node of the POWER8 clusters. Comparison between 2 different nodes, using different types of CPU and GPU. PETSc is built on LAPACK. The speedup is computed as the ratio between the time to solution on the CPU for a given number of MPI tasks and the time to solution on the CPU/GPU for the same number of MPI tasks.<anchor role="start" xml:id="_Ref477996102"><?latex \label{ref-0097}?></anchor><anchor role="start" xml:id="_Toc478379040"><?latex \label{ref-0098}?></anchor></para><para css:page-break-after="avoid" css:text-align="center" css:text-align-last="center"><mediaobject css:width="260pt" css:height="111pt"><imageobject><imagedata fileref="d7.5_4IP_1.0.docx.tmp/word/media/image8.emf"/></imageobject></mediaobject></para><para role="Caption" css:text-align="center" css:text-align-last="center">Table 4 Performance of Code_Saturne and PETSc on 1 node of KNL. 
PETSc is built on the MKL library<anchor role="start" xml:id="_Ref477996105"><?latex \label{ref-0099}?></anchor><anchor role="start" xml:id="_Toc478379041"><?latex \label{ref-0100}?></anchor></para><para role="NormalPRACE"><link linkend="_Ref477996102"><?latex {\hyperref[ref-0097]{Table 3}}?></link> and <link linkend="_Ref477996105"><?latex {\hyperref[ref-0099]{Table 4}}?></link> show the results obtained using POWER8 CPU and CPU/GPU, and KNL, respectively. Focusing on the results on the POWER8 nodes first, a speedup is observed on each node of the POWER8, when using the same number of MPI tasks and of GPU. However, when the nodes are fully populated (20 and 24 MPI tasks, respectively), it is cheaper to run on the CPU only than using CPU/GPU. This could be explained by the fact that the same overall amount of data is transferred but the system administration costs, latency costs, asynchronicity of transfer in 20 (S822LC) or 24 (S824L) slices might be prohibitive.</para><para xmlns:docx2tex="http://transpect.io/docx2tex" role="Heading3" docx2tex:config="headline"><phrase role="docx2tex:identifier">4.3</phrase>CP2K<anchor xmlns:hub="http://transpect.io/hub" role="start" xml:id="_Toc478379003"><?latex \label{ref-0101}?></anchor></para><para role="NormalPRACE">Times shown in the ARCHER KNL (model 7210, 1.30GHz, 96GB memory DDR) vs Ivy Bridge (E5-2697 v2, 2.7 GHz, 64GB) plot are for those CP2K threading configurations that give the best performance in each case. 
The shorthand for naming threading configurations is:</para><itemizedlist mark="bullet"><listitem><para role="NormalPRACE">MPI: pure MPI</para></listitem><listitem><para role="NormalPRACE">X_TH: X OpenMP threads per MPI rank</para></listitem></itemizedlist><para role="NormalPRACE">Whilst single-threaded pure MPI or 2 OpenMP threads is often fastest on conventional processors, on the KNL multithreading is more likely to be beneficial, especially in problems such as the LiH-HFX benchmark in which having fewer MPI ranks means more memory is available to each rank, allowing partial results to be stored in memory instead of expensively recomputed on the fly. </para><para role="NormalPRACE">Hyperthreads were left disabled (equivalent to the aprun option –j 1), as no significant performance benefit was observed using hyperthreading.</para><figure><title>Figure 5 Test case 1 of CP2K on the ARCHER cluster<anchor role="start" xml:id="_Ref477996530"><?latex \label{ref-0102}?></anchor><anchor role="start" xml:id="_Toc478379019"><?latex \label{ref-0103}?></anchor></title><mediaobject><imageobject><imagedata fileref="d7.5_4IP_1.0.docx.tmp/word/media/image9.png" css:width="453.5pt" css:height="251.85pt"/></imageobject></mediaobject></figure><para role="NormalPRACE"><phrase css:background-color="#FFFFFF">The node based comparison shows (</phrase><link linkend="_Ref477996530"><?latex {\hyperref[ref-0102]{Figure 5}}?></link><phrase css:background-color="#FFFFFF">) that the runtimes on KNL nodes are roughly 1.7 times slower than runtimes on 2-socket IvyBridge nodes.</phrase></para><para xmlns:docx2tex="http://transpect.io/docx2tex" role="Heading3" docx2tex:config="headline"><phrase role="docx2tex:identifier">4.4</phrase>GPAW<anchor xmlns:hub="http://transpect.io/hub" role="start" xml:id="_Toc478379004"><?latex \label{ref-0104}?></anchor></para><para role="NormalPRACE">The performance of GPAW using both benchmarks was measured with a range of parallel job sizes on several 
architectures; with the architectures designated in the following tables, figures, and text as:</para><itemizedlist mark="bullet"><listitem><para role="NormalPRACE">CPU: x86 Haswell CPU (Intel Xeon E5-2690v3) in a dual-socket node</para></listitem><listitem><para role="NormalPRACE">KNC: Knights Corner MIC (Intel Xeon Phi 7120P) with a x86 Haswell host CPU (Intel Xeon E5-2680v3) in a dual-socket node</para></listitem><listitem><para role="NormalPRACE">KNL: Knights Landing MIC (Intel Xeon Phi 7210) in a single-socket node</para></listitem><listitem><para role="NormalPRACE">K40: K40 GPU (NVIDIA Tesla K40) with a x86 Ivy Bridge host CPU (Intel Xeon E5-2620-v2) in a dual-socket node</para></listitem><listitem><para role="NormalPRACE">K80: K80 GPU (NVIDIA Tesla K80) with a x86 Haswell host CPU (Intel Xeon E5-2680v3) in a quad-socket node</para></listitem></itemizedlist><para role="NormalPRACE">Only time spent in the main SCF-cycle was used as the runtime in the comparison (<link linkend="_Ref478142596"><?latex {\hyperref[ref-0105]{Table 5}}?></link> and <link linkend="_Ref478142598"><?latex {\hyperref[ref-0107]{Table 6}}?></link>) to exclude any differences in the initialisation overheads.</para><para role="NormalPRACE" css:page-break-after="avoid" css:text-align="center" css:text-align-last="center"><inlinemediaobject annotations="object_d16349e11066" css:width="393pt" css:height="111pt"><imageobject><imagedata fileref="d7.5_4IP_1.0.docx.tmp/word/media/image10.emf"/></imageobject></inlinemediaobject><inlinemediaobject role="OLEObject" annotations="object_d16349e11066"><imageobject role="Excel.Sheet.12"><imagedata fileref="embeddings/Microsoft_Excel_Worksheet2.xlsx"/></imageobject></inlinemediaobject></para><para role="Caption" css:text-align="left">Table 5 GPAW runtimes (in seconds) for the smaller benchmark (Carbon Nanotube) measured on several architectures when using n sockets (i.e. 
processors or accelerators).<anchor role="start" xml:id="_Ref478142596"><?latex \label{ref-0105}?></anchor><anchor role="start" xml:id="_Toc478379042"><?latex \label{ref-0106}?></anchor></para><para role="NormalPRACE" css:page-break-after="avoid" css:text-align="center" css:text-align-last="center"><inlinemediaobject annotations="object_d16349e11090" css:width="393pt" css:height="127pt"><imageobject><imagedata fileref="d7.5_4IP_1.0.docx.tmp/word/media/image11.emf"/></imageobject></inlinemediaobject><inlinemediaobject role="OLEObject" annotations="object_d16349e11090"><imageobject role="Excel.Sheet.12"><imagedata fileref="embeddings/Microsoft_Excel_Worksheet3.xlsx"/></imageobject></inlinemediaobject></para><para role="Caption" css:text-align="left">Table 6 GPAW runtimes (in seconds) for the larger benchmark (Copper Filament) measured on several architectures when using n sockets (i.e. processors or accelerators). *Due to memory limitations on the GPU the grid spacing was increased from 0.22 to 0.28 to have a sparser grid. To account for this in the comparison, the K40 and K80 runtimes have been scaled up using a corresponding CPU runtime as a yardstick (scaling factor q=2.1132).<anchor role="start" xml:id="_Ref478142598"><?latex \label{ref-0107}?></anchor><anchor role="start" xml:id="_Toc478379043"><?latex \label{ref-0108}?></anchor></para><para role="NormalPRACE">As can be seen from Table 5 and Table 6, in both benchmarks a single KNL or K40/K80 was faster than a single CPU. But when using multiple KNL, the performance does not seem to scale as well as for CPU. In the smaller benchmark (Carbon Nanotube), CPU outperform KNL when using more than 2 processors. In the larger benchmark (Copper Filament), KNL still outperform CPU with 8 processors but it seems likely that the CPU will overtake KNL when using an even larger number of processors.</para><para role="NormalPRACE">In contrast to KNL, the older KNC are slower than Haswell CPU across the board. 
Nevertheless, as can be seen from Figure 6, the scaling of KNC is to some extent comparable to CPU but with a lower scaling limit. It is therefore likely that, on systems with considerably slower host CPU than Haswells (e.g. Ivy Bridges), KNC may also give a performance boost over the host CPU.</para><figure><title>Figure 6 Relative performance (to / t) of GPAW is shown for parallel jobs using an increasing number of CPU (blue) or Xeon Phi KNC (red). Single CPU SCF-cycle runtime (to) was used as the baseline for the normalisation. Ideal scaling is shown as a linear dashed line for comparison. Case 1 (Carbon Nanotube) is shown with square markers and Case 2 (Copper Filament) is shown with round markers. <anchor role="start" xml:id="_Toc478379020"><?latex \label{ref-0109}?></anchor></title><mediaobject><imageobject><imagedata fileref="d7.5_4IP_1.0.docx.tmp/word/media/image12.png" css:width="463.86818897637795pt" css:height="365.7114960629921pt"/></imageobject></mediaobject></figure><para xmlns:docx2tex="http://transpect.io/docx2tex" role="Heading3" docx2tex:config="headline"><phrase role="docx2tex:identifier">4.5</phrase>GROMACS<anchor xmlns:hub="http://transpect.io/hub" role="start" xml:id="_Toc478379005"><?latex \label{ref-0110}?></anchor></para><para role="NormalPRACE">Gromacs was successfully compiled and run on the following systems:</para><itemizedlist mark="bullet"><listitem><para role="NormalPRACE">GRNET ARIS: Thin nodes (E5-2680v2), GPU nodes (Dual E5-2660v3+ Dual K40m), all with FDR14 Infiniband, Single node KNL 7210. </para></listitem><listitem><para role="NormalPRACE">CINES Frioul KNL 7230</para></listitem><listitem><para role="NormalPRACE">IDRIS Ouessant: IBM Power 8 + Dual P100</para></listitem></itemizedlist><para role="NormalPRACE">On KNL machines the runs were performed using Quadrant processor and both Cache / Flat memory configuration. On GRNET's single node KNL more configurations were tested. 
</para><para role="NormalPRACE">As expected, the Quadrant/Cache mode gives the best performance for all cases. The performance dependence on the MPI Tasks/OpenMP threads combination was also explored. In most cases 66 tasks per node using 2 or 4 threads/task gives the best performance on KNL 7230.</para><para role="NormalPRACE">In all accelerated runs a speed up of 2-2.6x with respect to CPU-only runs was achieved with GPU. Gromacs does not support offload on KNC.</para><figure><title>Figure 7 Scalability for GROMACS test case GluCL Ion Channel<anchor role="start" xml:id="_Toc478379021"><?latex \label{ref-0111}?></anchor></title><mediaobject><imageobject css:clip="rect(0.9500000000000001pt, 0.7000000000000001pt, 0.45pt, 0.25pt)"><imagedata fileref="d7.5_4IP_1.0.docx.tmp/word/media/image13.png" css:width="463.8872440944882pt" css:height="333.2403149606299pt"/></imageobject></mediaobject></figure><figure><title>Figure 8 Scalability for GROMACS test case Lignocellulose<anchor role="start" xml:id="_Toc478379022"><?latex \label{ref-0112}?></anchor></title><mediaobject><imageobject css:clip="rect(1pt, 0.75pt, 0.5pt, 0.30000000000000004pt)"><imagedata fileref="d7.5_4IP_1.0.docx.tmp/word/media/image14.png" css:width="463.8872440944882pt" css:height="334.1732283464567pt"/></imageobject></mediaobject></figure><para xmlns:docx2tex="http://transpect.io/docx2tex" role="Heading3" docx2tex:config="headline"><phrase role="docx2tex:identifier">4.6</phrase>NAMD<anchor xmlns:hub="http://transpect.io/hub" role="start" xml:id="_Toc478379006"><?latex \label{ref-0113}?></anchor></para><para role="NormalPRACE">NAMD was successfully compiled and run on the following systems:</para><itemizedlist mark="bullet"><listitem><para role="NormalPRACE">GRNET ARIS : Thin nodes (E5-2680v2), GPU nodes (Dual E5-2660v3+ Dual K40m), KNC Nodes (Dual E5-2660v2+Dual KNC 7120P), all with FDR14 Infiniband, Single node KNL 7210.</para></listitem><listitem><para role="NormalPRACE">Cines Frioul : KNL 7230 
</para></listitem><listitem><para role="NormalPRACE">IDRIS Ouessant : IBM Power 8 + Dual P100</para></listitem></itemizedlist><para role="NormalPRACE">On KNL machines the runs were performed using Quadrant processor and both Cache / Flat memory configuration. On GRNET's single node KNL more configurations were tested. </para><para role="NormalPRACE">As expected, the Quadrant/Cache mode gives the best performance for all cases. The performance dependence on the MPI Tasks/OpenMP threads combination was also explored. </para><para role="NormalPRACE">In most cases 66 tasks per node using 4 threads/task or 4 tasks per node/64 threads per task gives the best performance on KNL 7230.</para><para role="NormalPRACE">In all accelerated runs a speed up of 5-6x with respect to CPU-only runs was achieved with GPU.</para><para role="NormalPRACE">On KNC the speed up with respect to CPU-only runs is in the range 2-3.5 in all cases.</para><figure><title>Figure 9 Scalability for NAMD test case STMV.8M<anchor role="start" xml:id="_Toc478379023"><?latex \label{ref-0114}?></anchor></title><mediaobject><imageobject css:clip="rect(0.9500000000000001pt, 0.7000000000000001pt, 0.5pt, 0.30000000000000004pt)"><imagedata fileref="d7.5_4IP_1.0.docx.tmp/word/media/image15.png" css:width="463.8872440944882pt" css:height="338.5843307086614pt"/></imageobject></mediaobject></figure><figure><title>Figure 10 Scalability for NAMD test case STMV.28M<anchor role="start" xml:id="_Toc478379024"><?latex \label{ref-0115}?></anchor></title><mediaobject><imageobject css:clip="rect(1pt, 0.75pt, 0.5pt, 0.2pt)"><imagedata fileref="d7.5_4IP_1.0.docx.tmp/word/media/image16.png" css:width="463.8872440944882pt" css:height="330.41897637795273pt"/></imageobject></mediaobject></figure><para xmlns:docx2tex="http://transpect.io/docx2tex" role="Heading3" docx2tex:config="headline"><phrase role="docx2tex:identifier">4.7</phrase>PFARM<anchor xmlns:hub="http://transpect.io/hub" role="start" xml:id="_Toc478379007"><?latex 
\label{ref-0116}?></anchor></para><para role="NormalPRACE">The code has been tested and timed on several architectures, designated in the following figures, tables and text as:</para><itemizedlist mark="bullet"><listitem><para role="NormalPRACE">CPU: node contains two 2.7 GHz, 12-core E5-2697 v2 (Ivy Bridge) series processors with 64GB memory.</para></listitem><listitem><para role="NormalPRACE">KNL: node is a 64-core KNL processor (model 7210) running at 1.30GHz with 96GB of memory.</para></listitem><listitem><para role="NormalPRACE">GPU: node contains a dual socket 16-core Haswell E5-2698 running at 2.3 GHz with 256GB memory and 4 K40, 4 K80 or 4 P100 GPU.</para></listitem></itemizedlist><para role="NormalPRACE">Codes on all architectures are compiled with the Intel compiler (CPU v15, KNL &amp; GPU v17).</para><para role="NormalPRACE">The divide-and-conquer eigensolver routine DSYEVD is used throughout the test runs. The routine is linked from the following numerical libraries:</para><itemizedlist mark="bullet"><listitem><para role="NormalPRACE">CPU: Intel MKL Version 11.2.2</para></listitem><listitem><para role="NormalPRACE">KNL: Intel MKL Version 2017 Initial Release</para></listitem><listitem><para role="NormalPRACE">GPU: MAGMA Version 2.2</para></listitem></itemizedlist><figure><title>Figure 11 Eigensolver performance on KNL and GPU<anchor role="start" xml:id="_Ref477737037"><?latex \label{ref-0117}?></anchor><anchor role="start" xml:id="_Toc478379025"><?latex \label{ref-0118}?></anchor></title><mediaobject><imageobject css:clip="rect(0.30000000000000004pt, 0.15000000000000002pt, 0.25pt, 0.30000000000000004pt)"><imagedata fileref="d7.5_4IP_1.0.docx.tmp/word/media/image17.png" css:width="451.8872440944882pt" css:height="313.0933858267717pt"/></imageobject></mediaobject></figure><para role="NormalPRACE">EXDIG calculations are dominated by the eigensolver operations required to diagonalize each sector Hamiltonian matrix. 
<link linkend="_Ref477737037"><?latex {\hyperref[ref-0117]{Figure 11}}?></link> summarizes eigensolver performance, using DSYEVD, over a range of problem sizes for the Xeon (CPU), Intel Knight’s Landing (KNL) and a range of recent Nvidia GPU architectures. The results are normalised to the single node CPU performance using 24 OpenMP threads. The CPU runs use 24 OpenMP threads and the KNL runs use 64 OpenMP threads. Dense linear algebra calculations tend to be bound by memory bandwidth, so using hyperthreading on the KNL or CPU is not beneficial. MAGMA is able to parallelise the calculation automatically across multiple GPU on a compute node and these results are denoted by the x2, x4 labels. <link linkend="_Ref477737037"><?latex {\hyperref[ref-0117]{Figure 11}}?></link> demonstrates that MAGMA performance relative to CPU performance increases as problem size increases, due to the relative overhead cost of data transfer O(N^2) reducing compared to computational load O(N^3).</para><para role="NormalPRACE"><phrase role="Emphasis">Test Case 1 – FeIII</phrase></para><para role="NormalPRACE">Defining Computational Characteristics: 10 Fine Region Sector calculations involving Hamiltonian matrices of dimension 23620 and 10 Coarse Region Sector calculations involving Hamiltonian matrices of dimension 11810.</para><para role="NormalPRACE"><phrase role="Emphasis">Test Case 2 – CH4</phrase></para><para role="NormalPRACE">Defining Computational Characteristics: 10 ‘Spin 1’ Coarse Sector calculations involving Hamiltonian matrices of dimension 5720 and 10 ‘Spin 2’ Coarse Sector calculations involving Hamiltonian matrices of dimension 7890.</para><informaltable css:border-collapse="collapse" css:margin-left="5.4pt" css:width="458.8pt"><tgroup cols="9"><colspec colnum="1" colname="col1" colwidth="30.90333528mm"/><colspec colnum="2" colname="col2" colwidth="21.3430569mm"/><colspec colnum="3" colname="col3" colwidth="20.14361238mm"/><colspec colnum="4" colname="col4" 
colwidth="13.24680639mm"/><colspec colnum="5" colname="col5" colwidth="14.83430649mm"/><colspec colnum="6" colname="col6" colwidth="14.83430649mm"/><colspec colnum="7" colname="col7" colwidth="13.28208417mm"/><colspec colnum="8" colname="col8" colwidth="16.63347327mm"/><colspec colnum="9" colname="col9" colwidth="16.63347327mm"/><tbody><row css:min-height="16pt"><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:width="87.6pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="none" css:border-right-style="none" css:white-space="nowrap" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col1"/><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="60.5pt" css:border-top-style="solid" css:border-top-width="0.5pt" css:border-left-style="solid" css:border-left-width="0.5pt" css:border-bottom-style="none" css:border-right-style="solid" css:border-right-width="0.5pt" css:background-color="#D9D9D9" css:white-space="nowrap" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col2"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000" xml:lang="fr">CPU 24 threads</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="57.1pt" css:border-top-style="solid" css:border-top-width="0.5pt" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:background-color="#D9D9D9" css:white-space="nowrap" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col3"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000" xml:lang="fr">KNL 64 threads</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="37.55pt" css:border-top-style="solid" 
css:border-top-width="0.5pt" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:background-color="#D9D9D9" css:white-space="nowrap" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col4"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000" xml:lang="fr">K80</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="42.05pt" css:border-top-style="solid" css:border-top-width="0.5pt" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:background-color="#D9D9D9" css:white-space="nowrap" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col5"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000" xml:lang="fr">K80x2</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="42.05pt" css:border-top-style="solid" css:border-top-width="0.5pt" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:background-color="#D9D9D9" css:white-space="nowrap" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col6"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000" xml:lang="fr">K80x4</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="37.65pt" css:border-top-style="solid" css:border-top-width="0.5pt" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" 
css:background-color="#D9D9D9" css:white-space="nowrap" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col7"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000" xml:lang="fr">P100</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="47.15pt" css:border-top-style="solid" css:border-top-width="0.5pt" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:background-color="#D9D9D9" css:white-space="nowrap" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col8"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000" xml:lang="fr">P100x2</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="47.15pt" css:border-top-style="solid" css:border-top-width="0.5pt" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:background-color="#D9D9D9" css:white-space="nowrap" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col9"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000" xml:lang="fr">P100x4</phrase></para></entry></row><row css:min-height="31.35pt"><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="87.6pt" css:border-top-style="solid" css:border-top-width="0.5pt" css:border-left-style="solid" css:border-left-width="0.5pt" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:background-color="#D9D9D9" css:white-space="nowrap" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col1"><para 
css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000" xml:lang="fr">Test Case 1 ; Atomic ; FeIII</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="60.5pt" css:border-top-style="solid" css:border-top-width="0.5pt" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:white-space="nowrap" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col2"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000" xml:lang="fr">4475</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="57.1pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:white-space="nowrap" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col3"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000" xml:lang="fr">2610</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="37.55pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:white-space="nowrap" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col4"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000" xml:lang="fr">1215</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="42.05pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" 
css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:white-space="nowrap" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col5"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000" xml:lang="fr">828</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="42.05pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:white-space="nowrap" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col6"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000" xml:lang="fr">631</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="37.65pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:white-space="nowrap" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col7"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000" xml:lang="fr">544</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="47.15pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:white-space="nowrap" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col8"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000" xml:lang="fr">427</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" 
css:text-align="center" css:width="47.15pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:white-space="nowrap" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col9"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000" xml:lang="fr">377</phrase></para></entry></row><row css:min-height="16pt"><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="87.6pt" css:border-top-style="none" css:border-left-style="solid" css:border-left-width="0.5pt" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:background-color="#D9D9D9" css:white-space="nowrap" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col1"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000" xml:lang="fr">Test Case 2 ; Molecular ; CH4</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="60.5pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:white-space="nowrap" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col2"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000" xml:lang="fr">466</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="57.1pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:white-space="nowrap" css:vertical-align="middle" 
docx2hub:generated-hideMark="" colname="col3"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000" xml:lang="fr">346</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="37.55pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:white-space="nowrap" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col4"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000" xml:lang="fr">180</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="42.05pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:white-space="nowrap" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col5"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000" xml:lang="fr">150</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="42.05pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:white-space="nowrap" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col6"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000" xml:lang="fr">134</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="37.65pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" 
css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:white-space="nowrap" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col7"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000" xml:lang="fr">119</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="47.15pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:white-space="nowrap" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col8"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000" xml:lang="fr">107</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="47.15pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:white-space="nowrap" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col9"><para css:page-break-after="avoid" css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000" xml:lang="fr">111</phrase></para></entry></row></tbody></tgroup></informaltable><para role="Caption">Table 7 Overall EXDIG runtime performance on various accelerators (runtime, secs)<anchor role="start" xml:id="_Ref477737720"><?latex \label{ref-0119}?></anchor><anchor role="start" xml:id="_Toc478379044"><?latex \label{ref-0120}?></anchor></para><para role="NormalPRACE"><link linkend="_Ref477737720"><?latex {\hyperref[ref-0119]{Table 7}}?></link> records the overall run time on a range of architectures for both test cases described. 
For the complete runs (including I/O), both KNL-based and GPU-based computations significantly outperform the CPU-based calculations. For Test Case 1, utilising a node with a single P100 GPU accelerator results in a runtime more than 8 times quicker than the CPU, correspondingly approximately 4 times quicker for Test Case 2. The smaller Hamiltonian matrices associated with Test Case 2 mean that data transfer costs O(N<superscript>2</superscript>) are relatively high vs computation costs O(N<superscript>3</superscript>). Smaller matrices also result in poorer scaling as we increase the number of GPUs per node for Test Case 2.</para><para role="Caption"><inlinemediaobject annotations="object_d16349e12407" css:width="449pt" css:height="116pt"><imageobject><imagedata fileref="d7.5_4IP_1.0.docx.tmp/word/media/image18.emf"/></imageobject></inlinemediaobject><inlinemediaobject role="OLEObject" annotations="object_d16349e12407"><imageobject role="Excel.Sheet.12"><imagedata fileref="embeddings/Microsoft_Excel_Worksheet4.xlsx"/></imageobject></inlinemediaobject></para><para role="Caption">Table 8 Overall EXDIG runtime parallel performance using MPI-GPU version<anchor role="start" xml:id="_Ref478145524"><?latex \label{ref-0121}?></anchor><anchor role="start" xml:id="_Toc478379045"><?latex \label{ref-0122}?></anchor></para><para role="NormalPRACE">A relatively simple MPI harness can be used in EXDIG to farm out different sector Hamiltonian calculations to multiple CPU, KNL or GPU nodes. <link linkend="_Ref478145524"><?latex {\hyperref[ref-0121]{Table 8}}?></link> shows that parallel scaling across nodes is very good for each test platform. This strategy is inherently scalable; however, the replicated data approach requires significant amounts of memory per node. 
Test Case 1 is used as the dataset here, although the problem characteristics are slightly different to the setup used for <link linkend="_Ref477737720"><?latex {\hyperref[ref-0119]{Table 7}}?></link>, with 5 Fine Region sectors with Hamiltonian dimension of 23620 and 20 Coarse Region sectors with Hamiltonian dimension of 11810. With these characteristics, runs using 2 MPI tasks experience inferior load-balancing in the Fine Region calculation compared to runs using 5 MPI tasks.</para><para xmlns:docx2tex="http://transpect.io/docx2tex" role="Heading3" docx2tex:config="headline"><phrase role="docx2tex:identifier">4.8</phrase>QCD<anchor xmlns:hub="http://transpect.io/hub" role="start" xml:id="_Toc478379008"><?latex \label{ref-0123}?></anchor></para><para role="NormalPRACE">As stated in the description, QCD benchmark has two implementations.</para><para xmlns:docx2tex="http://transpect.io/docx2tex" role="Heading4" docx2tex:config="headline"><phrase role="docx2tex:identifier" css:font-family="Arial" css:font-weight="normal" css:font-style="italic" css:text-transform="uppercase" css:font-variant="normal" css:display="inherit" css:color="#000000" css:top="0pt" css:position="relative">4.8.1</phrase>First implementation<anchor xmlns:hub="http://transpect.io/hub" role="start" xml:id="_Toc478379009"><?latex \label{ref-0124}?></anchor></para><figure><title>Figure 12 Small test case results for QCD, first implementation<anchor role="start" xml:id="_Ref477152535"><?latex \label{ref-0125}?></anchor><anchor role="start" xml:id="_Toc478379026"><?latex \label{ref-0126}?></anchor></title><mediaobject><imageobject css:clip="rect(0.25pt, 3.2pt, 0.15000000000000002pt, 0.1pt)"><imagedata fileref="d7.5_4IP_1.0.docx.tmp/word/media/image19.png" css:width="446.96094488188976pt" css:height="279.4092913385827pt"/></imageobject></mediaobject></figure><figure><title>Figure 13 Large test case results for QCD, first implementation<anchor role="start" xml:id="_Ref477772687"><?latex 
\label{ref-0127}?></anchor><anchor role="start" xml:id="_Toc478379027"><?latex \label{ref-0128}?></anchor></title><mediaobject><imageobject css:clip="rect(0.25pt, 0.05pt, 0.2pt, 3.1500000000000004pt)"><imagedata fileref="d7.5_4IP_1.0.docx.tmp/word/media/image20.png" css:width="457.8872440944882pt" css:height="279.02503937007873pt"/></imageobject></mediaobject></figure><para role="NormalPRACE">The strong scaling, on Titan and ARCHER, for small (<link linkend="_Ref477152535"><?latex {\hyperref[ref-0125]{Figure 12}}?></link>) and large (<link linkend="_Ref477772687"><?latex {\hyperref[ref-0127]{Figure 13}}?></link>) problem sizes. For ARCHER, both CPU are used per node. For Titan, we include results with and without GPU utilization.</para><para role="NormalPRACE">On each node, Titan has one 16-core Interlagos CPU and one K20X GPU, whereas ARCHER has two 12-core Ivy-bridge CPU. In this section, we evaluate on a node-by-node basis. For Titan, a single MPI task per node, operating on the CPU, is used to drive the GPU on that node. We also include, for Titan, results just using the CPU on each node without any involvement from the GPU, for comparison. This means that, on a single node, our Titan results will be the same as those K20X and Interlagos results presented in the previous section (for the same test case). On ARCHER, however, we fully utilize both the processors per node: to do this we use two MPI tasks per node, each with 12 OpenMP threads (via targetDP). 
So the single node results for ARCHER are twice as fast as those Ivy-bridge single-processor results presented in the previous section.</para><figure><title>Figure 14 shows the time taken by the full MILC 64x64x64x8 test cases on traditional CPU, Intel Knights Landing Xeon Phi and NVIDIA P100 (Pascal) GPU architectures.<anchor role="start" xml:id="_Ref477152624"><?latex \label{ref-0129}?></anchor><anchor role="start" xml:id="_Toc478379028"><?latex \label{ref-0130}?></anchor></title><mediaobject><imageobject><imagedata fileref="d7.5_4IP_1.0.docx.tmp/word/media/image21.emf" css:width="453.5pt" css:height="278.1pt"/></imageobject></mediaobject></figure><para role="NormalPRACE">In <link linkend="_Ref477152624"><?latex {\hyperref[ref-0129]{Figure 14}}?></link> we present preliminary results on the latest generation Intel Knights Landing (KNL) and NVIDIA Pascal architectures, which offer very high bandwidth stacked memory, together with the same traditional Intel-Ivy-bridge CPU used in previous sections. Note that these results are not directly comparable with those presented earlier, since they are for a different test case size (larger since we are no longer limited by the small memory size of the Knights Corner), and they are for a slightly updated version of the benchmark. The KNL is the 64-core 7210 model, available from within a test and development platform provided as part of the ARCHER service. The Pascal is an NVIDIA P100 GPU provided as part of the “Ouessant” IBM service at IDRIS, where the host CPU is an IBM Power8+.</para><para role="NormalPRACE">It can be seen that the KNL is 7.5X faster than the Ivy-bridge; the Pascal is 13X faster than the Ivy-bridge; and the Pascal is 1.7X faster than the KNL. 
</para><para xmlns:docx2tex="http://transpect.io/docx2tex" role="Heading4" docx2tex:config="headline"><phrase role="docx2tex:identifier" css:font-family="Arial" css:font-weight="normal" css:font-style="italic" css:text-transform="uppercase" css:font-variant="normal" css:display="inherit" css:color="#000000" css:top="0pt" css:position="relative">4.8.2</phrase>Second implementation<anchor xmlns:hub="http://transpect.io/hub" role="start" xml:id="_Toc478379010"><?latex \label{ref-0131}?></anchor></para><para role="NormalPRACE"><phrase role="Emphasis">GPU results</phrase></para><para role="NormalPRACE">The GPU benchmark results of the second implementation were obtained on PizDaint, located at CSCS in Switzerland, and on the GPU-partition of Cartesius at Surfsara in Amsterdam, the Netherlands. The runs are performed by using the provided bash-scripts. PizDaint is equipped with one P100 Pascal-GPU per node.  Two different test-cases are depicted, the "strong-scaling" mode with a random lattice configuration of size 32x32x32x96 and 64x64x64x128.  The GPU nodes of Cartesius have two Kepler-GPU K40m per node and the "strong-scaling" test is shown for one card per node and for two cards per node. 
The benchmark kernel uses the conjugate gradient solver, which solves a linear equation system given by D * x = b, for the unknown solution "x" based on the clover improved Wilson Dirac operator "D" and a known right hand side "b".</para><figure><title>Figure 15 Result of second implementation of QCD on K40m GPU<anchor role="start" xml:id="_Ref478368452"><?latex \label{ref-0132}?></anchor><anchor role="start" xml:id="_Toc478379029"><?latex \label{ref-0133}?></anchor></title><mediaobject><imageobject><imagedata fileref="d7.5_4IP_1.0.docx.tmp/word/media/image22.png" css:width="433.4173228346457pt" css:height="325.13385826771656pt"/></imageobject></mediaobject></figure><para role="NormalPRACE"><link linkend="_Ref478368452"><?latex {\hyperref[ref-0132]{Figure 15}}?></link> shows strong scaling of the conjugate gradient solver on K40m GPU on Cartesius. The lattice size is given by 32x32x32x96, which corresponds to a moderate lattice size nowadays.  The test is performed with a mixed precision CG in double-double mode (red) and half-double mode (blue). The run is done on one GPU per node (filled) and two GPUs per node (non-filled).</para><figure><title>Figure 16 Result of second implementation of QCD on P100 GPU<anchor role="start" xml:id="_Ref478368421"><?latex \label{ref-0134}?></anchor><anchor role="start" xml:id="_Toc478379030"><?latex \label{ref-0135}?></anchor></title><mediaobject><imageobject><imagedata fileref="d7.5_4IP_1.0.docx.tmp/word/media/image23.png" css:width="432pt" css:height="324pt"/></imageobject></mediaobject></figure><para role="NormalPRACE"><link linkend="_Ref478368421"><?latex {\hyperref[ref-0134]{Figure 16}}?></link> shows strong scaling of the conjugate gradient solver on P100 GPU on PizDaint. The lattice size is given by 32x32x32x96 similar to the strong scaling run on the K40m on Cartesius. 
The test is performed with mixed precision CG in double-double mode (red) and half-double mode (blue).</para><figure><title>Figure 17 Result of second implementation of QCD on P100 GPU on larger test case<anchor role="start" xml:id="_Ref478368605"><?latex \label{ref-0136}?></anchor><anchor role="start" xml:id="_Toc478379031"><?latex \label{ref-0137}?></anchor></title><mediaobject><imageobject><imagedata fileref="d7.5_4IP_1.0.docx.tmp/word/media/image24.png" css:width="432pt" css:height="324pt"/></imageobject></mediaobject></figure><para role="NormalPRACE"><link linkend="_Ref478368605"><?latex {\hyperref[ref-0136]{Figure 17}}?></link> shows strong scaling of the conjugate gradient solver on P100 GPU on PizDaint. The lattice size is increased to 64x64x64x128, which is a large lattice nowadays. By increasing the lattice the scaling test shows that the conjugate gradient solver has a very good strong scaling up to 64 GPUs.</para><para role="NormalPRACE"><phrase role="Emphasis">Xeon Phi results</phrase></para><para role="NormalPRACE">The benchmark results for the XeonPhi benchmark suite are performed on Frioul at CINES, and the hybrid partition on MareNostrum III at BSC. Frioul has one KNL-card per node while the hybrid partition of MareNostrum III is equipped with two KNC per node. The data on Frioul are generated by using the bash-scripts provided by the second implementation of QCD and are done for the two test cases "strong-scaling" with a lattice size of 32x32x32x96 and 64x64x64x128. In case of the data generated at MareNostrum, data for the "strong-scaling" mode on a 32x32x32x96 lattice are shown. 
The benchmark kernel uses a random gauge configuration and the conjugate gradient solver to solve a linear equation involving the clover Wilson Dirac operator.</para><figure><title>Figure 18 Result of second implementation of QCD on KNC<anchor role="start" xml:id="_Ref478368691"><?latex \label{ref-0138}?></anchor><anchor role="start" xml:id="_Toc478379032"><?latex \label{ref-0139}?></anchor></title><mediaobject><imageobject><imagedata fileref="d7.5_4IP_1.0.docx.tmp/word/media/image25.png" css:width="432pt" css:height="324pt"/></imageobject></mediaobject></figure><para role="NormalPRACE" css:page-break-after="avoid"><link linkend="_Ref478368691"><?latex {\hyperref[ref-0138]{Figure 18}}?></link> shows strong scaling of the conjugate gradient solver on KNCs on the hybrid partition on MareNostrum III. The lattice size is given by 32x32x32x96, which corresponds to a moderate lattice size nowadays. The test is performed with a conjugate gradient solver in single precision by using the native mode and 60 OpenMP tasks per MPI process. The run is done on one KNC per node (filled) and two KNCs per node (non-filled).</para><figure><title>Figure 19 Result of second implementation of QCD on KNL<anchor role="start" xml:id="_Ref478368762"><?latex \label{ref-0140}?></anchor><anchor role="start" xml:id="_Toc478379033"><?latex \label{ref-0141}?></anchor></title><mediaobject><imageobject><imagedata fileref="d7.5_4IP_1.0.docx.tmp/word/media/image26.png" css:width="432pt" css:height="324pt"/></imageobject></mediaobject></figure><para role="NormalPRACE"><link linkend="_Ref478368762"><?latex {\hyperref[ref-0140]{Figure 19}}?></link> shows strong scaling results of the conjugate gradient solver on KNLs on Frioul. The lattice size is given by 32x32x32x96 which is similar to the strong scaling run on the KNC on MareNostrum III. The run is performed in quadrant cache mode with 68 OpenMP threads per KNL. 
The test is performed with a conjugate gradient solver in single precision.</para><para xmlns:docx2tex="http://transpect.io/docx2tex" role="Heading3" docx2tex:config="headline"><phrase role="docx2tex:identifier">4.9</phrase>Quantum Espresso<anchor xmlns:hub="http://transpect.io/hub" role="start" xml:id="_Toc478379011"><?latex \label{ref-0142}?></anchor></para><para role="NormalPRACE">Here are sample results for Quantum Espresso. This code has run on Cartesius (see section <link linkend="_Ref477768402"><?latex {\hyperref[ref-0046]{2.2.1}}?></link>) and Marconi (1 node is 1 standalone KNL Xeon Phi 7250, 68 core 1.40 GHz, 16GB MCDRAM, 96GB DDR4 RAM, interconnect is Intel OmniPath).</para><para role="NormalPRACE"><phrase role="Emphasis">Runs on GPU</phrase></para><figure><title>Figure 20 Scalability of Quantum Espresso on GPU for test case 1<anchor role="start" xml:id="_Ref477769024"><?latex \label{ref-0143}?></anchor><anchor role="start" xml:id="_Toc478379034"><?latex \label{ref-0144}?></anchor></title><mediaobject><imageobject><imagedata fileref="d7.5_4IP_1.0.docx.tmp/word/media/image27.png" css:width="459.49307086614175pt" css:height="298.0372440944882pt"/></imageobject></mediaobject></figure><figure><title>Figure 21 Scalability of Quantum Espresso on GPU for test case 2<anchor role="start" xml:id="_Ref477769025"><?latex \label{ref-0145}?></anchor><anchor role="start" xml:id="_Toc478379035"><?latex \label{ref-0146}?></anchor></title><mediaobject><imageobject><imagedata fileref="d7.5_4IP_1.0.docx.tmp/word/media/image28.png" css:width="453.5pt" css:height="267.6pt"/></imageobject></mediaobject></figure><para role="NormalPRACE">Test cases (<link linkend="_Ref477769024"><?latex {\hyperref[ref-0143]{Figure 20}}?></link> and <link linkend="_Ref477769025"><?latex {\hyperref[ref-0145]{Figure 21}}?></link>) show no appreciable speed-up with GPU. 
The inputs are probably too small; they should evolve in future versions of this benchmark suite.</para><para role="NormalPRACE"><phrase role="Emphasis">Runs on KNL</phrase></para><figure><title>Figure 22 Scalability of Quantum Espresso on KNL for test case 1<anchor role="start" xml:id="_Ref477769092"><?latex \label{ref-0147}?></anchor><anchor role="start" xml:id="_Toc478379036"><?latex \label{ref-0148}?></anchor></title><mediaobject><imageobject><imagedata fileref="d7.5_4IP_1.0.docx.tmp/word/media/image29.png" css:width="453.5pt" css:height="313.8pt"/></imageobject></mediaobject></figure><para role="NormalPRACE"><link linkend="_Ref477769092"><?latex {\hyperref[ref-0147]{Figure 22}}?></link> shows the usual pw.x with the small test case A (AUSURF), comparing Marconi Broadwell (36 cores/node) with KNL (68 cores/node) - this test case is probably too small for testing on KNL.</para><figure><title>Figure 23 Quantum Espresso - KNL vs BDW vs BGQ (at scale)<anchor role="start" xml:id="_Ref477998355"><?latex \label{ref-0149}?></anchor><anchor role="start" xml:id="_Toc478379037"><?latex \label{ref-0150}?></anchor></title><mediaobject><imageobject><imagedata fileref="d7.5_4IP_1.0.docx.tmp/word/media/image30.png" css:width="453.5pt" css:height="396.5pt"/></imageobject></mediaobject></figure><para role="NormalPRACE"><link linkend="_Ref477998355"><?latex {\hyperref[ref-0149]{Figure 23}}?></link> <phrase>presents CNT10POR8 which is the large test case, even though it is using the cp.x executable (i.e. 
Car-Parrinello) rather than the usual pw.x (PW SCF calculation).</phrase></para><para xmlns:docx2tex="http://transpect.io/docx2tex" role="Heading3" docx2tex:config="headline"><phrase role="docx2tex:identifier">4.10</phrase>Synthetic benchmarks (SHOC)<anchor xmlns:hub="http://transpect.io/hub" role="start" xml:id="_Ref478378712"><?latex \label{ref-0151}?></anchor><anchor xmlns:hub="http://transpect.io/hub" role="start" xml:id="_Toc478379012"><?latex \label{ref-0152}?></anchor></para><para role="NormalPRACE">The SHOC benchmark has been run on Cartesius, Ouessant and MareNostrum. <link linkend="_Ref477773433"><?latex {\hyperref[ref-0153]{Table 9}}?></link> presents the results:</para><informaltable css:border-collapse="collapse" css:margin-left="5.4pt" css:width="455pt"><tgroup cols="7"><colspec colnum="1" colname="col1" colwidth="41.0986137mm"/><colspec colnum="2" colname="col2" colwidth="21.166668mm"/><colspec colnum="3" colname="col3" colwidth="21.166668mm"/><colspec colnum="4" colname="col4" colwidth="21.166668mm"/><colspec colnum="5" colname="col5" colwidth="16.88041773mm"/><colspec colnum="6" colname="col6" colwidth="19.24402899mm"/><colspec colnum="7" colname="col7" colwidth="21.166668mm"/><tbody><row css:min-height="16pt"><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:width="118.9pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="none" css:border-right-style="none" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col1"/><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="175.3pt" css:border-top-style="solid" css:border-top-width="0.5pt" css:border-left-style="solid" css:border-left-width="0.5pt" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:background-color="#D9D9D9" css:vertical-align="middle" docx2hub:generated-hideMark="" namest="col2" nameend="col4"><para 
css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">NVIDIA GPU</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="160.8pt" css:border-top-style="solid" css:border-top-width="0.5pt" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:background-color="#D9D9D9" css:vertical-align="middle" docx2hub:generated-hideMark="" namest="col5" nameend="col7"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">Intel Xeon Phi</phrase></para></entry></row><row css:min-height="32pt"><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="118.9pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="none" css:border-right-style="none" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col1"/><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="58.4pt" css:border-top-style="none" css:border-left-style="solid" css:border-left-width="0.5pt" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:background-color="#D9D9D9" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col2"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">K40 CUDA</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="58.45pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:background-color="#D9D9D9" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col3"><para 
css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">K40 OpenCL</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="58.45pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:background-color="#D9D9D9" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col4"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">Power 8 + P100 CUDA</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="48.4pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:background-color="#D9D9D9" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col5"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">KNC Offload</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="53.95pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:background-color="#D9D9D9" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col6"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">KNC OpenCL</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="58.45pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" 
css:border-right-width="0.5pt" css:background-color="#D9D9D9" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col7"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">Haswell OpenCL</phrase></para></entry></row><row css:min-height="32pt"><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="118.9pt" css:border-top-style="solid" css:border-top-width="0.5pt" css:border-left-style="solid" css:border-left-width="0.5pt" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="none" css:background-color="#D9D9D9" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col1"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">BusSpeedDownload</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="58.4pt" css:border-top-style="none" css:border-left-style="solid" css:border-left-width="0.5pt" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col2"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">10.5 GB/s</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="58.45pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col3"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">10.56 GB/s</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" 
css:width="58.45pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col4"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">32.23 GB/s</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="48.4pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col5"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">6.6 GB/s</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="53.95pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col6"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">6.8 GB/s</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="58.45pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col7"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">12.4 GB/s</phrase></para></entry></row><row css:min-height="32pt"><entry 
xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="118.9pt" css:border-top-style="none" css:border-left-style="solid" css:border-left-width="0.5pt" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="none" css:background-color="#D9D9D9" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col1"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">BusSpeedReadback</phrase> </para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="58.4pt" css:border-top-style="none" css:border-left-style="solid" css:border-left-width="0.5pt" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col2"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">10.5 GB/s</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="58.45pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col3"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">10.56 GB/s</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="58.45pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col4"><para css:text-align="center" css:text-align-last="center"><phrase 
css:font-family="Calibri" css:color="#000000">34.00 GB/s</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="48.4pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col5"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">6.7 GB/s</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="53.95pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col6"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">6.8 GB/s</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="58.45pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col7"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">12.5 GB/s</phrase></para></entry></row><row css:min-height="32pt"><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="118.9pt" css:border-top-style="none" css:border-left-style="solid" css:border-left-width="0.5pt" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="none" css:background-color="#D9D9D9" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col1"><para 
css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">maxspflops</phrase> </para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="58.4pt" css:border-top-style="none" css:border-left-style="solid" css:border-left-width="0.5pt" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col2"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">3716 GFLOPS</phrase> </para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="58.45pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col3"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">3658 GFLOPS</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="58.45pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col4"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">10424 GFLOPS</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="48.4pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" 
docx2hub:generated-hideMark="" colname="col5"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#FF0000">21581</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="53.95pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col6"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#FF0000">2314 GFLOPS</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="58.45pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col7"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">1647 GFLOPS</phrase> </para></entry></row><row css:min-height="32pt"><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="118.9pt" css:border-top-style="none" css:border-left-style="solid" css:border-left-width="0.5pt" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="none" css:background-color="#D9D9D9" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col1"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">maxdpflops</phrase> </para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="58.4pt" css:border-top-style="none" css:border-left-style="solid" css:border-left-width="0.5pt" css:border-bottom-style="solid" 
css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col2"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">1412 GFLOPS</phrase> </para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="58.45pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col3"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">1411 GFLOPS</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="58.45pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col4"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">5315 GFLOPS</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="48.4pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col5"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#FF0000">16017</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="53.95pt" css:border-top-style="none" css:border-left-style="none" 
css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col6"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#FF0000">2318 GFLOPS</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="58.45pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col7"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">884 GFLOPS</phrase> </para></entry></row><row css:min-height="32pt"><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="118.9pt" css:border-top-style="none" css:border-left-style="solid" css:border-left-width="0.5pt" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="none" css:background-color="#D9D9D9" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col1"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">gmem_readbw</phrase> </para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="58.4pt" css:border-top-style="none" css:border-left-style="solid" css:border-left-width="0.5pt" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col2"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">177 GB/s</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" 
css:text-align="center" css:width="58.45pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col3"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">179 GB/s</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="58.45pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col4"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">575.16 GB/s</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="48.4pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col5"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">170 GB/s</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="53.95pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col6"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">49.7 GB/s</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" 
css:text-align="center" css:width="58.45pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col7"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">20.2 GB/s</phrase></para></entry></row><row css:min-height="32pt"><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="118.9pt" css:border-top-style="none" css:border-left-style="solid" css:border-left-width="0.5pt" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="none" css:background-color="#D9D9D9" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col1"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">gmem_readbw_strided</phrase> </para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="58.4pt" css:border-top-style="none" css:border-left-style="solid" css:border-left-width="0.5pt" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col2"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">18 GB/s</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="58.45pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col3"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" 
css:color="#000000">20 GB/s</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="58.45pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col4"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">99.15 GB/s</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="48.4pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col5"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">N/A</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="53.95pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col6"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">35 GB/s</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="58.45pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col7"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" 
css:color="#FF0000">156 GB/s</phrase> </para></entry></row><row css:min-height="32pt"><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="118.9pt" css:border-top-style="none" css:border-left-style="solid" css:border-left-width="0.5pt" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="none" css:background-color="#D9D9D9" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col1"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">gmem_writebw</phrase> </para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="58.4pt" css:border-top-style="none" css:border-left-style="solid" css:border-left-width="0.5pt" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col2"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">175 GB/s</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="58.45pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col3"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">188 GB/s</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="58.45pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" 
colname="col4"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">436 GB/s</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="48.4pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col5"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">72 GB/s</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="53.95pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col6"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">41 GB/s</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="58.45pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col7"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">13.6 GB/s</phrase></para></entry></row><row css:min-height="48pt"><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="118.9pt" css:border-top-style="none" css:border-left-style="solid" css:border-left-width="0.5pt" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="none" css:background-color="#D9D9D9" 
css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col1"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">gmem_writebw_strided</phrase> </para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="58.4pt" css:border-top-style="none" css:border-left-style="solid" css:border-left-width="0.5pt" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col2"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">7 GB/s</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="58.45pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col3"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">7 GB/s</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="58.45pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col4"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">26.3 GB/s</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="48.4pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" 
css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col5"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">N/A</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="53.95pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col6"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">25 GB/s</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="58.45pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col7"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#FF0000">163 GB/s</phrase></para></entry></row><row css:min-height="32pt"><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="118.9pt" css:border-top-style="none" css:border-left-style="solid" css:border-left-width="0.5pt" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="none" css:background-color="#D9D9D9" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col1"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">lmem_readbw</phrase> </para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="58.4pt" css:border-top-style="none" css:border-left-style="solid" 
css:border-left-width="0.5pt" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col2"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">1168 GB/s</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="58.45pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col3"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">1156 GB/s</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="58.45pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col4"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">4239 GB/s</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="48.4pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col5"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">N/A</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="53.95pt" css:border-top-style="none" 
css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col6"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">442 GB/s</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="58.45pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col7"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">238 GB/s</phrase></para></entry></row><row css:min-height="32pt"><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="118.9pt" css:border-top-style="none" css:border-left-style="solid" css:border-left-width="0.5pt" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="none" css:background-color="#D9D9D9" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col1"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">lmem_writebw</phrase> </para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="58.4pt" css:border-top-style="none" css:border-left-style="solid" css:border-left-width="0.5pt" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col2"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">1194 GB/s</phrase></para></entry><entry 
xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="58.45pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col3"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">1162 GB/s</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="58.45pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col4"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">5488 GB/s</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="48.4pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col5"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">N/A</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="53.95pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col6"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">477 GB/s</phrase></para></entry><entry 
xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="58.45pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col7"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">295 GB/s</phrase></para></entry></row><row css:min-height="32pt"><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="118.9pt" css:border-top-style="none" css:border-left-style="solid" css:border-left-width="0.5pt" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="none" css:background-color="#D9D9D9" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col1"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">BFS</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="58.4pt" css:border-top-style="none" css:border-left-style="solid" css:border-left-width="0.5pt" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col2"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">49,236,500 Edges/s</phrase> </para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="58.45pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col3"><para css:text-align="center" 
css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">42,088,000 Edges/s</phrase> </para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="58.45pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col4"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">91,935,100 Edges/s</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="48.4pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col5"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">N/A</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="53.95pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col6"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">1,635,330 Edges/s</phrase> </para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="58.45pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col7"><para 
css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">14,225,600 Edges/s</phrase> </para></entry></row><row css:min-height="32pt"><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="118.9pt" css:border-top-style="none" css:border-left-style="solid" css:border-left-width="0.5pt" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="none" css:background-color="#D9D9D9" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col1"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">FFT_sp</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="58.4pt" css:border-top-style="none" css:border-left-style="solid" css:border-left-width="0.5pt" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col2"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">523 GFLOPS</phrase> </para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="58.45pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col3"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">377 GFLOPS</phrase> </para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="58.45pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" 
css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col4"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">1472 GFLOPS</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="48.4pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col5"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">135 GFLOPS</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="53.95pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col6"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">71 GFLOPS</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="58.45pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col7"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">80 GFLOPS</phrase> </para></entry></row><row css:min-height="32pt"><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="118.9pt" css:border-top-style="none" css:border-left-style="solid" css:border-left-width="0.5pt" 
css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="none" css:background-color="#D9D9D9" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col1"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">FFT_dp</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="58.4pt" css:border-top-style="none" css:border-left-style="solid" css:border-left-width="0.5pt" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col2"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">262 GFLOPS</phrase> </para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="58.45pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col3"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">61 GFLOPS</phrase> </para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="58.45pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col4"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">733 GFLOPS</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="48.4pt" css:border-top-style="none" 
css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col5"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">69.5 GFLOPS</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="53.95pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col6"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">31 GFLOPS</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="58.45pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col7"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">55 GFLOPS</phrase> </para></entry></row><row css:min-height="32pt"><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="118.9pt" css:border-top-style="none" css:border-left-style="solid" css:border-left-width="0.5pt" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="none" css:background-color="#D9D9D9" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col1"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">SGEMM</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" 
css:text-align="center" css:width="58.4pt" css:border-top-style="none" css:border-left-style="solid" css:border-left-width="0.5pt" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col2"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">2900-2990 GFLOPS</phrase> </para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="58.45pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col3"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">694/761 GFLOPS</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="58.45pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col4"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">8604-8720 GFLOPS</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="48.4pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col5"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">640/645 
GFLOPS</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="53.95pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col6"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">179/217 GFLOPS</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="58.45pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col7"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">419-554 GFLOPS</phrase> </para></entry></row><row css:min-height="32pt"><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="118.9pt" css:border-top-style="none" css:border-left-style="solid" css:border-left-width="0.5pt" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="none" css:background-color="#D9D9D9" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col1"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">DGEMM</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="58.4pt" css:border-top-style="none" css:border-left-style="solid" css:border-left-width="0.5pt" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col2"><para 
css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">1025-1083 GFLOPS</phrase> </para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="58.45pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col3"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">411/433 GFLOPS</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="58.45pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col4"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">3635-3785 GFLOPS</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="48.4pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col5"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">179/190 GFLOPS</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="53.95pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" 
docx2hub:generated-hideMark="" colname="col6"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">76/100 GFLOPS</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="58.45pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col7"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">189-196 GFLOPS</phrase> </para></entry></row><row css:min-height="16pt"><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="118.9pt" css:border-top-style="none" css:border-left-style="solid" css:border-left-width="0.5pt" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="none" css:background-color="#D9D9D9" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col1"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">MD (SP)</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="58.4pt" css:border-top-style="none" css:border-left-style="solid" css:border-left-width="0.5pt" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col2"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">185 GFLOPS</phrase> </para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="58.45pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" 
css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col3"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">91 GFLOPS</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="58.45pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col4"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">483 GFLOPS</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="48.4pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col5"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">28 GFLOPS</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="53.95pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col6"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">33 GFLOPS</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="58.45pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" 
css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col7"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">114 GFLOPS</phrase> </para></entry></row><row css:min-height="16pt"><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="118.9pt" css:border-top-style="none" css:border-left-style="solid" css:border-left-width="0.5pt" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="none" css:background-color="#D9D9D9" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col1"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">MD5Hash</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="58.4pt" css:border-top-style="none" css:border-left-style="solid" css:border-left-width="0.5pt" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col2"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">3.38 GH/s</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="58.45pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col3"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">3.36 GH/s</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" 
css:width="58.45pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col4"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">15.77 GH/s</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="48.4pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col5"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">N/A</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="53.95pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col6"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">1.7 GH/s</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="58.45pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col7"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">1.29 GH/s</phrase></para></entry></row><row css:min-height="16pt"><entry 
xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="118.9pt" css:border-top-style="none" css:border-left-style="solid" css:border-left-width="0.5pt" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="none" css:background-color="#D9D9D9" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col1"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">Reduction</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="58.4pt" css:border-top-style="none" css:border-left-style="solid" css:border-left-width="0.5pt" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col2"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">137 GB/s</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="58.45pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col3"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">150 GB/s</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="58.45pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col4"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" 
css:color="#000000">271 GB/s</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="48.4pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col5"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">99 GB/s</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="53.95pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col6"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">10 GB/s</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="58.45pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col7"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">91 GB/s</phrase></para></entry></row><row css:min-height="16pt"><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="118.9pt" css:border-top-style="none" css:border-left-style="solid" css:border-left-width="0.5pt" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="none" css:background-color="#D9D9D9" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col1"><para css:text-align="center" 
css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">Scan</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="58.4pt" css:border-top-style="none" css:border-left-style="solid" css:border-left-width="0.5pt" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col2"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">47 GB/s</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="58.45pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col3"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">39 GB/s</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="58.45pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col4"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">99.2 GB/s</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="48.4pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col5"><para 
css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">11 GB/s</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="53.95pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col6"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">4.5 GB/s</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="58.45pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col7"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">15 GB/s</phrase></para></entry></row><row css:min-height="16pt"><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="118.9pt" css:border-top-style="none" css:border-left-style="solid" css:border-left-width="0.5pt" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="none" css:background-color="#D9D9D9" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col1"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">Sort</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="58.4pt" css:border-top-style="none" css:border-left-style="solid" css:border-left-width="0.5pt" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" 
css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col2"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">3.08 GB/s</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="58.45pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col3"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">0.54 GB/s</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="58.45pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col4"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">12.54 GB/s</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="48.4pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col5"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">N/A</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="53.95pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" 
css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col6"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">0.11 GB/s</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="58.45pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col7"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">0.35 GB/s</phrase></para></entry></row><row css:min-height="32pt"><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="118.9pt" css:border-top-style="none" css:border-left-style="solid" css:border-left-width="0.5pt" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="none" css:background-color="#D9D9D9" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col1"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">Spmv</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="58.4pt" css:border-top-style="none" css:border-left-style="solid" css:border-left-width="0.5pt" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col2"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">4-23 GFLOPS</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="58.45pt" css:border-top-style="none" css:border-left-style="none" 
css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col3"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">3-17 GFLOPS</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="58.45pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col4"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">23-65 GFLOPS</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="48.4pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col5"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#FF0000">1-17944 GFLOPS</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="53.95pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col6"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">N/A</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="58.45pt" css:border-top-style="none" 
css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col7"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">1-10 GFLOPS</phrase></para></entry></row><row css:min-height="32pt"><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="118.9pt" css:border-top-style="none" css:border-left-style="solid" css:border-left-width="0.5pt" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="none" css:background-color="#D9D9D9" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col1"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">Stencil2D</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="58.4pt" css:border-top-style="none" css:border-left-style="solid" css:border-left-width="0.5pt" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col2"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">123 GFLOPS</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="58.45pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col3"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">135 GFLOPS</phrase></para></entry><entry 
xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="58.45pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col4"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">465 GFLOPS</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="48.4pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col5"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">89 GFLOPS</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="53.95pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col6"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">8.95 GFLOPS</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="58.45pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col7"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">34 
GFLOPS</phrase></para></entry></row><row css:min-height="32pt"><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="118.9pt" css:border-top-style="none" css:border-left-style="solid" css:border-left-width="0.5pt" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="none" css:background-color="#D9D9D9" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col1"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">Stencil2D_dp</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="58.4pt" css:border-top-style="none" css:border-left-style="solid" css:border-left-width="0.5pt" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col2"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">57 GFLOPS</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="58.45pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col3"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">67 GFLOPS</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="58.45pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col4"><para 
css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">258 GFLOPS</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="48.4pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col5"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">16 GFLOPS</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="53.95pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col6"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">7.92 GFLOPS</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="58.45pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col7"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">30 GFLOPS</phrase></para></entry></row><row css:min-height="16pt"><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="118.9pt" css:border-top-style="none" css:border-left-style="solid" css:border-left-width="0.5pt" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="none" css:background-color="#D9D9D9" 
css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col1"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">Triad</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="58.4pt" css:border-top-style="none" css:border-left-style="solid" css:border-left-width="0.5pt" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col2"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">13.5 GB/s</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="58.45pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col3"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">9.9 GB/s</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="58.45pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col4"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">43 GB/s</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="48.4pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" 
css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col5"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">5.76 GB/s</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="53.95pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col6"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">5.57 GB/s</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="58.45pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col7"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">8 GB/s</phrase></para></entry></row><row css:min-height="16pt"><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="118.9pt" css:border-top-style="none" css:border-left-style="solid" css:border-left-width="0.5pt" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="none" css:background-color="#D9D9D9" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col1"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">S3D (level2)</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="58.4pt" css:border-top-style="none" css:border-left-style="solid" css:border-left-width="0.5pt" 
css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col2"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">94 GFLOPS</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="58.45pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col3"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">91 GFLOPS</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="58.45pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col4"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">294 GFLOPS</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="48.4pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col5"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">109 GFLOPS</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="53.95pt" css:border-top-style="none" 
css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col6"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">18 GFLOPS</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:width="58.45pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col7"><para css:page-break-after="avoid" css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000">27 GFLOPS</phrase></para></entry></row></tbody></tgroup></informaltable><para role="Caption">Table 9 Synthetic benchmarks results on GPU and Xeon Phi<anchor role="start" xml:id="_Ref477773433"><?latex \label{ref-0153}?></anchor><anchor role="start" xml:id="_Toc478379046"><?latex \label{ref-0154}?></anchor></para><para role="NormalPRACE">Measurements marked in red are not relevant and should not be considered:</para><itemizedlist mark="bullet"><listitem><para role="NormalPRACE">KNC MaxFlops (both SP and DP): In this case the compiler optimizes away some of the computation (although it shouldn't) <link linkend="_Ref477999206"><?latex {\hyperref[ref-0034]{[19]}}?></link>.</para></listitem><listitem><para role="NormalPRACE">KNC SpMV: For this benchmark, the result is caused by a known bug that is currently being addressed <link linkend="_Ref477999262"><?latex {\hyperref[ref-0035]{[20]}}?></link>.</para></listitem><listitem><para role="NormalPRACE">Haswell gmem_readbw_strided and gmem_writebw_strided: strided read/write benchmarks do not make much sense in the case of the CPU, as the data will be cached in the large L3 caches. 
This is the reason why we see high numbers only in the Haswell case.</para></listitem></itemizedlist><para xmlns:docx2tex="http://transpect.io/docx2tex" role="Heading3" docx2tex:config="headline"><phrase role="docx2tex:identifier">4.11</phrase>SPECFEM3D<anchor xmlns:hub="http://transpect.io/hub" role="start" xml:id="_Toc478379013"><?latex \label{ref-0155}?></anchor></para><para role="NormalPRACE">Tests have been carried out on Ouessant and Frioul.</para><para role="NormalPRACE">So far it has only been possible to run on one fixed core count for each test case, so scaling curves are not available. Test case A ran on 4 KNL and 4 P100. Test case B ran on 10 KNL and 4 P100.</para><informaltable css:border-collapse="collapse" css:text-align="center" css:text-align-last="center" css:width="195pt"><tgroup cols="3"><colspec colnum="1" colname="col1" colwidth="27.60486285mm"/><colspec colnum="2" colname="col2" colwidth="18.25625115mm"/><colspec colnum="3" colname="col3" colwidth="22.930557mm"/><tbody><row css:min-height="16pt" css:text-align="center" css:text-align-last="center"><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:text-align-last="center" css:width="78.25pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="none" css:border-right-style="none" css:white-space="nowrap" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col1"/><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:text-align-last="center" css:width="51.75pt" css:border-top-style="solid" css:border-top-width="0.5pt" css:border-left-style="solid" css:border-left-width="0.5pt" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:background-color="#D9D9D9" css:white-space="nowrap" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col2"><para css:text-align="center" css:text-align-last="center"><phrase 
css:font-family="Calibri" css:color="#000000" xml:lang="en">KNL</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:text-align-last="center" css:width="65pt" css:border-top-style="solid" css:border-top-width="0.5pt" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:background-color="#D9D9D9" css:white-space="nowrap" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col3"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000" xml:lang="en">P100</phrase></para></entry></row><row css:min-height="16pt" css:text-align="center" css:text-align-last="center"><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:text-align-last="center" css:width="78.25pt" css:border-top-style="solid" css:border-top-width="0.5pt" css:border-left-style="solid" css:border-left-width="0.5pt" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:background-color="#D9D9D9" css:white-space="nowrap" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col1"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000" xml:lang="en">Test case A</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:text-align-last="center" css:width="51.75pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:white-space="nowrap" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col2"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000" 
xml:lang="en">66</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:text-align-last="center" css:width="65pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:white-space="nowrap" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col3"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000" xml:lang="en">105</phrase></para></entry></row><row css:min-height="16pt" css:text-align="center" css:text-align-last="center"><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:text-align-last="center" css:width="78.25pt" css:border-top-style="none" css:border-left-style="solid" css:border-left-width="0.5pt" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:background-color="#D9D9D9" css:white-space="nowrap" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col1"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000" xml:lang="en">Test case B</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:text-align-last="center" css:width="51.75pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:white-space="nowrap" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col2"><para css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000" xml:lang="en">21.4</phrase></para></entry><entry xmlns:docx2hub="http://transpect.io/docx2hub" css:text-align="center" css:text-align-last="center" 
css:width="65pt" css:border-top-style="none" css:border-left-style="none" css:border-bottom-style="solid" css:border-bottom-width="0.5pt" css:border-right-style="solid" css:border-right-width="0.5pt" css:white-space="nowrap" css:vertical-align="middle" docx2hub:generated-hideMark="" colname="col3"><para css:page-break-after="avoid" css:text-align="center" css:text-align-last="center"><phrase css:font-family="Calibri" css:color="#000000" xml:lang="en">68</phrase></para></entry></row></tbody></tgroup></informaltable><para role="Caption" css:text-align="center" css:text-align-last="center">Table 10 SPECFEM 3D GLOBE results (run time in second)<anchor role="start" xml:id="_Toc478379047"><?latex \label{ref-0156}?></anchor></para><para xmlns:docx2tex="http://transpect.io/docx2tex" role="Heading2" docx2tex:config="headline"><phrase role="docx2tex:identifier" css:font-size="14pt">5</phrase>Conclusion and future work<anchor xmlns:hub="http://transpect.io/hub" role="start" xml:id="_Ref477340783"><?latex \label{ref-0157}?></anchor><anchor xmlns:hub="http://transpect.io/hub" role="start" xml:id="_Toc478379014"><?latex \label{ref-0158}?></anchor></para><para role="NormalPRACE">The work presented here stands as a first look at application benchmarking on accelerators. Most codes have been selected from the main Unified European Application Benchmark Suite. This paper describes each of them, as well as their implementation, their relevance to the European science community and their test cases. We have presented results on leading-edge systems.</para><para role="NormalPRACE">The suite will be publicly available on the PRACE web site <link linkend="_Ref477156108"><?latex {\hyperref[ref-0016]{[1]}}?></link> where links to download sources and test cases will be published along with compilation and run instructions.</para><para role="NormalPRACE">Task 7.2B in PRACE 4IP started to design a benchmark suite for accelerators. 
This work has been done with the aim of integrating it into the main UEABS so that both can be maintained and evolve together. As PCP (PRACE-3IP) machines will soon be available, it will be very interesting to run the benchmark suite on them: first because these machines will be larger, but also because they will feature energy consumption probes.</para></hub>