NETCDF_INCDIR="-I$NETCDF_INC_DIR -I$NETCDFF_INC_DIR -I/apps/NETCDF/4.4.1.1/INTEL/IMPI/include/"
NETCDF_LIBDIR="-Wl,--allow-multiple-definition -L$NETCDF_LIB_DIR -L$NETCDFF_LIB_DIR -L/apps/NETCDF/4.4.1.1/INTEL/IMPI/lib/"
NETCDF_LIB="-lnetcdff -lnetcdf"
MPI_INCDIR=""
MPI_LIBDIR=""
MPI_LIB=""
HDF5_INCDIR="-I$HDF5_INC_DIR -I/apps/HDF5/1.8.19/INTEL/IMPI/include/"
HDF5_LIBDIR="-L$HDF5_LIB_DIR -L/apps/HDF5/1.8.19/INTEL/IMPI/lib/"
HDF5_LIB="-lhdf5_hl -lhdf5 -lz -lcurl"
BOOST_INCDIR="-I $BOOST_INC_DIR"
BOOST_LIBDIR="-L $BOOST_LIB_DIR"
BOOST_LIB=""
OASIS_INCDIR="-I$PWD/../../oasis3-mct/BLD/build/lib/psmile.MPI1"
OASIS_LIBDIR="-L$PWD/../../oasis3-mct/BLD/lib"
OASIS_LIB="-lpsmile.MPI1 -lscrip -lmct -lmpeu"
module load slurm_setup
module load netcdf-hdf5-all/4.7_hdf5-1.10-intel19-impi
###module load netcdf-hdf5-all/
##module load hdf5
##module load netcdf
##module load netcdf-fortran
################################################################################
################### Projet XIOS ###################
################################################################################
%CCOMPILER mpicc
%FCOMPILER mpif90
%LINKER mpif90 -nofor-main
%BASE_CFLAGS -ansi -w
%PROD_CFLAGS -O3 -DBOOST_DISABLE_ASSERTS
%DEV_CFLAGS -g -O2
%DEBUG_CFLAGS -g
%BASE_FFLAGS -D__NONE__ -ffree-line-length-none
%PROD_FFLAGS -O3
%DEV_FFLAGS -g -O2
%DEBUG_FFLAGS -g
%BASE_INC -D__NONE__
%BASE_LD -lstdc++
%CPP cpp
%FPP cpp -P
%MAKE make
NETCDF_INCDIR="-I$NETCDF_INC_DIR -I$NETCDFF_INC_DIR"
NETCDF_LIBDIR="-Wl,'--allow-multiple-definition' -L$NETCDF_LIB_DIR -L$NETCDFF_LIB_DIR"
NETCDF_LIB="-lnetcdff -lnetcdf"
MPI_INCDIR=""
MPI_LIBDIR=""
MPI_LIB=""
HDF5_INCDIR="-I $HDF5_INC_DIR"
HDF5_LIBDIR="-L $HDF5_LIB_DIR"
HDF5_LIB="-L/dss/dsshome1/lrz/sys/spack/release/21.1.1/opt/x86_64/curl/7.68.0-gcc-b2wrnof/lib/ -lhdf5_hl -lhdf5 -lz -lcurl"
BOOST_INCDIR="-I $BOOST_INC_DIR"
BOOST_LIBDIR="-L $BOOST_LIB_DIR"
BOOST_LIB=""
OASIS_INCDIR="-I$PWD/../../oasis3-mct/BLD/build/lib/psmile.MPI1"
OASIS_LIBDIR="-L$PWD/../../oasis3-mct/BLD/lib"
OASIS_LIB="-lpsmile.MPI1 -lscrip -lmct -lmpeu"
module unload netcdf-c netcdf-fortran hdf5 flavor perl hdf5 boost blitz mpi
module load mpi/openmpi/4.0.5.2
module load flavor/hdf5/parallel
module load netcdf-fortran/4.4.4
module load hdf5/1.8.20
module load boost
module load blitz
module load feature/bridge/heterogenous_mpmd
################################################################################
################### Projet XIOS ###################
################################################################################
%CCOMPILER mpicc
%FCOMPILER mpif90
%LINKER mpif90 -nofor-main
%BASE_CFLAGS -diag-disable 1125 -diag-disable 279 -D BOOST_NO_CXX11_DEFAULTED_FUNCTIONS -D BOOST_NO_CXX11_DELETED_FUNCTIONS
%PROD_CFLAGS -O3 -D BOOST_DISABLE_ASSERTS
#%DEV_CFLAGS -g -traceback
%DEV_CFLAGS -g
%DEBUG_CFLAGS -DBZ_DEBUG -g -traceback -fno-inline
%BASE_FFLAGS -D__NONE__
%PROD_FFLAGS -O3
#%DEV_FFLAGS -g -traceback
%DEV_FFLAGS -g
%DEBUG_FFLAGS -g -traceback
%BASE_INC -D__NONE__
%BASE_LD -lstdc++
%CPP mpicc -EP
%FPP cpp -P
%MAKE gmake
NETCDF_INCDIR="-I $NETCDFC_INCDIR -I $NETCDFFORTRAN_INCDIR"
NETCDF_LIBDIR="-L $NETCDFC_LIBDIR -L $NETCDFFORTRAN_LIBDIR"
NETCDF_LIB="-lnetcdf -lnetcdff"
MPI_INCDIR=""
MPI_LIBDIR=""
MPI_LIB=""
HDF5_INCDIR="-I$HDF5_INCDIR"
HDF5_LIBDIR="-L$HDF5_LIBDIR"
HDF5_LIB="-lhdf5_hl -lhdf5 -lz -lcurl"
BOOST_INCDIR="-I $BOOST_INCDIR"
BOOST_LIBDIR="-L $BOOST_LIBDIR"
BOOST_LIB=""
BLITZ_INCDIR="-I $BLITZ_INCDIR"
BLITZ_LIBDIR="-L $BLITZ_LIBDIR"
BLITZ_LIB=""
OASIS_INCDIR="-I$PWD/../../oasis3-mct/BLD/build/lib/psmile.MPI1"
OASIS_LIBDIR="-L$PWD/../../oasis3-mct/BLD/lib"
OASIS_LIB="-lpsmile.MPI1 -lscrip -lmct -lmpeu"
#only for MEMTRACK debuging : developper only
ADDR2LINE_LIBDIR="-L${WORKDIR}/ADDR2LINE_LIB"
ADDR2LINE_LIB="-laddr2line"
#!/usr/bin/env bash
# A tool to modify XML files used by FCM
# This is just a regexp search and replace, not a proper XML
# parser. Use at own risk.
fixfcm() {
local name value prog=""
for arg in "$@"; do
name="${arg%%=*}"
value=$(printf %q "${arg#*=}")
value="${value//\//\/}"
prog="s/(^%${name} )(.*)/\\1 ${value}/"$'\n'"$prog"
done
sed -r -e "$prog"
}
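# Example usage (illustrative only: the arch file name and flag values below are
# assumptions, not files shipped with this tool). fixfcm reads an FCM arch file
# on stdin, rewrites the value of each named %KEYWORD line, and writes the
# result to stdout:
#   fixfcm PROD_FFLAGS="-O2" BASE_LD="-lstdc++" \
#       < arch-X64_EXAMPLE.fcm > arch-X64_EXAMPLE.fcm.new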
MODULE nemogcm
!!======================================================================
!! *** MODULE nemogcm ***
!! Ocean system : NEMO GCM (ocean dynamics, on-line tracers, biochemistry and sea-ice)
!!======================================================================
!! History : OPA ! 1990-10 (C. Levy, G. Madec) Original code
!! 7.0 ! 1991-11 (M. Imbard, C. Levy, G. Madec)
!! 7.1 ! 1993-03 (M. Imbard, C. Levy, G. Madec, O. Marti, M. Guyon, A. Lazar,
!! P. Delecluse, C. Perigaud, G. Caniaux, B. Colot, C. Maes) release 7.1
!! - ! 1992-06 (L.Terray) coupling implementation
!! - ! 1993-11 (M.A. Filiberti) IGLOO sea-ice
!! 8.0 ! 1996-03 (M. Imbard, C. Levy, G. Madec, O. Marti, M. Guyon, A. Lazar,
!! P. Delecluse, L.Terray, M.A. Filiberti, J. Vialar, A.M. Treguier, M. Levy) release 8.0
!! 8.1 ! 1997-06 (M. Imbard, G. Madec)
!! 8.2 ! 1999-11 (M. Imbard, H. Goosse) sea-ice model
!! ! 1999-12 (V. Thierry, A-M. Treguier, M. Imbard, M-A. Foujols) OPEN-MP
!! ! 2000-07 (J-M Molines, M. Imbard) Open Boundary Conditions (CLIPPER)
!! NEMO 1.0 ! 2002-08 (G. Madec) F90: Free form and modules
!! - ! 2004-06 (R. Redler, NEC CCRLE, Germany) add OASIS[3/4] coupled interfaces
!! - ! 2004-08 (C. Talandier) New trends organization
!! - ! 2005-06 (C. Ethe) Add the 1D configuration possibility
!! - ! 2005-11 (V. Garnier) Surface pressure gradient organization
!! - ! 2006-03 (L. Debreu, C. Mazauric) Agrif implementation
!! - ! 2006-04 (G. Madec, R. Benshila) Step reorganization
!! - ! 2007-07 (J. Chanut, A. Sellar) Unstructured open boundaries (BDY)
!! 3.2 ! 2009-08 (S. Masson) open/write in the listing file in mpp
!! 3.3 ! 2010-05 (K. Mogensen, A. Weaver, M. Martin, D. Lea) Assimilation interface
!! - ! 2010-10 (C. Ethe, G. Madec) reorganisation of initialisation phase
!! 3.3.1! 2011-01 (A. R. Porter, STFC Daresbury) dynamical allocation
!! - ! 2011-11 (C. Harris) decomposition changes for running with CICE
!! 3.6 ! 2012-05 (C. Calone, J. Simeon, G. Madec, C. Ethe) Add grid coarsening
!! - ! 2014-12 (G. Madec) remove KPP scheme and cross-land advection (cla)
!! 4.0 ! 2016-10 (G. Madec, S. Flavoni) domain configuration / user defined interface
!!----------------------------------------------------------------------
!!----------------------------------------------------------------------
!! nemo_gcm : solve ocean dynamics, tracer, biogeochemistry and/or sea-ice
!! nemo_init : initialization of the NEMO system
!! nemo_ctl : initialisation of the control print
!! nemo_closefile: close remaining open files
!! nemo_alloc : dynamical allocation
!!----------------------------------------------------------------------
USE step_oce ! module used in the ocean time stepping module (step.F90)
USE phycst ! physical constant (par_cst routine)
USE domain ! domain initialization (dom_init & dom_cfg routines)
USE closea ! treatment of closed seas (for ln_closea)
USE usrdef_nam ! user defined configuration
USE tideini ! tidal components initialization (tide_ini routine)
USE bdy_oce, ONLY : ln_bdy
USE bdyini ! open boundary cond. setting (bdy_init routine)
USE istate ! initial state setting (istate_init routine)
USE ldfdyn ! lateral viscosity setting (ldfdyn_init routine)
USE ldftra ! lateral diffusivity setting (ldftra_init routine)
USE trdini ! dyn/tra trends initialization (trd_init routine)
USE asminc ! assimilation increments
USE asmbkg ! writing out state trajectory
USE diaptr ! poleward transports (dia_ptr_init routine)
USE diadct ! sections transports (dia_dct_init routine)
USE diaobs ! Observation diagnostics (dia_obs_init routine)
USE diacfl ! CFL diagnostics (dia_cfl_init routine)
USE step ! NEMO time-stepping (stp routine)
USE icbini ! handle bergs, initialisation
USE icbstp ! handle bergs, calving, thermodynamics and transport
USE cpl_oasis3 ! OASIS3 coupling
USE c1d ! 1D configuration
USE step_c1d ! Time stepping loop for the 1D configuration
USE dyndmp ! Momentum damping
USE stopar ! Stochastic param.: ???
USE stopts ! Stochastic param.: ???
USE diurnal_bulk ! diurnal bulk SST
USE step_diu ! diurnal bulk SST timestepping (called from here if run offline)
USE crsini ! initialise grid coarsening utility
USE diatmb ! Top,middle,bottom output
USE dia25h ! 25h mean output
USE sbc_oce , ONLY : lk_oasis
USE wet_dry ! Wetting and drying setting (wad_init routine)
#if defined key_top
USE trcini ! passive tracer initialisation
#endif
#if defined key_nemocice_decomp
USE ice_domain_size, only: nx_global, ny_global
#endif
!
USE lib_mpp ! distributed memory computing
USE mppini ! shared/distributed memory setting (mpp_init routine)
USE lbcnfd , ONLY : isendto, nsndto, nfsloop, nfeloop ! Setup of north fold exchanges
USE lib_fortran ! Fortran utilities (allows no signed zero when 'key_nosignedzero' defined)
#if defined key_iomput
USE xios ! xIOserver
#endif
#if defined key_agrif
USE agrif_all_update ! Master Agrif update
#endif
IMPLICIT NONE
PRIVATE
PUBLIC nemo_gcm ! called by model.F90
PUBLIC nemo_init ! needed by AGRIF
PUBLIC nemo_alloc ! needed by TAM
CHARACTER(lc) :: cform_aaa="( /, 'AAAAAAAA', / ) " ! flag for output listing
#if defined key_mpp_mpi
INCLUDE 'mpif.h'
#endif
!!----------------------------------------------------------------------
!! NEMO/OCE 4.0 , NEMO Consortium (2018)
!! $Id: nemogcm.F90 11098 2019-06-11 13:17:21Z agn $
!! Software governed by the CeCILL license (see ./LICENSE)
!!----------------------------------------------------------------------
CONTAINS
SUBROUTINE nemo_gcm
!!----------------------------------------------------------------------
!! *** ROUTINE nemo_gcm ***
!!
!! ** Purpose : NEMO solves the primitive equations on an orthogonal
!! curvilinear mesh on the sphere.
!!
!! ** Method : - model general initialization
!! - launch the time-stepping (stp routine)
!! - finalize the run by closing files and communications
!!
!! References : Madec, Delecluse, Imbard, and Levy, 1997: internal report, IPSL.
!! Madec, 2008, internal report, IPSL.
!!----------------------------------------------------------------------
INTEGER :: istp ! time step index
DOUBLE PRECISION :: mpi_wtime, sstart, send , tot_time , ssteptime , smstime
DOUBLE PRECISION :: gtot_time , gssteptime , gelapsed_time , step1time ,gstep1time,galltime
INTEGER :: rank, ierror, tag, status(MPI_STATUS_SIZE)
!!----------------------------------------------------------------------
!
#if defined key_agrif
CALL Agrif_Init_Grids() ! AGRIF: set the meshes
#endif
! !-----------------------!
CALL nemo_init !== Initialisations ==!
! !-----------------------!
#if defined key_agrif
CALL Agrif_Declare_Var_dom ! AGRIF: set the meshes for DOM
CALL Agrif_Declare_Var ! " " " " " DYN/TRA
# if defined key_top
CALL Agrif_Declare_Var_top ! " " " " " TOP
# endif
# if defined key_si3
CALL Agrif_Declare_Var_ice ! " " " " " Sea ice
# endif
#endif
! check that all processes are still there... If some processes have an error,
! they will never enter step and the other processes will wait until the end of the cpu time!
CALL mpp_max( 'nemogcm', nstop )
IF(lwp) WRITE(numout,cform_aaa) ! Flag AAAAAAA
! !-----------------------!
! !== time stepping ==!
! !-----------------------!
istp = nit000
!
#if defined key_c1d
DO WHILE ( istp <= nitend .AND. nstop == 0 ) !== C1D time-stepping ==!
CALL stp_c1d( istp )
istp = istp + 1
END DO
#else
!
# if defined key_agrif
! !== AGRIF time-stepping ==!
CALL Agrif_Regrid()
!
! Recursive update from highest nested level to lowest:
CALL Agrif_step_child_adj(Agrif_Update_All)
!
DO WHILE( istp <= nitend .AND. nstop == 0 )
CALL stp ! AGRIF: time stepping
istp = istp + 1
END DO
!
IF( .NOT. Agrif_Root() ) THEN
CALL Agrif_ParentGrid_To_ChildGrid()
IF( ln_diaobs ) CALL dia_obs_wri
IF( ln_timing ) CALL timing_finalize
CALL Agrif_ChildGrid_To_ParentGrid()
ENDIF
!
# else
!
IF( .NOT.ln_diurnal_only ) THEN !== Standard time-stepping ==!
!
CALL MPI_COMM_RANK(MPI_COMM_WORLD, rank, ierror)
DO WHILE( istp <= nitend .AND. nstop == 0 )
#if defined key_mpp_mpi
ncom_stp = istp
IF ( istp == ( nit000 + 1 ) ) elapsed_time = MPI_Wtime()
IF ( istp == nitend ) elapsed_time = MPI_Wtime() - elapsed_time
#endif
sstart = MPI_Wtime()
CALL stp ( istp )
send = MPI_Wtime()
ssteptime = send-sstart
!==IF (rank == 0 ) print *, "Ozan Step ", istp, " - " , ssteptime , "s."==!
IF (istp == 1 ) THEN
step1time = ssteptime
ENDIF
IF (istp == 2 ) THEN
smstime = ssteptime
tot_time = ssteptime
ENDIF
IF (istp > 2 ) THEN
tot_time = tot_time+ssteptime
IF ( ssteptime>smstime ) smstime = ssteptime
ENDIF
istp = istp + 1
END DO
!CALL MPI_REDUCE(tot_time,gtot_time, 1, mpi_double_precision, MPI_MAX, 0, mpi_comm_world,ierror)
!CALL MPI_REDUCE(smstime,gssteptime, 1, mpi_double_precision, MPI_MAX, 0, mpi_comm_world,ierror)
!CALL MPI_REDUCE(elapsed_time,gelapsed_time, 1, mpi_double_precision, MPI_MAX, 0, mpi_comm_world,ierror)
!CALL MPI_REDUCE(step1time,gstep1time, 1, mpi_double_precision, MPI_MAX, 0, mpi_comm_world,ierror)
!CALL MPI_REDUCE(step1time+tot_time,galltime, 1, mpi_double_precision, MPI_MAX, 0, mpi_comm_world,ierror)
!IF (rank == 0 ) print *, "BENCH DONE ",istp," " ,gstep1time," ", gssteptime , " " , gtot_time ," ",gelapsed_time, " ",galltime," s."
print *, "BENCH DONE\t",istp,"\t" ,step1time,"\t", smstime , "\t" , tot_time ,"\t",elapsed_time
!
ELSE !== diurnal SST time-stepping only ==!
!
DO WHILE( istp <= nitend .AND. nstop == 0 )
CALL stp_diurnal( istp ) ! time step only the diurnal SST
istp = istp + 1
END DO
!
ENDIF
!
# endif
!
#endif
!
IF( ln_diaobs ) CALL dia_obs_wri
!
IF( ln_icebergs ) CALL icb_end( nitend )
! !------------------------!
! !== finalize the run ==!
! !------------------------!
IF(lwp) WRITE(numout,cform_aaa) ! Flag AAAAAAA
!
IF( nstop /= 0 .AND. lwp ) THEN ! error print
WRITE(numout,cform_err)
WRITE(numout,*) ' ==>>> nemo_gcm: a total of ', nstop, ' errors have been found'
WRITE(numout,*)
ENDIF
!
IF( ln_timing ) CALL timing_finalize
!
CALL nemo_closefile
!
#if defined key_iomput
CALL xios_finalize ! end mpp communications with xios
IF( lk_oasis ) CALL cpl_finalize ! end coupling and mpp communications with OASIS
#else
IF ( lk_oasis ) THEN ; CALL cpl_finalize ! end coupling and mpp communications with OASIS
ELSEIF( lk_mpp ) THEN ; CALL mppstop( ldfinal = .TRUE. ) ! end mpp communications
ENDIF
#endif
!
IF(lwm) THEN
IF( nstop == 0 ) THEN ; STOP 0
ELSE ; STOP 999
ENDIF
ENDIF
!
END SUBROUTINE nemo_gcm
SUBROUTINE nemo_init
!!----------------------------------------------------------------------
!! *** ROUTINE nemo_init ***
!!
!! ** Purpose : initialization of the NEMO GCM
!!----------------------------------------------------------------------
INTEGER :: ji ! dummy loop indices
INTEGER :: ios, ilocal_comm ! local integers
CHARACTER(len=120), DIMENSION(60) :: cltxt, cltxt2, clnam
!!
NAMELIST/namctl/ ln_ctl , sn_cfctl, nn_print, nn_ictls, nn_ictle, &
& nn_isplt , nn_jsplt, nn_jctls, nn_jctle, &
& ln_timing, ln_diacfl
NAMELIST/namcfg/ ln_read_cfg, cn_domcfg, ln_closea, ln_write_cfg, cn_domcfg_out, ln_use_jattr
!!----------------------------------------------------------------------
!
cltxt = ''
cltxt2 = ''
clnam = ''
cxios_context = 'nemo'
!
! ! Open reference namelist and configuration namelist files
CALL ctl_opn( numnam_ref, 'namelist_ref', 'OLD', 'FORMATTED', 'SEQUENTIAL', -1, 6, .FALSE. )
CALL ctl_opn( numnam_cfg, 'namelist_cfg', 'OLD', 'FORMATTED', 'SEQUENTIAL', -1, 6, .FALSE. )
!
REWIND( numnam_ref ) ! Namelist namctl in reference namelist
READ ( numnam_ref, namctl, IOSTAT = ios, ERR = 901 )
901 IF( ios /= 0 ) CALL ctl_nam ( ios , 'namctl in reference namelist', .TRUE. )
REWIND( numnam_cfg ) ! Namelist namctl in configuration namelist
READ ( numnam_cfg, namctl, IOSTAT = ios, ERR = 902 )
902 IF( ios > 0 ) CALL ctl_nam ( ios , 'namctl in configuration namelist', .TRUE. )
!
REWIND( numnam_ref ) ! Namelist namcfg in reference namelist
READ ( numnam_ref, namcfg, IOSTAT = ios, ERR = 903 )
903 IF( ios /= 0 ) CALL ctl_nam ( ios , 'namcfg in reference namelist', .TRUE. )
REWIND( numnam_cfg ) ! Namelist namcfg in configuration namelist
READ ( numnam_cfg, namcfg, IOSTAT = ios, ERR = 904 )
904 IF( ios > 0 ) CALL ctl_nam ( ios , 'namcfg in configuration namelist', .TRUE. )
! !--------------------------!
! ! Set global domain size ! (control print return in cltxt2)
! !--------------------------!
IF( ln_read_cfg ) THEN ! Read sizes in domain configuration file
CALL domain_cfg ( cltxt2, cn_cfg, nn_cfg, jpiglo, jpjglo, jpkglo, jperio )
!
ELSE ! user-defined namelist
CALL usr_def_nam( cltxt2, clnam, cn_cfg, nn_cfg, jpiglo, jpjglo, jpkglo, jperio )
ENDIF
!
!
! !--------------------------------------------!
! ! set communicator & select the local node !
! ! NB: mynode also opens output.namelist.dyn !
! ! on unit number numond on first proc !
! !--------------------------------------------!
#if defined key_iomput
IF( Agrif_Root() ) THEN
IF( lk_oasis ) THEN
CALL cpl_init( "oceanx", ilocal_comm ) ! nemo local communicator given by oasis
CALL xios_initialize( "not used" ,local_comm= ilocal_comm ) ! send nemo communicator to xios
ELSE
CALL xios_initialize( "for_xios_mpi_id",return_comm=ilocal_comm ) ! nemo local communicator given by xios
ENDIF
ENDIF
! Nodes selection (control print return in cltxt)
narea = mynode( cltxt, 'output.namelist.dyn', numnam_ref, numnam_cfg, numond , nstop, ilocal_comm )
#else
IF( lk_oasis ) THEN
IF( Agrif_Root() ) THEN
CALL cpl_init( "oceanx", ilocal_comm ) ! nemo local communicator given by oasis
ENDIF
! Nodes selection (control print return in cltxt)
narea = mynode( cltxt, 'output.namelist.dyn', numnam_ref, numnam_cfg, numond , nstop, ilocal_comm )
ELSE
ilocal_comm = 0 ! Nodes selection (control print return in cltxt)
narea = mynode( cltxt, 'output.namelist.dyn', numnam_ref, numnam_cfg, numond , nstop )
ENDIF
#endif
narea = narea + 1 ! mynode return the rank of proc (0 --> jpnij -1 )
IF( sn_cfctl%l_config ) THEN
! Activate finer control of report outputs
! optionally switch off output from selected areas (note this only
! applies to output which does not involve global communications)
IF( ( narea < sn_cfctl%procmin .OR. narea > sn_cfctl%procmax ) .OR. &
& ( MOD( narea - sn_cfctl%procmin, sn_cfctl%procincr ) /= 0 ) ) &
& CALL nemo_set_cfctl( sn_cfctl, .FALSE., .FALSE. )
ELSE
! Use ln_ctl to turn on or off all options.
CALL nemo_set_cfctl( sn_cfctl, ln_ctl, .TRUE. )
ENDIF
lwm = (narea == 1) ! control of output namelists
lwp = (narea == 1) .OR. ln_ctl ! control of all listing output print
IF(lwm) THEN ! write merged namelists from earlier to output namelist
! ! now that the file has been opened in call to mynode.
! ! NB: nammpp has already been written in mynode (if lk_mpp_mpi)
WRITE( numond, namctl )
WRITE( numond, namcfg )
IF( .NOT.ln_read_cfg ) THEN
DO ji = 1, SIZE(clnam)
IF( TRIM(clnam(ji)) /= '' ) WRITE(numond, * ) clnam(ji) ! namusr_def print
END DO
ENDIF
ENDIF
IF(lwp) THEN ! open listing units
!
CALL ctl_opn( numout, 'ocean.output', 'REPLACE', 'FORMATTED', 'SEQUENTIAL', -1, 6, .FALSE., narea )
!
WRITE(numout,*)
WRITE(numout,*) ' CNRS - NERC - Met OFFICE - MERCATOR-ocean - INGV - CMCC'
WRITE(numout,*) ' NEMO team'
WRITE(numout,*) ' Ocean General Circulation Model'
WRITE(numout,*) ' NEMO version 4.0 (2019) '
WRITE(numout,*)
WRITE(numout,*) " ._ ._ ._ ._ ._ "
WRITE(numout,*) " _.-._)`\_.-._)`\_.-._)`\_.-._)`\_.-._)`\_ "
WRITE(numout,*)
WRITE(numout,*) " o _, _, "
WRITE(numout,*) " o .' ( .-' / "
WRITE(numout,*) " o _/..._'. .' / "
WRITE(numout,*) " ( o .-'` ` '-./ _.' "
WRITE(numout,*) " ) ( o) ;= <_ ( "
WRITE(numout,*) " ( '-.,\\__ __.-;`\ '. ) "
WRITE(numout,*) " ) ) \) |`\ \) '. \ ( ( "
WRITE(numout,*) " ( ( \_/ '-._\ ) ) "
WRITE(numout,*) " ) ) jgs ` ( ( "
WRITE(numout,*) " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ "
WRITE(numout,*)
DO ji = 1, SIZE(cltxt)
IF( TRIM(cltxt (ji)) /= '' ) WRITE(numout,*) TRIM(cltxt(ji)) ! control print of mynode
END DO
WRITE(numout,*)
WRITE(numout,*)
DO ji = 1, SIZE(cltxt2)
IF( TRIM(cltxt2(ji)) /= '' ) WRITE(numout,*) TRIM(cltxt2(ji)) ! control print of domain size
END DO
!
WRITE(numout,cform_aaa) ! Flag AAAAAAA
!
ENDIF
! open /dev/null file to be able to suppress output writes easily
CALL ctl_opn( numnul, '/dev/null', 'REPLACE', 'FORMATTED', 'SEQUENTIAL', -1, 6, .FALSE. )
!
! ! Domain decomposition
CALL mpp_init ! MPP
! Now we know the dimensions of the grid and numout has been set: we can allocate arrays
CALL nemo_alloc()
! !-------------------------------!
! ! NEMO general initialization !
! !-------------------------------!
CALL nemo_ctl ! Control prints
!
! ! General initialization
IF( ln_timing ) CALL timing_init ! timing
IF( ln_timing ) CALL timing_start( 'nemo_init')
!
CALL phy_cst ! Physical constants
CALL eos_init ! Equation of state
IF( lk_c1d ) CALL c1d_init ! 1D column configuration
CALL wad_init ! Wetting and drying options
CALL dom_init("OPA") ! Domain
IF( ln_crs ) CALL crs_init ! coarsened grid: domain initialization
IF( ln_ctl ) CALL prt_ctl_init ! Print control
CALL diurnal_sst_bulk_init ! diurnal sst
IF( ln_diurnal ) CALL diurnal_sst_coolskin_init ! cool skin
!
IF( ln_diurnal_only ) THEN ! diurnal only: a subset of the initialisation routines
CALL istate_init ! ocean initial state (Dynamics and tracers)
CALL sbc_init ! Forcings : surface module
CALL tra_qsr_init ! penetrative solar radiation qsr
IF( ln_diaobs ) THEN ! Observation & model comparison
CALL dia_obs_init ! Initialize observational data
CALL dia_obs( nit000 - 1 ) ! Observation operator for restart
ENDIF
IF( lk_asminc ) CALL asm_inc_init ! Assimilation increments
!
RETURN ! end of initialization
ENDIF
CALL istate_init ! ocean initial state (Dynamics and tracers)
! ! external forcing
CALL tide_init ! tidal harmonics
CALL sbc_init ! surface boundary conditions (including sea-ice)
CALL bdy_init ! Open boundaries initialisation
! ! Ocean physics
CALL zdf_phy_init ! Vertical physics
! ! Lateral physics
CALL ldf_tra_init ! Lateral ocean tracer physics
CALL ldf_eiv_init ! eddy induced velocity param.
CALL ldf_dyn_init ! Lateral ocean momentum physics
! ! Active tracers
IF( ln_traqsr ) CALL tra_qsr_init ! penetrative solar radiation qsr
CALL tra_bbc_init ! bottom heat flux
CALL tra_bbl_init ! advective (and/or diffusive) bottom boundary layer scheme
CALL tra_dmp_init ! internal tracer damping
CALL tra_adv_init ! horizontal & vertical advection
CALL tra_ldf_init ! lateral mixing
! ! Dynamics
IF( lk_c1d ) CALL dyn_dmp_init ! internal momentum damping
CALL dyn_adv_init ! advection (vector or flux form)
CALL dyn_vor_init ! vorticity term including Coriolis
CALL dyn_ldf_init ! lateral mixing
CALL dyn_hpg_init ! horizontal gradient of Hydrostatic pressure
CALL dyn_spg_init ! surface pressure gradient
#if defined key_top
! ! Passive tracers
CALL trc_init
#endif
IF( l_ldfslp ) CALL ldf_slp_init ! slope of lateral mixing
! ! Icebergs
CALL icb_init( rdt, nit000) ! initialise icebergs instance
! ! Misc. options
CALL sto_par_init ! Stochastic parametrization
IF( ln_sto_eos ) CALL sto_pts_init ! Random T/S fluctuations
! ! Diagnostics
IF( lk_floats ) CALL flo_init ! drifting Floats
IF( ln_diacfl ) CALL dia_cfl_init ! Initialise CFL diagnostics
CALL dia_ptr_init ! Poleward TRansports initialization
IF( lk_diadct ) CALL dia_dct_init ! Sections transports
CALL dia_hsb_init ! heat content, salt content and volume budgets
CALL trd_init ! Mixed-layer/Vorticity/Integral constraints trends
CALL dia_obs_init ! Initialize observational data
CALL dia_tmb_init ! TMB outputs
CALL dia_25h_init ! 25h mean outputs
IF( ln_diaobs ) CALL dia_obs( nit000-1 ) ! Observation operator for restart
! ! Assimilation increments
IF( lk_asminc ) CALL asm_inc_init ! Initialize assimilation increments
!
IF(lwp) WRITE(numout,cform_aaa) ! Flag AAAAAAA
!
IF( ln_timing ) CALL timing_stop( 'nemo_init')
!
END SUBROUTINE nemo_init
SUBROUTINE nemo_ctl
!!----------------------------------------------------------------------
!! *** ROUTINE nemo_ctl ***
!!
!! ** Purpose : control print setting
!!
!! ** Method : - print namctl information and check some consistencies
!!----------------------------------------------------------------------
!
IF(lwp) THEN ! control print
WRITE(numout,*)
WRITE(numout,*) 'nemo_ctl: Control prints'
WRITE(numout,*) '~~~~~~~~'
WRITE(numout,*) ' Namelist namctl'
WRITE(numout,*) ' run control (for debugging) ln_ctl = ', ln_ctl
WRITE(numout,*) ' finer control over o/p sn_cfctl%l_config = ', sn_cfctl%l_config
WRITE(numout,*) ' sn_cfctl%l_runstat = ', sn_cfctl%l_runstat
WRITE(numout,*) ' sn_cfctl%l_trcstat = ', sn_cfctl%l_trcstat
WRITE(numout,*) ' sn_cfctl%l_oceout = ', sn_cfctl%l_oceout
WRITE(numout,*) ' sn_cfctl%l_layout = ', sn_cfctl%l_layout
WRITE(numout,*) ' sn_cfctl%l_mppout = ', sn_cfctl%l_mppout
WRITE(numout,*) ' sn_cfctl%l_mpptop = ', sn_cfctl%l_mpptop
WRITE(numout,*) ' sn_cfctl%procmin = ', sn_cfctl%procmin
WRITE(numout,*) ' sn_cfctl%procmax = ', sn_cfctl%procmax
WRITE(numout,*) ' sn_cfctl%procincr = ', sn_cfctl%procincr
WRITE(numout,*) ' sn_cfctl%ptimincr = ', sn_cfctl%ptimincr
WRITE(numout,*) ' level of print nn_print = ', nn_print
WRITE(numout,*) ' Start i index for SUM control nn_ictls = ', nn_ictls
WRITE(numout,*) ' End i index for SUM control nn_ictle = ', nn_ictle
WRITE(numout,*) ' Start j index for SUM control nn_jctls = ', nn_jctls
WRITE(numout,*) ' End j index for SUM control nn_jctle = ', nn_jctle
WRITE(numout,*) ' number of proc. following i nn_isplt = ', nn_isplt
WRITE(numout,*) ' number of proc. following j nn_jsplt = ', nn_jsplt
WRITE(numout,*) ' timing by routine ln_timing = ', ln_timing
WRITE(numout,*) ' CFL diagnostics ln_diacfl = ', ln_diacfl
ENDIF
!
nprint = nn_print ! convert DOCTOR namelist names into OLD names
nictls = nn_ictls
nictle = nn_ictle
njctls = nn_jctls
njctle = nn_jctle
isplt = nn_isplt
jsplt = nn_jsplt
IF(lwp) THEN ! control print
WRITE(numout,*)
WRITE(numout,*) ' Namelist namcfg'
WRITE(numout,*) ' read domain configuration file ln_read_cfg = ', ln_read_cfg
WRITE(numout,*) ' filename to be read cn_domcfg = ', TRIM(cn_domcfg)
WRITE(numout,*) ' keep closed seas in the domain (if exist) ln_closea = ', ln_closea
WRITE(numout,*) ' create a configuration definition file ln_write_cfg = ', ln_write_cfg
WRITE(numout,*) ' filename to be written cn_domcfg_out = ', TRIM(cn_domcfg_out)
WRITE(numout,*) ' use file attribute if exists as i/p j-start ln_use_jattr = ', ln_use_jattr
ENDIF
IF( .NOT.ln_read_cfg ) ln_closea = .false. ! closed seas can only be handled when reading a domcfg file
!
! ! Parameter control
!
IF( ln_ctl ) THEN ! sub-domain area indices for the control prints
IF( lk_mpp .AND. jpnij > 1 ) THEN
isplt = jpni ; jsplt = jpnj ; ijsplt = jpni*jpnj ! the domain is forced to the real split domain
ELSE
IF( isplt == 1 .AND. jsplt == 1 ) THEN
CALL ctl_warn( ' - isplt & jsplt are equal to 1', &
& ' - the print control will be done over the whole domain' )
ENDIF
ijsplt = isplt * jsplt ! total number of processors ijsplt
ENDIF
IF(lwp) WRITE(numout,*)' - The total number of processors over which the'
IF(lwp) WRITE(numout,*)' print control will be done is ijsplt : ', ijsplt
!
! ! indices used for the SUM control
IF( nictls+nictle+njctls+njctle == 0 ) THEN ! print control done over the default area
lsp_area = .FALSE.
ELSE ! print control done over a specific area
lsp_area = .TRUE.
IF( nictls < 1 .OR. nictls > jpiglo ) THEN
CALL ctl_warn( ' - nictls must be 1<=nictls<=jpiglo, it is forced to 1' )
nictls = 1
ENDIF
IF( nictle < 1 .OR. nictle > jpiglo ) THEN
CALL ctl_warn( ' - nictle must be 1<=nictle<=jpiglo, it is forced to jpiglo' )
nictle = jpiglo
ENDIF
IF( njctls < 1 .OR. njctls > jpjglo ) THEN
CALL ctl_warn( ' - njctls must be 1<=njctls<=jpjglo, it is forced to 1' )
njctls = 1
ENDIF
IF( njctle < 1 .OR. njctle > jpjglo ) THEN
CALL ctl_warn( ' - njctle must be 1<=njctle<=jpjglo, it is forced to jpjglo' )
njctle = jpjglo
ENDIF
ENDIF
ENDIF
!
IF( 1._wp /= SIGN(1._wp,-0._wp) ) CALL ctl_stop( 'nemo_ctl: The intrinsic SIGN function follows f2003 standard.', &
& 'Compile with key_nosignedzero enabled:', &
& '--> add -Dkey_nosignedzero to the definition of %CPP in your arch file' )
!
#if defined key_agrif
IF( ln_timing ) CALL ctl_stop( 'AGRIF not implemented with ln_timing = true')
#endif
!
END SUBROUTINE nemo_ctl
SUBROUTINE nemo_closefile
!!----------------------------------------------------------------------
!! *** ROUTINE nemo_closefile ***
!!
!! ** Purpose : Close the files
!!----------------------------------------------------------------------
!
IF( lk_mpp ) CALL mppsync
!
CALL iom_close ! close all input/output files managed by iom_*
!
IF( numstp /= -1 ) CLOSE( numstp ) ! time-step file
IF( numrun /= -1 ) CLOSE( numrun ) ! run statistics file
IF( numnam_ref /= -1 ) CLOSE( numnam_ref ) ! oce reference namelist
IF( numnam_cfg /= -1 ) CLOSE( numnam_cfg ) ! oce configuration namelist
IF( lwm.AND.numond /= -1 ) CLOSE( numond ) ! oce output namelist
IF( numnam_ice_ref /= -1 ) CLOSE( numnam_ice_ref ) ! ice reference namelist
IF( numnam_ice_cfg /= -1 ) CLOSE( numnam_ice_cfg ) ! ice configuration namelist
IF( lwm.AND.numoni /= -1 ) CLOSE( numoni ) ! ice output namelist
IF( numevo_ice /= -1 ) CLOSE( numevo_ice ) ! ice variables (temp. evolution)
IF( numout /= 6 ) CLOSE( numout ) ! standard model output file
IF( numdct_vol /= -1 ) CLOSE( numdct_vol ) ! volume transports
IF( numdct_heat /= -1 ) CLOSE( numdct_heat ) ! heat transports
IF( numdct_salt /= -1 ) CLOSE( numdct_salt ) ! salt transports
!
numout = 6 ! redefine numout in case it is used after this point...
!
END SUBROUTINE nemo_closefile
SUBROUTINE nemo_alloc
!!----------------------------------------------------------------------
!! *** ROUTINE nemo_alloc ***
!!
!! ** Purpose : Allocate all the dynamic arrays of the OPA modules
!!
!! ** Method :
!!----------------------------------------------------------------------
USE diawri , ONLY : dia_wri_alloc
USE dom_oce , ONLY : dom_oce_alloc
USE trc_oce , ONLY : trc_oce_alloc
USE bdy_oce , ONLY : bdy_oce_alloc
#if defined key_diadct
USE diadct , ONLY : diadct_alloc
#endif
!
INTEGER :: ierr
!!----------------------------------------------------------------------
!
ierr = oce_alloc () ! ocean
ierr = ierr + dia_wri_alloc()
ierr = ierr + dom_oce_alloc() ! ocean domain
ierr = ierr + zdf_oce_alloc() ! ocean vertical physics
ierr = ierr + trc_oce_alloc() ! shared TRC / TRA arrays
ierr = ierr + bdy_oce_alloc() ! bdy masks (incl. initialization)
!
#if defined key_diadct
ierr = ierr + diadct_alloc () !
#endif
!
CALL mpp_sum( 'nemogcm', ierr )
IF( ierr /= 0 ) CALL ctl_stop( 'STOP', 'nemo_alloc: unable to allocate standard ocean arrays' )
!
END SUBROUTINE nemo_alloc
SUBROUTINE nemo_set_cfctl(sn_cfctl, setto, for_all )
!!----------------------------------------------------------------------
!! *** ROUTINE nemo_set_cfctl ***
!!
!! ** Purpose : Set elements of the output control structure to setto.
!! for_all should be .false. unless all areas are to be
!! treated identically.
!!
!! ** Method : Note this routine can be used to switch on/off some
!! types of output for selected areas but any output types
!! that involve global communications (e.g. mpp_max, glob_sum)
!! should be protected from selective switching by the
!! for_all argument
!!----------------------------------------------------------------------
LOGICAL :: setto, for_all
TYPE(sn_ctl) :: sn_cfctl
!!----------------------------------------------------------------------
IF( for_all ) THEN
sn_cfctl%l_runstat = setto
sn_cfctl%l_trcstat = setto
ENDIF
sn_cfctl%l_oceout = setto
sn_cfctl%l_layout = setto
sn_cfctl%l_mppout = setto
sn_cfctl%l_mpptop = setto
END SUBROUTINE nemo_set_cfctl
!!======================================================================
END MODULE nemogcm
========================================================================
README file for PRACE Accelerator Benchmark Code PFARM (stage EXDIG, program RMX95)
========================================================================
Author: Andrew Sunderland (andrew.sunderland@stfc.ac.uk).
The code download should contain the following directories:
benchmark/RMX_MPI_OMP: RMX source files for running on Host or KNL (using serial or threaded LAPACK or MKL)
benchmark/RMX_MAGMA_GPU: RMX source for running on CPU/GPU nodes using MAGMA
benchmark/run: Run directory with input files
benchmark/xdr: XDR library src files and static XDR library file
benchmark/data: Data files for the benchmark test cases
pfarm/cpu: Source files and example scripts for running on CPUs (using serial or threaded LAPACK or MKL)
pfarm/gpu: Source files and example scripts for running on CPU/GPU nodes (using serial or threaded LAPACK or MKL and MAGMA)
benchmark/lib: Directory of library files used (static XDR library file)
benchmark/src_xdr: XDR library src files
benchmark/data: Data files for the benchmark test cases (to be created and downloaded separately (see below))
* Download benchmark data files
Create data directories:
$> cd pfarm
$> mkdir data
$> cd data
$> mkdir test_case_1_atom
$> mkdir test_case_2_mol
Copy files phzin.ctl, XJTARMOM and HXJ030 to the test_case_1_atom directory from:
https://repository.prace-ri.eu/ueabs/PFARM/2.2/test_case_1_atom/phzin.ctl
https://repository.prace-ri.eu/ueabs/PFARM/2.2/test_case_1_atom/XJTARMOM
https://repository.prace-ri.eu/ueabs/PFARM/2.2/test_case_1_atom/HXJ030
Copy files phzin.ctl and H to the test_case_2_mol directory from:
https://repository.prace-ri.eu/ueabs/PFARM/2.2/test_case_2_mol/phzin.ctl
https://repository.prace-ri.eu/ueabs/PFARM/2.2/test_case_2_mol/H
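For example, the files can be fetched directly into the data directories with wget (assuming wget is available; curl -O works equally well):
$> cd test_case_1_atom
$> wget https://repository.prace-ri.eu/ueabs/PFARM/2.2/test_case_1_atom/phzin.ctl
$> wget https://repository.prace-ri.eu/ueabs/PFARM/2.2/test_case_1_atom/XJTARMOM
$> wget https://repository.prace-ri.eu/ueabs/PFARM/2.2/test_case_1_atom/HXJ030
$> cd ../test_case_2_mol
$> wget https://repository.prace-ri.eu/ueabs/PFARM/2.2/test_case_2_mol/phzin.ctl
$> wget https://repository.prace-ri.eu/ueabs/PFARM/2.2/test_case_2_mol/H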
The code uses the eXternal Data Representation library (XDR) for cross-platform
compatibility of unformatted data files. The XDR source files are provided with this code bundle.
and can be obtained from various sources, including
http://meteora.ucsd.edu/~pierce/fxdr_home_page.html
http://people.redhat.com/rjones/portablexdr/
----------------------------------------------------------------------------
* Installing (MAGMA GPU Only)
Download MAGMA (current version magma-2.2.0) from http://icl.utk.edu/magma/
Install MAGMA : Modify the make.inc file to indicate your C/C++
compiler, Fortran compiler, and determine where CUDA, CPU BLAS, and
LAPACK are installed on your system. Refer to MAGMA documentation for further details
----------------------------------------------------------------------------
* Install XDR
Build XDR library:
update DEFS file for your compiler and environment
$> cd src_xdr
$> make
(ignore warnings related to float/double type mismatches in xdr_rmat64.c - this is not relevant for this benchmark)
The validity of the XDR library can be tested by running test_xdr
$> ./test_xdr
rpc headers may not be available for XDR on the target platform, leading to compilation errors of the type:
cannot open source file "rpc/rpc.h"
#include <rpc/rpc.h>
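A common workaround, sketched here under the assumption that the libtirpc package is installed in the usual system locations (adjust paths for your platform), is to point the compiler at the libtirpc headers and link against libtirpc:
$> export CPATH=/usr/include/tirpc:$CPATH
(or add -I/usr/include/tirpc to the compile flags in DEFS, and -ltirpc to the link line; see the STATIC_LIBS note below)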
----------------------------------------------------------------------------
* Install RMX_MPI_OMP
$> cd RMX_MPI_OMP
For this case use the make include file DEFS_Intel_rpc
* Install CPU version (MPI and OpenMP)
$> cd cpu
Update DEFS file for your setup, ensuring you are linking to a LAPACK or MKL library (or equivalent).
This is usually facilitated by e.g. compiling with -mkl=parallel (Intel compiler) or loading the appropriate library modules.
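As an illustrative sketch only (module names are site-specific assumptions):
$> module load intel-compilers intel-mkl
and then make sure the Fortran flags in DEFS contain -mkl=parallel (or -qmkl=parallel with newer Intel compilers).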
** To install the atomic version of the code (recommended as the default benchmark)
$> cd src_mpi_omp_atom
$> make
* Install RMX_MAGMA_GPU
Set MAGMADIR, CUDADIR environment variables to point to MAGMA and CUDA installations
$> cd RMX_MAGMA_GPU
Update DEFS file for your setup
** To install the molecular version of the code
$> cd src_mpi_omp_mol
$> make
The -ltirpc option for 'STATIC_LIBS' in 'DEFS' should only be included when the XDR library was built with 'DEFS_Intel_rpc'.
----------------------------------------------------------------------------
* Run RMX
==========
The RMX application is run via the executable "rmx95"
For the FEIII dataset, the program requires the following input files from the data directory linked to the run directory:
phzin.ctl
XJTARMOM
HXJ030
* Install GPU version (MPI / OpenMP / MAGMA / CUDA )
Set the MAGMADIR, CUDADIR environment variables to point to MAGMA and CUDA installations.
The numerical library MAGMA may be provided through the modules system of the platform.
Please check target platform user guides for linking instructions.
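If MAGMA and CUDA are installed locally rather than provided as modules, the variables can be set by hand; the install prefixes below are assumptions and should be adjusted to your system:
$> export MAGMADIR=/path/to/magma-2.2.0
$> export CUDADIR=/usr/local/cuda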
These files are located in benchmark/run
A guide to each of the variables in the namelist in phzin.ctl can be found at:
https://hpcforge.org/plugins/mediawiki/wiki/pfarm/images/9/99/Phz_rep.pdf
However, it is recommended that these inputs are not changed for the benchmark runs;
problem size, runtime etc. are better controlled via the environment variables listed below.
$> module load magma
If unavailable via a module, MAGMA may need to be installed (see the MAGMA installation section above)
$> cd gpu
* Run on CPUs (or KNLs)
=======================
A typical PBS script to run the RMX_MPI_OMP benchmark on 4 KNL nodes (4 MPI tasks with 64 threads per MPI task) is listed below:
Settings will vary according to your local environment.
Update DEFS file for your setup
#PBS -N rmx95_4x64
#PBS -l select=4
#PBS -l walltime=01:00:00
#PBS -A my_account_id
* To install the atomic version of the code (recommended as the default benchmark)
$> cd src_mpi_gpu_atom
$> make
cd $PBS_O_WORKDIR
export OMP_NUM_THREADS=64
** To install the molecular version of the code
$> cd src_mpi_gpu_mol
$> make
The -ltirpc option for 'STATIC_LIBS' in 'DEFS' should only be included when the XDR library was built with 'DEFS_Intel_rpc'.
# Set some code-specific environment variables (for details see below) e.g.:
export RMX_NSECT_FINE=4
export RMX_NSECT_COARSE=4
export RMX_NL_FINE=12
export RMX_NL_COARSE=6
# Run on 4 nodes with 1 MPI task per node and 64 OpenMP threads per MPI task
aprun -N 1 -n 4 -d $OMP_NUM_THREADS ./rmx95
* Run on CPU/GPU nodes
======================
A typical PBS script to run the RMX_MPI_GPU benchmark on 4 CPU nodes (with 2 GPUs per node) is listed below:
Settings will vary according to your local environment.
----------------------------------------------------------------------------
* Running PFARM
=================
#PBS -N rmx95_4MPIx2GPU
#PBS -l select=4
#PBS -l walltime=01:00:00
#PBS -A my_account_id
For the atomic dataset, the program requires the following input files,
located in data/test_case_1_atom:
phzin.ctl
XJTARMOM
HXJ030
cd $PBS_O_WORKDIR
# Set number of GPUs per node to use (MAGMA auto-parallelises over these)
export RMX_NGPU=2
For the molecular dataset, the program requires the following input files,
located in data/test_case_2_mol:
phzin.ctl
H
# Set some code-specific environment variables (for details see below) e.g.:
export RMX_NSECT_FINE=4
export RMX_NSECT_COARSE=4
export RMX_NL_FINE=12
export RMX_NL_COARSE=6
It is recommended that the settings in the input file phzin.ctl are not changed for the benchmark runs;
problem size, runtime etc. are better controlled via the environment variables listed below.
# Run on 4 nodes with 1 MPI task per node and 2 GPUs per node
aprun -N 1 -n 4 ./rmx95
To setup run directories with the correct executables and datafiles, bash script files are provided:
cpu/setup_run_cpu_atom.scr
cpu/setup_run_cpu_mol.scr
gpu/setup_run_gpu_atom.scr
gpu/setup_run_gpu_mol.scr
----------------------------------------------------------------------------
Example submission job scripts for cpu / gpu / atomic and molecular cases are provided in the directories
cpu/example_job_scripts
gpu/example_job_scripts
* Run-time environment variable settings
It is recommended that the RMX (PFARM) specific environment variables are set to those specified in
the example scripts, as this provides a suitably sized, physically meaningful benchmark. However, if
users wish to experiment with the settings, a guide is given below.
The following environment variables, which can for example be set inside the job script, allow the dimensions of the H sector matrix
and the number of sectors to be changed when undertaking benchmarks.
These can be adapted by the user to suit benchmark load requirements, e.g. short vs long runs.
Each MPI task will pick up a sector calculation, which is then distributed amongst the available threads per node (for CPU and KNL)
or offloaded (for GPU). The maximum number of MPI tasks for a region calculation should not exceed the number of sectors specified.
There is no limit on the number of threads, though for efficient performance on current hardware it is generally recommended to use between
16 and 64 threads per MPI task. The distribution of sectors among MPI tasks is simple round-robin.
RMX_NGPU : refers to the number of shared GPUs per node (only for RMX_MAGMA_GPU)
RMX_NSECT_FINE : sets the number of sectors for the Fine region (it is recommended to set this to a low number if the sector Hamiltonian matrix dimension is large).
RMX_NSECT_COARSE : sets the number of sectors for the Coarse region (it is recommended to set this to a low number if the sector Hamiltonian matrix dimension is large).
RMX_NL_FINE : sets the number of basis functions for the Fine region sector calculations (this will determine the size of the sector Hamiltonian matrix).
RMX_NL_COARSE : sets the number of basis functions for the Coarse region sector calculations (this will determine the size of the sector Hamiltonian matrix).
RMX_NSECT_FINE : sets the number of sectors for the Fine region (e.g. 16 for smaller runs, 256 for larger-scale runs).
The molecular case is limited to a maximum of 512 sectors for this benchmark.
RMX_NSECT_COARSE : sets the number of sectors for the Coarse region (e.g. 16 for smaller runs, 256 for larger-scale runs).
The molecular case is limited to a maximum of 512 sectors for this benchmark.
RMX_NL_FINE : sets the number of basis functions for the Fine region sector calculations
(this will determine the size of the sector Hamiltonian matrix for the Fine region calculations).
RMX_NL_COARSE : sets the number of basis functions for the Coarse region sector calculations
(this will determine the size of the sector Hamiltonian matrix for the Coarse region calculations).
Hint: To aid ideal scaling across nodes, the number of MPI tasks in the job script should ideally be a factor of RMX_NSECT_FINE.
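For example (a sketch only; the launcher and its options depend on your scheduler), a 16-sector fine-region run divides evenly over 4, 8 or 16 MPI tasks:
$> export RMX_NSECT_FINE=16
$> export RMX_NSECT_COARSE=16
$> srun -n 16 ./exdig_mpi_omp_atom    # one fine-region sector per MPI task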
For representative test cases:
RMX_NL_FINE should take values in the range 6:25
RMX_NL_FINE=12
RMX_NL_COARSE=6
* Results
For the atomic case:
1 AMPF output file will be generated for each fine-region sector
1 AMPC output file will be generated for each coarse-region sector
All output AMPF files will be the same size and all output AMPC files will be the same size (bytes).
For the molecular case:
1 AMPF output file will be generated for each MPI task
1 AMPC output file will be generated for each MPI task
The Hamiltonian matrix dimension will be output along
with the Wallclock time it takes to do each individual DSYEVD (eigensolver) call.
Performance is measured in Wallclock time and is displayed
on the screen or output log at the end of the run.
** Validation of Results
For the atomic dataset, run the atomic problem configuration supplied in the 'example_job_scripts' directory.
From the results directory issue the command:
awk '/Sector 16/ && /eigenvalues/' <stdout.filename>
replacing <stdout.filename> with the stdout file produced by the run.
The output should match the values below.
Mesh 1, Sector 16: first five eigenvalues = -4329.7161 -4170.9100 -4157.3112 -4100.9751 -4082.1108
Mesh 1, Sector 16: final five eigenvalues = 4100.9751 4157.3114 4170.9125 4329.7178 4370.5405
Mesh 2, Sector 16: first five eigenvalues = -313.6307 -301.0096 -298.8824 -293.3929 -290.6190
Mesh 2, Sector 16: final five eigenvalues = 290.6190 293.3929 298.8824 301.0102 313.6307
----------------------------------------------------------------------------
For the molecular dataset, run the molecular problem configuration supplied in the 'example_job_scripts' directory.
From the results directory issue the command:
awk '/Sector 64/ && /eigenvalues/' <stdout.filename>
replacing <stdout.filename> with the stdout file produced by the run.
The output should match the values below.
Mesh 1, Sector 64: first five eigenvalues = -3850.8443 -3593.9843 -3483.8338 -3466.7307 -3465.7194
Mesh 1, Sector 64: final five eigenvalues = 3465.7194 3466.7307 3483.8338 3593.9855 3850.8443
----------------------------------------------------------------------------
PFARM is part of a suite of programs based on the ‘R-matrix’ ab-initio approach to the variational solution of the many-electron Schrödinger equation for electron-atom and electron-ion scattering.
In this README we give information relevant for its use in the UEABS.
### Standard CPU version
The PFARM outer-region application code EXDIG is dominated by the assembly of sector Hamiltonian matrices and their subsequent eigensolutions. The code is written in Fortran 2003 (or Fortran 2003-compliant Fortran 95), is parallelised using MPI and OpenMP and is designed to take advantage of highly optimised, numerical library routines. Hybrid MPI / OpenMP parallelisation has also been introduced into the code via shared memory enabled numerical library kernels.
### GPU version
Accelerator-based implementations have been implemented for EXDIG, using off-loading (MKL or CuBLAS/CuSolver) for the standard (dense) eigensolver calculations that dominate overall run-time.
Accelerator-based GPU versions of the code using the MAGMA library for eigensolver calculations.
### Configure, Build and Run Instructions
See PFARM_Build_Run_README.txt
README file for PRACE Accelerator Benchmark Code PFARM (stage EXDIG, program RMX95)
===================================================================================
Author: Andrew Sunderland (a.g.sunderland@stfc.ac.uk).
The [code download](https://www.dropbox.com/sh/dlcpzr934r0wazy/AABlphkgEn9tgRlwHY2k3lqBa?dl=0
) should contain the following directories:
# PFARM in the United European Applications Benchmark Suite (UEABS)
## Document Author: Andrew Sunderland (andrew.sunderland@stfc.ac.uk) , STFC, UK.
```
benchmark/RMX_HOST: RMX source files for running on Host or KNL (using LAPACK or MKL)
benchmark/RMX_MAGMA_GPU: RMX source for running on GPUs using MAGMA
benchmark/lib:
benchmark/run: run directory with input files
benchmark/xdr: XDR library src files
```
## Introduction
PFARM is part of a suite of programs based on the ‘R-matrix’ ab-initio approach to the variational solution of the many-electron Schrödinger equation for electron-atom and electron-ion scattering. The package has been used to calculate electron collision data for astrophysical applications (such as: the interstellar medium, planetary atmospheres) with, for example, various ions of Fe and Ni and neutral O, plus other applications such as data for plasma modelling and fusion reactor impurities. The code has recently been adapted to form a compatible interface with the UKRmol suite of codes for electron (positron) molecule collisions thus enabling large-scale parallel ‘outer-region’ calculations for molecular systems as well as atomic systems.
In this README we give information relevant for its use in the UEABS.
The code uses the eXternal Data Representation library (XDR) for cross-platform
compatibility of unformatted data files. The XDR source files are provided with this code bundle.
It can be obtained from various sources, including
http://people.redhat.com/rjones/portablexdr/
### Standard CPU version
The PFARM outer-region application code EXDIG is dominated by the assembly of sector Hamiltonian matrices and their subsequent eigensolutions. The code is written in Fortran 2003 (or Fortran 2003-compliant Fortran 95), is parallelised using MPI and OpenMP and is designed to take advantage of highly optimised, numerical library routines. Hybrid MPI / OpenMP parallelisation has also been introduced into the code via shared memory enabled numerical library kernels.
### GPU version
Accelerator-based GPU versions of the code using the MAGMA library for eigensolver calculations.
Compilation
***********
Installing MAGMA (GPU Only)
---------------------------
Download MAGMA (current version magma-2.2.0) from http://icl.utk.edu/magma/
Install MAGMA : Modify the make.inc file to indicate your C/C++
compiler, Fortran compiler, and where CUDA, CPU BLAS, and
LAPACK are installed on your system. Refer to MAGMA documentation for further details
### Configure, Build and Run Instructions
See PFARM_Build_Run_README.txt
Install XDR
-----------
build XDR library:
update DEFS file for your compiler and environment
```shell
$> make
```
Install RMX_HOST
----------------
Update DEFS file for your setup, ensuring you are linking to a LAPACK or MKL library.
This is usually facilitated by e.g. compiling with `-mkl=parallel` (Intel compiler) or loading the appropriate library modules.
```shell
$> cd RMX_HOST
$> make
```
Install RMX_MAGMA_GPU
---------------------
Update DEFS file for your setup:
- Set MAGMADIR, CUDADIR and OPENBLASDIR environment variables
- Update the Fortran compiler and flags in the DEFS file.
```shell
$> cd RMX_MAGMA_GPU
$> make
```
Run instructions
****************
Run RMX
-------
The RMX application is run via the executable `rmx95`
For the FEIII dataset, the program requires the following input files to reside in the same directory as the executable:
```
phzin.ctl
XJTARMOM
HXJ030
```
These files are located in `benchmark/run`
A guide to each of the variables in the namelist in phzin.ctl can be found at:
https://hpcforge.org/plugins/mediawiki/wiki/pfarm/images/9/99/Phz_rep.pdf
However, it is recommended that these inputs are not changed for the benchmark code;
problem size, runtime etc. are controlled via the environment variables listed below.
A typical PBS script to run the RMX_HOST benchmark on 4 KNL nodes (4 MPI tasks with 64 threads per MPI task) is listed below:
Settings will vary according to your local environment.
```shell
#PBS -N rmx95_4x64
#PBS -l select=4
#PBS -l walltime=01:00:00
#PBS -A my_account_id
cd $PBS_O_WORKDIR
export OMP_NUM_THREADS=64
aprun -N 1 -n 4 -d $OMP_NUM_THREADS ./rmx95
```
Run-time environment variable settings
--------------------------------------
The following environment variables, which can for example be set inside the job script, allow the dimensions of the H sector matrix
and the number of sectors to be changed when undertaking benchmarks.
These can be adapted by the user to suit benchmark load requirements, e.g. short vs long runs.
Each MPI Task will pickup a sector calculation which will then be distributed amongst available threads per node (for CPU and KNL) or offloaded (for GPU).
The distribution among MPI tasks is simple round-robin.
- `RMX_NGPU` : refers to the number of shared GPUs per node (only for RMX_MAGMA_GPU)
- `RMX_NSECT_FINE` : sets the number of sectors for the Fine region.
- `RMX_NSECT_COARSE` : sets the number of sectors for the Coarse region.
- `RMX_NL_FINE` : sets the number of basis functions for the Fine region sector calculations.
- `RMX_NL_COARSE` : sets the number of basis functions for the Coarse region sector calculations.
**Notes**:
For a representative setup for the benchmark datasets:
- `RMX_NL_FINE` can take values in the range 6:25
- `RMX_NL_COARSE` can take values in the range 5:10
- For accuracy reasons, `RMX_NL_FINE` should always be greater than `RMX_NL_COARSE`.
- The following value pairs for `RMX_NL_FINE` and `RMX_NL_COARSE` provide representative calculations:
```
12,6
14,8
16,10
18,10
20,10
25,10
```
If `RMX_NSECT` and `RMX_NL` variables are not set, the benchmark code defaults to:
```
RMX_NSECT_FINE=5
RMX_NSECT_COARSE=20
RMX_NL_FINE=12
RMX_NL_COARSE=6
```
The Hamiltonian matrix dimension will be output along
with the Wallclock time it takes to do each individual DSYEVD call.
Performance is measured in Wallclock time and is displayed
on the screen or output log at the end of the run.
#!/bin/bash --login
#PBS -N exd_cpu_at_4x16
#PBS -l select=4
#PBS -l walltime=01:00:00
#PBS -A c01-am
# This shifts to the directory that you submitted the job from
cd $PBS_O_WORKDIR
# Set number of threads (used in mkl calls)
export OMP_NUM_THREADS=16
# Environment settings for exdig_atom
export RMX_NSECT_FINE=16
export RMX_NSECT_COARSE=16
export RMX_NL_FINE=22
export RMX_NL_COARSE=10
# for other environments replace aprun with mpirun etc ...
aprun -N 1 -n 4 -d 16 ./exdig_mpi_omp_atom
#!/bin/bash --login
# Slurm job options (job-name, compute nodes, job time)
#SBATCH --job-name=exd_cpu_at_4x16
#SBATCH --time=0:20:0
#SBATCH --nodes=4
#SBATCH --tasks-per-node=1
#SBATCH --cpus-per-task=16
# Replace [budget code] below with your project code (e.g. t01)
#SBATCH --account=c01-hec
#SBATCH --partition=standard
#SBATCH --qos=standard
# Set the number of threads to 16 and specify placement
# There are 16 OpenMP threads per MPI process
# We want one thread per physical core
export OMP_NUM_THREADS=16
export OMP_PLACES=cores
# Environment settings for exdig_atom
export RMX_NSECT_FINE=16
export RMX_NSECT_COARSE=16
export RMX_NL_FINE=22
export RMX_NL_COARSE=10
# Launch the parallel job
# Using 4 MPI processes
# 1 MPI processes per node
# 16 OpenMP threads per MPI process
# Additional srun options to pin one thread per physical core
srun --hint=nomultithread --distribution=block:block ./exdig_mpi_omp_atom
#!/bin/bash --login
#PBS -N exd_cpu_mo_4x16
#PBS -l select=4
#PBS -l walltime=00:20:00
#PBS -A c01-am
# This shifts to the directory that you submitted the job from
cd $PBS_O_WORKDIR
# Set number of threads (used in mkl calls)
export OMP_NUM_THREADS=16
# Environment settings for exdig_mol (NSECT_FINE=NSECT_COARSE for the molecular case)
export RMX_NSECT_FINE=64
export RMX_NSECT_COARSE=64
export RMX_NL_FINE=10
export RMX_NL_COARSE=10
# for other parallel run environments replace aprun with mpirun etc ...
aprun -N 1 -n 4 -d 16 ./exdig_mpi_omp_mol
#!/bin/bash --login
# Slurm job options (job-name, compute nodes, job time)
#SBATCH --job-name=exd_cpu_mo_4x16
#SBATCH --time=0:20:0
#SBATCH --nodes=4
#SBATCH --tasks-per-node=1
#SBATCH --cpus-per-task=16
# Replace [budget code] below with your project code (e.g. t01)
#SBATCH --account=c01-hec
#SBATCH --partition=standard
#SBATCH --qos=standard
# Set the number of threads to 16 and specify placement
# There are 16 OpenMP threads per MPI process
# We want one thread per physical core
export OMP_NUM_THREADS=16
export OMP_PLACES=cores
# Environment settings for exdig_mol
export RMX_NSECT_FINE=64
export RMX_NSECT_COARSE=64
export RMX_NL_FINE=10
export RMX_NL_COARSE=10
# Launch the parallel job
# Using 4 MPI processes
# 1 MPI processes per node
# 16 OpenMP threads per MPI process
# Additional srun options to pin one thread per physical core
srun --hint=nomultithread --distribution=block:block ./exdig_mpi_omp_mol
#!/bin/bash
# Use this script to set up new PFARM EXDIG CPU-based runs for the atomic test case (1)
# Usage: ./setup_run_cpu_atom.scr <new_run_dir>
mkdir $1
cd $1
ln -s ../../data/test_case_1_atom/HXJ030 .
ln -s ../../data/test_case_1_atom/XJTARMOM .
cp ../../data/test_case_1_atom/phzin.ctl .
ln -s ../bin/exdig_mpi_omp_atom .
cp ../example_job_scripts/exdig_mpi_omp_atom_4x16.pbs .
cp ../example_job_scripts/exdig_mpi_omp_atom_4x16.slurm .
cd ..
#!/bin/bash
# Use this script to set up new PFARM EXDIG CPU-based runs for the molecular test case (2)
# Usage: ./setup_run_cpu_mol.scr <new_run_dir>
mkdir $1
cd $1
ln -s ../../data/test_case_2_mol/H .
cp ../../data/test_case_2_mol/phzin.ctl .
ln -s ../bin/exdig_mpi_omp_mol .
cp ../example_job_scripts/exdig_mpi_omp_mol_4x16.pbs .
cp ../example_job_scripts/exdig_mpi_omp_mol_4x16.slurm .
cd ..