GADGET-4
pinning.cc
Go to the documentation of this file.
1 /*******************************************************************************
2  * \copyright This file is part of the GADGET4 N-body/SPH code developed
3  * \copyright by Volker Springel. Copyright (C) 2014-2020 by Volker Springel
4  * \copyright (vspringel@mpa-garching.mpg.de) and all contributing authors.
5  *******************************************************************************/
6 
12 #include "gadgetconfig.h"
13 
14 #include <gsl/gsl_rng.h>
15 #include <math.h>
16 #include <mpi.h>
17 #include <stdio.h>
18 #include <stdlib.h>
19 #include <string.h>
20 
21 #include "../system/pinning.h"
22 
23 #define MAX_CORES 4096
24 
26 {
27 #ifdef IMPOSE_PINNING
28  cpuset = hwloc_bitmap_alloc();
29  hwloc_get_proc_cpubind(topology, getpid(), cpuset, 0);
30 #endif
31 }
32 
34 {
35 #ifdef IMPOSE_PINNING
36  /* Allocate and initialize topology object. */
37  hwloc_topology_init(&topology);
38 
39  /* Perform the topology detection. */
40  hwloc_topology_load(topology);
41 
42  /* Get some additional topology information
43  in case we need the topology depth later. */
44  topodepth = hwloc_topology_get_depth(topology);
45 
46  int depth = hwloc_get_type_depth(topology, HWLOC_OBJ_SOCKET);
47 
48  if(depth == HWLOC_TYPE_DEPTH_UNKNOWN)
49  sockets = -1;
50  else
51  sockets = hwloc_get_nbobjs_by_depth(topology, depth);
52 
53  depth = hwloc_get_type_depth(topology, HWLOC_OBJ_CORE);
54 
55  if(depth == HWLOC_TYPE_DEPTH_UNKNOWN)
56  cores = -1;
57  else
58  cores = hwloc_get_nbobjs_by_depth(topology, depth);
59 
60  depth = hwloc_get_type_depth(topology, HWLOC_OBJ_PU);
61 
62  if(depth == HWLOC_TYPE_DEPTH_UNKNOWN)
63  pus = -1;
64  else
65  pus = hwloc_get_nbobjs_by_depth(topology, depth);
66 #endif
67 }
68 
70 {
71 #ifdef IMPOSE_PINNING
72  sc->mpi_printf("PINNING: We have %d sockets, %d physical cores and %d logical cores on the first MPI-task's node.\n", sockets, cores,
73  pus);
74  if(cores <= 0 || sockets <= 0 || pus <= 0)
75  {
76  sc->mpi_printf("PINNING: The topology cannot be recognized. We refrain from any pinning attempt.\n");
77  flag_pinning_error = 1;
78  return;
79  }
80 
81  hyperthreads_per_core = pus / cores;
82 
83  if(hyperthreads_per_core < 1)
84  Terminate("Need at least one logical thread per physical core\n");
85 
86  if(pus > cores)
87  sc->mpi_printf("PINNING: Looks like %d hyperthreads per physical core are in principle possible.\n", hyperthreads_per_core);
88 
89  cpuset_after_MPI_init = hwloc_bitmap_alloc();
90  hwloc_get_proc_cpubind(topology, getpid(), cpuset_after_MPI_init, 0);
91 
92  if(!hwloc_bitmap_isequal(cpuset, cpuset_after_MPI_init))
93  sc->mpi_printf("PINNING: Apparently, the MPI library set some pinning itself. We'll override this.\n");
94 
95  int available_pus = 0;
96 
97  for(int id = hwloc_bitmap_first(cpuset); id != -1; id = hwloc_bitmap_next(cpuset, id))
98  available_pus++;
99 
100  sc->mpi_printf("PINNING: Looks like %d logical cores are available.\n", available_pus);
101 
102  if(available_pus == pus)
103  {
104  sc->mpi_printf("PINNING: Looks like all available logical cores are at our disposal.\n");
105  }
106  else
107  {
108  if(available_pus >= 1)
109  {
110  sc->mpi_printf("PINNING: Looks like already before start of the code, a tight binding was imposed.\n");
111 #ifdef IMPOSE_PINNING_OVERRIDE_MODE
112  for(int id = 0; id < pus; id++)
113  hwloc_bitmap_set(cpuset, id);
114  available_pus = pus;
115  sc->mpi_printf("PINNING: We are overriding this and make all %d available to us.\n", available_pus);
116 #else
117  sc->mpi_printf(
118  "PINNING: We refrain from any pinning attempt ourselves. (This can be changed by setting the compile flag "
119  "IMPOSE_PINNING_OVERRIDE_MODE.)\n");
120  flag_pinning_error = 1;
121  return;
122 #endif
123  }
124  }
125 
126  char buf[MAX_CORES + 1];
127 
128  for(int i = 0; i < pus && i < MAX_CORES; i++)
129  if(hwloc_bitmap_isset(cpuset, i))
130  buf[i] = '1';
131  else
132  buf[i] = '-';
133  buf[pus] = 0;
134 
135  sc->mpi_printf("PINNING: Available logical cores on first node: %s\n", buf);
136 
137  int pus_per_task = available_pus / sc->TasksInThisNode;
138 
139  sc->mpi_printf("PINNING: %d logical cores are available per MPI Task.\n", pus_per_task);
140 
141  if(pus_per_task <= 0)
142  Terminate("Need at least one logical core per MPI task for pinning to make sense.\n");
143 
144  /* go through all logical cores in sequence of proximity */
145  int depth = hwloc_get_type_depth(topology, HWLOC_OBJ_PU);
146  int cores_before = 0;
147  int cid;
148 
149  for(cid = 0; cores_before < sc->RankInThisNode * pus_per_task && cid < pus; cid++)
150  {
151  hwloc_obj_t obj = hwloc_get_obj_by_depth(topology, depth, cid);
152 
153  hwloc_cpuset_t cpuset_core = hwloc_bitmap_dup(obj->cpuset);
154  if(hwloc_bitmap_isincluded(cpuset_core, cpuset))
155  {
156  cores_before++;
157  }
158  hwloc_bitmap_free(cpuset_core);
159  }
160 
161  /* cid should now be the logical index of the first PU for this MPI task */
162 
163  hwloc_obj_t obj = hwloc_get_obj_by_depth(topology, depth, cid);
164  hwloc_cpuset_t current_cpu = hwloc_bitmap_dup(obj->cpuset);
165 
166  hwloc_set_proc_cpubind(topology, getpid(), current_cpu, HWLOC_CPUBIND_PROCESS);
167 #endif
168 }
169 
171 {
172 #ifdef IMPOSE_PINNING
173  if(flag_pinning_error)
174  return;
175 
176  hwloc_get_cpubind(topology, cpuset, 0);
177 
178  char buf[MAX_CORES + 1];
179 
180  for(int i = 0; i < pus && i < MAX_CORES; i++)
181  if(hwloc_bitmap_isset(cpuset, i))
182  buf[i] = '1';
183  else
184  buf[i] = '-';
185  buf[pus] = 0;
186 
187  for(int i = 0; i < sc->NTask; i++)
188  {
189  if(sc->ThisTask == i && sc->ThisNode == 0)
190  printf("PINNING: Node=%4d: Task=%04d: %s\n", sc->ThisNode, sc->ThisTask, buf);
191  fflush(stdout);
192  MPI_Barrier(sc->Communicator);
193  }
194 #endif
195 }
void report_pinning(setcomm *sc)
Definition: pinning.cc:170
void get_core_set(void)
Definition: pinning.cc:25
void detect_topology(void)
Definition: pinning.cc:33
void pin_to_core_set(setcomm *sc)
Definition: pinning.cc:69
int ThisNode
Definition: setcomm.h:36
void mpi_printf(const char *fmt,...)
Definition: setcomm.h:55
int ThisTask
Definition: setcomm.h:33
int NTask
Definition: setcomm.h:32
int RankInThisNode
Definition: setcomm.h:39
MPI_Comm Communicator
Definition: setcomm.h:31
int TasksInThisNode
Definition: setcomm.h:38
#define Terminate(...)
Definition: macros.h:19
#define MAX_CORES
Definition: pinning.cc:23