GADGET-4
healthtest.cc
/*******************************************************************************
 * \copyright This file is part of the GADGET4 N-body/SPH code developed
 * \copyright by Volker Springel. Copyright (C) 2014-2020 by Volker Springel
 * \copyright (vspringel@mpa-garching.mpg.de) and all contributing authors.
 *******************************************************************************/

#include "gadgetconfig.h"

#include <math.h>
#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "../data/allvars.h"
#include "../data/dtypes.h"
#include "../data/mymalloc.h"
#include "../logs/logs.h"
#include "../main/simulation.h"
#include "../mpi_utils/mpi_utils.h"
#include "../system/system.h"

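/* Tunable test parameters: the total data volume exchanged in the hypercube
 * bandwidth test, the iteration count of the synthetic CPU work loop, the
 * number of MPI_Iprobe calls that are timed, and the relative performance
 * spread above which a warning is issued. */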
#define TEST_PACKET_SIZE_IN_MB 5
#define WORK_LOOP_COUNTER 50000000
#define WORK_NUMBER_OF_IPROBE_TESTS 1000000

#ifndef MAX_VARIATION_TOLERANCE
#define MAX_VARIATION_TOLERANCE 0.5
#endif

void sim::healthtest(void)
{
  mpi_printf("\n");

  measure_cpu_performance(Communicator);

  // Let's take a look at the communication speed in a global all-to-all data exchange realized through pairwise exchanges along a
  // hypercube
  if(NTask > 1)
    measure_hyper_cube_speed("Full hypercube:", Communicator);

  // Let's take a look at inter-node communication speed
  if(NumNodes > 1)
    {
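      // Collect the first rank of every compute node into a common communicator
      // (color 0); running the hypercube test on that communicator then probes
      // pure inter-node links. The remaining ranks (color 1) end up in a
      // communicator that is not measured.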
      int CommSplitColor;

      if(RankInThisNode == 0)
        CommSplitColor = 0;
      else
        CommSplitColor = 1;

      MPI_Comm comm;
      MPI_Comm_split(Communicator, CommSplitColor, ThisTask, &comm);

      if(RankInThisNode == 0)
        measure_hyper_cube_speed("Internode cube:", comm);

      MPI_Comm_free(&comm);
    }

  // Now look at intra-node communication speed
  if(NumNodes < NTask)
    {
      int CommSplitColor = ThisNode;
      MPI_Comm comm;
      MPI_Comm_split(Communicator, CommSplitColor, ThisTask, &comm);

      measure_hyper_cube_speed("Intranode cube, 1st node:", comm);

      MPI_Comm_free(&comm);
    }

  measure_iprobe_performance("Iprobe for any message:");

  mpi_printf("\n");
}

double sim::measure_cpu_performance(MPI_Comm Communicator)
{
  int loc_ntask, loc_thistask, loc_ptask;

  double ta = Logs.second();

  MPI_Comm_rank(Communicator, &loc_thistask);
  MPI_Comm_size(Communicator, &loc_ntask);

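  // determine loc_ptask as the smallest exponent with 2^loc_ptask >= loc_ntask
  // (not used further in this function, but mirrors measure_hyper_cube_speed)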
  for(loc_ptask = 0; loc_ntask > (1 << loc_ptask); loc_ptask++)
    ;

  double sum = 0;

  MPI_Barrier(Communicator);

  double t0 = Logs.second();

  // do some computationally intense (but useless) work for a while
  for(int i = 0; i < WORK_LOOP_COUNTER; i++)
    sum += sin((i + 0.1) / WORK_LOOP_COUNTER) / (2.0 + cos(i - 0.1) / WORK_LOOP_COUNTER);

  double t1 = Logs.second();

  double tperf = Logs.timediff(t0, t1), tperfsum;

  MPI_Allreduce(&tperf, &tperfsum, 1, MPI_DOUBLE, MPI_SUM, Communicator);
  double tavg = tperfsum / loc_ntask;

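  // value/rank pairs laid out to match MPI_DOUBLE_INT, as required by the
  // MPI_MINLOC/MPI_MAXLOC reductions below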
  struct
  {
    double t;
    int rank;
  } local = {tperf, ThisTask}, localnode = {tperf, ThisNode}, min_time, max_time, min_timenode, max_timenode;

  MPI_Allreduce(&local, &min_time, 1, MPI_DOUBLE_INT, MPI_MINLOC, Communicator);
  MPI_Allreduce(&local, &max_time, 1, MPI_DOUBLE_INT, MPI_MAXLOC, Communicator);

  MPI_Allreduce(&localnode, &min_timenode, 1, MPI_DOUBLE_INT, MPI_MINLOC, Communicator);
  MPI_Allreduce(&localnode, &max_timenode, 1, MPI_DOUBLE_INT, MPI_MAXLOC, Communicator);

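  // relative spread of the work-loop time across ranks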
  double variation = (max_time.t - min_time.t) / tavg;

  double tb = Logs.second();

  mpi_printf(
      "HEALTHTEST: %25s %8.3f sec %7.3f%% variation | Best=%g on Task=%d/Node=%d, Worst=%g on Task=%d/Node=%d, test "
      "took %g sec\n",
      "CPU performance:", tavg, 100.0 * variation, min_time.t, min_time.rank, min_timenode.rank, max_time.t, max_time.rank,
      max_timenode.rank, Logs.timediff(ta, tb));

  if(variation >= MAX_VARIATION_TOLERANCE)
    {
      char name_maxnode[MPI_MAX_PROCESSOR_NAME];
      int len;
      if(ThisTask == max_time.rank)
        MPI_Get_processor_name(name_maxnode, &len);

      MPI_Bcast(name_maxnode, MPI_MAX_PROCESSOR_NAME, MPI_CHAR, max_time.rank, Communicator);

      char buf[1000 + MPI_MAX_PROCESSOR_NAME];
      sprintf(buf, "processes_%s.txt", name_maxnode);

      mpi_printf("HEALTHTEST: We are dumping a process list to the file '%s'\n", buf);

      if(ThisTask == max_time.rank)
        {
          char cmd[10000 + MPI_MAX_PROCESSOR_NAME];
          sprintf(cmd, "ps -ef > %s 2>&1", buf); /* portable POSIX-sh redirection; '>&' is a bash/csh extension */
          system(cmd);
        }

      MPI_Barrier(Communicator);

      // only issue a warning for now instead of terminating the code
      warn(
          "\n\nHEALTHTEST: We issue a warning because the performance variation=%g of the CPUs lies above the prescribed tolerance "
          "MAX_VARIATION_TOLERANCE=%g, possibly indicating a machine problem. (sum=%g)\n",
          variation, MAX_VARIATION_TOLERANCE, sum);
    }

  return sum;
}

double sim::measure_hyper_cube_speed(const char *tag, MPI_Comm Communicator)
{
  double ta = Logs.second();

  int loc_ntask, loc_thistask, loc_ptask;

  MPI_Comm_rank(Communicator, &loc_thistask);
  MPI_Comm_size(Communicator, &loc_ntask);

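  // determine loc_ptask as the smallest exponent with 2^loc_ptask >= loc_ntask,
  // i.e. the dimension of the virtual hypercube spanning all tasks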
  for(loc_ptask = 0; loc_ntask > (1 << loc_ptask); loc_ptask++)
    ;

  int bytecount = (TEST_PACKET_SIZE_IN_MB * 1024L * 1024L) / loc_ntask;

  double tall = 0;
  int count = 0;

  char *sendbuf = (char *)Mem.mymalloc_clear("send", bytecount * sizeof(char));
  char *recvbuf = (char *)Mem.mymalloc_clear("recv", bytecount * sizeof(char));

  /* exchange the test data */
  for(int ngrp = 1; ngrp < (1 << loc_ptask); ngrp++)
    {
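      // pair up tasks along the hypercube edges: in round ngrp, each task
      // exchanges with the partner whose rank differs by the bit pattern ngrp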
      int recvTask = loc_thistask ^ ngrp;

      MPI_Barrier(Communicator);

      if(recvTask < loc_ntask)
        {
          double t0 = Logs.second();
          MPI_Sendrecv(sendbuf, bytecount, MPI_BYTE, recvTask, TAG_DENS_A, recvbuf, bytecount, MPI_BYTE, recvTask, TAG_DENS_A,
                       Communicator, MPI_STATUS_IGNORE);
          double t1 = Logs.second();

          tall += Logs.timediff(t0, t1);
          count++;
        }
    }

  Mem.myfree(recvbuf);
  Mem.myfree(sendbuf);

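  // factor 0.5: each MPI_Sendrecv moves bytecount bytes in both directions at
  // once, so the per-direction time is half of the measured exchange time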
  double tperf = 0.5 * tall / count, tperfsum;

  MPI_Allreduce(&tperf, &tperfsum, 1, MPI_DOUBLE, MPI_SUM, Communicator);
  double tavg = tperfsum / loc_ntask;

  struct
  {
    double t;
    int rank;
  } local = {tperf, ThisTask}, localnode = {tperf, ThisNode}, min_time, max_time, min_timenode, max_timenode;

  MPI_Allreduce(&local, &min_time, 1, MPI_DOUBLE_INT, MPI_MINLOC, Communicator);
  MPI_Allreduce(&local, &max_time, 1, MPI_DOUBLE_INT, MPI_MAXLOC, Communicator);

  MPI_Allreduce(&localnode, &min_timenode, 1, MPI_DOUBLE_INT, MPI_MINLOC, Communicator);
  MPI_Allreduce(&localnode, &max_timenode, 1, MPI_DOUBLE_INT, MPI_MAXLOC, Communicator);

  double tb = Logs.second();

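  // relative spread of the effective pairwise bandwidth across ranks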
  double variation = (bytecount / min_time.t - bytecount / max_time.t) / (bytecount / tavg);

  mpi_printf(
      "HEALTHTEST: %25s %8.1f MB/s per pair %7.3f%% variation | Best=%g on Task=%d/Node=%d, Worst=%g on Task=%d/Node=%d, test "
      "took %g sec\n",
      tag, bytecount / tavg * TO_MBYTE_FAC, 100.0 * variation, bytecount / min_time.t * TO_MBYTE_FAC, min_time.rank, min_timenode.rank,
      bytecount / max_time.t * TO_MBYTE_FAC, max_time.rank, max_timenode.rank, Logs.timediff(ta, tb));

  if(variation > MAX_VARIATION_TOLERANCE && ThisTask == 0)
    warn(
        "\nThe performance variation=%g of the communication speed lies above the prescribed tolerance MAX_VARIATION_TOLERANCE=%g, "
        "possibly indicating a machine problem.\n",
        variation, MAX_VARIATION_TOLERANCE);

  return tavg;
}

void sim::measure_iprobe_performance(const char *tag)
{
  double ta = Logs.second();

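  // time a large number of non-blocking probes for a message that is never
  // sent; this exposes per-call overhead in the MPI progress engine on each rank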
  for(int i = 0; i < WORK_NUMBER_OF_IPROBE_TESTS; i++)
    {
      int flag;
      MPI_Status status;

      MPI_Iprobe(MPI_ANY_SOURCE, MPI_ANY_TAG, Communicator, &flag, &status);
    }

  double tb = Logs.second();

  double tperf = Logs.timediff(ta, tb) / WORK_NUMBER_OF_IPROBE_TESTS;

  struct
  {
    double t;
    int rank;
  } local = {tperf, ThisTask}, min_time, max_time;

  MPI_Allreduce(&local, &min_time, 1, MPI_DOUBLE_INT, MPI_MINLOC, Communicator);
  MPI_Allreduce(&local, &max_time, 1, MPI_DOUBLE_INT, MPI_MAXLOC, Communicator);

  double tperfsum;
  MPI_Allreduce(&tperf, &tperfsum, 1, MPI_DOUBLE, MPI_SUM, Communicator);
  double tavg = tperfsum / NTask;

  char name_minnode[MPI_MAX_PROCESSOR_NAME];
  char name_maxnode[MPI_MAX_PROCESSOR_NAME];

  int len;
  if(ThisTask == min_time.rank)
    MPI_Get_processor_name(name_minnode, &len);
  if(ThisTask == max_time.rank)
    MPI_Get_processor_name(name_maxnode, &len);

  MPI_Bcast(name_minnode, MPI_MAX_PROCESSOR_NAME, MPI_BYTE, min_time.rank, Communicator);
  MPI_Bcast(name_maxnode, MPI_MAX_PROCESSOR_NAME, MPI_BYTE, max_time.rank, Communicator);

  double variation = (max_time.t - min_time.t) / tavg;

  mpi_printf(
      "HEALTHTEST: %25s %g sec per MPI_Iprobe %7.3f%% variation | Best=%g on Task=%d/Node=%s, Worst=%g on Task=%d/Node=%s, test took %g "
      "sec\n",
      tag, tavg, 100.0 * variation, min_time.t, min_time.rank, name_minnode, max_time.t, max_time.rank, name_maxnode,
      Logs.timediff(ta, tb));
}