GADGET-4
domain.cc
Go to the documentation of this file.
1 /*******************************************************************************
2  * \copyright This file is part of the GADGET4 N-body/SPH code developed
3  * \copyright by Volker Springel. Copyright (C) 2014-2020 by Volker Springel
4  * \copyright (vspringel@mpa-garching.mpg.de) and all contributing authors.
5  *******************************************************************************/
6 
25 #include "gadgetconfig.h"
26 
27 #include <mpi.h>
28 #include <cmath>
29 #include <cstdio>
30 #include <cstdlib>
31 #include <cstring>
32 
33 #include "../data/allvars.h"
34 #include "../data/dtypes.h"
35 #include "../data/mymalloc.h"
36 #include "../domain/domain.h"
37 #include "../logs/logs.h"
38 #include "../logs/timer.h"
39 #include "../main/simulation.h"
40 #include "../sort/peano.h"
41 #include "../system/system.h"
42 #include "../time_integration/timestep.h"
43 
50 template <typename partset>
52 {
53  Mode = mode;
54 
55  TIMER_START(CPU_DOMAIN);
56 
57  double t0 = Logs.second();
58 
59  domain_printf("DOMAIN: Begin domain decomposition (sync-point %d).\n", All.NumCurrentTiStep);
60 
61  /* map the particles by a shift vector back if desired */
62  do_box_wrapping();
63 
64  /* determine which time bins need to be balanced */
65  domain_init_sum_cost();
66 
67  /* find total cost factors, including a determination of MultipleDomains */
68  domain_find_total_cost();
69 
70  /* allocate some fields that need to stay */
71  domain_allocate();
72 
73  /* allocate some arrays we are going to use */
74  domain_key = (peanokey *)Mem.mymalloc_movable(&domain_key, "domain_key", (sizeof(peanokey) * Tp->MaxPart));
75  domain_leaf_cost =
76  (domain_cost_data *)Mem.mymalloc_movable(&domain_leaf_cost, "domain_leaf_cost", (MaxTopNodes * sizeof(domain_cost_data)));
77 
78  topNodes = (local_topnode_data *)Mem.mymalloc_movable(&topNodes, "topNodes", (MaxTopNodes * sizeof(local_topnode_data)));
79 
80  /* determine top-level tree */
81  domain_determineTopTree();
82 
83  /* combine on each MPI task several of the domains (namely the number MultipleDomains) */
84  domain_combine_multipledomains();
85 
86  Mem.myfree(topNodes);
87  Mem.myfree(domain_leaf_cost);
88 
89  /* move stars out of gas block if present */
90  domain_rearrange_particle_sequence();
91 
92  /* finally, carry out the actual particle exchange */
93  if(Mode == STANDARD)
94  domain_exchange();
95  else if(Mode == COLL_SUBFIND)
96  domain_coll_subfind_prepare_exchange();
97 
98  double t1 = Logs.second();
99 
100  domain_printf("DOMAIN: domain decomposition done. (took in total %g sec)\n", Logs.timediff(t0, t1));
101 
102  if(Mode == STANDARD)
103  {
104  TIMER_STOPSTART(CPU_DOMAIN, CPU_PEANO);
105 
106  peano_hilbert_order(domain_key);
107 
108  TIMER_STOPSTART(CPU_PEANO, CPU_DOMAIN);
109  }
110 
111  Mem.myfree(domain_key);
112 
113  Mem.myfree(ListOfTopleaves);
114 
115  TaskOfLeaf = (int *)Mem.myrealloc_movable(TaskOfLeaf, NTopleaves * sizeof(int));
116  TopNodes = (topnode_data *)Mem.myrealloc_movable(TopNodes, NTopnodes * sizeof(topnode_data));
117 
118  ListOfTopleaves = (int *)Mem.mymalloc_movable(&ListOfTopleaves, "ListOfTopleaves", (NTopleaves * sizeof(int)));
119 
120  memset(NumTopleafOfTask, 0, NTask * sizeof(int));
121 
122  for(int i = 0; i < NTopleaves; i++)
123  NumTopleafOfTask[TaskOfLeaf[i]]++;
124 
125  FirstTopleafOfTask[0] = 0;
126  for(int i = 1; i < NTask; i++)
127  FirstTopleafOfTask[i] = FirstTopleafOfTask[i - 1] + NumTopleafOfTask[i - 1];
128 
129  memset(NumTopleafOfTask, 0, NTask * sizeof(int));
130 
131  for(int i = 0; i < NTopleaves; i++)
132  {
133  int task = TaskOfLeaf[i];
134  int off = FirstTopleafOfTask[task] + NumTopleafOfTask[task]++;
135  ListOfTopleaves[off] = i;
136  }
137 
138  if(Mode == STANDARD)
139  {
140  /* the following will reconstruct the timebins and report the balance
141  * in case we are dealing with simparticles, for lcparticles nothing will be done
142  */
143  domain_report_balance();
144  }
145 
146 #ifdef DOMAIN_SPECIAL_CHECK
147  if(ThisTask == 0 && All.NumCurrentTiStep == 4)
148  Terminate("stop");
149 #endif
150 
151  TIMER_STOP(CPU_DOMAIN);
152 }
153 
155 template <typename partset>
156 void domain<partset>::domain_allocate(int maxtopnodes)
157 {
158  MaxTopNodes = maxtopnodes;
159 
160  if(FirstTopleafOfTask)
161  Terminate("domain storage already allocated");
162 
163  FirstTopleafOfTask = (int *)Mem.mymalloc_movable(&FirstTopleafOfTask, "FirstTopleafOfTask", NTask * sizeof(int));
164  NumTopleafOfTask = (int *)Mem.mymalloc_movable(&NumTopleafOfTask, "NumTopleafOfTask", NTask * sizeof(int));
165  TopNodes = (topnode_data *)Mem.mymalloc_movable(&TopNodes, "TopNodes", (MaxTopNodes * sizeof(topnode_data)));
166  TaskOfLeaf = (int *)Mem.mymalloc_movable(&TaskOfLeaf, "TaskOfLeaf", (MaxTopNodes * sizeof(int)));
167  ListOfTopleaves = (int *)Mem.mymalloc_movable(&ListOfTopleaves, "DomainListOfLocalTopleaves", (MaxTopNodes * sizeof(int)));
168 }
169 
171 template <typename partset>
173 {
174  int maxtopnodes = All.TopNodeAllocFactor * std::max<int>(All.TopNodeFactor * MultipleDomains * NTask, BASENUMBER);
175  domain_allocate(maxtopnodes);
176 }
177 
178 template <typename partset>
180 {
181  if(!FirstTopleafOfTask)
182  Terminate("domain storage not allocated");
183 
184  Mem.myfree_movable(ListOfTopleaves);
185  Mem.myfree_movable(TaskOfLeaf);
186  Mem.myfree_movable(TopNodes);
187  Mem.myfree_movable(NumTopleafOfTask);
188  Mem.myfree_movable(FirstTopleafOfTask);
189 
190  ListOfTopleaves = NULL;
191  TaskOfLeaf = NULL;
192  TopNodes = NULL;
193  NumTopleafOfTask = NULL;
194  FirstTopleafOfTask = NULL;
195 }
196 
197 template <typename partset>
198 void domain<partset>::domain_printf(char *buf)
199 {
201  {
202  fprintf(Logs.FdDomain, "%s", buf);
203  }
204 }
205 
206 template <>
208 {
209  /* for each timebin that should be balanced, collect the gravity cost of
210  * the particles active in that timebin
211  */
212 
213  for(int n = 0; n < NumTimeBinsToBeBalanced; n++)
214  {
215  GravCostPerListedTimeBin[n] = 0.0;
216  MaxGravCostPerListedTimeBin[n] = 0.0;
217  /* do the same for the hydrodynamics
218  */
219  HydroCostPerListedTimeBin[n] = 0.0;
220  }
221 
222  for(int i = 0; i < Tp->NumPart; i++)
223  {
224  if(Tp->P[i].GravCost == 0)
225  Tp->P[i].GravCost = 1;
226 
227  for(int n = 0; n < NumTimeBinsToBeBalanced; n++)
228  {
229  int bin = ListOfTimeBinsToBeBalanced[n];
230 
231  if(bin >= Tp->P[i].TimeBinGrav)
232  {
233  GravCostPerListedTimeBin[n] += Tp->P[i].GravCost;
234  if(MaxGravCostPerListedTimeBin[n] < Tp->P[i].GravCost)
235  MaxGravCostPerListedTimeBin[n] = Tp->P[i].GravCost;
236  }
237 
238  if(Tp->P[i].getType() == 0)
239  {
240  if(bin >= Tp->P[i].getTimeBinHydro())
241  HydroCostPerListedTimeBin[n] += 1.0;
242  }
243  }
244  }
245 
246  long long sum[2] = {Tp->NumPart, Tp->NumGas};
247 
248  MPI_Allreduce(MPI_IN_PLACE, sum, 2, MPI_LONG_LONG, MPI_SUM, Communicator);
249 
250  NormFactorLoad = 1.0 / sum[0];
251  NormFactorLoadSph = sum[1] > 0.0 ? 1.0 / sum[1] : 0.0;
252 
253  MultipleDomains = 0;
254  TotalCost = 0.0;
255 
256  if(NormFactorLoad > 0.0)
257  {
258  MultipleDomains += 1;
259  TotalCost += 1.0;
260  }
261 
262  if(NormFactorLoadSph > 0.0)
263  {
264  MultipleDomains += 1;
265  TotalCost += 1.0;
266  }
267 
268  MPI_Allreduce(MPI_IN_PLACE, GravCostPerListedTimeBin, NumTimeBinsToBeBalanced, MPI_DOUBLE, MPI_SUM, Communicator);
269  MPI_Allreduce(MPI_IN_PLACE, HydroCostPerListedTimeBin, NumTimeBinsToBeBalanced, MPI_DOUBLE, MPI_SUM, Communicator);
270 
271  MPI_Allreduce(MPI_IN_PLACE, MaxGravCostPerListedTimeBin, NumTimeBinsToBeBalanced, MPI_DOUBLE, MPI_MAX, Communicator);
272 
273  double limit = 1.0 / (All.TopNodeFactor * MultipleDomains * NTask);
274 
275  for(int n = 0; n < NumTimeBinsToBeBalanced; n++)
276  {
277  if(GravCostPerListedTimeBin[n] > 0.0)
278  MultipleDomains += 1;
279 
280  if(HydroCostPerListedTimeBin[n] > 0.0)
281  MultipleDomains += 1;
282 
283  GravCostNormFactors[n] = GravCostPerListedTimeBin[n] > 0.0 ? 1.0 / GravCostPerListedTimeBin[n] : 0.0;
284  HydroCostNormFactors[n] = HydroCostPerListedTimeBin[n] > 0.0 ? 1.0 / HydroCostPerListedTimeBin[n] : 0.0;
285 
286  if(MaxGravCostPerListedTimeBin[n] * GravCostNormFactors[n] > limit)
287  GravCostNormFactors[n] = limit / MaxGravCostPerListedTimeBin[n];
288 
289  if(HydroCostNormFactors[n] > limit)
290  HydroCostNormFactors[n] = limit;
291 
292  TotalCost += GravCostPerListedTimeBin[n] * GravCostNormFactors[n];
293  TotalCost += HydroCostPerListedTimeBin[n] * HydroCostNormFactors[n];
294  }
295 }
296 
297 #ifdef LIGHTCONE_PARTICLES
298 template <>
300 {
301  long long sum[2] = {Tp->NumPart, Tp->NumGas};
302 
303  MPI_Allreduce(MPI_IN_PLACE, sum, 2, MPI_LONG_LONG, MPI_SUM, Communicator);
304 
305  NormFactorLoad = 1.0 / sum[0];
306  NormFactorLoadSph = sum[1] > 0.0 ? 1.0 / sum[1] : 0.0;
307 
308  MultipleDomains = 0;
309  TotalCost = 0.0;
310 
311  if(NormFactorLoad > 0.0)
312  {
313  MultipleDomains += 1;
314  TotalCost += 1.0;
315  }
316 
317  if(NormFactorLoadSph > 0.0)
318  {
319  MultipleDomains += 1;
320  TotalCost += 1.0;
321  }
322 
323  NumTimeBinsToBeBalanced = 0;
324 }
325 #endif
326 
327 template <>
329 {
330  if(Mode != STANDARD)
331  return;
332 
333  for(int i = 0; i < Tp->NumGas; i++)
334  if(Tp->P[i].getType() != 0) /*If not a gas particle, swap to the end of the list */
335  {
336  particle_data psave = Tp->P[i];
337  peanokey key = domain_key[i];
338 
339  Tp->P[i] = Tp->P[Tp->NumGas - 1];
340  Tp->SphP[i] = Tp->SphP[Tp->NumGas - 1];
341  domain_key[i] = domain_key[Tp->NumGas - 1];
342 
343  Tp->P[Tp->NumGas - 1] = psave;
344  domain_key[Tp->NumGas - 1] = key;
345 
346  Tp->NumGas--;
347  i--;
348  }
349  /*Now we have rearranged the particles,
350  *we don't need to do it again unless there are more stars*/
351 }
352 
353 template <>
355 {
356  Tp->reconstruct_timebins();
357 
358  TIMER_STOPSTART(CPU_DOMAIN, CPU_LOGS);
359 
360  if(Mode != STANDARD)
361  return;
362 
363  /* now execute code to report balance */
364 
365  /* get total particle counts */
366  long long loc_count[2 * TIMEBINS], glob_count[2 * TIMEBINS];
367 
368  for(int i = 0; i < TIMEBINS; i++)
369  {
370  loc_count[i] = Tp->TimeBinsGravity.TimeBinCount[i];
371  loc_count[TIMEBINS + i] = Tp->TimeBinsHydro.TimeBinCount[i];
372  }
373 
374  MPI_Reduce(loc_count, glob_count, 2 * TIMEBINS, MPI_LONG_LONG_INT, MPI_SUM, 0, Communicator);
375 
376  double loc_max_data[2 * TIMEBINS + 3], glob_max_data[2 * TIMEBINS + 3];
377  loc_max_data[2 * TIMEBINS + 0] = Tp->NumPart;
378  loc_max_data[2 * TIMEBINS + 1] = Tp->NumGas;
379  loc_max_data[2 * TIMEBINS + 2] = Tp->NumPart - Tp->NumGas;
380 
381  double glob_sum_data[2 * TIMEBINS];
382 
383  double *loc_HydroCost = &loc_max_data[0];
384  double *loc_GravCost = &loc_max_data[TIMEBINS];
385  double *max_HydroCost = &glob_max_data[0];
386  double *max_GravCost = &glob_max_data[TIMEBINS];
387  double *glob_HydroCost = &glob_sum_data[0];
388  double *glob_GravCost = &glob_sum_data[TIMEBINS];
389 
390  for(int i = 0; i < TIMEBINS; i++)
391  {
392  loc_GravCost[i] = 0;
393  loc_HydroCost[i] = 0;
394  }
395 
396 #ifdef SELFGRAVITY
397  for(int i = 0; i < Tp->NumPart; i++)
398  {
399  for(int bin = Tp->P[i].TimeBinGrav; bin <= All.HighestOccupiedTimeBin; bin++)
400  {
401  loc_GravCost[bin] += MIN_FLOAT_NUMBER + domain_grav_weight[bin] * Tp->P[i].GravCost;
402  }
403  }
404 #endif
405 
406  for(int i = 0; i < Tp->NumPart; i++)
407  if(Tp->P[i].getType() == 0)
408  loc_HydroCost[Tp->P[i].getTimeBinHydro()] += 1.0;
409 
410  /* now determine the cumulative cost for the hydrodynamics */
411  for(int i = 1; i <= All.HighestOccupiedTimeBin; i++)
412  loc_HydroCost[i] += loc_HydroCost[i - 1];
413 
414  MPI_Reduce(loc_max_data, glob_sum_data, 2 * TIMEBINS, MPI_DOUBLE, MPI_SUM, 0, Communicator);
415  MPI_Reduce(loc_max_data, glob_max_data, 2 * TIMEBINS + 3, MPI_DOUBLE, MPI_MAX, 0, Communicator);
416 
417  if(ThisTask == 0)
418  {
419  double max_tot = glob_max_data[2 * TIMEBINS + 0];
420  double max_sph = glob_max_data[2 * TIMEBINS + 1];
421  double max_dm = glob_max_data[2 * TIMEBINS + 2];
422 
423  long long *tot_count = &glob_count[0];
424  long long *tot_count_sph = &glob_count[TIMEBINS];
425 
426  long long tot_cumulative[TIMEBINS];
427  tot_cumulative[0] = tot_count[0];
428 
429  for(int i = 1; i < TIMEBINS; i++)
430  tot_cumulative[i] = tot_count[i] + tot_cumulative[i - 1];
431 
432  double tot_gravcost = 0, max_gravcost = 0, tot_hydrocost = 0, max_hydrocost = 0;
433 
434  for(int i = 0; i < TIMEBINS; i++)
435  {
436  tot_gravcost += domain_to_be_balanced[i] * glob_GravCost[i] / NTask;
437  max_gravcost += domain_to_be_balanced[i] * max_GravCost[i];
438 
439  tot_hydrocost += domain_to_be_balanced[i] * glob_HydroCost[i] / NTask;
440  max_hydrocost += domain_to_be_balanced[i] * max_HydroCost[i];
441  }
442 
443  double bal_grav_bin[TIMEBINS], bal_grav_bin_rel[TIMEBINS];
444  double bal_hydro_bin[TIMEBINS], bal_hydro_bin_rel[TIMEBINS];
445 
446  for(int i = 0; i < TIMEBINS; i++)
447  {
448  if(tot_count[i] > 0)
449  {
450  bal_grav_bin[i] = max_GravCost[i] / (glob_GravCost[i] / NTask + SMALLNUM);
451  bal_grav_bin_rel[i] =
452  (tot_gravcost + domain_to_be_balanced[i] * (max_GravCost[i] - glob_GravCost[i] / NTask)) / (tot_gravcost + SMALLNUM);
453  }
454  else
455  {
456  bal_grav_bin[i] = 0.0;
457  bal_grav_bin_rel[i] = 0.0;
458  }
459 
460  if(tot_count_sph[i] > 0)
461  {
462  bal_hydro_bin[i] = max_HydroCost[i] / (glob_HydroCost[i] / NTask + SMALLNUM);
463  bal_hydro_bin_rel[i] = (tot_hydrocost + domain_to_be_balanced[i] * (max_HydroCost[i] - glob_HydroCost[i] / NTask)) /
464  (tot_hydrocost + SMALLNUM);
465  }
466  else
467  {
468  bal_hydro_bin[i] = 0.0;
469  bal_hydro_bin_rel[i] = 0.0;
470  }
471  }
472 
473  char buf[MAXLEN_PATH];
474  sprintf(buf, "\nDOMAIN BALANCE, Sync-Point %d, Time: %g\n", All.NumCurrentTiStep, All.Time);
475  domain_printf(buf);
476  sprintf(buf, "Timebins: Gravity Hydro cumulative grav-balance hydro-balance\n");
477  domain_printf(buf);
478 
479  long long tot = 0, tot_sph = 0;
480 
481  for(int i = TIMEBINS - 1; i >= 0; i--)
482  {
483 #if(defined(SELFGRAVITY) || defined(EXTERNALGRAVITY))
484  if(tot_count_sph[i] > 0 || tot_count[i] > 0)
485 #else
486  if(tot_count[i] > 0)
487  tot += tot_count[i];
488 
489  if(tot_count_sph[i] > 0)
490 #endif
491  {
492  char buf[MAXLEN_PATH];
493  sprintf(buf, "%c%cbin=%2d %10llu %10llu %10llu %6.3f |%6.3f %c %6.3f |%6.3f\n",
494  i == All.HighestActiveTimeBin ? '>' : ' ', i >= All.SmallestTimeBinWithDomainDecomposition ? '|' : ' ', i,
495  tot_count[i], tot_count_sph[i], tot_cumulative[i], bal_grav_bin[i], bal_grav_bin_rel[i],
496  domain_to_be_balanced[i] > 0 ? '*' : ' ', bal_hydro_bin[i], bal_hydro_bin_rel[i]);
497  domain_printf(buf);
498 
499  tot += tot_count[i];
500  tot_sph += tot_count_sph[i];
501  }
502  }
503 
504  sprintf(buf, "-------------------------------------------------------------------------------------\n");
505  domain_printf(buf);
506  sprintf(buf, "BALANCE, LOAD: %6.3f %6.3f %6.3f WORK: %6.3f %6.3f\n",
507  max_dm / (tot - tot_sph + SMALLNUM) * NTask, max_sph / (tot_sph + SMALLNUM) * NTask, max_tot / (tot + SMALLNUM) * NTask,
508  max_gravcost / (tot_gravcost + SMALLNUM), max_hydrocost / (tot_hydrocost + SMALLNUM));
509  domain_printf(buf);
510  sprintf(buf, "-------------------------------------------------------------------------------------\n");
511  domain_printf(buf);
512  sprintf(buf, "\n");
513  domain_printf(buf);
515  }
516 
517  TIMER_STOPSTART(CPU_LOGS, CPU_DOMAIN);
518 }
519 
520 #ifdef LIGHTCONE_PARTICLES
521 
522 template <>
524 {
525 }
526 
527 template <>
529 {
530 }
531 
532 #endif
533 
534 #include "../data/simparticles.h"
535 template class domain<simparticles>;
536 
537 #ifdef LIGHTCONE_PARTICLES
538 #include "../data/lcparticles.h"
539 template class domain<lcparticles>;
540 #endif
global_data_all_processes All
Definition: main.cc:40
Definition: domain.h:31
void domain_free(void)
Definition: domain.cc:179
int MultipleDomains
Definition: domain.h:55
void domain_allocate(void)
Definition: domain.cc:172
domain_options Mode
Definition: domain.h:50
void domain_decomposition(domain_options mode)
Definition: domain.cc:51
double timediff(double t0, double t1)
Definition: logs.cc:488
double second(void)
Definition: logs.cc:471
FILE * FdDomain
Definition: logs.h:52
int ThisTask
Definition: setcomm.h:33
int NTask
Definition: setcomm.h:32
MPI_Comm Communicator
Definition: setcomm.h:31
#define TIMEBINS
Definition: constants.h:332
#define SMALLNUM
Definition: constants.h:83
#define MIN_FLOAT_NUMBER
Definition: constants.h:80
#define MAXLEN_PATH
Definition: constants.h:300
#define BASENUMBER
Definition: constants.h:303
domain_options
Definition: domain.h:22
@ COLL_SUBFIND
Definition: domain.h:24
@ STANDARD
Definition: domain.h:23
@ RST_STARTFROMSNAP
Definition: dtypes.h:315
@ RST_BEGIN
Definition: dtypes.h:313
@ RST_RESUME
Definition: dtypes.h:314
logs Logs
Definition: main.cc:43
#define TIMER_START(counter)
Starts the timer counter.
Definition: logs.h:197
#define TIMER_STOP(counter)
Stops the timer counter.
Definition: logs.h:220
#define TIMER_STOPSTART(stop, start)
Stops the timer 'stop' and starts the timer 'start'.
Definition: logs.h:231
#define Terminate(...)
Definition: macros.h:19
memory Mem
Definition: main.cc:44
enum restart_options RestartFlag
Definition: allvars.h:68
int SmallestTimeBinWithDomainDecomposition
Definition: allvars.h:160
void myflush(FILE *fstream)
Definition: system.cc:125