GADGET-4
domain_balance.cc — Doxygen-generated documentation page for this source file.
1 /*******************************************************************************
2  * \copyright This file is part of the GADGET4 N-body/SPH code developed
3  * \copyright by Volker Springel. Copyright (C) 2014-2020 by Volker Springel
4  * \copyright (vspringel@mpa-garching.mpg.de) and all contributing authors.
5  *******************************************************************************/
6 
12 #include "gadgetconfig.h"
13 
14 #include <mpi.h>
15 #include <cmath>
16 #include <cstdio>
17 #include <cstdlib>
18 #include <cstring>
19 
20 #include "../data/allvars.h"
21 #include "../data/dtypes.h"
22 #include "../data/mymalloc.h"
23 #include "../domain/domain.h"
24 #include "../logs/timer.h"
25 #include "../main/simulation.h"
26 #include "../mpi_utils/mpi_utils.h"
27 #include "../sort/cxxsort.h"
28 #include "../system/system.h"
29 #include "../time_integration/timestep.h"
30 
35 #ifdef DOMAIN_SPECIAL_CHECK
36 
// NOTE(review): this listing is a Doxygen extraction -- each code line still carries
// its original source line number as a leading token; comments below refer to those
// embedded numbers.
//
// Debug helper: accumulates the normalized load/cost arrays either per top-leaf
// (mode == 0) or per task (mode == 1, via TaskOfLeaf[]), sums them over the
// communicator, and lets task 0 dump them to OutputDir/domain_data_<mode>_step<N>.txt.
// 'ndomains' is the number of bins (top leaves resp. tasks) being accumulated into.
37 template <typename partset>
38 void domain<partset>::domain_special_check(int mode, int ndomains)
39 {
// single zero-initialized allocation, carved into four sub-arrays below
40  double *cost_data = (double *)Mem.mymalloc_clear("cost_data", sizeof(double) * 2 * ndomains * (NumTimeBinsToBeBalanced + 1));
41 
42  double *load = cost_data;
43  double *loadsph = cost_data + ndomains;
44  double *binGravCost = cost_data + 2 * ndomains;
45  double *binHydroCost = cost_data + 2 * ndomains + ndomains * NumTimeBinsToBeBalanced;
46 
// accumulate this task's contribution for every local particle
47  for(int i = 0; i < Tp->NumPart; i++)
48  {
49  int no = n_to_no(i);
50 
51  int n;
52 
// mode 0: bin by top-leaf index; otherwise bin by the task the leaf is assigned to
53  if(mode == 0)
54  n = no;
55  else
56  n = TaskOfLeaf[no];
57 
58  if(n < 0 || n >= ndomains)
59  Terminate("strange");
60 
61 #ifdef SELFGRAVITY
62  for(int k = 0; k < NumTimeBinsToBeBalanced; k++)
63  {
64  int bin = ListOfTimeBinsToBeBalanced[k];
65 
// a particle contributes its gravity cost to every balanced bin at or below its own
66  if(bin >= Tp->P[i].getTimeBinGrav())
67  binGravCost[k * ndomains + n] += GravCostNormFactors[k] * Tp->P[i].getGravCost();
68  }
69 #endif
70 
71  load[n] += NormFactorLoad;
72 
// gas particles (type 0) additionally contribute hydro cost and SPH load
73  if(Tp->P[i].getType() == 0)
74  {
75  for(int k = 0; k < NumTimeBinsToBeBalanced; k++)
76  {
77  int bin = ListOfTimeBinsToBeBalanced[k];
78 
79  if(bin >= Tp->P[i].getTimeBinHydro())
80  binHydroCost[k * ndomains + n] += HydroCostNormFactors[k];
81  }
82 
83  loadsph[n] += NormFactorLoadSph;
84  }
85  }
86 
// sum the contributions of all tasks in place
87  MPI_Allreduce(MPI_IN_PLACE, cost_data, 2 * ndomains * (NumTimeBinsToBeBalanced + 1), MPI_DOUBLE, MPI_SUM, Communicator);
88 
// NOTE(review): original line 89 is missing from this extraction -- presumably a
// condition guarding the diagnostic dump below; verify against the upstream file.
90  {
91  if(ThisTask == 0)
92  {
93  char buf[1000];
94  sprintf(buf, "%s/domain_data_%d_step%d.txt", All.OutputDir, mode, All.NumCurrentTiStep);
95  FILE *fd = fopen(buf, "w");
96  fprintf(fd, "%d %d\n", ndomains, NumTimeBinsToBeBalanced);
97  for(int n = 0; n < ndomains; n++)
98  {
99  fprintf(fd, "%g ", load[n]);
100  for(int k = 0; k < NumTimeBinsToBeBalanced; k++)
101  fprintf(fd, "%g ", binGravCost[k * ndomains + n]);
102  fprintf(fd, "\n");
103  }
104  fclose(fd);
105  }
106  }
107 
108  Mem.myfree(cost_data);
109 }
110 
111 #endif
112 
122 template <typename partset>
// NOTE(review): the function signature (original line 123) is missing from this
// extraction. Judging by the log messages ("combining multiple-domains ...") this is
// the routine that merges the NTopleaves domain pieces into MultipleDomains*NTask-nextra
// chunks and greedily maps them onto MPI tasks -- presumably
// domain_combine_multipledomains(void); verify against the upstream file.
124 {
125  double t0 = Logs.second();
126 
127  /* we first determine the detailed cost of all the domain pieces (which are the top leaves in the tree), so that we can combine them
128  * later on efficiently for different choices of nextra
129  */
130 
// one zero-initialized allocation, carved into the four per-topleaf cost arrays below
131  double *cost_data = (double *)Mem.mymalloc_clear("cost_data", sizeof(double) * 2 * NTopleaves * (NumTimeBinsToBeBalanced + 1));
132 
133  double *load = cost_data;
134  double *loadsph = cost_data + NTopleaves;
135  double *binGravCost = cost_data + 2 * NTopleaves;
136  double *binHydroCost = cost_data + 2 * NTopleaves + NTopleaves * NumTimeBinsToBeBalanced;
137 
// accumulate normalized per-topleaf costs from all local particles
138  for(int i = 0; i < Tp->NumPart; i++)
139  {
140  int no = n_to_no(i); // get the leave node
141 
142 #ifdef SELFGRAVITY
143  for(int k = 0; k < NumTimeBinsToBeBalanced; k++)
144  {
145  int bin = ListOfTimeBinsToBeBalanced[k];
146 
// a particle contributes its gravity cost to every balanced bin at or below its own
147  if(bin >= Tp->P[i].getTimeBinGrav())
148  binGravCost[k * NTopleaves + no] += GravCostNormFactors[k] * Tp->P[i].getGravCost();
149  }
150 #endif
151 
152  load[no] += NormFactorLoad;
153 
// gas particles (type 0) additionally contribute hydro cost and SPH load
154  if(Tp->P[i].getType() == 0)
155  {
156  for(int k = 0; k < NumTimeBinsToBeBalanced; k++)
157  {
158  int bin = ListOfTimeBinsToBeBalanced[k];
159 
160  if(bin >= Tp->P[i].getTimeBinHydro())
161  binHydroCost[k * NTopleaves + no] += HydroCostNormFactors[k];
162  }
163 
164  loadsph[no] += NormFactorLoadSph;
165  }
166  }
167 
// global sum over all tasks (project helper instead of the commented-out plain Allreduce)
168  allreduce_sum<double>(cost_data, 2 * NTopleaves * (NumTimeBinsToBeBalanced + 1), Communicator);
169  /*
170  MPI_Allreduce(MPI_IN_PLACE, cost_data, 2 * NTopleaves * (NumTimeBinsToBeBalanced + 1), MPI_DOUBLE, MPI_SUM, Communicator);
171 */
172 
173 #ifdef DOMAIN_SPECIAL_CHECK
// NOTE(review): original line 174 is missing from this extraction -- presumably a
// condition guarding the diagnostic dump below; verify against the upstream file.
175  {
176  if(ThisTask == 0)
177  {
178  char buf[1000];
179  sprintf(buf, "%s/domain_data_0_step%d.txt", All.OutputDir, All.NumCurrentTiStep);
180  FILE *fd = fopen(buf, "w");
181  fprintf(fd, "%d %d\n", NTopleaves, NumTimeBinsToBeBalanced);
182  for(int n = 0; n < NTopleaves; n++)
183  {
184  fprintf(fd, "%g ", load[n]);
185  for(int k = 0; k < NumTimeBinsToBeBalanced; k++)
186  fprintf(fd, "%g ", binGravCost[k * NTopleaves + n]);
187  fprintf(fd, "\n");
188  }
189  fclose(fd);
190  }
191  }
192 #endif
193 
194  /* let's now find the optimum combination */
195 
196  /* first, enumerate the possibilities that we are going to try */
197 
// two-pass pattern: rep==0 only counts the candidate settings, rep==1 fills the
// freshly allocated list with (nextra, target imbalance) pairs
198  int cnt_try = 0;
199  balance_try_data *balance_try = NULL;
200 
201  for(int rep = 0; rep < 2; rep++) // we repeat this twice, first just counting, then allocating and filling the list
202  {
203  cnt_try = 0;
204 
205  double fac = 0;
206  int nextra = 0;
207  int nextra_prev = -1;
208 
209  while(nextra <= NTask)
210  {
211  if(nextra != nextra_prev)
212  {
// smallest imbalance achievable when nextra pieces fewer than MultipleDomains*NTask are used
213  double base_balance = ((double)(MultipleDomains * NTask)) / (MultipleDomains * NTask - nextra);
214 
215  double excess_balance = 0.01;
216 
// enumerate geometrically growing imbalance allowances up to a factor 2.5
217  while(base_balance + excess_balance < 2.5)
218  {
219  if(rep == 1)
220  {
221  balance_try[cnt_try].nextra = nextra;
222  balance_try[cnt_try].try_balance = base_balance + excess_balance;
223  }
224 
225  cnt_try++;
226 
227  excess_balance *= 1.25;
228  }
229 
230  nextra_prev = nextra;
231  }
232 
// grow nextra geometrically (1%, 1.25%, ... of NTask) rather than linearly
233  if(fac == 0)
234  fac = 0.01;
235  else
236  fac *= 1.25;
237 
238  nextra = fac * NTask;
239  }
240 
241  if(rep == 0)
242  balance_try = (balance_try_data *)Mem.mymalloc("balance_try", cnt_try * sizeof(balance_try_data));
243  }
244 
245  if(NumNodes == 0)
246  determine_compute_nodes();
247 
248  domain_printf("DOMAIN: we are going to try at most %d different settings for combining the domains on tasks=%d, nnodes=%d\n",
249  cnt_try, NTask, NumNodes);
250 
251  /* sort the combinations such that we first try those yielding a lower imbalance */
252  mycxxsort(balance_try, balance_try + cnt_try, domain_compare_trybalance);
253 
254  int start_try = 0;
255  int completed = 0;
256  int niter = 0;
257 
// each round, every MPI rank tries a different candidate setting in parallel;
// rounds advance through the sorted list NTask settings at a time
258  while(completed == 0 && start_try < cnt_try)
259  {
260  double glob_max_cost = 0;
261 
262  if(start_try + ThisTask < cnt_try)
263  {
264  int nextra = balance_try[start_try + ThisTask].nextra;
265  double try_balance = balance_try[start_try + ThisTask].try_balance;
266 
267  int ndomains = MultipleDomains * NTask - nextra;
268 
269  domain_segments_data *domainAssign =
270  (domain_segments_data *)Mem.mymalloc_clear("domainAssign", ndomains * sizeof(domain_segments_data));
271 
272  /* consolidate the finely split domains pieces into larger chunks, exactly ndomains of them */
273  {
274  double max_cost = 0;
275  double costhalfnode = (0.5 * TotalCost) / NTopleaves;
276  double costavg = TotalCost / ndomains;
277  double cost_before = 0;
278  double costavg_before = 0;
279  int start = 0;
280 
281  double total_cost = 0, total_load = 0;
282 
// greedily extend each chunk [start,end] of consecutive top leaves until its cost
// reaches the running average, while leaving enough leaves for the remaining chunks
283  for(int n = 0; n < ndomains; n++)
284  {
285  int end = start;
286 
287  double cost = domain_leaf_cost[end].Cost;
288 
289  while((cost + cost_before + (end + 1 < NTopleaves ? domain_leaf_cost[end + 1].Cost : 0) <
290  costavg + costavg_before + costhalfnode) ||
291  (n == ndomains - 1 && end < NTopleaves - 1))
292  {
293  if((NTopleaves - end) > (ndomains - n))
294  end++;
295  else
296  break;
297 
298  cost += domain_leaf_cost[end].Cost;
299  }
300 
301  domainAssign[n].start = start;
302  domainAssign[n].end = end;
303 
304  /* let's also determine the grav-cost and hydro-cost separately for each timebin of all the domain-pieces */
305  for(int no = domainAssign[n].start; no <= domainAssign[n].end; no++)
306  {
307  domainAssign[n].load += load[no];
308  domainAssign[n].loadsph += loadsph[no];
309 
310  total_load += load[no] + loadsph[no];
311  total_cost += load[no] + loadsph[no];
312 
313  for(int i = 0; i < NumTimeBinsToBeBalanced; i++)
314  {
315  domainAssign[n].bin_GravCost[i] += binGravCost[i * NTopleaves + no];
316  domainAssign[n].bin_HydroCost[i] += binHydroCost[i * NTopleaves + no];
317 
318  total_cost += binGravCost[i * NTopleaves + no] + binHydroCost[i * NTopleaves + no];
319  }
320  }
321 
322  cost_before += cost;
323  costavg_before += costavg;
324 
325  start = end + 1;
326 
327  if(max_cost < cost)
328  max_cost = cost;
329  }
330 
331  domain_printf("DOMAIN: total_cost=%g total_load=%g\n", total_cost, total_load);
332  }
333 
334  /* now start to map the domain pieces onto different tasks
335  */
336 
// per-task accumulated cost, used to evaluate the imbalance of a tentative assignment
337  struct tasklist_data
338  {
339  double bin_GravCost[TIMEBINS];
340  double bin_HydroCost[TIMEBINS];
341  double load;
342  double loadsph;
343  };
344 
345  tasklist_data *tasklist = (tasklist_data *)Mem.mymalloc_clear("tasklist", NTask * sizeof(tasklist_data));
346 
// build one descending-sorted cost queue per active cost component (each balanced
// gravity bin, each balanced hydro bin, particle load, SPH load)
347  int n_cost_items = 0;
348  cost_queue_data *cost_queues[2 * TIMEBINS + 2];
349  int first_unusued_in_cost_queue[2 * TIMEBINS + 2];
350 
351  for(int n = 0; n < NumTimeBinsToBeBalanced; n++)
352  {
353  if(GravCostPerListedTimeBin[n] > 0.0)
354  {
355  cost_queues[n_cost_items] = (cost_queue_data *)Mem.mymalloc("cost_queues", ndomains * sizeof(cost_queue_data));
356  for(int i = 0; i < ndomains; i++)
357  {
358  cost_queues[n_cost_items][i].value = domainAssign[i].bin_GravCost[n];
359  cost_queues[n_cost_items][i].index = i;
360  }
361 #ifdef SIMPLE_DOMAIN_AGGREGATION
362  domain_determinate_aggregated_value(cost_queues[n_cost_items], ndomains);
363 #endif
364  mycxxsort(cost_queues[n_cost_items], cost_queues[n_cost_items] + ndomains, domain_sort_cost_queue_data);
365  first_unusued_in_cost_queue[n_cost_items] = 0;
366 
367  n_cost_items++;
368  }
369 
370  if(HydroCostNormFactors[n] > 0.0)
371  {
372  cost_queues[n_cost_items] = (cost_queue_data *)Mem.mymalloc("cost_queues", ndomains * sizeof(cost_queue_data));
373  for(int i = 0; i < ndomains; i++)
374  {
375  cost_queues[n_cost_items][i].value = domainAssign[i].bin_HydroCost[n];
376  cost_queues[n_cost_items][i].index = i;
377  }
378 #ifdef SIMPLE_DOMAIN_AGGREGATION
379  domain_determinate_aggregated_value(cost_queues[n_cost_items], ndomains);
380 #endif
381  mycxxsort(cost_queues[n_cost_items], cost_queues[n_cost_items] + ndomains, domain_sort_cost_queue_data);
382  first_unusued_in_cost_queue[n_cost_items] = 0;
383 
384  n_cost_items++;
385  }
386  }
387 
388  if(NormFactorLoad > 0.0)
389  {
390  cost_queues[n_cost_items] = (cost_queue_data *)Mem.mymalloc("cost_queues", ndomains * sizeof(cost_queue_data));
391  for(int i = 0; i < ndomains; i++)
392  {
393  cost_queues[n_cost_items][i].value = domainAssign[i].load;
394  cost_queues[n_cost_items][i].index = i;
395  }
396 #ifdef SIMPLE_DOMAIN_AGGREGATION
397  domain_determinate_aggregated_value(cost_queues[n_cost_items], ndomains);
398 #endif
399  mycxxsort(cost_queues[n_cost_items], cost_queues[n_cost_items] + ndomains, domain_sort_cost_queue_data);
400  first_unusued_in_cost_queue[n_cost_items] = 0;
401 
402  n_cost_items++;
403  }
404 
405  if(NormFactorLoadSph > 0.0)
406  {
407  cost_queues[n_cost_items] = (cost_queue_data *)Mem.mymalloc("cost_queues", ndomains * sizeof(cost_queue_data));
408  for(int i = 0; i < ndomains; i++)
409  {
410  cost_queues[n_cost_items][i].value = domainAssign[i].loadsph;
411  cost_queues[n_cost_items][i].index = i;
412  }
413 #ifdef SIMPLE_DOMAIN_AGGREGATION
414  domain_determinate_aggregated_value(cost_queues[n_cost_items], ndomains);
415 #endif
416  mycxxsort(cost_queues[n_cost_items], cost_queues[n_cost_items] + ndomains, domain_sort_cost_queue_data);
417  first_unusued_in_cost_queue[n_cost_items] = 0;
418 
419  n_cost_items++;
420  }
421 
// greedy assignment: round-robin over the cost queues, always taking the most
// expensive still-unassigned piece, as long as the candidate keeps the task's
// projected imbalance under try_balance
422  int nextqueue = 0;
423  int ndomains_left = ndomains;
424  int target = 0;
425 
426  for(target = 0; target < NTask && ndomains_left > 0; target++)
427  {
428  int count = 0; // number of pieces added to this target task
429  int failures = 0;
430 
431  while(ndomains_left > 0)
432  {
// skip over entries at the queue front that were already assigned elsewhere
433  int k = first_unusued_in_cost_queue[nextqueue];
434  while(domainAssign[cost_queues[nextqueue][k].index].used)
435  {
436  if(k == ndomains - 1)
437  Terminate("target=%d nextqueue=%d ndomains_left=%d k == ndomains - 1", target, nextqueue, ndomains_left);
438 
439  k++;
440  first_unusued_in_cost_queue[nextqueue]++;
441  }
442 
443  /* this is our next candidate for adding */
444  int n = cost_queues[nextqueue][k].index;
445 
446  nextqueue = (nextqueue + 1) % n_cost_items;
447 
448  // Let's see what imbalance we would get
449  double max_cost = 0;
450 
451  if(max_cost < tasklist[target].load + domainAssign[n].load)
452  max_cost = tasklist[target].load + domainAssign[n].load;
453 
454  if(max_cost < tasklist[target].loadsph + domainAssign[n].loadsph)
455  max_cost = tasklist[target].loadsph + domainAssign[n].loadsph;
456 
457  for(int bin = 0; bin < NumTimeBinsToBeBalanced; bin++)
458  {
459  if(max_cost < tasklist[target].bin_GravCost[bin] + domainAssign[n].bin_GravCost[bin])
460  max_cost = tasklist[target].bin_GravCost[bin] + domainAssign[n].bin_GravCost[bin];
461 
462  if(max_cost < tasklist[target].bin_HydroCost[bin] + domainAssign[n].bin_HydroCost[bin])
463  max_cost = tasklist[target].bin_HydroCost[bin] + domainAssign[n].bin_HydroCost[bin];
464  }
465 
// the first piece is always accepted; later pieces only if the projected
// imbalance (costs are normalized, so max_cost * NTask is the imbalance factor)
// stays below the tried balance target
466  if(count > 0)
467  {
468  if(max_cost * NTask > try_balance)
469  {
470  failures++;
471 
472  if(failures > n_cost_items)
473  break;
474  else
475  continue;
476  }
477  }
478 
479  if(max_cost > glob_max_cost)
480  glob_max_cost = max_cost;
481 
482  domainAssign[n].task = target;
483  domainAssign[n].used = 1;
484 
485  tasklist[target].load += domainAssign[n].load;
486  tasklist[target].loadsph += domainAssign[n].loadsph;
487  for(int bin = 0; bin < NumTimeBinsToBeBalanced; bin++)
488  {
489  tasklist[target].bin_GravCost[bin] += domainAssign[n].bin_GravCost[bin];
490  tasklist[target].bin_HydroCost[bin] += domainAssign[n].bin_HydroCost[bin];
491  }
492 
493  ndomains_left--;
494  count++;
495 
496  // if the following condition holds, no reason any further to try to stuff more than one piece together
497  if(ndomains_left < NTask - target)
498  break;
499  }
500 
501  // do an extra skip, so that we do not typically start with the same queue
502  nextqueue = (nextqueue + 1) % n_cost_items;
503  }
504 
505  if(ndomains_left == 0)
506  {
507  domain_printf("DOMAIN: combining multiple-domains succeeded, target=%d NTask=%d\n", target, NTask);
508  completed = 1;
509 
510 #ifdef DOMAIN_SPECIAL_CHECK
// NOTE(review): original line 511 is missing from this extraction -- presumably a
// condition guarding the diagnostic dump below; verify against the upstream file.
512  {
513  char buf[1000];
514  sprintf(buf, "%s/domain_data_1_step%d_task%d.txt", All.OutputDir, All.NumCurrentTiStep, ThisTask);
515  FILE *fd = fopen(buf, "w");
516  fprintf(fd, "%d %d\n", ndomains, NumTimeBinsToBeBalanced);
517  for(int n = 0; n < ndomains; n++)
518  {
519  fprintf(fd, "%g ", domainAssign[n].load);
520  for(int k = 0; k < NumTimeBinsToBeBalanced; k++)
521  fprintf(fd, "%g ", domainAssign[n].bin_GravCost[k]);
522  fprintf(fd, "\n");
523  }
524  fclose(fd);
525  }
// NOTE(review): original line 526 is missing from this extraction -- presumably a
// condition guarding the second dump block; verify against the upstream file.
527  {
528  char buf[1000];
529  sprintf(buf, "%s/domain_data_2_step%d_task%d.txt", All.OutputDir, All.NumCurrentTiStep, ThisTask);
530  FILE *fd = fopen(buf, "w");
531  fprintf(fd, "%d %d\n", NTask, NumTimeBinsToBeBalanced);
532  for(int n = 0; n < NTask; n++)
533  {
534  fprintf(fd, "%g ", tasklist[n].load);
535  for(int k = 0; k < NumTimeBinsToBeBalanced; k++)
536  fprintf(fd, "%g ", tasklist[n].bin_GravCost[k]);
537  fprintf(fd, "\n");
538  }
539  fprintf(fd, "%g\n", glob_max_cost * NTask);
540  fclose(fd);
541  }
542 #endif
543 
544  /* store the mapping of the topleaves to tasks */
545  for(int n = 0; n < ndomains; n++)
546  {
547  for(int i = domainAssign[n].start; i <= domainAssign[n].end; i++)
548  TaskOfLeaf[i] = domainAssign[n].task;
549  }
550  }
551  else
552  {
// this candidate setting failed: make sure it cannot win the MINLOC vote below
553  glob_max_cost = MAX_DOUBLE_NUMBER;
554  }
555 
556  for(int num = n_cost_items - 1; num >= 0; num--)
557  Mem.myfree(cost_queues[num]);
558 
559  Mem.myfree(tasklist);
560  Mem.myfree(domainAssign);
561  }
562  else
563  glob_max_cost = MAX_DOUBLE_NUMBER; /* this processor was not doing anything */
564 
// pick the rank that achieved the lowest maximum cost; MPI_DOUBLE_INT/MPI_MINLOC
// returns both the minimum and the owning rank
565  struct
566  {
567  double cost;
568  int rank;
569  } global = {glob_max_cost, ThisTask};
570 
571  MPI_Allreduce(MPI_IN_PLACE, &global, 1, MPI_DOUBLE_INT, MPI_MINLOC, Communicator);
572 
573  MPI_Allreduce(MPI_IN_PLACE, &completed, 1, MPI_INT, MPI_MAX, Communicator);
574 
575  niter++;
576 
577  if(completed)
578  {
579  domain_printf(
580  "DOMAIN: best solution found after %d iterations by task=%d for nextra=%d, reaching maximum imbalance of %g|%g\n", niter,
581  global.rank, balance_try[start_try + global.rank].nextra, global.cost * NTask,
582  balance_try[start_try + global.rank].try_balance);
583 
// the winning rank distributes its topleaf-to-task mapping to everyone
584  MPI_Bcast(TaskOfLeaf, NTopleaves, MPI_INT, global.rank, Communicator);
585  break;
586  }
587 
588  start_try += NTask;
589  }
590 
591  if(completed == 0)
592  Terminate("domain balancing failed");
593 
594  Mem.myfree(balance_try);
595 
596  Mem.myfree(cost_data);
597 
598  double t1 = Logs.second();
599  domain_printf("DOMAIN: combining multiple-domains took %g sec\n", Logs.timediff(t0, t1));
600 }
601 
#ifdef SIMPLE_DOMAIN_AGGREGATION
/*! \brief Assigns to every cost-queue entry the summed cost of the compute-node
 *  sized chunk it belongs to.
 *
 *  The ndomains entries are split into NumNodes contiguous chunks; the first
 *  (ndomains % NumNodes) chunks receive one extra entry so that all entries are
 *  covered. Each entry's aggregated_value is set to the total 'value' of its chunk.
 *
 *  \param data      array of ndomains cost-queue entries (modified in place)
 *  \param ndomains  number of entries in \a data
 */
template <typename partset>
void domain<partset>::domain_determinate_aggregated_value(cost_queue_data *data, int ndomains)
{
  if(NumNodes < 1)
    Terminate("NumNodes=%d\n", NumNodes);

  const int chunk = ndomains / NumNodes; /* minimum entries per node */
  int surplus     = ndomains % NumNodes; /* first 'surplus' nodes take one more */

  int hi = 0;
  for(int node = 0; node < NumNodes; node++)
    {
      const int lo = hi;

      hi = lo + chunk;
      if(surplus > 0)
        {
          hi++;
          surplus--;
        }

      /* total cost of this chunk ... */
      double sum = 0;
      for(int k = lo; k < hi; k++)
        sum += data[k].value;

      /* ... stamped onto every entry of the chunk */
      for(int k = lo; k < hi; k++)
        data[k].aggregated_value = sum;
    }

  if(hi != ndomains)
    Terminate("end != ndomains");
}
#endif
637 
645 template <>
647 {
648  long long tot_count[TIMEBINS], tot_count_sph[TIMEBINS];
649 
650  sumup_large_ints(TIMEBINS, Tp->TimeBinsGravity.TimeBinCount, tot_count, Communicator);
651  sumup_large_ints(TIMEBINS, Tp->TimeBinsHydro.TimeBinCount, tot_count_sph, Communicator);
652 
653  for(int i = 0; i < TIMEBINS; i++)
654  {
655  domain_to_be_balanced[i] = 0;
656  domain_grav_weight[i] = 1;
657  domain_hydro_weight[i] = 1;
658  }
659 
660  domain_to_be_balanced[All.HighestActiveTimeBin] = 1;
661  domain_grav_weight[All.HighestActiveTimeBin] = 1;
662  domain_hydro_weight[All.HighestActiveTimeBin] = 1;
663 
664  ListOfTimeBinsToBeBalanced[0] = All.HighestActiveTimeBin;
665  NumTimeBinsToBeBalanced = 1;
666 
667  if(Mode == COLL_SUBFIND)
668  return;
669 
670 #ifdef HIERARCHICAL_GRAVITY
671 
672  for(int j = All.HighestActiveTimeBin - 1; j >= All.LowestOccupiedTimeBin; j--)
673  {
674  if(tot_count[j] > 0 || tot_count_sph[j] > 0)
675  {
676  ListOfTimeBinsToBeBalanced[NumTimeBinsToBeBalanced++] = j;
677 
678  domain_to_be_balanced[j] = 1;
679  }
680 
681  domain_grav_weight[j] += 2;
682  }
683 
684  for(int i = All.SmallestTimeBinWithDomainDecomposition - 1, weight = 1; i >= All.LowestOccupiedTimeBin; i--, weight *= 2)
685  {
686  if(tot_count[i] > 0)
687  {
688  domain_grav_weight[i] = weight;
689 
690  for(int j = i - 1; j >= All.LowestOccupiedTimeBin; j--)
691  domain_grav_weight[j] += 2 * weight;
692  }
693 
694  if(tot_count_sph[i] > 0)
695  domain_hydro_weight[i] = weight;
696  }
697 
698 #else
699 
700  for(int i = All.SmallestTimeBinWithDomainDecomposition - 1, weight = 1; i >= All.LowestOccupiedTimeBin; i--, weight *= 2)
701  {
702  if(tot_count[i] > 0 || tot_count_sph[i] > 0)
703  {
704  ListOfTimeBinsToBeBalanced[NumTimeBinsToBeBalanced++] = i;
705  domain_to_be_balanced[i] = 1;
706  }
707 
708  if(tot_count[i] > 0)
709  domain_grav_weight[i] = weight;
710 
711  if(tot_count_sph[i] > 0)
712  domain_hydro_weight[i] = weight;
713  }
714 
715 #endif
716 }
717 
#if defined(LIGHTCONE) && defined(LIGHTCONE_PARTICLES)

/*! \brief Initializes the balancing weights for lightcone particles.
 *
 *  Lightcone particles are not time-integrated, so all weights are set to unity
 *  and only timebin 0 is flagged for balancing.
 *
 *  NOTE(review): the signature line was lost in the documentation extraction of
 *  this file; it has been restored as the lcparticles specialization of
 *  domain_init_sum_cost(). Verify against upstream.
 */
template <>
void domain<lcparticles>::domain_init_sum_cost(void)
{
  for(int i = 0; i < TIMEBINS; i++)
    {
      domain_to_be_balanced[i] = 0;
      domain_grav_weight[i]    = 1;
      domain_hydro_weight[i]   = 1;
    }

  domain_to_be_balanced[0] = 1;
}

#endif
734 
735 #include "../data/simparticles.h"
736 template class domain<simparticles>;
737 
738 #ifdef LIGHTCONE_PARTICLES
739 #include "../data/lcparticles.h"
740 template class domain<lcparticles>;
741 #endif
Cross-references (Doxygen-generated index of symbols used above):
- global_data_all_processes All — Definition: main.cc:40
- class domain — Definition: domain.h:31 (entity name line lost in extraction; inferred from the instantiations above — verify)
- domain_options Mode — Definition: domain.h:50
- double timediff(double t0, double t1) — Definition: logs.cc:488
- double second(void) — Definition: logs.cc:471
- MPI_Comm Communicator — Definition: setcomm.h:31
- #define TIMEBINS — Definition: constants.h:332
- #define MAX_DOUBLE_NUMBER — Definition: constants.h:81
- double mycxxsort(T *begin, T *end, Tcomp comp) — Definition: cxxsort.h:39
- COLL_SUBFIND — Definition: domain.h:24
- logs Logs — Definition: main.cc:43
- #define Terminate(...) — Definition: macros.h:19
- void sumup_large_ints(int n, int *src, long long *res, MPI_Comm comm)
- memory Mem — Definition: main.cc:44
- int SmallestTimeBinWithDomainDecomposition — Definition: allvars.h:160
- char OutputDir[MAXLEN_PATH] — Definition: allvars.h:272