GADGET-4
gwalk.cc
Go to the documentation of this file.
1 /*******************************************************************************
2  * \copyright This file is part of the GADGET4 N-body/SPH code developed
3  * \copyright by Volker Springel. Copyright (C) 2014-2020 by Volker Springel
4  * \copyright (vspringel@mpa-garching.mpg.de) and all contributing authors.
5  *******************************************************************************/
6 
12 #include "gadgetconfig.h"
13 
14 #include <math.h>
15 #include <mpi.h>
16 #include <stdlib.h>
17 #include <string.h>
18 
19 #include "../data/allvars.h"
20 #include "../data/dtypes.h"
21 #include "../data/intposconvert.h"
22 #include "../data/mymalloc.h"
23 #include "../domain/domain.h"
24 #include "../gravity/ewald.h"
25 #include "../gravtree/gravtree.h"
26 #include "../gravtree/gwalk.h"
27 #include "../logs/logs.h"
28 #include "../logs/timer.h"
29 #include "../main/simulation.h"
30 #include "../mpi_utils/mpi_utils.h"
31 #include "../pm/pm.h"
32 #include "../sort/cxxsort.h"
33 #include "../sort/peano.h"
34 #include "../system/system.h"
35 #include "../time_integration/timestep.h"
36 
45 inline void gwalk::evaluate_particle_particle_interaction(const pinfo &pdat, const int no, const char jtype, int shmrank)
46 {
47 #ifdef PRESERVE_SHMEM_BINARY_INVARIANCE
48  if(skip_actual_force_computation)
49  return;
50 #endif
51 
52  /* first, let's get the relevant information of our partner particle, which can be in three different lists */
53 
54  MyIntPosType *intpos;
55  MyReal mass;
56 #if defined(PMGRID) && defined(PLACEHIGHRESREGION)
57  int overlap_check;
58 #endif
59 
60  MyReal hmax = pdat.h_i;
61 
62  if(jtype == NODE_TYPE_LOCAL_PARTICLE)
63  {
64  particle_data *P = get_Pp(no, shmrank);
65 
66  intpos = P->IntPos;
67  mass = P->getMass();
68 
69 #if NSOFTCLASSES > 1
71  if(h_j > hmax)
72  hmax = h_j;
73 #endif
74 #if defined(PMGRID) && defined(PLACEHIGHRESREGION)
75  overlap_check = P->InsideOutsideFlag;
76 #endif
77  }
78  else if(jtype == NODE_TYPE_TREEPOINT_PARTICLE)
79  {
80  int n = no - ImportedNodeOffset;
81 
82  gravpoint_data *Pointp = get_pointsp(n, shmrank);
83 
84  intpos = Pointp->IntPos;
85  mass = Pointp->Mass;
86 
87 #if NSOFTCLASSES > 1
88  MyReal h_j = All.ForceSoftening[Pointp->getSofteningClass()];
89  if(h_j > hmax)
90  hmax = h_j;
91 #endif
92 #if defined(PMGRID) && defined(PLACEHIGHRESREGION)
93  overlap_check = Pointp->InsideOutsideFlag;
94 #endif
95  }
96  else /* a point that was fetched */
97  {
98  int n = no - EndOfForeignNodes;
99 
100  foreign_gravpoint_data *foreignpoint = get_foreignpointsp(n, shmrank);
101 
102  intpos = foreignpoint->IntPos;
103  mass = foreignpoint->Mass;
104 #if NSOFTCLASSES > 1
105  MyReal h_j = All.ForceSoftening[foreignpoint->getSofteningClass()];
106  if(h_j > hmax)
107  hmax = h_j;
108 #endif
109 #if defined(PMGRID) && defined(PLACEHIGHRESREGION)
110  overlap_check = foreignpoint->InsideOutsideFlag;
111 #endif
112  }
113 
114 #ifdef PMGRID
115  mesh_factors *mfp = &mf[LOW_MESH];
116 #if defined(PLACEHIGHRESREGION)
117  if((DoPM & TREE_ACTIVE_CUTTOFF_HIGHRES_PM))
118  {
119  if(overlap_check == FLAG_INSIDE && pdat.InsideOutsideFlag == FLAG_INSIDE)
120  mfp = &mf[HIGH_MESH];
121  }
122 #endif
123 #endif
124 
125  vector<MyReal> dxyz;
126  Tp->nearest_image_intpos_to_pos(intpos, pdat.intpos, dxyz.da);
127 
128  MyReal r2 = dxyz.r2();
129  MyReal r = sqrt(r2);
130  MyReal rinv = (r > 0) ? 1 / r : 0;
131 
133 
134 #ifdef PMGRID
136  {
137  if(modify_gfactors_pm_monopole(gfac, r, rinv, mfp))
138  return; // if we are outside the cut-off radius, we have no interaction
139  }
140 #endif
141 
142  get_gfactors_monopole(gfac, r, hmax, rinv);
143 
144 #ifdef EVALPOTENTIAL
145  *pdat.pot -= mass * gfac.fac0;
146 #endif
147  *pdat.acc -= (mass * gfac.fac1 * rinv) * dxyz;
148 
149  if(DoEwald)
150  {
151  // EWALD treatment, only done for periodic boundaries in case PM is not active
152 
153  ewald_data ew;
154  Ewald.ewald_gridlookup(intpos, pdat.intpos, ewald::POINTMASS, ew);
155 
156 #ifdef EVALPOTENTIAL
157  *pdat.pot += mass * ew.D0phi;
158 #endif
159  *pdat.acc += mass * ew.D1phi;
160  }
161 
162  if(MeasureCostFlag)
163  *pdat.GravCost += 1;
164 
165  interactioncountPP += 1;
166 }
167 
168 inline int gwalk::evaluate_particle_node_opening_criterion_and_interaction(const pinfo &pdat, gravnode *nop)
169 {
170  if(nop->level == 0) // always open the root node (note: full node length does not fit in the integer type)
171  return NODE_OPEN;
172 
173  MyIntPosType halflen = ((MyIntPosType)1) << ((BITS_FOR_POSITIONS - 1) - nop->level);
174  MyIntPosType intlen = halflen << 1;
175 
176 #if defined(PMGRID) || !defined(TREE_NO_SAFETY_BOX)
177  MyIntPosType dist[3];
178  Tp->nearest_image_intpos_to_absolute_intdist(nop->center.da, pdat.intpos, dist);
179 #endif
180 
181 #ifndef TREE_NO_SAFETY_BOX
182  // if we are close to the node, and therefore open it to protect against worst-case force errors
183  if(dist[0] < intlen && dist[1] < intlen && dist[2] < intlen)
184  return NODE_OPEN;
185 #endif
186 
187 #ifdef PMGRID
188  mesh_factors *mfp = &mf[LOW_MESH];
189 
191  {
192 #ifdef PLACEHIGHRESREGION
193  if((DoPM & TREE_ACTIVE_CUTTOFF_HIGHRES_PM))
194  {
195  int overlap_check = nop->overlap_flag;
196 
197  if(pdat.InsideOutsideFlag == FLAG_INSIDE && overlap_check == FLAG_BOUNDARYOVERLAP)
198  Terminate(
199  "this case should not happen: node center=(%g|%g|%g) len=%g particle=(%g|%g|%g) "
200  "rel-dist=(%g|%g|%g)\n",
201  nop->center[0] * Tp->FacIntToCoord, nop->center[1] * Tp->FacIntToCoord, nop->center[2] * Tp->FacIntToCoord,
202  intlen * Tp->FacIntToCoord, pdat.intpos[0] * Tp->FacIntToCoord, pdat.intpos[1] * Tp->FacIntToCoord,
203  pdat.intpos[2] * Tp->FacIntToCoord, pdat.intpos[0] * Tp->FacIntToCoord - nop->center[0] * Tp->FacIntToCoord,
204  pdat.intpos[1] * Tp->FacIntToCoord - nop->center[1] * Tp->FacIntToCoord,
205  pdat.intpos[2] * Tp->FacIntToCoord - nop->center[2] * Tp->FacIntToCoord);
206 
207  if(overlap_check == FLAG_INSIDE && pdat.InsideOutsideFlag == FLAG_INSIDE)
208  mfp = &mf[HIGH_MESH];
209  }
210 #endif
211 
212  /* check whether we can stop walking along this branch */
213  if(dist[0] > mfp->intrcut[0] + halflen)
214  return NODE_DISCARD;
215 
216  /* check whether we can stop walking along this branch */
217  if(dist[1] > mfp->intrcut[1] + halflen)
218  return NODE_DISCARD;
219 
220  /* check whether we can stop walking along this branch */
221  if(dist[2] > mfp->intrcut[2] + halflen)
222  return NODE_DISCARD;
223  }
224 #endif
225 
226  /* converts the integer distance to floating point */
227  vector<MyReal> dxyz;
228  Tp->nearest_image_intpos_to_pos(nop->s.da, pdat.intpos, dxyz.da);
229 
230  MyReal r2 = dxyz.r2();
231 
232  MyReal mass = nop->mass;
233 
234  MyReal len = intlen * Tp->FacIntToCoord;
235  MyReal len2 = len * len;
236 
237  if(All.RelOpeningCriterionInUse == 0) /* check Barnes-Hut opening criterion */
238  {
239  if(len2 > r2 * theta2)
240  return NODE_OPEN;
241  }
242  else /* check relative opening criterion */
243  {
244 #if(MULTIPOLE_ORDER <= 2)
245  if(mass * len2 > r2 * r2 * errTolForceAcc * pdat.aold)
246  return NODE_OPEN;
247 #elif(MULTIPOLE_ORDER == 3)
248  if(square(mass * len * len2) > r2 * square(r2 * r2 * errTolForceAcc * pdat.aold))
249  return NODE_OPEN;
250 #elif(MULTIPOLE_ORDER == 4)
251  if(mass * len2 * len2 > r2 * r2 * r2 * errTolForceAcc * pdat.aold)
252  return NODE_OPEN;
253 #elif(MULTIPOLE_ORDER == 5)
254  if(square(mass * len2 * len2 * len) > r2 * square(r2 * r2 * r2 * errTolForceAcc * pdat.aold))
255  return NODE_OPEN;
256 #endif
257  // carry out an additional test to protect against pathological force errors for very large opening angles
258  if(len2 > r2 * thetamax2)
259  return NODE_OPEN;
260  }
261 
262  MyReal hmax = pdat.h_i;
263 
264 #if NSOFTCLASSES > 1
265  MyReal h_j = All.ForceSoftening[nop->maxsofttype];
266 
267  if(h_j > hmax)
268  {
269  if(r2 < h_j * h_j)
270  {
272  return NODE_OPEN;
273  }
274  hmax = h_j;
275  }
276 #endif
277 
278  /**************************/
279 
280  // now evaluate the multipole moment interaction
281 
282 #ifdef PRESERVE_SHMEM_BINARY_INVARIANCE
283  if(skip_actual_force_computation)
284  return NODE_USE;
285 #endif
286 
287  MyReal r = sqrt(r2);
288  MyReal rinv = (r > 0) ? 1 / r : 0;
289 
291 
292 #ifdef PMGRID
294  {
295  if(modify_gfactors_pm_multipole(gfac, r, rinv, mfp))
296  return NODE_DISCARD; // if we are outside the cut-off radius, we have no interaction
297  }
298 #endif
299 
300  get_gfactors_multipole(gfac, r, hmax, rinv);
301 
302 #ifdef EVALPOTENTIAL
303  MyReal g0 = gfac.fac0;
304  *pdat.pot -= mass * g0; // monopole potential
305 #endif
306 
307  MyReal g1 = gfac.fac1 * rinv;
308  *pdat.acc -= (mass * g1) * dxyz; // monopole force
309 
310 #if(MULTIPOLE_ORDER >= 3) || (MULTIPOLE_ORDER >= 2 && defined(EXTRAPOTTERM))
311  MyReal g2 = gfac.fac2 * gfac.rinv2;
312  vector<MyReal> Q2dxyz = nop->Q2Tensor * dxyz;
313  MyReal Q2dxyz2 = Q2dxyz * dxyz;
314  MyReal Q2trace = nop->Q2Tensor.trace();
315 #if(MULTIPOLE_ORDER >= 3)
316  MyReal g3 = gfac.fac3 * gfac.rinv3;
317  *pdat.acc -= static_cast<MyReal>(0.5) * (g2 * Q2trace + g3 * Q2dxyz2) * dxyz + g2 * Q2dxyz; // quadrupole force
318 #endif
319 #ifdef EVALPOTENTIAL
320  *pdat.pot -= static_cast<MyReal>(0.5) * (g1 * Q2trace + g2 * Q2dxyz2); // quadrupole potential
321 #endif
322 #endif
323 
324 #if(MULTIPOLE_ORDER >= 4) || (MULTIPOLE_ORDER >= 3 && defined(EXTRAPOTTERM))
325  symtensor3<MyDouble> &Q3 = nop->Q3Tensor;
326 
327  symtensor2<MyDouble> Q3dxyz = Q3 * dxyz;
328  vector<MyDouble> Q3dxyz2 = Q3dxyz * dxyz;
329  MyReal Q3dxyz3 = Q3dxyz2 * dxyz;
330  MyReal Q3dxyzTrace = Q3dxyz.trace();
331 
332  vector<MyDouble> Q3vec;
333  Q3vec[0] = Q3[dXXX] + Q3[dXYY] + Q3[dXZZ];
334  Q3vec[1] = Q3[dYXX] + Q3[dYYY] + Q3[dYZZ];
335  Q3vec[2] = Q3[dZXX] + Q3[dZYY] + Q3[dZZZ];
336 
337 #if(MULTIPOLE_ORDER >= 4)
338  MyReal g4 = gfac.fac4 * gfac.rinv2 * gfac.rinv2;
339  *pdat.acc -=
340  static_cast<MyReal>(0.5) *
341  (g2 * Q3vec + g3 * Q3dxyz2 + (static_cast<MyReal>(1.0 / 3) * g4 * Q3dxyz3 + g3 * Q3dxyzTrace) * dxyz); // octupole force
342 #endif
343 #ifdef EVALPOTENTIAL
344  *pdat.pot -= static_cast<MyReal>(1.0 / 6) * (3 * g2 * Q3dxyzTrace + g3 * Q3dxyz3); // octupole potential
345 #endif
346 #endif
347 
348 #if(MULTIPOLE_ORDER >= 5) || (MULTIPOLE_ORDER >= 4 && defined(EXTRAPOTTERM))
349  // now compute the hexadecupole force
350  symtensor4<MyDouble> &Q4 = nop->Q4Tensor;
351 
352  symtensor3<MyDouble> Q4dxyz = Q4 * dxyz;
353  symtensor2<MyDouble> Q4dxyz2 = Q4dxyz * dxyz;
354  vector<MyDouble> Q4dxyz3 = Q4dxyz2 * dxyz;
355  MyReal Q4dxyz4 = Q4dxyz3 * dxyz;
356  MyReal Q4dxyz2trace = Q4dxyz2.trace();
357 
359  QT[qXX] = Q4[sXXXX] + Q4[sXXYY] + Q4[sXXZZ];
360  QT[qYY] = Q4[sYYXX] + Q4[sYYYY] + Q4[sYYZZ];
361  QT[qZZ] = Q4[sZZXX] + Q4[sZZYY] + Q4[sZZZZ];
362  QT[qXY] = Q4[sXYXX] + Q4[sXYYY] + Q4[sXYZZ];
363  QT[qXZ] = Q4[sXZXX] + Q4[sXZYY] + Q4[sXZZZ];
364  QT[qYZ] = Q4[sYZXX] + Q4[sYZYY] + Q4[sYZZZ];
365  MyReal QTtrace = QT.trace();
366 
367 #if(MULTIPOLE_ORDER >= 5)
368  vector<MyDouble> QTdxyz = QT * dxyz;
369  MyReal g5 = gfac.fac5 * gfac.rinv2 * gfac.rinv3;
370  *pdat.acc -=
371  static_cast<MyReal>(1.0 / 24) * (g3 * (3 * QTtrace * dxyz + 12 * QTdxyz) + g4 * (6 * Q4dxyz2trace * dxyz + 4 * Q4dxyz3) +
372  g5 * Q4dxyz4 * dxyz); // hexadecupole force
373 #endif
374 #ifdef EVALPOTENTIAL
375  *pdat.pot -= static_cast<MyReal>(1.0 / 24) * (g2 * 3 * QTtrace + g3 * 6 * Q4dxyz2trace + g4 * Q4dxyz4); // hexadecupole potential
376 #endif
377 #endif
378 
379 #if(MULTIPOLE_ORDER >= 5 && defined(EXTRAPOTTERM) && defined(EVALPOTENTIAL))
380  symtensor5<MyDouble> &Q5 = nop->Q5Tensor;
381 
382  symtensor4<MyDouble> Q5dxyz = Q5 * dxyz;
383  symtensor3<MyDouble> Q5dxyz2 = Q5dxyz * dxyz;
384  symtensor2<MyDouble> Q5dxyz3 = Q5dxyz2 * dxyz;
385  vector<MyDouble> Q5dxyz4 = Q5dxyz3 * dxyz;
386  MyReal Q5dxyz5 = Q5dxyz4 * dxyz;
387 
388  MyReal Q5dxyzTtrace = Q5dxyz[sXXXX] + Q5dxyz[sYYYY] + Q5dxyz[sZZZZ] + 2 * (Q5dxyz[sXXYY] + Q5dxyz[sXXZZ] + Q5dxyz[sYYZZ]);
389 
390  // now compute the triakontadipole potential term
391  *pdat.pot -= static_cast<MyReal>(1.0 / 120) * (g3 * 15 * Q5dxyzTtrace + g4 * 10 * Q5dxyz3.trace() + g5 * Q5dxyz5);
392 #endif
393 
394  if(DoEwald)
395  {
396  // EWALD treatment, only done for periodic boundaries in case PM is not active
397 
398  ewald_data ew;
399  Ewald.ewald_gridlookup(nop->s.da, pdat.intpos, ewald::MULTIPOLES, ew);
400 
401 #ifdef EVALPOTENTIAL
402  *pdat.pot += mass * ew.D0phi;
403 #if(MULTIPOLE_ORDER >= 3) || (MULTIPOLE_ORDER >= 2 && defined(EXTRAPOTTERM))
404  *pdat.pot += static_cast<MyReal>(0.5) * (nop->Q2Tensor * ew.D2phi);
405 #endif
406 #if(MULTIPOLE_ORDER >= 4) || (MULTIPOLE_ORDER >= 3 && defined(EXTRAPOTTERM))
407  *pdat.pot += static_cast<MyReal>(1.0 / 6) * (nop->Q3Tensor * ew.D3phi);
408 #endif
409 #if(MULTIPOLE_ORDER >= 5) || (MULTIPOLE_ORDER >= 4 && defined(EXTRAPOTTERM))
410  *pdat.pot += static_cast<MyReal>(1.0 / 24) * (nop->Q4Tensor * ew.D4phi);
411 #endif
412 #if(MULTIPOLE_ORDER >= 5 && defined(EXTRAPOTTERM) && defined(EVALPOTENTIAL))
413  *pdat.pot += static_cast<MyReal>(1.0 / 120) * (nop->Q5Tensor * ew.D5phi);
414 #endif
415 #endif
416  *pdat.acc += mass * ew.D1phi;
417 #if(MULTIPOLE_ORDER >= 3)
418  *pdat.acc += static_cast<MyReal>(0.5) * (ew.D3phi * nop->Q2Tensor);
419 #endif
420 #if(MULTIPOLE_ORDER >= 4)
421  *pdat.acc += static_cast<MyReal>(1.0 / 6) * (ew.D4phi * nop->Q3Tensor);
422 #endif
423 #if(MULTIPOLE_ORDER >= 5)
424  *pdat.acc += static_cast<MyReal>(1.0 / 24) * (ew.D5phi * nop->Q4Tensor);
425 #endif
426  }
427 
428  interactioncountPN += 1;
429 
430  if(MeasureCostFlag)
431  *pdat.GravCost += 1;
432 
433  return NODE_USE;
434 }
435 
436 inline void gwalk::gwalk_open_node(const pinfo &pdat, int i, char ptype, gravnode *nop, int mintopleafnode, int committed)
437 {
438  /* open node */
439  int p = nop->nextnode;
440  unsigned char shmrank = nop->nextnode_shmrank;
441 
442  while(p != nop->sibling || (shmrank != nop->sibling_shmrank && nop->sibling >= MaxPart + D->NTopnodes))
443  {
444  if(p < 0)
445  Terminate(
446  "p=%d < 0 nop->sibling=%d nop->nextnode=%d shmrank=%d nop->sibling_shmrank=%d nop->foreigntask=%d mass=%g "
447  "first_nontoplevelnode=%d",
448  p, nop->sibling, nop->nextnode, shmrank, nop->sibling_shmrank, nop->OriginTask, nop->mass, MaxPart + D->NTopnodes);
449 
450  int next;
451  unsigned char next_shmrank;
452  char type;
453 
454  if(p < MaxPart) /* a local particle */
455  {
456  /* note: here shmrank cannot change */
457  next = get_nextnodep(shmrank)[p];
458  next_shmrank = shmrank;
460  }
461  else if(p < MaxPart + MaxNodes) /* an internal node */
462  {
463  gravnode *nop = get_nodep(p, shmrank);
464  next = nop->sibling;
465  next_shmrank = nop->sibling_shmrank;
466  type = NODE_TYPE_LOCAL_NODE;
467  }
468  else if(p >= ImportedNodeOffset && p < EndOfTreePoints) /* an imported Treepoint particle */
469  {
470  /* note: here shmrank cannot change */
471  next = get_nextnodep(shmrank)[p - MaxNodes];
472  next_shmrank = shmrank;
474  }
475  else if(p >= EndOfTreePoints && p < EndOfForeignNodes) /* an imported tree node */
476  {
477  gravnode *nop = get_nodep(p, shmrank);
478  next = nop->sibling;
479  next_shmrank = nop->sibling_shmrank;
480  type = NODE_TYPE_FETCHED_NODE;
481  }
482  else if(p >= EndOfForeignNodes) /* an imported particle below an imported tree node */
483  {
484  foreign_gravpoint_data *foreignpoint = get_foreignpointsp(p - EndOfForeignNodes, shmrank);
485 
486  next = foreignpoint->Nextnode;
487  next_shmrank = foreignpoint->Nextnode_shmrank;
489  }
490  else
491  {
492  /* a pseudo point */
493  Terminate(
494  "should not happen: p=%d MaxPart=%d MaxNodes=%d ImportedNodeOffset=%d EndOfTreePoints=%d EndOfForeignNodes=%d "
495  "shmrank=%d",
497  }
498 
499  gravity_force_interact(pdat, i, p, ptype, type, shmrank, mintopleafnode, committed);
500 
501  p = next;
502  shmrank = next_shmrank;
503  }
504 }
505 
506 void gwalk::gravity_force_interact(const pinfo &pdat, int i, int no, char ptype, char no_type, unsigned char shmrank,
507  int mintopleafnode, int committed)
508 {
509  if(no_type <= NODE_TYPE_FETCHED_PARTICLE) // we are interacting with a particle
510  {
511  evaluate_particle_particle_interaction(pdat, no, no_type, shmrank);
512  }
513  else // we are interacting with a node
514  {
515  gravnode *nop = get_nodep(no, shmrank);
516 
517  if(nop->not_empty == 0)
518  return;
519 
520  if(no < MaxPart + MaxNodes) // we have a top-level node
521  if(nop->nextnode >= MaxPart + MaxNodes) // if the next node is not a top-level, we have a leaf node
522  {
523  mintopleafnode = no;
524 
525 #ifdef PRESERVE_SHMEM_BINARY_INVARIANCE
526  // if the leaf node is on this shared memory, we have all the data, so we for sure don't need to import anything on this
527  // branch
528  if(skip_actual_force_computation)
530  return;
531 #endif
532  }
533 
534  int openflag = evaluate_particle_node_opening_criterion_and_interaction(pdat, nop);
535 
536  if(openflag == NODE_OPEN) /* cell can't be used, need to open it */
537  {
538  if(nop->cannot_be_opened_locally.load(std::memory_order_acquire))
539  {
540  // are we in the same shared memory node?
542  {
543  Terminate("this should not happen any more");
544  }
545  else
546  {
547  tree_add_to_fetch_stack(nop, no, shmrank); // will only add unique copies
548 
549  tree_add_to_work_stack(i, no, shmrank, mintopleafnode);
550  }
551  }
552  else
553  {
554  int min_buffer_space =
556 
557  if(min_buffer_space >= committed + 8 * TREE_NUM_BEFORE_NODESPLIT)
558  gwalk_open_node(pdat, i, ptype, nop, mintopleafnode, committed + 8 * TREE_NUM_BEFORE_NODESPLIT);
559  else
560  tree_add_to_work_stack(i, no, shmrank, mintopleafnode);
561  }
562  }
563  }
564 }
565 
566 /*
567 
568 */
569 
600 void gwalk::gravity_tree(int timebin)
601 {
602  interactioncountPP = 0;
603  interactioncountPN = 0;
604 
605  TIMER_STORE;
606  TIMER_START(CPU_TREE);
607 
608  D->mpi_printf("GRAVTREE: Begin tree force. timebin=%d (presently allocated=%g MB)\n", timebin, Mem.getAllocatedBytesInMB());
609 
610 #ifdef PMGRID
611  set_mesh_factors();
612 #endif
613 
614  TIMER_START(CPU_TREESTACK);
615 
616  // Create list of targets (the work queue). There are initially two possible sources of points, local ones, and imported ones.
617 
618  NumOnWorkStack = 0;
622 
623  WorkStack = (workstack_data *)Mem.mymalloc("WorkStack", AllocWorkStackBaseHigh * sizeof(workstack_data));
624  ResultIndexList = (int *)Mem.mymalloc("ResultIndexList", NumPartImported * sizeof(int));
625 
626  for(int i = 0; i < Tp->TimeBinsGravity.NActiveParticles; i++)
627  {
628  int target = Tp->TimeBinsGravity.ActiveParticleList[i];
629 
630  // if we have exported particles, we need to explicitly check whether this particle is among them
631  if(NumPartExported > 0)
632  {
633  MyIntPosType xxb = Tp->P[target].IntPos[0];
634  MyIntPosType yyb = Tp->P[target].IntPos[1];
635  MyIntPosType zzb = Tp->P[target].IntPos[2];
636  MyIntPosType mask = (((MyIntPosType)1) << (BITS_FOR_POSITIONS - 1));
637  unsigned char shiftx = (BITS_FOR_POSITIONS - 3);
638  unsigned char shifty = (BITS_FOR_POSITIONS - 2);
639  unsigned char shiftz = (BITS_FOR_POSITIONS - 1);
640  unsigned char level = 0;
641  unsigned char rotation = 0;
642 
643  int no = 0;
644  while(D->TopNodes[no].Daughter >= 0) /* walk down top tree to find correct leaf */
645  {
646  unsigned char pix = (((unsigned char)((xxb & mask) >> (shiftx--))) | ((unsigned char)((yyb & mask) >> (shifty--))) |
647  ((unsigned char)((zzb & mask) >> (shiftz--))));
648  unsigned char subnode = peano_incremental_key(pix, &rotation);
649  mask >>= 1;
650  level++;
651  no = D->TopNodes[no].Daughter + subnode;
652  }
653 
654  no = D->TopNodes[no].Leaf;
655  int task = D->TaskOfLeaf[no];
656 
657  if(task == D->ThisTask)
658  {
659  WorkStack[NumOnWorkStack].Target = target;
662  WorkStack[NumOnWorkStack].MinTopLeafNode = MaxPart + D->NTopnodes;
663  NumOnWorkStack++;
664  }
665  }
666  else
667  {
668  WorkStack[NumOnWorkStack].Target = target;
671  WorkStack[NumOnWorkStack].MinTopLeafNode = MaxPart + D->NTopnodes;
672  NumOnWorkStack++;
673  }
674 
675  /* let's do a safety check here to protect against accidental use of zero softening lengths */
676  int softtype = Tp->P[target].getSofteningClass();
677  if(All.ForceSoftening[softtype] == 0)
678  Terminate("Particle with ID=%lld of type=%d and softening type=%d was assigned zero softening\n",
679  (long long)Tp->P[target].ID.get(), Tp->P[target].getType(), softtype);
680  }
681 
682  int ncount = 0;
683 
684  for(int i = 0; i < NumPartImported; i++)
685  {
686 #ifndef HIERARCHICAL_GRAVITY
687  if(Points[i].ActiveFlag)
688 #endif
689  {
690  ResultIndexList[i] = ncount++;
691 
695  WorkStack[NumOnWorkStack].MinTopLeafNode = MaxPart + D->NTopnodes;
696  NumOnWorkStack++;
697  }
698  }
699 
700 #ifdef PRESERVE_SHMEM_BINARY_INVARIANCE
701  workstack_data *WorkStackBak = (workstack_data *)Mem.mymalloc("WorkStackBak", NumOnWorkStack * sizeof(workstack_data));
702  int NumOnWorkStackBak = NumOnWorkStack;
703  memcpy(WorkStackBak, WorkStack, NumOnWorkStack * sizeof(workstack_data));
704 #endif
705 
707  (resultsactiveimported_data *)Mem.mymalloc_clear("ResultsActiveImported", ncount * sizeof(resultsactiveimported_data));
708 
709  /******************************************/
710  /* now execute the tree walk calculations */
711  /******************************************/
712 
713  theta2 = All.ErrTolTheta * All.ErrTolTheta;
714  thetamax2 = All.ErrTolThetaMax * All.ErrTolThetaMax;
715  errTolForceAcc = All.ErrTolForceAcc;
716 
719 
720  // set a default size of the fetch stack equal to half the work stack (this may still be somewhat too large)
722  StackToFetch = (fetch_data *)Mem.mymalloc_movable(&StackToFetch, "StackToFetch", MaxOnFetchStack * sizeof(fetch_data));
723 
724  // let's grab at most half the still available memory for imported points and nodes
725  int nspace = (0.5 * Mem.FreeBytes) / (sizeof(gravnode) + 8 * sizeof(foreign_gravpoint_data));
726 
727  MaxForeignNodes = nspace;
728  MaxForeignPoints = 8 * nspace;
729  NumForeignNodes = 0;
730  NumForeignPoints = 0;
731 
732  /* the following two arrays hold imported tree nodes and imported points to augment the local tree */
733  Foreign_Nodes = (gravnode *)Mem.mymalloc_movable(&Foreign_Nodes, "Foreign_Nodes", MaxForeignNodes * sizeof(gravnode));
734  Foreign_Points = (foreign_gravpoint_data *)Mem.mymalloc_movable(&Foreign_Points, "Foreign_Points",
736 
738 
739  TIMER_STOP(CPU_TREESTACK);
740 
741  double t0 = Logs.second();
742  int max_ncycles = 0;
743 
745 
746 #ifdef PRESERVE_SHMEM_BINARY_INVARIANCE
747  for(int rep = 0; rep < 2; rep++)
748  {
749  if(rep == 0)
750  {
751  skip_actual_force_computation = true;
752  }
753  else
754  {
755  skip_actual_force_computation = false;
756  NumOnWorkStack = NumOnWorkStackBak;
757  memcpy(WorkStack, WorkStackBak, NumOnWorkStack * sizeof(workstack_data));
758  }
759 #endif
760 
761  while(NumOnWorkStack > 0) // repeat until we are out of work
762  {
763  NewOnWorkStack = 0; // gives the new entries
764  NumOnFetchStack = 0;
766 
767  TIMER_START(CPU_TREEWALK);
768 
769  int item = 0;
770 
771  while(item < NumOnWorkStack)
772  {
773  int committed = 8 * TREE_NUM_BEFORE_NODESPLIT;
774  int min_buffer_space =
776  if(min_buffer_space >= committed)
777  {
778  int target = WorkStack[item].Target;
779  int no = WorkStack[item].Node;
780  int shmrank = WorkStack[item].ShmRank;
781  int mintopleaf = WorkStack[item].MinTopLeafNode;
782  item++;
783 
784  pinfo pdat;
785  int ptype = get_pinfo(target, pdat);
786 
787  if(no == MaxPart)
788  {
789  // we have a pristine particle that's processed for the first time
790  gravity_force_interact(pdat, target, no, ptype, NODE_TYPE_LOCAL_NODE, shmrank, mintopleaf, committed);
791  }
792  else
793  {
794  // we have a node that we previously could not open
795  gravnode *nop = get_nodep(no, shmrank);
796 
797  if(nop->cannot_be_opened_locally)
798  {
799  Terminate("item=%d: no=%d now we should be able to open it!", item, no);
800  }
801  else
802  gwalk_open_node(pdat, target, ptype, nop, mintopleaf, committed);
803  }
804  }
805  else
806  break;
807  }
808 
809  if(item == 0 && NumOnWorkStack > 0)
810  Terminate("Can't even process a single particle");
811 
812  TIMER_STOP(CPU_TREEWALK);
813 
814  TIMER_START(CPU_TREEFETCH);
815 
817 
818  TIMER_STOP(CPU_TREEFETCH);
819 
820  TIMER_START(CPU_TREESTACK);
821 
822  /* now reorder the workstack such that we are first going to do residual pristine particles, and then
823  * imported nodes that hang below the first leaf nodes */
825  memmove(WorkStack, WorkStack + item, NumOnWorkStack * sizeof(workstack_data));
826 
827  /* now let's sort such that we can go deep on top-level node branches, allowing us to clear them out eventually */
829 
830  TIMER_STOP(CPU_TREESTACK);
831 
832  max_ncycles++;
833  }
834 
835 #ifdef PRESERVE_SHMEM_BINARY_INVARIANCE
836  }
837 #endif
838 
839  TIMER_START(CPU_TREEIMBALANCE);
840 
841  MPI_Allreduce(MPI_IN_PLACE, &max_ncycles, 1, MPI_INT, MPI_MAX, D->Communicator);
842 
843  TIMER_STOP(CPU_TREEIMBALANCE);
844 
846 
847  /* free temporary buffers */
848 
849  Mem.myfree(Foreign_Points);
850  Mem.myfree(Foreign_Nodes);
851  Mem.myfree(StackToFetch);
852 
853  double t1 = Logs.second();
854 
855  D->mpi_printf("GRAVTREE: tree-forces are calculated, with %d cycles took %g sec\n", max_ncycles, Logs.timediff(t0, t1));
856 
857  /* now communicate the forces in ResultsActiveImported */
859 
860  Mem.myfree(ResultsActiveImported);
861 #ifdef PRESERVE_SHMEM_BINARY_INVARIANCE
862  Mem.myfree(WorkStackBak);
863 #endif
864  Mem.myfree(ResultIndexList);
865  Mem.myfree(WorkStack);
866 
867  TIMER_STOP(CPU_TREE);
868 
869  D->mpi_printf("GRAVTREE: tree-force is done.\n");
870 
871  /* gather some diagnostic information */
872 
873  TIMER_START(CPU_LOGS);
874 
875  struct detailed_timings
876  {
877  double tree, wait, fetch, stack, all, lastpm;
878  double costtotal, numnodes;
879  double interactioncountPP, interactioncountPN;
881  double fillfacFgnNodes, fillfacFgnPoints;
882  };
883  detailed_timings timer, tisum, timax;
884 
885  timer.tree = TIMER_DIFF(CPU_TREEWALK);
886  timer.wait = TIMER_DIFF(CPU_TREEIMBALANCE);
887  timer.fetch = TIMER_DIFF(CPU_TREEFETCH);
888  timer.stack = TIMER_DIFF(CPU_TREESTACK);
889  timer.all = timer.tree + timer.wait + timer.fetch + timer.stack + TIMER_DIFF(CPU_TREE);
890  timer.lastpm = All.CPUForLastPMExecution;
891  timer.costtotal = interactioncountPP + interactioncountPN;
892  timer.numnodes = NumNodes;
893  timer.interactioncountPP = interactioncountPP;
894  timer.interactioncountPN = interactioncountPN;
895  timer.NumForeignNodes = NumForeignNodes;
896  timer.NumForeignPoints = NumForeignPoints;
897  timer.fillfacFgnNodes = NumForeignNodes / ((double)MaxForeignNodes);
898  timer.fillfacFgnPoints = NumForeignPoints / ((double)MaxForeignPoints);
899 
900  MPI_Reduce((double *)&timer, (double *)&tisum, (int)(sizeof(detailed_timings) / sizeof(double)), MPI_DOUBLE, MPI_SUM, 0,
901  D->Communicator);
902  MPI_Reduce((double *)&timer, (double *)&timax, (int)(sizeof(detailed_timings) / sizeof(double)), MPI_DOUBLE, MPI_MAX, 0,
903  D->Communicator);
904 
906 
907  if(D->ThisTask == 0)
908  {
909  fprintf(Logs.FdTimings, "Nf=%9lld timebin=%d total-Nf=%lld\n", Tp->TimeBinsGravity.GlobalNActiveParticles, timebin,
911  fprintf(Logs.FdTimings, " work-load balance: %g part/sec: raw=%g, effective=%g ia/part: avg=%g (%g|%g)\n",
912  timax.tree / ((tisum.tree + 1e-20) / D->NTask), Tp->TimeBinsGravity.GlobalNActiveParticles / (tisum.tree + 1.0e-20),
913  Tp->TimeBinsGravity.GlobalNActiveParticles / ((timax.tree + 1.0e-20) * D->NTask),
914  tisum.costtotal / (Tp->TimeBinsGravity.GlobalNActiveParticles + 1.0e-20),
915  tisum.interactioncountPP / (Tp->TimeBinsGravity.GlobalNActiveParticles + 1.0e-20),
916  tisum.interactioncountPN / (Tp->TimeBinsGravity.GlobalNActiveParticles + 1.0e-20));
917  fprintf(Logs.FdTimings,
918  " maximum number of nodes: %g, filled: %g NumForeignNodes: max=%g avg=%g fill=%g NumForeignPoints: max=%g avg=%g "
919  "fill=%g cycles=%d\n",
920  timax.numnodes, timax.numnodes / MaxNodes, timax.NumForeignNodes, tisum.NumForeignNodes / D->NTask,
921  timax.fillfacFgnNodes, timax.NumForeignPoints, tisum.NumForeignPoints / D->NTask, timax.fillfacFgnPoints, max_ncycles);
922  fprintf(Logs.FdTimings,
923  " avg times: <all>=%g <tree>=%g <wait>=%g <fetch>=%g <stack>=%g "
924  "(lastpm=%g) sec\n",
925  tisum.all / D->NTask, tisum.tree / D->NTask, tisum.wait / D->NTask, tisum.fetch / D->NTask, tisum.stack / D->NTask,
926  tisum.lastpm / D->NTask);
927  fprintf(Logs.FdTimings, " total interaction cost: %g (imbalance=%g)\n", tisum.costtotal,
928  timax.costtotal / (tisum.costtotal / D->NTask));
930  }
931 
932  TIMER_STOP(CPU_LOGS);
933 }
934 
935 /* make sure that we instantiate the template */
936 #include "../data/simparticles.h"
937 template class gravtree<simparticles>;
global_data_all_processes All
Definition: main.cc:40
MyIDType get(void) const
Definition: idstorage.h:87
int NTopnodes
Definition: domain.h:52
int * TaskOfLeaf
Definition: domain.h:63
topnode_data * TopNodes
Definition: domain.h:74
@ POINTMASS
Definition: ewald.h:67
@ MULTIPOLES
Definition: ewald.h:68
void ewald_gridlookup(const MyIntPosType *p_intpos, const MyIntPosType *target_intpos, enum interpolate_options flag, ewald_data &fper)
Definition: ewald.cc:304
void gravity_exchange_forces(void)
Definition: gravtree.cc:134
mesh_factors mf[2]
Definition: gravtree.h:271
resultsactiveimported_data * ResultsActiveImported
Definition: gravtree.h:293
void get_gfactors_multipole(gfactors &res, const T r, const T h_max, const T rinv)
Definition: gravtree.h:405
void get_gfactors_monopole(gfactors &res, const T r, const T h_max, const T rinv)
Definition: gravtree.h:525
void gravity_tree(int timebin)
This function computes the gravitational forces for all active particles.
Definition: gwalk.cc:600
void nearest_image_intpos_to_absolute_intdist(const MyIntPosType *a, const MyIntPosType *b, MyIntPosType *delta)
void nearest_image_intpos_to_pos(const MyIntPosType *const a, const MyIntPosType *const b, T *posdiff)
MyReal FacIntToCoord
Definition: intposconvert.h:37
double timediff(double t0, double t1)
Definition: logs.cc:488
FILE * FdTimings
Definition: logs.h:47
double second(void)
Definition: logs.cc:471
double getAllocatedBytesInMB(void)
Definition: mymalloc.h:68
size_t FreeBytes
Definition: mymalloc.h:48
void mpi_printf(const char *fmt,...)
Definition: setcomm.h:55
int ThisTask
Definition: setcomm.h:33
int NTask
Definition: setcomm.h:32
MPI_Comm Communicator
Definition: setcomm.h:31
int * GetNodeIDForSimulCommRank
int Island_ThisTask
TimeBinData TimeBinsGravity
Definition: simparticles.h:205
particle_data * P
Definition: simparticles.h:54
T trace(void)
Definition: symtensors.h:285
foreign_gravpoint_data * get_foreignpointsp(int n, unsigned char shmrank)
Definition: tree.h:275
static bool compare_workstack(const workstack_data &a, const workstack_data &b)
Definition: tree.h:197
gravpoint_data * get_pointsp(int no, unsigned char shmrank)
Definition: tree.h:376
void tree_add_to_fetch_stack(gravnode *nop, int nodetoopen, unsigned char shmrank)
Definition: tree.h:207
void tree_add_to_work_stack(int target, int no, unsigned char shmrank, int mintopleafnode)
Definition: tree.h:232
T r2(void)
Definition: symtensors.h:187
T da[3]
Definition: symtensors.h:106
#define FLAG_BOUNDARYOVERLAP
Definition: constants.h:31
#define FLAG_INSIDE
Definition: constants.h:30
#define LOW_MESH
Definition: constants.h:33
#define HIGH_MESH
Definition: constants.h:34
double mycxxsort(T *begin, T *end, Tcomp comp)
Definition: cxxsort.h:39
uint32_t MyIntPosType
Definition: dtypes.h:35
double MyReal
Definition: dtypes.h:82
#define BITS_FOR_POSITIONS
Definition: dtypes.h:37
ewald Ewald
Definition: main.cc:42
#define TREE_ACTIVE_CUTTOFF_HIGHRES_PM
Definition: gravtree.h:24
#define TREE_MIN_WORKSTACK_SIZE
Definition: gravtree.h:26
#define NODE_USE
Definition: gravtree.h:35
#define NODE_DISCARD
Definition: gravtree.h:37
#define NODE_TYPE_FETCHED_NODE
Definition: gravtree.h:33
#define NODE_TYPE_TREEPOINT_PARTICLE
Definition: gravtree.h:30
#define TREE_EXPECTED_CYCLES
Definition: gravtree.h:27
#define NODE_TYPE_FETCHED_PARTICLE
Definition: gravtree.h:31
#define NODE_OPEN
Definition: gravtree.h:36
#define NODE_TYPE_LOCAL_NODE
Definition: gravtree.h:32
#define TREE_ACTIVE_CUTTOFF_BASE_PM
Definition: gravtree.h:23
#define NODE_TYPE_LOCAL_PARTICLE
Definition: gravtree.h:29
logs Logs
Definition: main.cc:43
#define TIMER_START(counter)
Starts the timer counter.
Definition: logs.h:197
#define TIMER_STOP(counter)
Stops the timer counter.
Definition: logs.h:220
#define TIMER_STORE
Copies the current value of CPU times to a stored variable, such that differences with respect to thi...
Definition: logs.h:257
#define TIMER_DIFF(counter)
Returns amount elapsed for the timer since last save with TIMER_STORE.
Definition: logs.h:250
#define Terminate(...)
Definition: macros.h:19
shmem Shmem
Definition: main.cc:45
memory Mem
Definition: main.cc:44
expr sqrt(half arg)
Definition: half.hpp:2777
unsigned char peano_incremental_key(unsigned char pix, unsigned char *rotation)
Definition: peano.cc:107
int NActiveParticles
Definition: timestep.h:18
int * ActiveParticleList
Definition: timestep.h:20
long long GlobalNActiveParticles
Definition: timestep.h:19
unsigned char level
Definition: tree.h:64
unsigned char nextnode_shmrank
Definition: tree.h:66
std::atomic< unsigned char > cannot_be_opened_locally
Definition: tree.h:70
unsigned char not_empty
Definition: tree.h:73
int nextnode
Definition: tree.h:57
vector< MyIntPosType > center
Definition: tree.h:51
int OriginTask
Definition: tree.h:61
unsigned char sibling_shmrank
Definition: tree.h:65
int sibling
Definition: tree.h:53
vector< MyReal > D1phi
Definition: gravtree.h:186
symtensor3< MyReal > D3phi
Definition: gravtree.h:188
MyReal D0phi
Definition: gravtree.h:185
symtensor2< MyReal > D2phi
Definition: gravtree.h:187
unsigned char getSofteningClass(void)
Definition: gravtree.h:143
unsigned char Nextnode_shmrank
Definition: gravtree.h:133
MyIntPosType IntPos[3]
Definition: gravtree.h:130
double ForceSoftening[NSOFTCLASSES+NSOFTCLASSES_HYDRO+2]
Definition: allvars.h:258
long long TotNumOfForces
Definition: allvars.h:103
MyDouble mass
Definition: gravtree.h:65
vector< MyIntPosType > s
Definition: gravtree.h:66
unsigned char maxsofttype
Definition: gravtree.h:83
unsigned char minsofttype
Definition: gravtree.h:84
unsigned char getSofteningClass(void)
Definition: gravtree.h:118
MyDouble Mass
Definition: gravtree.h:103
MyIntPosType IntPos[3]
Definition: gravtree.h:102
MyDouble getMass(void)
unsigned char getSofteningClass(void)
MyIDStorage ID
Definition: particle_data.h:70
unsigned char getType(void)
MyIntPosType IntPos[3]
Definition: particle_data.h:53
#define sXXYY
#define sXYXX
#define sZZZZ
#define dYZZ
#define sXXXX
#define sZZXX
#define sYYZZ
#define dXYY
#define sZZYY
#define qXX
#define dZZZ
#define dYYY
#define sXZZZ
#define qYY
#define qYZ
#define sYZXX
#define sYYYY
#define sXYYY
#define dXXX
#define sXYZZ
#define qZZ
#define sXZYY
#define dYXX
#define dZYY
#define sYZZZ
#define sXXZZ
#define sYYXX
#define qXY
#define dZXX
#define sXZXX
#define sYZYY
#define dXZZ
#define qXZ
void myflush(FILE *fstream)
Definition: system.cc:125
T square(T const value)
Definition: system.h:36
#define TREE_NUM_BEFORE_NODESPLIT
Definition: tree.h:16