GADGET-4
fmm.cc
Go to the documentation of this file.
1 /*******************************************************************************
2  * \copyright This file is part of the GADGET4 N-body/SPH code developed
3  * \copyright by Volker Springel. Copyright (C) 2014-2020 by Volker Springel
4  * \copyright (vspringel@mpa-garching.mpg.de) and all contributing authors.
5  *******************************************************************************/
6 
12 #include "gadgetconfig.h"
13 
14 #ifdef FMM
15 
16 #include <math.h>
17 #include <mpi.h>
18 #include <stdio.h>
19 #include <stdlib.h>
20 #include <string.h>
21 
22 #include "../data/allvars.h"
23 #include "../data/dtypes.h"
24 #include "../data/intposconvert.h"
25 #include "../data/mymalloc.h"
26 #include "../domain/domain.h"
27 #include "../fmm/fmm.h"
28 #include "../gravity/ewald.h"
29 #include "../gravtree/gravtree.h"
30 #include "../logs/logs.h"
31 #include "../logs/timer.h"
32 #include "../main/simulation.h"
33 #include "../mpi_utils/mpi_utils.h"
34 #include "../mpi_utils/shared_mem_handler.h"
35 #include "../pm/pm.h"
36 #include "../sort/cxxsort.h"
37 #include "../system/system.h"
38 #include "../time_integration/timestep.h"
39 
40 void fmm::fmm_force_passdown(int no, unsigned char no_shmrank, taylor_data taylor_current)
41 {
42  if(no >= MaxPart && no < MaxPart + MaxNodes) /* an internal node */
43  {
44  /* first, let's add the external field expansion to the local one accumulated for this node */
45 #ifdef EVALPOTENTIAL
46  taylor_current.coeff.phi += TaylorCoeff[no].coeff.phi;
47 #endif
48  taylor_current.coeff.dphi += TaylorCoeff[no].coeff.dphi;
49 #if(MULTIPOLE_ORDER >= 2)
50  taylor_current.coeff.d2phi += TaylorCoeff[no].coeff.d2phi;
51 #endif
52 #if(MULTIPOLE_ORDER >= 3)
53  taylor_current.coeff.d3phi += TaylorCoeff[no].coeff.d3phi;
54 #endif
55 #if(MULTIPOLE_ORDER >= 4)
56  taylor_current.coeff.d4phi += TaylorCoeff[no].coeff.d4phi;
57 #endif
58 #if(MULTIPOLE_ORDER >= 5)
59  taylor_current.coeff.d5phi += TaylorCoeff[no].coeff.d5phi;
60 #endif
61 
62  taylor_current.coeff.interactions += TaylorCoeff[no].coeff.interactions;
63  }
64  else
65  Terminate("this is not an internal node, which should not happen\n");
66 
67  gravnode *node_no = get_nodep(no, no_shmrank);
68 
69  int p = node_no->nextnode; /* open cell */
70 
71  unsigned char shmrank = node_no->nextnode_shmrank;
72 
73  while(p != node_no->sibling || (shmrank != node_no->sibling_shmrank && node_no->sibling >= MaxPart + D->NTopnodes))
74  {
75  if(p < MaxPart || (p >= ImportedNodeOffset && p < EndOfTreePoints)) /* we have found a single particle */
76  {
77  if(shmrank != Shmem.Island_ThisTask)
78  Terminate("odd");
79 
80  int m, mp;
81  MyIntPosType *intpos;
82 
83  if(p >= ImportedNodeOffset) /* an imported Treepoint particle */
84  {
85  m = p - ImportedNodeOffset;
86  intpos = Points[m].IntPos;
87  mp = -1;
88  p = get_nextnodep(shmrank)[p - MaxNodes];
89  }
90  else
91  {
92  m = p;
93  intpos = Tp->P[p].IntPos;
94  mp = p;
95  p = get_nextnodep(shmrank)[p];
96  }
97 
98  /* apply expansion to particle */
99 
100  vector<MyReal> dxyz;
101  Tp->nearest_image_intpos_to_pos(intpos, node_no->s.da, dxyz.da);
102 
103 #ifdef EVALPOTENTIAL
104  MyReal pot = taylor_current.coeff.phi + taylor_current.coeff.dphi * dxyz;
105 #endif
106  vector<MyReal> dphi = taylor_current.coeff.dphi;
107 
108 #if(MULTIPOLE_ORDER >= 2)
109  vector<MyReal> d2phidxyz = taylor_current.coeff.d2phi * dxyz;
110  dphi += d2phidxyz;
111 #ifdef EVALPOTENTIAL
112  pot += 0.5f * (d2phidxyz * dxyz);
113 #endif
114 #endif
115 #if(MULTIPOLE_ORDER >= 3)
116  vector<MyReal> d3phidxyz2 = contract_twice(taylor_current.coeff.d3phi, dxyz);
117  dphi += 0.5f * d3phidxyz2;
118 #ifdef EVALPOTENTIAL
119  pot += static_cast<MyReal>(1.0 / 6) * (dxyz * d3phidxyz2);
120 #endif
121 #endif
122 #if(MULTIPOLE_ORDER >= 4)
123  vector<MyReal> d4phidxyz3 = contract_thrice(taylor_current.coeff.d4phi, dxyz);
124  dphi += static_cast<MyReal>(1.0 / 6) * d4phidxyz3;
125 #ifdef EVALPOTENTIAL
126  pot += static_cast<MyReal>(1.0 / 24) * (dxyz * d4phidxyz3);
127 #endif
128 #endif
129 #if(MULTIPOLE_ORDER >= 5)
130  vector<MyReal> d5phidxyz4 = contract_fourtimes(taylor_current.coeff.d5phi, dxyz);
131  dphi += static_cast<MyReal>(1.0 / 24) * d5phidxyz4;
132 #ifdef EVALPOTENTIAL
133  pot += static_cast<MyReal>(1.0 / 120) * (dxyz * d5phidxyz4);
134 #endif
135 #endif
136 
137  if(mp >= 0)
138  {
139 #ifndef HIERARCHICAL_GRAVITY
140  if(Tp->TimeBinSynchronized[Tp->P[mp].TimeBinGrav])
141 #endif
142  {
143  Tp->P[mp].GravAccel -= dphi;
144 #ifdef EVALPOTENTIAL
145  Tp->P[mp].Potential += pot;
146 #endif
147 
148  if(MeasureCostFlag)
149  Tp->P[mp].GravCost += taylor_current.coeff.interactions;
150 
151  interactioncountEffective += taylor_current.coeff.interactions;
152  }
153  }
154  else
155  {
156 #ifndef HIERARCHICAL_GRAVITY
157  if(Points[m].ActiveFlag)
158 #endif
159  {
160  int idx = ResultIndexList[m];
161  ResultsActiveImported[idx].GravAccel -= dphi;
162 #ifdef EVALPOTENTIAL
163  ResultsActiveImported[idx].Potential += pot;
164 #endif
165  if(MeasureCostFlag)
166  ResultsActiveImported[idx].GravCost += taylor_current.coeff.interactions;
167 
168  interactioncountEffective += taylor_current.coeff.interactions;
169  }
170  }
171  }
172  else if(p < MaxPart + MaxNodes) /* an internal node */
173  {
174  gravnode *node_p = get_nodep(p, shmrank);
175 
176  if(fmm_depends_on_local_mass(p, shmrank))
177  {
178  taylor_data taylor_sub = taylor_current;
179 
180  vector<MyReal> dxyz;
181  Tp->nearest_image_intpos_to_pos(node_p->s.da, node_no->s.da, dxyz.da);
182 
183  /* now shift the expansion center */
184 
185 #ifdef EVALPOTENTIAL
186  taylor_sub.coeff.phi += taylor_current.coeff.dphi * dxyz;
187 #endif
188 
189 #if(MULTIPOLE_ORDER >= 2)
190  vector<MyReal> delta_dphi = taylor_current.coeff.d2phi * dxyz;
191  taylor_sub.coeff.dphi += delta_dphi;
192 #ifdef EVALPOTENTIAL
193  taylor_sub.coeff.phi += 0.5f * (delta_dphi * dxyz);
194 #endif
195 #endif
196 #if(MULTIPOLE_ORDER >= 3)
197  symtensor2<MyReal> delta_d2phi = taylor_current.coeff.d3phi * dxyz;
198 
199  taylor_sub.coeff.d2phi += delta_d2phi;
200 
201  delta_dphi = delta_d2phi * dxyz;
202 
203  taylor_sub.coeff.dphi += 0.5f * delta_dphi;
204 #ifdef EVALPOTENTIAL
205  taylor_sub.coeff.phi += static_cast<MyReal>(1.0 / 6) * (delta_dphi * dxyz);
206 #endif
207 #endif
208 #if(MULTIPOLE_ORDER >= 4)
209  symtensor3<MyReal> delta_d3phi = taylor_current.coeff.d4phi * dxyz;
210  taylor_sub.coeff.d3phi += delta_d3phi;
211 
212  delta_d2phi = delta_d3phi * dxyz;
213  taylor_sub.coeff.d2phi += 0.5f * delta_d2phi;
214 
215  delta_dphi = delta_d2phi * dxyz;
216  taylor_sub.coeff.dphi += static_cast<MyReal>(1.0 / 6) * delta_dphi;
217 #ifdef EVALPOTENTIAL
218  taylor_sub.coeff.phi += static_cast<MyReal>(1.0 / 24) * (delta_dphi * dxyz);
219 #endif
220 #endif
221 #if(MULTIPOLE_ORDER >= 5)
222  symtensor4<MyReal> delta_d4phi = taylor_current.coeff.d5phi * dxyz;
223  taylor_sub.coeff.d4phi += delta_d4phi;
224 
225  delta_d3phi = delta_d4phi * dxyz;
226  taylor_sub.coeff.d3phi += 0.5f * delta_d3phi;
227 
228  delta_d2phi = delta_d3phi * dxyz;
229  taylor_sub.coeff.d2phi += static_cast<MyReal>(1.0 / 6) * delta_d2phi;
230 
231  delta_dphi = delta_d2phi * dxyz;
232  taylor_sub.coeff.dphi += static_cast<MyReal>(1.0 / 24) * delta_dphi;
233 #ifdef EVALPOTENTIAL
234  taylor_sub.coeff.phi += static_cast<MyReal>(1.0 / 120) * (delta_dphi * dxyz);
235 #endif
236 #endif
237  fmm_force_passdown(p, shmrank, taylor_sub);
238  }
239 
240  p = node_p->sibling;
241  shmrank = node_p->sibling_shmrank;
242  }
243  else if(p >= EndOfTreePoints && p < EndOfForeignNodes) /* an imported tree node */
244  {
245  Terminate("A");
246  gravnode *nop = get_nodep(p, shmrank);
247  p = nop->sibling;
248  shmrank = nop->sibling_shmrank;
249  }
250  else if(p >= EndOfForeignNodes) /* an imported particle below an imported tree node */
251  {
252  Terminate("B");
253  foreign_gravpoint_data *foreignpoint = get_foreignpointsp(p - EndOfForeignNodes, shmrank);
254  p = foreignpoint->Nextnode;
255  shmrank = foreignpoint->Nextnode_shmrank;
256  }
257  else
258  {
259  /* a pseudo point */
260  Terminate(
261  "should not happen: p=%d MaxPart=%d MaxNodes=%d ImportedNodeOffset=%d EndOfTreePoints=%d EndOfForeignNodes=%d "
262  "shmrank=%d",
263  p, MaxPart, MaxNodes, ImportedNodeOffset, EndOfTreePoints, EndOfForeignNodes, shmrank);
264  }
265  }
266 }
267 
268 inline void fmm::fmm_open_both(gravnode *noptr_sink, gravnode *noptr_source, int mintopleafnode, int committed)
269 {
270  int self_flag = 0;
271  if(noptr_sink == noptr_source)
272  self_flag = 1;
273 
274  /* open node */
275  int p_sink = noptr_sink->nextnode;
276  unsigned char shmrank_sink = noptr_sink->nextnode_shmrank;
277 
278  while(p_sink != noptr_sink->sibling ||
279  (shmrank_sink != noptr_sink->sibling_shmrank && noptr_sink->sibling >= MaxPart + D->NTopnodes))
280  {
281  int next_sink;
282  unsigned char next_shmrank_sink;
283  char type_sink;
284 
285  if(p_sink < MaxPart) /* a local particle */
286  {
287  /* note: here shmrank cannot change */
288  next_sink = get_nextnodep(shmrank_sink)[p_sink];
289  next_shmrank_sink = shmrank_sink;
290  type_sink = NODE_TYPE_LOCAL_PARTICLE;
291  }
292  else if(p_sink < MaxPart + MaxNodes) /* an internal node */
293  {
294  gravnode *nop = get_nodep(p_sink, shmrank_sink);
295  next_sink = nop->sibling;
296  next_shmrank_sink = nop->sibling_shmrank;
297  type_sink = NODE_TYPE_LOCAL_NODE;
298  }
299  else if(p_sink >= ImportedNodeOffset && p_sink < EndOfTreePoints) /* an imported Treepoint particle */
300  {
301  /* note: here shmrank cannot change */
302  next_sink = get_nextnodep(shmrank_sink)[p_sink - MaxNodes];
303  next_shmrank_sink = shmrank_sink;
304  type_sink = NODE_TYPE_TREEPOINT_PARTICLE;
305  }
306  else if(p_sink >= EndOfTreePoints && p_sink < EndOfForeignNodes) /* an imported tree node */
307  {
308  gravnode *nop = get_nodep(p_sink, shmrank_sink);
309  next_sink = nop->sibling;
310  next_shmrank_sink = nop->sibling_shmrank;
311  type_sink = NODE_TYPE_FETCHED_NODE;
312  }
313  else if(p_sink >= EndOfForeignNodes)
314  {
315  foreign_gravpoint_data *foreignpoint = get_foreignpointsp(p_sink - EndOfForeignNodes, shmrank_sink);
316  next_sink = foreignpoint->Nextnode;
317  next_shmrank_sink = foreignpoint->Nextnode_shmrank;
318  type_sink = NODE_TYPE_FETCHED_PARTICLE;
319  }
320  else
321  {
322  /* a pseudo point */
323  next_sink = 0;
324  type_sink = 0;
325  Terminate("pseudo particle - should not happen");
326  }
327 
328  int p_source = noptr_source->nextnode; /* open cell */
329  unsigned char shmrank_source = noptr_source->nextnode_shmrank;
330 
331  while(p_source != noptr_source->sibling ||
332  (shmrank_source != noptr_source->sibling_shmrank && noptr_source->sibling >= MaxPart + D->NTopnodes))
333  {
334  int next_source;
335  unsigned char next_shmrank_source;
336  char type_source;
337 
338  if(p_source < MaxPart) /* a local particle */
339  {
340  /* note: here shmrank cannot change */
341  next_source = get_nextnodep(shmrank_source)[p_source];
342  next_shmrank_source = shmrank_source;
343  type_source = NODE_TYPE_LOCAL_PARTICLE;
344  }
345  else if(p_source < MaxPart + MaxNodes) /* an internal node */
346  {
347  gravnode *nop = get_nodep(p_source, shmrank_source);
348  next_source = nop->sibling;
349  next_shmrank_source = nop->sibling_shmrank;
350  type_source = NODE_TYPE_LOCAL_NODE;
351  }
352  else if(p_source >= ImportedNodeOffset && p_source < EndOfTreePoints) /* an imported Treepoint particle */
353  {
354  /* note: here shmrank cannot change */
355  next_source = get_nextnodep(shmrank_source)[p_source - MaxNodes];
356  next_shmrank_source = shmrank_source;
357  type_source = NODE_TYPE_TREEPOINT_PARTICLE;
358  }
359  else if(p_source >= EndOfTreePoints && p_source < EndOfForeignNodes) /* an imported tree node */
360  {
361  gravnode *nop = get_nodep(p_source, shmrank_source);
362  next_source = nop->sibling;
363  next_shmrank_source = nop->sibling_shmrank;
364  type_source = NODE_TYPE_FETCHED_NODE;
365  }
366  else if(p_source >= EndOfForeignNodes)
367  {
368  foreign_gravpoint_data *foreignpoint = get_foreignpointsp(p_source - EndOfForeignNodes, shmrank_source);
369  next_source = foreignpoint->Nextnode;
370  next_shmrank_source = foreignpoint->Nextnode_shmrank;
371  type_source = NODE_TYPE_FETCHED_PARTICLE;
372  }
373  else
374  {
375  /* a pseudo point */
376  next_source = 0;
377  type_source = 0;
378  Terminate("pseudo particle - should not happen");
379  }
380 
381  if(self_flag == 0 || p_source >= p_sink)
382  {
383  if(type_sink >= NODE_TYPE_LOCAL_NODE && type_source <= NODE_TYPE_FETCHED_PARTICLE)
384  {
385  /* in this case we have node-particle interaction, which we swap into a particle-node interaction */
386  fmm_force_interact(p_source, p_sink, type_source, type_sink, shmrank_source, shmrank_sink, mintopleafnode,
387  committed);
388  }
389  else
390  {
391  fmm_force_interact(p_sink, p_source, type_sink, type_source, shmrank_sink, shmrank_source, mintopleafnode,
392  committed);
393  }
394  }
395 
396  p_source = next_source;
397  shmrank_source = next_shmrank_source;
398  }
399 
400  p_sink = next_sink;
401  shmrank_sink = next_shmrank_sink;
402  }
403 }
404 
405 inline void fmm::fmm_open_node(int no_particle, gravnode *nop, char type_particle, unsigned char shmrank_particle, int mintopleafnode,
406  int committed)
407 {
408  int p = nop->nextnode;
409  unsigned char shmrank = nop->nextnode_shmrank;
410 
411  while(p != nop->sibling || (shmrank != nop->sibling_shmrank && nop->sibling >= MaxPart + D->NTopnodes))
412  {
413  if(p < 0)
414  Terminate("p=%d < 0", p);
415 
416  int next;
417  unsigned char next_shmrank;
418  char type;
419 
420  if(p < MaxPart) /* a local particle */
421  {
422  /* note: here shmrank cannot change */
423  next = get_nextnodep(shmrank)[p];
424  next_shmrank = shmrank;
426  }
427  else if(p < MaxPart + MaxNodes) /* an internal node */
428  {
429  gravnode *nop = get_nodep(p, shmrank);
430  next = nop->sibling;
431  next_shmrank = nop->sibling_shmrank;
432  type = NODE_TYPE_LOCAL_NODE;
433  }
434  else if(p >= ImportedNodeOffset && p < EndOfTreePoints) /* an imported Treepoint particle */
435  {
436  /* note: here shmrank cannot change */
437  next = get_nextnodep(shmrank)[p - MaxNodes];
438  next_shmrank = shmrank;
440  }
441  else if(p >= EndOfTreePoints && p < EndOfForeignNodes) /* an imported tree node */
442  {
443  gravnode *nop = get_nodep(p, shmrank);
444  next = nop->sibling;
445  next_shmrank = nop->sibling_shmrank;
446  type = NODE_TYPE_FETCHED_NODE;
447  }
448  else if(p >= EndOfForeignNodes) /* an imported particle below an imported tree node */
449  {
450  foreign_gravpoint_data *foreignpoint = get_foreignpointsp(p - EndOfForeignNodes, shmrank);
451 
452  next = foreignpoint->Nextnode;
453  next_shmrank = foreignpoint->Nextnode_shmrank;
455  }
456  else
457  {
458  /* a pseudo point */
459  Terminate(
460  "should not happen: p=%d MaxPart=%d MaxNodes=%d ImportedNodeOffset=%d EndOfTreePoints=%d EndOfForeignNodes=%d "
461  "shmrank=%d",
462  p, MaxPart, MaxNodes, ImportedNodeOffset, EndOfTreePoints, EndOfForeignNodes, shmrank);
463  }
464 
465  fmm_force_interact(no_particle, p, type_particle, type, shmrank_particle, shmrank, mintopleafnode, committed);
466 
467  p = next;
468  shmrank = next_shmrank;
469  }
470 }
471 
472 inline void fmm::fmm_particle_particle_interaction(int no_sink, int no_source, int type_sink, int type_source,
473  unsigned char shmrank_sink, unsigned char shmrank_source)
474 {
475 #ifdef PRESERVE_SHMEM_BINARY_INVARIANCE
476  if(skip_actual_force_computation)
477  return;
478 #endif
479 
480  MyIntPosType *intpos_n, *intpos_m;
481  MyReal mass_n, mass_m;
482  MyReal h_n, h_m;
483 #if defined(PMGRID) && defined(PLACEHIGHRESREGION)
484  int test_n, test_m;
485 #endif
486 
487  /* in one of the following three cases we have a single particle on the sink side */
488  if(type_sink == NODE_TYPE_LOCAL_PARTICLE)
489  {
490  particle_data *P = get_Pp(no_sink, shmrank_sink);
491 
492  intpos_n = P->IntPos;
493  mass_n = P->getMass();
495 #if defined(PMGRID) && defined(PLACEHIGHRESREGION)
496  test_n = P->InsideOutsideFlag;
497 #endif
498  }
499  else if(type_sink == NODE_TYPE_TREEPOINT_PARTICLE)
500  {
501  gravpoint_data *Pointp = get_pointsp(no_sink - ImportedNodeOffset, shmrank_sink);
502 
503  intpos_n = Pointp->IntPos;
504  mass_n = Pointp->Mass;
505  h_n = All.ForceSoftening[Pointp->getSofteningClass()];
506 #if defined(PMGRID) && defined(PLACEHIGHRESREGION)
507  test_n = Pointp->InsideOutsideFlag;
508 #endif
509  }
510  else /* a point that was fetched */
511  {
512  foreign_gravpoint_data *foreignpoint = get_foreignpointsp(no_sink - EndOfForeignNodes, shmrank_sink);
513 
514  intpos_n = foreignpoint->IntPos;
515  mass_n = foreignpoint->Mass;
516  h_n = All.ForceSoftening[foreignpoint->getSofteningClass()];
517 #if defined(PMGRID) && defined(PLACEHIGHRESREGION)
518  test_n = foreignpoint->InsideOutsideFlag;
519 #endif
520  }
521 
522  /* in one of the following three cases we have a single particle on the source side */
523  if(type_source == NODE_TYPE_LOCAL_PARTICLE)
524  {
525  particle_data *P = get_Pp(no_source, shmrank_source);
526 
527  intpos_m = P->IntPos;
528  mass_m = P->getMass();
530 #if defined(PMGRID) && defined(PLACEHIGHRESREGION)
531  test_m = P->InsideOutsideFlag;
532 #endif
533  }
534  else if(type_source == NODE_TYPE_TREEPOINT_PARTICLE)
535  {
536  gravpoint_data *Pointp = get_pointsp(no_source - ImportedNodeOffset, shmrank_source);
537 
538  intpos_m = Pointp->IntPos;
539  mass_m = Pointp->Mass;
540  h_m = All.ForceSoftening[Pointp->getSofteningClass()];
541 #if defined(PMGRID) && defined(PLACEHIGHRESREGION)
542  test_m = Pointp->InsideOutsideFlag;
543 #endif
544  }
545  else
546  {
547  foreign_gravpoint_data *foreignpoint = get_foreignpointsp(no_source - EndOfForeignNodes, shmrank_source);
548 
549  intpos_m = foreignpoint->IntPos;
550  mass_m = foreignpoint->Mass;
551  h_m = All.ForceSoftening[foreignpoint->getSofteningClass()];
552 #if defined(PMGRID) && defined(PLACEHIGHRESREGION)
553  test_m = foreignpoint->InsideOutsideFlag;
554 #endif
555  }
556 
557  MyReal h_max = (h_m > h_n) ? h_m : h_n;
558 
559  vector<MyReal> dxyz;
560  Tp->nearest_image_intpos_to_pos(intpos_m, intpos_n, dxyz.da); /* converts the integer distance to floating point */
561 
562  MyReal r2 = dxyz.r2();
563  MyReal r = sqrt(r2);
564  MyReal rinv = (r > 0) ? 1 / r : 0;
565 
566  gfactors gfac;
567 
568 #ifdef PMGRID
569  if(DoPM)
570  {
571  mesh_factors *mfp = &mf[LOW_MESH];
572 
573 #ifdef PLACEHIGHRESREGION
574  if((DoPM & TREE_ACTIVE_CUTTOFF_HIGHRES_PM))
575  {
576  if(test_m == FLAG_INSIDE && test_n == FLAG_INSIDE)
577  mfp = &mf[HIGH_MESH];
578  }
579 #endif
580  if(modify_gfactors_pm_monopole(gfac, r, rinv, mfp)) /* if we are outside the cut-off radius, we have no interaction */
581  return;
582  }
583 #endif
584 
585  get_gfactors_monopole(gfac, r, h_max, rinv);
586 
587 #ifdef EVALPOTENTIAL
588  MyReal D0 = gfac.fac0;
589 #endif
590  vector<MyReal> D1 = gfac.fac1 * rinv * dxyz;
591 
592  if(DoEwald)
593  {
594  ewald_data ew;
595  Ewald.ewald_gridlookup(intpos_m, intpos_n, ewald::POINTMASS, ew);
596 
597  D1 -= ew.D1phi;
598 
599 #ifdef EVALPOTENTIAL
600  D0 -= ew.D0phi;
601 #endif
602  }
603 
604  if(shmrank_sink == Shmem.Island_ThisTask)
605  {
606  if(type_sink == NODE_TYPE_LOCAL_PARTICLE)
607  {
608 #ifndef HIERARCHICAL_GRAVITY
609  if(Tp->TimeBinSynchronized[Tp->P[no_sink].TimeBinGrav])
610 #endif
611  {
612  Tp->P[no_sink].GravAccel -= mass_m * D1;
613 #ifdef EVALPOTENTIAL
614  Tp->P[no_sink].Potential -= mass_m * D0;
615 #endif
616  if(MeasureCostFlag)
617  Tp->P[no_sink].GravCost++;
618 
619  interactioncountPP += 1;
620  }
621  }
622  else if(type_sink == NODE_TYPE_TREEPOINT_PARTICLE)
623  {
624 #ifndef HIERARCHICAL_GRAVITY
625  if(Points[no_sink - ImportedNodeOffset].ActiveFlag)
626 #endif
627  {
628  int idx = ResultIndexList[no_sink - ImportedNodeOffset];
629  ResultsActiveImported[idx].GravAccel -= mass_m * D1;
630 #ifdef EVALPOTENTIAL
631  ResultsActiveImported[idx].Potential -= mass_m * D0;
632 #endif
633  if(MeasureCostFlag)
634  ResultsActiveImported[idx].GravCost++;
635 
636  interactioncountPP += 1;
637  }
638  }
639  }
640 
641  if(shmrank_source == Shmem.Island_ThisTask)
642  {
643  if(type_source == NODE_TYPE_LOCAL_PARTICLE)
644  {
645 #ifndef HIERARCHICAL_GRAVITY
646  if(Tp->TimeBinSynchronized[Tp->P[no_source].TimeBinGrav])
647 #endif
648  {
649  Tp->P[no_source].GravAccel += mass_n * D1;
650 #ifdef EVALPOTENTIAL
651  Tp->P[no_source].Potential -= mass_n * D0;
652 #endif
653 
654  if(MeasureCostFlag)
655  Tp->P[no_source].GravCost++;
656 
657  interactioncountPP += 1;
658  }
659  }
660  else if(type_source == NODE_TYPE_TREEPOINT_PARTICLE)
661  {
662 #ifndef HIERARCHICAL_GRAVITY
663  if(Points[no_source - ImportedNodeOffset].ActiveFlag)
664 #endif
665  {
666  int idx = ResultIndexList[no_source - ImportedNodeOffset];
667  ResultsActiveImported[idx].GravAccel += mass_n * D1;
668 #ifdef EVALPOTENTIAL
669  ResultsActiveImported[idx].Potential -= mass_n * D0;
670 #endif
671  if(MeasureCostFlag)
672  ResultsActiveImported[idx].GravCost++;
673 
674  interactioncountPP += 1;
675  }
676  }
677  }
678 }
679 
680 inline void fmm::fmm_particle_node_interaction(int no_sink, int no_source, int type_sink, int type_source, unsigned char shmrank_sink,
681  unsigned char shmrank_source, gravnode *noptr_source, vector<MyReal> &dxyz, MyReal &r2)
682 {
683 #ifdef PRESERVE_SHMEM_BINARY_INVARIANCE
684  if(skip_actual_force_computation)
685  return;
686 #endif
687 
688  /* 'sink' is a particle
689  * 'source' node is a node with multipole moments.
690  * 'dxyz' is the distance vector, pointing from sink to source, i.e. dxyz = pos(source) - pos(sink)
691  */
692 
693  MyReal mass_i, h_i;
694 #if defined(PMGRID) && defined(PLACEHIGHRESREGION)
695  int test_point;
696 #endif
697 
698  MyIntPosType *intpos_i;
699 
700  if(type_sink == NODE_TYPE_LOCAL_PARTICLE)
701  {
702  particle_data *P = get_Pp(no_sink, shmrank_sink);
703 
704  intpos_i = P->IntPos;
705  mass_i = P->getMass();
707 #if defined(PMGRID) && defined(PLACEHIGHRESREGION)
708  test_point = P->InsideOutsideFlag;
709 #endif
710  }
711  else if(type_sink == NODE_TYPE_TREEPOINT_PARTICLE)
712  {
713  gravpoint_data *Pointp = get_pointsp(no_sink - ImportedNodeOffset, shmrank_sink);
714 
715  intpos_i = Pointp->IntPos;
716  mass_i = Pointp->Mass;
717  h_i = All.ForceSoftening[Pointp->getSofteningClass()];
718 #if defined(PMGRID) && defined(PLACEHIGHRESREGION)
719  test_point = Pointp->InsideOutsideFlag;
720 #endif
721  }
722  else /* a point that was fetched */
723  {
724  foreign_gravpoint_data *foreignpoint = get_foreignpointsp(no_sink - EndOfForeignNodes, shmrank_sink);
725 
726  intpos_i = foreignpoint->IntPos;
727  mass_i = foreignpoint->Mass;
728  h_i = All.ForceSoftening[foreignpoint->getSofteningClass()];
729 #if defined(PMGRID) && defined(PLACEHIGHRESREGION)
730  test_point = foreignpoint->InsideOutsideFlag;
731 #endif
732  }
733 
734  MyReal h_j = All.ForceSoftening[noptr_source->getSofteningClass()];
735  MyReal h_max = (h_j > h_i) ? h_j : h_i;
736 
737  /* do cell-particle interaction, node can be used */
738  MyReal r = sqrt(r2);
739 
740  MyReal rinv = (r > 0) ? 1 / r : 0;
741 
742  gfactors gfac;
743 
744 #ifdef PMGRID
745  if(DoPM)
746  {
747  mesh_factors *mfp = &mf[LOW_MESH];
748 
749 #ifdef PLACEHIGHRESREGION
750  if((DoPM & TREE_ACTIVE_CUTTOFF_HIGHRES_PM))
751  {
752  int test_node = noptr_source->overlap_flag;
753  if(test_node == FLAG_INSIDE && test_point == FLAG_INSIDE)
754  mfp = &mf[HIGH_MESH];
755  }
756 #endif
757 
758  if(modify_gfactors_pm_multipole(gfac, r, rinv, mfp)) /* if we are outside the cut-off radius, we have no interaction */
759  return;
760  }
761 #endif
762 
763  get_gfactors_multipole(gfac, r, h_max, rinv);
764 
765 #ifdef EVALPOTENTIAL
766  MyReal g0 = gfac.fac0;
767  MyReal D0 = g0;
768 #endif
769 
770  MyReal g1 = gfac.fac1 * rinv;
771  vector<MyReal> D1 = g1 * dxyz;
772 
773 #if(MULTIPOLE_ORDER >= 2)
774  MyReal g2 = gfac.fac2 * gfac.rinv2;
775  symtensor2<MyReal> aux2 = dxyz % dxyz; // construct outer product of the two vectors
776  symtensor2<MyReal> D2 = g2 * aux2;
777  D2[qXX] += g1;
778  D2[qYY] += g1;
779  D2[qZZ] += g1;
780 #endif
781 #if(MULTIPOLE_ORDER >= 3)
782  MyReal g3 = gfac.fac3 * gfac.rinv3;
784  symtensor3<MyReal> aux3;
785  setup_D3(INIT, D3, dxyz, aux2, aux3, g2, g3);
786 #endif
787 
788 #if(MULTIPOLE_ORDER >= 4)
789  MyReal g4 = gfac.fac4 * gfac.rinv2 * gfac.rinv2;
791  symtensor4<MyReal> aux4;
792  setup_D4(INIT, D4, dxyz, aux2, aux3, aux4, g2, g3, g4);
793 #endif
794 
795 #if(MULTIPOLE_ORDER >= 5)
796  MyReal g5 = gfac.fac5 * gfac.rinv3 * gfac.rinv2;
798  symtensor5<MyReal> aux5;
799  setup_D5(INIT, D5, dxyz, aux3, aux4, aux5, g3, g4, g5);
800 #endif
801 
802  if(DoEwald)
803  {
804  ewald_data ew;
805  Ewald.ewald_gridlookup(noptr_source->s.da, intpos_i, ewald::MULTIPOLES, ew);
806 
807 #ifdef EVALPOTENTIAL
808  D0 -= ew.D0phi;
809 #endif
810  D1 -= ew.D1phi;
811 #if(MULTIPOLE_ORDER >= 2)
812  D2 -= ew.D2phi;
813 #endif
814 #if(MULTIPOLE_ORDER >= 3)
815  D3 -= ew.D3phi;
816 #endif
817 #if(MULTIPOLE_ORDER >= 4)
818  D4 -= ew.D4phi;
819 #endif
820 #if(MULTIPOLE_ORDER >= 5)
821  D5 -= ew.D5phi;
822 #endif
823  }
824 
825  /* finally store the force on the particle */
826  if(shmrank_sink == Shmem.Island_ThisTask)
827  if(type_sink == NODE_TYPE_LOCAL_PARTICLE || type_sink == NODE_TYPE_TREEPOINT_PARTICLE)
828  {
829  MyReal mass_j = noptr_source->mass;
830 
831 #if(MULTIPOLE_ORDER >= 3) || ((MULTIPOLE_ORDER >= 2) && defined(EXTRAPOTTERM))
832  symtensor2<MyDouble> &Q2_j = noptr_source->Q2Tensor;
833 #endif
834 #if(MULTIPOLE_ORDER >= 4) || ((MULTIPOLE_ORDER >= 3) && defined(EXTRAPOTTERM))
835  symtensor3<MyDouble> &Q3_j = noptr_source->Q3Tensor;
836 #endif
837 #if(MULTIPOLE_ORDER >= 5) || ((MULTIPOLE_ORDER >= 4) && defined(EXTRAPOTTERM))
838  symtensor4<MyDouble> &Q4_j = noptr_source->Q4Tensor;
839 #endif
840 #if(MULTIPOLE_ORDER >= 5) && defined(EXTRAPOTTERM)
841  symtensor5<MyDouble> &Q5_j = noptr_source->Q5Tensor;
842 #endif
843 
844 #ifdef EVALPOTENTIAL
845  MyReal pot = -mass_j * D0;
846 #if(MULTIPOLE_ORDER >= 3) || ((MULTIPOLE_ORDER >= 2) && defined(EXTRAPOTTERM))
847  pot -= 0.5f * (D2 * Q2_j);
848 #endif
849 #if(MULTIPOLE_ORDER >= 4) || ((MULTIPOLE_ORDER >= 3) && defined(EXTRAPOTTERM))
850  pot -= static_cast<MyReal>(1.0 / 6) * (D3 * Q3_j);
851 #endif
852 #if(MULTIPOLE_ORDER >= 5) || ((MULTIPOLE_ORDER >= 4) && defined(EXTRAPOTTERM))
853  pot -= static_cast<MyReal>(1.0 / 24) * (D4 * Q4_j);
854 #endif
855 #if(MULTIPOLE_ORDER >= 5) && defined(EXTRAPOTTERM)
856 
857  pot -= static_cast<MyReal>(1.0 / 120) * (D5 * Q5_j);
858 #endif
859 #endif
860 
861  vector<MyReal> dphi = mass_j * D1;
862 
863 #if(MULTIPOLE_ORDER >= 3)
864  dphi += static_cast<MyReal>(0.5) * (D3 * Q2_j);
865 #endif
866 #if(MULTIPOLE_ORDER >= 4)
867  dphi += static_cast<MyReal>(1.0 / 6) * (D4 * Q3_j);
868 #endif
869 #if(MULTIPOLE_ORDER >= 5)
870  dphi += static_cast<MyReal>(1.0 / 24) * (D5 * Q4_j);
871 #endif
872 
873  if(type_sink == NODE_TYPE_LOCAL_PARTICLE)
874  {
875 #ifndef HIERARCHICAL_GRAVITY
876  if(Tp->TimeBinSynchronized[Tp->P[no_sink].TimeBinGrav])
877 #endif
878  {
879  Tp->P[no_sink].GravAccel -= dphi;
880 #ifdef EVALPOTENTIAL
881  Tp->P[no_sink].Potential += pot;
882 #endif
883  if(MeasureCostFlag)
884  Tp->P[no_sink].GravCost++;
885 
886  interactioncountPN += 1;
887  interactioncountEffective += 1;
888  }
889  }
890  else
891  {
892 #ifndef HIERARCHICAL_GRAVITY
893  if(Points[no_sink - ImportedNodeOffset].ActiveFlag)
894 #endif
895  {
896  int idx = ResultIndexList[no_sink - ImportedNodeOffset];
897  ResultsActiveImported[idx].GravAccel -= dphi;
898 #ifdef EVALPOTENTIAL
899  ResultsActiveImported[idx].Potential += pot;
900 #endif
901  if(MeasureCostFlag)
902  ResultsActiveImported[idx].GravCost++;
903 
904  interactioncountPN += 1;
905  interactioncountEffective += 1;
906  }
907  }
908  }
909 
910  if(fmm_depends_on_local_mass(no_source, shmrank_source))
911  if(type_source == NODE_TYPE_LOCAL_NODE)
912  {
913  /* mediating field expansion of point particle on source node */
914 #ifdef EVALPOTENTIAL
915  TaylorCoeff[no_source].coeff.phi += (-mass_i) * D0;
916 #endif
917  TaylorCoeff[no_source].coeff.dphi += (-mass_i) * D1;
918 #if(MULTIPOLE_ORDER >= 2)
919  TaylorCoeff[no_source].coeff.d2phi += (-mass_i) * D2;
920 #endif
921 #if(MULTIPOLE_ORDER >= 3)
922  TaylorCoeff[no_source].coeff.d3phi += (-mass_i) * D3;
923 #endif
924 #if(MULTIPOLE_ORDER >= 4)
925  TaylorCoeff[no_source].coeff.d4phi += (-mass_i) * D4;
926 #endif
927 #if(MULTIPOLE_ORDER >= 5)
928  TaylorCoeff[no_source].coeff.d5phi += (-mass_i) * D5;
929 #endif
930  TaylorCoeff[no_source].coeff.interactions += 1;
931 
932  interactioncountPN += 1;
933  }
934 }
935 
936 inline void fmm::fmm_node_node_interaction(int no_sink, int no_source, int type_sink, int type_source, unsigned char shmrank_sink,
937  unsigned char shmrank_source, gravnode *noptr_sink, gravnode *noptr_source,
938  vector<MyReal> &dxyz, MyReal &r2)
939 {
940 #ifdef PRESERVE_SHMEM_BINARY_INVARIANCE
941  if(skip_actual_force_computation)
942  return;
943 #endif
944 
945  /* 'sink' is a node with multipole moments
946  * 'source' node is a node with multipole moments
947  * 'dxyz' is the distance vector, pointing from sink to source, i.e. dxyz = pos(source) - pos(sink)
948  */
949 
950  /* now do the node-node interaction */
951  MyReal r = sqrt(r2);
952 
953 #if(MULTIPOLE_ORDER >= 3) || ((MULTIPOLE_ORDER >= 2) && defined(EXTRAPOTTERM))
954  symtensor2<MyDouble> &Q2_m = noptr_source->Q2Tensor;
955  symtensor2<MyDouble> &Q2_n = noptr_sink->Q2Tensor;
956 #endif
957 #if(MULTIPOLE_ORDER >= 4) || ((MULTIPOLE_ORDER >= 3) && defined(EXTRAPOTTERM))
958  symtensor3<MyDouble> &Q3_m = noptr_source->Q3Tensor;
959  symtensor3<MyDouble> &Q3_n = noptr_sink->Q3Tensor;
960 #endif
961 #if(MULTIPOLE_ORDER >= 5) || ((MULTIPOLE_ORDER >= 4) && defined(EXTRAPOTTERM))
962  symtensor4<MyDouble> &Q4_m = noptr_source->Q4Tensor;
963  symtensor4<MyDouble> &Q4_n = noptr_sink->Q4Tensor;
964 #endif
965 #if(MULTIPOLE_ORDER >= 5) && defined(EXTRAPOTTERM) && defined(EVALPOTENTIAL)
966  symtensor5<MyDouble> &Q5_m = noptr_source->Q5Tensor;
967  symtensor5<MyDouble> &Q5_n = noptr_sink->Q5Tensor;
968 #endif
969 
970  MyReal mass_m = noptr_source->mass;
971  MyReal mass_n = noptr_sink->mass;
972 
973  MyReal rinv = (r > 0) ? 1 / r : 0;
974 
975  MyReal h_n = All.ForceSoftening[noptr_sink->getSofteningClass()];
976  MyReal h_m = All.ForceSoftening[noptr_source->getSofteningClass()];
977  MyReal h_max = (h_m > h_n) ? h_m : h_n;
978 
979  gfactors gfac;
980 
981 #ifdef PMGRID
982  if(DoPM)
983  {
984  mesh_factors *mfp = &mf[LOW_MESH];
985 
986 #ifdef PLACEHIGHRESREGION
987  if((DoPM & TREE_ACTIVE_CUTTOFF_HIGHRES_PM))
988  {
989  if(noptr_source->overlap_flag == FLAG_INSIDE && noptr_sink->overlap_flag == FLAG_INSIDE)
990  mfp = &mf[HIGH_MESH];
991  }
992 #endif
993 
994  if(modify_gfactors_pm_multipole(gfac, r, rinv, mfp)) /* if we are outside the cut-off radius, we have no interaction */
995  return;
996  }
997 #endif
998 
999  get_gfactors_multipole(gfac, r, h_max, rinv);
1000 
1001 #ifdef EVALPOTENTIAL
1002  MyReal g0 = gfac.fac0;
1003  MyReal D0 = g0;
1004 #endif
1005 
1006  MyReal g1 = gfac.fac1 * rinv;
1007  vector<MyReal> D1 = g1 * dxyz;
1008 
1009 #if(MULTIPOLE_ORDER >= 2)
1010  MyReal g2 = gfac.fac2 * gfac.rinv2;
1011  symtensor2<MyReal> aux2 = dxyz % dxyz; // construct outer product of the two vectors
1012  symtensor2<MyReal> D2 = g2 * aux2;
1013  D2[qXX] += g1;
1014  D2[qYY] += g1;
1015  D2[qZZ] += g1;
1016 #endif
1017 
1018 #if(MULTIPOLE_ORDER >= 3)
1019  MyReal g3 = gfac.fac3 * gfac.rinv3;
1020  symtensor3<MyReal> D3;
1021  symtensor3<MyReal> aux3;
1022  setup_D3(INIT, D3, dxyz, aux2, aux3, g2, g3);
1023 #endif
1024 
1025 #if(MULTIPOLE_ORDER >= 4)
1026  MyReal g4 = gfac.fac4 * gfac.rinv2 * gfac.rinv2;
1027  symtensor4<MyReal> D4;
1028  symtensor4<MyReal> aux4;
1029  setup_D4(INIT, D4, dxyz, aux2, aux3, aux4, g2, g3, g4);
1030 #endif
1031 
1032 #if(MULTIPOLE_ORDER >= 5)
1033  MyReal g5 = gfac.fac5 * gfac.rinv3 * gfac.rinv2;
1034  symtensor5<MyReal> D5;
1035  symtensor5<MyReal> aux5;
1036  setup_D5(INIT, D5, dxyz, aux3, aux4, aux5, g3, g4, g5);
1037 #endif
1038 
1039  if(DoEwald)
1040  {
1041  ewald_data ew;
1042  Ewald.ewald_gridlookup(noptr_source->s.da, noptr_sink->s.da, ewald::MULTIPOLES, ew);
1043 
1044 #ifdef EVALPOTENTIAL
1045  D0 -= ew.D0phi;
1046 #endif
1047  D1 -= ew.D1phi;
1048 #if(MULTIPOLE_ORDER >= 2)
1049  D2 -= ew.D2phi;
1050 #endif
1051 #if(MULTIPOLE_ORDER >= 3)
1052  D3 -= ew.D3phi;
1053 #endif
1054 #if(MULTIPOLE_ORDER >= 4)
1055  D4 -= ew.D4phi;
1056 #endif
1057 #if(MULTIPOLE_ORDER >= 5)
1058  D5 -= ew.D5phi;
1059 #endif
1060  }
1061 
1062  if(fmm_depends_on_local_mass(no_sink, shmrank_sink))
1063  if(type_sink == NODE_TYPE_LOCAL_NODE)
1064  {
1065 #ifdef EVALPOTENTIAL
1066  TaylorCoeff[no_sink].coeff.phi += (-mass_m) * D0;
1067 #if(MULTIPOLE_ORDER >= 3) || ((MULTIPOLE_ORDER >= 2) && defined(EXTRAPOTTERM))
1068  TaylorCoeff[no_sink].coeff.phi += static_cast<MyReal>(-0.5) * (D2 * Q2_m);
1069 #endif
1070 #if(MULTIPOLE_ORDER >= 4) || ((MULTIPOLE_ORDER >= 3) && defined(EXTRAPOTTERM))
1071  TaylorCoeff[no_sink].coeff.phi += static_cast<MyReal>(-1.0 / 6) * (D3 * Q3_m);
1072 #endif
1073 #if(MULTIPOLE_ORDER >= 5) || ((MULTIPOLE_ORDER >= 4) && defined(EXTRAPOTTERM))
1074  TaylorCoeff[no_sink].coeff.phi += static_cast<MyReal>(-1.0 / 24) * (D4 * Q4_m);
1075 #endif
1076 #if(MULTIPOLE_ORDER >= 5) && defined(EXTRAPOTTERM)
1077  TaylorCoeff[no_sink].coeff.phi += static_cast<MyReal>(-1.0 / 120) * (D5 * Q5_m);
1078 #endif
1079 #endif
1080 
1081  TaylorCoeff[no_sink].coeff.dphi += mass_m * D1;
1082 #if(MULTIPOLE_ORDER >= 2)
1083  TaylorCoeff[no_sink].coeff.d2phi += (-mass_m) * D2;
1084 #endif
1085 #if(MULTIPOLE_ORDER >= 3)
1086  TaylorCoeff[no_sink].coeff.dphi += static_cast<MyReal>(0.5) * (D3 * Q2_m);
1087  TaylorCoeff[no_sink].coeff.d3phi += mass_m * D3;
1088 #endif
1089 #if(MULTIPOLE_ORDER >= 4)
1090  TaylorCoeff[no_sink].coeff.dphi += static_cast<MyReal>(1.0 / 6) * (D4 * Q3_m);
1091  TaylorCoeff[no_sink].coeff.d2phi += static_cast<MyReal>(-0.5) * (D4 * Q2_m);
1092  TaylorCoeff[no_sink].coeff.d4phi += (-mass_m) * D4;
1093 #endif
1094 #if(MULTIPOLE_ORDER >= 5)
1095  TaylorCoeff[no_sink].coeff.dphi += static_cast<MyReal>(1.0 / 24) * (D5 * Q4_m);
1096  TaylorCoeff[no_sink].coeff.d2phi += static_cast<MyReal>(-1.0 / 6) * (D5 * Q3_m);
1097  TaylorCoeff[no_sink].coeff.d3phi += static_cast<MyReal>(0.5) * (D5 * Q2_m);
1098  TaylorCoeff[no_sink].coeff.d5phi += mass_m * D5;
1099 #endif
1100 
1101  TaylorCoeff[no_sink].coeff.interactions += 1;
1102  interactioncountNN += 1;
1103  }
1104 
1105  if(fmm_depends_on_local_mass(no_source, shmrank_source))
1106  if(type_source == NODE_TYPE_LOCAL_NODE)
1107  {
1108 #ifdef EVALPOTENTIAL
1109  TaylorCoeff[no_source].coeff.phi += (-mass_n) * D0;
1110 #if(MULTIPOLE_ORDER >= 3) || ((MULTIPOLE_ORDER >= 2) && defined(EXTRAPOTTERM))
1111  TaylorCoeff[no_source].coeff.phi += static_cast<MyReal>(-0.5) * (D2 * Q2_n);
1112 #endif
1113 #if(MULTIPOLE_ORDER >= 4) || ((MULTIPOLE_ORDER >= 3) && defined(EXTRAPOTTERM))
1114  TaylorCoeff[no_source].coeff.phi += static_cast<MyReal>(1.0 / 6) * (D3 * Q3_n);
1115 #endif
1116 #if(MULTIPOLE_ORDER >= 5) || ((MULTIPOLE_ORDER >= 4) && defined(EXTRAPOTTERM))
1117  TaylorCoeff[no_source].coeff.phi += static_cast<MyReal>(-1.0 / 24) * (D4 * Q4_n);
1118 #endif
1119 #if(MULTIPOLE_ORDER >= 5) && defined(EXTRAPOTTERM)
1120  TaylorCoeff[no_source].coeff.phi += static_cast<MyReal>(1.0 / 120) * (D5 * Q5_n);
1121 #endif
1122 #endif
1123 
1124  TaylorCoeff[no_source].coeff.dphi += (-mass_n) * D1;
1125 #if(MULTIPOLE_ORDER >= 2)
1126  TaylorCoeff[no_source].coeff.d2phi += (-mass_n) * D2;
1127 #endif
1128 #if(MULTIPOLE_ORDER >= 3)
1129  TaylorCoeff[no_source].coeff.dphi += static_cast<MyReal>(-0.5) * (D3 * Q2_n);
1130  TaylorCoeff[no_source].coeff.d3phi += (-mass_n) * D3;
1131 #endif
1132 #if(MULTIPOLE_ORDER >= 4)
1133  TaylorCoeff[no_source].coeff.dphi += static_cast<MyReal>(1.0 / 6) * (D4 * Q3_n);
1134  TaylorCoeff[no_source].coeff.d2phi += static_cast<MyReal>(-0.5) * (D4 * Q2_n);
1135  TaylorCoeff[no_source].coeff.d4phi += (-mass_n) * D4;
1136 #endif
1137 #if(MULTIPOLE_ORDER >= 5)
1138  TaylorCoeff[no_source].coeff.dphi += static_cast<MyReal>(-1.0 / 24) * (D5 * Q4_n);
1139  TaylorCoeff[no_source].coeff.d2phi += static_cast<MyReal>(1.0 / 6) * (D5 * Q3_n);
1140  TaylorCoeff[no_source].coeff.d3phi += static_cast<MyReal>(-0.5) * (D5 * Q2_n);
1141  TaylorCoeff[no_source].coeff.d5phi += (-mass_n) * D5;
1142 #endif
1143 
1144  TaylorCoeff[no_source].coeff.interactions += 1;
1145  interactioncountNN += 1;
1146  }
1147 }
1148 
1149 inline int fmm::fmm_evaluate_node_node_opening_criterion(gravnode *noptr_sink, gravnode *noptr_source, vector<MyReal> &dxyz,
1150  MyReal &r2)
1151 {
1152  if(noptr_source->level != noptr_sink->level)
1153  Terminate("This shouldn't happen: noptr_level=%d noptr_sink->level=%d ", noptr_source->level, noptr_sink->level);
1154 
1155  if(noptr_source->level <=
1156  1) // always open the root node, and the next level (note: full node length does not fit in the integer type)
1157  return NODE_OPEN;
1158 
1159  /* Note: we will always have noptr_sink->len == noptr_source->len in our algorithm! */
1160 
1161  MyIntPosType halflen = ((MyIntPosType)1) << ((BITS_FOR_POSITIONS - 1) - noptr_sink->level);
1162  MyIntPosType intlen = halflen << 1;
1163 
1164 #ifndef TREE_NO_SAFETY_BOX
1165  // We always open adjacent nodes to protect against worst-case force errors
1166  MyIntPosType twolens = intlen + (intlen - 1);
1167  MyIntPosType dist[3];
1168  Tp->nearest_image_intpos_to_absolute_intdist(noptr_source->center.da, noptr_sink->center.da, dist);
1169 
1170  if(dist[0] < twolens && dist[1] < twolens && dist[2] < twolens)
1171  return NODE_OPEN;
1172 #endif
1173 
1174  /* converts the integer distance of the centers of mass to floating point */
1175  Tp->nearest_image_intpos_to_pos(noptr_source->s.da, noptr_sink->s.da, dxyz.da);
1176 
1177  r2 = dxyz.r2();
1178 
1179 #ifdef PMGRID
1180  mesh_factors *mfp = &mf[LOW_MESH];
1181 
1182 #ifdef PLACEHIGHRESREGION
1183  if((DoPM & TREE_ACTIVE_CUTTOFF_HIGHRES_PM))
1184  {
1185  int test_source = noptr_source->overlap_flag;
1186  int test_sink = noptr_sink->overlap_flag;
1187 
1188  if((test_source == FLAG_BOUNDARYOVERLAP && test_sink != FLAG_OUTSIDE) ||
1189  (test_sink == FLAG_BOUNDARYOVERLAP && test_source != FLAG_OUTSIDE))
1190  {
1191  Terminate("this shouldn't happen any more");
1192  return NODE_OPEN;
1193  }
1194  else
1195  {
1196  if(test_source == FLAG_INSIDE && test_sink == FLAG_INSIDE)
1197  mfp = &mf[HIGH_MESH];
1198  }
1199  }
1200 #endif
1201 
1202  if(DoPM && r2 > mfp->rcut2 && noptr_sink->level > 1)
1203  {
1204  /* check whether we can ignore any interactions along this branch */
1205  MyIntPosType dist_x = noptr_source->center[0] - noptr_sink->center[0];
1206  dist_x = (((MySignedIntPosType)dist_x) >= 0) ? dist_x : -dist_x;
1207  if(dist_x > mfp->intrcut[0] + intlen)
1208  return NODE_DISCARD;
1209 
1210  MyIntPosType dist_y = noptr_source->center[1] - noptr_sink->center[1];
1211  dist_y = (((MySignedIntPosType)dist_y) >= 0) ? dist_y : -dist_y;
1212  if(dist_y > mfp->intrcut[1] + intlen)
1213  return NODE_DISCARD;
1214 
1215  MyIntPosType dist_z = noptr_source->center[2] - noptr_sink->center[2];
1216  dist_z = (((MySignedIntPosType)dist_z) >= 0) ? dist_z : -dist_z;
1217  if(dist_z > mfp->intrcut[2] + intlen)
1218  return NODE_DISCARD;
1219  }
1220 #endif
1221 
1222  /* evaluate generalized opening criterion */
1223 
1224  MyReal len = intlen * Tp->FacIntToCoord;
1225  MyReal len2 = len * len;
1226 
1227  if(All.RelOpeningCriterionInUse == 0) /* check Barnes-Hut opening criterion */
1228  {
1229  if(4 * len2 > r2 * errTolTheta2)
1230  return NODE_OPEN;
1231  }
1232  else
1233  {
1234  if(4 * len2 > r2 * errTolThetaMax2)
1235  return NODE_OPEN;
1236 
1237  MyReal mmax = (noptr_source->mass < noptr_sink->mass) ? noptr_sink->mass : noptr_source->mass;
1238  MyReal amin =
1239  errTolForceAcc * ((noptr_sink->MinOldAcc < noptr_source->MinOldAcc) ? noptr_sink->MinOldAcc : noptr_source->MinOldAcc);
1240 #if(MULTIPOLE_ORDER == 1)
1241  if(mmax > r2 * amin)
1242  return NODE_OPEN;
1243 #elif(MULTIPOLE_ORDER == 2)
1244  if(square(mmax * len) > r2 * square(r2 * amin))
1245  return NODE_OPEN;
1246 #elif(MULTIPOLE_ORDER == 3)
1247  if(mmax * len2 > r2 * r2 * amin)
1248  return NODE_OPEN;
1249 #elif(MULTIPOLE_ORDER == 4)
1250  if(square(mmax * len * len2) > r2 * square(r2 * r2 * amin))
1251  return NODE_OPEN;
1252 #elif(MULTIPOLE_ORDER == 5)
1253  if(mmax * len2 * len2 > r2 * r2 * r2 * amin)
1254  return NODE_OPEN;
1255 #endif
1256  }
1257 
1258 #if NSOFTCLASSES > 1
1259  MyReal h_m = All.ForceSoftening[noptr_sink->getSofteningClass()];
1260  MyReal h_n = All.ForceSoftening[noptr_source->getSofteningClass()];
1261  MyReal h_max = (h_m > h_n) ? h_m : h_n;
1262 
1263  if(r2 < h_max * h_max)
1264  {
1265  if(All.ForceSoftening[noptr_source->minsofttype] < All.ForceSoftening[noptr_source->maxsofttype] &&
1266  h_max > All.ForceSoftening[noptr_sink->minsofttype])
1267  return NODE_OPEN;
1268  else if(All.ForceSoftening[noptr_sink->minsofttype] < All.ForceSoftening[noptr_sink->maxsofttype] &&
1269  h_max > All.ForceSoftening[noptr_source->minsofttype])
1270  return NODE_OPEN;
1271  }
1272 #endif
1273 
1274  return NODE_USE;
1275 }
1276 
1277 inline int fmm::fmm_evaluate_particle_node_opening_criterion(int no_sink, char type_sink, unsigned char shmrank_sink,
1278  gravnode *nop_source, vector<MyReal> &dxyz, MyReal &r2)
1279 {
1280  MyIntPosType *intpos_n;
1281  MyReal mass_n;
1282  MyReal aold;
1283 #if NSOFTCLASSES > 1
1284  MyReal h_n;
1285 #endif
1286 #if defined(PMGRID) && defined(PLACEHIGHRESREGION)
1287  int test_point;
1288 #endif
1289 
1290  if(type_sink == NODE_TYPE_LOCAL_PARTICLE)
1291  {
1292  particle_data *P = get_Pp(no_sink, shmrank_sink);
1293 
1294  intpos_n = P->IntPos;
1295  mass_n = P->getMass();
1296  aold = P->OldAcc;
1297 #if NSOFTCLASSES > 1
1298  h_n = All.ForceSoftening[P->getSofteningClass()];
1299 #endif
1300 #if defined(PMGRID) && defined(PLACEHIGHRESREGION)
1301  test_point = P->InsideOutsideFlag;
1302 #endif
1303  }
1304  else if(type_sink == NODE_TYPE_TREEPOINT_PARTICLE)
1305  {
1306  gravpoint_data *Pointp = get_pointsp(no_sink - ImportedNodeOffset, shmrank_sink);
1307 
1308  intpos_n = Pointp->IntPos;
1309  mass_n = Pointp->Mass;
1310  aold = Pointp->OldAcc;
1311 #if NSOFTCLASSES > 1
1312  h_n = All.ForceSoftening[Pointp->getSofteningClass()];
1313 #endif
1314 #if defined(PMGRID) && defined(PLACEHIGHRESREGION)
1315  test_point = Pointp->InsideOutsideFlag;
1316 #endif
1317  }
1318  else /* a point that was fetched */
1319  {
1320  foreign_gravpoint_data *foreignpoint = get_foreignpointsp(no_sink - EndOfForeignNodes, shmrank_sink);
1321 
1322  intpos_n = foreignpoint->IntPos;
1323  mass_n = foreignpoint->Mass;
1324  aold = foreignpoint->OldAcc;
1325 #if NSOFTCLASSES > 1
1326  h_n = All.ForceSoftening[foreignpoint->getSofteningClass()];
1327 #endif
1328 #if defined(PMGRID) && defined(PLACEHIGHRESREGION)
1329  test_point = foreignpoint->InsideOutsideFlag;
1330 #endif
1331  }
1332 
1333  if(nop_source->level == 0) // always open the root node (note: full node length does not fit in the integer type)
1334  return NODE_OPEN;
1335 
1336  MyIntPosType halflen = ((MyIntPosType)1) << ((BITS_FOR_POSITIONS - 1) - nop_source->level);
1337  MyIntPosType intlen = halflen << 1;
1338 
1339 #ifndef TREE_NO_SAFETY_BOX
1340  MyIntPosType dist[3];
1341  Tp->nearest_image_intpos_to_absolute_intdist(nop_source->center.da, intpos_n, dist);
1342  // if we are close to the node, and therefore open it to protect against worst-case force errors
1343  if(dist[0] < intlen && dist[1] < intlen && dist[2] < intlen)
1344  return NODE_OPEN;
1345 #endif
1346 
1347  /* check a variant of the classic opening criterion */
1348  Tp->nearest_image_intpos_to_pos(nop_source->s.da, intpos_n, dxyz.da); /* converts the integer distance to floating point */
1349 
1350  r2 = dxyz.r2();
1351 
1352 #ifdef PMGRID
1353  mesh_factors *mfp = &mf[LOW_MESH];
1354 
1355 #ifdef PLACEHIGHRESREGION
1356  if((DoPM & TREE_ACTIVE_CUTTOFF_HIGHRES_PM))
1357  {
1358  int test_node = nop_source->overlap_flag;
1359 
1360  if(test_point == FLAG_INSIDE && test_node == FLAG_BOUNDARYOVERLAP)
1361  {
1362  Terminate("this shouldn't happen any more");
1363  return NODE_OPEN;
1364  }
1365  else
1366  {
1367  if(test_node == FLAG_INSIDE && test_point == FLAG_INSIDE)
1368  mfp = &mf[HIGH_MESH];
1369  }
1370  }
1371 #endif
1372 
1373  if(DoPM && r2 > mfp->rcut2)
1374  {
1375  /* check whether we can ignore any interactions along this branch */
1376  MyIntPosType dist_x = nop_source->center[0] - intpos_n[0];
1377  dist_x = (((MySignedIntPosType)dist_x) >= 0) ? dist_x : -dist_x;
1378  if(dist_x > mfp->intrcut[0] + halflen)
1379  return NODE_DISCARD;
1380 
1381  MyIntPosType dist_y = nop_source->center[1] - intpos_n[1];
1382  dist_y = (((MySignedIntPosType)dist_y) >= 0) ? dist_y : -dist_y;
1383  if(dist_y > mfp->intrcut[1] + halflen)
1384  return NODE_DISCARD;
1385 
1386  MyIntPosType dist_z = nop_source->center[2] - intpos_n[2];
1387  dist_z = (((MySignedIntPosType)dist_z) >= 0) ? dist_z : -dist_z;
1388  if(dist_z > mfp->intrcut[2] + halflen)
1389  return NODE_DISCARD;
1390  }
1391 #endif
1392 
1393  MyReal len = intlen * Tp->FacIntToCoord;
1394  MyReal len2 = len * len;
1395 
1396  if(All.RelOpeningCriterionInUse == 0) /* check Barnes-Hut opening criterion */
1397  {
1398  if(4 * len2 > r2 * errTolTheta2)
1399  return NODE_OPEN;
1400  }
1401  else /* check relative opening criterion */
1402  {
1403  if(4 * len2 > r2 * errTolThetaMax2)
1404  return NODE_OPEN;
1405 
1406  MyReal mmax = (nop_source->mass < mass_n) ? mass_n : nop_source->mass;
1407  MyReal amin = errTolForceAcc * ((aold < nop_source->MinOldAcc) ? aold : nop_source->MinOldAcc);
1408 
1409 #if(MULTIPOLE_ORDER == 1)
1410  if(mmax > r2 * amin)
1411  return NODE_OPEN;
1412 #elif(MULTIPOLE_ORDER == 2)
1413  if(square(mmax * len) > r2 * square(r2 * amin))
1414  return NODE_OPEN;
1415 #elif(MULTIPOLE_ORDER == 3)
1416  if(mmax * len2 > r2 * r2 * amin)
1417  return NODE_OPEN;
1418 #elif(MULTIPOLE_ORDER == 4)
1419  if(square(mmax * len * len2) > r2 * square(r2 * r2 * amin))
1420  return NODE_OPEN;
1421 #elif(MULTIPOLE_ORDER == 5)
1422  if(mmax * len2 * len2 > r2 * r2 * r2 * amin)
1423  return NODE_OPEN;
1424 #endif
1425  }
1426 
1427 #if NSOFTCLASSES > 1
1428  MyReal h_m = All.ForceSoftening[nop_source->getSofteningClass()];
1429 
1430  if(h_m > h_n)
1431  {
1432  if(r2 < h_m * h_m)
1433  if(All.ForceSoftening[nop_source->minsofttype] < All.ForceSoftening[nop_source->maxsofttype])
1434  {
1435  return NODE_OPEN;
1436  }
1437  }
1438 #endif
1439 
1440  return NODE_USE;
1441 }
1442 
1443 /* function to account for interaction of two nodes in the tree */
1444 void fmm::fmm_force_interact(int no_sink, int no_source, char type_sink, char type_source, unsigned char shmrank_sink,
1445  unsigned char shmrank_source, int mintopleafnode, int committed)
1446 {
1447  if(type_sink <= NODE_TYPE_FETCHED_PARTICLE && type_source <= NODE_TYPE_FETCHED_PARTICLE) /* particle-particle interaction */
1448  {
1449  /* nothing to be done, or if we do not deal with at least one local particle */
1450  if(type_sink == NODE_TYPE_FETCHED_PARTICLE && type_source == NODE_TYPE_FETCHED_PARTICLE)
1451  return;
1452 
1453  if(no_sink != no_source || shmrank_source != shmrank_sink) // exclude self-interaction
1454  fmm_particle_particle_interaction(no_sink, no_source, type_sink, type_source, shmrank_sink, shmrank_source);
1455  }
1456  else if(!(type_sink > NODE_TYPE_FETCHED_PARTICLE && type_source > NODE_TYPE_FETCHED_PARTICLE)) /* cell-particle interaction */
1457  {
1458  /* we have arranged it such that the particle is always on the sink side, the node on the source side */
1459 
1460  gravnode *noptr_source = get_nodep(no_source, shmrank_source);
1461 
1462  /* noting to be done if we do not deal with any local mass */
1463  if(fmm_depends_on_local_mass(no_source, shmrank_source) == false &&
1464  (type_sink == NODE_TYPE_FETCHED_PARTICLE ||
1465  (type_sink < NODE_TYPE_FETCHED_PARTICLE && shmrank_sink != Shmem.Island_ThisTask)))
1466  return;
1467 
1468  if(noptr_source->not_empty == 0)
1469  return;
1470 
1471  if(no_source < MaxPart + MaxNodes) // we have a top-levelnode
1472  if(noptr_source->nextnode >= MaxPart + MaxNodes) // if the next node is not a top-level, we have a leaf node
1473  mintopleafnode = no_source;
1474 
1475  MyReal r2;
1476  vector<MyReal> dxyz;
1477 
1478  int openflag = fmm_evaluate_particle_node_opening_criterion(no_sink, type_sink, shmrank_sink, noptr_source, dxyz, r2);
1479 
1480  if(openflag == NODE_USE)
1481  {
1482  fmm_particle_node_interaction(no_sink, no_source, type_sink, type_source, shmrank_sink, shmrank_source, noptr_source, dxyz,
1483  r2);
1484  }
1485  else if(openflag == NODE_OPEN) /* open cell in a cell-particle interaction */
1486  {
1487  if(noptr_source->cannot_be_opened_locally)
1488  {
1489  // are we in the same shared memory node?
1490  if(Shmem.GetNodeIDForSimulCommRank[noptr_source->OriginTask] == Shmem.GetNodeIDForSimulCommRank[D->ThisTask])
1491  {
1492  Terminate("this should not happen any more");
1493  }
1494  else
1495  {
1496  tree_add_to_fetch_stack(noptr_source, no_source, shmrank_source);
1497 
1498  fmm_add_to_work_stack(no_sink, no_source, shmrank_sink, shmrank_source, mintopleafnode);
1499  }
1500  }
1501  else
1502  {
1503  int min_buffer_space =
1504  std::min<int>(MaxOnWorkStack - (NumOnWorkStack + NewOnWorkStack), MaxOnFetchStack - NumOnFetchStack);
1505 
1506  if(min_buffer_space >= committed + 8 * TREE_NUM_BEFORE_NODESPLIT)
1507  fmm_open_node(no_sink, noptr_source, type_sink, shmrank_sink, mintopleafnode,
1508  committed + 8 * TREE_NUM_BEFORE_NODESPLIT);
1509  else
1510  fmm_add_to_work_stack(no_sink, no_source, shmrank_sink, shmrank_source, mintopleafnode);
1511  }
1512  }
1513  }
1514  else /* cell - cell interaction */
1515  {
1516  gravnode *noptr_sink = get_nodep(no_sink, shmrank_sink);
1517  gravnode *noptr_source = get_nodep(no_source, shmrank_source);
1518 
1519  /* at least one of the cells needs to depend on local particles */
1520  if(fmm_depends_on_local_mass(no_sink, shmrank_sink) || fmm_depends_on_local_mass(no_source, shmrank_source))
1521  {
1522  /* both cells need to be non-empty */
1523  if(noptr_sink->not_empty != 0 && noptr_source->not_empty != 0)
1524  {
1525  if(noptr_sink == noptr_source) /* self-interaction */
1526  {
1527  if(no_source < MaxPart + MaxNodes) // we have a top-levelnode
1528  if(noptr_source->nextnode >= MaxPart + MaxNodes) // if the next node is not a top-level, we have a leaf node
1529  mintopleafnode = no_source;
1530 
1531  if(noptr_sink->cannot_be_opened_locally)
1532  {
1533  Terminate("should not happen because we have a self-interaction of a supposedly local node");
1534  }
1535  else
1536  {
1537  int min_buffer_space =
1538  std::min<int>(MaxOnWorkStack - (NumOnWorkStack + NewOnWorkStack), MaxOnFetchStack - NumOnFetchStack);
1539 
1540  if(min_buffer_space >= committed + 8 * 8 * TREE_NUM_BEFORE_NODESPLIT * TREE_NUM_BEFORE_NODESPLIT)
1541  fmm_open_both(noptr_sink, noptr_sink, mintopleafnode,
1543  else
1544  fmm_add_to_work_stack(no_source, no_sink, shmrank_source, shmrank_sink, mintopleafnode);
1545  }
1546  }
1547  else
1548  {
1549  MyReal r2;
1550  vector<MyReal> dxyz;
1551 
1552  int openflag = fmm_evaluate_node_node_opening_criterion(noptr_sink, noptr_source, dxyz, r2);
1553 
1554  if(openflag == NODE_USE)
1555  {
1556  /* evaluate the interaction */
1557  fmm_node_node_interaction(no_sink, no_source, type_sink, type_source, shmrank_sink, shmrank_source, noptr_sink,
1558  noptr_source, dxyz, r2);
1559  }
1560  else if(openflag == NODE_OPEN)
1561  {
1562  /* open both */
1563 
1564  if(no_source < MaxPart + MaxNodes) // we have a top-levelnode
1565  if(noptr_source->nextnode >= MaxPart + MaxNodes) // if the next node is not a top-level, we have a leaf node
1566  mintopleafnode = std::min<int>(mintopleafnode, no_source);
1567 
1568  if(no_sink < MaxPart + MaxNodes) // we have a top-levelnode
1569  if(noptr_sink->nextnode >= MaxPart + MaxNodes) // if the next node is not a top-level, we have a leaf node
1570  mintopleafnode = std::min<int>(mintopleafnode, no_sink);
1571 
1572  if(noptr_source->cannot_be_opened_locally || noptr_sink->cannot_be_opened_locally)
1573  {
1574  if(noptr_source->cannot_be_opened_locally && noptr_sink->cannot_be_opened_locally)
1575  Terminate("this should not happen, because then both nodes would be foreign");
1576 
1577  if(noptr_source->cannot_be_opened_locally)
1578  tree_add_to_fetch_stack(noptr_source, no_source, shmrank_source);
1579 
1580  if(noptr_sink->cannot_be_opened_locally)
1581  tree_add_to_fetch_stack(noptr_sink, no_sink, shmrank_sink);
1582 
1583  fmm_add_to_work_stack(no_source, no_sink, shmrank_source, shmrank_sink, mintopleafnode);
1584  }
1585  else
1586  {
1587  int min_buffer_space =
1588  std::min<int>(MaxOnWorkStack - (NumOnWorkStack + NewOnWorkStack), MaxOnFetchStack - NumOnFetchStack);
1589 
1590  if(min_buffer_space >= committed + 8 * 8 * TREE_NUM_BEFORE_NODESPLIT * TREE_NUM_BEFORE_NODESPLIT)
1591  fmm_open_both(noptr_sink, noptr_source, mintopleafnode,
1593  else
1594  fmm_add_to_work_stack(no_source, no_sink, shmrank_source, shmrank_sink, mintopleafnode);
1595  }
1596  }
1597  }
1598  }
1599  }
1600  }
1601 }
1602 
1603 void fmm::fmm_determine_nodes_with_local_mass(int no, int sib)
1604 {
1605  gravnode *nop = get_nodep(no, Shmem.Island_ThisTask);
1606 
1607  int p = nop->nextnode;
1608 
1609  /* if the next node is not a top-level node, we have reached a leaf node, and we need to do nothing */
1610  if(p < MaxPart || p >= FirstNonTopLevelNode)
1611  return;
1612 
1613  unsigned char depends_on_local_mass = 0;
1614 
1615  while(p != nop->sibling)
1616  {
1617  if(p >= 0)
1618  {
1619  if(p >= MaxPart && p < MaxPart + MaxNodes) /* we have an internal node */
1620  {
1621  int nextsib = get_nodep(p, Shmem.Island_ThisTask)->sibling;
1622 
1623  fmm_determine_nodes_with_local_mass(p, nextsib);
1624  }
1625 
1626  if(p < MaxPart) /* a particle */
1627  {
1628  Terminate("stop");
1629  }
1630  else if(p < MaxPart + MaxNodes) /* an internal node */
1631  {
1632  depends_on_local_mass |= Topnode_depends_on_local_mass[p];
1633 
1634  p = get_nodep(p, Shmem.Island_ThisTask)->sibling;
1635  }
1636  else if(p < MaxPart + MaxNodes + D->NTopleaves) /* a pseudo particle */
1637  {
1638  /* we are processing a local leaf-node which does not have any particles.
1639  * can continue to the next element, which should end the work.
1640  */
1641  Terminate("stop");
1642  }
1643  else
1644  {
1645  Terminate("stop");
1646  }
1647  }
1648  }
1649 
1650  Topnode_depends_on_local_mass[no] = depends_on_local_mass;
1651 }
1652 
1653 void fmm::gravity_fmm(int timebin)
1654 {
1655  interactioncountPP = 0;
1656  interactioncountPN = 0;
1657  interactioncountNN = 0;
1658  interactioncountEffective = 0;
1659 
1660  TIMER_STORE;
1661  TIMER_START(CPU_TREE);
1662 
1663  D->mpi_printf("FMM: Begin tree force. timebin=%d (presently allocated=%g MB)\n", timebin, All.ErrTolTheta,
1665 
1666 #ifdef PMGRID
1667  set_mesh_factors();
1668 #endif
1669 
1670  Topnode_depends_on_local_mass = (char *)Mem.mymalloc_clear("Topnode_depends_on_local_mass", D->NTopnodes * sizeof(char));
1671  Topnode_depends_on_local_mass -= MaxPart;
1672 
1673  for(int n = 0; n < D->NTopleaves; n++)
1674  {
1675  if(D->TaskOfLeaf[n] == D->ThisTask)
1676  {
1677  int no = NodeIndex[n];
1678 
1679  if(TopNodes[no].not_empty)
1680  Topnode_depends_on_local_mass[no] = 1;
1681  }
1682  }
1683 
1684  fmm_determine_nodes_with_local_mass(MaxPart, -1);
1685 
1686  TIMER_START(CPU_TREESTACK);
1687 
1688  NumOnWorkStack = 0;
1689  AllocWorkStackBaseLow = std::max<int>(1.5 * (Tp->NumPart + NumPartImported), TREE_MIN_WORKSTACK_SIZE);
1690  AllocWorkStackBaseHigh = AllocWorkStackBaseLow + TREE_EXPECTED_CYCLES * TREE_MIN_WORKSTACK_SIZE;
1691  MaxOnWorkStack = AllocWorkStackBaseLow;
1692 
1693  FMM_WorkStack = (fmm_workstack_data *)Mem.mymalloc("FMM_WorkStack", AllocWorkStackBaseHigh * sizeof(fmm_workstack_data));
1694  ResultIndexList = (int *)Mem.mymalloc("ResultIndexList", NumPartImported * sizeof(int));
1695 
1696  for(int i = 0; i < Tp->TimeBinsGravity.NActiveParticles; i++)
1697  {
1698  int target = Tp->TimeBinsGravity.ActiveParticleList[i];
1699 
1700  /* let's do a safety check here to protect against accidental use of zero softening lengths */
1701  int softtype = Tp->P[target].getSofteningClass();
1702  if(All.ForceSoftening[softtype] == 0)
1703  Terminate("Particle with ID=%lld of type=%d and softening type=%d was assigned zero softening\n",
1704  (long long)Tp->P[target].ID.get(), Tp->P[target].getType(), softtype);
1705  }
1706 
1707  int ncount = 0;
1708 
1709  for(int i = 0; i < NumPartImported; i++)
1710  {
1711 #ifndef HIERARCHICAL_GRAVITY
1712  if(Points[i].ActiveFlag)
1713 #endif
1714  {
1715  ResultIndexList[i] = ncount++;
1716  }
1717  }
1718 
1719  NumOnWorkStack = 0;
1720  NewOnWorkStack = 0;
1721 
1722  /* for starting, request the self-interaction between the root node */
1723  fmm_add_to_work_stack(MaxPart, MaxPart, Shmem.Island_ThisTask, Shmem.Island_ThisTask, MaxPart + D->NTopnodes);
1724 
1725  NumOnWorkStack = NewOnWorkStack;
1726 
1727  ResultsActiveImported =
1728  (resultsactiveimported_data *)Mem.mymalloc_clear("ResultsActiveImported", ncount * sizeof(resultsactiveimported_data));
1729 
1730  TaylorCoeff = (taylor_data *)Mem.mymalloc_clear("TaylorCoeff", NumNodes * sizeof(taylor_data));
1731  TaylorCoeff -= MaxPart;
1732 
1733  /******************************************/
1734  /* now execute the tree walk calculations */
1735  /******************************************/
1736 
1737  errTolForceAcc = All.ErrTolForceAcc;
1738  errTolThetaMax2 = All.ErrTolThetaMax * All.ErrTolThetaMax;
1739  errTolTheta2 = All.ErrTolTheta * All.ErrTolTheta;
1740 
1741  sum_NumForeignNodes = 0;
1742  sum_NumForeignPoints = 0;
1743 
1744  // set a default size of the fetch stack equal to half the work stack (this may still be somewhat too large)
1745  MaxOnFetchStack = std::max<int>(0.1 * (Tp->NumPart + NumPartImported), TREE_MIN_WORKSTACK_SIZE);
1746  StackToFetch = (fetch_data *)Mem.mymalloc_movable(&StackToFetch, "StackToFetch", MaxOnFetchStack * sizeof(fetch_data));
1747 
1748  // let's grab at most half the still available memory for imported points and nodes
1749  int nspace = (0.33 * Mem.FreeBytes) / (sizeof(gravnode) + 8 * sizeof(foreign_gravpoint_data));
1750 
1751  MaxForeignNodes = nspace;
1752  MaxForeignPoints = 8 * nspace;
1753  NumForeignNodes = 0;
1754  NumForeignPoints = 0;
1755 
1756  /* the following two arrays hold imported tree nodes and imported points to augment the local tree */
1757  Foreign_Nodes = (gravnode *)Mem.mymalloc_movable(&Foreign_Nodes, "Foreign_Nodes", MaxForeignNodes * sizeof(gravnode));
1758  Foreign_Points = (foreign_gravpoint_data *)Mem.mymalloc_movable(&Foreign_Points, "Foreign_Points",
1759  MaxForeignPoints * sizeof(foreign_gravpoint_data));
1760 
1761  tree_initialize_leaf_node_access_info();
1762 
1763  TIMER_STOP(CPU_TREESTACK);
1764 
1765  double t0 = Logs.second();
1766  int max_ncycles = 0;
1767 
1768  prepare_shared_memory_access();
1769 
1770 #ifdef PRESERVE_SHMEM_BINARY_INVARIANCE
1771  for(int rep = 0; rep < 2; rep++)
1772  {
1773  if(rep == 0)
1774  {
1775  skip_actual_force_computation = true;
1776  }
1777  else
1778  {
1779  skip_actual_force_computation = false;
1780 
1781  NumOnWorkStack = 0;
1782  NewOnWorkStack = 0;
1783 
1784  /* for starting, request the self-interaction between the root node */
1785  fmm_add_to_work_stack(MaxPart, MaxPart, Shmem.Island_ThisTask, Shmem.Island_ThisTask, MaxPart + D->NTopnodes);
1786 
1787  NumOnWorkStack = NewOnWorkStack;
1788  }
1789 #endif
1790 
1791  while(NumOnWorkStack > 0) // repeat until we are out of work
1792  {
1793  NewOnWorkStack = 0; // gives the new entries
1794  NumOnFetchStack = 0;
1795  MaxOnWorkStack = std::min<int>(AllocWorkStackBaseLow + max_ncycles * TREE_MIN_WORKSTACK_SIZE, AllocWorkStackBaseHigh);
1796 
1797  TIMER_START(CPU_TREEWALK);
1798 
1799  int item = 0;
1800 
1801  while(item < NumOnWorkStack)
1802  {
1803  int min_buffer_space =
1804  std::min<int>(MaxOnWorkStack - (NumOnWorkStack + NewOnWorkStack), MaxOnFetchStack - NumOnFetchStack);
1805 
1806  int committed = 8 * 8 * TREE_NUM_BEFORE_NODESPLIT * TREE_NUM_BEFORE_NODESPLIT;
1807 
1808  if(min_buffer_space >= committed)
1809  {
1810  int no1 = FMM_WorkStack[item].Node1;
1811  int no2 = FMM_WorkStack[item].Node2;
1812  unsigned char shmrank1 = FMM_WorkStack[item].ShmRank1;
1813  unsigned char shmrank2 = FMM_WorkStack[item].ShmRank2;
1814  int mintopleaf = FMM_WorkStack[item].MinTopLeafNode;
1815  item++;
1816 
1817  char type1 = 0, type2 = 0;
1818 
1819  if(no1 < MaxPart) /* a local particle */
1820  type1 = NODE_TYPE_LOCAL_PARTICLE;
1821  else if(no1 < MaxPart + MaxNodes) /* an internal node */
1822  type1 = NODE_TYPE_LOCAL_NODE;
1823  else if(no1 >= ImportedNodeOffset && no1 < EndOfTreePoints) /* an imported Treepoint particle */
1825  else if(no1 >= EndOfTreePoints && no1 < EndOfForeignNodes) /* an imported LET node */
1826  type1 = NODE_TYPE_FETCHED_NODE;
1827  else if(no1 >= EndOfForeignNodes) /* an imported LED particle */
1829 
1830  if(no2 < MaxPart) /* a local particle */
1831  type2 = NODE_TYPE_LOCAL_PARTICLE;
1832  else if(no2 < MaxPart + MaxNodes) /* an internal node */
1833  type2 = NODE_TYPE_LOCAL_NODE;
1834  else if(no2 >= ImportedNodeOffset && no2 < EndOfTreePoints) /* an imported Treepoint particle */
1836  else if(no2 >= EndOfTreePoints && no2 < EndOfForeignNodes) /* an imported LET node */
1837  type2 = NODE_TYPE_FETCHED_NODE;
1838  else if(no2 >= EndOfForeignNodes) /* an imported LED particle */
1840 
1841  if(no1 == MaxPart && no2 == MaxPart)
1842  {
1843  // we have the interaction between the two root nodes
1844  fmm_force_interact(no1, no2, type1, type2, shmrank1, shmrank2, mintopleaf, committed);
1845  }
1846  else
1847  {
1849  {
1850  /* node-node interaction */
1851 
1852  gravnode *nop1 = get_nodep(no1, shmrank1);
1853  gravnode *nop2 = get_nodep(no2, shmrank2);
1854 
1856  Terminate("how can this be");
1857 
1858  fmm_open_both(nop1, nop2, mintopleaf, committed);
1859  }
1860  else
1861  {
1862  /* particle-node interaction, particle should on the sink side */
1863 
1864  // we have a node that we previously could not open
1865  gravnode *nop2 = get_nodep(no2, shmrank2);
1866 
1867  if(nop2->cannot_be_opened_locally)
1868  Terminate("now we should be able to open it!");
1869 
1870  fmm_open_node(no1, nop2, type1, shmrank1, mintopleaf, committed);
1871  }
1872  }
1873  }
1874  else
1875  break;
1876  }
1877 
1878  if(item == 0 && NumOnWorkStack > 0)
1879  Terminate("Can't even process a single particle");
1880 
1881  TIMER_STOP(CPU_TREEWALK);
1882 
1883  TIMER_START(CPU_TREEFETCH);
1884 
1885  tree_fetch_foreign_nodes(FETCH_GRAVTREE);
1886 
1887  TIMER_STOP(CPU_TREEFETCH);
1888 
1889  TIMER_START(CPU_TREESTACK);
1890 
1891  /* now reorder the workstack such that we are first going to do residual pristine particles, and then
1892  * imported nodes that hang below the first leaf nodes */
1893  NumOnWorkStack = NumOnWorkStack - item + NewOnWorkStack;
1894  memmove(FMM_WorkStack, FMM_WorkStack + item, NumOnWorkStack * sizeof(fmm_workstack_data));
1895 
1896  /* now let's sort such that we can go deep on top-level node branches, allowing us to clear them out eventually */
1897  mycxxsort(FMM_WorkStack, FMM_WorkStack + NumOnWorkStack, compare_fmm_workstack);
1898 
1899  TIMER_STOP(CPU_TREESTACK);
1900 
1901  max_ncycles++;
1902  }
1903 
1904 #ifdef PRESERVE_SHMEM_BINARY_INVARIANCE
1905  }
1906 #endif
1907 
1908  TIMER_START(CPU_TREEIMBALANCE);
1909 
1910  MPI_Allreduce(MPI_IN_PLACE, &max_ncycles, 1, MPI_INT, MPI_MAX, D->Communicator);
1911 
1912  TIMER_STOP(CPU_TREEIMBALANCE);
1913 
1914  cleanup_shared_memory_access();
1915 
1916  /* free temporary buffers */
1917 
1918  Mem.myfree(Foreign_Points);
1919  Mem.myfree(Foreign_Nodes);
1920  Mem.myfree(StackToFetch);
1921 
1922  TIMER_START(CPU_TREEWALK);
1923 
1924  taylor_data taylor_current{}; // note: the curly braces initialize this to zero in this case
1925 
1926  /* propagate node expansions to particles */
1927  if(fmm_depends_on_local_mass(MaxPart, Shmem.Island_ThisTask))
1928  fmm_force_passdown(MaxPart, Shmem.Island_ThisTask, taylor_current);
1929 
1930  TIMER_STOP(CPU_TREEWALK);
1931 
1932  Mem.myfree(TaylorCoeff + MaxPart);
1933 
1934  double t1 = Logs.second();
1935 
1936  D->mpi_printf("FMM: Forces calculated, with %d cycles took %g sec\n", max_ncycles, Logs.timediff(t0, t1));
1937 
1938  /* now communicate the forces in ResultsActiveImported */
1939  gravity_exchange_forces();
1940 
1941  Mem.myfree(ResultsActiveImported);
1942  Mem.myfree(ResultIndexList);
1943  Mem.myfree(FMM_WorkStack);
1944 
1945  TIMER_STOP(CPU_TREE);
1946 
1947  D->mpi_printf("FMM: tree-force is done.\n");
1948 
1949  /* gather some diagnostic information */
1950 
1951  TIMER_START(CPU_LOGS);
1952 
1953  struct detailed_timings
1954  {
1955  double all, tree, wait, fetch, stack, lastpm;
1956  double costtotal, numnodes;
1957  double interactioncountEffective;
1958  double interactioncountPP, interactioncountPN, interactioncountNN;
1959  double NumForeignNodes, NumForeignPoints;
1960  double fillfacFgnNodes, fillfacFgnPoints;
1961  double sumcost;
1962  };
1963  detailed_timings timer, tisum, timax;
1964 
1965  memset(&timer, 0, sizeof(detailed_timings));
1966 
1967  if(MeasureCostFlag)
1968  {
1969  double sum = 0;
1970  for(int i = 0; i < Tp->NumPart; i++)
1971  if(Tp->TimeBinSynchronized[Tp->P[i].TimeBinGrav])
1972  sum += Tp->P[i].GravCost;
1973 
1974  timer.sumcost = sum;
1975  }
1976 
1977  timer.tree = TIMER_DIFF(CPU_TREEWALK);
1978  timer.wait = TIMER_DIFF(CPU_TREEIMBALANCE);
1979  timer.fetch = TIMER_DIFF(CPU_TREEFETCH);
1980  timer.stack = TIMER_DIFF(CPU_TREESTACK);
1981  timer.all = timer.tree + timer.wait + timer.fetch + timer.stack + TIMER_DIFF(CPU_TREE);
1982  tisum.lastpm = All.CPUForLastPMExecution;
1983  timer.costtotal = interactioncountPP + interactioncountEffective;
1984  timer.interactioncountPP = interactioncountPP;
1985  timer.interactioncountPN = interactioncountPN;
1986  timer.interactioncountNN = interactioncountNN;
1987  timer.interactioncountEffective = interactioncountEffective;
1988  timer.NumForeignNodes = NumForeignNodes;
1989  timer.NumForeignPoints = NumForeignPoints;
1990  timer.fillfacFgnNodes = NumForeignNodes / ((double)MaxForeignNodes);
1991  timer.fillfacFgnPoints = NumForeignPoints / ((double)MaxForeignPoints);
1992  timer.numnodes = NumNodes;
1993 
1994  MPI_Reduce((double *)&timer, (double *)&tisum, (int)(sizeof(detailed_timings) / sizeof(double)), MPI_DOUBLE, MPI_SUM, 0,
1995  D->Communicator);
1996  MPI_Reduce((double *)&timer, (double *)&timax, (int)(sizeof(detailed_timings) / sizeof(double)), MPI_DOUBLE, MPI_MAX, 0,
1997  D->Communicator);
1998 
1999  All.TotNumOfForces += Tp->TimeBinsGravity.GlobalNActiveParticles;
2000 
2001  if(D->ThisTask == 0)
2002  {
2003  fprintf(Logs.FdTimings, "Nf=%9lld FMM timebin=%d total-Nf=%lld\n", Tp->TimeBinsGravity.GlobalNActiveParticles, timebin,
2005  fprintf(Logs.FdTimings, " work-load balance: %g part/sec: raw=%g, effective=%g ia/part: avg=%g (%g|%g|%g)\n",
2006  timax.tree / ((tisum.tree + 1e-20) / D->NTask), Tp->TimeBinsGravity.GlobalNActiveParticles / (tisum.tree + 1.0e-20),
2007  Tp->TimeBinsGravity.GlobalNActiveParticles / ((timax.tree + 1.0e-20) * D->NTask),
2008  tisum.costtotal / (Tp->TimeBinsGravity.GlobalNActiveParticles + 1.0e-20),
2009  tisum.interactioncountPP / (Tp->TimeBinsGravity.GlobalNActiveParticles + 1.0e-20),
2010  tisum.interactioncountPN / (Tp->TimeBinsGravity.GlobalNActiveParticles + 1.0e-20),
2011  tisum.interactioncountNN / (Tp->TimeBinsGravity.GlobalNActiveParticles + 1.0e-20));
2012  fprintf(Logs.FdTimings,
2013  " maximum number of nodes: %g, filled: %g NumForeignNodes: max=%g avg=%g fill=%g NumForeignPoints: max=%g avg=%g "
2014  "fill=%g cycles=%d\n",
2015  timax.numnodes, timax.numnodes / MaxNodes, timax.NumForeignNodes, tisum.NumForeignNodes / D->NTask,
2016  timax.fillfacFgnNodes, timax.NumForeignPoints, tisum.NumForeignPoints / D->NTask, timax.fillfacFgnPoints, max_ncycles);
2017  fprintf(Logs.FdTimings,
2018  " avg times: <all>=%g <tree>=%g <wait>=%g <fetch>=%g <stack>=%g "
2019  "(lastpm=%g) sec\n",
2020  tisum.all / D->NTask, tisum.tree / D->NTask, tisum.wait / D->NTask, tisum.fetch / D->NTask, tisum.stack / D->NTask,
2021  tisum.lastpm / D->NTask);
2022  fprintf(Logs.FdTimings, " total interaction cost: %g (imbalance=%g) total cost measure: %g %g\n", tisum.costtotal,
2023  timax.costtotal / (tisum.costtotal / D->NTask), tisum.sumcost,
2024  tisum.interactioncountPP + tisum.interactioncountEffective);
2026  }
2027 
2028  Mem.myfree(Topnode_depends_on_local_mass + MaxPart);
2029 
2030  TIMER_STOP(CPU_LOGS);
2031 }
2032 
2033 #endif
global_data_all_processes All
Definition: main.cc:40
@ POINTMASS
Definition: ewald.h:67
@ MULTIPOLES
Definition: ewald.h:68
void ewald_gridlookup(const MyIntPosType *p_intpos, const MyIntPosType *target_intpos, enum interpolate_options flag, ewald_data &fper)
Definition: ewald.cc:304
double timediff(double t0, double t1)
Definition: logs.cc:488
FILE * FdTimings
Definition: logs.h:47
double second(void)
Definition: logs.cc:471
double getAllocatedBytesInMB(void)
Definition: mymalloc.h:68
size_t FreeBytes
Definition: mymalloc.h:48
int * GetNodeIDForSimulCommRank
int Island_ThisTask
Definition: tree.h:91
T r2(void)
Definition: symtensors.h:187
T da[3]
Definition: symtensors.h:106
#define FLAG_OUTSIDE
Definition: constants.h:29
#define FLAG_BOUNDARYOVERLAP
Definition: constants.h:31
#define FLAG_INSIDE
Definition: constants.h:30
#define LOW_MESH
Definition: constants.h:33
#define HIGH_MESH
Definition: constants.h:34
double mycxxsort(T *begin, T *end, Tcomp comp)
Definition: cxxsort.h:39
uint32_t MyIntPosType
Definition: dtypes.h:35
int32_t MySignedIntPosType
Definition: dtypes.h:36
double MyReal
Definition: dtypes.h:82
#define BITS_FOR_POSITIONS
Definition: dtypes.h:37
ewald Ewald
Definition: main.cc:42
#define TREE_ACTIVE_CUTTOFF_HIGHRES_PM
Definition: gravtree.h:24
#define TREE_MIN_WORKSTACK_SIZE
Definition: gravtree.h:26
#define NODE_USE
Definition: gravtree.h:35
#define NODE_DISCARD
Definition: gravtree.h:37
#define NODE_TYPE_FETCHED_NODE
Definition: gravtree.h:33
#define NODE_TYPE_TREEPOINT_PARTICLE
Definition: gravtree.h:30
#define TREE_EXPECTED_CYCLES
Definition: gravtree.h:27
#define NODE_TYPE_FETCHED_PARTICLE
Definition: gravtree.h:31
#define NODE_OPEN
Definition: gravtree.h:36
#define NODE_TYPE_LOCAL_NODE
Definition: gravtree.h:32
#define NODE_TYPE_LOCAL_PARTICLE
Definition: gravtree.h:29
logs Logs
Definition: main.cc:43
#define TIMER_START(counter)
Starts the timer counter.
Definition: logs.h:197
#define TIMER_STOP(counter)
Stops the timer counter.
Definition: logs.h:220
#define TIMER_STORE
Copies the current value of CPU times to a stored variable, such that differences with respect to thi...
Definition: logs.h:257
#define TIMER_DIFF(counter)
Returns amount elapsed for the timer since last save with TIMER_STORE.
Definition: logs.h:250
#define Terminate(...)
Definition: macros.h:19
shmem Shmem
Definition: main.cc:45
memory Mem
Definition: main.cc:44
expr sqrt(half arg)
Definition: half.hpp:2777
unsigned char level
Definition: tree.h:64
unsigned char nextnode_shmrank
Definition: tree.h:66
std::atomic< unsigned char > cannot_be_opened_locally
Definition: tree.h:70
unsigned char not_empty
Definition: tree.h:73
int nextnode
Definition: tree.h:57
vector< MyIntPosType > center
Definition: tree.h:51
int OriginTask
Definition: tree.h:61
unsigned char sibling_shmrank
Definition: tree.h:65
int sibling
Definition: tree.h:53
vector< MyReal > D1phi
Definition: gravtree.h:186
symtensor3< MyReal > D3phi
Definition: gravtree.h:188
MyReal D0phi
Definition: gravtree.h:185
symtensor2< MyReal > D2phi
Definition: gravtree.h:187
unsigned char getSofteningClass(void)
Definition: gravtree.h:143
unsigned char Nextnode_shmrank
Definition: gravtree.h:133
MyIntPosType IntPos[3]
Definition: gravtree.h:130
double ForceSoftening[NSOFTCLASSES+NSOFTCLASSES_HYDRO+2]
Definition: allvars.h:258
long long TotNumOfForces
Definition: allvars.h:103
MyDouble mass
Definition: gravtree.h:65
vector< MyIntPosType > s
Definition: gravtree.h:66
int getSofteningClass(void)
Definition: gravtree.h:90
unsigned char maxsofttype
Definition: gravtree.h:83
unsigned char minsofttype
Definition: gravtree.h:84
unsigned char getSofteningClass(void)
Definition: gravtree.h:118
MyDouble Mass
Definition: gravtree.h:103
float OldAcc
Definition: gravtree.h:104
MyIntPosType IntPos[3]
Definition: gravtree.h:102
MyDouble getMass(void)
unsigned char getSofteningClass(void)
MyIntPosType IntPos[3]
Definition: particle_data.h:53
#define qXX
#define qYY
#define qZZ
void setup_D5(enum setup_options opt, symtensor5< T > &D5, vector< T > &dxyz, symtensor3< T > &aux3, symtensor4< T > &aux4, symtensor5< T > &aux5, TypeGfac g3, TypeGfac g4, TypeGfac g5)
Definition: symtensors.h:1842
vector< typename which_return< T1, T2 >::type > contract_twice(const symtensor3< T1 > &D, const vector< T2 > &v)
Definition: symtensors.h:1321
void setup_D3(enum setup_options opt, symtensor3< T > &D3, vector< T > &dxyz, symtensor2< T > &aux2, symtensor3< T > &aux3, TypeGfac g2, TypeGfac g3)
Definition: symtensors.h:1775
vector< typename which_return< T1, T2 >::type > contract_thrice(const symtensor4< T1 > &D, const vector< T2 > &v)
Definition: symtensors.h:1333
vector< typename which_return< T1, T2 >::type > contract_fourtimes(const symtensor5< T1 > &D, const vector< T2 > &v)
Definition: symtensors.h:1346
@ INIT
Definition: symtensors.h:1770
void setup_D4(enum setup_options opt, symtensor4< T > &D4, vector< T > &dxyz, symtensor2< T > &aux2, symtensor3< T > &aux3, symtensor4< T > &aux4, TypeGfac g2, TypeGfac g3, TypeGfac g4)
Definition: symtensors.h:1801
void myflush(FILE *fstream)
Definition: system.cc:125
T square(T const value)
Definition: system.h:36
#define TREE_NUM_BEFORE_NODESPLIT
Definition: tree.h:16