12 #include "gadgetconfig.h"
19 #include "../data/allvars.h"
20 #include "../data/dtypes.h"
21 #include "../data/intposconvert.h"
22 #include "../data/mymalloc.h"
23 #include "../domain/domain.h"
24 #include "../gravity/ewald.h"
25 #include "../gravtree/gravtree.h"
26 #include "../gravtree/gwalk.h"
27 #include "../logs/logs.h"
28 #include "../logs/timer.h"
29 #include "../main/simulation.h"
30 #include "../mpi_utils/mpi_utils.h"
32 #include "../sort/cxxsort.h"
33 #include "../sort/peano.h"
34 #include "../system/system.h"
35 #include "../time_integration/timestep.h"
45 inline void gwalk::evaluate_particle_particle_interaction(
const pinfo &pdat,
const int no,
const char jtype,
int shmrank)
47 #ifdef PRESERVE_SHMEM_BINARY_INVARIANCE
48 if(skip_actual_force_computation)
56 #if defined(PMGRID) && defined(PLACEHIGHRESREGION)
74 #if defined(PMGRID) && defined(PLACEHIGHRESREGION)
75 overlap_check = P->InsideOutsideFlag;
92 #if defined(PMGRID) && defined(PLACEHIGHRESREGION)
93 overlap_check = Pointp->InsideOutsideFlag;
102 intpos = foreignpoint->
IntPos;
103 mass = foreignpoint->
Mass;
109 #if defined(PMGRID) && defined(PLACEHIGHRESREGION)
110 overlap_check = foreignpoint->InsideOutsideFlag;
116 #if defined(PLACEHIGHRESREGION)
130 MyReal rinv = (r > 0) ? 1 / r : 0;
137 if(modify_gfactors_pm_monopole(gfac, r, rinv, mfp))
145 *pdat.pot -= mass * gfac.fac0;
147 *pdat.acc -= (mass * gfac.fac1 * rinv) * dxyz;
157 *pdat.pot += mass * ew.
D0phi;
159 *pdat.acc += mass * ew.
D1phi;
165 interactioncountPP += 1;
168 inline int gwalk::evaluate_particle_node_opening_criterion_and_interaction(
const pinfo &pdat,
gravnode *nop)
176 #if defined(PMGRID) || !defined(TREE_NO_SAFETY_BOX)
181 #ifndef TREE_NO_SAFETY_BOX
183 if(dist[0] < intlen && dist[1] < intlen && dist[2] < intlen)
192 #ifdef PLACEHIGHRESREGION
195 int overlap_check = nop->overlap_flag;
199 "this case should not happen: node center=(%g|%g|%g) len=%g particle=(%g|%g|%g) "
200 "rel-dist=(%g|%g|%g)\n",
213 if(dist[0] > mfp->intrcut[0] + halflen)
217 if(dist[1] > mfp->intrcut[1] + halflen)
221 if(dist[2] > mfp->intrcut[2] + halflen)
239 if(len2 > r2 * theta2)
244 #if(MULTIPOLE_ORDER <= 2)
245 if(mass * len2 > r2 * r2 * errTolForceAcc * pdat.aold)
247 #elif(MULTIPOLE_ORDER == 3)
248 if(
square(mass * len * len2) > r2 *
square(r2 * r2 * errTolForceAcc * pdat.aold))
250 #elif(MULTIPOLE_ORDER == 4)
251 if(mass * len2 * len2 > r2 * r2 * r2 * errTolForceAcc * pdat.aold)
253 #elif(MULTIPOLE_ORDER == 5)
254 if(
square(mass * len2 * len2 * len) > r2 *
square(r2 * r2 * r2 * errTolForceAcc * pdat.aold))
258 if(len2 > r2 * thetamax2)
282 #ifdef PRESERVE_SHMEM_BINARY_INVARIANCE
283 if(skip_actual_force_computation)
288 MyReal rinv = (r > 0) ? 1 / r : 0;
295 if(modify_gfactors_pm_multipole(gfac, r, rinv, mfp))
304 *pdat.pot -= mass * g0;
307 MyReal g1 = gfac.fac1 * rinv;
308 *pdat.acc -= (mass * g1) * dxyz;
310 #if(MULTIPOLE_ORDER >= 3) || (MULTIPOLE_ORDER >= 2 && defined(EXTRAPOTTERM))
311 MyReal g2 = gfac.fac2 * gfac.rinv2;
313 MyReal Q2dxyz2 = Q2dxyz * dxyz;
314 MyReal Q2trace = nop->Q2Tensor.trace();
315 #if(MULTIPOLE_ORDER >= 3)
316 MyReal g3 = gfac.fac3 * gfac.rinv3;
317 *pdat.acc -=
static_cast<MyReal>(0.5) * (g2 * Q2trace + g3 * Q2dxyz2) * dxyz + g2 * Q2dxyz;
320 *pdat.pot -=
static_cast<MyReal>(0.5) * (g1 * Q2trace + g2 * Q2dxyz2);
324 #if(MULTIPOLE_ORDER >= 4) || (MULTIPOLE_ORDER >= 3 && defined(EXTRAPOTTERM))
329 MyReal Q3dxyz3 = Q3dxyz2 * dxyz;
337 #if(MULTIPOLE_ORDER >= 4)
338 MyReal g4 = gfac.fac4 * gfac.rinv2 * gfac.rinv2;
340 static_cast<MyReal>(0.5) *
341 (g2 * Q3vec + g3 * Q3dxyz2 + (
static_cast<MyReal>(1.0 / 3) * g4 * Q3dxyz3 + g3 * Q3dxyzTrace) * dxyz);
344 *pdat.pot -=
static_cast<MyReal>(1.0 / 6) * (3 * g2 * Q3dxyzTrace + g3 * Q3dxyz3);
348 #if(MULTIPOLE_ORDER >= 5) || (MULTIPOLE_ORDER >= 4 && defined(EXTRAPOTTERM))
355 MyReal Q4dxyz4 = Q4dxyz3 * dxyz;
367 #if(MULTIPOLE_ORDER >= 5)
369 MyReal g5 = gfac.fac5 * gfac.rinv2 * gfac.rinv3;
371 static_cast<MyReal>(1.0 / 24) * (g3 * (3 * QTtrace * dxyz + 12 * QTdxyz) + g4 * (6 * Q4dxyz2trace * dxyz + 4 * Q4dxyz3) +
372 g5 * Q4dxyz4 * dxyz);
375 *pdat.pot -=
static_cast<MyReal>(1.0 / 24) * (g2 * 3 * QTtrace + g3 * 6 * Q4dxyz2trace + g4 * Q4dxyz4);
379 #if(MULTIPOLE_ORDER >= 5 && defined(EXTRAPOTTERM) && defined(EVALPOTENTIAL))
386 MyReal Q5dxyz5 = Q5dxyz4 * dxyz;
391 *pdat.pot -=
static_cast<MyReal>(1.0 / 120) * (g3 * 15 * Q5dxyzTtrace + g4 * 10 * Q5dxyz3.
trace() + g5 * Q5dxyz5);
402 *pdat.pot += mass * ew.
D0phi;
403 #if(MULTIPOLE_ORDER >= 3) || (MULTIPOLE_ORDER >= 2 && defined(EXTRAPOTTERM))
404 *pdat.pot +=
static_cast<MyReal>(0.5) * (nop->Q2Tensor * ew.
D2phi);
406 #if(MULTIPOLE_ORDER >= 4) || (MULTIPOLE_ORDER >= 3 && defined(EXTRAPOTTERM))
407 *pdat.pot +=
static_cast<MyReal>(1.0 / 6) * (nop->Q3Tensor * ew.
D3phi);
409 #if(MULTIPOLE_ORDER >= 5) || (MULTIPOLE_ORDER >= 4 && defined(EXTRAPOTTERM))
410 *pdat.pot +=
static_cast<MyReal>(1.0 / 24) * (nop->Q4Tensor * ew.D4phi);
412 #if(MULTIPOLE_ORDER >= 5 && defined(EXTRAPOTTERM) && defined(EVALPOTENTIAL))
413 *pdat.pot +=
static_cast<MyReal>(1.0 / 120) * (nop->Q5Tensor * ew.D5phi);
416 *pdat.acc += mass * ew.
D1phi;
417 #if(MULTIPOLE_ORDER >= 3)
418 *pdat.acc +=
static_cast<MyReal>(0.5) * (ew.
D3phi * nop->Q2Tensor);
420 #if(MULTIPOLE_ORDER >= 4)
421 *pdat.acc +=
static_cast<MyReal>(1.0 / 6) * (ew.D4phi * nop->Q3Tensor);
423 #if(MULTIPOLE_ORDER >= 5)
424 *pdat.acc +=
static_cast<MyReal>(1.0 / 24) * (ew.D5phi * nop->Q4Tensor);
428 interactioncountPN += 1;
436 inline void gwalk::gwalk_open_node(
const pinfo &pdat,
int i,
char ptype,
gravnode *nop,
int mintopleafnode,
int committed)
446 "p=%d < 0 nop->sibling=%d nop->nextnode=%d shmrank=%d nop->sibling_shmrank=%d nop->foreigntask=%d mass=%g "
447 "first_nontoplevelnode=%d",
451 unsigned char next_shmrank;
458 next_shmrank = shmrank;
472 next_shmrank = shmrank;
494 "should not happen: p=%d MaxPart=%d MaxNodes=%d ImportedNodeOffset=%d EndOfTreePoints=%d EndOfForeignNodes=%d "
499 gravity_force_interact(pdat, i, p, ptype, type, shmrank, mintopleafnode, committed);
502 shmrank = next_shmrank;
506 void gwalk::gravity_force_interact(
const pinfo &pdat,
int i,
int no,
char ptype,
char no_type,
unsigned char shmrank,
507 int mintopleafnode,
int committed)
511 evaluate_particle_particle_interaction(pdat, no, no_type, shmrank);
525 #ifdef PRESERVE_SHMEM_BINARY_INVARIANCE
528 if(skip_actual_force_computation)
534 int openflag = evaluate_particle_node_opening_criterion_and_interaction(pdat, nop);
543 Terminate(
"this should not happen any more");
554 int min_buffer_space =
602 interactioncountPP = 0;
603 interactioncountPN = 0;
640 unsigned char level = 0;
641 unsigned char rotation = 0;
646 unsigned char pix = (((
unsigned char)((xxb & mask) >> (shiftx--))) | ((
unsigned char)((yyb & mask) >> (shifty--))) |
647 ((
unsigned char)((zzb & mask) >> (shiftz--))));
678 Terminate(
"Particle with ID=%lld of type=%d and softening type=%d was assigned zero softening\n",
686 #ifndef HIERARCHICAL_GRAVITY
700 #ifdef PRESERVE_SHMEM_BINARY_INVARIANCE
701 workstack_data *WorkStackBak = (workstack_data *)
Mem.mymalloc(
"WorkStackBak",
NumOnWorkStack *
sizeof(workstack_data));
707 (resultsactiveimported_data *)
Mem.mymalloc_clear(
"ResultsActiveImported", ncount *
sizeof(resultsactiveimported_data));
746 #ifdef PRESERVE_SHMEM_BINARY_INVARIANCE
747 for(
int rep = 0; rep < 2; rep++)
751 skip_actual_force_computation =
true;
755 skip_actual_force_computation =
false;
774 int min_buffer_space =
776 if(min_buffer_space >= committed)
781 int mintopleaf =
WorkStack[item].MinTopLeafNode;
785 int ptype = get_pinfo(target, pdat);
790 gravity_force_interact(pdat, target, no, ptype,
NODE_TYPE_LOCAL_NODE, shmrank, mintopleaf, committed);
799 Terminate(
"item=%d: no=%d now we should be able to open it!", item, no);
802 gwalk_open_node(pdat, target, ptype, nop, mintopleaf, committed);
810 Terminate(
"Can't even process a single particle");
835 #ifdef PRESERVE_SHMEM_BINARY_INVARIANCE
841 MPI_Allreduce(MPI_IN_PLACE, &max_ncycles, 1, MPI_INT, MPI_MAX,
D->
Communicator);
855 D->
mpi_printf(
"GRAVTREE: tree-forces are calculated, with %d cycles took %g sec\n", max_ncycles,
Logs.
timediff(t0, t1));
861 #ifdef PRESERVE_SHMEM_BINARY_INVARIANCE
862 Mem.myfree(WorkStackBak);
875 struct detailed_timings
877 double tree, wait, fetch, stack, all, lastpm;
878 double costtotal, numnodes;
879 double interactioncountPP, interactioncountPN;
881 double fillfacFgnNodes, fillfacFgnPoints;
883 detailed_timings timer, tisum, timax;
889 timer.all = timer.tree + timer.wait + timer.fetch + timer.stack +
TIMER_DIFF(CPU_TREE);
891 timer.costtotal = interactioncountPP + interactioncountPN;
893 timer.interactioncountPP = interactioncountPP;
894 timer.interactioncountPN = interactioncountPN;
900 MPI_Reduce((
double *)&timer, (
double *)&tisum, (
int)(
sizeof(detailed_timings) /
sizeof(
double)), MPI_DOUBLE, MPI_SUM, 0,
902 MPI_Reduce((
double *)&timer, (
double *)&timax, (
int)(
sizeof(detailed_timings) /
sizeof(
double)), MPI_DOUBLE, MPI_MAX, 0,
911 fprintf(
Logs.
FdTimings,
" work-load balance: %g part/sec: raw=%g, effective=%g ia/part: avg=%g (%g|%g)\n",
918 " maximum number of nodes: %g, filled: %g NumForeignNodes: max=%g avg=%g fill=%g NumForeignPoints: max=%g avg=%g "
919 "fill=%g cycles=%d\n",
920 timax.numnodes, timax.numnodes /
MaxNodes, timax.NumForeignNodes, tisum.NumForeignNodes /
D->
NTask,
921 timax.fillfacFgnNodes, timax.NumForeignPoints, tisum.NumForeignPoints /
D->
NTask, timax.fillfacFgnPoints, max_ncycles);
923 " avg times: <all>=%g <tree>=%g <wait>=%g <fetch>=%g <stack>=%g "
927 fprintf(
Logs.
FdTimings,
" total interaction cost: %g (imbalance=%g)\n", tisum.costtotal,
928 timax.costtotal / (tisum.costtotal /
D->
NTask));
936 #include "../data/simparticles.h"
global_data_all_processes All
void ewald_gridlookup(const MyIntPosType *p_intpos, const MyIntPosType *target_intpos, enum interpolate_options flag, ewald_data &fper)
void gravity_exchange_forces(void)
resultsactiveimported_data * ResultsActiveImported
void get_gfactors_multipole(gfactors &res, const T r, const T h_max, const T rinv)
void get_gfactors_monopole(gfactors &res, const T r, const T h_max, const T rinv)
void gravity_tree(int timebin)
This function computes the gravitational forces for all active particles.
void nearest_image_intpos_to_absolute_intdist(const MyIntPosType *a, const MyIntPosType *b, MyIntPosType *delta)
void nearest_image_intpos_to_pos(const MyIntPosType *const a, const MyIntPosType *const b, T *posdiff)
double timediff(double t0, double t1)
double getAllocatedBytesInMB(void)
void mpi_printf(const char *fmt,...)
int * GetNodeIDForSimulCommRank
TimeBinData TimeBinsGravity
long long sum_NumForeignPoints
workstack_data * WorkStack
foreign_gravpoint_data * get_foreignpointsp(int n, unsigned char shmrank)
void cleanup_shared_memory_access(void)
gravnode * get_nodep(int no)
int * get_nextnodep(unsigned char shmrank)
domain< simparticles > * D
pdata get_Pp(int n, unsigned char shmrank)
static bool compare_workstack(const workstack_data &a, const workstack_data &b)
long long sum_NumForeignNodes
void tree_fetch_foreign_nodes(enum ftype fetch_type)
void prepare_shared_memory_access(void)
foreign_gravpoint_data * Foreign_Points
fetch_data * StackToFetch
gravpoint_data * get_pointsp(int no, unsigned char shmrank)
int AllocWorkStackBaseHigh
void tree_add_to_fetch_stack(gravnode *nop, int nodetoopen, unsigned char shmrank)
void tree_add_to_work_stack(int target, int no, unsigned char shmrank, int mintopleafnode)
void tree_initialize_leaf_node_access_info(void)
int AllocWorkStackBaseLow
#define FLAG_BOUNDARYOVERLAP
double mycxxsort(T *begin, T *end, Tcomp comp)
#define BITS_FOR_POSITIONS
#define TREE_ACTIVE_CUTTOFF_HIGHRES_PM
#define TREE_MIN_WORKSTACK_SIZE
#define NODE_TYPE_FETCHED_NODE
#define NODE_TYPE_TREEPOINT_PARTICLE
#define TREE_EXPECTED_CYCLES
#define NODE_TYPE_FETCHED_PARTICLE
#define NODE_TYPE_LOCAL_NODE
#define TREE_ACTIVE_CUTTOFF_BASE_PM
#define NODE_TYPE_LOCAL_PARTICLE
#define TIMER_START(counter)
Starts the timer counter.
#define TIMER_STOP(counter)
Stops the timer counter.
#define TIMER_STORE
Copies the current value of CPU times to a stored variable, such that differences with respect to thi...
#define TIMER_DIFF(counter)
Returns amount elapsed for the timer since last save with TIMER_STORE.
unsigned char peano_incremental_key(unsigned char pix, unsigned char *rotation)
long long GlobalNActiveParticles
unsigned char nextnode_shmrank
std::atomic< unsigned char > cannot_be_opened_locally
vector< MyIntPosType > center
unsigned char sibling_shmrank
symtensor3< MyReal > D3phi
symtensor2< MyReal > D2phi
unsigned char getSofteningClass(void)
unsigned char Nextnode_shmrank
double CPUForLastPMExecution
double ForceSoftening[NSOFTCLASSES+NSOFTCLASSES_HYDRO+2]
char RelOpeningCriterionInUse
unsigned char maxsofttype
unsigned char minsofttype
unsigned char getSofteningClass(void)
unsigned char getSofteningClass(void)
unsigned char getType(void)
void myflush(FILE *fstream)
#define TREE_NUM_BEFORE_NODESPLIT