GADGET-4
tree.cc
Go to the documentation of this file.
1 /*******************************************************************************
2  * \copyright This file is part of the GADGET4 N-body/SPH code developed
3  * \copyright by Volker Springel. Copyright (C) 2014-2020 by Volker Springel
4  * \copyright (vspringel@mpa-garching.mpg.de) and all contributing authors.
5  *******************************************************************************/
6 
12 #include "gadgetconfig.h"
13 
14 #include <math.h>
15 #include <mpi.h>
16 #include <stdio.h>
17 #include <stdlib.h>
18 #include <string.h>
19 
20 #include "../data/allvars.h"
21 #include "../data/dtypes.h"
22 #include "../data/intposconvert.h"
23 #include "../data/mymalloc.h"
24 #include "../domain/domain.h"
25 #include "../logs/logs.h"
26 #include "../logs/timer.h"
27 #include "../main/simulation.h"
28 #include "../mpi_utils/mpi_utils.h"
29 #include "../sort/cxxsort.h"
30 #include "../sort/peano.h"
31 #include "../system/system.h"
32 #include "../time_integration/timestep.h"
33 #include "../tree/tree.h"
34 
// treebuild(): top-level driver of the tree construction for `ninsert`
// particles (indexlist==NULL means all of them).  Repeats the build with a
// grown TreeAllocFactor until the node storage suffices on all MPI ranks,
// then fills in node properties bottom-up and initializes per-node access
// metadata.  Returns the local number of nodes.
// NOTE(review): the leading integers are Doxygen listing line numbers from
// the HTML extraction, not code; the signature line (orig. line 52) is
// missing from this listing — presumably
//   int tree<...>::treebuild(int ninsert, int *indexlist)  — confirm.
51 template <typename node, typename partset, typename point_data, typename foreign_point_data>
53 {
54  if(MaxPart == 0 && ninsert == 0)
55  return 0; // nothing to be done
56 
57  if(MaxPart == 0 && ninsert > 0)
58  Terminate("Strange, we'll try to construct a tree for %d particles, but it appears not be allocated\n", ninsert);
59 
60  if(ninsert == Tp->NumPart)
61  D->mpi_printf("TREE: Full tree construction for all particles. (presently allocated=%g MB)\n", Mem.getAllocatedBytesInMB());
62 
63  Ninsert = ninsert;
64  IndexList = indexlist;
65 
66  TIMER_START(CPU_TREEBUILD);
67 
68  double t0 = Logs.second();
69 
70  int flag, iter = 0;
71  do /* try constructing tree until successful in terms of storage allocation */
72  {
73  TIMER_START(CPU_TREEBUILD_INSERT);
74 
75  int flag_single = treebuild_construct();
76 
77  TIMER_STOP(CPU_TREEBUILD_INSERT);
78 
// MPI_MIN over the per-rank status: if ANY rank ran out of node storage
// (negative return), all ranks must redo the build together.
79  MPI_Allreduce(&flag_single, &flag, 1, MPI_INT, MPI_MIN, D->Communicator);
80 
81  if(flag < 0)
82  {
83  /* tree construction was not successful and needs to be repeated */
84  treefree();
85  All.TreeAllocFactor *= 1.15;
86  // D->mpi_printf("TREE: Increasing TreeAllocFactor, new value=%g\n", All.TreeAllocFactor);
87  treeallocate(Tp->NumPart, Tp, D);
88  }
89  else
90  {
91  /* treebuild was successfull, but let's check if we allocated clearly too much storage, and if so repeat it */
92  int max_numnodes;
93  MPI_Allreduce(&NumNodes, &max_numnodes, 1, MPI_INT, MPI_MAX, D->Communicator);
94 
// If allocation exceeds actual use by >50%, shrink (bounded below by
// factor 0.02) and rebuild so memory is not wasted for the rest of the run.
95  if((MaxNodes - D->NTopnodes) > 1.5 * (max_numnodes - D->NTopnodes))
96  {
97  double oldvalue = All.TreeAllocFactor;
98  double newvalue = std::max<double>(1.1 * (max_numnodes - D->NTopnodes) / (MaxPart + BASENUMBER), 0.02);
99 
100  if(newvalue < oldvalue)
101  {
102  treefree();
103 
104  All.TreeAllocFactor = newvalue;
105  flag = -1;
106  treeallocate(Tp->NumPart, Tp, D);
107  }
108  }
109  }
110  iter++;
111 
// Safety valve: give up rather than loop forever growing the allocation.
112  if(iter > TREE_MAX_ITER)
113  Terminate("tree construction failed\n");
114  }
115  while(flag < 0);
116 
117  TIMER_STOPSTART(CPU_TREEBUILD, CPU_TREEBUILD_BRANCHES);
118 
119  /* first, construct the properties of the tree branches below the top leaves */
120 
121  int ntopleaves = D->NumTopleafOfTask[D->ThisTask];
122  int *list = D->ListOfTopleaves + D->FirstTopleafOfTask[D->ThisTask];
123 
124  for(int i = 0; i < ntopleaves; i++)
125  {
126  int no = NodeIndex[list[i]];
127  int sib = NodeSibling[list[i]];
128 
129  update_node_recursive(no, sib, TREE_MODE_BRANCH);
130  }
131 
132  TIMER_STOPSTART(CPU_TREEBUILD_BRANCHES, CPU_TREEBUILD_TOPLEVEL);
133 
134  exchange_topleafdata();
135 
136  /* now update the top-level tree nodes */
// Only rank 0 of each shared-memory group owns the TopNodes storage.
137  if(TreeSharedMem_ThisTask == 0)
138  update_node_recursive(MaxPart, -1, TREE_MODE_TOPLEVEL);
139 
// Stamp origin info and clear fetch bookkeeping on all locally built nodes.
140  for(int k = D->NTopnodes; k < NumNodes; k++)
141  {
142  int index = MaxPart + k;
143  Nodes[index].OriginTask = D->ThisTask;
144  Nodes[index].OriginNode = index;
145  Nodes[index].access.clear();
146  Nodes[index].flag_already_fetched = 0;
147  }
148 
149  if(TreeSharedMem_ThisTask == 0)
150  {
151  for(int k = 0; k < D->NTopnodes; k++)
152  {
153  int index = MaxPart + k;
154  TopNodes[index].OriginTask = D->ThisTask;
155  TopNodes[index].OriginNode = index;
156  TopNodes[index].access.clear();
157  TopNodes[index].flag_already_fetched = 0;
158  }
159  }
160 
161  tree_initialize_leaf_node_access_info();
162 
163  double t1 = Logs.second();
164  Buildtime = Logs.timediff(t0, t1);
165 
166  report_log_message();
167 
168  TIMER_STOP(CPU_TREEBUILD_TOPLEVEL);
169 
170  return NumNodes;
171 }
172 
// tree_initialize_leaf_node_access_info(): marks each top-level leaf node
// as locally openable (same shared-memory island as the leaf's owner task)
// or as a remote branch that must be fetched; remote leaves get their
// nextnode pointed at the corresponding pseudo-particle slot.
// NOTE(review): signature line (orig. line 174) missing in this extraction;
// the leading integers are Doxygen listing line numbers, not code.
173 template <typename node, typename partset, typename point_data, typename foreign_point_data>
175 {
176  if(TreeSharedMem_ThisTask == 0)
177  {
178  for(int k = 0; k < D->NTopleaves; k++)
179  {
180  int index = NodeIndex[k];
181 
// Leaf owner lives on our shared-memory node => branch is directly readable.
182  if(Shmem.GetNodeIDForSimulCommRank[D->TaskOfLeaf[k]] == Shmem.GetNodeIDForSimulCommRank[D->ThisTask])
183  TopNodes[index].cannot_be_opened_locally = 0;
184  else
185  {
186  TopNodes[index].cannot_be_opened_locally = 1;
187  TopNodes[index].flag_already_fetched = 0;
// MaxPart + MaxNodes + k is the pseudo-particle index for top-leaf k.
188  TopNodes[index].nextnode = MaxPart + MaxNodes + k;
189  TopNodes[index].nextnode_shmrank = TreeSharedMem_ThisTask;
190  }
191  TopNodes[index].OriginTask = D->TaskOfLeaf[k];
192  TopNodes[index].OriginNode = index;
193  }
194  }
195 }
196 
// treebuild_construct(): one attempt at building the tree.  Creates the
// top-level node skeleton, determines for every particle the top-leaf it
// falls into and the task owning that leaf, exchanges foreign particles
// ("Points"), and inserts all local+imported particles below the local
// top leaves.  Returns the node count on success, -1 when node storage was
// exhausted (caller enlarges TreeAllocFactor and retries), -2 when the
// top-level skeleton itself did not fit.
// NOTE(review): signature line (orig. line 215) and the condition line
// orig. 453 are missing from this Doxygen extraction; leading integers are
// listing line numbers, not code.
214 template <typename node, typename partset, typename point_data, typename foreign_point_data>
216 {
217  if(TreeSharedMem_ThisTask == 0)
218  {
219  /* create an empty root node */
220  NextFreeNode = MaxPart; /* index of first free node */
221 
222  node *nfreep = &TopNodes[NextFreeNode]; /* select first node */
223  nfreep->nextnode = -1;
224  nfreep->sibling = -1;
225  nfreep->father = -1;
226  nfreep->level = 0;
227  nfreep->sibling_shmrank = TreeSharedMem_ThisTask;
228  nfreep->nextnode_shmrank = TreeSharedMem_ThisTask;
229 
// Root is centered at the midpoint of the integer position cube.
230  for(int j = 0; j < 3; j++)
231  nfreep->center[j] = ((MyIntPosType)1) << (BITS_FOR_POSITIONS - 1);
232 
233  NodeIndex[0] = NextFreeNode;
234  NodeLevel[0] = 0;
235  NodeSibling[0] = -1;
236  NumNodes = 1;
237  NextFreeNode++;
238 
239  /* create a set of empty nodes corresponding to the top-level domain
240  * grid. We need to generate these nodes first to make sure that we have a
241  * complete top-level tree which allows the easy insertion of the
242  * pseudo-particles at the right place.
243  */
244 
245  if(create_empty_nodes(MaxPart, 1, 0, 1, -1, 0, 0, 0) < 0)
246  return -2;
247  }
248 
// Only shared-memory rank 0 built the skeleton; propagate the counters.
249  MPI_Bcast(&NumNodes, 1, MPI_INT, 0, D->Communicator);
250  MPI_Bcast(&NextFreeNode, 1, MPI_INT, 0, D->Communicator);
251 
252  if(NumNodes != D->NTopnodes)
253  Terminate("NumNodes=%d != D->NTopnodes=%d", NumNodes, D->NTopnodes);
254 
255  FirstNonTopLevelNode = NextFreeNode;
256 
257  if(MaxPart + D->NTopnodes != FirstNonTopLevelNode)
258  Terminate("unexpected");
259 
260  for(int j = 0; j < D->NTask; j++)
261  Send_count[j] = 0;
262 
263 #ifndef LEAN
// Cache node/task per particle so the later passes avoid recomputing them.
264  int *node_list = (int *)Mem.mymalloc_movable(&node_list, "node_list", Tp->NumPart * sizeof(int));
265  int *task_list = (int *)Mem.mymalloc_movable(&task_list, "task_list", Tp->NumPart * sizeof(int));
266 #endif
267 
268  /* now we determine for each point the insertion top-level node, and the task on which this lies */
269  for(int idx = 0; idx < Ninsert; idx++)
270  {
271  int i;
272  if(IndexList)
273  i = IndexList[idx];
274  else
275  i = idx;
276 
// Make sure the particle position is current before classifying it.
277  if(Tp->P[i].get_Ti_Current() != All.Ti_Current)
278  Tp->drift_particle(&Tp->P[i], &Tp->SphP[i], All.Ti_Current);
279 
280  int no;
281  int task;
282  tree_get_node_and_task(i, no, task);
283 
284 #ifndef LEAN
285  node_list[i] = no;
286  task_list[i] = task;
287 #endif
288 
289  if(task != D->ThisTask)
290  Send_count[task]++;
291  }
292 
293  MPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, D->Communicator);
294 
295  NumPartImported = 0;
296  NumPartExported = 0;
297  Recv_offset[0] = 0;
298  Send_offset[0] = 0;
299 
// Standard prefix-sum construction of the send/recv displacement tables.
300  for(int j = 0; j < D->NTask; j++)
301  {
302  NumPartImported += Recv_count[j];
303  NumPartExported += Send_count[j];
304  if(j > 0)
305  {
306  Send_offset[j] = Send_offset[j - 1] + Send_count[j - 1];
307  Recv_offset[j] = Recv_offset[j - 1] + Recv_count[j - 1];
308  }
309  }
310 
311  Points = (point_data *)Mem.mymalloc_movable(&Points, "Points", NumPartImported * sizeof(point_data));
312  Nextnode = (int *)Mem.mymalloc_movable(&Nextnode, "Nextnode", (MaxPart + D->NTopleaves + NumPartImported) * sizeof(int));
313  Father = (int *)Mem.mymalloc_movable(&Father, "Father", (MaxPart + NumPartImported) * sizeof(int));
314 
315  /* now put in markers ("pseudo" particles) in top-leaf nodes to indicate on which task the branch lies */
316  for(int i = 0; i < D->NTopleaves; i++)
317  {
318  int index = NodeIndex[i];
319 
320  if(TreeSharedMem_ThisTask == 0)
321  TopNodes[index].nextnode = MaxPart + MaxNodes + i;
322 
323  /* set nextnode for pseudo-particle (Nextnode exists on all ranks) */
324  Nextnode[MaxPart + i] = TopNodes[index].sibling;
325  }
326 
327  point_data *export_Points = (point_data *)Mem.mymalloc("export_Points", NumPartExported * sizeof(point_data));
328 
// Reuse Send_count as a fill cursor for the export buffer.
329  for(int j = 0; j < D->NTask; j++)
330  Send_count[j] = 0;
331 
332  for(int idx = 0; idx < Ninsert; idx++) /* prepare particle data to be copied to other tasks */
333  {
334  int i;
335  if(IndexList)
336  i = IndexList[idx];
337  else
338  i = idx;
339 
340 #ifdef LEAN
341  int no, task;
342  tree_get_node_and_task(i, no, task);
343 #else
344  int task = task_list[i];
345  int no = node_list[i];
346 #endif
347 
348  if(task != D->ThisTask)
349  {
350  int n = Send_offset[task] + Send_count[task]++;
351 
352  fill_in_export_points(&export_Points[n], i, no);
353  }
354  }
355 
356  /* exchange data */
// Hypercube-pattern pairwise exchange over all 2^PTask rank pairings.
357  for(int ngrp = 1; ngrp < (1 << D->PTask); ngrp++)
358  {
359  int recvTask = D->ThisTask ^ ngrp;
360  if(recvTask < D->NTask)
361  if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0)
362  MPI_Sendrecv(&export_Points[Send_offset[recvTask]], Send_count[recvTask] * sizeof(point_data), MPI_BYTE, recvTask,
363  TAG_DENS_A, &Points[Recv_offset[recvTask]], Recv_count[recvTask] * sizeof(point_data), MPI_BYTE, recvTask,
364  TAG_DENS_A, D->Communicator, MPI_STATUS_IGNORE);
365  }
366 
367  Mem.myfree(export_Points);
368 
// Index ranges: [0,MaxPart) particles, [MaxPart,+MaxNodes) nodes,
// [.., +NTopleaves) pseudo-particles, then imported points.
369  ImportedNodeOffset = MaxPart + MaxNodes + D->NTopleaves;
370 
371  MPI_Allreduce(&NumPartImported, &EndOfTreePoints, 1, MPI_INT, MPI_MAX, D->Communicator);
372  EndOfTreePoints += ImportedNodeOffset;
373  EndOfForeignNodes = EndOfTreePoints + (INT_MAX - EndOfTreePoints) / 2;
374 
375  /* make a list that holds the particles that belong to a certain node */
376  index_data *index_list =
377  (index_data *)Mem.mymalloc_movable(&index_list, "index_list", (Ninsert + NumPartImported) * sizeof(index_data));
378  int count = 0;
379 
380  for(int idx = 0; idx < Ninsert; idx++)
381  {
382  int i;
383  if(IndexList)
384  i = IndexList[idx];
385  else
386  i = idx;
387 
388 #ifdef LEAN
389  int no, task;
390  tree_get_node_and_task(i, no, task);
391 #else
392  int task = task_list[i];
393  int no = node_list[i];
394 #endif
395 
396  if(task == D->ThisTask)
397  {
398  index_list[count].p = i;
399  index_list[count].subnode = no;
400  count++;
401  }
402  }
403 
404  for(int i = 0; i < NumPartImported; i++)
405  {
406  index_list[count].p = i + ImportedNodeOffset;
407  index_list[count].subnode = Points[i].no;
408  count++;
409  }
410 
411 #ifndef LEAN
412  Mem.myfree_movable(task_list);
413  Mem.myfree_movable(node_list);
414 #endif
415 
416  /* sort according to node so that particles indices in the same node are grouped together */
417  mycxxsort(index_list, index_list + count, compare_index_data_subnode);
418 
419  int full_flag = 0;
420  int ntopleaves = D->NumTopleafOfTask[D->ThisTask];
421  int start = 0;
422 
// Insert each contiguous group of particles under its local top leaf.
423  for(int n = 0; n < ntopleaves; n++)
424  {
425  int no = D->ListOfTopleaves[D->FirstTopleafOfTask[D->ThisTask] + n];
426  int th = NodeIndex[no];
427  unsigned char level = NodeLevel[no];
428  int sibling = NodeSibling[no];
429 
430  while(start < count && index_list[start].subnode < no)
431  start++;
432 
433  if(start < count)
434  {
435  int last = start;
436  while(last < count && index_list[last].subnode == no)
437  last++;
438 
439  int num = last - start;
440 
// Non-zero return means node storage ran out — abort this attempt.
441  if(treebuild_insert_group_of_points(num, &index_list[start], th, level, sibling))
442  {
443  full_flag = 1;
444  break;
445  }
446 
447  start += num;
448  }
449  }
450 
451  if((NumNodes = NextFreeNode - MaxPart) >= MaxNodes)
452  {
// NOTE(review): orig. line 453 (the inner condition guarding this dump,
// presumably a check that TreeAllocFactor is already large) is missing
// from this extraction — confirm against the original source.
454  {
455  Tp->dump_particles();
456  Terminate(
457  "task %d: looks like a serious problem, stopping with particle dump. NumNodes=%d MaxNodes=%d NumPartImported=%d "
458  "NumPart=%d\n",
459  D->ThisTask, NumNodes, MaxNodes, NumPartImported, Tp->NumPart);
460  }
461  }
462 
463  Mem.myfree(index_list);
464 
465  if(full_flag)
466  return -1;
467 
468  return NumNodes;
469 }
470 
// treebuild_insert_group_of_points(): recursively inserts a group of `num`
// particles (given via index_list) below node `th` at depth `level`.
// Particles are classified into the 8 octants; octants with more than
// TREE_NUM_BEFORE_NODESPLIT particles get a new daughter node and recurse,
// smaller octants are chained directly to the node as a particle list.
// Returns 0 on success, 1 when node storage is exhausted.
// NOTE(review): the signature line (orig. line 480) and orig. line 489
// (the trailing arguments of the Terminate() call) are missing from this
// Doxygen extraction; leading integers are listing line numbers, not code.
479 template <typename node, typename partset, typename point_data, typename foreign_point_data>
481  unsigned char level, int sibling)
482 {
483  if(level >= BITS_FOR_POSITIONS)
484  Terminate(
485  "It appears we have reached the bottom of the tree because there are more than TREE_NUM_BEFORE_NODESPLIT=%d particles in the "
486  "smallest tree node representable for BITS_FOR_POSITIONS=%d.\n"
487  "Either eliminate the particles at (nearly) indentical coordinates, increase the setting for TREE_NUM_BEFORE_NODESPLIT, or "
488  "possibly enlarge BITS_FOR_POSITIONS if you have really not enough dynamic range\n",
490 
// mask selects the bit of the integer position that decides the octant at
// this level; the three shifts pack x/y/z bits into a 3-bit subnode index.
491  MyIntPosType mask = (((MyIntPosType)1) << (BITS_FOR_POSITIONS - 1 - level));
492  unsigned char shiftx = (BITS_FOR_POSITIONS - 1 - level);
493  unsigned char shifty = (BITS_FOR_POSITIONS - 2 - level);
494  unsigned char shiftz = (BITS_FOR_POSITIONS - 3 - level);
495  MyIntPosType centermask = ~(~((MyIntPosType)0) >> level);
496 
497  int subcount[8] = {0, 0, 0, 0, 0, 0, 0, 0}, subnode[8];
498  MyIntPosType *subintpos[8];
499 
500  for(int i = 0; i < num; i++)
501  {
502  MyIntPosType *intpos;
503 
// p < MaxPart are local particles; larger indices are imported Points.
504  int p = index_list[i].p;
505  if(p < MaxPart)
506  {
507  intpos = Tp->P[p].IntPos;
508  }
509  else
510  {
511  int n = p - ImportedNodeOffset;
512  intpos = Points[n].IntPos;
513  }
514 
515  unsigned char subnode = (((unsigned char)((intpos[0] & mask) >> shiftx)) | ((unsigned char)((intpos[1] & mask) >> shifty)) |
516  ((unsigned char)((intpos[2] & mask) >> shiftz)));
517  if(subnode > 7)
518  Terminate("stop: subnode > 7");
519 
520  subcount[subnode]++;
521 
// Remember one representative position per octant; used below to derive
// the daughter node's center bits.
522  subintpos[subnode] = intpos;
523 
524  index_list[i].subnode = subnode;
525  }
526 
527  /* sort */
528  mycxxsort(index_list, index_list + num, compare_index_data_subnode);
529 
530  centermask >>= 1;
531  centermask |= ~(~((MyIntPosType)0) >> 1); /* this sets the MSB */
532  mask >>= 1;
533 
534  node *nfreep_last = NULL;
535 
536  /* create the daughter nodes */
537  for(int i = 0; i < 8; i++)
538  {
539  if(subcount[i] > TREE_NUM_BEFORE_NODESPLIT)
540  {
541  int thnew;
542 
543  thnew = NextFreeNode++;
544 
545  if(thnew - MaxPart >= MaxNodes)
546  return 1; /* we are out of space */
547 
548  subnode[i] = thnew;
549 
// Link the new daughter into the sibling/nextnode chain of its
// predecessor, or hang it off the parent if it is the first one.
550  if(nfreep_last)
551  {
552  nfreep_last->sibling = thnew;
553  nfreep_last->nextnode = thnew;
554 
555  nfreep_last->sibling_shmrank = TreeSharedMem_ThisTask;
556  nfreep_last->nextnode_shmrank = TreeSharedMem_ThisTask;
557  }
558  else
559  {
560  get_nodep(th)->nextnode = thnew;
561  get_nodep(th)->nextnode_shmrank = TreeSharedMem_ThisTask;
562  }
563 
564  if(thnew < MaxPart + D->NTopnodes)
565  Terminate("thnew = %d < MaxPart=%d + D->NTopnodes=%d", thnew, MaxPart, D->NTopnodes);
566 
567  node *nfreep = get_nodep(thnew);
568 
569  nfreep->father = th;
570  nfreep->level = level + 1;
571 
// Daughter center = parent-level prefix bits of a member particle,
// plus the half-size offset bit for the new level.
572  nfreep->center[0] = ((subintpos[i][0] & centermask) | mask);
573  nfreep->center[1] = ((subintpos[i][1] & centermask) | mask);
574  nfreep->center[2] = ((subintpos[i][2] & centermask) | mask);
575 
576  nfreep->nextnode_shmrank = TreeSharedMem_ThisTask;
577  nfreep->sibling_shmrank = TreeSharedMem_ThisTask;
578 
579  nfreep_last = nfreep;
580  }
581  }
582 
583  /* now insert the particles that are chained to the node */
584 
585  int p_last = -1, p_first = -1;
586 
587  /* now insert the particle groups into the created daughter nodes, or chain them to the node */
588  for(int i = 0, off = 0; i < 8; i++)
589  {
590  if(subcount[i] <= TREE_NUM_BEFORE_NODESPLIT)
591  {
592  /* put the particles into the node as a chain */
593  for(int j = 0; j < subcount[i]; j++)
594  {
595  int p = index_list[off + j].p;
596 
// No daughter nodes were created and this is the very first chained
// particle => it becomes the parent's nextnode.
597  if(nfreep_last == NULL && p_first == -1)
598  {
599  get_nodep(th)->nextnode = p;
600  get_nodep(th)->nextnode_shmrank = TreeSharedMem_ThisTask;
601  }
602 
603  if(p < MaxPart)
604  Father[p] = th;
605  else
606  Father[p - MaxNodes - D->NTopleaves] = th;
607 
608  if(p_last >= 0)
609  {
610  if(p_last < MaxPart)
611  Nextnode[p_last] = p;
612  else
613  Nextnode[p_last - MaxNodes] = p; /* imported point */
614  }
615 
616  p_last = p;
617 
618  if(p_first == -1)
619  p_first = p;
620  }
621  }
622 
623  off += subcount[i];
624  }
625 
// Terminate the particle chain with the node's sibling pointer.
626  if(p_last >= 0)
627  {
628  if(p_last < MaxPart)
629  Nextnode[p_last] = sibling;
630  else
631  Nextnode[p_last - MaxNodes] = sibling; /* imported point */
632  }
633 
// The last daughter node points at the particle chain if one exists,
// otherwise directly at the parent's sibling.
634  if(nfreep_last)
635  {
636  if(p_first >= 0)
637  {
638  nfreep_last->sibling = p_first;
639  nfreep_last->nextnode = p_first;
640 
641  nfreep_last->sibling_shmrank = TreeSharedMem_ThisTask;
642  nfreep_last->nextnode_shmrank = TreeSharedMem_ThisTask;
643  }
644  else
645  {
646  nfreep_last->sibling = sibling;
647  nfreep_last->nextnode = sibling;
648 
649  nfreep_last->sibling_shmrank = TreeSharedMem_ThisTask;
650  nfreep_last->nextnode_shmrank = TreeSharedMem_ThisTask;
651  }
652  }
653 
// Recurse into the octants that received their own daughter node.
654  for(int i = 0, off = 0; i < 8; i++)
655  {
656  if(subcount[i] > TREE_NUM_BEFORE_NODESPLIT)
657  {
658  if(subnode[i] < MaxPart + D->NTopnodes)
659  Terminate("subnode[i]=%d < MaxPart=%d + D->NTopnodes=%d", subnode[i], MaxPart, D->NTopnodes);
660 
661  int out_of_space =
662  treebuild_insert_group_of_points(subcount[i], &index_list[off], subnode[i], level + 1, get_nodep(subnode[i])->sibling);
663  if(out_of_space)
664  return out_of_space;
665  }
666 
667  off += subcount[i];
668  }
669 
670  return 0; /* success */
671 }
672 
// create_empty_nodes(): recursively materializes empty tree nodes for the
// top-level domain grid below node `no_parent`, following the domain's
// TopNodes daughter structure.  Records NodeIndex/NodeLevel/NodeSibling for
// every top leaf reached, using the Peano-Hilbert key to map the (i,j,k)
// sub-cell to the domain's daughter ordering.  Returns 0 on success, -1
// when MaxNodes is exceeded.
// NOTE(review): the function-name portion of the signature (orig. line 684)
// and orig. line 704 (the condition guarding the error dump) are missing
// from this Doxygen extraction; leading integers are listing line numbers.
683 template <typename node, typename partset, typename point_data, typename foreign_point_data>
685  int no_parent , int level ,
686  int topnode ,
687  int bits , int sibling,
688  MyIntPosType x ,
689  MyIntPosType y ,
690  MyIntPosType z )
691 {
692  if(D->TopNodes[topnode].Daughter >= 0)
693  {
694  int firstflag = 0;
695  int nostart = NextFreeNode;
696 
697  /* loop over daughter nodes */
698  for(int i = 0; i < 2; i++)
699  for(int j = 0; j < 2; j++)
700  for(int k = 0; k < 2; k++)
701  {
702  if(NumNodes >= MaxNodes)
703  {
// NOTE(review): orig. line 704 (inner condition before the particle
// dump, presumably a TreeAllocFactor sanity check) is missing here.
705  {
706  char buf[MAXLEN_PATH];
707  sprintf(buf, "task %d: looks like a serious problem (NTopnodes=%d), stopping with particle dump.\n", D->ThisTask,
708  D->NTopnodes);
709  Tp->dump_particles();
710  Terminate(buf);
711  }
712  return -1;
713  }
714 
715  int no = NextFreeNode++;
716  NumNodes++;
717 
718  if(firstflag == 0)
719  {
720  TopNodes[no_parent].nextnode = no;
721  firstflag = 1;
722  }
723 
724  TopNodes[no].father = no_parent;
725 
// The 8th daughter closes the level: its sibling is the parent's sibling.
726  if(i + 2 * j + 4 * k == 7)
727  TopNodes[no].sibling = sibling;
728  else
729  TopNodes[no].sibling = no + 1;
730 
731  TopNodes[no].nextnode = TopNodes[no].sibling;
732 
733  TopNodes[no].level = level;
734 
735  MyIntPosType lenhalf = ((MyIntPosType)1) << (BITS_FOR_POSITIONS - level - 1);
736  TopNodes[no].center[0] = TopNodes[no_parent].center[0] + (2 * i - 1) * lenhalf;
737  TopNodes[no].center[1] = TopNodes[no_parent].center[1] + (2 * j - 1) * lenhalf;
738  TopNodes[no].center[2] = TopNodes[no_parent].center[2] + (2 * k - 1) * lenhalf;
739 
740  TopNodes[no].sibling_shmrank = TreeSharedMem_ThisTask;
741  TopNodes[no].nextnode_shmrank = TreeSharedMem_ThisTask;
742  }
743 
744  /* loop over daughter nodes */
745  for(int i = 0; i < 2; i++)
746  for(int j = 0; j < 2; j++)
747  for(int k = 0; k < 2; k++)
748  {
749  int no = nostart++;
750 
// Map the geometric (i,j,k) sub-cell to the domain decomposition's
// daughter index via the Peano-Hilbert key at this refinement depth.
751  peanokey key = peano_hilbert_key((x << 1) + i, (y << 1) + j, (z << 1) + k, bits);
752  int sub = 7 & key.ls;
753 
// Daughter == -1 means this domain cell is a top leaf: record where
// its tree node sits so particles can be attached there later.
754  if(D->TopNodes[D->TopNodes[topnode].Daughter + sub].Daughter == -1)
755  {
756  NodeIndex[D->TopNodes[D->TopNodes[topnode].Daughter + sub].Leaf] = no;
757  NodeLevel[D->TopNodes[D->TopNodes[topnode].Daughter + sub].Leaf] = level;
758  NodeSibling[D->TopNodes[D->TopNodes[topnode].Daughter + sub].Leaf] = TopNodes[no].sibling;
759  }
760 
761  /* create grand daughter nodes for current daughter node */
762  if(create_empty_nodes(no, level + 1, D->TopNodes[topnode].Daughter + sub, bits + 1, TopNodes[no].sibling, 2 * x + i,
763  2 * y + j, 2 * z + k) < 0)
764  return -1;
765  }
766  }
767 
768  return 0;
769 }
770 
// treeallocate(): allocates the tree storage for up to max_partindex
// particles.  Splits the communicator into shared-memory islands, sizes
// MaxNodes from TreeAllocFactor, and allocates the top-level node arrays
// once per shared-memory island (either locally, or on a ghost rank whose
// memory is then mapped into this rank's address space).
// max_partindex == -1 appears to signal a re-allocation with unchanged
// sizing (the communicator split and size checks are skipped) — confirm.
// NOTE(review): signature line (orig. line 776) and orig. line 860 (which
// presumably declared `ghost_rank`) are missing from this Doxygen
// extraction; leading integers are listing line numbers, not code.
775 template <typename node, typename partset, typename point_data, typename foreign_point_data>
777 {
778  D = Dptr;
779  Tp = Tp_ptr;
780 
781  /* split up the communicator into pieces overlap with different shared memory regions */
782  if(max_partindex != -1)
783  MPI_Comm_split(D->Communicator, Shmem.Island_Smallest_WorldTask, 0, &TreeSharedMemComm);
784 
785  if(max_partindex != -1)
786  MPI_Allreduce(&max_partindex, &MaxPart, 1, MPI_INT, MPI_MAX, D->Communicator);
787 
788  if(MaxPart == 0)
789  return; // nothing to be done
790 
791  if(Nodes)
792  Terminate("Nodes already allocated");
793 
794  Send_count = (int *)Mem.mymalloc_movable(&Send_count, "Send_count", sizeof(int) * D->NTask);
795  Send_offset = (int *)Mem.mymalloc_movable(&Send_offset, "Send_offset", sizeof(int) * D->NTask);
796  Recv_count = (int *)Mem.mymalloc_movable(&Recv_count, "Recv_count", sizeof(int) * D->NTask);
797  Recv_offset = (int *)Mem.mymalloc_movable(&Recv_offset, "Recv_offset", sizeof(int) * D->NTask);
798 
799  if(max_partindex != -1)
800  {
// All ranks must agree on TreeAllocFactor so MaxNodes is identical.
801  MPI_Allreduce(MPI_IN_PLACE, &All.TreeAllocFactor, 1, MPI_DOUBLE, MPI_MAX, D->Communicator);
802 
803  MaxNodes = (int)(All.TreeAllocFactor * (MaxPart + BASENUMBER)) + D->NTopnodes;
804 
805  int max_nodes;
806  MPI_Allreduce(&MaxNodes, &max_nodes, 1, MPI_INT, MPI_MAX, D->Communicator);
807 
808  if(max_nodes != MaxNodes)
809  Terminate("Strange: different maxnodes detected: %d %d", max_nodes, MaxNodes);
810 
811  int max_leaves;
812  MPI_Allreduce(&D->NTopleaves, &max_leaves, 1, MPI_INT, MPI_MAX, D->Communicator);
813  if(max_leaves != D->NTopleaves)
814  Terminate("Strange: different maxnodes detected: %d %d", max_leaves, D->NTopleaves);
815  }
816  else
817  {
818  max_partindex = MaxPart;
819  }
820 
821  MPI_Comm_rank(TreeSharedMemComm, &TreeSharedMem_ThisTask);
822  MPI_Comm_size(TreeSharedMemComm, &TreeSharedMem_NTask);
823 
// Per-rank offset tables used to translate pointers between the address
// spaces of ranks sharing a memory region.
824  TreeNodes_offsets = (ptrdiff_t *)Mem.mymalloc("TreeNodes_offsets", TreeSharedMem_NTask * sizeof(ptrdiff_t));
825  TreePoints_offsets = (ptrdiff_t *)Mem.mymalloc("TreePoints_offsets", TreeSharedMem_NTask * sizeof(ptrdiff_t));
826  TreeNextnode_offsets = (ptrdiff_t *)Mem.mymalloc("TreeNextnode_offsets", TreeSharedMem_NTask * sizeof(ptrdiff_t));
827  TreeForeign_Nodes_offsets = (ptrdiff_t *)Mem.mymalloc("TreeForeign_Nodes_offsets", TreeSharedMem_NTask * sizeof(ptrdiff_t));
828  TreeForeign_Points_offsets = (ptrdiff_t *)Mem.mymalloc("TreeForeign_Points_offsets", TreeSharedMem_NTask * sizeof(ptrdiff_t));
829  TreeP_offsets = (ptrdiff_t *)Mem.mymalloc("TreeP_offsets", TreeSharedMem_NTask * sizeof(ptrdiff_t));
830  TreeSphP_offsets = (ptrdiff_t *)Mem.mymalloc("TreeSphP_offsets", TreeSharedMem_NTask * sizeof(ptrdiff_t));
831  TreePS_offsets = (ptrdiff_t *)Mem.mymalloc("TreePS_offsets", TreeSharedMem_NTask * sizeof(ptrdiff_t));
832 
833  TreeSharedMemBaseAddr = (void **)Mem.mymalloc("TreeSharedMemBaseAddr", TreeSharedMem_NTask * sizeof(void *));
834 
835  for(int i = 0; i < TreeSharedMem_NTask; i++)
836  {
837  int island_rank = Shmem.Island_ThisTask + (i - TreeSharedMem_ThisTask);
838 
839  if(island_rank < 0 || island_rank >= Shmem.Island_NTask)
840  Terminate("island_rank=%d < 0 || island_rank >= Shmem.Island_NTask=%d", island_rank, Shmem.Island_NTask);
841 
842  TreeSharedMemBaseAddr[i] = Shmem.SharedMemBaseAddr[island_rank];
843  }
844 
845  /* allocate the Top-Level tree only once per shared-memory section in the communicator */
846  if(TreeSharedMem_ThisTask == 0)
847  {
848  /* if we have a single shared memory node, or we are building a tree on a subcommunicator, allocate locally */
849  if(TreeSharedMem_NTask == Shmem.World_NTask || D->NTask < Shmem.Sim_NTask)
850  {
851  NodeLevel = (unsigned char *)Mem.mymalloc("NodeLevel", D->NTopleaves * sizeof(unsigned char));
852  NodeSibling = (int *)Mem.mymalloc("NodeSibling", D->NTopleaves * sizeof(int));
853  NodeIndex = (int *)Mem.mymalloc("NodeIndex", D->NTopleaves * sizeof(int));
854 
855  TopNodes = (node *)Mem.mymalloc("TopNodes", D->NTopnodes * sizeof(node));
// Bias the pointer so indices [MaxPart, MaxPart+NTopnodes) address it.
856  TopNodes -= MaxPart;
857  }
858  else /* otherwise, allocate the storage on the ghost processor, and get the address in our space from him */
859  {
// NOTE(review): orig. line 860 is missing here — presumably the
// declaration/derivation of `ghost_rank`; confirm against the original.
861 
862  /* request the storage from the responsible ghost rank, and map it into the local address space */
863  size_t tab_len[4] = {D->NTopleaves * sizeof(unsigned char), D->NTopleaves * sizeof(int), D->NTopleaves * sizeof(int),
864  D->NTopnodes * sizeof(node)};
865 
866  MPI_Send(tab_len, 4 * sizeof(tab_len), MPI_BYTE, ghost_rank, TAG_TOPNODE_ALLOC, MPI_COMM_WORLD);
867 
868  ptrdiff_t off[4];
869  MPI_Recv(off, 4 * sizeof(ptrdiff_t), MPI_BYTE, ghost_rank, TAG_TOPNODE_OFFSET, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
870 
871  NodeLevel = (unsigned char *)((char *)Shmem.SharedMemBaseAddr[Shmem.Island_NTask - 1] + off[0]);
872  NodeSibling = (int *)((char *)Shmem.SharedMemBaseAddr[Shmem.Island_NTask - 1] + off[1]);
873  NodeIndex = (int *)((char *)Shmem.SharedMemBaseAddr[Shmem.Island_NTask - 1] + off[2]);
874 
875  TopNodes = (node *)((char *)Shmem.SharedMemBaseAddr[Shmem.Island_NTask - 1] + off[3]);
876  TopNodes -= MaxPart;
877 
878  MPI_Recv(&TreeInfoHandle, 1, MPI_INT, ghost_rank, TAG_N, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
879  }
880  }
881 
// Regular (non-top-level) node storage; biased so that node index
// MaxPart + NTopnodes maps to element 0.
882  Nodes = (node *)Mem.mymalloc_movable(&Nodes, "Nodes", (MaxNodes - D->NTopnodes + 1) * sizeof(node));
883  Nodes -= (MaxPart + D->NTopnodes);
884 
885  if(max_partindex != -1)
886  treeallocate_share_topnode_addresses();
887 }
888 
// treeallocate_share_topnode_addresses(): broadcasts, within one
// shared-memory island, the offsets (relative to the allocator base) of the
// top-level tree arrays owned by island rank 0, so every rank can map them
// into its own address space via the shared base addresses.
// NOTE(review): signature line (orig. line 890) and orig. line 899 (which
// presumably declared `shmrank`) are missing from this Doxygen extraction;
// leading integers are listing line numbers, not code.
889 template <typename node, typename partset, typename point_data, typename foreign_point_data>
891 {
892  MPI_Bcast(&TreeInfoHandle, 1, MPI_INT, 0, TreeSharedMemComm);
893 
// Offsets are taken relative to Mem.Base so they are meaningful in any
// rank's mapping of the same shared region.
894  ptrdiff_t off[4] = {((char *)NodeLevel - Mem.Base), ((char *)NodeSibling - Mem.Base), ((char *)NodeIndex - Mem.Base),
895  ((char *)TopNodes - Mem.Base)};
896 
897  MPI_Bcast(off, 4 * sizeof(ptrdiff_t), MPI_BYTE, 0, TreeSharedMemComm);
898 
900  MPI_Bcast(&shmrank, 1, MPI_INT, 0, TreeSharedMemComm);
901 
902  NodeLevel = (unsigned char *)((char *)Shmem.SharedMemBaseAddr[shmrank] + off[0]);
903  NodeSibling = (int *)((char *)Shmem.SharedMemBaseAddr[shmrank] + off[1]);
904  NodeIndex = (int *)((char *)Shmem.SharedMemBaseAddr[shmrank] + off[2]);
905  TopNodes = (node *)((char *)Shmem.SharedMemBaseAddr[shmrank] + off[3]);
906 }
907 
// Publishes, within the shared-memory island, the allocator offsets of all
// tree arrays (nodes, points, particle data, foreign buffers) via
// Allgather, and registers the tree's bounds/offset metadata with the
// shared-memory ghost handler so remote ranks can service fetch requests.
// Only works when the tree spans the full simulation partition.
// NOTE(review): signature (orig. line 909) and orig. lines 930/968-context
// are missing from this Doxygen extraction — the condition opening the
// block at listing line 931 is absent; leading integers are listing line
// numbers, not code.
908 template <typename node, typename partset, typename point_data, typename foreign_point_data>
910 {
911  if(D->NTask != Shmem.Sim_NTask)
912  Terminate("This version of the tree communication algorithm only works with the full simulation partition");
913 
914  ptrdiff_t off;
915  off = (char *)Nodes - Mem.Base;
916  MPI_Allgather(&off, sizeof(ptrdiff_t), MPI_BYTE, TreeNodes_offsets, sizeof(ptrdiff_t), MPI_BYTE, TreeSharedMemComm);
917  off = (char *)Points - Mem.Base;
918  MPI_Allgather(&off, sizeof(ptrdiff_t), MPI_BYTE, TreePoints_offsets, sizeof(ptrdiff_t), MPI_BYTE, TreeSharedMemComm);
919  off = (char *)Nextnode - Mem.Base;
920  MPI_Allgather(&off, sizeof(ptrdiff_t), MPI_BYTE, TreeNextnode_offsets, sizeof(ptrdiff_t), MPI_BYTE, TreeSharedMemComm);
921  off = (char *)Foreign_Nodes - Mem.Base;
922  MPI_Allgather(&off, sizeof(ptrdiff_t), MPI_BYTE, TreeForeign_Nodes_offsets, sizeof(ptrdiff_t), MPI_BYTE, TreeSharedMemComm);
923  off = (char *)Foreign_Points - Mem.Base;
924  MPI_Allgather(&off, sizeof(ptrdiff_t), MPI_BYTE, TreeForeign_Points_offsets, sizeof(ptrdiff_t), MPI_BYTE, TreeSharedMemComm);
925  off = (char *)Tp->P - Mem.Base;
926  MPI_Allgather(&off, sizeof(ptrdiff_t), MPI_BYTE, TreeP_offsets, sizeof(ptrdiff_t), MPI_BYTE, TreeSharedMemComm);
927  off = (char *)Tp->SphP - Mem.Base;
928  MPI_Allgather(&off, sizeof(ptrdiff_t), MPI_BYTE, TreeSphP_offsets, sizeof(ptrdiff_t), MPI_BYTE, TreeSharedMemComm);
929 
// NOTE(review): orig. line 930 (the condition opening this block,
// presumably a check that ghost ranks exist) is missing here.
931  {
932  Shmem.tree_info[TreeInfoHandle].Bd.MaxPart = MaxPart;
933  Shmem.tree_info[TreeInfoHandle].Bd.MaxNodes = MaxNodes;
934  Shmem.tree_info[TreeInfoHandle].Bd.NTopnodes = D->NTopnodes;
935  Shmem.tree_info[TreeInfoHandle].Bd.ImportedNodeOffset = ImportedNodeOffset;
936  Shmem.tree_info[TreeInfoHandle].Bd.EndOfTreePoints = EndOfTreePoints;
937  Shmem.tree_info[TreeInfoHandle].Bd.EndOfForeignNodes = EndOfForeignNodes;
938 
939  // need to inform also our shared shared memory processor
940  if(TreeSharedMem_ThisTask == 0)
941  {
942  MPI_Send(&TreeInfoHandle, 1, MPI_INT, Shmem.MyShmRankInGlobal, TAG_METDATA, MPI_COMM_WORLD);
943  MPI_Send(&All.Ti_Current, sizeof(All.Ti_Current), MPI_BYTE, Shmem.MyShmRankInGlobal, TAG_METDATA + 1, MPI_COMM_WORLD);
944  MPI_Send(&Shmem.tree_info[TreeInfoHandle].Bd, sizeof(Shmem.tree_info[TreeInfoHandle].Bd), MPI_BYTE, Shmem.MyShmRankInGlobal,
945  TAG_METDATA + 2, MPI_COMM_WORLD);
946 
// Send the integer-position conversion factors so the ghost can decode
// particle coordinates on our behalf.
947  intposconvert *convfac = Tp;
948  MPI_Send(convfac, sizeof(intposconvert), MPI_BYTE, Shmem.MyShmRankInGlobal, TAG_METDATA + 3, MPI_COMM_WORLD);
949  }
950 
951  Shmem.inform_offset_table(TopNodes);
952  Shmem.inform_offset_table(Nodes);
953  Shmem.inform_offset_table(Nextnode);
954  Shmem.inform_offset_table(Points);
955  Shmem.inform_offset_table(Tp->P);
956  Shmem.inform_offset_table(Tp->SphP);
957  Shmem.inform_offset_table(Foreign_Nodes);
958  Shmem.inform_offset_table(Foreign_Points);
959 
960  /* the following is needed to make sure that the shared memory handler on different nodes is already properly initialized */
961  MPI_Barrier(D->Communicator);
962  }
963 }
964 
// Notifies the responsible ghost rank (via TAG_HEADER on the global
// communicator) that shared-memory access to this tree is being ended.
// NOTE(review): signature (orig. line 966) and orig. line 968 (the outer
// condition, presumably guarding on ghost ranks being present) are missing
// from this Doxygen extraction; leading integers are listing line numbers.
965 template <typename node, typename partset, typename point_data, typename foreign_point_data>
967 {
969  {
970  if(TreeSharedMem_ThisTask == 0)
971  {
972  // need to send this flag to the correct processor rank (our shared memoin the global communicator
973  MPI_Send(&TreeInfoHandle, 1, MPI_INT, Shmem.MyShmRankInGlobal, TAG_HEADER, MPI_COMM_WORLD);
974  }
975  }
976 }
977 
// NOTE(review): Doxygen extraction artifact — the function signature (source line 979,
// per the index: void tree_fetch_foreign_nodes(enum ftype fetch_type)) is missing
// from this listing.
//
// Imports tree nodes/particles requested on the fetch stack (StackToFetch, with
// NumOnFetchStack entries) from the ghost ranks that own them, and links the received
// foreign nodes and foreign points into the local tree. Communication proceeds in a
// hypercube pattern (partner = World_ThisTask ^ ngrp); on completion the fetch stack
// is emptied (NumOnFetchStack = 0).
978 template <typename node, typename partset, typename point_data, typename foreign_point_data>
980 {
981  // find out how many we have to fetch from each node
982  int *CountFetch = (int *)Mem.mymalloc_movable_clear(&CountFetch, "CountFetch", Shmem.World_NTask * sizeof(int));
983  int *OffsetFetch = (int *)Mem.mymalloc_movable(&OffsetFetch, "OffsetFetch", Shmem.World_NTask * sizeof(int));
984 
985  for(int i = 0; i < NumOnFetchStack; i++)
986  CountFetch[StackToFetch[i].GhostRank]++;
987 
     // exclusive prefix sum: OffsetFetch[r] = start of rank r's requests after sorting
988  OffsetFetch[0] = 0;
989  for(int i = 1; i < Shmem.World_NTask; i++)
990  OffsetFetch[i] = OffsetFetch[i - 1] + CountFetch[i - 1];
991 
     // group the stack entries by target ghost rank so each rank's requests are contiguous
992  mycxxsort(StackToFetch, StackToFetch + NumOnFetchStack, compare_ghostrank);
993 
994  /* now go through each node in turn, and import from them the requested nodes
995  */
     // CommPTask = smallest power-of-two exponent with 2^CommPTask >= World_NTask
996  int CommPTask;
997  for(CommPTask = 0; Shmem.World_NTask > (1 << CommPTask); CommPTask++)
998  ;
999 
1000  for(int ngrp = 1; ngrp < (1 << CommPTask); ngrp++)
1001  {
1002  int ghost_rank = Shmem.World_ThisTask ^ ngrp; // this is already the responsible ghost rank
1003 
1004  if(ghost_rank < Shmem.World_NTask)
1005  {
1006  if(CountFetch[ghost_rank] > 0)
1007  {
             // assemble the (task, node) request list for this ghost rank
1008  node_req *node_req_send =
1009  (node_req *)Mem.mymalloc_movable(&node_req_send, "node_req_send", CountFetch[ghost_rank] * sizeof(node_req));
1010 
1011  for(int i = 0; i < CountFetch[ghost_rank]; i++)
1012  {
1013  int k = OffsetFetch[ghost_rank] + i;
1014 
1015  node *nop = get_nodep(StackToFetch[k].NodeToOpen, StackToFetch[k].ShmRank);
1016 
1017  node_req_send[i].foreigntask = nop->OriginTask;
1018  node_req_send[i].foreignnode = nop->OriginNode;
1019  }
1020 
1021  /* now make the node request */
             // the MPI tag encodes both the kind of fetch and the tree-info slot,
             // so concurrent fetches for different trees don't mix messages
1022  int tag;
1023  if(fetch_type == FETCH_GRAVTREE)
1024  tag = TAG_FETCH_GRAVTREE + TreeInfoHandle;
1025  else if(fetch_type == FETCH_SPH_DENSITY)
1026  tag = TAG_FETCH_SPH_DENSITY + TreeInfoHandle;
1027  else if(fetch_type == FETCH_SPH_HYDRO)
1028  tag = TAG_FETCH_SPH_HYDRO + TreeInfoHandle;
1029  else if(fetch_type == FETCH_SPH_TREETIMESTEP)
1030  tag = TAG_FETCH_SPH_TREETIMESTEP + TreeInfoHandle;
1031  else
1032  {
1033  tag = 0;
1034  Terminate("tag undefined");
1035  }
1036 
1037  MPI_Send(node_req_send, CountFetch[ghost_rank] * sizeof(node_req), MPI_BYTE, ghost_rank, tag, MPI_COMM_WORLD);
1038  Mem.myfree(node_req_send);
1039 
1040  // get the information about how many nodes and particles hang below each of the nodes
1041  node_count_info *node_info_send = (node_count_info *)Mem.mymalloc_movable(
1042  &node_info_send, "node_info_send", CountFetch[ghost_rank] * sizeof(node_count_info));
1043 
1044  MPI_Recv(node_info_send, CountFetch[ghost_rank] * sizeof(node_count_info), MPI_BYTE, ghost_rank, TAG_N, MPI_COMM_WORLD,
1045  MPI_STATUS_IGNORE);
1046 
1047  /* now find out how many nodes and points we want to import in total */
1048  int n_sendpoints = 0;
1049  int n_sendnodes = 0;
1050 
1051  for(int i = 0; i < CountFetch[ghost_rank]; i++)
1052  {
1053  n_sendpoints += node_info_send[i].count_parts;
1054  n_sendnodes += node_info_send[i].count_nodes;
1055  }
1056 
1057  foreign_point_data *buf_foreignpoints =
1058  (foreign_point_data *)Mem.mymalloc("buf_foreignpoints", n_sendpoints * sizeof(foreign_point_data));
1059 
1060  node *buf_foreignnodes = (node *)Mem.mymalloc("buf_foreignnodes", n_sendnodes * sizeof(node));
1061 
1062  /* now receive the points and nodes */
1063  if(n_sendpoints > 0)
1064  MPI_Recv(buf_foreignpoints, n_sendpoints * sizeof(foreign_point_data), MPI_BYTE, ghost_rank, TAG_PDATA, MPI_COMM_WORLD,
1065  MPI_STATUS_IGNORE);
1066 
1067  if(n_sendnodes > 0)
1068  MPI_Recv(buf_foreignnodes, n_sendnodes * sizeof(node), MPI_BYTE, ghost_rank, TAG_SPHDATA, MPI_COMM_WORLD,
1069  MPI_STATUS_IGNORE);
1070 
1071  /* now we have to link the nodes and particles into the tree */
1072 
1073  int n_nodes_used = 0;
1074  int n_parts_used = 0;
1075 
1076  for(int i = 0; i < CountFetch[ghost_rank]; i++)
1077  {
1078  int k = OffsetFetch[ghost_rank] + i;
1079 
1080  int no = StackToFetch[k].NodeToOpen;
1081  int shmrank = StackToFetch[k].ShmRank;
1082 
1083  node *nop = get_nodep(no, shmrank);
1084 
1085  int n_nodes = node_info_send[i].count_nodes;
1086  int n_parts = node_info_send[i].count_parts;
1087 
                 // per-node spin lock: other tasks on the same shared-memory island
                 // may be fetching/linking this node concurrently
1088  while(nop->access.test_and_set(std::memory_order_acquire))
1089  {
1090  // acquire spin lock
1091  }
1092 
1093  if(nop->cannot_be_opened_locally) // make sure the node hasn't been inserted by another task yet
1094  {
                     // pfirst/plast track the first and the most recently linked
                     // element so the received nodes/points form a sibling chain
1095  int pfirst = -1;
1096  int plast = -1;
1097 
1098  for(int j = 0; j < n_nodes; j++)
1099  {
1100  if(NumForeignNodes >= MaxForeignNodes)
1101  Terminate(
1102  "We are out of storage for foreign nodes: NumForeignNodes=%d MaxForeignNodes=%d j=%d n_parts=%d",
1103  NumForeignNodes, MaxForeignNodes, j, n_parts);
1104 
1105  /* tree index of this node */
1106  int p = EndOfTreePoints + NumForeignNodes++;
1107 
1108  // cannot do a Foreign_Nodes[p - EndOfTreePoints] = buf_foreignnodes[n_nodes_used++]; because our
1109  // std::atomic_flag has a deleted default copy operator
1110  memcpy(static_cast<void *>(&Foreign_Nodes[p - EndOfTreePoints]),
1111  static_cast<void *>(&buf_foreignnodes[n_nodes_used++]), sizeof(node));
1112 
1113  Foreign_Nodes[p - EndOfTreePoints].access.clear();
1114  Foreign_Nodes[p - EndOfTreePoints].flag_already_fetched = 0;
1115 
1116  Foreign_Nodes[p - EndOfTreePoints].nextnode = -1;
1117 
1118  if(plast >= 0) /* this is here still the previous one */
1119  {
1120  if(plast < EndOfForeignNodes) /* have a foreign node */
1121  {
1122  Foreign_Nodes[plast - EndOfTreePoints].sibling = p;
1123  Foreign_Nodes[plast - EndOfTreePoints].sibling_shmrank = Shmem.Island_ThisTask;
1124  }
1125  else
1126  {
1127  Foreign_Points[plast - EndOfForeignNodes].Nextnode = p;
1128  Foreign_Points[plast - EndOfForeignNodes].Nextnode_shmrank = Shmem.Island_ThisTask;
1129  }
1130  }
1131 
1132  if(pfirst < 0)
1133  pfirst = p;
1134 
1135  plast = p;
1136  }
1137 
1138  for(int j = 0; j < n_parts; j++)
1139  {
1140  if(NumForeignPoints >= MaxForeignPoints)
1141  Terminate(
1142  "We are out of storage for foreign points: NumForeignPoints=%d MaxForeignPoints=%d j=%d n_parts=%d",
1143  NumForeignPoints, MaxForeignPoints, j, n_parts);
1144 
1145  /* global tree index of this foreign point */
1146  int p = EndOfForeignNodes + NumForeignPoints++;
1147  Foreign_Points[p - EndOfForeignNodes] = buf_foreignpoints[n_parts_used++];
1148 
1149  if(plast >= 0) /* this is here still the previous one */
1150  {
1151  if(plast < EndOfForeignNodes) /* have a foreign node */
1152  {
1153  Foreign_Nodes[plast - EndOfTreePoints].sibling = p;
1154  Foreign_Nodes[plast - EndOfTreePoints].sibling_shmrank = Shmem.Island_ThisTask;
1155  }
1156  else
1157  {
1158  Foreign_Points[plast - EndOfForeignNodes].Nextnode = p;
1159  Foreign_Points[plast - EndOfForeignNodes].Nextnode_shmrank = Shmem.Island_ThisTask;
1160  }
1161  }
1162 
1163  if(pfirst < 0)
1164  pfirst = p;
1165 
1166  plast = p;
1167  }
1168 
1169  if(plast < 0 || pfirst < 0)
1170  Terminate("plast=%d < 0 || pfirst=%d < 0 n_nodes=%d n_parts=%d", plast, pfirst, n_nodes, n_parts);
1171 
                     // close the chain: the last imported element points to the
                     // sibling of the node that was opened
1172  if(plast >= 0) /* this is here still the previous one */
1173  {
1174  if(plast < EndOfForeignNodes) /* have a foreign node */
1175  {
1176  Foreign_Nodes[plast - EndOfTreePoints].sibling = nop->sibling;
1177  Foreign_Nodes[plast - EndOfTreePoints].sibling_shmrank = nop->sibling_shmrank;
1178 
1179  if(Foreign_Nodes[plast - EndOfTreePoints].cannot_be_opened_locally == 0)
1180  if(D->ThisTask == 0)
1181  Terminate("what? plast - EndOfTreePoints=%d", plast - EndOfTreePoints);
1182  }
1183  else
1184  {
1185  Foreign_Points[plast - EndOfForeignNodes].Nextnode = nop->sibling;
1186  Foreign_Points[plast - EndOfForeignNodes].Nextnode_shmrank = nop->sibling_shmrank;
1187  }
1188  }
1189 
1190  nop->nextnode = pfirst;
1191  nop->nextnode_shmrank = Shmem.Island_ThisTask;
1192 
                     // release-store publishes the fully linked subtree to other tasks
1193  nop->cannot_be_opened_locally.store(0, std::memory_order_release);
1194 
1195  sum_NumForeignNodes += n_nodes;
1196  sum_NumForeignPoints += n_parts;
1197  }
1198  else
1199  {
1200  // skip this node, because apparently it was fetched in the meantime by another mpi task
1201  n_nodes_used += n_nodes;
1202  n_parts_used += n_parts;
1203  }
1204 
1205  nop->access.clear(std::memory_order_release);
1206  }
1207 
             // consistency check: every received node/point must have been consumed
1208  if(n_sendpoints != n_parts_used || n_sendnodes != n_nodes_used)
1209  Terminate("n_sendpoints != n_parts_used || n_sendnodes != n_nodes_used");
1210 
1211  Mem.myfree(buf_foreignnodes);
1212  Mem.myfree(buf_foreignpoints);
1213  Mem.myfree(node_info_send);
1214  }
1215  }
1216  }
1217 
1218  NumOnFetchStack = 0;
1219 
1220  Mem.myfree(OffsetFetch);
1221  Mem.myfree(CountFetch);
1222 }
1223 
// NOTE(review): Doxygen extraction artifact — the function signature (source line 1228,
// per the index: void treefree(void)) and source line 1260 (which, given its use on
// line 1263, presumably declared the local 'ghost_rank' — TODO confirm against the
// original tree.cc) are missing from this listing.
//
// Releases all tree storage in reverse order of allocation (required by the Mem
// stack allocator), frees the shared-memory communicator, and resets the pointers
// to NULL. Terminates if called while no tree is allocated.
1227 template <typename node, typename partset, typename point_data, typename foreign_point_data>
1229 {
1230  MPI_Comm_free(&TreeSharedMemComm);
1231 
1232  if(MaxPart == 0)
1233  return; // nothing to be done
1234 
1235  if(Nodes)
1236  {
1237  if(Father)
1238  {
1239  Mem.myfree_movable(Father);
1240  Mem.myfree_movable(Nextnode);
1241  }
1242  if(Points)
1243  {
1244  Mem.myfree_movable(Points);
1245  }
1246 
1247  Mem.myfree_movable(Nodes + MaxPart + D->NTopnodes);
1248 
     // top-level node storage is owned by the first task of the shared-memory group
1249  if(TreeSharedMem_ThisTask == 0)
1250  {
1251  if(TreeSharedMem_NTask == Shmem.World_NTask || D->NTask < Shmem.Sim_NTask)
1252  {
1253  Mem.myfree_movable(TopNodes + MaxPart);
1254  Mem.myfree_movable(NodeIndex);
1255  Mem.myfree_movable(NodeSibling);
1256  Mem.myfree_movable(NodeLevel);
1257  }
1258  else
1259  {
1261 
1262  // tell the ghost rank to free the storage
1263  MPI_Send(&TreeInfoHandle, 1, MPI_INT, ghost_rank, TAG_TOPNODE_FREE, MPI_COMM_WORLD);
1264  }
1265  }
1266 
1267  Mem.myfree(TreeSharedMemBaseAddr);
1268 
     // offset tables are freed in reverse order of their allocation
1269  Mem.myfree(TreePS_offsets);
1270  Mem.myfree(TreeSphP_offsets);
1271  Mem.myfree(TreeP_offsets);
1272  Mem.myfree(TreeForeign_Points_offsets);
1273  Mem.myfree(TreeForeign_Nodes_offsets);
1274  Mem.myfree(TreeNextnode_offsets);
1275  Mem.myfree(TreePoints_offsets);
1276  Mem.myfree(TreeNodes_offsets);
1277 
1278  Mem.myfree_movable(Recv_offset);
1279  Mem.myfree_movable(Recv_count);
1280  Mem.myfree_movable(Send_offset);
1281  Mem.myfree_movable(Send_count);
1282 
     // mark the tree as deallocated so a later treefree() call is caught below
1283  Nodes = NULL;
1284  TopNodes = NULL;
1285  NodeIndex = NULL;
1286  NodeSibling = NULL;
1287  NodeLevel = NULL;
1288  Points = NULL;
1289  Nextnode = NULL;
1290  Father = NULL;
1291  }
1292  else
1293  Terminate("trying to free the tree even though it's not allocated");
1294 }
1295 
// NOTE(review): Doxygen extraction artifact — the first line of the signature (source
// line 1297, per the index: tree_export_node_threads(int no, int i, thread_data *thread,
// offset_tuple off = 0)) is missing from this listing.
//
// Resolves the top-level leaf node 'no' to its owning task and node index, then
// delegates to tree_export_node_threads_by_task_and_node() to record the export.
1296 template <typename node, typename partset, typename point_data, typename foreign_point_data>
1298  offset_tuple off)
1299 {
     // top-level leaves are indexed starting at MaxPart + MaxNodes
1300  int task = D->TaskOfLeaf[no - (MaxPart + MaxNodes)];
1301  int nodeindex = NodeIndex[no - (MaxPart + MaxNodes)];
1302 
1303  tree_export_node_threads_by_task_and_node(task, nodeindex, i, thread, off);
1304 }
1305 
// NOTE(review): Doxygen extraction artifact — the first line of the signature (source
// line 1307, per the index: tree_export_node_threads_by_task_and_node(int task,
// int nodeindex, int i, ...)) is missing from this listing.
//
// Registers particle 'i' for export to 'task': the particle itself is appended to
// thread->PartList at most once per (particle, task) pair (deduplicated via
// Exportflag), while the node entry is always appended. Node entries grow downward
// from the end of the export buffer (negative indexing), particles grow upward;
// ExportSpace tracks the remaining room between the two.
1306 template <typename node, typename partset, typename point_data, typename foreign_point_data>
1308  thread_data *thread,
1309  offset_tuple off)
1310 {
1311  if(task < 0 || task >= D->NTask)
1312  Terminate("task < 0 || task >= D->NTask");
1313 
1314  if(task != D->ThisTask)
1315  {
     // Exportflag[task] == i means particle i was already queued for this task
1316  if(thread->Exportflag[task] != i)
1317  {
1318  thread->Exportflag[task] = i;
1319  int nexp = thread->Nexport++;
1320  thread->PartList[nexp].Task = task;
1321  thread->PartList[nexp].Index = i;
1322  thread->ExportSpace -= thread->ItemSize;
1323  }
1324 
     // node entries are stored back-to-front: index -1 - nexp from the buffer end
1325  int nexp = thread->NexportNodes++;
1326  nexp = -1 - nexp;
1327  data_nodelist *nodelist = (data_nodelist *)(((char *)thread->PartList) + thread->InitialSpace);
1328  nodelist[nexp].Task = task;
1329  nodelist[nexp].Index = i;
1330  nodelist[nexp].NodeInfo.Node = nodeindex;
1331  thread->ExportSpace -= (sizeof(data_nodelist) + sizeof(int));
1332  }
1333 }
1334 
1335 #include "../ngbtree/ngbtree.h"
1337 
1338 #include "../gravtree/gravtree.h"
1340 
1341 #ifdef FOF
1342 #include "../fof/foftree.h"
1344 #if defined(LIGHTCONE) && (defined(LIGHTCONE_PARTICLES_GROUPS) || defined(LIGHTCONE_IMAGE_COMP_HSML_VELDISP))
1345 /* make sure that we instantiate the template */
1346 #include "../data/lcparticles.h"
1349 #endif
1350 #endif
global_data_all_processes All
Definition: main.cc:40
Definition: domain.h:31
double timediff(double t0, double t1)
Definition: logs.cc:488
double second(void)
Definition: logs.cc:471
double getAllocatedBytesInMB(void)
Definition: mymalloc.h:68
char * Base
Definition: mymalloc.h:49
MyIntPosType ls
Definition: dtypes.h:210
int * GetNodeIDForSimulCommRank
int Island_ThisTask
void inform_offset_table(void *p)
int Island_NTask
void ** SharedMemBaseAddr
int * GetShmRankForSimulCommRank
tree_storage_info tree_info[MAX_TREE_INFOS]
int Island_Smallest_WorldTask
int Sim_ThisTask
int * GetGhostRankForSimulCommRank
int MyShmRankInGlobal
int World_ThisTask
Definition: tree.h:91
void tree_export_node_threads(int no, int i, thread_data *thread, offset_tuple off=0)
Definition: tree.cc:1297
void treeallocate_share_topnode_addresses(void)
Definition: tree.cc:890
void cleanup_shared_memory_access(void)
Definition: tree.cc:966
void treeallocate(int max_partindex, partset *Pptr, domain< partset > *Dptr)
Definition: tree.cc:776
void tree_fetch_foreign_nodes(enum ftype fetch_type)
Definition: tree.cc:979
void prepare_shared_memory_access(void)
Definition: tree.cc:909
void treefree(void)
Definition: tree.cc:1228
int treebuild(int ninsert, int *indexlist)
Definition: tree.cc:52
ftype
Definition: tree.h:265
void tree_initialize_leaf_node_access_info(void)
Definition: tree.cc:174
void tree_export_node_threads_by_task_and_node(int task, int nodeindex, int i, thread_data *thread, offset_tuple off=0)
Definition: tree.cc:1307
#define MAXLEN_PATH
Definition: constants.h:300
#define BASENUMBER
Definition: constants.h:303
double mycxxsort(T *begin, T *end, Tcomp comp)
Definition: cxxsort.h:39
uint32_t MyIntPosType
Definition: dtypes.h:35
#define BITS_FOR_POSITIONS
Definition: dtypes.h:37
#define MAX_TREE_ALLOC_FACTOR
Definition: gravtree.h:39
logs Logs
Definition: main.cc:43
#define TIMER_START(counter)
Starts the timer counter.
Definition: logs.h:197
#define TIMER_STOP(counter)
Stops the timer counter.
Definition: logs.h:220
#define TIMER_STOPSTART(stop, start)
Stops the timer 'stop' and starts the timer 'start'.
Definition: logs.h:231
#define Terminate(...)
Definition: macros.h:19
shmem Shmem
Definition: main.cc:45
#define TAG_SPHDATA
Definition: mpi_utils.h:28
#define TAG_HEADER
Definition: mpi_utils.h:26
#define TAG_FETCH_SPH_DENSITY
Definition: mpi_utils.h:103
#define TAG_FETCH_SPH_TREETIMESTEP
Definition: mpi_utils.h:105
#define TAG_DENS_A
Definition: mpi_utils.h:50
#define TAG_TOPNODE_FREE
Definition: mpi_utils.h:19
#define TAG_PDATA
Definition: mpi_utils.h:27
#define TAG_FETCH_GRAVTREE
Definition: mpi_utils.h:102
#define TAG_TOPNODE_ALLOC
Definition: mpi_utils.h:21
#define TAG_N
Definition: mpi_utils.h:25
#define TAG_FETCH_SPH_HYDRO
Definition: mpi_utils.h:104
#define TAG_TOPNODE_OFFSET
Definition: mpi_utils.h:20
#define TAG_METDATA
Definition: mpi_utils.h:101
memory Mem
Definition: main.cc:44
peanokey peano_hilbert_key(MyIntPosType x, MyIntPosType y, MyIntPosType z, int bits)
Definition: peano.cc:83
int Index
Definition: tree.h:84
node_info NodeInfo
Definition: tree.h:85
int Task
Definition: tree.h:83
integertime Ti_Current
Definition: allvars.h:188
int Node
Definition: tree.h:78
size_t ItemSize
Definition: dtypes.h:345
int NexportNodes
Definition: dtypes.h:336
int * Exportflag
Definition: dtypes.h:354
size_t InitialSpace
Definition: dtypes.h:344
size_t ExportSpace
Definition: dtypes.h:343
data_partlist * PartList
Definition: dtypes.h:351
int Nexport
Definition: dtypes.h:335
int foreigntask
Definition: tree.h:257
int foreignnode
Definition: tree.h:258
#define TREE_MAX_ITER
Definition: tree.h:24
#define TREE_MODE_BRANCH
Definition: tree.h:19
#define TREE_NUM_BEFORE_NODESPLIT
Definition: tree.h:16
#define TREE_MODE_TOPLEVEL
Definition: tree.h:20