GADGET-4
halotrees.cc
1 /*******************************************************************************
2  * \copyright This file is part of the GADGET4 N-body/SPH code developed
3  * \copyright by Volker Springel. Copyright (C) 2014-2020 by Volker Springel
4  * \copyright (vspringel@mpa-garching.mpg.de) and all contributing authors.
5  *******************************************************************************/
6 
12 #include "gadgetconfig.h"
13 
14 #ifdef MERGERTREE
15 
16 #include <gsl/gsl_rng.h>
17 #include <hdf5.h>
18 #include <math.h>
19 #include <mpi.h>
20 #include <stdio.h>
21 #include <stdlib.h>
22 #include <string.h>
23 #include <sys/stat.h>
24 #include <sys/types.h>
25 #include <unistd.h>
26 
27 #include "../data/allvars.h"
28 #include "../data/dtypes.h"
29 #include "../data/mymalloc.h"
30 #include "../fof/fof.h"
31 #include "../fof/fof_io.h"
32 #include "../io/hdf5_util.h"
33 #include "../io/io.h"
34 #include "../logs/timer.h"
35 #include "../main/main.h"
36 #include "../main/simulation.h"
37 #include "../mergertree/io_descendant.h"
38 #include "../mergertree/io_halotrees.h"
39 #include "../mergertree/io_progenitors.h"
40 #include "../mergertree/io_treelinks.h"
41 #include "../mergertree/mergertree.h"
42 #include "../mpi_utils/mpi_utils.h"
43 #include "../sort/parallel_sort.h"
44 #include "../subfind/subfind.h"
45 #include "../system/system.h"
46 
47 /* This is the main function for constructing the halo trees.
48  */
49 void mergertree::halotrees_construct(int lastsnapnr)
50 {
51  domain<simparticles> Domain{Communicator, Sp};
52  fof<simparticles> FoF{Communicator, Sp, &Domain};
53 
54  LastSnapShotNr = lastsnapnr;
55 
56  Cats = (halo_catalogue *)Mem.mymalloc_clear("Cats", sizeof(halo_catalogue) * (LastSnapShotNr + 1));
57  CatTimes = (times_catalogue *)Mem.mymalloc("CatTimes", sizeof(times_catalogue) * (LastSnapShotNr + 1));
58 
59  /* Let's load all catalogs. */
60  halotrees_load_catalogues(&FoF);
61  mpi_printf("\nMERGERTREE: Catalogues loaded\n");
62 
63  /* As preliminary work, let everybody know the number of subhalos on each rank, and assign a global SubhaloNr to each. */
64  halotrees_assign_global_subhalonr_and_groupnr();
65  mpi_printf("\nMERGERTREE: SubhaloNr assigned\n");
66 
67  /* Assign the halos to disjoint trees - initially, each halo is in its own tree, which will then be linked up to form bigger trees.
68  * To get a reasonably memory-balanced distribution, we assign random tasks to them initially. */
69  halotrees_initial_treeassignment();
70  mpi_printf("\nMERGERTREE: Initial tree assignment\n");
71 
72  /* We now proceed by linking the halos according to the descendant information */
73  halotrees_link_trees();
74  mpi_printf("\nMERGERTREE: Halo tree linked\n");
75 
76  /* because the previously selected first progenitor is not necessarily the most significant branch, we make a new choice
77  * by sorting the progenitors by their significance, and then redetermine the first progenitor among the set of progenitors */
78  halotrees_determine_mainprogenitor();
79  mpi_printf("\nMERGERTREE: Determined the main progenitor\n");
80 
81  /* Now the trees are linked, i.e. all halos belonging to the same tree have a common tree-id. We proceed by assigning new indices to
82  * all halos within the same treeid. */
83  halotrees_assign_new_treeindices();
84  mpi_printf("\nMERGERTREE: New tree indices assigned\n");
85 
86  /* next, we need to remap the global descendant and first/next progenitor pointers to another set of such pointers that use the local
87  * tree ids */
88  halotrees_remap_treepointers();
89  mpi_printf("\nMERGERTREE: Remapping done\n");
90 
91  /* now we can go ahead and move the halos belonging to the same tree together, and set up a table with the trees */
92  halotrees_collect_treehalos();
93  mpi_printf("\nMERGERTREE: Treehalos collected\n\n");
94 
95  /* finally, output the trees to files */
96  halotrees_io TreesIO{this, this->Communicator, All.SnapFormat};
97  TreesIO.halotrees_save_trees();
98 
99  mpi_printf("\nMERGERTREE: Trees saved: A total number of %lld subhalos have been assembled in %lld trees.", TotNhalos, TotNtrees);
100  mpi_printf("\nMERGERTREE: Largest tree contains %d halos.\n\n", LargestHaloCount);
101 
102  /* now also output links from the subhalo catalogues to the trees */
103  halotrees_save_subhalo_treelinks();
104  mpi_printf("\nMERGERTREE: Tree links saved\n\n");
105 }
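
/* [Added illustrative sketch - not part of the original GADGET-4 source.]
 * The tree-local pointer fields set up above (TreeMainProgenitor, TreeDescendant, etc.) allow a
 * tree to be navigated once the halos of one tree are stored contiguously, ordered by TreeIndex,
 * which is the layout produced by halotrees_collect_treehalos(). The hypothetical reader-side
 * helper below assumes such a buffer of treehalo_type entries and simply measures the length of
 * the main progenitor branch starting from a given halo.
 */
static long long sketch_main_branch_length(const mergertree::treehalo_type *tree, long long start_index)
{
  long long length = 0;

  /* TreeMainProgenitor holds the TreeIndex of the main progenitor, or -1 where the branch ends */
  for(long long idx = start_index; idx >= 0; idx = tree[idx].TreeMainProgenitor)
    length++;

  return length;
}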
106 
107 /* This function is in charge of loading all the group catalogues.
108  */
109 void mergertree::halotrees_load_catalogues(fof<simparticles> *FoF)
110 {
111  /* let's first get the catalogs */
112  for(int num = 0; num <= LastSnapShotNr; num++)
113  {
114  /* load the group catalog */
115  fof_io<simparticles> FoF_IO{FoF, Communicator, All.SnapFormat};
116  FoF_IO.fof_subfind_load_groups(num);
117 
118  /* bring all groups and subhalos into the order in which they were stored in the files, because the
119  * parallel load routine may have scrambled this order.
120  */
121  mycxxsort_parallel(FoF->Group, FoF->Group + FoF->Ngroups, compare_Group_FileOffset, Communicator);
122  mycxxsort_parallel(FoF->Subhalo, FoF->Subhalo + FoF->Nsubhalos, compare_Subhalo_FileOffset, Communicator);
123 
124  if(FoF_IO.LegacyFormat)
125  {
126  mpi_printf("\nFOF/SUBFIND: Legacy format from Arepo detected, trying to adjust for this.\n");
127  FoF->subfind_redetermine_groupnr();
128  FoF->fof_assign_group_offset();
129  FoF->subfind_assign_subhalo_offsettype();
130  }
131 
132  /* save the groups info for later use */
133  Cats[num].Ngroups = FoF->Ngroups;
134  Cats[num].TotNgroups = FoF->TotNgroups;
135 
136  Cats[num].Nsubhalos = FoF->Nsubhalos;
137  Cats[num].TotNsubhalos = FoF->TotNsubhalos;
138 
139  CatTimes[num].Time = FoF->Time;
140  CatTimes[num].Redshift = FoF->Redshift;
141 
142  // we copy the group catalogue data here. Simply aliasing the pointer would violate the rule that a (movable)
143  // memory pointer must not be copied into another variable, and would later lead to corruption once the IO object is destroyed...
144  Cats[num].Group = (fof<simparticles>::group_properties *)Mem.mymalloc_movable(
145  &Cats[num].Group, "Cats[num].Group", FoF->Ngroups * sizeof(fof<simparticles>::group_properties));
146  memcpy(Cats[num].Group, FoF->Group, FoF->Ngroups * sizeof(fof<simparticles>::group_properties));
147  Mem.myfree_movable(FoF->Group);
148 
149  Cats[num].Subhalo = (fof<simparticles>::subhalo_properties *)Mem.mymalloc_movable(
150  &Cats[num].Subhalo, "Subhalo", Cats[num].Nsubhalos * sizeof(fof<simparticles>::subhalo_properties));
151  memcpy(Cats[num].Subhalo, FoF->Subhalo, Cats[num].Nsubhalos * sizeof(fof<simparticles>::subhalo_properties));
152  Mem.myfree_movable(FoF->Subhalo);
153 
154  /* allocate some storage for extra subhalo info */
155  Cats[num].SubExt =
156  (subhalo_extension *)Mem.mymalloc_movable(&Cats[num].SubExt, "Cats[num].SubExt", FoF->Nsubhalos * sizeof(subhalo_extension));
157  }
158 
159  /* now let's load the descendant tree information (except for the last snapshot) */
160  for(int num = 0; num < LastSnapShotNr; num++)
161  {
162  /* fetch the descendants for the catalogs */
163  descendant_io DescIO{this, this->Communicator, All.SnapFormat};
164  DescIO.mergertree_read_descendants(num);
165 
166  /* if needed, restore proper order as in files */
167  mycxxsort_parallel(Descendants, Descendants + DescIO.Nsubhalos, compare_Desc_FileOffset, Communicator);
168 
169  if(DescIO.TotNsubhalos != Cats[num].TotNsubhalos)
170  Terminate("inconsistency: DescIO.TotNsubhalos=%lld != Cats[num].TotNsubhalos=%lld", DescIO.TotNsubhalos,
171  Cats[num].TotNsubhalos);
172 
173  /* it's possible that the local number of descendants, Nsubhalos, does not match Cats[num].Nsubhalos,
174  * implying that we need to reshuffle how things are distributed over processors.
175  */
176  halotrees_reshuffle((char **)&Descendants, sizeof(desc_list), DescIO.Nsubhalos, Cats[num].Nsubhalos);
177 
178  /* retain info for later use
179  * we copy the data here. Simply aliasing the pointer would violate the rule that a (movable) memory
180  * pointer must not be copied into another variable, and would later lead to corruption once the IO object is destroyed...
181  */
182  Cats[num].Descendants =
183  (desc_list *)Mem.mymalloc_movable(&Cats[num].Descendants, "Cats[num].Descendants", Cats[num].Nsubhalos * sizeof(desc_list));
184  memcpy(Cats[num].Descendants, Descendants, Cats[num].Nsubhalos * sizeof(desc_list));
185  Mem.myfree_movable(Descendants);
186  }
187 
188  /* load the progenitor tree information (except for the first snapshot) */
189  for(int num = 1; num <= LastSnapShotNr; num++)
190  {
191  /* fetch from files */
192  progenitors_io ProgIO{this, this->Communicator, All.SnapFormat};
193  ProgIO.mergertree_read_progenitors(num);
194 
195  /* if needed, restore proper order as in files */
196  mycxxsort_parallel(Progenitors, Progenitors + ProgIO.Nsubhalos, compare_Prog_FileOffset, Communicator);
197 
198  if(ProgIO.TotNsubhalos != Cats[num].TotNsubhalos)
199  Terminate("inconsistency: ProgIO.TotNsubhalos=%lld != Cats[num].TotNsubhalos=%lld", ProgIO.TotNsubhalos,
200  Cats[num].TotNsubhalos);
201 
202  /* it's possible that the local number of progenitors, Nsubhalos, does not match Cats[num].Nsubhalos,
203  * implying that we need to reshuffle how things are distributed over processors.
204  */
205  halotrees_reshuffle((char **)&Progenitors, sizeof(prog_list), ProgIO.Nsubhalos, Cats[num].Nsubhalos);
206 
207  /* retain info for later use
208  * we copy the data here. Simply aliasing the pointer would violate the rule that a (movable) memory
209  * pointer must not be copied into another variable, and would later lead to corruption once the IO object is destroyed...
210  */
211  Cats[num].Progenitors =
212  (prog_list *)Mem.mymalloc_movable(&Cats[num].Progenitors, "Cats[num].Progenitors", Cats[num].Nsubhalos * sizeof(prog_list));
213  memcpy(Cats[num].Progenitors, Progenitors, Cats[num].Nsubhalos * sizeof(prog_list));
214  Mem.myfree_movable(Progenitors);
215  }
216 }
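
/* [Added illustrative sketch - not part of the original GADGET-4 source.]
 * The comparison functions used above to restore the on-disk order after the parallel load
 * (compare_Group_FileOffset, compare_Subhalo_FileOffset, compare_Desc_FileOffset, ...) are assumed
 * to simply order the elements by the file offset recorded for them; the member name 'FileOffset'
 * is an assumption suggested by the comparator names. A minimal sketch:
 */
static bool sketch_compare_group_fileoffset(const fof<simparticles>::group_properties &a,
                                            const fof<simparticles>::group_properties &b)
{
  return a.FileOffset < b.FileOffset; /* smaller file offset first, i.e. the order in which the groups were written */
}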
217 
218 void mergertree::halotrees_save_subhalo_treelinks(void)
219 {
220  for(int num = 0; num <= LastSnapShotNr; num++)
221  {
222  treelinks_io TreeLinkIO{this, this->Communicator, All.SnapFormat};
223 
224  TreeLinkIO.Nsubhalos = Cats[num].Nsubhalos;
225  TreeLinkIO.TotNsubhalos = Cats[num].TotNsubhalos;
226 
227  TreeLink = (treelink_data *)Mem.mymalloc("TreeLink", TreeLinkIO.Nsubhalos * sizeof(treelink_data));
228  for(int i = 0; i < TreeLinkIO.Nsubhalos; i++)
229  {
230  TreeLink[i].TreeID = Cats[num].Subhalo[i].TreeID;
231  TreeLink[i].TreeIndex = Cats[num].Subhalo[i].TreeIndex;
232  }
233 
234  /* save the tree-link info */
235  TreeLinkIO.treelinks_save(num);
236 
237  Mem.myfree(TreeLink);
238  }
239 }
240 
241 /* Here we collect the group and subhalo numbers stored on each processor for each snapshot, for communication purposes.
242  * We also assign to the subhalos of each catalogue a running global subhalo number, which then serves as a unique identifier within
243  * this snapshot's catalogue.
244  */
245 void mergertree::halotrees_assign_global_subhalonr_and_groupnr(void)
246 {
247  long long grprev = 0;
248 
249  for(int num = 0; num <= LastSnapShotNr; num++)
250  {
251  Cats[num].TabNgroups = (int *)Mem.mymalloc("Cats[num].TabNgroups", NTask * sizeof(int));
252  MPI_Allgather(&Cats[num].Ngroups, 1, MPI_INT, Cats[num].TabNgroups, 1, MPI_INT, Communicator);
253 
254  Cats[num].TabNsubhalos = (int *)Mem.mymalloc("Cats[num].TabNsubhalos", NTask * sizeof(int));
255  MPI_Allgather(&Cats[num].Nsubhalos, 1, MPI_INT, Cats[num].TabNsubhalos, 1, MPI_INT, Communicator);
256 
257  long long subprev = 0;
258 
259  for(int i = 0; i < ThisTask; i++)
260  subprev += Cats[num].TabNsubhalos[i];
261 
262  for(int i = 0; i < Cats[num].Nsubhalos; i++)
263  Cats[num].Subhalo[i].SubhaloNr = subprev + i;
264 
265  /* Note: SubhaloNr is now the quantity to which Descendant/FirstProgenitor/NextProgenitor refer.
266  */
267 
268  /* also set the Group[].GroupNr field (it was not among the fields that were read in).
269  * In contrast, Subhalo[].GroupNr was read in */
270  long long nbefore = 0;
271 
272  for(int i = 0; i < ThisTask; i++)
273  nbefore += Cats[num].TabNgroups[i];
274 
275  for(int i = 0; i < Cats[num].Ngroups; i++)
276  Cats[num].Group[i].GroupNr = nbefore + i;
277 
278  /* define a UniqueGroupNr field that provides a group number unique across all snapshots */
279  for(int i = 0; i < Cats[num].Nsubhalos; i++)
280  Cats[num].Subhalo[i].UniqueGroupNr = Cats[num].Subhalo[i].GroupNr + grprev;
281 
282  grprev += Cats[num].TotNgroups;
283 
284  /* assign some special properties to the subhalos in the tree which come from the FOF group catalogue (like M200, etc.) */
285 
286  for(int i = 0; i < Cats[num].Nsubhalos; i++)
287  Cats[num].Subhalo[i].M_Crit200 = 0;
288 
289  int *Send_count = (int *)Mem.mymalloc("Send_count", sizeof(int) * NTask);
290  int *Send_offset = (int *)Mem.mymalloc("Send_offset", sizeof(int) * NTask);
291  int *Recv_count = (int *)Mem.mymalloc("Recv_count", sizeof(int) * NTask);
292  int *Recv_offset = (int *)Mem.mymalloc("Recv_offset", sizeof(int) * NTask);
293 
294  struct exch_data
295  {
296  long long GroupNr;
297  MyFloat M_Crit200;
298  int loc_index;
299  };
300 
301  exch_data *export_data = NULL, *import_data = NULL;
302  int nimport = 0, nexport = 0;
303 
304  /* for communication bookkeeping reasons, we traverse the counting pattern twice */
305  for(int mode = 0; mode < 2; mode++)
306  {
307  for(int i = 0; i < NTask; i++)
308  Send_count[i] = 0;
309 
310  int target = 0;
311  long long ngroup_previous = 0;
312 
313  for(int i = 0; i < Cats[num].Nsubhalos; i++)
314  {
315  /* select only the main subhalos */
316  if(Cats[num].Subhalo[i].SubRankInGr == 0)
317  {
318  while(target < NTask - 1 && Cats[num].Subhalo[i].GroupNr >= (ngroup_previous + Cats[num].TabNgroups[target]))
319  {
320  ngroup_previous += Cats[num].TabNgroups[target];
321  target++;
322  }
323 
324  if(mode == 0)
325  Send_count[target]++;
326  else
327  {
328  int off = Send_offset[target] + Send_count[target]++;
329 
330  export_data[off].loc_index = i;
331  export_data[off].GroupNr = Cats[num].Subhalo[i].GroupNr;
332  }
333  }
334  }
335 
336  if(mode == 0)
337  {
338  MPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, Communicator);
339  Recv_offset[0] = Send_offset[0] = 0;
340  for(int j = 0; j < NTask; j++)
341  {
342  nimport += Recv_count[j];
343  nexport += Send_count[j];
344  if(j > 0)
345  {
346  Send_offset[j] = Send_offset[j - 1] + Send_count[j - 1];
347  Recv_offset[j] = Recv_offset[j - 1] + Recv_count[j - 1];
348  }
349  }
350 
351  export_data = (exch_data *)Mem.mymalloc("export_data", nexport * sizeof(exch_data));
352  import_data = (exch_data *)Mem.mymalloc("import_data", nimport * sizeof(exch_data));
353  }
354  }
355 
356  /* send data to target processors */
357  for(int ngrp = 0; ngrp < (1 << PTask); ngrp++)
358  {
359  int recvTask = ThisTask ^ ngrp;
360  if(recvTask < NTask)
361  if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0)
362  MPI_Sendrecv(&export_data[Send_offset[recvTask]], Send_count[recvTask] * sizeof(exch_data), MPI_BYTE, recvTask,
363  TAG_DENS_B, &import_data[Recv_offset[recvTask]], Recv_count[recvTask] * sizeof(exch_data), MPI_BYTE,
364  recvTask, TAG_DENS_B, Communicator, MPI_STATUS_IGNORE);
365  }
366 
367  long long firstgrnr = 0;
368  for(int i = 0; i < ThisTask; i++)
369  firstgrnr += Cats[num].TabNgroups[i];
370 
371  /* now read out the information we want from the groups */
372 
373  for(int i = 0; i < nimport; i++)
374  {
375  int index = import_data[i].GroupNr - firstgrnr;
376 
377  if(Cats[num].Group[index].GroupNr != import_data[i].GroupNr)
378  Terminate(
379  "bummer: num=%d i=%d Cats[num].Ngroups=%d nimport=%d index=%d Cats[num].Group[index].GroupNr=%lld "
380  "import_data[i].GroupNr=%lld\n",
381  num, i, Cats[num].Ngroups, nimport, index, Cats[num].Group[index].GroupNr, import_data[i].GroupNr);
382 
383  import_data[i].M_Crit200 = Cats[num].Group[index].M_Crit200;
384  }
385 
386  /* send the results back */
387  for(int ngrp = 0; ngrp < (1 << PTask); ngrp++) /* note: here we also have a transfer from each task to itself (for ngrp=0) */
388  {
389  int recvTask = ThisTask ^ ngrp;
390  if(recvTask < NTask)
391  if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0)
392  MPI_Sendrecv(&import_data[Recv_offset[recvTask]], Recv_count[recvTask] * sizeof(exch_data), MPI_BYTE, recvTask,
393  TAG_DENS_B, &export_data[Send_offset[recvTask]], Send_count[recvTask] * sizeof(exch_data), MPI_BYTE,
394  recvTask, TAG_DENS_B, Communicator, MPI_STATUS_IGNORE);
395  }
396 
397  /* now read it out and assign the data */
398  for(int i = 0; i < nexport; i++)
399  Cats[num].Subhalo[export_data[i].loc_index].M_Crit200 = export_data[i].M_Crit200;
400 
401  Mem.myfree(import_data);
402  Mem.myfree(export_data);
403 
404  Mem.myfree(Recv_offset);
405  Mem.myfree(Recv_count);
406  Mem.myfree(Send_offset);
407  Mem.myfree(Send_count);
408  }
409 }
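
/* [Added illustrative sketch - not part of the original GADGET-4 source.]
 * The global numbering applied above follows a simple pattern: gather the per-rank element counts,
 * sum up the counts of all lower ranks, and number the local elements consecutively starting from
 * that offset. The standalone helper below (hypothetical names) shows the idea in isolation.
 */
static void sketch_assign_global_numbers(long long *global_nr, int nlocal, MPI_Comm comm)
{
  int ntask, thistask;
  MPI_Comm_size(comm, &ntask);
  MPI_Comm_rank(comm, &thistask);

  /* make the local counts of all ranks known everywhere */
  int *counts = (int *)malloc(ntask * sizeof(int));
  MPI_Allgather(&nlocal, 1, MPI_INT, counts, 1, MPI_INT, comm);

  /* offset of this rank = sum of the counts of all lower ranks */
  long long before = 0;
  for(int i = 0; i < thistask; i++)
    before += counts[i];

  /* consecutive, globally unique numbers for the local elements */
  for(int i = 0; i < nlocal; i++)
    global_nr[i] = before + i;

  free(counts);
}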
410 
411 /* Initially give each subhalo its own unique TreeID, and place it on a randomly chosen processor.
412  */
413 void mergertree::halotrees_initial_treeassignment(void)
414 {
415  long long previous = 0;
416  for(int num = LastSnapShotNr; num >= 0; num--)
417  {
418  long long prevtask = 0;
419  for(int i = 0; i < ThisTask; i++)
420  prevtask += Cats[num].TabNsubhalos[i];
421 
422  for(int i = 0; i < Cats[num].Nsubhalos; i++)
423  {
424  Cats[num].Subhalo[i].TreeTask = (int)(get_random_number() * NTask);
425  Cats[num].Subhalo[i].TreeID = previous + prevtask + i;
426  }
427 
428  previous += Cats[num].TotNsubhalos;
429  }
430 }
431 
432 void mergertree::halotrees_select_interior_min_newtreeid(int mode, tlink *treehalos, long long totnsubs)
433 {
434  for(int backforth = -1; backforth <= 1; backforth += 2)
435  {
436  long long i;
437 
438  if(backforth == -1)
439  i = 0;
440  else
441  i = totnsubs - 2;
442 
443  while(i >= 0 && i <= totnsubs - 2)
444  {
445  if((mode == 0 && treehalos[i].UniqueGroupNr == treehalos[i + 1].UniqueGroupNr) ||
446  (mode == 1 && treehalos[i].TreeID == treehalos[i + 1].TreeID))
447  {
448  if(treehalos[i].NewTreeID > treehalos[i + 1].NewTreeID)
449  {
450  treehalos[i].NewTreeID = treehalos[i + 1].NewTreeID;
451  treehalos[i].NewTreeTask = treehalos[i + 1].NewTreeTask;
452  }
453  else if(treehalos[i].NewTreeID < treehalos[i + 1].NewTreeID)
454  {
455  treehalos[i + 1].NewTreeID = treehalos[i].NewTreeID;
456  treehalos[i + 1].NewTreeTask = treehalos[i].NewTreeTask;
457  }
458  }
459  if(backforth == -1)
460  i++;
461  else
462  i--;
463  }
464  }
465 }
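
/* [Added illustrative sketch - not part of the original GADGET-4 source.]
 * The routine above is a forward/backward sweep that spreads the smallest NewTreeID through runs of
 * equal keys (same UniqueGroupNr or same TreeID) in a sorted array. Reduced to plain integer labels
 * (hypothetical helper), the same idea reads:
 */
static void sketch_propagate_min_label(const long long *key, long long *label, long long n)
{
  /* forward sweep: pull smaller labels to the right within a run of equal keys */
  for(long long i = 1; i < n; i++)
    if(key[i] == key[i - 1] && label[i - 1] < label[i])
      label[i] = label[i - 1];

  /* backward sweep: pull smaller labels to the left as well */
  for(long long i = n - 2; i >= 0; i--)
    if(key[i] == key[i + 1] && label[i + 1] < label[i])
      label[i] = label[i + 1];
}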
466 
467 long long mergertree::halotrees_join_trees_via_fof_or_mostboundid_bridges(int mode)
468 {
469  long long totnsubs = 0;
470 
471  for(int num = 0; num <= LastSnapShotNr; num++)
472  totnsubs += Cats[num].Nsubhalos;
473 
474  tlink *treehalos = (tlink *)Mem.mymalloc("treehalos", (totnsubs + 1) * sizeof(tlink));
475 
476  long long count = 0;
477 
478  for(int num = 0; num <= LastSnapShotNr; num++)
479  for(int i = 0; i < Cats[num].Nsubhalos; i++)
480  {
481  treehalos[count].TreeID = Cats[num].Subhalo[i].TreeID;
482  treehalos[count].TreeTask = Cats[num].Subhalo[i].TreeTask;
483 
484  treehalos[count].NewTreeID = Cats[num].Subhalo[i].TreeID;
485  treehalos[count].NewTreeTask = Cats[num].Subhalo[i].TreeTask;
486 
487  treehalos[count].OrigTask = ThisTask;
488  treehalos[count].OrderIndex = count;
489 
490  if(mode == 0)
491  treehalos[count].UniqueGroupNr = Cats[num].Subhalo[i].UniqueGroupNr;
492  else
493  treehalos[count].UniqueGroupNr = Cats[num].Subhalo[i].SubMostBoundID;
494 
495  count++;
496  }
497 
498  long long *count_list = (long long *)Mem.mymalloc("count_list", NTask * sizeof(long long));
499  MPI_Allgather(&totnsubs, sizeof(long long), MPI_BYTE, count_list, sizeof(long long), MPI_BYTE, Communicator);
500 
501  int next_task = -1;
502  for(int i = ThisTask + 1; i < NTask; i++)
503  if(count_list[i] > 0)
504  {
505  next_task = i;
506  break;
507  }
508 
509  int prev_task = -1;
510  for(int i = ThisTask - 1; i >= 0; i--)
511  if(count_list[i] > 0)
512  {
513  prev_task = i;
514  break;
515  }
516 
517  tlink *elem_first = (tlink *)Mem.mymalloc("elem_first", NTask * sizeof(tlink));
518  tlink *elem_last = (tlink *)Mem.mymalloc("elem_last", NTask * sizeof(tlink));
519 
520  for(int mode = 0; mode < 2; mode++)
521  {
522  if(mode == 0)
523  {
524  /* bring halos in the same group together */
525  mycxxsort_parallel(treehalos, treehalos + totnsubs, compare_tlink_GroupNr, Communicator);
526  halotrees_select_interior_min_newtreeid(mode, treehalos, totnsubs);
527  }
528  else
529  {
530  /* bring halos in the same tree together */
531  mycxxsort_parallel(treehalos, treehalos + totnsubs, compare_tlink_TreeID, Communicator);
532  halotrees_select_interior_min_newtreeid(mode, treehalos, totnsubs);
533  }
534 
535  long long totchanges = 0;
536  int iter = 0;
537  do
538  {
539  int changes = 0;
540 
541  /* note: the 0th element is guaranteed to be allocated even on ranks with zero totnsubs */
542  MPI_Allgather(&treehalos[0], sizeof(tlink), MPI_BYTE, elem_first, sizeof(tlink), MPI_BYTE, Communicator);
543  MPI_Allgather(&treehalos[totnsubs > 0 ? totnsubs - 1 : 0], sizeof(tlink), MPI_BYTE, elem_last, sizeof(tlink), MPI_BYTE,
544  Communicator);
545 
546  if(prev_task >= 0 && totnsubs > 0 &&
547  ((mode == 0 && elem_last[prev_task].UniqueGroupNr == treehalos[0].UniqueGroupNr) ||
548  (mode == 1 && elem_last[prev_task].TreeID == treehalos[0].TreeID)) &&
549  elem_last[prev_task].NewTreeID < treehalos[0].NewTreeID)
550  {
551  treehalos[0].NewTreeID = elem_last[prev_task].NewTreeID;
552  treehalos[0].NewTreeTask = elem_last[prev_task].NewTreeTask;
553  changes++;
554  }
555 
556  if(next_task >= 0 && totnsubs > 0 &&
557  ((mode == 0 && elem_first[next_task].UniqueGroupNr == treehalos[totnsubs - 1].UniqueGroupNr) ||
558  (mode == 1 && elem_first[next_task].TreeID == treehalos[totnsubs - 1].TreeID)) &&
559  elem_first[next_task].NewTreeID < treehalos[totnsubs - 1].NewTreeID)
560  {
561  treehalos[totnsubs - 1].NewTreeID = elem_first[next_task].NewTreeID;
562  treehalos[totnsubs - 1].NewTreeTask = elem_first[next_task].NewTreeTask;
563  changes++;
564  }
565 
566  halotrees_select_interior_min_newtreeid(mode, treehalos, totnsubs);
567 
568  sumup_large_ints(1, &changes, &totchanges, Communicator);
569 
570  if(++iter > MAXITER)
571  Terminate("too many iterations. mode=%d changes=%d", mode, changes);
572  }
573  while(totchanges > 0);
574  }
575 
576  Mem.myfree(elem_last);
577  Mem.myfree(elem_first);
578  Mem.myfree(count_list);
579 
580  /* now bring halos into original order */
581  mycxxsort_parallel(treehalos, treehalos + totnsubs, compare_tlink_OrigTask_OrderIndex, Communicator);
582 
583  /* transfer new TreeIDs back */
584  count = 0;
585 
586  long long flips = 0;
587 
588  for(int num = 0; num <= LastSnapShotNr; num++)
589  for(int i = 0; i < Cats[num].Nsubhalos; i++)
590  {
591  if(treehalos[count].NewTreeID != treehalos[count].TreeID)
592  {
593  Cats[num].Subhalo[i].TreeID = treehalos[count].NewTreeID;
594  Cats[num].Subhalo[i].TreeTask = treehalos[count].NewTreeTask;
595 
596  flips++;
597  }
598 
599  count++;
600  }
601 
602  Mem.myfree(treehalos);
603 
604  return flips;
605 }
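
/* [Added illustrative sketch - not part of the original GADGET-4 source.]
 * After the parallel sort in the routine above, a run of equal keys may straddle a task boundary.
 * This is why the first and last element of every task are exchanged with MPI_Allgather, boundary
 * elements adopt smaller labels from their neighbours, and the whole procedure is repeated until no
 * label changes anywhere. The boundary step alone, simplified (it ignores empty tasks, which the
 * code above handles explicitly) and with hypothetical names, looks roughly like this:
 */
static void sketch_stitch_labels_across_tasks(long long key_first, long long key_last, long long *label_first, long long *label_last,
                                              MPI_Comm comm)
{
  int ntask, thistask;
  MPI_Comm_size(comm, &ntask);
  MPI_Comm_rank(comm, &thistask);

  long long *first_key = (long long *)malloc(ntask * sizeof(long long));
  long long *last_key = (long long *)malloc(ntask * sizeof(long long));
  long long *first_label = (long long *)malloc(ntask * sizeof(long long));
  long long *last_label = (long long *)malloc(ntask * sizeof(long long));

  MPI_Allgather(&key_first, 1, MPI_LONG_LONG, first_key, 1, MPI_LONG_LONG, comm);
  MPI_Allgather(&key_last, 1, MPI_LONG_LONG, last_key, 1, MPI_LONG_LONG, comm);
  MPI_Allgather(label_first, 1, MPI_LONG_LONG, first_label, 1, MPI_LONG_LONG, comm);
  MPI_Allgather(label_last, 1, MPI_LONG_LONG, last_label, 1, MPI_LONG_LONG, comm);

  /* adopt a smaller label from the neighbouring task if its boundary element carries the same key */
  if(thistask > 0 && last_key[thistask - 1] == key_first && last_label[thistask - 1] < *label_first)
    *label_first = last_label[thistask - 1];

  if(thistask < ntask - 1 && first_key[thistask + 1] == key_last && first_label[thistask + 1] < *label_last)
    *label_last = first_label[thistask + 1];

  free(last_label);
  free(first_label);
  free(last_key);
  free(first_key);
}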
606 
607 /* This function is in charge of linking the halos according to the descendant information.
608  */
609 void mergertree::halotrees_link_trees(void)
610 {
611  /* we will build up the trees by assigning any two subhalos which are in the same tree the same TreeID, and the same TreeTask */
612 
613  long long changesA = 0, changesB = 0;
614  int iter = 0;
615 
616  do
617  {
618  changesA = 0;
619 
620  /* propagate the TreeIDs from level num to level num-1, via the descendant relationship from num-1 to num */
621  if(iter & 1)
622  for(int num = 1; num <= LastSnapShotNr; num++)
623  changesA += halotrees_join_via_descendants(num);
624  else
625  for(int num = LastSnapShotNr; num > 0; num--)
626  changesA += halotrees_join_via_descendants(num);
627 
628  changesB = 0;
629 
630  /* propagate the TreeIDs from level num to level num+1, via the progenitor relationship from num+1 to num */
631  if(iter & 1)
632  for(int num = LastSnapShotNr - 1; num >= 0; num--)
633  changesB += halotrees_join_via_progenitors(num);
634  else
635  for(int num = 0; num < LastSnapShotNr; num++)
636  changesB += halotrees_join_via_progenitors(num);
637 
638  MPI_Allreduce(MPI_IN_PLACE, &changesA, 1, MPI_LONG_LONG, MPI_SUM, Communicator);
639  MPI_Allreduce(MPI_IN_PLACE, &changesB, 1, MPI_LONG_LONG, MPI_SUM, Communicator);
640 
641  mpi_printf("MERGERTREE: iteration %d of joining trees via descendants and progenitors, %lld %lld links\n", iter, changesA,
642  changesB);
643 
644  if(++iter > MAXITER)
645  Terminate("too many iterations");
646  }
647  while(changesA + changesB > 0);
648 
649  /* Now link all subhalos that are in a common FOF halo, that have most bound IDs in common, or that are linked by a progenitor
650  * relation, so that they end up in the same tree */
651  for(int mode = 0; mode < 2; mode++)
652  {
653  long long totlinks = 0;
654  int iter = 0;
655  do
656  {
657  long long links = halotrees_join_trees_via_fof_or_mostboundid_bridges(mode);
658 
659  MPI_Allreduce(&links, &totlinks, 1, MPI_LONG_LONG, MPI_SUM, Communicator);
660 
661  if(mode == 0)
662  mpi_printf("MERGERTREE: iteration %d of joining trees via common FOF bridges yielded %lld links\n", iter, totlinks);
663  else
664  mpi_printf("MERGERTREE: iteration %d of joining trees via common MostboundID bridges yielded %lld links\n", iter,
665  totlinks);
666 
667  if(++iter > MAXITER)
668  Terminate("too many iterations");
669  }
670  while(totlinks > 0);
671  }
672 }
673 
674 /* This function brings the halos of each tree together according to their TreeID, so that they are ready for output to file.
675  * We also assign new TreeIDs to obtain a contiguous numbering, and we set up a table that gives the number of
676  * halos for each tree, and a cumulative offset into the output file where the tree starts
677  */
678 void mergertree::halotrees_collect_treehalos(void)
679 {
680  Nhalos = 0;
681 
682  for(int num = 0; num <= LastSnapShotNr; num++)
683  Nhalos += Cats[num].Nsubhalos;
684 
685  /* get the total number of all subhalos in all trees */
686  sumup_large_ints(1, &Nhalos, &TotNhalos, Communicator);
687 
688  /* we now release some memory that is not needed any more in order to reduce peak memory usage */
689  for(int num = LastSnapShotNr; num >= 0; num--)
690  {
691  Mem.myfree(Cats[num].TabNsubhalos);
692  Cats[num].TabNsubhalos = NULL;
693 
694  Mem.myfree(Cats[num].TabNgroups);
695  Cats[num].TabNgroups = NULL;
696  }
697 
698  for(int num = LastSnapShotNr; num >= 1; num--)
699  {
700  Mem.myfree(Cats[num].Progenitors);
701  Cats[num].Progenitors = NULL;
702  }
703 
704  for(int num = LastSnapShotNr - 1; num >= 0; num--)
705  {
706  Mem.myfree(Cats[num].Descendants);
707  Cats[num].Descendants = NULL;
708  }
709 
710  for(int num = LastSnapShotNr; num >= 0; num--)
711  {
712  Mem.myfree_movable(Cats[num].Group);
713  Cats[num].Group = NULL;
714  }
715 
716  Halos = (treehalo_type *)Mem.mymalloc_movable(&Halos, "Halos", (Nhalos + 1) * sizeof(treehalo_type));
717 
718  /* set up the halo data for the tree output */
719  long long off = 0;
720  for(int num = 0; num <= LastSnapShotNr; num++)
721  for(int i = 0; i < Cats[num].Nsubhalos; i++)
722  {
723  Halos[off].TreeID = Cats[num].Subhalo[i].TreeID;
724  Halos[off].TreeIndex = Cats[num].Subhalo[i].TreeIndex;
725 
726  Halos[off].TreeMainProgenitor = Cats[num].SubExt[i].TreeMainProgenitor;
727  Halos[off].TreeFirstProgenitor = Cats[num].SubExt[i].TreeFirstProgenitor;
728  Halos[off].TreeNextProgenitor = Cats[num].SubExt[i].TreeNextProgenitor;
729  Halos[off].TreeDescendant = Cats[num].SubExt[i].TreeDescendant;
730 
731  Halos[off].TreeFirstDescendant = Cats[num].SubExt[i].TreeFirstDescendant;
732  Halos[off].TreeNextDescendant = Cats[num].SubExt[i].TreeNextDescendant;
733  Halos[off].TreeProgenitor = Cats[num].SubExt[i].TreeProgenitor;
734 
735  Halos[off].SnapNum = num;
736  Halos[off].SubhaloNr = Cats[num].Subhalo[i].SubhaloNr;
737  Halos[off].GroupNr = Cats[num].Subhalo[i].GroupNr;
738  Halos[off].UniqueGroupNr = Cats[num].Subhalo[i].UniqueGroupNr;
739 
740  Halos[off].SubProp = Cats[num].Subhalo[i];
741 
742  off++;
743  }
744 
745  /* release some memory to reduce peak memory usage */
746  for(int num = LastSnapShotNr; num >= 0; num--)
747  {
748  Mem.myfree_movable(Cats[num].SubExt);
749  Cats[num].SubExt = NULL;
750  }
751 
752  long long *count_list = (long long *)Mem.mymalloc("count_list", NTask * sizeof(long long));
753  long long totnsubs = Nhalos;
754  MPI_Allgather(&totnsubs, sizeof(long long), MPI_BYTE, count_list, sizeof(long long), MPI_BYTE, Communicator);
755 
756  /* first set the fields TreeFirstHaloInFOFgroup and TreeNextHaloInFOFgroup */
757  mycxxsort_parallel(Halos, Halos + Nhalos, compare_Halos_UniqueGroupNr_SubhaloNr, Communicator);
758 
759  long long previous_UniqueGroupNr = HALONR_MAX;
760  long long new_treeindex = HALONR_MAX;
761  for(int i = 0; i < Nhalos; i++)
762  {
763  if(Halos[i].UniqueGroupNr != previous_UniqueGroupNr)
764  {
765  previous_UniqueGroupNr = Halos[i].UniqueGroupNr;
766  new_treeindex = Halos[i].TreeIndex;
767  }
768 
769  Halos[i].TreeFirstHaloInFOFgroup = new_treeindex;
770 
771  if(i < Nhalos - 1 && Halos[i].UniqueGroupNr == Halos[i + 1].UniqueGroupNr)
772  Halos[i].TreeNextHaloInFOFgroup = Halos[i + 1].TreeIndex;
773  else
774  Halos[i].TreeNextHaloInFOFgroup = -1;
775  }
776 
777  treehalo_type *efirst = (treehalo_type *)Mem.mymalloc("efirst", NTask * sizeof(treehalo_type));
778  treehalo_type *elast = (treehalo_type *)Mem.mymalloc("elast", NTask * sizeof(treehalo_type));
779  /* note: the 0th element is guaranteed to be allocated even on ranks with zero totnsubs */
780  MPI_Allgather(&Halos[0], sizeof(treehalo_type), MPI_BYTE, efirst, sizeof(treehalo_type), MPI_BYTE, Communicator);
781  MPI_Allgather(&Halos[Nhalos > 0 ? Nhalos - 1 : 0], sizeof(treehalo_type), MPI_BYTE, elast, sizeof(treehalo_type), MPI_BYTE,
782  Communicator);
783 
784  if(Nhalos > 0)
785  for(int task = ThisTask + 1; task < NTask; task++)
786  if(count_list[task] > 0)
787  {
788  if(Halos[Nhalos - 1].UniqueGroupNr == efirst[task].UniqueGroupNr)
789  Halos[Nhalos - 1].TreeNextHaloInFOFgroup = efirst[task].TreeIndex;
790 
791  break;
792  }
793 
794  if(Nhalos > 0)
795  {
796  long long previous_UniqueGroupNr = HALONR_MAX;
797  long long new_treeindex = HALONR_MAX;
798 
799  for(int task = ThisTask - 1; task >= 0; task--)
800  {
801  if(count_list[task] > 0)
802  {
803  if(elast[task].UniqueGroupNr == Halos[0].UniqueGroupNr)
804  {
805  previous_UniqueGroupNr = elast[task].UniqueGroupNr;
806  new_treeindex = elast[task].TreeFirstHaloInFOFgroup;
807  }
808  }
809  }
810 
811  for(int i = 0; i < Nhalos; i++)
812  {
813  if(Halos[i].UniqueGroupNr != previous_UniqueGroupNr)
814  {
815  previous_UniqueGroupNr = Halos[i].UniqueGroupNr;
816  new_treeindex = Halos[i].TreeIndex;
817  break;
818  }
819 
820  Halos[i].TreeFirstHaloInFOFgroup = new_treeindex;
821  }
822  }
823 
824  Mem.myfree(elast);
825  Mem.myfree(efirst);
826 
827  /* parallel sort according to treeid and treeindex - this brings the tree halos together, in ascending order */
828  mycxxsort_parallel(Halos, Halos + Nhalos, compare_Halos_TreeID_TreeIndex, Communicator);
829 
830  treehalo_type *elem_last = (treehalo_type *)Mem.mymalloc("elem_last", NTask * sizeof(treehalo_type));
831  /* now count the trees and overwrite the TreeIDs with new continuous IDs starting from zero */
832  MPI_Allgather(&Halos[Nhalos > 0 ? Nhalos - 1 : 0], sizeof(treehalo_type), MPI_BYTE, elem_last, sizeof(treehalo_type), MPI_BYTE,
833  Communicator);
834 
835  long long treeid_previous = -1;
836  for(int task = ThisTask - 1; task >= 0; task--)
837  {
838  if(count_list[task] > 0)
839  {
840  treeid_previous = elem_last[task].TreeID;
841  break;
842  }
843  }
844 
845  Ntrees = 0;
846  for(int i = 0; i < Nhalos; i++)
847  {
848  if(Halos[i].TreeID != treeid_previous)
849  Ntrees++;
850 
851  treeid_previous = Halos[i].TreeID;
852  Halos[i].TreeID = Ntrees - 1;
853  }
854 
855  int *ntrees_list = (int *)Mem.mymalloc("ntrees_list", NTask * sizeof(int));
856  MPI_Allgather(&Ntrees, 1, MPI_INT, ntrees_list, 1, MPI_INT, Communicator);
857 
858  TotNtrees = 0;
859  long long ntrees_before = 0;
860  for(int i = 0; i < NTask; i++)
861  {
862  TotNtrees += ntrees_list[i];
863  if(i < ThisTask)
864  ntrees_before += ntrees_list[i];
865  }
866 
867  for(int i = 0; i < Nhalos; i++)
868  Halos[i].TreeID += ntrees_before;
869 
870  /* let's now allocate the table of trees, with an extra [-1] element */
871  TreeTable = (halotrees_table *)Mem.mymalloc_movable(&TreeTable, "TreeTable", (Ntrees + 1) * sizeof(halotrees_table));
872  memset(TreeTable, 0, (Ntrees + 1) * sizeof(halotrees_table));
873  TreeTable += 1;
874 
875  /* update what we have stored for the last element */
876  MPI_Allgather(&Halos[Nhalos > 0 ? Nhalos - 1 : 0], sizeof(treehalo_type), MPI_BYTE, elem_last, sizeof(treehalo_type), MPI_BYTE,
877  Communicator);
878 
879  treeid_previous = -1;
880  for(int task = ThisTask - 1; task >= 0; task--)
881  {
882  if(count_list[task] > 0)
883  {
884  treeid_previous = elem_last[task].TreeID;
885  break;
886  }
887  }
888 
889  Ntrees = 0;
890  for(int i = 0; i < Nhalos; i++)
891  {
892  if(Halos[i].TreeID != treeid_previous)
893  Ntrees++;
894 
895  treeid_previous = Halos[i].TreeID;
896  TreeTable[Ntrees - 1].HaloCount++;
897  TreeTable[Ntrees - 1].TreeID = Halos[i].TreeID;
898  }
899 
900  /* in the TreeTable[-1] element we have counted segments of trees that do not start on the local processor */
901 
902  halotrees_table *elem_first = (halotrees_table *)Mem.mymalloc("elem_first", NTask * sizeof(halotrees_table));
903 
904  MPI_Allgather(&TreeTable[-1], sizeof(halotrees_table), MPI_BYTE, elem_first, sizeof(halotrees_table), MPI_BYTE, Communicator);
905 
906  if(Ntrees > 0)
907  for(int task = ThisTask + 1; task < NTask; task++)
908  {
909  if(TreeTable[Ntrees - 1].TreeID == elem_first[task].TreeID)
910  TreeTable[Ntrees - 1].HaloCount += elem_first[task].HaloCount;
911  else
912  break;
913  }
914 
915  Mem.myfree(elem_first);
916 
917  long long sumhalos = 0;
918  LargestHaloCount = 0;
919  for(int i = 0; i < Ntrees; i++)
920  {
921  sumhalos += TreeTable[i].HaloCount;
922  if(TreeTable[i].HaloCount > LargestHaloCount)
923  LargestHaloCount = TreeTable[i].HaloCount;
924  }
925  MPI_Allreduce(MPI_IN_PLACE, &LargestHaloCount, 1, MPI_INT, MPI_MAX, Communicator);
926 
927  long long *list_sumhalos = (long long *)Mem.mymalloc("list_sumhalos", NTask * sizeof(long long));
928  MPI_Allgather(&sumhalos, sizeof(long long), MPI_BYTE, list_sumhalos, sizeof(long long), MPI_BYTE, Communicator);
929 
930  long long sum_check = 0;
931  for(int i = 0; i < NTask; i++)
932  sum_check += list_sumhalos[i];
933 
934  if(sum_check != TotNhalos)
935  Terminate("TotNTrees=%lld sum_check=%lld != TotNhalos=%lld", (long long)TotNtrees, sum_check, (long long)TotNhalos);
936 
937  if(Ntrees > 0)
938  {
939  TreeTable[0].FirstHalo = 0;
940  for(int task = 0; task < ThisTask; task++)
941  TreeTable[0].FirstHalo += list_sumhalos[task];
942  }
943 
944  Mem.myfree(list_sumhalos);
945 
946  for(int i = 1; i < Ntrees; i++)
947  TreeTable[i].FirstHalo = TreeTable[i - 1].FirstHalo + TreeTable[i - 1].HaloCount;
948 
949  Mem.myfree_movable(ntrees_list);
950  Mem.myfree_movable(elem_last);
951  Mem.myfree_movable(count_list);
952 }
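
/* [Added illustrative sketch - not part of the original GADGET-4 source.]
 * The TreeTable above is allocated with one extra element and the pointer is then advanced by one,
 * so that index -1 becomes valid and can absorb the halo counts of tree segments whose tree starts
 * on a lower-ranked task. The pattern in isolation (hypothetical names; the shift must be undone
 * again before the memory is released):
 */
static int *sketch_alloc_with_minus_one_slot(int n)
{
  int *table = (int *)calloc(n + 1, sizeof(int)); /* one extra, zero-initialized slot in front */
  return table + 1;                               /* table[-1] now addresses the extra slot */
}

static void sketch_free_with_minus_one_slot(int *table)
{
  free(table - 1); /* undo the shift before releasing the memory */
}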
953 
954 /*--------------------------------------------------------------------------------------------------------------*/
955 
956 /* This function redistributes a subdivided global array whose local piece is stored at the address pointed to by ptr.
957  * On the local processor, there are currently 'ncurrent' elements, but we want 'ntarget' elements. The size of
958  * one element is given by 'len'. The buffer, whose address is stored in *ptr, must be resizable and will be
959  * reallocated to the new size of ntarget*len bytes.
960  */
961 void mergertree::halotrees_reshuffle(char **ptr, size_t len, int ncurrent, int ntarget)
962 {
963  int *Send_count = (int *)Mem.mymalloc_movable(&Send_count, "Send_count", sizeof(int) * NTask);
964  int *Send_offset = (int *)Mem.mymalloc_movable(&Send_offset, "Send_offset", sizeof(int) * NTask);
965  int *Recv_count = (int *)Mem.mymalloc_movable(&Recv_count, "Recv_count", sizeof(int) * NTask);
966  int *Recv_offset = (int *)Mem.mymalloc_movable(&Recv_offset, "Recv_offset", sizeof(int) * NTask);
967 
968  /* copy current data to an auxiliary buffer */
969  char *buf = (char *)Mem.mymalloc_movable(&buf, "buf", ncurrent * len);
970  memcpy(buf, *ptr, ncurrent * len);
971 
972  /* now resize source buffer of data to be able to accommodate the data of the desired target size */
973  *ptr = (char *)Mem.myrealloc_movable(*ptr, ntarget * len);
974 
975  /* collect the current and target layout of the array */
976  int *tab_ncurrent = (int *)Mem.mymalloc("tab_ncurrent", NTask * sizeof(int));
977  MPI_Allgather(&ncurrent, 1, MPI_INT, tab_ncurrent, 1, MPI_INT, Communicator);
978 
979  int *tab_ntarget = (int *)Mem.mymalloc("tab_ntarget", NTask * sizeof(int));
980  MPI_Allgather(&ntarget, 1, MPI_INT, tab_ntarget, 1, MPI_INT, Communicator);
981 
982  /* now work out where our local data should go */
983  int nexport = 0, nimport = 0;
984 
985  for(int i = 0; i < NTask; i++)
986  Send_count[i] = 0;
987 
988  int nbefore = 0;
989  for(int i = 0; i < ThisTask; i++)
990  nbefore += tab_ncurrent[i];
991 
992  int target = 0, ncum = 0;
993  for(int i = 0; i < ncurrent; i++)
994  {
995  while(target < NTask - 1 && nbefore + i >= ncum + tab_ntarget[target])
996  ncum += tab_ntarget[target++];
997 
998  Send_count[target]++;
999  }
1000 
1001  MPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, Communicator);
1002  Recv_offset[0] = Send_offset[0] = 0;
1003 
1004  for(int j = 0; j < NTask; j++)
1005  {
1006  nimport += Recv_count[j];
1007  nexport += Send_count[j];
1008 
1009  if(j > 0)
1010  {
1011  Send_offset[j] = Send_offset[j - 1] + Send_count[j - 1];
1012  Recv_offset[j] = Recv_offset[j - 1] + Recv_count[j - 1];
1013  }
1014  }
1015 
1016  if(nimport != ntarget)
1017  Terminate("nimport != ntarget");
1018 
1019  for(int ngrp = 0; ngrp < (1 << PTask); ngrp++)
1020  {
1021  int recvTask = ThisTask ^ ngrp;
1022  if(recvTask < NTask)
1023  if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0)
1024  MPI_Sendrecv(&buf[Send_offset[recvTask] * len], Send_count[recvTask] * len, MPI_BYTE, recvTask, TAG_DENS_B,
1025  *ptr + Recv_offset[recvTask] * len, Recv_count[recvTask] * len, MPI_BYTE, recvTask, TAG_DENS_B, Communicator,
1026  MPI_STATUS_IGNORE);
1027  }
1028 
1029  Mem.myfree(tab_ntarget);
1030  Mem.myfree(tab_ncurrent);
1031  Mem.myfree(buf);
1032 
1033  Mem.myfree(Recv_offset);
1034  Mem.myfree(Recv_count);
1035  Mem.myfree(Send_offset);
1036  Mem.myfree(Send_count);
1037 }
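
/* [Added illustrative sketch - not part of the original GADGET-4 source.]
 * The communication loops in this file, including the one in halotrees_reshuffle() above, pair up
 * tasks through the bitwise pattern recvTask = ThisTask ^ ngrp. Every task thereby exchanges data
 * with every other task (and once with itself, for ngrp=0) in a hypercube-like sequence of pairwise
 * MPI_Sendrecv calls. A stripped-down version of the pattern with hypothetical per-task buffers:
 */
static void sketch_pairwise_exchange(const char *sendbuf, char *recvbuf, int elem_size, MPI_Comm comm)
{
  int ntask, thistask;
  MPI_Comm_size(comm, &ntask);
  MPI_Comm_rank(comm, &thistask);

  int ptask = 0;
  while((1 << ptask) < ntask) /* smallest power of two that covers all tasks */
    ptask++;

  for(int ngrp = 0; ngrp < (1 << ptask); ngrp++)
    {
      int recvtask = thistask ^ ngrp;

      if(recvtask < ntask) /* sendbuf/recvbuf are assumed to hold one element per task */
        MPI_Sendrecv(sendbuf + (size_t)recvtask * elem_size, elem_size, MPI_BYTE, recvtask, 0,
                     recvbuf + (size_t)recvtask * elem_size, elem_size, MPI_BYTE, recvtask, 0, comm, MPI_STATUS_IGNORE);
    }
}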
1038 
1039 /* This function sets up new pointers for navigating within a given tree based on TreeIndex.
1040  * This is done by translating the old pointers into the new ones within each tree.
1041  */
1042 void mergertree::halotrees_remap_treepointers(void)
1043 {
1044  int *Send_count = (int *)Mem.mymalloc("Send_count", sizeof(int) * NTask);
1045  int *Send_offset = (int *)Mem.mymalloc("Send_offset", sizeof(int) * NTask);
1046  int *Recv_count = (int *)Mem.mymalloc("Recv_count", sizeof(int) * NTask);
1047  int *Recv_offset = (int *)Mem.mymalloc("Recv_offset", sizeof(int) * NTask);
1048 
1049  /* initialize new pointers to default values */
1050  for(int num = 0; num <= LastSnapShotNr; num++)
1051  for(int i = 0; i < Cats[num].Nsubhalos; i++)
1052  {
1053  Cats[num].SubExt[i].TreeMainProgenitor = -1;
1054  Cats[num].SubExt[i].TreeFirstProgenitor = -1;
1055  Cats[num].SubExt[i].TreeNextProgenitor = -1;
1056  Cats[num].SubExt[i].TreeDescendant = -1;
1057  Cats[num].SubExt[i].TreeFirstDescendant = -1;
1058  Cats[num].SubExt[i].TreeNextDescendant = -1;
1059  Cats[num].SubExt[i].TreeProgenitor = -1;
1060  }
1061 
1062  /* the three pointers we have to set either point one snapshot back (e.g. FirstProgenitor, delta=-1),
1063  * lie in the same snapshot (NextProgenitor, delta=0), or point one snapshot into the future (Descendant, delta=+1)
1064  */
1065  for(int set = 0; set < 3; set++)
1066  for(int delta = 1; delta >= -1; delta--)
1067  {
1068  if(set == 2 && delta != -1)
1069  continue;
1070 
1071  /* note that we take advantage of the fact that the Subgroups are ordered according to their global SubhaloNr */
1072 
1073  /* in snapshot number 'base' we want to remap the pointers */
1074  for(int base = 0; base <= LastSnapShotNr; base++)
1075  {
1076  /* this is the snapshot we are pointing to */
1077  int num = base + delta;
1078 
1079  if(set == 1 && delta != 1 && num == 0)
1080  continue;
1081 
1082  if((delta == -1 && num >= 0) || (delta >= 0 && base < LastSnapShotNr))
1083  {
1084  /* let's get the minimum and maximum subhalo numbers for the target snapshot number as a function of the task */
1085  long long *list_min_subhalonr = (long long *)Mem.mymalloc("list_min_subhalonr", NTask * sizeof(long long));
1086  long long *list_max_subhalonr = (long long *)Mem.mymalloc("list_max_subhalonr", NTask * sizeof(long long));
1087 
1088  long long min_subhalonr =
1089  Cats[num].Nsubhalos > 0 ? Cats[num].Subhalo[0].SubhaloNr : HALONR_MAX; // HALONR_MAX means empty here
1090  long long max_subhalonr = Cats[num].Nsubhalos > 0 ? Cats[num].Subhalo[Cats[num].Nsubhalos - 1].SubhaloNr
1091  : HALONR_MAX; // HALONR_MAX means empty here
1092 
1093  MPI_Allgather(&min_subhalonr, sizeof(long long), MPI_BYTE, list_min_subhalonr, sizeof(long long), MPI_BYTE,
1094  Communicator);
1095  MPI_Allgather(&max_subhalonr, sizeof(long long), MPI_BYTE, list_max_subhalonr, sizeof(long long), MPI_BYTE,
1096  Communicator);
1097 
1098  /* prepare a list of the current pointer values on the local processor */
1099  data_list *list = (data_list *)Mem.mymalloc("list", Cats[base].Nsubhalos * sizeof(data_list));
1100  int count = 0;
1101  for(int i = 0; i < Cats[base].Nsubhalos; i++)
1102  {
1103  if(set == 0)
1104  switch(delta)
1105  {
1106  case -1:
1107  list[count].targetsubhalonr = Cats[base].Progenitors[i].FirstProgSubhaloNr;
1108  break;
1109  case 0:
1110  list[count].targetsubhalonr = Cats[base].Descendants[i].NextProgSubhaloNr;
1111  break;
1112  case +1:
1113  list[count].targetsubhalonr = Cats[base].Descendants[i].DescSubhaloNr;
1114  break;
1115  }
1116  else if(set == 1)
1117  switch(delta)
1118  {
1119  case -1:
1120  list[count].targetsubhalonr = Cats[base].Progenitors[i].ProgSubhaloNr;
1121  break;
1122  case 0:
1123  list[count].targetsubhalonr = Cats[base].Progenitors[i].NextDescSubhaloNr;
1124  break;
1125  case +1:
1126  list[count].targetsubhalonr = Cats[base].Descendants[i].FirstDescSubhaloNr;
1127  break;
1128  }
1129  else
1130  switch(delta)
1131  {
1132  case -1:
1133  list[count].targetsubhalonr = Cats[base].Progenitors[i].MainProgSubhaloNr;
1134  break;
1135  }
1136 
1137  list[count].originsubhalonr = Cats[base].Subhalo[i].SubhaloNr;
1138 
1139  if(list[count].targetsubhalonr >= 0 &&
1140  list[count].targetsubhalonr < (long long)HALONR_MAX) // need to only process existing pointers
1141  {
1142  if(list[count].targetsubhalonr >= Cats[num].TotNsubhalos)
1143  Terminate("set=%d delta=%d num=%d base=%d list[count].targetsubhalonr=%lld >= Cats[num].TotNsubhalos=%lld\n",
1144  set, delta, num, base, list[count].targetsubhalonr, Cats[num].TotNsubhalos);
1145 
1146  list[count].origin = i;
1147  list[count].intreeid = Cats[base].Subhalo[i].TreeID;
1148  count++;
1149  }
1150  }
1151 
1152  /* sort it by current pointer value (which is the subhalonr) */
1153  mycxxsort(list, list + count, compare_data_list_subhalonnr);
1154 
1155  /* we now need to send these entries to the target processors that may hold the corresponding subhalos */
1156 
1157  int nexport = 0, nimport = 0;
1158 
1159  remap_data *import_data = NULL, *export_data = NULL;
1160 
1161  for(int mode = 0; mode < 2; mode++) // go through this twice to simplify bookkeeping
1162  {
1163  for(int i = 0; i < NTask; i++)
1164  Send_count[i] = 0;
1165 
1166  int target = 0;
1167 
1168  for(int i = 0; i < count; i++)
1169  {
1170  while(target < NTask - 1 &&
1171  (list_min_subhalonr[target] == HALONR_MAX || list[i].targetsubhalonr > list_max_subhalonr[target]))
1172  target++;
1173 
1174  if(list_min_subhalonr[target] != HALONR_MAX && list[i].targetsubhalonr >= list_min_subhalonr[target] &&
1175  list[i].targetsubhalonr <= list_max_subhalonr[target])
1176  {
1177  if(mode == 0)
1178  Send_count[target]++;
1179  else
1180  {
1181  int off = Send_offset[target] + Send_count[target]++;
1182 
1183  export_data[off].loc_index = i;
1184  export_data[off].targetsubhalonr = list[i].targetsubhalonr;
1185  export_data[off].originsubhalonr = list[i].originsubhalonr;
1186  export_data[off].intreeid = list[i].intreeid;
1187  }
1188  }
1189  else
1190  Terminate(
1191  "this shouldn't happen: set=%d delta=%d num=%d base=%d delta=%d i=%d|count=%d "
1192  "list[i].targetsubhalonr=%lld "
1193  "target=%d "
1194  "list_min_subhalonr[target]=%lld list_max_subhalonr[target]=%lld\n",
1195  set, delta, num, base, delta, i, count, (long long)list[i].targetsubhalonr, target,
1196  list_min_subhalonr[target], list_max_subhalonr[target]);
1197  }
1198 
1199  if(mode == 0) // prepare offset tables
1200  {
1201  MPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, Communicator);
1202  Recv_offset[0] = Send_offset[0] = 0;
1203  for(int j = 0; j < NTask; j++)
1204  {
1205  nimport += Recv_count[j];
1206  nexport += Send_count[j];
1207  if(j > 0)
1208  {
1209  Send_offset[j] = Send_offset[j - 1] + Send_count[j - 1];
1210  Recv_offset[j] = Recv_offset[j - 1] + Recv_count[j - 1];
1211  }
1212  }
1213 
1214  export_data = (remap_data *)Mem.mymalloc("export_data", nexport * sizeof(remap_data));
1215  import_data = (remap_data *)Mem.mymalloc("import_data", nimport * sizeof(remap_data));
1216  }
1217  }
1218 
1219  for(int ngrp = 0; ngrp < (1 << PTask); ngrp++)
1220  {
1221  /* note: here we also have a transfer from each task to itself (for ngrp=0) */
1222  int recvTask = ThisTask ^ ngrp;
1223  if(recvTask < NTask)
1224  if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0)
1225  MPI_Sendrecv(&export_data[Send_offset[recvTask]], Send_count[recvTask] * sizeof(remap_data), MPI_BYTE,
1226  recvTask, TAG_DENS_B, &import_data[Recv_offset[recvTask]],
1227  Recv_count[recvTask] * sizeof(remap_data), MPI_BYTE, recvTask, TAG_DENS_B, Communicator,
1228  MPI_STATUS_IGNORE);
1229  }
1230 
1231  /* the incoming data is not necessarily sorted according to subhalonr, that's why we need to sort it now */
1232  for(int i = 0; i < nimport; i++)
1233  import_data[i].orig_index = i;
1234 
1235  mycxxsort(import_data, import_data + nimport, compare_remap_data_subhalonr);
1236 
1237  /* now we check the incoming data, and prepare new values for the pointers */
1238  for(int i = 0, j = 0; i < Cats[num].Nsubhalos && j < nimport;)
1239  {
1240  if(Cats[num].Subhalo[i].SubhaloNr < import_data[j].targetsubhalonr)
1241  i++;
1242  else if(Cats[num].Subhalo[i].SubhaloNr > import_data[j].targetsubhalonr)
1243  {
1244  Terminate("Can't find targetsubhalonr=%lld Cats[num].Subhalo[i].SubhaloNr=%lld i=%d|%d j=%d|%d",
1245  import_data[j].targetsubhalonr, Cats[num].Subhalo[i].SubhaloNr, i, Cats[num].Nsubhalos, j, nimport);
1246  }
1247  else // we have a match
1248  {
1249  import_data[j].new_treeindexptr = Cats[num].Subhalo[i].TreeIndex;
1250  import_data[j].treeid = Cats[num].Subhalo[i].TreeID;
1251 
1252  if(Cats[num].Subhalo[i].TreeID != import_data[j].intreeid)
1253  Terminate(
1254  "\nWe are not in the same tree, which shouldn't be the case: set=%d delta=%d num=%d\n"
1255  "Cats[num].Subhalo[i].SubhaloNr=%lld \nCats[num].Subhalo[i].DescSubhaloNr=%lld\n"
1256  "Cats[num].Subhalo[i].NextProgSubhalor=%lld\nCats[num].Subhalo[i].FirstProgSubhalor=%lld\n"
1257  "Cats[num].Subhalo[i].NextDescSubhalor=%lld\nCats[num].Subhalo[i].FirstDescSubhalor=%lld\n"
1258  "Cats[num].Subhalo[i].ProgSubhalorNr=%lld\n"
1259  "originsubgalonr=%lld targetsubgalonr=%lld Cats[num].Subhalo[i].TreeID=%lld "
1260  "import_data[j].intreeid=%lld i=%d|%d j=%d|%d num=%d",
1261  set, delta, num, Cats[num].Subhalo[i].SubhaloNr, Cats[num].Descendants[i].DescSubhaloNr,
1262  Cats[num].Descendants[i].NextProgSubhaloNr, Cats[num].Progenitors[i].FirstProgSubhaloNr,
1263  Cats[num].Progenitors[i].NextDescSubhaloNr, Cats[num].Descendants[i].FirstDescSubhaloNr,
1264  Cats[num].Progenitors[i].ProgSubhaloNr, import_data[j].originsubhalonr, import_data[j].targetsubhalonr,
1265  Cats[num].Subhalo[i].TreeID, import_data[j].intreeid, i, Cats[num].Nsubhalos, j, nimport, num);
1266 
1267  j++;
1268  }
1269  }
1270 
1271  mycxxsort(import_data, import_data + nimport, compare_remap_data_orig_index);
1272 
1273  /* send the results back */
1274  for(int ngrp = 0; ngrp < (1 << PTask); ngrp++)
1275  {
1276  /* note: here we also have a transfer from each task to itself (for ngrp=0) */
1277  int recvTask = ThisTask ^ ngrp;
1278  if(recvTask < NTask)
1279  if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0)
1280  MPI_Sendrecv(&import_data[Recv_offset[recvTask]], Recv_count[recvTask] * sizeof(remap_data), MPI_BYTE,
1281  recvTask, TAG_DENS_B, &export_data[Send_offset[recvTask]],
1282  Send_count[recvTask] * sizeof(remap_data), MPI_BYTE, recvTask, TAG_DENS_B, Communicator,
1283  MPI_STATUS_IGNORE);
1284  }
1285 
1286  for(int i = 0; i < nexport; i++)
1287  {
1288  int idx = list[export_data[i].loc_index].origin;
1289 
1290  if(Cats[base].Subhalo[idx].TreeID != export_data[i].treeid)
1291  Terminate("something is wrong: delta=%d i=%d|%d idx=%d|%d %lld %lld %lld", delta, i, nexport, idx,
1292  Cats[base].Nsubhalos, Cats[base].Subhalo[idx].TreeID, export_data[i].treeid, export_data[i].intreeid);
1293 
1294  if(set == 0)
1295  switch(delta)
1296  {
1297  case -1:
1298  Cats[base].SubExt[idx].TreeFirstProgenitor = export_data[i].new_treeindexptr;
1299  break;
1300 
1301  case 0:
1302  Cats[base].SubExt[idx].TreeNextProgenitor = export_data[i].new_treeindexptr;
1303  break;
1304 
1305  case +1:
1306  Cats[base].SubExt[idx].TreeDescendant = export_data[i].new_treeindexptr;
1307  break;
1308  }
1309  else if(set == 1)
1310  switch(delta)
1311  {
1312  case -1:
1313  Cats[base].SubExt[idx].TreeProgenitor = export_data[i].new_treeindexptr;
1314  break;
1315 
1316  case 0:
1317  Cats[base].SubExt[idx].TreeNextDescendant = export_data[i].new_treeindexptr;
1318  break;
1319 
1320  case +1:
1321  Cats[base].SubExt[idx].TreeFirstDescendant = export_data[i].new_treeindexptr;
1322  break;
1323  }
1324  else
1325  switch(delta)
1326  {
1327  case -1:
1328  Cats[base].SubExt[idx].TreeMainProgenitor = export_data[i].new_treeindexptr;
1329  break;
1330  }
1331  }
1332 
1333  Mem.myfree(import_data);
1334  Mem.myfree(export_data);
1335  Mem.myfree(list);
1336  Mem.myfree(list_max_subhalonr);
1337  Mem.myfree(list_min_subhalonr);
1338  }
1339  }
1340  }
1341 
1342  Mem.myfree(Recv_offset);
1343  Mem.myfree(Recv_count);
1344  Mem.myfree(Send_offset);
1345  Mem.myfree(Send_count);
1346 }
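
/* [Added illustrative sketch - not part of the original GADGET-4 source.]
 * The matching loop inside the routine above is a merge-join over two sorted sequences: the local
 * subhalos and the incoming requests are both ordered by subhalo number, so a single pass with two
 * running indices finds all matches. The same pattern on plain arrays (hypothetical names):
 */
static void sketch_merge_join(const long long *local_key, int nlocal, const long long *request_key, int *request_match, int nrequest)
{
  for(int i = 0, j = 0; i < nlocal && j < nrequest;)
    {
      if(local_key[i] < request_key[j])
        i++; /* local element not requested, skip it */
      else if(local_key[i] > request_key[j])
        j++; /* no local partner exists (the code above treats this case as an error instead) */
      else
        request_match[j++] = i; /* match: remember which local element answers this request */
    }
}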
1347 
1348 /*--------------------------------------------------------------------------------------------------------------*/
1349 
1350 /* This function creates a new subhalo numbering within each tree, where a tree is identified by the set of subhalos sharing the same treeid.
1351  */
1352 void mergertree::halotrees_assign_new_treeindices(void)
1353 {
1354  long long totnsubs = 0;
1355 
1356  for(int num = 0; num <= LastSnapShotNr; num++)
1357  totnsubs += Cats[num].Nsubhalos;
1358 
1359  assign_data *a_data = (assign_data *)Mem.mymalloc("a_data", (totnsubs + 1) * sizeof(assign_data));
1360 
1361  long long count = 0;
1362  for(int num = 0; num <= LastSnapShotNr; num++)
1363  for(int i = 0; i < Cats[num].Nsubhalos; i++)
1364  {
1365  a_data[count].origin_task = ThisTask;
1366  a_data[count].origin_num = num;
1367  a_data[count].origin_index = i;
1368  a_data[count].treeid = Cats[num].Subhalo[i].TreeID;
1369  count++;
1370  }
1371 
1372  mycxxsort_parallel(a_data, a_data + totnsubs, compare_assign_data_treeid_origin_num_origin_task_origin_index, Communicator);
1373 
1374  long long newid = 0;
1375 
1376  for(long long i = 0, treeindex = 0; i < totnsubs; i++)
1377  {
1378  if(i == 0 || a_data[i].treeid != a_data[i - 1].treeid)
1379  treeindex = 0;
1380 
1381  a_data[i].treeindex = treeindex++;
1382 
1383  if(i > 0 && a_data[i].treeid != a_data[i - 1].treeid)
1384  newid++;
1385 
1386  a_data[i].newtreeid = newid;
1387  }
1388 
1389  long long *count_list = (long long *)Mem.mymalloc("count_list", NTask * sizeof(long long));
1390  long long *newid_list = (long long *)Mem.mymalloc("newid_list", NTask * sizeof(long long));
1391  MPI_Allgather(&totnsubs, sizeof(long long), MPI_BYTE, count_list, sizeof(long long), MPI_BYTE, Communicator);
1392  MPI_Allgather(&newid, sizeof(long long), MPI_BYTE, newid_list, sizeof(long long), MPI_BYTE, Communicator);
1393 
1394  assign_data *elem_last = (assign_data *)Mem.mymalloc("elem_last", NTask * sizeof(assign_data));
1395  assign_data *elem_first = (assign_data *)Mem.mymalloc("elem_first", NTask * sizeof(assign_data));
1396 
1397  /* note: the 0th element is guaranteed to be allocated even on ranks with zero totnsubs */
1398  MPI_Allgather(&a_data[totnsubs > 0 ? totnsubs - 1 : 0], sizeof(assign_data), MPI_BYTE, elem_last, sizeof(assign_data), MPI_BYTE,
1399  Communicator);
1400  MPI_Allgather(&a_data[0], sizeof(assign_data), MPI_BYTE, elem_first, sizeof(assign_data), MPI_BYTE, Communicator);
1401 
1402  if(count_list[ThisTask] > 0)
1403  {
1404  long long count_before = 0;
1405  for(int task = 0; task < ThisTask; task++)
1406  if(count_list[task] > 0)
1407  {
1408  if(a_data[0].treeid == elem_last[task].treeid)
1409  count_before += (elem_last[task].treeindex + 1);
1410  }
1411  if(count_before > 0)
1412  {
1413  for(long long i = 0; i < totnsubs; i++)
1414  {
1415  if(a_data[i].treeid != a_data[0].treeid)
1416  break;
1417 
1418  a_data[i].treeindex += count_before;
1419  }
1420  }
1421 
1422  long long newidoff = 0;
1423  for(int task = 0; task < ThisTask; task++)
1424  newidoff += newid_list[task];
1425 
1426  /* we now also need to check whether there are treeid changes that line up with task boundaries. Be aware that some tasks could
1427  * be empty */
1428 
1429  int taleft = 0;
1430 
1431  do
1432  {
1433  while(taleft < ThisTask && count_list[taleft] == 0)
1434  taleft++;
1435 
1436  if(taleft < ThisTask)
1437  {
1438  int taright = taleft + 1;
1439 
1440  while(count_list[taright] == 0)
1441  taright++;
1442 
1443  // taright may be at most equal to ThisTask here
1444 
1445  if(elem_last[taleft].treeid != elem_first[taright].treeid)
1446  newidoff++;
1447 
1448  taleft = taright;
1449  }
1450  }
1451  while(taleft < ThisTask);
1452 
1453  /* now assign new TreeIDs that form consecutive numbers without gaps */
1454  for(long long i = 0; i < totnsubs; i++)
1455  a_data[i].newtreeid += newidoff;
1456  }
1457 
1458  Mem.myfree(elem_first);
1459  Mem.myfree(elem_last);
1460  Mem.myfree(newid_list);
1461  Mem.myfree(count_list);
1462 
1463  mycxxsort_parallel(a_data, a_data + totnsubs, compare_assign_data_origin_task_origin_num_origin_index, Communicator);
1464 
1465  count = 0;
1466  for(int num = 0; num <= LastSnapShotNr; num++)
1467  for(int i = 0; i < Cats[num].Nsubhalos; i++)
1468  {
1469  Cats[num].Subhalo[i].TreeIndex = a_data[count].treeindex;
1470  Cats[num].Subhalo[i].TreeID = a_data[count].newtreeid;
1471  count++;
1472  }
1473 
1474  Mem.myfree(a_data);
1475 }
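
/* [Added illustrative sketch - not part of the original GADGET-4 source.]
 * The heart of the routine above is to assign, within an array sorted by tree-id, a running index
 * that restarts at zero whenever a new tree-id begins; the remaining code merely corrects these
 * indices and the new tree-ids across task boundaries. On a single task the core reduces to
 * (hypothetical names):
 */
static void sketch_running_index_per_tree(const long long *treeid, long long *treeindex, long long n)
{
  long long idx = 0;

  for(long long i = 0; i < n; i++)
    {
      if(i > 0 && treeid[i] != treeid[i - 1])
        idx = 0; /* a new tree starts here, restart the per-tree numbering */

      treeindex[i] = idx++;
    }
}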
1476 
1477 /*--------------------------------------------------------------------------------------------------------------*/
1478 
1479 /* This function puts subhalos from snapshot "num" that are linked by a descendant relation from snapshot "num-1" into the same tree
1480  * by unifying their tree-id.
1481  */
1482 int mergertree::halotrees_join_via_descendants(int num)
1483 {
1484  int *Send_count = (int *)Mem.mymalloc("Send_count", sizeof(int) * NTask);
1485  int *Send_offset = (int *)Mem.mymalloc("Send_offset", sizeof(int) * NTask);
1486  int *Recv_count = (int *)Mem.mymalloc("Recv_count", sizeof(int) * NTask);
1487  int *Recv_offset = (int *)Mem.mymalloc("Recv_offset", sizeof(int) * NTask);
1488 
1489  /* note: we take advantage of the fact that the Subgroups are ordered according to their global SubhaloNr for each output number */
1490 
1491  /* the following lists store the minimum and maximum subhalo number to be found on a given processor */
1492  long long *list_min_subhalonr = (long long *)Mem.mymalloc("list_min_subhalonr", NTask * sizeof(long long));
1493  long long *list_max_subhalonr = (long long *)Mem.mymalloc("list_max_subhalonr", NTask * sizeof(long long));
1494 
1495  /* this value flags that there are no subhalos on the corresponding processor */
1496  long long empty = HALONR_MAX;
1497 
1498  MPI_Allgather(Cats[num].Nsubhalos > 0 ? &Cats[num].Subhalo[0].SubhaloNr : &empty, sizeof(long long), MPI_BYTE, list_min_subhalonr,
1499  sizeof(long long), MPI_BYTE, Communicator);
1500  MPI_Allgather(Cats[num].Nsubhalos > 0 ? &Cats[num].Subhalo[Cats[num].Nsubhalos - 1].SubhaloNr : &empty, sizeof(long long), MPI_BYTE,
1501  list_max_subhalonr, sizeof(long long), MPI_BYTE, Communicator);
1502 
1503  int nexport = 0, nimport = 0;
1504 
1505  halotrees_data *import_data = NULL, *export_data = NULL;
1506 
1507  /* for efficiency reasons, we need to traverse the local descendant pointers in increasing order of their target subhalo number, so
1508  * let's create an auxiliary list for facilitating this.
1509  */
1510  descnr_data *sorted_list = (descnr_data *)Mem.mymalloc("sorted_list", Cats[num - 1].Nsubhalos * sizeof(descnr_data));
1511 
1512  for(int i = 0; i < Cats[num - 1].Nsubhalos; i++)
1513  {
1514  sorted_list[i].DescSubhaloNr = Cats[num - 1].Descendants[i].DescSubhaloNr;
1515  sorted_list[i].TreeID = Cats[num - 1].Subhalo[i].TreeID;
1516  sorted_list[i].TreeTask = Cats[num - 1].Subhalo[i].TreeTask;
1517  sorted_list[i].orig_index = i;
1518  }
1519 
1520  mycxxsort(sorted_list, sorted_list + Cats[num - 1].Nsubhalos, compare_sorted_list_descsubhalonr);
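  /* Because the subhalos of snapshot "num" are globally ordered by SubhaloNr (so the per-rank intervals
   * [list_min_subhalonr, list_max_subhalonr] are themselves ordered), and sorted_list is now ordered by
   * DescSubhaloNr as well, the 'target' pointer in the loop below only ever advances. Locating the rank that
   * stores each descendant therefore costs O(Nsubhalos + NTask) in total rather than a search per element. */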
1521 
1522  /* for communication bookkeeping reasons, we traverse the counting pattern twice */
1523  for(int mode = 0; mode < 2; mode++)
1524  {
1525  for(int i = 0; i < NTask; i++)
1526  Send_count[i] = 0;
1527 
1528  int target = 0;
1529 
1530  for(int i = 0; i < Cats[num - 1].Nsubhalos; i++)
1531  {
1532  while(target < NTask - 1 &&
1533  (list_min_subhalonr[target] == empty || sorted_list[i].DescSubhaloNr > list_max_subhalonr[target]))
1534  target++;
1535 
1536  if(list_min_subhalonr[target] != empty && sorted_list[i].DescSubhaloNr >= list_min_subhalonr[target] &&
1537  sorted_list[i].DescSubhaloNr <= list_max_subhalonr[target])
1538  {
1539  if(mode == 0)
1540  Send_count[target]++;
1541  else
1542  {
1543  int off = Send_offset[target] + Send_count[target]++;
1544 
1545  export_data[off].loc_index = sorted_list[i].orig_index;
1546  export_data[off].descendantnr = sorted_list[i].DescSubhaloNr;
1547  export_data[off].treeid = sorted_list[i].TreeID;
1548  export_data[off].treetask = sorted_list[i].TreeTask;
1549  }
1550  }
1551  }
1552 
1553  if(mode == 0)
1554  {
1555  MPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, Communicator);
1556  Recv_offset[0] = Send_offset[0] = 0;
1557  for(int j = 0; j < NTask; j++)
1558  {
1559  nimport += Recv_count[j];
1560  nexport += Send_count[j];
1561  if(j > 0)
1562  {
1563  Send_offset[j] = Send_offset[j - 1] + Send_count[j - 1];
1564  Recv_offset[j] = Recv_offset[j - 1] + Recv_count[j - 1];
1565  }
1566  }
1567 
1568  export_data = (halotrees_data *)Mem.mymalloc("export_data", nexport * sizeof(halotrees_data));
1569  import_data = (halotrees_data *)Mem.mymalloc("import_data", nimport * sizeof(halotrees_data));
1570  }
1571  }
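  /* The two passes above implement the usual count-then-fill exchange pattern: pass 0 only counts how many
   * entries go to each rank, the MPI_Alltoall turns these counts into matching receive counts and prefix
   * offsets, and the buffers are allocated; pass 1 repeats the identical traversal and writes each entry at
   * Send_offset[target] plus a running counter, so export_data is already grouped by destination rank. */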
1572 
1573  /* send data to those target processors that hold the descendant subhalos in order to fetch their tree-ids */
1574  for(int ngrp = 0; ngrp < (1 << PTask); ngrp++)
1575  {
1576  int recvTask = ThisTask ^ ngrp;
1577  if(recvTask < NTask)
1578  if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0)
1579  MPI_Sendrecv(&export_data[Send_offset[recvTask]], Send_count[recvTask] * sizeof(halotrees_data), MPI_BYTE, recvTask,
1580  TAG_DENS_B, &import_data[Recv_offset[recvTask]], Recv_count[recvTask] * sizeof(halotrees_data), MPI_BYTE,
1581  recvTask, TAG_DENS_B, Communicator, MPI_STATUS_IGNORE);
1582  }
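  /* The loop above carries out the exchange as pairwise MPI_Sendrecv calls between ThisTask and ThisTask ^ ngrp,
   * a hypercube-style schedule in which every rank talks to exactly one partner per iteration. PTask is
   * presumably the smallest integer with (1 << PTask) >= NTask (it is defined outside this file), so the
   * recvTask < NTask test skips partners that do not exist, and ngrp = 0 covers the rank's exchange with itself. */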
1583 
1584  /* the collection of incoming data is not necessarily sorted according to descendantnr, so we need to sort it for efficient matching
1585  */
1586  for(int i = 0; i < nimport; i++)
1587  import_data[i].orig_order = i;
1588 
1589  mycxxsort(import_data, import_data + nimport, compare_halotrees_data_descendantnr);
1590 
1591  int changes = 0;
1592 
1593  /* now do the matching */
1594  for(int i = 0, j = 0; i < Cats[num].Nsubhalos && j < nimport;)
1595  {
1596  if(Cats[num].Subhalo[i].SubhaloNr < import_data[j].descendantnr)
1597  i++;
1598  else if(Cats[num].Subhalo[i].SubhaloNr > import_data[j].descendantnr)
1599  j++;
1600  else
1601  {
1602  if(import_data[j].treeid > Cats[num].Subhalo[i].TreeID)
1603  {
1604  import_data[j].treeid = Cats[num].Subhalo[i].TreeID;
1605  import_data[j].treetask = Cats[num].Subhalo[i].TreeTask;
1606  changes++;
1607  }
1608  else if(import_data[j].treeid < Cats[num].Subhalo[i].TreeID)
1609  {
1610  Cats[num].Subhalo[i].TreeID = import_data[j].treeid;
1611  Cats[num].Subhalo[i].TreeTask = import_data[j].treetask;
1612  changes++;
1613  }
1614  j++;
1615  }
1616  }
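  /* The loop above is a merge-join of two lists sorted on the same key (the local subhalos by SubhaloNr, the
   * imported entries by descendantnr). On a key match the smaller tree-id wins on both sides: e.g. an imported
   * tree-id 12 meeting a local TreeID 5 is lowered to 5 (and later carried back to the progenitor), while an
   * imported 3 meeting a local 9 lowers the local subhalo instead. Every relink is counted in 'changes', which
   * presumably lets the caller repeat these passes until no tree-id moves any more. */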
1617 
1618  /* reestablish original order */
1619  mycxxsort(import_data, import_data + nimport, compare_halotrees_data_orig_order);
1620 
1621  /* send the results back */
1622  for(int ngrp = 0; ngrp < (1 << PTask); ngrp++) /* note: here we also have a transfer from each task to itself (for ngrp=0) */
1623  {
1624  int recvTask = ThisTask ^ ngrp;
1625  if(recvTask < NTask)
1626  if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0)
1627  MPI_Sendrecv(&import_data[Recv_offset[recvTask]], Recv_count[recvTask] * sizeof(halotrees_data), MPI_BYTE, recvTask,
1628  TAG_DENS_B, &export_data[Send_offset[recvTask]], Send_count[recvTask] * sizeof(halotrees_data), MPI_BYTE,
1629  recvTask, TAG_DENS_B, Communicator, MPI_STATUS_IGNORE);
1630  }
1631 
1632  /* now read it out and assign the new treeid/treetask value to the halos in the previous output (which are the progenitors) */
1633  for(int i = 0; i < nexport; i++)
1634  {
1635  if(export_data[i].treeid != HALONR_MAX)
1636  {
1637  Cats[num - 1].Subhalo[export_data[i].loc_index].TreeID = export_data[i].treeid;
1638  Cats[num - 1].Subhalo[export_data[i].loc_index].TreeTask = export_data[i].treetask;
1639  }
1640  }
1641 
1642  Mem.myfree(import_data);
1643  Mem.myfree(export_data);
1644  Mem.myfree(sorted_list);
1645  Mem.myfree(list_max_subhalonr);
1646  Mem.myfree(list_min_subhalonr);
1647 
1648  Mem.myfree(Recv_offset);
1649  Mem.myfree(Recv_count);
1650  Mem.myfree(Send_offset);
1651  Mem.myfree(Send_count);
1652 
1653  return changes;
1654 }
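
 /* A minimal serial analogue (hypothetical, standalone, never invoked by the code in this file) of what the
  * repeated join passes achieve: every descendant/progenitor link pulls both of its endpoints down to the
  * smaller tree-id, and iterating until nothing changes leaves every connected set of halos with one common
  * id. The arrays and the function name below are illustrative only. */
 #if 0
 static void sketch_unify_treeids(long long *treeid, int npairs, const int *a, const int *b)
 {
   int changes;
   do
     {
       changes = 0;
       for(int k = 0; k < npairs; k++)
         {
           long long lo = treeid[a[k]] < treeid[b[k]] ? treeid[a[k]] : treeid[b[k]];
           if(treeid[a[k]] != lo || treeid[b[k]] != lo)
             {
               treeid[a[k]] = treeid[b[k]] = lo; /* both endpoints adopt the smaller id */
               changes++;
             }
         }
     }
   while(changes > 0);
 }
 #endif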
1655 
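 /* This function is the analogue of halotrees_join_via_descendants(), but it follows the progenitor pointers
  * stored for snapshot "num+1" and unifies their tree-ids with those of the matching subhalos in snapshot "num".
  */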
1656 int mergertree::halotrees_join_via_progenitors(int num)
1657 {
1658  int *Send_count = (int *)Mem.mymalloc("Send_count", sizeof(int) * NTask);
1659  int *Send_offset = (int *)Mem.mymalloc("Send_offset", sizeof(int) * NTask);
1660  int *Recv_count = (int *)Mem.mymalloc("Recv_count", sizeof(int) * NTask);
1661  int *Recv_offset = (int *)Mem.mymalloc("Recv_offset", sizeof(int) * NTask);
1662 
1663  /* note: we take advantage of the fact that the Subgroups are ordered according to their global SubhaloNr for each output number */
1664 
1665  /* the following lists store the minimum and maximum subhalo number to be found on a given processor */
1666  long long *list_min_subhalonr = (long long *)Mem.mymalloc("list_min_subhalonr", NTask * sizeof(long long));
1667  long long *list_max_subhalonr = (long long *)Mem.mymalloc("list_max_subhalonr", NTask * sizeof(long long));
1668 
1669  /* this value flags that there are no subhalos on the corresponding processor */
1670  long long empty = HALONR_MAX;
1671 
1672  MPI_Allgather(Cats[num].Nsubhalos > 0 ? &Cats[num].Subhalo[0].SubhaloNr : &empty, sizeof(long long), MPI_BYTE, list_min_subhalonr,
1673  sizeof(long long), MPI_BYTE, Communicator);
1674  MPI_Allgather(Cats[num].Nsubhalos > 0 ? &Cats[num].Subhalo[Cats[num].Nsubhalos - 1].SubhaloNr : &empty, sizeof(long long), MPI_BYTE,
1675  list_max_subhalonr, sizeof(long long), MPI_BYTE, Communicator);
1676 
1677  int nexport = 0, nimport = 0;
1678 
1679  halotrees_data *import_data = NULL, *export_data = NULL;
1680 
1681  /* for efficiency reasons, we need to traverse the local progenitor pointers in increasing order of their target subhalo number, so
1682  * let's create an auxiliary list for facilitating this.
1683  */
1684  prognr_data *sorted_list = (prognr_data *)Mem.mymalloc("sorted_list", Cats[num + 1].Nsubhalos * sizeof(prognr_data));
1685 
1686  for(int i = 0; i < Cats[num + 1].Nsubhalos; i++)
1687  {
1688  sorted_list[i].ProgSubhaloNr = Cats[num + 1].Progenitors[i].ProgSubhaloNr;
1689  sorted_list[i].TreeID = Cats[num + 1].Subhalo[i].TreeID;
1690  sorted_list[i].TreeTask = Cats[num + 1].Subhalo[i].TreeTask;
1691  sorted_list[i].orig_index = i;
1692  }
1693 
1694  mycxxsort(sorted_list, sorted_list + Cats[num + 1].Nsubhalos, compare_sorted_list_progsubhalonr);
1695 
1696  /* for communication bookkeeping reasons, we traverse the counting pattern twice */
1697  for(int mode = 0; mode < 2; mode++)
1698  {
1699  for(int i = 0; i < NTask; i++)
1700  Send_count[i] = 0;
1701 
1702  int target = 0;
1703 
1704  for(int i = 0; i < Cats[num + 1].Nsubhalos; i++)
1705  {
1706  while(target < NTask - 1 &&
1707  (list_min_subhalonr[target] == empty || sorted_list[i].ProgSubhaloNr > list_max_subhalonr[target]))
1708  target++;
1709 
1710  if(list_min_subhalonr[target] != empty && sorted_list[i].ProgSubhaloNr >= list_min_subhalonr[target] &&
1711  sorted_list[i].ProgSubhaloNr <= list_max_subhalonr[target])
1712  {
1713  if(mode == 0)
1714  Send_count[target]++;
1715  else
1716  {
1717  int off = Send_offset[target] + Send_count[target]++;
1718 
1719  export_data[off].loc_index = sorted_list[i].orig_index;
1720  export_data[off].progenitornr = sorted_list[i].ProgSubhaloNr;
1721  export_data[off].treeid = sorted_list[i].TreeID;
1722  export_data[off].treetask = sorted_list[i].TreeTask;
1723  }
1724  }
1725  }
1726 
1727  if(mode == 0)
1728  {
1729  MPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, Communicator);
1730  Recv_offset[0] = Send_offset[0] = 0;
1731  for(int j = 0; j < NTask; j++)
1732  {
1733  nimport += Recv_count[j];
1734  nexport += Send_count[j];
1735  if(j > 0)
1736  {
1737  Send_offset[j] = Send_offset[j - 1] + Send_count[j - 1];
1738  Recv_offset[j] = Recv_offset[j - 1] + Recv_count[j - 1];
1739  }
1740  }
1741 
1742  export_data = (halotrees_data *)Mem.mymalloc("export_data", nexport * sizeof(halotrees_data));
1743  import_data = (halotrees_data *)Mem.mymalloc("import_data", nimport * sizeof(halotrees_data));
1744  }
1745  }
1746 
 1747  /* send data to those target processors that hold the progenitor subhalos in order to fetch their tree-ids */
1748  for(int ngrp = 0; ngrp < (1 << PTask); ngrp++)
1749  {
1750  int recvTask = ThisTask ^ ngrp;
1751  if(recvTask < NTask)
1752  if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0)
1753  MPI_Sendrecv(&export_data[Send_offset[recvTask]], Send_count[recvTask] * sizeof(halotrees_data), MPI_BYTE, recvTask,
1754  TAG_DENS_B, &import_data[Recv_offset[recvTask]], Recv_count[recvTask] * sizeof(halotrees_data), MPI_BYTE,
1755  recvTask, TAG_DENS_B, Communicator, MPI_STATUS_IGNORE);
1756  }
1757 
 1758  /* the collection of incoming data is not necessarily sorted according to progenitornr, so we need to sort it for efficient
1759  * matching
1760  */
1761  for(int i = 0; i < nimport; i++)
1762  import_data[i].orig_order = i;
1763 
1764  mycxxsort(import_data, import_data + nimport, compare_halotrees_data_progenitornr);
1765 
1766  int changes = 0;
1767 
1768  /* now do the matching */
1769  for(int i = 0, j = 0; i < Cats[num].Nsubhalos && j < nimport;)
1770  {
1771  if(Cats[num].Subhalo[i].SubhaloNr < import_data[j].progenitornr)
1772  i++;
1773  else if(Cats[num].Subhalo[i].SubhaloNr > import_data[j].progenitornr)
1774  j++;
1775  else
1776  {
1777  if(import_data[j].treeid > Cats[num].Subhalo[i].TreeID)
1778  {
1779  import_data[j].treeid = Cats[num].Subhalo[i].TreeID;
1780  import_data[j].treetask = Cats[num].Subhalo[i].TreeTask;
1781  changes++;
1782  }
1783  else if(import_data[j].treeid < Cats[num].Subhalo[i].TreeID)
1784  {
1785  Cats[num].Subhalo[i].TreeID = import_data[j].treeid;
1786  Cats[num].Subhalo[i].TreeTask = import_data[j].treetask;
1787  changes++;
1788  }
1789  j++;
1790  }
1791  }
1792 
1793  /* reestablish original order */
1794  mycxxsort(import_data, import_data + nimport, compare_halotrees_data_orig_order);
1795 
1796  /* send the results back */
1797  for(int ngrp = 0; ngrp < (1 << PTask); ngrp++) /* note: here we also have a transfer from each task to itself (for ngrp=0) */
1798  {
1799  int recvTask = ThisTask ^ ngrp;
1800  if(recvTask < NTask)
1801  if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0)
1802  MPI_Sendrecv(&import_data[Recv_offset[recvTask]], Recv_count[recvTask] * sizeof(halotrees_data), MPI_BYTE, recvTask,
1803  TAG_DENS_B, &export_data[Send_offset[recvTask]], Send_count[recvTask] * sizeof(halotrees_data), MPI_BYTE,
1804  recvTask, TAG_DENS_B, Communicator, MPI_STATUS_IGNORE);
1805  }
1806 
 1807  /* now read it out and assign the new treeid/treetask value to the halos in the subsequent output (whose progenitor pointers were followed) */
1808  for(int i = 0; i < nexport; i++)
1809  {
1810  if(export_data[i].treeid != HALONR_MAX)
1811  {
1812  Cats[num + 1].Subhalo[export_data[i].loc_index].TreeID = export_data[i].treeid;
1813  Cats[num + 1].Subhalo[export_data[i].loc_index].TreeTask = export_data[i].treetask;
1814  }
1815  }
1816 
1817  Mem.myfree(import_data);
1818  Mem.myfree(export_data);
1819  Mem.myfree(sorted_list);
1820  Mem.myfree(list_max_subhalonr);
1821  Mem.myfree(list_min_subhalonr);
1822 
1823  Mem.myfree(Recv_offset);
1824  Mem.myfree(Recv_count);
1825  Mem.myfree(Send_offset);
1826  Mem.myfree(Send_count);
1827 
1828  return changes;
1829 }
1830 
1831 void mergertree::halotrees_determine_mainprogenitor(void)
1832 {
1833  for(int num = 1; num <= LastSnapShotNr; num++)
1834  for(int i = 0; i < Cats[num].Nsubhalos; i++)
1835  Cats[num].Progenitors[i].MainProgSubhaloNr = HALONR_MAX;
1836 
1837  /* initialize the maximum branch length field */
1838  for(int num = 0; num <= LastSnapShotNr; num++)
1839  for(int i = 0; i < Cats[num].Nsubhalos; i++)
1840  Cats[num].SubExt[i].MaxLenProgBranch = Cats[num].Subhalo[i].Len;
1841 
1842  /* propagate the branch length of subhalos from snapshot "num-1" to those in snapshot "num" via the
1843  * descendant information */
1844  for(int num = 1; num <= LastSnapShotNr; num++)
1845  halotrees_propagate_max_branch_length_descendants(num);
1846 
1847  /* propagate the branch length of subhalos from snapshot "num-1" to those in snapshot "num" via the
1848  * progenitor information */
1849  /* we disable this as it could cause a spurious jump to a larger progenitor that shed a few particles to a newly formed group */
1850  // for(int num = 1; num <= LastSnapShotNr; num++)
1851  // halotrees_propagate_max_branch_length_progenitors(num);
1852 
1853  mpi_printf("MERGERTREE: determination of main progenitor branch done\n");
1854 }
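
 /* Hedged serial sketch (standalone, never called; names and types are illustrative only) of the selection
  * rule that halotrees_propagate_max_branch_length_descendants() below applies in distributed form: among all
  * progenitors pointing at the same descendant, the one carrying the largest cumulative branch length wins,
  * and the descendant's own particle count is added on top of it. */
 #if 0
 static void sketch_pick_main_progenitor(int nprog, const long long *prog_branchlen, const long long *prog_nr,
                                          long long desc_len, long long *desc_branchlen, long long *mainprog_nr)
 {
   *desc_branchlen = desc_len;   /* initialised with the descendant's own length, as for MaxLenProgBranch */
   *mainprog_nr    = HALONR_MAX; /* flags 'no progenitor found' */

   for(int k = 0; k < nprog; k++)
     if(prog_branchlen[k] > *desc_branchlen - desc_len) /* same comparison as in the matching loop below */
       {
         *desc_branchlen = prog_branchlen[k] + desc_len;
         *mainprog_nr    = prog_nr[k];
       }
 }
 #endif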
1855 
 1856 /* This function propagates the maximum branch length of subhalos from snapshot "num-1" to those in snapshot "num" via the
 1857  * descendant information.
 1858  */
1859 void mergertree::halotrees_propagate_max_branch_length_descendants(int num)
1860 {
1861  int *Send_count = (int *)Mem.mymalloc("Send_count", sizeof(int) * NTask);
1862  int *Send_offset = (int *)Mem.mymalloc("Send_offset", sizeof(int) * NTask);
1863  int *Recv_count = (int *)Mem.mymalloc("Recv_count", sizeof(int) * NTask);
1864  int *Recv_offset = (int *)Mem.mymalloc("Recv_offset", sizeof(int) * NTask);
1865 
1866  /* note: we take advantage of the fact that the Subgroups are ordered according to their global SubhaloNr for each output number */
1867 
1868  /* the following lists store the minimum and maximum subhalo number to be found on a given processor */
1869  long long *list_min_subhalonr = (long long *)Mem.mymalloc("list_min_subhalonr", NTask * sizeof(long long));
1870  long long *list_max_subhalonr = (long long *)Mem.mymalloc("list_max_subhalonr", NTask * sizeof(long long));
1871 
1872  /* this value flags that there are no subhalos on the corresponding processor */
1873  long long empty = HALONR_MAX;
1874 
1875  MPI_Allgather(Cats[num].Nsubhalos > 0 ? &Cats[num].Subhalo[0].SubhaloNr : &empty, sizeof(long long), MPI_BYTE, list_min_subhalonr,
1876  sizeof(long long), MPI_BYTE, Communicator);
1877  MPI_Allgather(Cats[num].Nsubhalos > 0 ? &Cats[num].Subhalo[Cats[num].Nsubhalos - 1].SubhaloNr : &empty, sizeof(long long), MPI_BYTE,
1878  list_max_subhalonr, sizeof(long long), MPI_BYTE, Communicator);
1879 
1880  int nexport = 0, nimport = 0;
1881 
1882  halotrees_propagate_data *import_data = NULL, *export_data = NULL;
1883 
1884  /* for efficiency reasons, we need to traverse the local descendant pointers in increasing order of their target subhalo number, so
1885  * let's create an auxiliary list for facilitating this.
1886  */
1887  halotrees_propagate_data *sorted_list =
1888  (halotrees_propagate_data *)Mem.mymalloc("sorted_list", Cats[num - 1].Nsubhalos * sizeof(halotrees_propagate_data));
1889 
1890  for(int i = 0; i < Cats[num - 1].Nsubhalos; i++)
1891  {
1892  sorted_list[i].DescSubhaloNr = Cats[num - 1].Descendants[i].DescSubhaloNr;
1893  sorted_list[i].SubhaloNr = Cats[num - 1].Descendants[i].PrevSubhaloNr;
1894  sorted_list[i].MaxLenProgBranch = Cats[num - 1].SubExt[i].MaxLenProgBranch;
1895  }
1896 
1897  mycxxsort(sorted_list, sorted_list + Cats[num - 1].Nsubhalos, compare_halotrees_propagate_data_DescSubhaloNr);
1898 
1899  /* for communication bookkeeping reasons, we traverse the counting pattern twice */
1900  for(int mode = 0; mode < 2; mode++)
1901  {
1902  for(int i = 0; i < NTask; i++)
1903  Send_count[i] = 0;
1904 
1905  int target = 0;
1906 
1907  for(int i = 0; i < Cats[num - 1].Nsubhalos; i++)
1908  {
1909  while(target < NTask - 1 &&
1910  (list_min_subhalonr[target] == empty || sorted_list[i].DescSubhaloNr > list_max_subhalonr[target]))
1911  target++;
1912 
1913  if(list_min_subhalonr[target] != empty && sorted_list[i].DescSubhaloNr >= list_min_subhalonr[target] &&
1914  sorted_list[i].DescSubhaloNr <= list_max_subhalonr[target])
1915  {
1916  if(mode == 0)
1917  Send_count[target]++;
1918  else
1919  {
1920  int off = Send_offset[target] + Send_count[target]++;
1921 
1922  export_data[off].DescSubhaloNr = sorted_list[i].DescSubhaloNr;
1923  export_data[off].SubhaloNr = sorted_list[i].SubhaloNr;
1924  export_data[off].MaxLenProgBranch = sorted_list[i].MaxLenProgBranch;
1925  }
1926  }
1927  }
1928 
1929  if(mode == 0)
1930  {
1931  MPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, Communicator);
1932  Recv_offset[0] = Send_offset[0] = 0;
1933  for(int j = 0; j < NTask; j++)
1934  {
1935  nimport += Recv_count[j];
1936  nexport += Send_count[j];
1937  if(j > 0)
1938  {
1939  Send_offset[j] = Send_offset[j - 1] + Send_count[j - 1];
1940  Recv_offset[j] = Recv_offset[j - 1] + Recv_count[j - 1];
1941  }
1942  }
1943 
1944  export_data = (halotrees_propagate_data *)Mem.mymalloc("export_data", nexport * sizeof(halotrees_propagate_data));
1945  import_data = (halotrees_propagate_data *)Mem.mymalloc("import_data", nimport * sizeof(halotrees_propagate_data));
1946  }
1947  }
1948 
 1949  /* send data to those target processors that hold the descendant subhalos in order to propagate the branch lengths to them */
1950  for(int ngrp = 0; ngrp < (1 << PTask); ngrp++)
1951  {
1952  int recvTask = ThisTask ^ ngrp;
1953  if(recvTask < NTask)
1954  if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0)
1955  MPI_Sendrecv(&export_data[Send_offset[recvTask]], Send_count[recvTask] * sizeof(halotrees_propagate_data), MPI_BYTE,
1956  recvTask, TAG_DENS_B, &import_data[Recv_offset[recvTask]],
1957  Recv_count[recvTask] * sizeof(halotrees_propagate_data), MPI_BYTE, recvTask, TAG_DENS_B, Communicator,
1958  MPI_STATUS_IGNORE);
1959  }
1960 
1961  /* the collection of incoming data is not necessarily sorted according to DescSubhaloNr, so we need to sort it for efficient
1962  * matching
1963  */
1964  mycxxsort(import_data, import_data + nimport, compare_halotrees_propagate_data_DescSubhaloNr);
1965 
1966  /* now do the matching */
1967  for(int i = 0, j = 0; i < Cats[num].Nsubhalos && j < nimport;)
1968  {
1969  if(Cats[num].Subhalo[i].SubhaloNr < import_data[j].DescSubhaloNr)
1970  i++;
1971  else if(Cats[num].Subhalo[i].SubhaloNr > import_data[j].DescSubhaloNr)
1972  j++;
1973  else
1974  {
1975  if(import_data[j].MaxLenProgBranch > Cats[num].SubExt[i].MaxLenProgBranch - Cats[num].Subhalo[i].Len)
1976  {
1977  Cats[num].SubExt[i].MaxLenProgBranch = import_data[j].MaxLenProgBranch + Cats[num].Subhalo[i].Len;
1978  Cats[num].Progenitors[i].MainProgSubhaloNr = import_data[j].SubhaloNr;
1979  }
1980 
1981  j++;
1982  }
1983  }
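  /* Worked example of the update rule above (illustrative numbers): a descendant with Len = 100 first receives
   * a progenitor with MaxLenProgBranch = 800, so its own MaxLenProgBranch becomes 900 and that progenitor is
   * recorded as MainProgSubhaloNr. A second progenitor arriving with 950 then satisfies 950 > 900 - 100, so the
   * branch length becomes 1050 and the main progenitor switches. Subtracting Len in the comparison makes the
   * outcome independent of the order in which the progenitors arrive. */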
1984 
1985  Mem.myfree(import_data);
1986  Mem.myfree(export_data);
1987  Mem.myfree(sorted_list);
1988  Mem.myfree(list_max_subhalonr);
1989  Mem.myfree(list_min_subhalonr);
1990 
1991  Mem.myfree(Recv_offset);
1992  Mem.myfree(Recv_count);
1993  Mem.myfree(Send_offset);
1994  Mem.myfree(Send_count);
1995 }
1996 
 1997 /* This function propagates the maximum branch length of subhalos from snapshot "num-1" to those in snapshot "num" via the
 1998  * progenitor information.
 1999  */
2000 void mergertree::halotrees_propagate_max_branch_length_progenitors(int num)
2001 {
2002  int *Send_count = (int *)Mem.mymalloc("Send_count", sizeof(int) * NTask);
2003  int *Send_offset = (int *)Mem.mymalloc("Send_offset", sizeof(int) * NTask);
2004  int *Recv_count = (int *)Mem.mymalloc("Recv_count", sizeof(int) * NTask);
2005  int *Recv_offset = (int *)Mem.mymalloc("Recv_offset", sizeof(int) * NTask);
2006 
2007  /* note: we take advantage of the fact that the Subgroups are ordered according to their global SubhaloNr for each output number */
2008 
2009  /* the following lists store the minimum and maximum subhalo number to be found on a given processor */
2010  long long *list_min_subhalonr = (long long *)Mem.mymalloc("list_min_subhalonr", NTask * sizeof(long long));
2011  long long *list_max_subhalonr = (long long *)Mem.mymalloc("list_max_subhalonr", NTask * sizeof(long long));
2012 
2013  /* this value flags that there are no subhalos on the corresponding processor */
2014  long long empty = HALONR_MAX;
2015 
2016  MPI_Allgather(Cats[num - 1].Nsubhalos > 0 ? &Cats[num - 1].Subhalo[0].SubhaloNr : &empty, sizeof(long long), MPI_BYTE,
2017  list_min_subhalonr, sizeof(long long), MPI_BYTE, Communicator);
2018  MPI_Allgather(Cats[num - 1].Nsubhalos > 0 ? &Cats[num - 1].Subhalo[Cats[num - 1].Nsubhalos - 1].SubhaloNr : &empty,
2019  sizeof(long long), MPI_BYTE, list_max_subhalonr, sizeof(long long), MPI_BYTE, Communicator);
2020 
2021  int nexport = 0, nimport = 0;
2022 
2023  halotrees_propagate_data *import_data = NULL, *export_data = NULL;
2024 
 2025  /* for efficiency reasons, we need to traverse the local progenitor pointers in increasing order of their target subhalo number, so
2026  * let's create an auxiliary list for facilitating this.
2027  */
2028  halotrees_propagate_data *sorted_list =
2029  (halotrees_propagate_data *)Mem.mymalloc("sorted_list", Cats[num].Nsubhalos * sizeof(halotrees_propagate_data));
2030 
2031  for(int i = 0; i < Cats[num].Nsubhalos; i++)
2032  {
2033  sorted_list[i].ProgSubhaloNr = Cats[num].Progenitors[i].ProgSubhaloNr;
2034  sorted_list[i].index = i;
2035  }
2036 
2037  mycxxsort(sorted_list, sorted_list + Cats[num].Nsubhalos, compare_halotrees_propagate_data_ProgSubhaloNr);
2038 
2039  /* for communication bookkeeping reasons, we traverse the counting pattern twice */
2040  for(int mode = 0; mode < 2; mode++)
2041  {
2042  for(int i = 0; i < NTask; i++)
2043  Send_count[i] = 0;
2044 
2045  int target = 0;
2046 
2047  for(int i = 0; i < Cats[num].Nsubhalos; i++)
2048  {
2049  while(target < NTask - 1 &&
2050  (list_min_subhalonr[target] == empty || sorted_list[i].ProgSubhaloNr > list_max_subhalonr[target]))
2051  target++;
2052 
2053  if(list_min_subhalonr[target] != empty && sorted_list[i].ProgSubhaloNr >= list_min_subhalonr[target] &&
2054  sorted_list[i].ProgSubhaloNr <= list_max_subhalonr[target])
2055  {
2056  if(mode == 0)
2057  Send_count[target]++;
2058  else
2059  {
2060  int off = Send_offset[target] + Send_count[target]++;
2061 
2062  export_data[off].ProgSubhaloNr = sorted_list[i].ProgSubhaloNr;
2063  export_data[off].index = sorted_list[i].index;
2064  }
2065  }
2066  }
2067 
2068  if(mode == 0)
2069  {
2070  MPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, Communicator);
2071  Recv_offset[0] = Send_offset[0] = 0;
2072  for(int j = 0; j < NTask; j++)
2073  {
2074  nimport += Recv_count[j];
2075  nexport += Send_count[j];
2076  if(j > 0)
2077  {
2078  Send_offset[j] = Send_offset[j - 1] + Send_count[j - 1];
2079  Recv_offset[j] = Recv_offset[j - 1] + Recv_count[j - 1];
2080  }
2081  }
2082 
2083  export_data = (halotrees_propagate_data *)Mem.mymalloc("export_data", nexport * sizeof(halotrees_propagate_data));
2084  import_data = (halotrees_propagate_data *)Mem.mymalloc("import_data", nimport * sizeof(halotrees_propagate_data));
2085  }
2086  }
2087 
 2088  /* send data to those target processors that hold the progenitor subhalos in order to fetch their branch lengths */
2089  for(int ngrp = 0; ngrp < (1 << PTask); ngrp++)
2090  {
2091  int recvTask = ThisTask ^ ngrp;
2092  if(recvTask < NTask)
2093  if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0)
2094  MPI_Sendrecv(&export_data[Send_offset[recvTask]], Send_count[recvTask] * sizeof(halotrees_propagate_data), MPI_BYTE,
2095  recvTask, TAG_DENS_B, &import_data[Recv_offset[recvTask]],
2096  Recv_count[recvTask] * sizeof(halotrees_propagate_data), MPI_BYTE, recvTask, TAG_DENS_B, Communicator,
2097  MPI_STATUS_IGNORE);
2098  }
2099 
2100  for(int i = 0; i < nimport; i++)
2101  import_data[i].orig_order = i;
2102 
2103  /* the collection of incoming data is not necessarily sorted according to ProgSubhaloNr, so we need to sort it for efficient
2104  * matching
2105  */
2106  mycxxsort(import_data, import_data + nimport, compare_halotrees_propagate_data_ProgSubhaloNr);
2107 
2108  /* now do the matching */
2109  for(int i = 0, j = 0; i < Cats[num - 1].Nsubhalos && j < nimport;)
2110  {
2111  if(Cats[num - 1].Subhalo[i].SubhaloNr < import_data[j].ProgSubhaloNr)
2112  i++;
2113  else if(Cats[num - 1].Subhalo[i].SubhaloNr > import_data[j].ProgSubhaloNr)
2114  j++;
2115  else
2116  {
2117  import_data[j].MaxLenProgBranch = Cats[num - 1].SubExt[i].MaxLenProgBranch;
2118  import_data[j].SubhaloNr = Cats[num - 1].Subhalo[i].SubhaloNr;
2119 
2120  if(Cats[num - 1].Subhalo[i].SubhaloNr != import_data[j].ProgSubhaloNr)
 2121  Terminate("Cats[num - 1].Subhalo[i].SubhaloNr != import_data[j].ProgSubhaloNr");
2122 
2123  j++;
2124  }
2125  }
2126 
2127  /* reestablish original order */
2128  mycxxsort(import_data, import_data + nimport, compare_halotrees_propagate_data_orig_order);
2129 
2130  /* send data back */
2131  for(int ngrp = 0; ngrp < (1 << PTask); ngrp++)
2132  {
2133  int recvTask = ThisTask ^ ngrp;
2134  if(recvTask < NTask)
2135  if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0)
2136  MPI_Sendrecv(&import_data[Recv_offset[recvTask]], Recv_count[recvTask] * sizeof(halotrees_propagate_data), MPI_BYTE,
2137  recvTask, TAG_DENS_B, &export_data[Send_offset[recvTask]],
2138  Send_count[recvTask] * sizeof(halotrees_propagate_data), MPI_BYTE, recvTask, TAG_DENS_B, Communicator,
2139  MPI_STATUS_IGNORE);
2140  }
2141 
 2142  /* now read out the returned branch lengths and update the maximum branch length and main progenitor of the subhalos in snapshot "num" */
2143  for(int i = 0; i < nexport; i++)
2144  {
2145  int q = export_data[i].index;
2146 
2147  if(export_data[i].MaxLenProgBranch > Cats[num].SubExt[q].MaxLenProgBranch - Cats[num].Subhalo[q].Len)
2148  {
2149  Cats[num].SubExt[q].MaxLenProgBranch = export_data[i].MaxLenProgBranch + Cats[num].Subhalo[q].Len;
2150  Cats[num].Progenitors[q].MainProgSubhaloNr = export_data[i].SubhaloNr;
2151  }
2152  }
2153 
2154  Mem.myfree(import_data);
2155  Mem.myfree(export_data);
2156  Mem.myfree(sorted_list);
2157  Mem.myfree(list_max_subhalonr);
2158  Mem.myfree(list_min_subhalonr);
2159 
2160  Mem.myfree(Recv_offset);
2161  Mem.myfree(Recv_count);
2162  Mem.myfree(Send_offset);
2163  Mem.myfree(Send_count);
2164 }
2165 
2166 #endif