GADGET-4
descendant.cc
Go to the documentation of this file.
1 /*******************************************************************************
2  * \copyright This file is part of the GADGET4 N-body/SPH code developed
3  * \copyright by Volker Springel. Copyright (C) 2014-2020 by Volker Springel
4  * \copyright (vspringel@mpa-garching.mpg.de) and all contributing authors.
5  *******************************************************************************/
6 
12 #include "gadgetconfig.h"
13 
14 #ifdef MERGERTREE
15 
16 #include <gsl/gsl_rng.h>
17 #include <hdf5.h>
18 #include <math.h>
19 #include <mpi.h>
20 #include <stdio.h>
21 #include <stdlib.h>
22 #include <string.h>
23 #include <sys/stat.h>
24 #include <sys/types.h>
25 #include <unistd.h>
26 
27 #include "../data/allvars.h"
28 #include "../data/dtypes.h"
29 #include "../data/mymalloc.h"
30 #include "../fof/fof.h"
31 #include "../io/hdf5_util.h"
32 #include "../io/io.h"
33 #include "../logs/timer.h"
34 #include "../main/main.h"
35 #include "../main/simulation.h"
36 #include "../mergertree/io_descendant.h"
37 #include "../mergertree/io_progenitors.h"
38 #include "../mergertree/mergertree.h"
39 #include "../mpi_utils/mpi_utils.h"
40 #include "../sort/parallel_sort.h"
41 #include "../subfind/subfind.h"
42 #include "../system/system.h"
43 
44 /* This function allocates and fills the "Descendants" array, which gives the number of the descendant subhalo in the newly created
45  * subhalo catalogue for every subhalo from the previous catalogue.
46  */
47 void mergertree::mergertree_determine_descendants_postproc(int num)
48 {
49  Descendants = (desc_list *)Mem.mymalloc_movable(&Descendants, "Descendants", (PrevNsubhalos + 1) * sizeof(desc_list));
50  Progenitors = (prog_list *)Mem.mymalloc_movable(&Progenitors, "Progenitors", (CurrNsubhalos + 1) * sizeof(prog_list));
51 
52  Sp->NumPart = MtrP_NumPart;
53 
54  /* allocate a work structure */
55  desc = (desc_partdata *)Mem.mymalloc_movable(&desc, "desc", sizeof(desc_partdata) * Sp->NumPart);
56 
57  /* let's fill in some relevant data into the work-structure */
58  for(int i = 0; i < Sp->NumPart; i++)
59  {
60  desc[i].CurrSubhaloNr.set(MtrP[i].SubhaloNr);
61  desc[i].CurrRankInSubhalo.set(MtrP[i].RankInSubhalo);
62 
63  desc[i].PrevSubhaloNr.set(MtrP[i].PrevSubhaloNr);
64  desc[i].PrevRankInSubhalo.set(MtrP[i].PrevRankInSubhalo);
65 
66  if(desc[i].PrevSubhaloNr.get() >= PrevTotNsubhalos && desc[i].PrevSubhaloNr.get() != HALONR_MAX)
67  Terminate("strange: i=%d desc[i].PrevSubhaloNr=%lld PrevTotNsubhalos=%lld", i, (long long)desc[i].PrevSubhaloNr.get(),
68  (long long)PrevTotNsubhalos);
69  }
70 
71  mergertree_determine_descendants(num);
72 
73  mpi_printf("DONE!\n");
74 }
75 
76 /* This function allocates and fills the "Descendants" array, which gives the number of the descendant subhalo in the newly created
77  * subhalo catalogue for every subhalo from the previous catalogue.
78  */
79 void mergertree::mergertree_determine_descendants_on_the_fly(int num)
80 {
81  if(num == 0) // for the first output, we don't yet have anything to link
82  return;
83 
84  Descendants = (desc_list *)Mem.mymalloc_movable(&Descendants, "Descendants", (PrevNsubhalos + 1) * sizeof(desc_list));
85  Progenitors = (prog_list *)Mem.mymalloc_movable(&Progenitors, "Progenitors", (CurrNsubhalos + 1) * sizeof(prog_list));
86 
87  /* allocate a work structure */
88  desc = (desc_partdata *)Mem.mymalloc_movable(&desc, "desc", sizeof(desc_partdata) * Sp->NumPart);
89 
90  /* Let's fill in some relevant data into a work-structure defined for every particle
91  * For each particle, we know the new and the old subhalo number, as well as its rank in the previous subhalo
92  */
93  for(int i = 0; i < Sp->NumPart; i++)
94  {
95  desc[i].CurrSubhaloNr = Sp->PS[i].SubhaloNr;
96  desc[i].CurrRankInSubhalo = Sp->PS[i].RankInSubhalo;
97 
98  desc[i].PrevSubhaloNr = Sp->P[i].PrevSubhaloNr;
99  desc[i].PrevRankInSubhalo = Sp->P[i].PrevRankInSubhalo;
100 
101  if(desc[i].PrevSubhaloNr.get() >= PrevTotNsubhalos && desc[i].PrevSubhaloNr.get() != HALONR_MAX)
102  Terminate("strange: i=%d desc[i].PrevSubhaloNr=%lld PrevTotNsubhalos=%lld", i, (long long)desc[i].PrevSubhaloNr.get(),
103  (long long)PrevTotNsubhalos);
104  }
105 
106  mergertree_determine_descendants(num);
107 
108  Mem.myfree(desc);
109  Mem.myfree(Progenitors);
110  Mem.myfree(Descendants);
111 }
112 
113 void mergertree::mergertree_determine_descendants(int num)
114 {
115  /* determine matching pieces of subhalos and their mutual scores */
116  int nmatch = mergertree_find_matching_segments_and_scores();
117 
118  /* select the main descendants */
119  mergertree_select_maximum_score_descendants(nmatch);
120 
121  /* select the main progenitors */
122  mergertree_select_maximum_score_progenitors(nmatch);
123 
124  /* let's determine the next-progenitor fields, which chain up those subhalos that have the same descendant */
125  mergertree_chain_up_progenitors_with_same_descendant();
126 
127  /* set first progenitor field for chaining up those subhalos that have the same descendant */
128  mergertree_set_first_progenitor_with_same_descendant();
129 
130  /* let's determine the next-descendant fields, which chain up those subhalos that have the same progenitor */
131  mergertree_chain_up_descendants_with_same_progenitor();
132 
133  /* set first descendant field for chaining up those subhalos that have the same progenitor */
134  mergertree_set_first_descendant_with_same_progenitor();
135 
136  /**** write stuff to files *****/
137 
138  descendant_io Desc(this, this->Communicator, All.SnapFormat); /* get an I/O object */
139  Desc.mergertree_save_descendants(num);
140 
141  progenitors_io Prog(this, this->Communicator, All.SnapFormat);
142  Prog.mergertree_save_progenitors(num);
143 
144  /* calculate and output some statistics to characterize linking */
145 
146  int count_links = 0;
147  for(int i = 0; i < PrevNsubhalos; i++)
148  if(Descendants[i].DescSubhaloNr != HALONR_MAX)
149  count_links++;
150 
151  long long tot_count_links;
152  sumup_large_ints(1, &count_links, &tot_count_links, Communicator);
153 
154  mpi_printf("MERGERTREE: Was able to identify descendants for %lld out of %lld subhalos, i.e. for a fraction of %g\n",
155  tot_count_links, (long long)PrevTotNsubhalos, tot_count_links / (PrevTotNsubhalos + SMALLNUM));
156 
157  int count_splits = 0;
158  for(int i = 0; i < CurrNsubhalos; i++)
159  if(Progenitors[i].NextDescSubhaloNr != HALONR_MAX)
160  count_splits++;
161 
162  long long tot_count_splits;
163  sumup_large_ints(1, &count_splits, &tot_count_splits, Communicator);
164 
165  mpi_printf("MERGERTREE: We have found secondary descendants for %lld halos out of %lld subhalos, i.e. for a fraction of %g\n",
166  tot_count_splits, (long long)CurrTotNsubhalos, tot_count_splits / (CurrTotNsubhalos + SMALLNUM));
167 }
168 
/* Identifies all matched segments, i.e. pairs (previous subhalo, current subhalo) that share at least one
 * particle, and accumulates for every such pair a descendant score and a progenitor score. On return, the
 * distributed work list desc[] holds one entry per unique matched pair (globally ordered by previous, then
 * current subhalo number), and the local number of pairs is returned.
 */
int mergertree::mergertree_find_matching_segments_and_scores(void)
{
  /* let's eliminate unbound particles from the work list */
  int nmatch = Sp->NumPart;

  /* compaction: overwrite an unusable entry with the current last element and shrink the list;
   * the i-- makes sure the element copied in is examined as well */
  for(int i = 0; i < nmatch; i++)
    if(desc[i].CurrSubhaloNr.get() == HALONR_MAX || desc[i].PrevSubhaloNr.get() == HALONR_MAX)
      {
        desc[i] = desc[nmatch - 1];
        nmatch--;
        i--;
      }

  /* let's do the scoring: only the NUM_MOST_BOUND_PARTICLES_USED_FOR_TRACKING most bound particles
   * contribute, with weight 1/(1 + sqrt(rank)) so that more tightly bound particles count more */
  for(int i = 0; i < nmatch; i++)
    {
      if(desc[i].PrevRankInSubhalo.get() < NUM_MOST_BOUND_PARTICLES_USED_FOR_TRACKING)
        desc[i].DescScore = 1.0 / (1 + pow(desc[i].PrevRankInSubhalo.get(), 0.5));
      else
        desc[i].DescScore = 0;

      if(desc[i].CurrRankInSubhalo.get() < NUM_MOST_BOUND_PARTICLES_USED_FOR_TRACKING)
        desc[i].ProgScore = 1.0 / (1 + pow(desc[i].CurrRankInSubhalo.get(), 0.5));
      else
        desc[i].ProgScore = 0;
    }

  /* Now we sort the list such that the old subhalos are grouped together, and the new subhalo numbers are consecutive within them
   */
  mycxxsort_parallel(desc, desc + nmatch, mergertree_compare_PrevSubNr_NewSubNr, Communicator);

  /* eliminate duplicate matched pairs on local processor and sum up the scores
   */
  int start = 0;  /* index of the representative entry of the current (prev, curr) pair */
  int count = 0;  /* number of unique pairs kept so far */

  if(nmatch > 0)
    count = 1;

  for(int i = 1; i < nmatch; i++)
    if(desc[i].PrevSubhaloNr == desc[start].PrevSubhaloNr && desc[i].CurrSubhaloNr == desc[start].CurrSubhaloNr)
      {
        /* same pair as the representative: just accumulate its scores there */
        desc[start].DescScore += desc[i].DescScore;
        desc[start].ProgScore += desc[i].ProgScore;
      }
    else
      {
        /* new pair: keep it as the next unique entry and make it the representative */
        desc[count] = desc[i];
        start = count;
        count++;
      }

  nmatch = count;

  /* now we consolidate duplicate matched pairs on different processors. The list is still ordered, but there could be gaps
   * (i.e. processors with only one or potentially zero entries)
   */

  /* obtain last and first element of each processor, and the counts from each processor */
  desc_partdata *desc_first = (desc_partdata *)Mem.mymalloc("desc_first", NTask * sizeof(desc_partdata));
  desc_partdata *desc_last = (desc_partdata *)Mem.mymalloc("desc_last", NTask * sizeof(desc_partdata));
  int *nmatch_list = (int *)Mem.mymalloc("nmatch_list", NTask * sizeof(int));

  MPI_Allgather(&desc[0], sizeof(desc_partdata), MPI_BYTE, desc_first, sizeof(desc_partdata), MPI_BYTE,
                Communicator); /* note: the 0th element is guaranteed to be allocated */
  MPI_Allgather(&desc[nmatch > 0 ? nmatch - 1 : 0], sizeof(desc_partdata), MPI_BYTE, desc_last, sizeof(desc_partdata), MPI_BYTE,
                Communicator);
  MPI_Allgather(&nmatch, 1, MPI_INT, nmatch_list, 1, MPI_INT, Communicator);

  /* We go from the back through the tasks, and for the first element in each case, we move it ahead in the list as far as possible.
   * Eliminated items are marked through DescScore = -1, and are then filtered out later.
   */
  for(int i = NTask - 1; i > 0; i--)
    if(nmatch_list[i] > 0)
      {
        int target = -1;  /* nearest preceding non-empty task whose tail matches our head, if any */

        for(int j = i - 1; j >= 0; j--)
          {
            if(nmatch_list[j] > 0)
              {
                if(nmatch_list[j] > 1)
                  {
                    if(desc_last[j].PrevSubhaloNr == desc_first[i].PrevSubhaloNr &&
                       desc_last[j].CurrSubhaloNr == desc_first[i].CurrSubhaloNr)
                      target = j;
                  }
                else /* nmatch_list[j] == 1 */
                  {
                    /* with a single element, the (possibly already updated) first entry is the tail */
                    if(desc_first[j].PrevSubhaloNr == desc_first[i].PrevSubhaloNr &&
                       desc_first[j].CurrSubhaloNr == desc_first[i].CurrSubhaloNr)
                      target = j;
                  }

                break; /* only the nearest non-empty predecessor can hold the same pair */
              }
          }

        if(target >= 0)
          {
            /* merge our first element's scores into the target task's tail element; the update is
             * mirrored both in the gathered copy (so later loop iterations see it) and, on the
             * owning task, in its local desc[] array */
            if(nmatch_list[target] > 1)
              {
                desc_last[target].DescScore += desc_first[i].DescScore;
                desc_last[target].ProgScore += desc_first[i].ProgScore;

                if(ThisTask == target)
                  {
                    desc[nmatch - 1].DescScore += desc_first[i].DescScore;
                    desc[nmatch - 1].ProgScore += desc_first[i].ProgScore;
                  }
              }
            else
              {
                desc_first[target].DescScore += desc_first[i].DescScore;
                desc_first[target].ProgScore += desc_first[i].ProgScore;

                if(ThisTask == target)
                  {
                    desc[0].DescScore += desc_first[i].DescScore;
                    desc[0].ProgScore += desc_first[i].ProgScore;
                  }
              }

            /* mark the merged-away element for elimination */
            desc_first[i].DescScore = -1;
            desc_first[i].ProgScore = -1;

            if(ThisTask == i)
              {
                desc[0].DescScore = -1;
                desc[0].ProgScore = -1;
              }
          }
      }

  /* free in reverse order of allocation (Mem appears to use LIFO allocation — note the consistent
   * reverse-order frees throughout this file) */
  Mem.myfree(nmatch_list);
  Mem.myfree(desc_last);
  Mem.myfree(desc_first);

  /* now eliminate the ones with negative score
   */
  if(nmatch > 0 && desc[0].DescScore < 0)
    {
      nmatch--;
      memmove(desc, desc + 1, nmatch * sizeof(desc_partdata));
    }

  return nmatch;
}
317 
318 void mergertree::mergertree_chain_up_descendants_with_same_progenitor(void)
319 {
320  /* sort by progenitor to bring equal ones next to each other */
321  mycxxsort_parallel(Progenitors, Progenitors + CurrNsubhalos, mergertree_compare_ProgSubhaloNr, Communicator);
322 
323  prog_list *elem_first = (prog_list *)Mem.mymalloc("elem_first", NTask * sizeof(prog_list));
324  prog_list *elem_last = (prog_list *)Mem.mymalloc("elem_last", NTask * sizeof(prog_list));
325 
326  /* note: the 0th element is guaranteed to be allocated even on ranks with zero CurrNsubhalos */
327  MPI_Allgather(&Progenitors[0], sizeof(prog_list), MPI_BYTE, elem_first, sizeof(prog_list), MPI_BYTE, Communicator);
328  MPI_Allgather(&Progenitors[CurrNsubhalos > 0 ? CurrNsubhalos - 1 : 0], sizeof(prog_list), MPI_BYTE, elem_last, sizeof(prog_list),
329  MPI_BYTE, Communicator);
330 
331  /* get list of the subhalo count on each processor, and the cumulative number stored before */
332  int *tab_CurrNsubhalos = (int *)Mem.mymalloc("tab_CurrNsubhalos", sizeof(int) * NTask);
333  MPI_Allgather(&CurrNsubhalos, 1, MPI_INT, tab_CurrNsubhalos, 1, MPI_INT, Communicator);
334 
335  int next_task = -1;
336  for(int i = ThisTask + 1; i < NTask; i++)
337  if(tab_CurrNsubhalos[i] > 0)
338  {
339  next_task = i;
340  break;
341  }
342 
343  int prev_task = -1;
344  for(int i = ThisTask - 1; i >= 0; i--)
345  if(tab_CurrNsubhalos[i] > 0)
346  {
347  prev_task = i;
348  break;
349  }
350 
351  for(int i = 0; i < CurrNsubhalos; i++)
352  {
353  if(i < CurrNsubhalos - 1)
354  {
355  if(Progenitors[i].ProgSubhaloNr == Progenitors[i + 1].ProgSubhaloNr && Progenitors[i].ProgSubhaloNr != HALONR_MAX)
356  Progenitors[i].NextDescSubhaloNr = Progenitors[i + 1].SubhaloNr;
357  else
358  Progenitors[i].NextDescSubhaloNr = HALONR_MAX;
359  }
360  else
361  {
362  if(next_task >= 0 && Progenitors[i].ProgSubhaloNr == elem_first[next_task].ProgSubhaloNr &&
363  Progenitors[i].ProgSubhaloNr != HALONR_MAX)
364  Progenitors[i].NextDescSubhaloNr = elem_first[next_task].SubhaloNr;
365  else
366  Progenitors[i].NextDescSubhaloNr = HALONR_MAX;
367  }
368 
369  if(i > 0)
370  {
371  if(Progenitors[i].ProgSubhaloNr != Progenitors[i - 1].ProgSubhaloNr && Progenitors[i].ProgSubhaloNr != HALONR_MAX)
372  Progenitors[i].FirstDescFlag = 1; /* flags the first progenitors */
373  else
374  Progenitors[i].FirstDescFlag = 0;
375  }
376  else
377  {
378  if(Progenitors[i].ProgSubhaloNr != HALONR_MAX &&
379  (ThisTask == 0 || (prev_task >= 0 && Progenitors[i].ProgSubhaloNr != elem_last[prev_task].ProgSubhaloNr)))
380  Progenitors[i].FirstDescFlag = 1; /* flags the first progenitors */
381  else
382  Progenitors[i].FirstDescFlag = 0;
383  }
384  }
385 
386  Mem.myfree(tab_CurrNsubhalos);
387  Mem.myfree(elem_last);
388  Mem.myfree(elem_first);
389 
390  /* bring back into original order */
391  mycxxsort_parallel(Progenitors, Progenitors + CurrNsubhalos, mergertree_compare_SubhaloNr, Communicator);
392 }
393 
394 /* This function determines the next progenitor field, which chains up those subhalos that have the same descendant */
395 void mergertree::mergertree_chain_up_progenitors_with_same_descendant(void)
396 {
397  /* sort by descendant to bring equal ones next to each other */
398  mycxxsort_parallel(Descendants, Descendants + PrevNsubhalos, mergertree_compare_DescSubhaloNr, Communicator);
399 
400  desc_list *elem_first = (desc_list *)Mem.mymalloc("elem_first", NTask * sizeof(desc_list));
401  desc_list *elem_last = (desc_list *)Mem.mymalloc("elem_last", NTask * sizeof(desc_list));
402 
403  /* note: the 0th element is guaranteed to be allocated even on ranks with zero PrevNsubhalos */
404  MPI_Allgather(&Descendants[0], sizeof(desc_list), MPI_BYTE, elem_first, sizeof(desc_list), MPI_BYTE, Communicator);
405  MPI_Allgather(&Descendants[PrevNsubhalos > 0 ? PrevNsubhalos - 1 : 0], sizeof(desc_list), MPI_BYTE, elem_last, sizeof(desc_list),
406  MPI_BYTE, Communicator);
407 
408  /* get list of the subhalo count on each processor, and the cumulative number stored before */
409  int *tab_PrevNsubhalos = (int *)Mem.mymalloc("tab_PrevNsubhalos", sizeof(int) * NTask);
410  MPI_Allgather(&PrevNsubhalos, 1, MPI_INT, tab_PrevNsubhalos, 1, MPI_INT, Communicator);
411 
412  int next_task = -1;
413  for(int i = ThisTask + 1; i < NTask; i++)
414  if(tab_PrevNsubhalos[i] > 0)
415  {
416  next_task = i;
417  break;
418  }
419 
420  int prev_task = -1;
421  for(int i = ThisTask - 1; i >= 0; i--)
422  if(tab_PrevNsubhalos[i] > 0)
423  {
424  prev_task = i;
425  break;
426  }
427 
428  for(int i = 0; i < PrevNsubhalos; i++)
429  {
430  if(i < PrevNsubhalos - 1)
431  {
432  if(Descendants[i].DescSubhaloNr == Descendants[i + 1].DescSubhaloNr && Descendants[i].DescSubhaloNr != HALONR_MAX)
433  Descendants[i].NextProgSubhaloNr = Descendants[i + 1].PrevSubhaloNr;
434  else
435  Descendants[i].NextProgSubhaloNr = HALONR_MAX;
436  }
437  else
438  {
439  if(next_task >= 0 && Descendants[i].DescSubhaloNr == elem_first[next_task].DescSubhaloNr &&
440  Descendants[i].DescSubhaloNr != HALONR_MAX)
441  Descendants[i].NextProgSubhaloNr = elem_first[next_task].PrevSubhaloNr;
442  else
443  Descendants[i].NextProgSubhaloNr = HALONR_MAX;
444  }
445 
446  if(i > 0)
447  {
448  if(Descendants[i].DescSubhaloNr != Descendants[i - 1].DescSubhaloNr && Descendants[i].DescSubhaloNr != HALONR_MAX)
449  Descendants[i].FirstProgFlag = 1; /* flags the first progenitors */
450  else
451  Descendants[i].FirstProgFlag = 0;
452  }
453  else
454  {
455  if(Descendants[i].DescSubhaloNr != HALONR_MAX &&
456  (ThisTask == 0 || (prev_task >= 0 && Descendants[i].DescSubhaloNr != elem_last[prev_task].DescSubhaloNr)))
457  Descendants[i].FirstProgFlag = 1; /* flags the first progenitors */
458  else
459  Descendants[i].FirstProgFlag = 0;
460  }
461  }
462 
463  Mem.myfree(tab_PrevNsubhalos);
464  Mem.myfree(elem_last);
465  Mem.myfree(elem_first);
466 
467  /* bring back into original order */
468  mycxxsort_parallel(Descendants, Descendants + PrevNsubhalos, mergertree_compare_PrevSubhaloNr, Communicator);
469 }
470 
/******** set first progenitor field ****/
/* For every current subhalo, records in Progenitors[].FirstProgSubhaloNr the previous subhalo that was
 * flagged (via FirstProgFlag) as the first progenitor in the chain of progenitors sharing that
 * descendant. The information is sent to the task that stores the descendant subhalo, identified by
 * walking the cumulative per-task subhalo counts. Descendants[] is temporarily reordered and restored
 * at the end.
 */
void mergertree::mergertree_set_first_progenitor_with_same_descendant(void)
{
  /* sort by descendant to bring equal ones next to each other */
  mycxxsort_parallel(Descendants, Descendants + PrevNsubhalos, mergertree_compare_DescSubhaloNr, Communicator);

  int *Send_count = (int *)Mem.mymalloc("Send_count", sizeof(int) * NTask);
  int *Send_offset = (int *)Mem.mymalloc("Send_offset", sizeof(int) * NTask);
  int *Recv_count = (int *)Mem.mymalloc("Recv_count", sizeof(int) * NTask);
  int *Recv_offset = (int *)Mem.mymalloc("Recv_offset", sizeof(int) * NTask);

  /* per-task subhalo counts of the current catalogue */
  int *tab_CurrNsubhalos = (int *)Mem.mymalloc("tab_CurrNsubhalos", sizeof(int) * NTask);
  MPI_Allgather(&CurrNsubhalos, 1, MPI_INT, tab_CurrNsubhalos, 1, MPI_INT, Communicator);

  /* number of current subhalos stored on tasks before this one (global index of our first subhalo) */
  long long cumul_currnsubhalos = 0;
  for(int i = 0; i < ThisTask; i++)
    cumul_currnsubhalos += tab_CurrNsubhalos[i];

  /* message payload: target descendant subhalo and the progenitor to record for it */
  struct pair_data
  {
    long long subhalonr;
    long long firstprognr;
  };

  pair_data *send_data = NULL;
  pair_data *recv_data = NULL;
  int nexport = 0, nimport = 0;

  for(int mode = 0; mode < 2; mode++) // go through this twice to simplify bookkeeping
    {
      for(int i = 0; i < NTask; i++)
        Send_count[i] = 0;

      int task = 0;
      long long first = 0;

      for(int i = 0; i < PrevNsubhalos; i++)
        {
          if(Descendants[i].FirstProgFlag && Descendants[i].DescSubhaloNr != HALONR_MAX)
            {
              /* advance to the task that stores this descendant subhalo number; works because the
               * list is sorted by DescSubhaloNr, so the target task only ever increases */
              while(task < NTask - 1 && Descendants[i].DescSubhaloNr >= first + tab_CurrNsubhalos[task])
                {
                  first += tab_CurrNsubhalos[task];
                  task++;
                }

              if(mode == 0) /* first pass: only count */
                Send_count[task]++;
              else /* second pass: fill the send buffer */
                {
                  int off = Send_offset[task] + Send_count[task]++;

                  send_data[off].subhalonr = Descendants[i].DescSubhaloNr;
                  send_data[off].firstprognr = Descendants[i].PrevSubhaloNr;
                }
            }
        }

      if(mode == 0) // prepare offset tables
        {
          MPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, Communicator);

          Recv_offset[0] = 0;
          Send_offset[0] = 0;

          for(int j = 0; j < NTask; j++)
            {
              nexport += Send_count[j];
              nimport += Recv_count[j];

              if(j > 0)
                {
                  Send_offset[j] = Send_offset[j - 1] + Send_count[j - 1];
                  Recv_offset[j] = Recv_offset[j - 1] + Recv_count[j - 1];
                }
            }

          send_data = (pair_data *)Mem.mymalloc("pair_data", nexport * sizeof(pair_data));
          recv_data = (pair_data *)Mem.mymalloc("pair_data", nimport * sizeof(pair_data));
        }
    }

  /* exchange data: pairwise exchanges over all task pairings generated by XOR with the group index */
  for(int ngrp = 0; ngrp < (1 << PTask); ngrp++)
    {
      int recvTask = ThisTask ^ ngrp;

      if(recvTask < NTask)
        {
          if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0)
            MPI_Sendrecv(&send_data[Send_offset[recvTask]], Send_count[recvTask] * sizeof(pair_data), MPI_BYTE, recvTask, TAG_DENS_A,
                         &recv_data[Recv_offset[recvTask]], Recv_count[recvTask] * sizeof(pair_data), MPI_BYTE, recvTask, TAG_DENS_A,
                         Communicator, MPI_STATUS_IGNORE);
        }
    }

  /* default: no first progenitor known */
  for(int i = 0; i < CurrNsubhalos; i++)
    Progenitors[i].FirstProgSubhaloNr = HALONR_MAX;

  /* store the received first-progenitor numbers at the local index of the addressed subhalo */
  for(int i = 0; i < nimport; i++)
    {
      int off = recv_data[i].subhalonr - cumul_currnsubhalos;

      if(off < 0 || off >= CurrNsubhalos)
        Terminate("off = %d CurrNsubhalos = %d", off, CurrNsubhalos);

      Progenitors[off].FirstProgSubhaloNr = recv_data[i].firstprognr;
    }

  /* free in reverse order of allocation */
  Mem.myfree(recv_data);
  Mem.myfree(send_data);

  Mem.myfree(tab_CurrNsubhalos);

  Mem.myfree(Recv_offset);
  Mem.myfree(Recv_count);
  Mem.myfree(Send_offset);
  Mem.myfree(Send_count);

  /* bring back into original order */
  mycxxsort_parallel(Descendants, Descendants + PrevNsubhalos, mergertree_compare_PrevSubhaloNr, Communicator);
}
593 
/********** pick the progenitor with the maximum score *****/
/* For every current subhalo, selects among all matched segments the previous subhalo with the highest
 * ProgScore and records it as ProgSubhaloNr (HALONR_MAX if none). Each matched pair is shipped to the
 * task that stores the current subhalo it refers to, located via cumulative per-task subhalo counts.
 */
void mergertree::mergertree_select_maximum_score_progenitors(int nmatch)
{
  /* order the matched pairs by current subhalo number so the target task advances monotonically below */
  mycxxsort_parallel(desc, desc + nmatch, mergertree_compare_NewSubNr_PrevSubNr, Communicator);

  int *Send_count = (int *)Mem.mymalloc("Send_count", sizeof(int) * NTask);
  int *Send_offset = (int *)Mem.mymalloc("Send_offset", sizeof(int) * NTask);
  int *Recv_count = (int *)Mem.mymalloc("Recv_count", sizeof(int) * NTask);
  int *Recv_offset = (int *)Mem.mymalloc("Recv_offset", sizeof(int) * NTask);

  /* per-task subhalo counts of the current catalogue */
  int *tab_CurrNsubhalos = (int *)Mem.mymalloc("tab_CurrNsubhalos", sizeof(int) * NTask);
  MPI_Allgather(&CurrNsubhalos, 1, MPI_INT, tab_CurrNsubhalos, 1, MPI_INT, Communicator);

  /* number of current subhalos stored on tasks before this one (global index of our first subhalo) */
  long long cumul_currnsubhalos = 0;
  for(int i = 0; i < ThisTask; i++)
    cumul_currnsubhalos += tab_CurrNsubhalos[i];

  desc_partdata *send_data = NULL;
  desc_partdata *recv_data = NULL;
  int nexport = 0, nimport = 0;
  for(int mode = 0; mode < 2; mode++) // go through this twice to simplify bookkeeping
    {
      for(int i = 0; i < NTask; i++)
        Send_count[i] = 0;

      int task = 0;
      unsigned long long first = 0;
      for(int i = 0; i < nmatch; i++)
        {
          /* advance to the task that stores this current subhalo number */
          while(task < NTask - 1 && desc[i].CurrSubhaloNr.get() >= first + tab_CurrNsubhalos[task])
            {
              first += tab_CurrNsubhalos[task];
              task++;
            }

          if(mode == 0) /* first pass: only count */
            Send_count[task]++;
          else /* second pass: fill the send buffer */
            {
              int off = Send_offset[task] + Send_count[task]++;

              send_data[off] = desc[i];
            }
        }

      if(mode == 0) // prepare offset tables
        {
          MPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, Communicator);

          Recv_offset[0] = 0;
          Send_offset[0] = 0;

          for(int j = 0; j < NTask; j++)
            {
              nexport += Send_count[j];
              nimport += Recv_count[j];

              if(j > 0)
                {
                  Send_offset[j] = Send_offset[j - 1] + Send_count[j - 1];
                  Recv_offset[j] = Recv_offset[j - 1] + Recv_count[j - 1];
                }
            }

          send_data = (desc_partdata *)Mem.mymalloc("send_data", nexport * sizeof(desc_partdata));
          recv_data = (desc_partdata *)Mem.mymalloc("recv_data", nimport * sizeof(desc_partdata));
        }
    }

  /* exchange data: pairwise exchanges over all task pairings generated by XOR with the group index */
  for(int ngrp = 0; ngrp < (1 << PTask); ngrp++)
    {
      int recvTask = ThisTask ^ ngrp;

      if(recvTask < NTask)
        {
          if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0)
            MPI_Sendrecv(&send_data[Send_offset[recvTask]], Send_count[recvTask] * sizeof(desc_partdata), MPI_BYTE, recvTask,
                         TAG_DENS_A, &recv_data[Recv_offset[recvTask]], Recv_count[recvTask] * sizeof(desc_partdata), MPI_BYTE,
                         recvTask, TAG_DENS_A, Communicator, MPI_STATUS_IGNORE);
        }
    }

  /* initialize the progenitor list: global subhalo number, no progenitor, zero best score */
  for(int i = 0; i < CurrNsubhalos; i++)
    {
      Progenitors[i].SubhaloNr = cumul_currnsubhalos + i;
      Progenitors[i].ProgSubhaloNr = HALONR_MAX;
      Progenitors[i].MaxScore = 0;
    }

  /* running-maximum selection over all received matched pairs addressed to our subhalos */
  for(int i = 0; i < nimport; i++)
    {
      int index = recv_data[i].CurrSubhaloNr.get() - cumul_currnsubhalos;

      if(index < 0 || index >= CurrNsubhalos)
        Terminate("index=%d CurrNsubhalos=%d", index, CurrNsubhalos);

      if(recv_data[i].ProgScore > Progenitors[index].MaxScore)
        {
          Progenitors[index].MaxScore = recv_data[i].ProgScore;
          Progenitors[index].ProgSubhaloNr = recv_data[i].PrevSubhaloNr.get();
        }
    }

  /* free in reverse order of allocation */
  Mem.myfree(recv_data);
  Mem.myfree(send_data);

  Mem.myfree(tab_CurrNsubhalos);

  Mem.myfree(Recv_offset);
  Mem.myfree(Recv_count);
  Mem.myfree(Send_offset);
  Mem.myfree(Send_count);
}
708 
/********** determine the descendant with the maximum score *****/
/* For every previous subhalo, selects among all matched segments the current subhalo with the highest
 * DescScore and records it as DescSubhaloNr (HALONR_MAX if none). Each matched pair is shipped to the
 * task that stores the previous subhalo it refers to, located via cumulative per-task subhalo counts.
 * Note: no sort is done here — desc[] is presumably still ordered by PrevSubhaloNr from
 * mergertree_find_matching_segments_and_scores() (TODO confirm against the call site).
 */
void mergertree::mergertree_select_maximum_score_descendants(int nmatch)
{
  int *Send_count = (int *)Mem.mymalloc("Send_count", sizeof(int) * NTask);
  int *Send_offset = (int *)Mem.mymalloc("Send_offset", sizeof(int) * NTask);
  int *Recv_count = (int *)Mem.mymalloc("Recv_count", sizeof(int) * NTask);
  int *Recv_offset = (int *)Mem.mymalloc("Recv_offset", sizeof(int) * NTask);

  /* get lists of the subhalo count on each processors, and the cumulative number stored before */
  int *tab_PrevNsubhalos = (int *)Mem.mymalloc("tab_PrevNsubhalos", sizeof(int) * NTask);
  MPI_Allgather(&PrevNsubhalos, 1, MPI_INT, tab_PrevNsubhalos, 1, MPI_INT, Communicator);

  /* number of previous subhalos stored on tasks before this one (global index of our first subhalo) */
  long long cumul_prevnsubhalos = 0;
  for(int i = 0; i < ThisTask; i++)
    cumul_prevnsubhalos += tab_PrevNsubhalos[i];

  desc_partdata *send_data = NULL;
  desc_partdata *recv_data = NULL;
  int nexport = 0, nimport = 0;
  for(int mode = 0; mode < 2; mode++) // go through this twice to simplify bookkeeping
    {
      for(int i = 0; i < NTask; i++)
        Send_count[i] = 0;

      int task = 0;
      unsigned long long first = 0;
      for(int i = 0; i < nmatch; i++)
        {
          /* sanity check: matched pairs can only exist if the previous catalogue was non-empty */
          if(PrevTotNsubhalos < 1)
            Terminate("PrevTotNsubhalos = %lld", PrevTotNsubhalos);

          /* advance to the task that stores this previous subhalo number */
          while(task < NTask - 1 && desc[i].PrevSubhaloNr.get() >= first + tab_PrevNsubhalos[task])
            {
              first += tab_PrevNsubhalos[task];
              task++;
            }

          if(mode == 0) /* first pass: only count */
            Send_count[task]++;
          else /* second pass: fill the send buffer */
            {
              int off = Send_offset[task] + Send_count[task]++;

              send_data[off] = desc[i];
            }
        }

      if(mode == 0) // prepare offset tables
        {
          MPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, Communicator);

          Recv_offset[0] = 0;
          Send_offset[0] = 0;

          for(int j = 0; j < NTask; j++)
            {
              nexport += Send_count[j];
              nimport += Recv_count[j];

              if(j > 0)
                {
                  Send_offset[j] = Send_offset[j - 1] + Send_count[j - 1];
                  Recv_offset[j] = Recv_offset[j - 1] + Recv_count[j - 1];
                }
            }

          send_data = (desc_partdata *)Mem.mymalloc("send_data", nexport * sizeof(desc_partdata));
          recv_data = (desc_partdata *)Mem.mymalloc("recv_data", nimport * sizeof(desc_partdata));
        }
    }

  /* exchange data: pairwise exchanges over all task pairings generated by XOR with the group index */
  for(int ngrp = 0; ngrp < (1 << PTask); ngrp++)
    {
      int recvTask = ThisTask ^ ngrp;

      if(recvTask < NTask)
        {
          if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0)
            MPI_Sendrecv(&send_data[Send_offset[recvTask]], Send_count[recvTask] * sizeof(desc_partdata), MPI_BYTE, recvTask,
                         TAG_DENS_A, &recv_data[Recv_offset[recvTask]], Recv_count[recvTask] * sizeof(desc_partdata), MPI_BYTE,
                         recvTask, TAG_DENS_A, Communicator, MPI_STATUS_IGNORE);
        }
    }

  /* initialize the descendant list: global subhalo number, no descendant, zero best score */
  for(int i = 0; i < PrevNsubhalos; i++)
    {
      Descendants[i].PrevSubhaloNr = cumul_prevnsubhalos + i;
      Descendants[i].DescSubhaloNr = HALONR_MAX;
      Descendants[i].MaxScore = 0;
    }

  /* running-maximum selection over all received matched pairs addressed to our subhalos */
  for(int i = 0; i < nimport; i++)
    {
      int index = recv_data[i].PrevSubhaloNr.get() - cumul_prevnsubhalos;

      if(index < 0 || index >= PrevNsubhalos)
        Terminate(
            "index=%d i=%d nimport=%d PrevNsubhalos=%d recv_data[i].PrevSubhaloNr=%lld PrevTotNsubhalos=%lld "
            "cumul_prevnsubhalos=%lld",
            index, i, nimport, PrevNsubhalos, (long long)recv_data[i].PrevSubhaloNr.get(), PrevTotNsubhalos, cumul_prevnsubhalos);

      if(recv_data[i].DescScore > Descendants[index].MaxScore)
        {
          Descendants[index].MaxScore = recv_data[i].DescScore;
          Descendants[index].DescSubhaloNr = recv_data[i].CurrSubhaloNr.get();
        }
    }

  /* free in reverse order of allocation */
  Mem.myfree(recv_data);
  Mem.myfree(send_data);

  Mem.myfree(tab_PrevNsubhalos);

  Mem.myfree(Recv_offset);
  Mem.myfree(Recv_count);
  Mem.myfree(Send_offset);
  Mem.myfree(Send_count);
}
828 
/* For every previous subhalo, records in Descendants[].FirstDescSubhaloNr the current subhalo that was
 * flagged (via FirstDescFlag) as the first descendant in the chain of descendants sharing that
 * progenitor. The information is sent to the task that stores the progenitor subhalo, identified by
 * walking the cumulative per-task subhalo counts. Progenitors[] is temporarily reordered and restored
 * at the end.
 */
void mergertree::mergertree_set_first_descendant_with_same_progenitor(void)
{
  /* sort by progenitor to bring equal ones next to each other */
  mycxxsort_parallel(Progenitors, Progenitors + CurrNsubhalos, mergertree_compare_ProgSubhaloNr, Communicator);

  int *Send_count = (int *)Mem.mymalloc("Send_count", sizeof(int) * NTask);
  int *Send_offset = (int *)Mem.mymalloc("Send_offset", sizeof(int) * NTask);
  int *Recv_count = (int *)Mem.mymalloc("Recv_count", sizeof(int) * NTask);
  int *Recv_offset = (int *)Mem.mymalloc("Recv_offset", sizeof(int) * NTask);

  /* per-task subhalo counts of the previous catalogue */
  int *tab_PrevNsubhalos = (int *)Mem.mymalloc("tab_PrevNsubhalos", sizeof(int) * NTask);
  MPI_Allgather(&PrevNsubhalos, 1, MPI_INT, tab_PrevNsubhalos, 1, MPI_INT, Communicator);

  /* number of previous subhalos stored on tasks before this one (global index of our first subhalo) */
  long long cumul_prevnsubhalos = 0;
  for(int i = 0; i < ThisTask; i++)
    cumul_prevnsubhalos += tab_PrevNsubhalos[i];

  /* message payload: target progenitor subhalo and the descendant to record for it */
  struct pair_data
  {
    long long subhalonr;
    long long firstdescnr;
  };

  pair_data *send_data = NULL;
  pair_data *recv_data = NULL;
  int nexport = 0, nimport = 0;

  for(int mode = 0; mode < 2; mode++) // go through this twice to simplify bookkeeping
    {
      for(int i = 0; i < NTask; i++)
        Send_count[i] = 0;

      int task = 0;
      long long first = 0;

      for(int i = 0; i < CurrNsubhalos; i++)
        {
          if(Progenitors[i].FirstDescFlag && Progenitors[i].ProgSubhaloNr != HALONR_MAX)
            {
              /* advance to the task that stores this progenitor subhalo number; works because the
               * list is sorted by ProgSubhaloNr, so the target task only ever increases */
              while(task < NTask - 1 && Progenitors[i].ProgSubhaloNr >= first + tab_PrevNsubhalos[task])
                {
                  first += tab_PrevNsubhalos[task];
                  task++;
                }

              if(mode == 0) /* first pass: only count */
                Send_count[task]++;
              else /* second pass: fill the send buffer */
                {
                  int off = Send_offset[task] + Send_count[task]++;

                  send_data[off].subhalonr = Progenitors[i].ProgSubhaloNr;
                  send_data[off].firstdescnr = Progenitors[i].SubhaloNr;
                }
            }
        }

      if(mode == 0) // prepare offset tables
        {
          MPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, Communicator);

          Recv_offset[0] = 0;
          Send_offset[0] = 0;

          for(int j = 0; j < NTask; j++)
            {
              nexport += Send_count[j];
              nimport += Recv_count[j];

              if(j > 0)
                {
                  Send_offset[j] = Send_offset[j - 1] + Send_count[j - 1];
                  Recv_offset[j] = Recv_offset[j - 1] + Recv_count[j - 1];
                }
            }

          send_data = (pair_data *)Mem.mymalloc("pair_data", nexport * sizeof(pair_data));
          recv_data = (pair_data *)Mem.mymalloc("pair_data", nimport * sizeof(pair_data));
        }
    }

  /* exchange data: pairwise exchanges over all task pairings generated by XOR with the group index */
  for(int ngrp = 0; ngrp < (1 << PTask); ngrp++)
    {
      int recvTask = ThisTask ^ ngrp;

      if(recvTask < NTask)
        {
          if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0)
            MPI_Sendrecv(&send_data[Send_offset[recvTask]], Send_count[recvTask] * sizeof(pair_data), MPI_BYTE, recvTask, TAG_DENS_A,
                         &recv_data[Recv_offset[recvTask]], Recv_count[recvTask] * sizeof(pair_data), MPI_BYTE, recvTask, TAG_DENS_A,
                         Communicator, MPI_STATUS_IGNORE);
        }
    }

  /* default: no first descendant known */
  for(int i = 0; i < PrevNsubhalos; i++)
    Descendants[i].FirstDescSubhaloNr = HALONR_MAX;

  /* store the received first-descendant numbers at the local index of the addressed subhalo */
  for(int i = 0; i < nimport; i++)
    {
      int off = recv_data[i].subhalonr - cumul_prevnsubhalos;

      if(off < 0 || off >= PrevNsubhalos)
        Terminate("off = %d PrevNsubhalos = %d", off, PrevNsubhalos);

      Descendants[off].FirstDescSubhaloNr = recv_data[i].firstdescnr;
    }

  /* free in reverse order of allocation */
  Mem.myfree(recv_data);
  Mem.myfree(send_data);

  Mem.myfree(tab_PrevNsubhalos);

  Mem.myfree(Recv_offset);
  Mem.myfree(Recv_count);
  Mem.myfree(Send_offset);
  Mem.myfree(Send_count);

  /* bring back into original order */
  mycxxsort_parallel(Progenitors, Progenitors + CurrNsubhalos, mergertree_compare_SubhaloNr, Communicator);
}
950 
951 #endif
global_data_all_processes All
Definition: main.cc:40
#define SMALLNUM
Definition: constants.h:83
#define HALONR_MAX
Definition: idstorage.h:20
#define Terminate(...)
Definition: macros.h:19
#define TAG_DENS_A
Definition: mpi_utils.h:50
void sumup_large_ints(int n, int *src, long long *res, MPI_Comm comm)
memory Mem
Definition: main.cc:44
expr pow(half base, half exp)
Definition: half.hpp:2803
double mycxxsort_parallel(T *begin, T *end, Comp comp, MPI_Comm comm)