GADGET-4
subfind_history.cc
/*******************************************************************************
 * \copyright This file is part of the GADGET4 N-body/SPH code developed
 * \copyright by Volker Springel. Copyright (C) 2014-2020 by Volker Springel
 * \copyright (vspringel@mpa-garching.mpg.de) and all contributing authors.
 *******************************************************************************/

#include "gadgetconfig.h"

#ifdef SUBFIND
#if defined(MERGERTREE) && defined(SUBFIND_HBT)

#include <gsl/gsl_math.h>
#include <mpi.h>
#include <algorithm>
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <cstring>

#include "../data/allvars.h"
#include "../data/dtypes.h"
#include "../data/intposconvert.h"
#include "../data/mymalloc.h"
#include "../domain/domain.h"
#include "../fof/fof.h"
#include "../gravtree/gravtree.h"
#include "../logs/timer.h"
#include "../main/simulation.h"
#include "../mpi_utils/mpi_utils.h"
#include "../sort/cxxsort.h"
#include "../sort/parallel_sort.h"
#include "../sort/peano.h"
#include "../subfind/subfind.h"
#include "../system/system.h"
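/* Process a single FoF group with the history-based (HBT) substructure finder:
 * subhalo candidates are seeded from the subhalo membership recorded in the
 * previous output, each candidate is subjected to an unbinding procedure
 * (serially for small candidates, collectively for large ones), and the
 * surviving bound structures are registered in the Subhalo[] catalogue of
 * group 'gr'. */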
template <typename partset>
void fof<partset>::subfind_hbt_single_group(domain<partset> *SubDomain, domain<partset> *SingleDomain, domain_options mode, int gr)
{
  /* get total group length */
  long long totgrouplen;
  sumup_large_ints(1, &NumPartGroup, &totgrouplen, SubComm);

  /******************* determine subhalo candidates based on previous subhalo catalogue ***************/

  hbt_pcand_t *pcand = (hbt_pcand_t *)Mem.mymalloc_movable(&pcand, "pcand", (NumPartGroup + 1) * sizeof(hbt_pcand_t));

  for(int k = 0; k < NumPartGroup; k++)
    {
      /* provisionally assign a new subhalo number based on the previous group catalogue - this will be modified in the following */
      Tp->PS[IndexList[k]].SubhaloNr = Tp->P[IndexList[k]].PrevSubhaloNr;

      pcand[k].SubhaloNr         = Tp->PS[IndexList[k]].SubhaloNr;
      pcand[k].PrevSizeOfSubhalo = Tp->P[IndexList[k]].PrevSizeOfSubhalo;
      pcand[k].index             = -1; // just put here to signal that this is invalid at this stage (note that we'll do a parallel sort)
    }

  /* sort according to subhalonr */
  mycxxsort_parallel(pcand, pcand + NumPartGroup, subfind_hbt_compare_pcand_subhalonr, SubComm);

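  /* For orientation: the comparator used above is not defined in this file. A
   * minimal sketch of what it plausibly looks like (ordering by SubhaloNr only;
   * the actual definition in the subfind module may differ in its details):
   *
   *   inline bool subfind_hbt_compare_pcand_subhalonr(const hbt_pcand_t &a, const hbt_pcand_t &b)
   *   {
   *     return a.SubhaloNr < b.SubhaloNr;  // group particles of the same previous subhalo together
   *   }
   */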
  int *NumPartGroup_list = (int *)Mem.mymalloc_movable(&NumPartGroup_list, "NumPartGroup_list", SubNTask * sizeof(int));
  MPI_Allgather(&NumPartGroup, sizeof(int), MPI_BYTE, NumPartGroup_list, sizeof(int), MPI_BYTE, SubComm);

  /* get the last element of each task */
  hbt_pcand_t *elem_last = (hbt_pcand_t *)Mem.mymalloc_movable(&elem_last, "elem_last", SubNTask * sizeof(hbt_pcand_t));

  /* note: the 0th element is guaranteed to be allocated even on ranks with zero NumPartGroup */
  MPI_Allgather(&pcand[NumPartGroup > 0 ? NumPartGroup - 1 : 0], sizeof(hbt_pcand_t), MPI_BYTE, elem_last, sizeof(hbt_pcand_t),
                MPI_BYTE, SubComm);

  /* if a new section begins on the current processor, we register it on this processor as a candidate */
  /* initialize the number of current candidates */
  bool element_before_present = false;
  hbt_pcand_t element_before{};

  for(int task = SubThisTask - 1; task >= 0; task--)
    {
      if(NumPartGroup_list[task] > 0)
        {
          element_before_present = true;
          element_before         = elem_last[task];
          break;
        }
    }

  int marked = 0;
  count_cand = 0;

  for(int i = 0; i < NumPartGroup; i++)
    {
      if(i == 0 && !element_before_present)
        count_cand++;
      else
        {
          MyHaloNrType prevnr;

          if(i == 0 && element_before_present)
            prevnr = element_before.SubhaloNr;
          else
            prevnr = pcand[i - 1].SubhaloNr;

          if(pcand[i].SubhaloNr != prevnr)
            count_cand++;
        }
    }
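  /* The loop above only counted the candidates: every place where the globally
   * sorted SubhaloNr sequence changes (taking into account the last element of
   * the preceding non-empty task) starts a new candidate. The loop below repeats
   * the same boundary test and this time records the candidate subhalo numbers. */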
  /* allocate a list to store local subhalo candidates for this group */
  hbt_subcand_t *loc_candidates =
      (hbt_subcand_t *)Mem.mymalloc_movable(&loc_candidates, "loc_candidates", count_cand * sizeof(hbt_subcand_t));

  count_cand = 0;

  for(int i = 0; i < NumPartGroup; i++)
    {
      if(i == 0 && !element_before_present)
        {
          loc_candidates[count_cand].SubhaloNr = pcand[i].SubhaloNr;
          count_cand++;
        }
      else
        {
          MyHaloNrType prevnr;

          if(i == 0 && element_before_present)
            prevnr = element_before.SubhaloNr;
          else
            prevnr = pcand[i - 1].SubhaloNr;

          if(pcand[i].SubhaloNr != prevnr)
            {
              loc_candidates[count_cand].SubhaloNr = pcand[i].SubhaloNr;
              count_cand++;
            }
        }
    }

  /* establish total number of candidates */
  long long totcand;
  sumup_large_ints(1, &count_cand, &totcand, SubComm);

  int nsubhalos_old = Nsubhalos;

  if(Nsubhalos + totcand + 1 > MaxNsubhalos)
    {
      // warn("Nsubhalos=%d + totcand=%lld >= MaxNsubhalos=%d", Nsubhalos, totcand, MaxNsubhalos);

      MaxNsubhalos = 1.25 * (Nsubhalos + totcand + 1);

      Subhalo = (subhalo_properties *)Mem.myrealloc_movable(Subhalo, MaxNsubhalos * sizeof(subhalo_properties));
    }

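  /* The '+ 1' reserves room for the fiducial background candidate that is
   * appended further below, and growing MaxNsubhalos by a factor of 1.25 leaves
   * headroom so that the catalogue is not reallocated for every group. */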
  /* assemble a list of the candidates on all tasks */
  hbt_subcand_t *all_candidates =
      (hbt_subcand_t *)Mem.mymalloc_movable(&all_candidates, "all_candidates", (totcand + 1) * sizeof(hbt_subcand_t));

  int *countlist = (int *)Mem.mymalloc_movable(&countlist, "countlist", SubNTask * sizeof(int));
  int *offset    = (int *)Mem.mymalloc_movable(&offset, "offset", SubNTask * sizeof(int));

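  /* counts and offsets are computed in bytes so that the structure array can be
   * gathered with MPI_BYTE, avoiding the need for a derived MPI datatype */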
  int count = count_cand * sizeof(hbt_subcand_t); /* length in bytes */
  MPI_Allgather(&count, 1, MPI_INT, countlist, 1, MPI_INT, SubComm);

  offset[0] = 0;
  for(int i = 1; i < SubNTask; i++)
    offset[i] = offset[i - 1] + countlist[i - 1];

  MPI_Allgatherv(loc_candidates, count, MPI_BYTE, all_candidates, countlist, offset, MPI_BYTE, SubComm);

  /* sort the candidates by subhalonr */
  mycxxsort(all_candidates, all_candidates + totcand, subfind_hbt_compare_subcand_subhalonr);

  /* now determine the size of the candidates */
  long long *size_list            = (long long *)Mem.mymalloc_clear("size_list", totcand * sizeof(long long));
  long long *summed_prevsize_list = (long long *)Mem.mymalloc_clear("summed_prevsize_list", totcand * sizeof(long long));

  int j = 0;
  for(int k = 0; k < NumPartGroup; k++)
    {
      if(j >= totcand)
        Terminate("can't be: k=%d j=%d NumPartGroup=%d totcand=%lld\n", k, j, NumPartGroup, totcand);

      while(j < totcand && pcand[k].SubhaloNr > all_candidates[j].SubhaloNr)
        j++;

      if(pcand[k].SubhaloNr != all_candidates[j].SubhaloNr)
        Terminate("can't be: k=%d NumPartGroup=%d pcand[k].SubhaloNr=%lld j=%d all_candidates[j].SubhaloNr=%lld\n", k,
                  NumPartGroup, (long long)pcand[k].SubhaloNr.get(), j, (long long)all_candidates[j].SubhaloNr.get());

      size_list[j]++;
      summed_prevsize_list[j] += pcand[k].PrevSizeOfSubhalo.get();
    }

  MPI_Allreduce(MPI_IN_PLACE, size_list, totcand, MPI_LONG_LONG, MPI_SUM, SubComm);
  MPI_Allreduce(MPI_IN_PLACE, summed_prevsize_list, totcand, MPI_LONG_LONG, MPI_SUM, SubComm);

  for(int i = 0; i < totcand; i++)
    {
      all_candidates[i].len           = size_list[i];
      all_candidates[i].summedprevlen = summed_prevsize_list[i];
    }

  Mem.myfree(summed_prevsize_list);
  Mem.myfree(size_list);

  /* do a sanity test */
  long long lensum = 0;
  for(int i = 0; i < totcand; i++)
    lensum += all_candidates[i].len;

  if(lensum != totgrouplen)
    Terminate("lensum=%lld != totgrouplen=%lld\n", lensum, totgrouplen);

  /*******************************************/

  /* find the group of previously unbound ones, and if this candidate exists, eliminate it */
  for(int i = 0; i < totcand; i++)
    if(all_candidates[i].SubhaloNr.get() == HALONR_MAX)
      {
        all_candidates[i] = all_candidates[totcand - 1];
        totcand--;
        break;
      }

  /* let's now eliminate small groups and flag the corresponding particles as unbound */
  {
    /* sort the candidates according to previous subhalonr */
    mycxxsort(all_candidates, all_candidates + totcand, subfind_hbt_compare_subcand_subhalonr);

    /* reestablish a locally sorted pcand */
    for(int k = 0; k < NumPartGroup; k++)
      {
        pcand[k].SubhaloNr = Tp->PS[IndexList[k]].SubhaloNr;
        pcand[k].index     = IndexList[k];
      }
    mycxxsort(pcand, pcand + NumPartGroup, subfind_hbt_compare_pcand_subhalonr);

    long long sum = 0;

    int p = 0;

    for(int i = 0; i < totcand; i++)
      if(all_candidates[i].len < All.DesLinkNgb)
        {
          while(p < NumPartGroup && pcand[p].SubhaloNr < all_candidates[i].SubhaloNr)
            p++;

          while(p < NumPartGroup && pcand[p].SubhaloNr == all_candidates[i].SubhaloNr)
            {
              if(Tp->PS[pcand[p].index].SubhaloNr != all_candidates[i].SubhaloNr)
                Terminate(
                    "we have an issue! p=%d NumPartGroup=%d pcand[p].index=%d pcand[p].SubhaloNr=%lld "
                    "all_candidates[i].SubhaloNr=%lld Tp->PS[pcand[p].index].SubhaloNr=%lld",
                    p, NumPartGroup, pcand[p].index, (long long)pcand[p].SubhaloNr.get(), (long long)all_candidates[i].SubhaloNr.get(),
                    (long long)Tp->PS[pcand[p].index].SubhaloNr.get());

              pcand[p].SubhaloNr.set(HALONR_MAX);
              Tp->PS[pcand[p].index].SubhaloNr.set(HALONR_MAX);
              p++;
              sum++;
            }
        }

    MPI_Allreduce(MPI_IN_PLACE, &sum, 1, MPI_LONG_LONG, MPI_SUM, SubComm);

    long long sum2 = 0;

    for(int i = 0; i < totcand; i++)
      if(all_candidates[i].len < All.DesLinkNgb)
        {
          sum2 += all_candidates[i].len;
          all_candidates[i] = all_candidates[totcand - 1];
          totcand--;
          i--;
        }

    if(sum != sum2)
      Terminate("consistency check failed sum = %lld sum2 = %lld\n", sum, sum2);

    /* sort according to subhalonr */
    mycxxsort_parallel(pcand, pcand + NumPartGroup, subfind_hbt_compare_pcand_subhalonr, SubComm);
  }

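  /* at this point the particles of all dissolved small candidates carry
   * SubhaloNr = HALONR_MAX, i.e. they count as unbound and will later be
   * swept up by the fiducial background candidate */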
  /* if a largest candidate exists, eliminate it, because this is the one we will treat as the background halo */
  if(totcand > 0)
    {
      /* sort the candidates by size */
      mycxxsort(all_candidates, all_candidates + totcand, subfind_hbt_compare_subcand_len);

      /* then sort the candidates by summed previous length, as this is arguably a more robust criterion for deciding which one
       * should count as the largest */
      mycxxsort(all_candidates, all_candidates + totcand, subfind_hbt_compare_subcand_summedprevlen);

      totcand--;
      for(int k = 0; k < NumPartGroup; k++)
        if(Tp->PS[IndexList[k]].SubhaloNr == all_candidates[totcand].SubhaloNr)
          Tp->PS[IndexList[k]].SubhaloNr.set(HALONR_MAX);
    }

  /* sort the candidates according to previous subhalonr */
  mycxxsort(all_candidates, all_candidates + totcand, subfind_hbt_compare_subcand_subhalonr);

  subfind_collective_printf("SUBFIND: root-task=%d: total number of subhalo candidates=%lld\n", ThisTask, totcand);

  /*******************************************/
  /* Let's now see which candidates can be treated with serial CPUs, which is more efficient than doing them all collectively.
   * We identify them with those candidates that are sufficiently small, which should be most of them by number.
   */

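  /* a candidate counts as 'small' if it holds fewer particles than 20% of the
   * average particle load per task, so that it can be handled within a single
   * rank; such candidates are assigned round-robin to target tasks */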
  int n_small_cand = 0;
  int max_length   = 0;

  int task         = 0;
  int task_scatter = 0;
  for(int i = 0; i < totcand; i++)
    {
      if(all_candidates[i].len < 0.20 * Tp->TotNumPart / NTask) // small enough
        {
          all_candidates[i].DoIt        = true;
          all_candidates[i].TargetTask  = task++;
          all_candidates[i].TargetIndex = n_small_cand;

          if(task >= SubNTask)
            task = 0;

          if(all_candidates[i].len > max_length)
            max_length = all_candidates[i].len;

          n_small_cand++;
        }
      else
        {
          all_candidates[i].DoIt        = false;
          all_candidates[i].TargetTask  = task_scatter++;
          all_candidates[i].TargetIndex = INT_MAX;
          if(task_scatter >= SubNTask)
            task_scatter = 0;
        }
    }

  subfind_collective_printf(
      "SUBFIND: root-task=%d: number of subhalo candidates small enough to be done with one cpu: %d. (Largest size %d)\n", ThisTask,
      n_small_cand, max_length);

  /* now get target information to particles */
  for(int k = 0; k < NumPartGroup; k++)
    {
      pcand[k].SubhaloNr = Tp->PS[IndexList[k]].SubhaloNr;
      pcand[k].index     = IndexList[k];
    }

  // note: local serial sort sufficient here
  mycxxsort(pcand, pcand + NumPartGroup, subfind_hbt_compare_pcand_subhalonr);

  if(SubNTask > 1)
    {
      /* we only need to redistribute the particles if we are processing groups collectively */
      /* note: the setting of TargetIndex makes sure that the particles are grouped together on the target task */

      /* set default values for current particle distribution */
      for(int i = 0; i < Tp->NumPart; i++)
        {
          Tp->PS[i].u.s.origintask  = SubThisTask;
          Tp->PS[i].u.s.originindex = i;

          Tp->PS[i].TargetTask  = SubThisTask;
          Tp->PS[i].TargetIndex = INT_MAX;
        }

      int i = 0;
      for(int k = 0; k < totcand; k++)
        if(all_candidates[k].DoIt)
          {
            while(i < NumPartGroup && pcand[i].SubhaloNr < all_candidates[k].SubhaloNr)
              i++;

            while(i < NumPartGroup && pcand[i].SubhaloNr == all_candidates[k].SubhaloNr)
              {
                Tp->PS[pcand[i].index].TargetTask  = all_candidates[k].TargetTask;
                Tp->PS[pcand[i].index].TargetIndex = all_candidates[k].TargetIndex;
                i++;
              }
          }

      /* assemble the particles on individual processors (note: IndexList[] becomes temporarily meaningless) */
      subfind_distribute_particles(SubComm);
    }

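  /* after the exchange, the particles of each small candidate sit contiguously
   * on their target task, so the serial unbinding below can sweep through
   * Tp->PS[] with a single running index */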
  /* now do the serial unbinding */
  /*----------------------------------------------------*/

  {
    unbind_list = (int *)Mem.mymalloc("unbind_list", Tp->NumPart * sizeof(int));

    int i = 0; // particle index

    for(int k = 0; k < totcand; k++)
      if(all_candidates[k].DoIt)
        if(all_candidates[k].TargetTask == SubThisTask)
          {
            int len = 0;

            if(SubNTask > 1)
              {
                while(i < Tp->NumPart && Tp->PS[i].SubhaloNr < all_candidates[k].SubhaloNr)
                  i++;

                while(i < Tp->NumPart && Tp->PS[i].SubhaloNr == all_candidates[k].SubhaloNr && Tp->PS[i].GroupNr.get() == GroupNr)
                  {
                    unbind_list[len] = i;
                    len++;
                    i++;
                  }
              }
            else
              {
                while(i < NumPartGroup && Tp->PS[pcand[i].index].SubhaloNr < all_candidates[k].SubhaloNr)
                  i++;

                while(i < NumPartGroup && Tp->PS[pcand[i].index].SubhaloNr == all_candidates[k].SubhaloNr &&
                      Tp->PS[pcand[i].index].GroupNr.get() == GroupNr)
                  {
                    unbind_list[len] = pcand[i].index;
                    len++;
                    i++;
                  }
              }

            if(len != all_candidates[k].len)
              Terminate("this is unexpected: k=%d len=%lld != all_candidates[k].len=%lld\n", k, (long long)len,
                        (long long)all_candidates[k].len);

            /* default is that all particles end up unbound */
            for(int n = 0; n < len; n++)
              Tp->PS[unbind_list[n]].SubhaloNr.set(HALONR_MAX);

            /* call the serial unbind function */
            len = subfind_unbind(SingleDomain, SingleDomain->Communicator, unbind_list, len);

            if(len >= All.DesLinkNgb)
              {
                /* set the previous subhalo number as provisional subhalo number */
                for(int n = 0; n < len; n++)
                  {
                    Tp->PS[unbind_list[n]].SubhaloNr = all_candidates[k].SubhaloNr;
                    Tp->PS[unbind_list[n]].SizeOfSubhalo.set(len);
                  }

                if(Nsubhalos >= MaxNsubhalos)
                  Terminate("no storage: Nsubhalos=%d MaxNsubhalos=%d nsubhalos_old=%d totcand=%lld\n", Nsubhalos, MaxNsubhalos,
                            nsubhalos_old, totcand);

                /* ok, we found a substructure */
                marked += subfind_determine_sub_halo_properties(unbind_list, len, &Subhalo[Nsubhalos], SingleDomain->Communicator);

                Subhalo[Nsubhalos].GroupNr       = GroupNr;
                Subhalo[Nsubhalos].SubParentRank = 0;
                Subhalo[Nsubhalos].SubhaloNr     = all_candidates[k].SubhaloNr.get();

                Nsubhalos++;
              }
          }

    Mem.myfree(unbind_list);
  }

  if(SubNTask > 1)
    {
      /* bring them back to their original processor */
      for(int i = 0; i < Tp->NumPart; i++)
        {
          Tp->PS[i].TargetTask  = Tp->PS[i].u.s.origintask;
          Tp->PS[i].TargetIndex = Tp->PS[i].u.s.originindex;
        }

      subfind_distribute_particles(SubComm);
    }

  double t0 = Logs.second();

  /**************************************************/
  /**************************************************/
  /******* now do remaining ones collectively *******/

  /* first, add a fiducial candidate which will be our background halo, swallowing all unbound particles */

  all_candidates[totcand].DoIt = false; /* marks collective ones */
  all_candidates[totcand].SubhaloNr.set(HALONR_MAX);
  totcand++;

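  /* every candidate with DoIt == false is now processed collectively on the
   * full SubComm communicator; the fiducial HALONR_MAX entry just added
   * collects all particles that are still flagged as unbound and turns them
   * into the background subhalo of the group, provided enough of them remain
   * bound */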
  for(int k = 0; k < totcand; k++)
    if(all_candidates[k].DoIt == false)
      {
        domain<partset> SubUnbindDomain{SubComm, Tp};

        int *unbind_list;

        if(mode == COLL_SUBFIND)
          {
            for(int i = 0; i < Tp->NumPart; i++)
              {
                Tp->PS[i].u.s.origintask  = SubThisTask;
                Tp->PS[i].u.s.originindex = i;
                Tp->PS[i].DomainFlag      = 0;
              }

            /* mark the ones to be unbound in PS[] */
            for(int i = 0; i < NumPartGroup; i++)
              if(Tp->PS[IndexList[i]].SubhaloNr == all_candidates[k].SubhaloNr)
                Tp->PS[IndexList[i]].DomainFlag = 1;

            SubUnbindDomain.domain_decomposition(mode);
            subfind_distribute_particles(SubComm);

            LocalLen = 0;
            for(int i = 0; i < Tp->NumPart; i++)
              if(Tp->PS[i].DomainFlag)
                LocalLen++;

            unbind_list = (int *)Mem.mymalloc_movable(&unbind_list, "unbind_list", LocalLen * sizeof(int));

            /* refill unbind_list */
            LocalLen = 0;
            for(int i = 0; i < Tp->NumPart; i++)
              if(Tp->PS[i].DomainFlag)
                unbind_list[LocalLen++] = i;
          }
        else
          {
            unbind_list = (int *)Mem.mymalloc_movable(&unbind_list, "unbind_list", NumPartGroup * sizeof(int));

            LocalLen = 0;
            for(int i = 0; i < NumPartGroup; i++)
              if(Tp->PS[IndexList[i]].SubhaloNr == all_candidates[k].SubhaloNr)
                unbind_list[LocalLen++] = IndexList[i];
          }

        /* default is that all particles end up unbound */
        for(int n = 0; n < LocalLen; n++)
          Tp->PS[unbind_list[n]].SubhaloNr.set(HALONR_MAX);

        if(mode == COLL_SUBFIND)
          LocalLen = subfind_unbind(&SubUnbindDomain, SubComm, unbind_list, LocalLen);
        else
          LocalLen = subfind_unbind(SingleDomain, SingleDomain->Communicator, unbind_list, LocalLen);

        int FullLen;
        MPI_Allreduce(&LocalLen, &FullLen, 1, MPI_INT, MPI_SUM, SubComm);

        if(FullLen >= All.DesLinkNgb)
          {
            if(all_candidates[k].SubhaloNr.get() == HALONR_MAX)
              all_candidates[k].SubhaloNr.set(HALONR_MAX - 1);

            /* set the previous subhalo number as provisional subhalo number */
            for(int n = 0; n < LocalLen; n++)
              {
                Tp->PS[unbind_list[n]].SubhaloNr = all_candidates[k].SubhaloNr;
#if defined(MERGERTREE)
                Tp->PS[unbind_list[n]].SizeOfSubhalo.set(FullLen);
#endif
              }

            if(Nsubhalos >= MaxNsubhalos)
              Terminate("no storage: Nsubhalos=%d MaxNsubhalos=%d nsubhalos_old=%d totcand=%lld\n", Nsubhalos, MaxNsubhalos,
                        nsubhalos_old, totcand);

            marked += subfind_determine_sub_halo_properties(unbind_list, LocalLen, &Subhalo[Nsubhalos], SubComm);

            if(SubThisTask == 0)
              {
                Subhalo[Nsubhalos].GroupNr       = GroupNr;
                Subhalo[Nsubhalos].SubParentRank = 0;
                Subhalo[Nsubhalos].SubhaloNr     = all_candidates[k].SubhaloNr.get();

                Nsubhalos++;
              }
          }

        Mem.myfree(unbind_list);

        if(mode == COLL_SUBFIND)
          {
            SubUnbindDomain.domain_free();

            for(int i = 0; i < Tp->NumPart; i++)
              {
                Tp->PS[i].TargetTask  = Tp->PS[i].u.s.origintask;
                Tp->PS[i].TargetIndex = Tp->PS[i].u.s.originindex;
              }

            double t0 = Logs.second();
            subfind_distribute_particles(SubComm); /* bring them back to their original processor */
            double t1 = Logs.second();

            subfind_collective_printf("SUBFIND: root-task=%d: bringing the independent ones back took %g sec\n", ThisTask,
                                      Logs.timediff(t0, t1));

            /* since we reestablished the original order, we can use IndexList[] again */
          }
      }

  double t1 = Logs.second();
  subfind_collective_printf("SUBFIND: root-task=%d: the collective unbinding of remaining halos took %g sec\n", ThisTask,
                            Logs.timediff(t0, t1));

  /* get the total substructure count */
  int countloc = Nsubhalos - nsubhalos_old;
  int countall;

  MPI_Allreduce(&countloc, &countall, 1, MPI_INT, MPI_SUM, SubComm);

  MPI_Allreduce(MPI_IN_PLACE, &marked, 1, MPI_INT, MPI_SUM, SubComm);

  /* now let's save some properties of the substructures */
  if(SubThisTask == 0)
    {
      Group[gr].Nsubs = countall;

#if defined(SUBFIND_ORPHAN_TREATMENT)
      Group[gr].LenPrevMostBnd += marked;
#endif
    }

  /* also need to set the field SubRankInGr in all found Subhalos */

  hbt_subhalo_t *subhalo_list = (hbt_subhalo_t *)Mem.mymalloc("subhalo_list", countloc * sizeof(hbt_subhalo_t));

  for(int n = 0; n < countloc; n++)
    {
      subhalo_list[n].Len       = Subhalo[n + nsubhalos_old].Len;
      subhalo_list[n].ThisTask  = SubThisTask;
      subhalo_list[n].ThisIndex = n;
      subhalo_list[n].SubhaloNr = Subhalo[n + nsubhalos_old].SubhaloNr;
    }

  mycxxsort_parallel(subhalo_list, subhalo_list + countloc, subfind_hbt_compare_subhalolist_len, SubComm);

  int *countloc_list = (int *)Mem.mymalloc("countloc_list", SubNTask * sizeof(int));
  MPI_Allgather(&countloc, 1, MPI_INT, countloc_list, 1, MPI_INT, SubComm);
  int npreviousranks = 0;
  for(int i = 0; i < SubThisTask; i++)
    npreviousranks += countloc_list[i];

  for(int n = 0; n < countloc; n++)
    subhalo_list[n].SubRankInGr = n + npreviousranks;

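  /* after the parallel sort by length, a subhalo's global position in the
   * sorted order becomes its rank within the group, so that SubRankInGr == 0
   * identifies the main subhalo of the group */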
  mycxxsort_parallel(subhalo_list, subhalo_list + countloc, subfind_hbt_compare_subhalolist_thistask_thisindex, SubComm);

  /* rank and index of main subhalo */
  int taskmain  = 0;
  int indexmain = 0;

  for(int n = 0; n < countloc; n++)
    {
      Subhalo[n + nsubhalos_old].SubRankInGr = subhalo_list[n].SubRankInGr;

      if(subhalo_list[n].SubRankInGr == 0)
        {
          /* here we have the main subhalo */
          taskmain  = SubThisTask;
          indexmain = n + nsubhalos_old;
        }
    }

  /* now we need to fill in SubRankInGr into Tp->PS[] so that the particles can be sorted in subhalo order,
   * using subhalo_list[] as translation table
   */

  for(int k = 0; k < NumPartGroup; k++)
    {
      pcand[k].SubhaloNr = Tp->PS[IndexList[k]].SubhaloNr;
      pcand[k].index     = IndexList[k];
    }

  /* sort locally */
  mycxxsort(pcand, pcand + NumPartGroup, subfind_hbt_compare_pcand_subhalonr);

  int sizelocsubhalolist = countloc * sizeof(hbt_subhalo_t); /* length in bytes */
  MPI_Allgather(&sizelocsubhalolist, 1, MPI_INT, countlist, 1, MPI_INT, SubComm);

  offset[0] = 0;
  for(int i = 1; i < SubNTask; i++)
    offset[i] = offset[i - 1] + countlist[i - 1];

  hbt_subhalo_t *all_subhalo_list = (hbt_subhalo_t *)Mem.mymalloc("all_subhalo_list", countall * sizeof(hbt_subhalo_t));

  MPI_Allgatherv(subhalo_list, sizelocsubhalolist, MPI_BYTE, all_subhalo_list, countlist, offset, MPI_BYTE, SubComm);

  /* sort locally */
  mycxxsort(all_subhalo_list, all_subhalo_list + countall, subfind_hbt_compare_subhalolist_prevsubhalonr);

  int n = 0;

  for(int k = 0; k < NumPartGroup; k++)
    {
      if(pcand[k].SubhaloNr.get() == HALONR_MAX)
        Tp->PS[pcand[k].index].SubRankInGr = INT_MAX;
      else
        {
          while(n < countall && all_subhalo_list[n].SubhaloNr < (long long)pcand[k].SubhaloNr.get())
            n++;

          if(n >= countall)
            Terminate("unexpected: n=%d countall=%d", n, countall);

          if(all_subhalo_list[n].SubhaloNr != (long long)pcand[k].SubhaloNr.get())
            Terminate("also unexpected: k=%d NumPartGroup=%d all_subhalo_list[n].SubhaloNr=%lld != pcand[k].SubhaloNr=%lld\n", k,
                      NumPartGroup, (long long)all_subhalo_list[n].SubhaloNr, (long long)pcand[k].SubhaloNr.get());

          Tp->PS[pcand[k].index].SubRankInGr = all_subhalo_list[n].SubRankInGr;
        }
    }

  if(countall > 0)
    {
      MPI_Allreduce(MPI_IN_PLACE, &taskmain, 1, MPI_INT, MPI_MAX, SubComm);
      MPI_Allreduce(MPI_IN_PLACE, &indexmain, 1, MPI_INT, MPI_MAX, SubComm);

      subhalo_properties MainSubhalo;

      if(taskmain == SubThisTask)
        {
          MainSubhalo = Subhalo[indexmain];

          if(taskmain != 0)
            MPI_Send(&MainSubhalo, sizeof(subhalo_properties), MPI_BYTE, 0, TAG_N, SubComm);
        }

      if(SubThisTask == 0)
        {
          if(taskmain != 0)
            MPI_Recv(&MainSubhalo, sizeof(subhalo_properties), MPI_BYTE, taskmain, TAG_N, SubComm, MPI_STATUS_IGNORE);

          for(int j = 0; j < 3; j++)
            {
              Group[gr].Pos[j]    = MainSubhalo.Pos[j];
              Group[gr].IntPos[j] = MainSubhalo.IntPos[j];
            }
        }
    }

  Mem.myfree(all_subhalo_list);
  Mem.myfree(countloc_list);
  Mem.myfree(subhalo_list);
  Mem.myfree(offset);
  Mem.myfree(countlist);
  Mem.myfree(all_candidates);
  Mem.myfree(loc_candidates);
  Mem.myfree(elem_last);
  Mem.myfree(NumPartGroup_list);
  Mem.myfree(pcand);

  subfind_collective_printf("SUBFIND: root-task=%d: found %d bound substructures in FoF group of length %lld\n", ThisTask, countall,
                            totgrouplen);
}

/* now make sure that the following classes are really instantiated, otherwise we may get a linking problem */
#include "../data/simparticles.h"
template class fof<simparticles>;

#if defined(LIGHTCONE) && defined(LIGHTCONE_PARTICLES_GROUPS)
#include "../data/lcparticles.h"
template class fof<lcparticles>;
#endif

#endif
#endif