GADGET-4
subfind_excursionset.cc
/*******************************************************************************
 * \copyright This file is part of the GADGET4 N-body/SPH code developed
 * \copyright by Volker Springel. Copyright (C) 2014-2020 by Volker Springel
 * \copyright (vspringel@mpa-garching.mpg.de) and all contributing authors.
 *******************************************************************************/

#include "gadgetconfig.h"

#ifdef SUBFIND
#ifndef SUBFIND_HBT

#include <gsl/gsl_math.h>
#include <mpi.h>
#include <algorithm>
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <cstring>

#include "../data/allvars.h"
#include "../data/dtypes.h"
#include "../data/intposconvert.h"
#include "../data/mymalloc.h"
#include "../domain/domain.h"
#include "../fof/fof.h"
#include "../gravtree/gravtree.h"
#include "../logs/timer.h"
#include "../main/simulation.h"
#include "../mpi_utils/mpi_utils.h"
#include "../sort/cxxsort.h"
#include "../sort/parallel_sort.h"
#include "../sort/peano.h"
#include "../subfind/subfind.h"
#include "../system/system.h"

#define HIGHBIT (1 << 30)
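
/* Note: HIGHBIT is a single flag bit that is temporarily OR-ed into integer fields further below
 * (e.g. PS[].u.s.origintask and PS[].submark) and cleared again later. A minimal sketch of the two
 * operations used in this file (illustrative only):
 *
 *   flags |= HIGHBIT;        // set the marker bit
 *   flags &= (HIGHBIT - 1);  // clear it again, keeping all lower bits
 *
 * This works because HIGHBIT - 1 has every bit below bit 30 set.
 */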

template <typename partset>
void fof<partset>::subfind_process_single_group(domain<partset> *SubDomain, domain<partset> *SingleDomain, domain_options mode, int gr)
{
  /* set up an inverse look-up of the position of local particles in the index list, for later use */
  for(int i = 0; i < NumPartGroup; i++)
    Tp->PS[IndexList[i]].InvIndex = i;

  /* allocate storage for an auxiliary array needed for sorting */
  sd = (sort_density_data *)Mem.mymalloc_movable(&sd, "sd", NumPartGroup * sizeof(sort_density_data));

  /* construct a tree for all particles in the halo */
  FoFGravTree.treeallocate(Tp->NumPart, Tp, SubDomain);
  FoFGravTree.treebuild(NumPartGroup, IndexList);

  /* determine the radius that encloses a certain number of link particles */
  subfind_find_linkngb(SubDomain, NumPartGroup, IndexList);

  /* now determine the indices of the nearest two denser neighbours within this link region */

  /* first, allocate some auxiliary arrays for storing this info */
  Tp->R2Loc = (typename partset::nearest_r2_data *)Mem.mymalloc_movable(&Tp->R2Loc, "R2Loc",
                                                                        Tp->NumPart * sizeof(typename partset::nearest_r2_data));

  /* find the nearest two denser particles around each point in the group */
  subfind_find_nearesttwo(SubDomain, NumPartGroup, IndexList);

  /* create an array that we can conveniently sort according to density in group subsets */
  for(int i = 0; i < NumPartGroup; i++)
    {
      if(IndexList[i] >= Tp->NumPart || IndexList[i] < 0)
        Terminate("Really?");

      sd[i].density = Tp->PS[IndexList[i]].u.s.u.DM_Density;
      sd[i].ngbcount = Tp->PS[IndexList[i]].nearest.count;
      sd[i].index = {SubThisTask, i};
      sd[i].ngb_index1 = Tp->PS[IndexList[i]].nearest.index[0];
      sd[i].ngb_index2 = Tp->PS[IndexList[i]].nearest.index[1];
#ifdef MERGERTREE
      sd[i].PrevSizeOfSubhalo = Tp->P[IndexList[i]].PrevSizeOfSubhalo;
#else
      sd[i].PrevSizeOfSubhalo.set(0);
#endif
    }
  Mem.myfree(Tp->R2Loc);

  /* sort sd according to densities (in parallel if needed) */
  mycxxsort_parallel(sd, sd + NumPartGroup, subfind_compare_densities, SubComm);

  subfind_collective_printf("SUBFIND: root-task=%d: parallel sort of 'sd' done.\n", ThisTask);

  /* can now release the tree */
  FoFGravTree.treefree();

  /* allocate and initialize distributed link list for storing subhalo connectivity */
  SFHead = (location *)Mem.mymalloc_movable(&SFHead, "SFHead", NumPartGroup * sizeof(location));
  SFNext = (location *)Mem.mymalloc_movable(&SFNext, "SFNext", NumPartGroup * sizeof(location));
  SFTail = (location *)Mem.mymalloc_movable(&SFTail, "SFTail", NumPartGroup * sizeof(location));
  SFLen = (MyLenType *)Mem.mymalloc_movable(&SFLen, "SFLen", NumPartGroup * sizeof(MyLenType));
  SFPrevLen = (double *)Mem.mymalloc_movable(&SFPrevLen, "SFPrevLen", NumPartGroup * sizeof(double));

  for(int i = 0; i < NumPartGroup; i++)
    {
      SFHead[i] = {-1, -1};
      SFNext[i] = {-1, -1};
      SFTail[i] = {-1, -1};
      SFLen[i] = 0;
      SFPrevLen[i] = 0;
    }
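
  /* The SF* arrays realize a linked list distributed over the MPI ranks of SubComm: an element is
   * addressed by a 'location', i.e. a (task, index) pair. For the head element of each list, SFLen[]
   * holds the list length and SFPrevLen[] the summed (approximate) previous subhalo sizes, while
   * SFNext[] chains the elements and SFTail[] caches the last element. A hypothetical single-task
   * traversal would look like this (sketch only; the real code goes through the accessor functions
   * further below, which forward requests to the owning task):
   *
   *   for(location p = head; p.index >= 0; p = SFNext[p.index])
   *     visit(p);
   */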

  /* allocate a list to store subhalo candidates */
  max_coll_candidates = std::max<int>((NumPartGroup / 50), 200);
  coll_candidates =
      (coll_cand_dat *)Mem.mymalloc_movable(&coll_candidates, "coll_candidates", max_coll_candidates * sizeof(coll_cand_dat));

  /* initialize the number of current candidates */
  count_cand = 0;

  /* get total group length */
  long long totgrouplen;
  sumup_large_ints(1, &NumPartGroup, &totgrouplen, SubComm);

  /* determine subhalo candidates */
  subfind_col_find_coll_candidates(totgrouplen);

  /* establish total number of candidates */
  long long totcand;
  sumup_large_ints(1, &count_cand, &totcand, SubComm);

  subfind_collective_printf("SUBFIND: root-task=%d: total number of subhalo coll_candidates=%lld\n", ThisTask, totcand);

  for(int i = 0; i < NumPartGroup; i++)
    SFTail[i] = {-1, -1};

  /* default is to be not nested */
  for(int i = 0; i < count_cand; i++)
    coll_candidates[i].parent = 0;

  int count_leaves = 0;
  long long nremaining = totcand;

  do
    {
      /* Let's see which coll_candidates can be unbound independently of each other.
       * We identify them with those candidates that have no other candidate embedded inside them, which are most of them by number.
       */
      double t0 = Logs.second();
      coll_cand_dat *tmp_coll_candidates = 0;
      if(SubThisTask == 0)
        tmp_coll_candidates = (coll_cand_dat *)Mem.mymalloc("tmp_coll_candidates", totcand * sizeof(coll_cand_dat));

      int *countlist = (int *)Mem.mymalloc("countlist", SubNTask * sizeof(int));
      int *offset = (int *)Mem.mymalloc("offset", SubNTask * sizeof(int));

      int count = count_cand * sizeof(coll_cand_dat);
      MPI_Allgather(&count, 1, MPI_INT, countlist, 1, MPI_INT, SubComm);

      offset[0] = 0;
      for(int i = 1; i < SubNTask; i++)
        offset[i] = offset[i - 1] + countlist[i - 1];
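
      /* offset[] is the exclusive prefix sum of countlist[], as needed by MPI_Gatherv/MPI_Scatterv
       * below. Both are measured in bytes here, since the transfers use MPI_BYTE; for example,
       * byte counts {24, 48, 24} yield displacements {0, 24, 72}. */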

      /* assemble a list of the candidates on subtask 0 */
      MPI_Gatherv(coll_candidates, countlist[SubThisTask], MPI_BYTE, tmp_coll_candidates, countlist, offset, MPI_BYTE, 0, SubComm);

      if(SubThisTask == 0)
        {
          for(int k = 0; k < totcand; k++)
            {
              tmp_coll_candidates[k].nsub = k;
              tmp_coll_candidates[k].subnr = k;
            }

          mycxxsort(tmp_coll_candidates, tmp_coll_candidates + totcand, subfind_compare_coll_candidates_rank);

          for(int k = 0; k < totcand; k++)
            {
              if(tmp_coll_candidates[k].parent >= 0)
                {
                  tmp_coll_candidates[k].parent = 0;

                  for(int j = k + 1; j < totcand; j++)
                    {
                      if(tmp_coll_candidates[j].rank > tmp_coll_candidates[k].rank + tmp_coll_candidates[k].len)
                        break;

                      if(tmp_coll_candidates[j].parent < 0) /* ignore these */
                        continue;

                      if(tmp_coll_candidates[k].rank + tmp_coll_candidates[k].len >=
                         tmp_coll_candidates[j].rank + tmp_coll_candidates[j].len)
                        {
                          tmp_coll_candidates[k].parent++; /* we here count the number of subhalos that are enclosed */
                        }
                      else
                        {
                          Terminate("k=%d|%lld has rank=%d and len=%d. j=%d has rank=%d and len=%d\n", k, totcand,
                                    (int)tmp_coll_candidates[k].rank, (int)tmp_coll_candidates[k].len, j,
                                    (int)tmp_coll_candidates[j].rank, (int)tmp_coll_candidates[j].len);
                        }
                    }
                }
            }

          mycxxsort(tmp_coll_candidates, tmp_coll_candidates + totcand, subfind_compare_coll_candidates_subnr);
        }
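
      /* The nesting test above exploits the rank order along the density-ordered chain: candidate j,
       * occupying the interval [rank_j, rank_j + len_j), is enclosed by candidate k if that interval
       * lies fully inside [rank_k, rank_k + len_k). For example, k with rank=10, len=100 encloses
       * j with rank=40, len=20, since 40 + 20 <= 10 + 100. Partially overlapping intervals cannot
       * occur by construction, hence the Terminate() in the else-branch. */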

      /* send the stuff back */
      MPI_Scatterv(tmp_coll_candidates, countlist, offset, MPI_BYTE, coll_candidates, countlist[SubThisTask], MPI_BYTE, 0, SubComm);

      Mem.myfree(offset);
      Mem.myfree(countlist);

      if(SubThisTask == 0)
        Mem.myfree(tmp_coll_candidates);

      count_leaves = 0;
      int max_length = 0;
      for(int i = 0; i < count_cand; i++)
        if(coll_candidates[i].parent == 0) /* if it's not a nested one, the candidate is eligible for independent/parallel unbinding */
          {
            /* if it seems large (heuristic criterion), let's rather do it collectively */
            if(coll_candidates[i].len > 0.20 * Tp->TotNumPart / NTask)
              coll_candidates[i].parent++; /* this trick will ensure that it is not considered in this round */
            else
              {
                if(coll_candidates[i].len > max_length)
                  max_length = coll_candidates[i].len;

                count_leaves++;
              }
          }

      /* get total count of these eligible subhalos, and their maximum length */
      MPI_Allreduce(MPI_IN_PLACE, &count_leaves, 1, MPI_INT, MPI_SUM, SubComm);
      MPI_Allreduce(MPI_IN_PLACE, &max_length, 1, MPI_INT, MPI_MAX, SubComm);

      double t1 = Logs.second();

      subfind_collective_printf(
          "SUBFIND: root-task=%d: number of subhalo coll_candidates that can be done independently=%d. (Largest size %d, finding took "
          "%g sec)\n",
          ThisTask, count_leaves, max_length, Logs.timediff(t0, t1));

      /* if there are none left, we break and do the rest collectively */
      if(count_leaves <= 0)
        {
          subfind_collective_printf("SUBFIND: root-task=%d: too few, let's do the rest of %lld collectively\n", ThisTask, nremaining);
          break;
        }

      /* seems large, let's rather do it collectively */
      if(max_length > 0.5 * Tp->TotNumPart / NTask)
        {
          subfind_collective_printf("SUBFIND: root-task=%d: too big coll_candidates, I do the rest collectively\n", ThisTask);
          break;
        }

      nremaining -= count_leaves;

      /* set default values */
      for(int i = 0; i < Tp->NumPart; i++)
        {
          Tp->PS[i].u.s.origintask = SubThisTask;
          Tp->PS[i].u.s.originindex = i;

          Tp->PS[i].TargetTask = SubThisTask;
          Tp->PS[i].submark = HIGHBIT;
        }

      for(int i = 0; i < NumPartGroup; i++)
        {
          if(SFTail[i].index >= 0) /* this means this particle is already bound to a substructure */
            Tp->PS[IndexList[i]].u.s.origintask |= HIGHBIT;
        }
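
      /* Particles that already ended up in a substructure in an earlier round of this loop keep the
       * HIGHBIT marker in origintask; subfind_unbind_independent_ones() below leaves such particles
       * out of its unbind list. */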

      /* we now mark the particles that are in subhalo candidates that can be processed independently in parallel */
      int nsubs = 0;
      for(int master = 0; master < SubNTask; master++)
        {
          int ncand = count_cand;
          MPI_Bcast(&ncand, sizeof(ncand), MPI_BYTE, master, SubComm);

          for(int k = 0; k < ncand; k++)
            {
              MyLenType len;
              int parent;
              if(SubThisTask == master)
                {
                  len = coll_candidates[k].len;
                  parent = coll_candidates[k].parent; /* this is here actually the daughter count */
                }

              MPI_Bcast(&len, sizeof(len), MPI_BYTE, master, SubComm);
              MPI_Bcast(&parent, sizeof(parent), MPI_BYTE, master, SubComm);
              MPI_Barrier(SubComm);

              if(parent == 0)
                {
                  if(SubThisTask != master)
                    subfind_poll_for_requests();
                  else
                    {
                      location p = coll_candidates[k].head;
                      for(MyLenType i = 0; i < coll_candidates[k].len; i++)
                        {
                          subfind_distlinklist_mark_particle(p, master, nsubs);

                          if(p.index < 0)
                            Terminate("Bummer\n");

                          p = subfind_distlinklist_get_next(p);
                        }

                      /* now tell the others to stop polling */
                      for(int i = 0; i < SubNTask; i++)
                        if(i != SubThisTask)
                          MPI_Send(&i, 1, MPI_INT, i, TAG_POLLING_DONE, SubComm);
                    }

                  MPI_Barrier(SubComm);
                }

              nsubs++;
            }
        }
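
      /* The marking above follows the master/worker pattern used throughout this file: the task that
       * owns a candidate walks its linked list and issues remote updates, while all other tasks sit
       * in subfind_poll_for_requests() and service these updates until they receive TAG_POLLING_DONE.
       * Note that nsubs advances for every candidate, whether processed in this round or not, so the
       * submark values are globally consistent across tasks. */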

      if(mode == COLL_SUBFIND)
        {
          /* this will make sure that the particles are grouped by submark on the target task */
          for(int i = 0; i < Tp->NumPart; i++)
            Tp->PS[i].TargetIndex = Tp->PS[i].submark;

          /* assemble the particles on individual processors (note: IndexList[] becomes temporarily meaningless) */
          subfind_distribute_particles(SubComm);

          PPS = (PPS_data *)Mem.mymalloc("PPS", Tp->NumPart * sizeof(PPS_data));

          for(int i = 0; i < Tp->NumPart; i++)
            PPS[i].index = i;
        }
      else
        {
          PPS = (PPS_data *)Mem.mymalloc("PPS", Tp->NumPart * sizeof(PPS_data));

          for(int i = 0; i < Tp->NumPart; i++)
            {
              PPS[i].submark = Tp->PS[i].submark;
              PPS[i].index = i;
            }

          mycxxsort(PPS, PPS + Tp->NumPart, subfind_compare_PPS);
        }
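
      /* In both branches PPS[] ends up ordered by submark: in the COLL_SUBFIND case the particle
       * exchange itself groups the particles (TargetIndex was set to submark above), in the serial
       * case we sort explicitly. This lets subfind_unbind_independent_ones() process the particles
       * of each candidate as one contiguous block in a single sweep. */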

      MPI_Barrier(SubComm);
      double ta = Logs.second();

      subfind_unbind_independent_ones(SingleDomain, count_cand);

      MPI_Barrier(SubComm);
      double tb = Logs.second();

      Mem.myfree(PPS);

      subfind_collective_printf("SUBFIND: root-task=%d: unbinding of independent ones took %g sec\n", ThisTask, Logs.timediff(ta, tb));

      if(mode == COLL_SUBFIND)
        {
          for(int i = 0; i < Tp->NumPart; i++)
            {
              Tp->PS[i].u.s.origintask &= (HIGHBIT - 1); /* clear high bit if set */
              Tp->PS[i].TargetTask = Tp->PS[i].u.s.origintask;
              Tp->PS[i].TargetIndex = Tp->PS[i].u.s.originindex;
            }

          t0 = Logs.second();
          subfind_distribute_particles(SubComm); /* bring them back to their original processor */
          t1 = Logs.second();

          subfind_collective_printf("SUBFIND: root-task=%d: bringing the independent ones back took %g sec\n", ThisTask,
                                    Logs.timediff(t0, t1));

          /* Since we re-established the original order, we can use IndexList[] again */
        }

      /* now mark the bound particles */
      for(int i = 0; i < NumPartGroup; i++)
        if(Tp->PS[IndexList[i]].submark >= 0 && Tp->PS[IndexList[i]].submark < nsubs)
          SFTail[i].index = Tp->PS[IndexList[i]].submark; /* we use this to flag bound parts of substructures */

      for(int i = 0; i < count_cand; i++)
        if(coll_candidates[i].parent == 0)
          coll_candidates[i].parent = -1;
    }
  while(count_leaves > 0);
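
  /* Summary of the loop above: in each round, the currently independent candidates (those without
   * embedded sub-candidates and below the size threshold) are unbound in parallel on individual
   * tasks and then marked as done via parent = -1. The loop ends when no such leaf candidates
   * remain; whatever is left over (nested or very large candidates) is treated collectively below. */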

  /*
   * Now we do the unbinding of the subhalo candidates that contain other subhalo candidates.
   * This will be done with several CPUs if needed.
   */

  double t0 = Logs.second();

  for(int master = 0, nr = 0; master < SubNTask; master++)
    {
      int ncand = count_cand;
      MPI_Bcast(&ncand, sizeof(ncand), MPI_BYTE, master, SubComm);

      for(int k = 0; k < ncand; k++)
        {
          MyLenType len;
          int parent, nsubs;
          if(SubThisTask == master)
            {
              len = coll_candidates[k].len;
              nsubs = coll_candidates[k].nsub;
              parent = coll_candidates[k].parent; /* this is here actually the daughter count */
            }

          MPI_Bcast(&parent, sizeof(parent), MPI_BYTE, master, SubComm);
          MPI_Barrier(SubComm);

          if(parent >= 0)
            {
              MPI_Bcast(&len, sizeof(len), MPI_BYTE, master, SubComm);
              MPI_Bcast(&nsubs, sizeof(nsubs), MPI_BYTE, master, SubComm);

              subfind_collective_printf("SUBFIND: root-task=%d: collective unbinding of nr=%d (%lld) of length=%d\n", ThisTask, nr,
                                        nremaining, (int)len);

              nr++;

              LocalLen = 0;

              double tt0 = Logs.second();

              unbind_list = (int *)Mem.mymalloc_movable(&unbind_list, "unbind_list", NumPartGroup * sizeof(int));

              if(SubThisTask != master)
                subfind_poll_for_requests();
              else
                {
                  location p = coll_candidates[k].head;
                  for(int i = 0; i < coll_candidates[k].len; i++)
                    {
                      if(p.index < 0)
                        Terminate("Bummer i=%d\n", i);

                      subfind_distlinklist_add_particle(p);

                      p = subfind_distlinklist_get_next(p);
                    }

                  /* now tell the others to stop polling */
                  for(int i = 0; i < SubNTask; i++)
                    if(i != SubThisTask)
                      MPI_Send(&i, 1, MPI_INT, i, TAG_POLLING_DONE, SubComm);
                }

              if(LocalLen > NumPartGroup)
                Terminate("LocalLen=%d > NumPartGroup=%d", LocalLen, NumPartGroup);

              /* rewrite list of group indices to particle indices */
              for(int i = 0; i < LocalLen; i++)
                {
                  unbind_list[i] = IndexList[unbind_list[i]];
                  if(unbind_list[i] < 0 || unbind_list[i] >= Tp->NumPart)
                    Terminate("bad! unbind_list[i]=%d\n", unbind_list[i]);
                }

              /* mark the ones to be unbound in PS[] */
              for(int i = 0; i < Tp->NumPart; i++)
                {
                  Tp->PS[i].u.s.origintask = SubThisTask;
                  Tp->PS[i].u.s.originindex = i;
                  Tp->PS[i].DomainFlag = 0;
                }

              for(int i = 0; i < LocalLen; i++)
                Tp->PS[unbind_list[i]].DomainFlag = 1;

              Mem.myfree(unbind_list);

              domain<partset> SubUnbindDomain{SubComm, Tp};

              if(SubUnbindDomain.NumNodes != 0)
                Terminate("SubDomain.NumNodes=%d\n", SubUnbindDomain.NumNodes);

              SubUnbindDomain.domain_decomposition(mode);

              if(mode == COLL_SUBFIND)
                subfind_distribute_particles(SubComm);

              /* count the particles that will make up the unbind_list */
              LocalLen = 0;
              for(int i = 0; i < Tp->NumPart; i++)
                if(Tp->PS[i].DomainFlag)
                  LocalLen++;

              unbind_list = (int *)Mem.mymalloc_movable(&unbind_list, "unbind_list", LocalLen * sizeof(int));

              /* refill unbind_list */
              LocalLen = 0;
              for(int i = 0; i < Tp->NumPart; i++)
                if(Tp->PS[i].DomainFlag)
                  unbind_list[LocalLen++] = i;

              LocalLen = subfind_unbind(&SubUnbindDomain, SubComm, unbind_list, LocalLen);

              for(int i = 0; i < Tp->NumPart; i++)
                {
                  Tp->PS[i].DomainFlag = 0;
                  Tp->PS[i].TargetTask = Tp->PS[i].u.s.origintask;
                  Tp->PS[i].TargetIndex = Tp->PS[i].u.s.originindex;
                }

              for(int i = 0; i < LocalLen; i++)
                Tp->PS[unbind_list[i]].DomainFlag = 1;

              Mem.myfree(unbind_list);

              SubUnbindDomain.domain_free();

              double ta = Logs.second();
              subfind_distribute_particles(SubComm); /* bring them back to their original processor */
              double tb = Logs.second();

              unbind_list = (int *)Mem.mymalloc_movable(&unbind_list, "unbind_list", NumPartGroup * sizeof(int));

              /* refill unbind_list */
              LocalLen = 0;
              for(int i = 0; i < Tp->NumPart; i++)
                if(Tp->PS[i].DomainFlag)
                  {
                    if(LocalLen >= NumPartGroup)
                      Terminate("LocalLen=%d >= NumPartGroup=%d", LocalLen, NumPartGroup);
                    unbind_list[LocalLen++] = i;
                  }

              subfind_collective_printf("SUBFIND: root-task=%d: bringing the collective ones back took %g sec\n", ThisTask,
                                        Logs.timediff(ta, tb));

              /* go from particle indices back to group indices */
              for(int i = 0; i < LocalLen; i++)
                {
                  unbind_list[i] = Tp->PS[unbind_list[i]].InvIndex;
                  if(unbind_list[i] < 0 || unbind_list[i] >= NumPartGroup)
                    Terminate("ups, bad! unbind_list[i]=%d NumPartGroup=%d\n", unbind_list[i], NumPartGroup);
                }

              double tt1 = Logs.second();

              int oldlen = len;

              MPI_Allreduce(&LocalLen, &len, 1, MPI_INT, MPI_SUM, SubComm);

              subfind_collective_printf(
                  "SUBFIND: root-task=%d: collective unbinding of nr=%d (%lld) of length=%d, bound length=%d took %g sec\n", ThisTask,
                  nr - 1, nremaining, oldlen, (int)len, Logs.timediff(tt0, tt1));

              if(len >= All.DesLinkNgb)
                {
                  /* ok, we found a substructure */
                  for(int i = 0; i < LocalLen; i++)
                    SFTail[unbind_list[i]].index = nsubs; /* we use this to flag the substructures */

                  if(SubThisTask == master)
                    coll_candidates[k].bound_length = len;
                }
              else
                {
                  /* bound particle count too low or zero */
                  if(SubThisTask == master)
                    coll_candidates[k].bound_length = 0;
                }

              Mem.myfree(unbind_list);
            }
        }
    }
  double t1 = Logs.second();

  subfind_collective_printf("SUBFIND: root-task=%d: the collective unbinding of remaining halos took %g sec\n", ThisTask,
                            Logs.timediff(t0, t1));

  /* get the total substructure count */
  int countall = 0;
  for(int k = 0; k < count_cand; k++)
    if(coll_candidates[k].bound_length >= All.DesLinkNgb)
      {
        if(coll_candidates[k].len < All.DesLinkNgb)
          Terminate("coll_candidates[k=%d].len=%lld bound=%lld\n", k, (long long)coll_candidates[k].len,
                    (long long)coll_candidates[k].bound_length);

        countall++;
      }

  MPI_Allreduce(MPI_IN_PLACE, &countall, 1, MPI_INT, MPI_SUM, SubComm);

  subfind_collective_printf("SUBFIND: root-task=%d: found %d bound substructures in FoF group of length %lld\n", ThisTask, countall,
                            totgrouplen);

  /* now determine the parent subhalo for each candidate */
  t0 = Logs.second();
  mycxxsort_parallel(coll_candidates, coll_candidates + count_cand, subfind_compare_coll_candidates_boundlength, SubComm);

  coll_cand_dat *tmp_coll_candidates = 0;

  if(SubThisTask == 0)
    tmp_coll_candidates = (coll_cand_dat *)Mem.mymalloc("tmp_coll_candidates", totcand * sizeof(coll_cand_dat));

  int *countlist = (int *)Mem.mymalloc("countlist", SubNTask * sizeof(int));
  int *offset = (int *)Mem.mymalloc("offset", SubNTask * sizeof(int));

  int count_size = count_cand * sizeof(coll_cand_dat);
  MPI_Allgather(&count_size, 1, MPI_INT, countlist, 1, MPI_INT, SubComm);

  offset[0] = 0;
  for(int i = 1; i < SubNTask; i++)
    offset[i] = offset[i - 1] + countlist[i - 1];

  MPI_Gatherv(coll_candidates, countlist[SubThisTask], MPI_BYTE, tmp_coll_candidates, countlist, offset, MPI_BYTE, 0, SubComm);

  if(SubThisTask == 0)
    {
      for(int k = 0; k < totcand; k++)
        {
          tmp_coll_candidates[k].subnr = k;
          tmp_coll_candidates[k].parent = 0;
        }

      mycxxsort(tmp_coll_candidates, tmp_coll_candidates + totcand, subfind_compare_coll_candidates_rank);

      for(int k = 0; k < totcand; k++)
        {
          for(int j = k + 1; j < totcand; j++)
            {
              if(tmp_coll_candidates[j].rank > tmp_coll_candidates[k].rank + tmp_coll_candidates[k].len)
                break;

              if(tmp_coll_candidates[k].rank + tmp_coll_candidates[k].len >= tmp_coll_candidates[j].rank + tmp_coll_candidates[j].len)
                {
                  if(tmp_coll_candidates[k].bound_length >= All.DesLinkNgb)
                    tmp_coll_candidates[j].parent = tmp_coll_candidates[k].subnr;
                }
              else
                {
                  Terminate("k=%d|%d has rank=%d and len=%d. j=%d has rank=%d and len=%d bound=%d\n", k, countall,
                            (int)tmp_coll_candidates[k].rank, (int)tmp_coll_candidates[k].len,
                            (int)tmp_coll_candidates[k].bound_length, (int)tmp_coll_candidates[j].rank,
                            (int)tmp_coll_candidates[j].len, (int)tmp_coll_candidates[j].bound_length);
                }
            }
        }

      mycxxsort(tmp_coll_candidates, tmp_coll_candidates + totcand, subfind_compare_coll_candidates_subnr);
    }

  MPI_Scatterv(tmp_coll_candidates, countlist, offset, MPI_BYTE, coll_candidates, countlist[SubThisTask], MPI_BYTE, 0, SubComm);

  Mem.myfree(offset);
  Mem.myfree(countlist);

  if(SubThisTask == 0)
    Mem.myfree(tmp_coll_candidates);

  t1 = Logs.second();

  subfind_collective_printf("SUBFIND: root-task=%d: determination of parent subhalo took %g sec (presently allocated %g MB)\n",
                            ThisTask, Logs.timediff(t0, t1), Mem.getAllocatedBytesInMB());

  /* Now let's save some properties of the substructures */
  if(SubThisTask == 0)
    Group[gr].Nsubs = countall;

  t0 = Logs.second();

  unbind_list = (int *)Mem.mymalloc_movable(&unbind_list, "unbind_list", NumPartGroup * sizeof(int));

  for(int master = 0, subnr = 0; master < SubNTask; master++)
    {
      int ncand = count_cand;
      MPI_Bcast(&ncand, sizeof(ncand), MPI_BYTE, master, SubComm);

      for(int k = 0; k < ncand; k++)
        {
          MyLenType len;
          int parent, nsubs;
          if(SubThisTask == master)
            {
              len = coll_candidates[k].bound_length;
              nsubs = coll_candidates[k].nsub;
              parent = coll_candidates[k].parent;
            }

          MPI_Bcast(&len, sizeof(len), MPI_BYTE, master, SubComm);
          MPI_Barrier(SubComm);

          if(len > 0)
            {
              MPI_Bcast(&nsubs, sizeof(nsubs), MPI_BYTE, master, SubComm);
              MPI_Bcast(&parent, sizeof(parent), MPI_BYTE, master, SubComm);

              LocalLen = 0;

              if(SubThisTask != master)
                subfind_poll_for_requests();
              else
                {
                  location p = coll_candidates[k].head;
                  for(MyLenType i = 0; i < coll_candidates[k].len; i++)
                    {
                      subfind_distlinklist_add_bound_particles(p, nsubs);
                      p = subfind_distlinklist_get_next(p);
                    }

                  /* now tell the others to stop polling */
                  for(int i = 0; i < SubNTask; i++)
                    if(i != SubThisTask)
                      MPI_Send(&i, 1, MPI_INT, i, TAG_POLLING_DONE, SubComm);
                }

              int max_nsubhalos;
              MPI_Allreduce(&Nsubhalos, &max_nsubhalos, 1, MPI_INT, MPI_MAX, SubComm);

              if(max_nsubhalos >= MaxNsubhalos)
                {
                  if(ThisTask == 0)
                    warn("Nsubhalos=%d >= MaxNsubhalos=%d", max_nsubhalos, MaxNsubhalos);

                  MaxNsubhalos = 1.25 * max_nsubhalos;

                  Subhalo = (subhalo_properties *)Mem.myrealloc_movable(Subhalo, MaxNsubhalos * sizeof(subhalo_properties));
                }

              for(int i = 0; i < LocalLen; i++)
                {
                  unbind_list[i] = IndexList[unbind_list[i]]; /* move to particle index list */

                  if(unbind_list[i] < 0 || unbind_list[i] >= Tp->NumPart)
                    Terminate("strange");
                }

              int marked = subfind_determine_sub_halo_properties(unbind_list, LocalLen, &Subhalo[Nsubhalos], SubComm);

              for(int i = 0; i < LocalLen; i++)
                {
                  unbind_list[i] = Tp->PS[unbind_list[i]].InvIndex; /* move back to group index list */

                  if(unbind_list[i] < 0 || unbind_list[i] >= NumPartGroup)
                    Terminate("also strange");
                }

              MPI_Allreduce(MPI_IN_PLACE, &marked, 1, MPI_INT, MPI_SUM, SubComm);

              if(SubThisTask == 0)
                {
                  if(subnr == 0)
                    {
                      for(int j = 0; j < 3; j++)
                        {
                          Group[gr].Pos[j] = Subhalo[Nsubhalos].Pos[j];
                          Group[gr].IntPos[j] = Subhalo[Nsubhalos].IntPos[j];
                        }
                    }

#if defined(SUBFIND_ORPHAN_TREATMENT)
                  Group[gr].LenPrevMostBnd += marked;
#endif

                  Subhalo[Nsubhalos].GroupNr = GroupNr;
                  Subhalo[Nsubhalos].SubRankInGr = subnr;
                  Subhalo[Nsubhalos].SubParentRank = parent;

                  Nsubhalos++;
                }

              /* Let's now assign the subhalo number within the group */
              for(int i = 0; i < LocalLen; i++)
                {
                  Tp->PS[IndexList[unbind_list[i]]].SubRankInGr = subnr;
#if defined(MERGERTREE)
                  Tp->PS[IndexList[unbind_list[i]]].SizeOfSubhalo.set(len);
#endif
                }

              subnr++;
            }
        }
    }

  subfind_collective_printf("SUBFIND: root-task=%d: determining substructure properties done\n", ThisTask);

  Mem.myfree(unbind_list);
  Mem.myfree(coll_candidates);
  Mem.myfree(SFPrevLen);
  Mem.myfree(SFLen);
  Mem.myfree(SFTail);
  Mem.myfree(SFNext);
  Mem.myfree(SFHead);
  Mem.myfree(sd);
}

/* This function finds the subhalo candidates (i.e. locally overdense structures bounded by a saddle point).
 * They can be nested inside each other, and will later be subjected to an unbinding procedure.
 */
template <typename partset>
void fof<partset>::subfind_col_find_coll_candidates(long long totgrouplen)
{
  subfind_collective_printf("SUBFIND: root-task=%d: building distributed linked list. (presently allocated %g MB)\n", ThisTask,
                            Mem.getAllocatedBytesInMB());

  double t0 = Logs.second();

  /* Now find the subhalo candidates by building up-link lists for them.
   * We go through the processors in the group in sequence, starting with the first one, which holds the densest particles
   * according to the sd[] array. In each iteration, the processor we currently deal with is called 'master', while the others
   * listen for incoming requests for information.
   */
  for(int master = 0; master < SubNTask; master++)
    {
      double tt0 = Logs.second();
      if(SubThisTask != master)
        subfind_poll_for_requests(); /* if we are not the master task, we react to incoming requests for information */
      else
        {
          /* we go through the sd[] indices stored on the master task, which means we start with the densest particle */
          for(int k = 0; k < NumPartGroup; k++)
            {
              int ngbcount = sd[k].ngbcount;
              location ngb_index1 = sd[k].ngb_index1;
              location ngb_index2 = sd[k].ngb_index2;

              switch(ngbcount)
                /* treat the different possible cases */
                {
                  case 0: /* this appears to be a lonely maximum -> new group */
                    subfind_distlinklist_set_all(sd[k].index, sd[k].index, sd[k].index, 1, {-1, -1}, sd[k].PrevSizeOfSubhalo);
                    break;

                  case 1: /* the particle is attached to exactly one group */
                    {
                      if(ngb_index1.task < 0 || ngb_index1.task >= SubNTask)
                        Terminate("ngb_index1.task=%d SubNTask=%d", ngb_index1.task, SubNTask);

                      location head = subfind_distlinklist_get_head(ngb_index1);

                      if(head.index == -1)
                        Terminate("We have a problem! head=%d for k=%d on task=%d\n", head.index, k, SubThisTask);

                      location tail;
                      int retcode =
                          subfind_distlinklist_get_tail_set_tail_increaselen(head, tail, sd[k].index, sd[k].PrevSizeOfSubhalo);

                      if(!(retcode & 1))
                        subfind_distlinklist_set_headandnext(sd[k].index, head, {-1, -1});
                      if(!(retcode & 2))
                        subfind_distlinklist_set_next(tail, sd[k].index);
                    }
                    break;

                  case 2: /* the particle merges two groups together */
                    {
                      location head, head_attach;

                      if(ngb_index1.task < 0 || ngb_index1.task >= SubNTask)
                        Terminate("ngb_index1.task=%d SubNTask=%d", ngb_index1.task, SubNTask);

                      if(ngb_index2.task < 0 || ngb_index2.task >= SubNTask)
                        Terminate("ngb_index2.task=%d SubNTask=%d", ngb_index2.task, SubNTask);

                      if(ngb_index1.task == ngb_index2.task)
                        {
                          subfind_distlinklist_get_two_heads(ngb_index1, ngb_index2, head, head_attach);
                        }
                      else
                        {
                          head = subfind_distlinklist_get_head(ngb_index1);
                          head_attach = subfind_distlinklist_get_head(ngb_index2);
                        }

                      if(head.index == -1 || head_attach.index == -1)
                        Terminate("We have a problem! head=%d/%d head_attach=%d/%d for k=%d on task=%d\n", head.task, head.index,
                                  head_attach.task, head_attach.index, k, SubThisTask);

                      if(head != head_attach)
                        {
                          location tail, tail_attach;
                          MyLenType len, len_attach;
                          double prevlen, prevlen_attach;

                          subfind_distlinklist_get_tailandlen(head, tail, len, prevlen);
                          subfind_distlinklist_get_tailandlen(head_attach, tail_attach, len_attach, prevlen_attach);

                          bool swap_len = false;
                          bool swap_prevlen = false;

                          if(len_attach > len || (len_attach == len && head_attach < head))
                            swap_len = true;

                          if(prevlen > 0 && prevlen_attach > 0 && len >= All.DesLinkNgb && len_attach >= All.DesLinkNgb)
                            {
                              if(prevlen_attach > prevlen || (prevlen_attach == prevlen && swap_len == true))
                                swap_prevlen = true;
                            }
                          else
                            swap_prevlen = swap_len;
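
                          /* Main-trunk decision: by default the longer branch becomes the trunk
                           * (swap_len). If both branches are viable candidates and carry non-zero
                           * previous subhalo sizes, the branch with the larger previous size wins
                           * instead (swap_prevlen). The previous sizes are only non-zero when
                           * MERGERTREE is active, which makes the main-branch choice more stable
                           * across output times; otherwise the pure length criterion applies. */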

                          /* if the other group is longer, swap */
                          if(swap_prevlen)
                            {
                              location tmp = head;
                              head = head_attach;
                              head_attach = tmp;

                              tmp = tail;
                              tail = tail_attach;
                              tail_attach = tmp;

                              MyLenType tmplen = len;
                              len = len_attach;
                              len_attach = tmplen;

                              double tmpprevlen = prevlen;
                              prevlen = prevlen_attach;
                              prevlen_attach = tmpprevlen;
                            }

                          /* only in case the attached group is long enough do we bother to register it
                           * as a subhalo candidate */
                          if(len_attach >= All.DesLinkNgb && len >= All.DesLinkNgb)
                            {
                              count_decisions++;

                              if(swap_prevlen != swap_len)
                                {
                                  printf(
                                      "SUBFIND: TASK=%d: made a different main trunk decision due to previous length: prevlen=%g "
                                      "prevlen_attach=%g len=%g len_attach=%g\n",
                                      ThisTask, prevlen / len, prevlen_attach / len_attach, (double)len, (double)len_attach);
                                  fflush(stdout);
                                  count_different_decisions++;
                                }

                              if(count_cand < max_coll_candidates)
                                {
                                  coll_candidates[count_cand].len = len_attach;
                                  coll_candidates[count_cand].head = head_attach;
                                  count_cand++;
                                }
                              else
                                Terminate("Task %d: count=%d, max=%d, npartgroup=%d\n", SubThisTask, count_cand, max_coll_candidates,
                                          NumPartGroup);
                            }

                          /* now join the two groups */
                          subfind_distlinklist_set_tailandlen(head, tail_attach, len + len_attach, prevlen + prevlen_attach);
                          subfind_distlinklist_set_next(tail, head_attach);

                          location ss = head_attach;
                          do
                            {
                              ss = subfind_distlinklist_set_head_get_next(ss, head);
                            }
                          while(ss.index >= 0);
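
                          /* Only the head pointers of the attached branch are rewritten in the loop
                           * above, so the relabeling cost is bounded by the length of the attached
                           * list (normally the shorter one, unless the previous-size criterion
                           * overrode the length-based choice). This mirrors union-by-size in a
                           * disjoint-set structure: absorbing the smaller list keeps the total
                           * relabeling work low. */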
                        }

                      /* finally, attach the particle to 'head' */
                      location tail;
                      int retcode =
                          subfind_distlinklist_get_tail_set_tail_increaselen(head, tail, sd[k].index, sd[k].PrevSizeOfSubhalo);

                      if(!(retcode & 1))
                        subfind_distlinklist_set_headandnext(sd[k].index, head, {-1, -1});
                      if(!(retcode & 2))
                        subfind_distlinklist_set_next(tail, sd[k].index);
                    }
                    break;
                }
            }

          myflush(stdout);

          /* now tell the others to stop polling */
          for(int k = 0; k < SubNTask; k++)
            if(k != SubThisTask)
              MPI_Send(&k, 1, MPI_INT, k, TAG_POLLING_DONE, SubComm);
        }

      MPI_Barrier(SubComm);
      double tt1 = Logs.second();

      subfind_collective_printf("SUBFIND: root-task=%d: ma=%d/%d took %g sec\n", ThisTask, master, SubNTask, Logs.timediff(tt0, tt1));
    }
  double t1 = Logs.second();

  subfind_collective_printf("SUBFIND: root-task=%d: identification of primary coll_candidates took %g sec\n", ThisTask,
                            Logs.timediff(t0, t1));

  /* Add the full group as the final subhalo candidate. */
  location head = {-1, -1};
  location prev = {-1, -1};
  for(int master = 0; master < SubNTask; master++)
    {
      if(SubThisTask != master)
        subfind_poll_for_requests();
      else
        {
          for(int i = 0; i < NumPartGroup; i++)
            {
              location index = {SubThisTask, i};

              if(SFHead[i] == index)
                {
                  location tail;
                  MyLenType len;
                  double prevlen;
                  subfind_distlinklist_get_tailandlen(SFHead[i], tail, len, prevlen);
                  location next = subfind_distlinklist_get_next(tail);

                  if(next.index == -1)
                    {
                      if(prev.index < 0)
                        head = index;

                      if(prev.index >= 0)
                        subfind_distlinklist_set_next(prev, index);

                      prev = tail;
                    }
                }
            }

          /* now tell the others to stop polling */
          for(int k = 0; k < SubNTask; k++)
            if(k != SubThisTask)
              MPI_Send(&k, 1, MPI_INT, k, TAG_POLLING_DONE, SubComm);
        }

      MPI_Barrier(SubComm);
      MPI_Bcast(&head, sizeof(head), MPI_BYTE, master, SubComm);
      MPI_Bcast(&prev, sizeof(prev), MPI_BYTE, master, SubComm);
    }

  if(SubThisTask == SubNTask - 1)
    {
      if(count_cand < max_coll_candidates)
        {
          coll_candidates[count_cand].len = totgrouplen;
          coll_candidates[count_cand].head = head;
          count_cand++;
        }
      else
        Terminate("count_cand=%d >= max_coll_candidates=%d", count_cand, max_coll_candidates);
    }

  subfind_collective_printf("SUBFIND: root-task=%d: adding background as candidate\n", ThisTask);

  /* go through the whole chain once to establish a rank order. For the rank we use SFLen[] */
  double ta = Logs.second();

  int master = head.task;

  if(master < 0 || master >= SubNTask)
    Terminate("master=%d SubNTask=%d\n", master, SubNTask);

  if(SubThisTask != master)
    subfind_poll_for_requests();
  else
    {
      location p = head;
      MyLenType rank = 0;

      while(p.index >= 0)
        {
          p = subfind_distlinklist_setrank_and_get_next(p, rank);
        }

      /* now tell the others to stop polling */
      for(int i = 0; i < SubNTask; i++)
        if(i != master)
          MPI_Send(&i, 1, MPI_INT, i, TAG_POLLING_DONE, SubComm);
    }

  MPI_Barrier(SubComm);

  /* for each candidate, we now pull out the rank of its head */
  for(int master = 0; master < SubNTask; master++)
    {
      if(SubThisTask != master)
        subfind_poll_for_requests();
      else
        {
          for(int k = 0; k < count_cand; k++)
            coll_candidates[k].rank = subfind_distlinklist_get_rank(coll_candidates[k].head);

          /* now tell the others to stop polling */
          for(int i = 0; i < SubNTask; i++)
            if(i != SubThisTask)
              MPI_Send(&i, 1, MPI_INT, i, TAG_POLLING_DONE, SubComm);
        }
    }
  MPI_Barrier(SubComm);

  double tb = Logs.second();

  subfind_collective_printf(
      "SUBFIND: root-task=%d: establishing of rank order took %g sec (grouplen=%lld) presently allocated %g MB\n", ThisTask,
      Logs.timediff(ta, tb), totgrouplen, Mem.getAllocatedBytesInMB());
}

template <typename partset>
void fof<partset>::subfind_unbind_independent_ones(domain<partset> *SingleDomain, int count_cand_l)
{
  unbind_list = (int *)Mem.mymalloc("unbind_list", Tp->NumPart * sizeof(int));

  mycxxsort(coll_candidates, coll_candidates + count_cand_l, subfind_compare_coll_candidates_nsubs);

  for(int k = 0, ii = 0; k < count_cand_l; k++)
    if(coll_candidates[k].parent == 0)
      {
        int i = PPS[ii].index;

        while(Tp->PS[i].submark < coll_candidates[k].nsub)
          {
            ii++;
            i = PPS[ii].index;

            if(i >= Tp->NumPart)
              Terminate("i >= NumPart");
          }

        if(Tp->PS[i].submark >= 0 && Tp->PS[i].submark < HIGHBIT)
          {
            int len = 0;
            int nsubs = Tp->PS[i].submark;

            if(nsubs != coll_candidates[k].nsub)
              Terminate("TASK=%d i=%d k=%d nsubs=%d coll_candidates[k].nsub=%d\n", SubThisTask, i, k, nsubs, coll_candidates[k].nsub);

            while(i < Tp->NumPart)
              {
                if(Tp->PS[i].submark == nsubs)
                  {
                    Tp->PS[i].submark = HIGHBIT;
                    if((Tp->PS[i].u.s.origintask & HIGHBIT) == 0)
                      {
                        unbind_list[len] = i;
                        len++;
                      }
                    ii++;
                    i = PPS[ii].index;
                  }
                else
                  break;
              }

            /* call the serial unbind function */
            len = subfind_unbind(SingleDomain, SingleDomain->Communicator, unbind_list, len);

            if(len >= All.DesLinkNgb)
              {
                /* ok, we found a substructure */
                coll_candidates[k].bound_length = len;

                for(int j = 0; j < len; j++)
                  Tp->PS[unbind_list[j]].submark = nsubs; /* we use this to flag the substructures */
              }
            else
              coll_candidates[k].bound_length = 0;
          }
      }

  Mem.myfree(unbind_list);
}

struct loc_compound0
{
  int index;
  location loc;
  approxlen prevlen;
};

struct loc_compound1
{
  int index;
  location loc;
};

struct loc_compound2
{
  location loc;
  MyLenType len;
  double prevlen;
};

struct loc_compound3
{
  int index;
  MyLenType len;
  location tail;
  double prevlen;
};

struct loc_compound4
{
  int index;
  location head;
  location next;
};

struct loc_compound5
{
  int index;
  MyLenType len;
  location head;
  location tail;
  location next;
  approxlen prevlen;
};

struct loc_compound6
{
  location loc;
  MyLenType len;
};
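
/* These loc_compound* structs are plain message payloads: they are sent and received as raw
 * MPI_BYTE buffers by the request/response handlers below. This is safe only because sender and
 * receiver run the same binary, so the struct layout (including padding) is guaranteed to match;
 * no explicit MPI datatype is required. */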

template <typename partset>
void fof<partset>::subfind_poll_for_requests(void)
{
  int tag;
  do
    {
      MPI_Status status;
      MPI_Probe(MPI_ANY_SOURCE, MPI_ANY_TAG, SubComm, &status);

      int source = status.MPI_SOURCE;
      tag = status.MPI_TAG;

      /* MPI_Get_count(&status, MPI_BYTE, &count); */
      switch(tag)
        {
          case TAG_GET_TWOHEADS:
            {
              int ibuf[2];
              MPI_Recv(ibuf, 2, MPI_INT, source, TAG_GET_TWOHEADS, SubComm, MPI_STATUS_IGNORE);
              location buf[2];
              buf[0] = SFHead[ibuf[0]];
              buf[1] = SFHead[ibuf[1]];
              MPI_Send(buf, 2 * sizeof(location), MPI_BYTE, source, TAG_GET_TWOHEADS_DATA, SubComm);
            }
            break;

          case TAG_SET_NEWTAIL:
            {
              loc_compound0 data;
              MPI_Recv(&data, sizeof(data), MPI_BYTE, source, TAG_SET_NEWTAIL, SubComm, MPI_STATUS_IGNORE);

              int index = data.index;
              location newtail = data.loc;
              location oldtail = SFTail[index]; /* return old tail */
              SFTail[index] = newtail;
              SFLen[index]++;
              SFPrevLen[index] += data.prevlen.get();

              if(newtail.task == SubThisTask)
                {
                  SFHead[newtail.index] = {SubThisTask, index};
                  SFNext[newtail.index] = {-1, -1};
                }

              if(oldtail.task == SubThisTask)
                {
                  SFNext[oldtail.index] = newtail;
                }

              MPI_Send(&oldtail, sizeof(location), MPI_BYTE, source, TAG_GET_OLDTAIL, SubComm);
            }
            break;

          case TAG_SET_ALL:
            {
              loc_compound5 data;
              MPI_Recv(&data, sizeof(data), MPI_BYTE, source, TAG_SET_ALL, SubComm, MPI_STATUS_IGNORE);
              int index = data.index;
              SFLen[index] = data.len;
              SFHead[index] = data.head;
              SFTail[index] = data.tail;
              SFNext[index] = data.next;
              SFPrevLen[index] = data.prevlen.get();
            }
            break;

          case TAG_GET_TAILANDLEN:
            {
              int index;
              MPI_Recv(&index, 1, MPI_INT, source, tag, SubComm, &status);
              loc_compound2 data = {SFTail[index], SFLen[index], SFPrevLen[index]};
              MPI_Send(&data, sizeof(data), MPI_BYTE, source, TAG_GET_TAILANDLEN_DATA, SubComm);
            }
            break;

          case TAG_SET_TAILANDLEN:
            {
              loc_compound3 data;
              MPI_Recv(&data, sizeof(data), MPI_BYTE, source, TAG_SET_TAILANDLEN, SubComm, MPI_STATUS_IGNORE);
              int index = data.index;
              SFTail[index] = data.tail;
              SFLen[index] = data.len;
              SFPrevLen[index] = data.prevlen;
            }
            break;

          case TAG_SET_HEADANDNEXT:
            {
              loc_compound4 data;
              MPI_Recv(&data, sizeof(data), MPI_BYTE, source, TAG_SET_HEADANDNEXT, SubComm, MPI_STATUS_IGNORE);
              int index = data.index;
              SFHead[index] = data.head;
              SFNext[index] = data.next;
            }
            break;

          case TAG_SET_NEXT:
            {
              loc_compound1 data;
              MPI_Recv(&data, sizeof(data), MPI_BYTE, source, TAG_SET_NEXT, SubComm, MPI_STATUS_IGNORE);
              int index = data.index;
              SFNext[index] = data.loc;
            }
            break;

          case TAG_SETHEADGETNEXT:
            {
              loc_compound1 data;
              MPI_Recv(&data, sizeof(data), MPI_BYTE, source, TAG_SETHEADGETNEXT, SubComm, MPI_STATUS_IGNORE);
              int index = data.index;
              location head = data.loc;
              location next;
              int task;
              do
                {
                  SFHead[index] = head;
                  next = SFNext[index];
                  task = next.task;
                  index = next.index;
                }
              while(next.index >= 0 && task == SubThisTask);
              MPI_Send(&next, sizeof(location), MPI_BYTE, source, TAG_SETHEADGETNEXT_DATA, SubComm);
            }
            break;

          case TAG_GET_NEXT:
            {
              int index;
              MPI_Recv(&index, 1, MPI_INT, source, tag, SubComm, &status);
              MPI_Send(&SFNext[index], sizeof(location), MPI_BYTE, source, TAG_GET_NEXT_DATA, SubComm);
            }
            break;

          case TAG_GET_HEAD:
            {
              int index;
              MPI_Recv(&index, 1, MPI_INT, source, tag, SubComm, &status);
              MPI_Send(&SFHead[index], sizeof(location), MPI_BYTE, source, TAG_GET_HEAD_DATA, SubComm);
            }
            break;

          case TAG_ADD_PARTICLE:
            {
              int index;
              MPI_Recv(&index, 1, MPI_INT, source, tag, SubComm, &status);
              if(SFTail[index].index < 0) /* consider only particles not already in substructures */
                {
                  unbind_list[LocalLen] = index;
                  if(index >= NumPartGroup)
                    Terminate("What: index=%d NumPartGroup=%d\n", index, NumPartGroup);
                  LocalLen++;
                }
            }
            break;

          case TAG_MARK_PARTICLE:
            {
              int ibuf[3];
              MPI_Recv(ibuf, 3, MPI_INT, source, TAG_MARK_PARTICLE, SubComm, MPI_STATUS_IGNORE);
              int index = ibuf[0];
              int target = ibuf[1];
              int submark = ibuf[2];

              if(Tp->PS[IndexList[index]].submark != HIGHBIT)
                Terminate("Task=%d i=%d P[i].submark=%d?\n", SubThisTask, IndexList[index], Tp->PS[IndexList[index]].submark);

              Tp->PS[IndexList[index]].TargetTask = target;
              Tp->PS[IndexList[index]].submark = submark;
            }
            break;

          case TAG_ADDBOUND:
            {
              int ibuf[2];
              MPI_Recv(ibuf, 2, MPI_INT, source, TAG_ADDBOUND, SubComm, &status);
              int index = ibuf[0];
              int nsub = ibuf[1];
              if(SFTail[index].index == nsub) /* consider only particles in this substructure */
                {
                  unbind_list[LocalLen] = index;
                  LocalLen++;
                }
            }
            break;

          case TAG_SETRANK:
            {
              loc_compound6 data;
              MPI_Recv(&data, sizeof(data), MPI_BYTE, source, TAG_SETRANK, SubComm, MPI_STATUS_IGNORE);
              int index = data.loc.index;
              MyLenType rank = data.len;
              location next;
              do
                {
                  SFLen[index] = rank++;
                  next = SFNext[index];
                  if(next.index < 0)
                    break;
                  index = next.index;
                }
              while(next.task == SubThisTask);
              data.loc = next;
              data.len = rank;
              MPI_Send(&data, sizeof(data), MPI_BYTE, source, TAG_SETRANK_OUT, SubComm);
            }
            break;

          case TAG_GET_RANK:
            {
              int index;
              MPI_Recv(&index, 1, MPI_INT, source, tag, SubComm, &status);
              MyLenType rank = SFLen[index];
              MPI_Send(&rank, sizeof(MyLenType), MPI_BYTE, source, TAG_GET_RANK_DATA, SubComm);
            }
            break;

          case TAG_POLLING_DONE:
            {
              int index;
              MPI_Recv(&index, 1, MPI_INT, source, tag, SubComm, &status);
            }
            break;

          default:
            Terminate("tag not present in the switch");
            break;
        }
    }
  while(tag != TAG_POLLING_DONE);
}
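
/* Usage pattern of the poll loop above (sketch): each accessor function below either touches the
 * local SF* arrays directly when the addressed element lives on this task, or sends the matching
 * request tag to the owning task, which answers from inside subfind_poll_for_requests(). A typical
 * collective episode in the code above therefore has the form
 *
 *   if(SubThisTask != master)
 *     subfind_poll_for_requests();           // serve remote accesses
 *   else
 *     {
 *       ... walk lists via the accessors ... // may trigger requests to other tasks
 *       for(int i = 0; i < SubNTask; i++)    // finally release the polling tasks
 *         if(i != SubThisTask)
 *           MPI_Send(&i, 1, MPI_INT, i, TAG_POLLING_DONE, SubComm);
 *     }
 */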

template <typename partset>
location fof<partset>::subfind_distlinklist_setrank_and_get_next(location loc, MyLenType &rank)
{
  int task = loc.task;
  int i = loc.index;

  location next;

  if(SubThisTask == task)
    {
      SFLen[i] = rank++;
      next = SFNext[i];
    }
  else
    {
      loc_compound6 data = {loc, rank};
      MPI_Send(&data, sizeof(data), MPI_BYTE, task, TAG_SETRANK, SubComm);
      MPI_Recv(&data, sizeof(data), MPI_BYTE, task, TAG_SETRANK_OUT, SubComm, MPI_STATUS_IGNORE);
      next = data.loc;
      rank = data.len;
    }
  return next;
}

template <typename partset>
location fof<partset>::subfind_distlinklist_set_head_get_next(location loc, location head)
{
  int task = loc.task;
  int i = loc.index;

  location next;

  if(SubThisTask == task)
    {
      SFHead[i] = head;
      next = SFNext[i];
    }
  else
    {
      loc_compound1 data = {i, head};
      MPI_Send(&data, sizeof(data), MPI_BYTE, task, TAG_SETHEADGETNEXT, SubComm);
      MPI_Recv(&next, sizeof(location), MPI_BYTE, task, TAG_SETHEADGETNEXT_DATA, SubComm, MPI_STATUS_IGNORE);
    }

  return next;
}

template <typename partset>
void fof<partset>::subfind_distlinklist_set_next(location loc, location next)
{
  int task = loc.task;
  int i = loc.index;

  if(SubThisTask == task)
    {
      SFNext[i] = next;
    }
  else
    {
      loc_compound1 data = {i, next};
      MPI_Send(&data, sizeof(data), MPI_BYTE, task, TAG_SET_NEXT, SubComm);
    }
}

template <typename partset>
void fof<partset>::subfind_distlinklist_add_particle(location loc)
{
  int task = loc.task;
  int i = loc.index;

  if(SubThisTask == task)
    {
      if(SFTail[i].index < 0) /* consider only particles not already in substructures */
        {
          if(i >= NumPartGroup)
            Terminate("What: index=%d NumPartGroup=%d\n", i, NumPartGroup);

          unbind_list[LocalLen] = i;
          LocalLen++;
        }
    }
  else
    {
      MPI_Send(&i, 1, MPI_INT, task, TAG_ADD_PARTICLE, SubComm);
    }
}

template <typename partset>
void fof<partset>::subfind_distlinklist_mark_particle(location loc, int target, int submark)
{
  int task = loc.task;
  int i = loc.index;

  if(SubThisTask == task)
    {
      if(Tp->PS[IndexList[i]].submark != HIGHBIT)
        Terminate("Task=%d i=%d PS[i].submark=%d?\n", SubThisTask, i, Tp->PS[IndexList[i]].submark);

      Tp->PS[IndexList[i]].TargetTask = target;
      Tp->PS[IndexList[i]].submark = submark;
    }
  else
    {
      int ibuf[3] = {i, target, submark};
      MPI_Send(ibuf, 3, MPI_INT, task, TAG_MARK_PARTICLE, SubComm);
    }
}

template <typename partset>
void fof<partset>::subfind_distlinklist_add_bound_particles(location loc, int nsub)
{
  int task = loc.task;
  int i = loc.index;

  if(SubThisTask == task)
    {
      if(SFTail[i].index == nsub) /* consider only particles that belong to this substructure */
        {
          unbind_list[LocalLen] = i;
          LocalLen++;
        }
    }
  else
    {
      int ibuf[2] = {i, nsub};
      MPI_Send(ibuf, 2, MPI_INT, task, TAG_ADDBOUND, SubComm);
    }
}

template <typename partset>
location fof<partset>::subfind_distlinklist_get_next(location loc)
{
  int task = loc.task;
  int i = loc.index;

  location next;

  if(SubThisTask == task)
    {
      next = SFNext[i];
    }
  else
    {
      MPI_Send(&i, 1, MPI_INT, task, TAG_GET_NEXT, SubComm);
      MPI_Recv(&next, sizeof(location), MPI_BYTE, task, TAG_GET_NEXT_DATA, SubComm, MPI_STATUS_IGNORE);
    }

  return next;
}

template <typename partset>
MyLenType fof<partset>::subfind_distlinklist_get_rank(location loc)
{
  int task = loc.task;
  int i = loc.index;

  MyLenType rank;

  if(SubThisTask == task)
    {
      rank = SFLen[i];
    }
  else
    {
      MPI_Send(&i, 1, MPI_INT, task, TAG_GET_RANK, SubComm);
      MPI_Recv(&rank, sizeof(MyLenType), MPI_BYTE, task, TAG_GET_RANK_DATA, SubComm, MPI_STATUS_IGNORE);
    }

  return rank;
}

template <typename partset>
location fof<partset>::subfind_distlinklist_get_head(location loc)
{
  int task = loc.task;
  int i = loc.index;

  location head;

  if(SubThisTask == task)
    {
      head = SFHead[i];
    }
  else
    {
      MPI_Send(&i, 1, MPI_INT, task, TAG_GET_HEAD, SubComm);
      MPI_Recv(&head, sizeof(location), MPI_BYTE, task, TAG_GET_HEAD_DATA, SubComm, MPI_STATUS_IGNORE);
    }

  return head;
}

template <typename partset>
void fof<partset>::subfind_distlinklist_get_two_heads(location ngb_index1, location ngb_index2, location &head, location &head_attach)
{
  if(ngb_index1.task != ngb_index2.task)
    Terminate("ngb_index1.task != ngb_index2.task");

  int task = ngb_index1.task;
  int i1 = ngb_index1.index;
  int i2 = ngb_index2.index;

  if(SubThisTask == task)
    {
      head = SFHead[i1];
      head_attach = SFHead[i2];
    }
  else
    {
      int ibuf[2] = {i1, i2};
      MPI_Send(ibuf, 2, MPI_INT, task, TAG_GET_TWOHEADS, SubComm);
      location buf[2];
      MPI_Recv(buf, 2 * sizeof(location), MPI_BYTE, task, TAG_GET_TWOHEADS_DATA, SubComm, MPI_STATUS_IGNORE);
      head = buf[0];
      head_attach = buf[1];
    }
}

template <typename partset>
void fof<partset>::subfind_distlinklist_set_headandnext(location loc, location head, location next)
{
  int task = loc.task;
  int i = loc.index;

  if(SubThisTask == task)
    {
      SFHead[i] = head;
      SFNext[i] = next;
    }
  else
    {
      loc_compound4 data = {i, head, next};
      MPI_Send(&data, sizeof(data), MPI_BYTE, task, TAG_SET_HEADANDNEXT, SubComm);
    }
}

template <typename partset>
int fof<partset>::subfind_distlinklist_get_tail_set_tail_increaselen(location loc, location &tail, location newtail, approxlen prevlen)
{
  int task = loc.task;
  int i = loc.index;

  int retcode = 0;

  if(SubThisTask == task)
    {
      location oldtail = SFTail[i];
      SFTail[i] = newtail;
      SFLen[i]++;
      SFPrevLen[i] += prevlen.get();
      tail = oldtail;

      if(newtail.task == SubThisTask)
        {
          SFHead[newtail.index] = loc;
          SFNext[newtail.index] = {-1, -1};
          retcode |= 1;
        }

      if(oldtail.task == SubThisTask)
        {
          SFNext[oldtail.index] = newtail;
          retcode |= 2;
        }
    }
  else
    {
      loc_compound0 data = {i, newtail, prevlen};
      MPI_Send(&data, sizeof(data), MPI_BYTE, task, TAG_SET_NEWTAIL, SubComm);
      location oldtail;
      MPI_Recv(&oldtail, sizeof(location), MPI_BYTE, task, TAG_GET_OLDTAIL, SubComm, MPI_STATUS_IGNORE);
      tail = oldtail;

      if(newtail.task == task)
        retcode |= 1;
      if(oldtail.task == task)
        retcode |= 2;
    }

  return retcode;
}
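
/* The retcode bits report which follow-up links the owner of 'loc' has already established:
 * bit 0 set means head/next of the new tail were initialized (the new tail lives on the owning
 * task), bit 1 set means the old tail's next pointer was updated. The call sites complete
 * whatever is missing remotely, as in:
 *
 *   if(!(retcode & 1))
 *     subfind_distlinklist_set_headandnext(sd[k].index, head, {-1, -1});
 *   if(!(retcode & 2))
 *     subfind_distlinklist_set_next(tail, sd[k].index);
 */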

template <typename partset>
void fof<partset>::subfind_distlinklist_set_tailandlen(location loc, location tail, MyLenType len, double prevlen)
{
  int task = loc.task;
  int i = loc.index;

  if(SubThisTask == task)
    {
      SFTail[i] = tail;
      SFLen[i] = len;
      SFPrevLen[i] = prevlen;
    }
  else
    {
      loc_compound3 data = {i, len, tail, prevlen};
      MPI_Send(&data, sizeof(data), MPI_BYTE, task, TAG_SET_TAILANDLEN, SubComm);
    }
}

template <typename partset>
void fof<partset>::subfind_distlinklist_get_tailandlen(location loc, location &tail, MyLenType &len, double &prevlen)
{
  int task = loc.task;
  int i = loc.index;

  if(SubThisTask == task)
    {
      tail = SFTail[i];
      len = SFLen[i];
      prevlen = SFPrevLen[i];
    }
  else
    {
      MPI_Send(&i, 1, MPI_INT, task, TAG_GET_TAILANDLEN, SubComm);

      loc_compound2 data;
      MPI_Recv(&data, sizeof(data), MPI_BYTE, task, TAG_GET_TAILANDLEN_DATA, SubComm, MPI_STATUS_IGNORE);
      tail = data.loc;
      len = data.len;
      prevlen = data.prevlen;
    }
}

template <typename partset>
void fof<partset>::subfind_distlinklist_set_all(location loc, location head, location tail, MyLenType len, location next,
                                                approxlen prevlen)
{
  int task = loc.task;
  int i = loc.index;

  if(SubThisTask == task)
    {
      SFHead[i] = head;
      SFTail[i] = tail;
      SFNext[i] = next;
      SFLen[i] = len;
      SFPrevLen[i] = prevlen.get();
    }
  else
    {
      loc_compound5 data = {i, len, head, tail, next, prevlen};
      MPI_Send(&data, sizeof(data), MPI_BYTE, task, TAG_SET_ALL, SubComm);
    }
}

/* now make sure that the following classes are really instantiated, otherwise we may get a linking problem */
#include "../data/simparticles.h"
template class fof<simparticles>;

#if defined(LIGHTCONE) && defined(LIGHTCONE_PARTICLES_GROUPS)
#include "../data/lcparticles.h"
template class fof<lcparticles>;
#endif

#endif
#endif