GADGET-4
subfind_distribute.cc
/*******************************************************************************
 * \copyright   This file is part of the GADGET4 N-body/SPH code developed
 * \copyright   by Volker Springel. Copyright (C) 2014-2020 by Volker Springel
 * \copyright   (vspringel@mpa-garching.mpg.de) and all contributing authors.
 *******************************************************************************/

#include "gadgetconfig.h"

#ifdef SUBFIND

#include <mpi.h>
#include <algorithm>
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <cstring>

#include "../data/allvars.h"
#include "../data/dtypes.h"
#include "../data/mymalloc.h"
#include "../domain/domain.h"
#include "../fof/fof.h"
#include "../gravtree/gravtree.h"
#include "../logs/timer.h"
#include "../main/simulation.h"
#include "../mpi_utils/mpi_utils.h"
#include "../sort/cxxsort.h"
#include "../subfind/subfind.h"

template <typename partset>
void fof<partset>::subfind_distribute_groups(void)
{
  double t0 = Logs.second();

  int *Send_count  = (int *)Mem.mymalloc_movable(&Send_count, "Send_count", sizeof(int) * NTask);
  int *Send_offset = (int *)Mem.mymalloc_movable(&Send_offset, "Send_offset", sizeof(int) * NTask);
  int *Recv_count  = (int *)Mem.mymalloc_movable(&Recv_count, "Recv_count", sizeof(int) * NTask);
  int *Recv_offset = (int *)Mem.mymalloc_movable(&Recv_offset, "Recv_offset", sizeof(int) * NTask);

  /* count how many groups we have to send to each task */
  for(int i = 0; i < NTask; i++)
    Send_count[i] = 0;

  for(int i = 0; i < Ngroups; i++)
    {
      int target = Group[i].TargetTask;

      if(target < 0 || target >= NTask)
        Terminate("target < 0 || target >= NTask");

      if(target != ThisTask)
        Send_count[target]++;
    }

  MPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, Communicator);

  Recv_offset[0] = Send_offset[0] = 0;
  int nexport = 0, nimport = 0;

  for(int i = 0; i < NTask; i++)
    {
      nimport += Recv_count[i];
      nexport += Send_count[i];

      if(i > 0)
        {
          Send_offset[i] = Send_offset[i - 1] + Send_count[i - 1];
          Recv_offset[i] = Recv_offset[i - 1] + Recv_count[i - 1];
        }
    }

  group_properties *send_Group =
      (group_properties *)Mem.mymalloc_movable(&send_Group, "send_Group", nexport * sizeof(group_properties));

  /* fill the export buffer, and remove the exported groups from the local group list */
  for(int i = 0; i < NTask; i++)
    Send_count[i] = 0;

  for(int i = 0; i < Ngroups; i++)
    {
      int target = Group[i].TargetTask;

      if(target != ThisTask)
        {
          send_Group[Send_offset[target] + Send_count[target]] = Group[i];
          Send_count[target]++;

          Group[i] = Group[Ngroups - 1];
          Ngroups--;
          i--;
        }
    }

  if(Ngroups + nimport > MaxNgroups)
    {
      MaxNgroups = Ngroups + nimport;
      Group      = (group_properties *)Mem.myrealloc_movable(Group, sizeof(group_properties) * MaxNgroups);
    }

  /* exchange the group data, pairing up the tasks in a hypercube pattern */
  for(int ngrp = 1; ngrp < (1 << PTask); ngrp++)
    {
      int recvTask = ThisTask ^ ngrp;

      if(recvTask < NTask)
        {
          if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0)
            {
              /* get the group info */
              MPI_Sendrecv(&send_Group[Send_offset[recvTask]], Send_count[recvTask] * sizeof(group_properties), MPI_BYTE, recvTask,
                           TAG_DENS_A, &Group[Ngroups + Recv_offset[recvTask]], Recv_count[recvTask] * sizeof(group_properties),
                           MPI_BYTE, recvTask, TAG_DENS_A, Communicator, MPI_STATUS_IGNORE);
            }
        }
    }

  Ngroups += nimport;

  Mem.myfree_movable(send_Group);

  Mem.myfree(Recv_offset);
  Mem.myfree(Recv_count);
  Mem.myfree(Send_offset);
  Mem.myfree(Send_count);

  double t1 = Logs.second();

  mpi_printf("SUBFIND: subfind_distribute_groups() took %g sec\n", Logs.timediff(t0, t1));
}

/* This function redistributes the particles in P[] and PS[] according to what is stored in
 * PS[].TargetTask, and PS[].TargetIndex. NOTE: The associated SphP[] is not moved, i.e. the
 * association is broken until the particles are moved back into the original order.
 */
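/* For illustration: a caller is expected to have filled in the move targets for every local
 * particle before invoking this routine, along the following lines (destination_rank and
 * destination_slot are placeholder names for whatever the caller computes):
 *
 *   Tp->PS[n].TargetTask  = destination_rank;   // MPI rank that should receive particle n
 *   Tp->PS[n].TargetIndex = destination_slot;   // desired local position after the move
 *
 * The exchange below then ships P[n]/PS[n] to TargetTask, and the concluding reorder_P_PS()
 * call establishes the local order requested through TargetIndex.
 */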
template <typename partset>
void fof<partset>::subfind_distribute_particles(MPI_Comm Communicator)
{
  int *Send_count  = (int *)Mem.mymalloc_movable(&Send_count, "Send_count", sizeof(int) * NTask);
  int *Send_offset = (int *)Mem.mymalloc_movable(&Send_offset, "Send_offset", sizeof(int) * NTask);
  int *Recv_count  = (int *)Mem.mymalloc_movable(&Recv_count, "Recv_count", sizeof(int) * NTask);
  int *Recv_offset = (int *)Mem.mymalloc_movable(&Recv_offset, "Recv_offset", sizeof(int) * NTask);

  int CommThisTask, CommNTask;
  MPI_Comm_size(Communicator, &CommNTask);
  MPI_Comm_rank(Communicator, &CommThisTask);

  /* count how many particles have to be sent to each task */
  for(int n = 0; n < CommNTask; n++)
    Send_count[n] = 0;

  for(int n = 0; n < Tp->NumPart; n++)
    {
      int target = Tp->PS[n].TargetTask;

      if(target != CommThisTask)
        {
          if(target < 0 || target >= CommNTask)
            Terminate("n=%d targettask=%d", n, target);

          Send_count[target]++;
        }
    }

  MPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, Communicator);

  int nimport = 0, nexport = 0;
  Recv_offset[0] = 0, Send_offset[0] = 0;

  for(int j = 0; j < CommNTask; j++)
    {
      nexport += Send_count[j];
      nimport += Recv_count[j];

      if(j > 0)
        {
          Send_offset[j] = Send_offset[j - 1] + Send_count[j - 1];
          Recv_offset[j] = Recv_offset[j - 1] + Recv_count[j - 1];
        }
    }

  /* determine the maximum particle load over all tasks, needed below to resize the particle storage */
  int load = (Tp->NumPart + (nimport - nexport)), max_load;
  MPI_Allreduce(&load, &max_load, 1, MPI_INT, MPI_MAX, Communicator);

  typename partset::pdata *partBuf =
      (typename partset::pdata *)Mem.mymalloc_movable(&partBuf, "partBuf", nexport * sizeof(typename partset::pdata));
  subfind_data *subBuf = (subfind_data *)Mem.mymalloc_movable(&subBuf, "subBuf", nexport * sizeof(subfind_data));

  /* fill the export buffers, and remove the exported particles from the local particle arrays */
  for(int i = 0; i < CommNTask; i++)
    Send_count[i] = 0;

  for(int n = 0; n < Tp->NumPart; n++)
    {
      int target = Tp->PS[n].TargetTask;

      if(target != CommThisTask)
        {
          partBuf[Send_offset[target] + Send_count[target]] = Tp->P[n];
          subBuf[Send_offset[target] + Send_count[target]]  = Tp->PS[n];

          Tp->P[n]  = Tp->P[Tp->NumPart - 1];
          Tp->PS[n] = Tp->PS[Tp->NumPart - 1];

          Send_count[target]++;
          Tp->NumPart--;
          n--;
        }
    }

  /* resize the particle storage if the new maximum load leaves too little (or far too much) head room */
  if(max_load > (1.0 - ALLOC_TOLERANCE) * Tp->MaxPart || max_load < (1.0 - 3 * ALLOC_TOLERANCE) * Tp->MaxPart)
    Tp->reallocate_memory_maxpart(max_load / (1.0 - 2 * ALLOC_TOLERANCE));

  Tp->PS = (subfind_data *)Mem.myrealloc_movable(Tp->PS, load * sizeof(subfind_data));

  /* incoming particles will be appended behind the remaining local particles */
  for(int i = 0; i < CommNTask; i++)
    Recv_offset[i] += Tp->NumPart;

#ifdef ISEND_IRECV_IN_DOMAIN

  /* asynchronous variant: post all receives and sends, then wait for their completion */
  MPI_Request *requests = (MPI_Request *)Mem.mymalloc("requests", 8 * CommNTask * sizeof(MPI_Request));
  int n_requests        = 0;

  for(int ngrp = 1; ngrp < (1 << PTask); ngrp++)
    {
      int target = CommThisTask ^ ngrp;

      if(target < CommNTask)
        {
          if(Recv_count[target] > 0)
            {
              MPI_Irecv(Tp->P + Recv_offset[target], Recv_count[target] * sizeof(particle_data), MPI_BYTE, target, TAG_PDATA,
                        Communicator, &requests[n_requests++]);
              MPI_Irecv(Tp->PS + Recv_offset[target], Recv_count[target] * sizeof(subfind_data), MPI_BYTE, target, TAG_KEY,
                        Communicator, &requests[n_requests++]);
            }
        }
    }

  for(int ngrp = 1; ngrp < (1 << PTask); ngrp++)
    {
      int target = CommThisTask ^ ngrp;

      if(target < CommNTask)
        {
          if(Send_count[target] > 0)
            {
              MPI_Issend(partBuf + Send_offset[target], Send_count[target] * sizeof(particle_data), MPI_BYTE, target, TAG_PDATA,
                         Communicator, &requests[n_requests++]);
              MPI_Issend(subBuf + Send_offset[target], Send_count[target] * sizeof(subfind_data), MPI_BYTE, target, TAG_KEY,
                         Communicator, &requests[n_requests++]);
            }
        }
    }

  MPI_Waitall(n_requests, requests, MPI_STATUSES_IGNORE);
  Mem.myfree(requests);

#else
  /* synchronous variant: pairwise exchange via MPI_Sendrecv, pairing up the tasks in a hypercube pattern */
  for(int ngrp = 1; ngrp < (1 << PTask); ngrp++)
    {
      int target = CommThisTask ^ ngrp;

      if(target < CommNTask)
        {
          if(Send_count[target] > 0 || Recv_count[target] > 0)
            {
              MPI_Sendrecv(partBuf + Send_offset[target], Send_count[target] * sizeof(particle_data), MPI_BYTE, target, TAG_PDATA,
                           Tp->P + Recv_offset[target], Recv_count[target] * sizeof(particle_data), MPI_BYTE, target, TAG_PDATA,
                           Communicator, MPI_STATUS_IGNORE);

              MPI_Sendrecv(subBuf + Send_offset[target], Send_count[target] * sizeof(subfind_data), MPI_BYTE, target, TAG_KEY,
                           Tp->PS + Recv_offset[target], Recv_count[target] * sizeof(subfind_data), MPI_BYTE, target, TAG_KEY,
                           Communicator, MPI_STATUS_IGNORE);
            }
        }
    }
#endif

  Tp->NumPart += nimport;
  Mem.myfree_movable(subBuf);
  Mem.myfree_movable(partBuf);

  /* finally, let's also address the desired local order according to PS[].TargetIndex */
  FoFDomain->reorder_P_PS(0, Tp->NumPart);

  Mem.myfree(Recv_offset);
  Mem.myfree(Recv_count);
  Mem.myfree(Send_offset);
  Mem.myfree(Send_count);
}

/* now make sure that the following classes are really instantiated, otherwise we may get a linking problem */
#include "../data/simparticles.h"
template class fof<simparticles>;

#if defined(LIGHTCONE) && defined(LIGHTCONE_PARTICLES_GROUPS)
#include "../data/lcparticles.h"
template class fof<lcparticles>;
#endif

#endif
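
Both routines above move their buffered data with the same pairwise hypercube scheme: in round ngrp every task exchanges with the partner ThisTask ^ ngrp, so that over the 2^PTask - 1 rounds each pair of tasks meets exactly once. The following is a minimal, self-contained sketch of that communication pattern only; it is independent of GADGET-4's data structures, and the int payload and the message tag are made up purely for illustration.

#include <mpi.h>

#include <cstdio>
#include <vector>

int main(int argc, char **argv)
{
  MPI_Init(&argc, &argv);

  int rank, ntask;
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Comm_size(MPI_COMM_WORLD, &ntask);

  /* smallest power of two that covers ntask, analogous to (1 << PTask) above */
  int ptask = 0;
  while((1 << ptask) < ntask)
    ptask++;

  std::vector<int> recvbuf(ntask, -1); /* one slot per potential partner */

  for(int ngrp = 1; ngrp < (1 << ptask); ngrp++)
    {
      int partner = rank ^ ngrp; /* each pair of ranks meets in exactly one round */

      if(partner < ntask)
        {
          int payload = rank; /* stand-in for the group/particle buffers */

          MPI_Sendrecv(&payload, 1, MPI_INT, partner, 0, &recvbuf[partner], 1, MPI_INT, partner, 0, MPI_COMM_WORLD,
                       MPI_STATUS_IGNORE);
        }
    }

  printf("rank %d completed %d exchange rounds\n", rank, (1 << ptask) - 1);

  MPI_Finalize();
  return 0;
}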