GADGET-4
domain_exchange.cc
/*******************************************************************************
 * \copyright   This file is part of the GADGET4 N-body/SPH code developed
 * \copyright   by Volker Springel. Copyright (C) 2014-2020 by Volker Springel
 * \copyright   (vspringel@mpa-garching.mpg.de) and all contributing authors.
 *******************************************************************************/

#include "gadgetconfig.h"

#include <mpi.h>
#include <algorithm>
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <cstring>

#include "../data/allvars.h"
#include "../data/dtypes.h"
#include "../data/mymalloc.h"
#include "../domain/domain.h"
#include "../fof/fof.h"
#include "../logs/timer.h"
#include "../main/simulation.h"
#include "../mpi_utils/mpi_utils.h"
#include "../ngbtree/ngbtree.h"
#include "../sort/cxxsort.h"
#include "../system/system.h"

/*! \brief Adjusts the storage of the particle arrays if the maximum load
 *  across all tasks has drifted out of the allowed tolerance band.
 */
template <typename partset>
void domain<partset>::domain_resize_storage(int count_get_total, int count_get_sph, int option_flag)
{
  int max_load, load = count_get_total;
  int max_sphload, sphload = count_get_sph;
  MPI_Allreduce(&load, &max_load, 1, MPI_INT, MPI_MAX, Communicator);
  MPI_Allreduce(&sphload, &max_sphload, 1, MPI_INT, MPI_MAX, Communicator);

  if(max_load > (1.0 - ALLOC_TOLERANCE) * Tp->MaxPart || max_load < (1.0 - 3 * ALLOC_TOLERANCE) * Tp->MaxPart)
    {
      Tp->reallocate_memory_maxpart(max_load / (1.0 - 2 * ALLOC_TOLERANCE));

      if(option_flag == 1)
        domain_key = (peanokey *)Mem.myrealloc_movable(domain_key, sizeof(peanokey) * Tp->MaxPart);
    }

  if(max_sphload > (1.0 - ALLOC_TOLERANCE) * Tp->MaxPartSph || max_sphload < (1.0 - 3 * ALLOC_TOLERANCE) * Tp->MaxPartSph)
    {
      int maxpartsphNew = max_sphload / (1.0 - 2 * ALLOC_TOLERANCE);
      if(option_flag == 2)
        {
          Terminate("need to reactivate this");
        }
      Tp->reallocate_memory_maxpartsph(maxpartsphNew);
    }
}
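
/* Illustrative sketch (not part of the original file): the resize test above
 * implements a hysteresis band. With occupancy f = max_load / MaxPart, the
 * storage is only touched once f leaves [1 - 3*ALLOC_TOLERANCE, 1 - ALLOC_TOLERANCE],
 * and the new allocation puts f back at 1 - 2*ALLOC_TOLERANCE, the center of
 * the band, so small load fluctuations do not trigger repeated reallocations.
 * The helpers below, with invented names, restate the rule in isolation.
 */
static inline bool resize_needed(int max_load, int max_alloc, double tol)
{
  return max_load > (1.0 - tol) * max_alloc || max_load < (1.0 - 3 * tol) * max_alloc;
}

static inline int resized_allocation(int max_load, double tol)
{
  return max_load / (1.0 - 2 * tol); /* occupancy lands at 1 - 2*tol, mid-band */
}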

/*! \brief Counts, for every target task, how many local particles have to be
 *  sent there, separately for SPH particles (toGoSph) and all others (toGoDM).
 */
template <typename partset>
void domain<partset>::domain_countToGo(int *toGoDM, int *toGoSph)
{
  for(int n = 0; n < NTask; n++)
    {
      toGoDM[n] = toGoSph[n] = 0;
    }

  for(int n = 0; n < Tp->NumPart; n++)
    {
      int no = n_to_no(n);

      if(Tp->P[n].getType() == 0)
        toGoSph[TaskOfLeaf[no]]++;
      else
        toGoDM[TaskOfLeaf[no]]++;
    }
}

/*! \brief Records for SUBFIND the task that each particle should be moved to. */
template <typename partset>
void domain<partset>::domain_coll_subfind_prepare_exchange(void)
{
#ifdef SUBFIND
  for(int i = 0; i < Tp->NumPart; i++)
    {
      int task = TaskOfLeaf[n_to_no(i)];

      Tp->PS[i].TargetTask  = task;
      Tp->PS[i].TargetIndex = 0; /* unimportant here */
    }
#endif
}

/*! \brief Carries out the particle exchange prescribed by the domain
 *  decomposition, moving every particle to the task owning its tree leaf.
 */
template <typename partset>
void domain<partset>::domain_exchange(void)
{
  double t0 = Logs.second();

  int *toGoDM   = (int *)Mem.mymalloc_movable(&toGoDM, "toGoDM", NTask * sizeof(int));
  int *toGoSph  = (int *)Mem.mymalloc_movable(&toGoSph, "toGoSph", NTask * sizeof(int));
  int *toGetDM  = (int *)Mem.mymalloc_movable(&toGetDM, "toGetDM", NTask * sizeof(int));
  int *toGetSph = (int *)Mem.mymalloc_movable(&toGetSph, "toGetSph", NTask * sizeof(int));

  domain_countToGo(toGoDM, toGoSph);

  /* make the send counts known to everybody, with the DM and SPH counts for task i interleaved at 2*i and 2*i+1 */
  int *toGo  = (int *)Mem.mymalloc("toGo", 2 * NTask * sizeof(int));
  int *toGet = (int *)Mem.mymalloc("toGet", 2 * NTask * sizeof(int));

  for(int i = 0; i < NTask; ++i)
    {
      toGo[2 * i]     = toGoDM[i];
      toGo[2 * i + 1] = toGoSph[i];
    }
  MPI_Alltoall(toGo, 2, MPI_INT, toGet, 2, MPI_INT, Communicator);
  for(int i = 0; i < NTask; ++i)
    {
      toGetDM[i]  = toGet[2 * i];
      toGetSph[i] = toGet[2 * i + 1];
    }
  Mem.myfree(toGet);
  Mem.myfree(toGo);

  int count_togo_dm = 0, count_togo_sph = 0, count_get_dm = 0, count_get_sph = 0;
  for(int i = 0; i < NTask; i++)
    {
      count_togo_dm += toGoDM[i];
      count_togo_sph += toGoSph[i];
      count_get_dm += toGetDM[i];
      count_get_sph += toGetSph[i];
    }

  long long sumtogo = count_togo_dm;
  sumup_longs(1, &sumtogo, &sumtogo, Communicator);

  domain_printf("DOMAIN: exchange of %lld particles\n", sumtogo);

  if(Tp->NumPart != count_togo_dm + count_togo_sph)
    Terminate("NumPart != count_togo");

  int *send_sph_offset = (int *)Mem.mymalloc_movable(&send_sph_offset, "send_sph_offset", NTask * sizeof(int));
  int *send_dm_offset  = (int *)Mem.mymalloc_movable(&send_dm_offset, "send_dm_offset", NTask * sizeof(int));
  int *recv_sph_offset = (int *)Mem.mymalloc_movable(&recv_sph_offset, "recv_sph_offset", NTask * sizeof(int));
  int *recv_dm_offset  = (int *)Mem.mymalloc_movable(&recv_dm_offset, "recv_dm_offset", NTask * sizeof(int));

  send_sph_offset[0] = send_dm_offset[0] = recv_sph_offset[0] = recv_dm_offset[0] = 0;
  for(int i = 1; i < NTask; i++)
    {
      send_sph_offset[i] = send_sph_offset[i - 1] + toGoSph[i - 1];
      send_dm_offset[i]  = send_dm_offset[i - 1] + toGoDM[i - 1];

      recv_sph_offset[i] = recv_sph_offset[i - 1] + toGetSph[i - 1];
      recv_dm_offset[i]  = recv_dm_offset[i - 1] + toGetDM[i - 1];
    }

  /* the buffers store all SPH particles first, so shift the DM offsets past them */
  for(int i = 0; i < NTask; i++)
    {
      send_dm_offset[i] += count_togo_sph;
      recv_dm_offset[i] += count_get_sph;
    }

  pdata *partBuf =
      (typename partset::pdata *)Mem.mymalloc_movable_clear(&partBuf, "partBuf", (count_togo_dm + count_togo_sph) * sizeof(pdata));
  sph_particle_data *sphBuf =
      (sph_particle_data *)Mem.mymalloc_movable_clear(&sphBuf, "sphBuf", count_togo_sph * sizeof(sph_particle_data));
  peanokey *keyBuf = (peanokey *)Mem.mymalloc_movable_clear(&keyBuf, "keyBuf", (count_togo_dm + count_togo_sph) * sizeof(peanokey));

  /* reuse the toGo arrays as running fill counts while scattering the particles into the send buffers */
  for(int i = 0; i < NTask; i++)
    toGoSph[i] = toGoDM[i] = 0;

  for(int n = 0; n < Tp->NumPart; n++)
    {
      int off, num;
      int task = TaskOfLeaf[n_to_no(n)];

      if(Tp->P[n].getType() == 0)
        {
          num = toGoSph[task]++;

          off         = send_sph_offset[task] + num;
          sphBuf[off] = Tp->SphP[n];
        }
      else
        {
          num = toGoDM[task]++;

          off = send_dm_offset[task] + num;
        }

      partBuf[off] = Tp->P[n];
      keyBuf[off]  = domain_key[n];
    }

  /**** now resize the storage for the P[] and SphP[] arrays if needed ****/
  domain_resize_storage(count_get_dm + count_get_sph, count_get_sph, 1);

  /***** space has been created, now we can do the actual exchange *****/

  /* produce a flag if any of the send sizes is above our transfer limit, in this case we will
   * transfer the data in chunks.
   */
  int flag_big = 0, flag_big_all;
  for(int i = 0; i < NTask; i++)
    {
      if(toGoSph[i] * sizeof(sph_particle_data) > MPI_MESSAGE_SIZELIMIT_IN_BYTES)
        flag_big = 1;

      if(std::max<int>(toGoSph[i], toGoDM[i]) * sizeof(typename partset::pdata) > MPI_MESSAGE_SIZELIMIT_IN_BYTES)
        flag_big = 1;
    }

  MPI_Allreduce(&flag_big, &flag_big_all, 1, MPI_INT, MPI_MAX, Communicator);

#if 1
#ifdef USE_MPIALLTOALLV_IN_DOMAINDECOMP
  int method = 0;
#else
#ifndef ISEND_IRECV_IN_DOMAIN /* synchronous communication */
  int method = 1;
#else
  int method = 2; /* asynchronous communication */
#endif
#endif
  MPI_Datatype tp;
  MPI_Type_contiguous(sizeof(typename partset::pdata), MPI_CHAR, &tp);
  MPI_Type_commit(&tp);
  myMPI_Alltoallv_new(partBuf, toGoSph, send_sph_offset, tp, Tp->P, toGetSph, recv_sph_offset, tp, Communicator, method);
  myMPI_Alltoallv_new(partBuf, toGoDM, send_dm_offset, tp, Tp->P, toGetDM, recv_dm_offset, tp, Communicator, method);
  MPI_Type_free(&tp);
  MPI_Type_contiguous(sizeof(sph_particle_data), MPI_CHAR, &tp);
  MPI_Type_commit(&tp);
  myMPI_Alltoallv_new(sphBuf, toGoSph, send_sph_offset, tp, Tp->SphP, toGetSph, recv_sph_offset, tp, Communicator, method);
  MPI_Type_free(&tp);
  MPI_Type_contiguous(sizeof(peanokey), MPI_CHAR, &tp);
  MPI_Type_commit(&tp);
  myMPI_Alltoallv_new(keyBuf, toGoSph, send_sph_offset, tp, domain_key, toGetSph, recv_sph_offset, tp, Communicator, method);
  myMPI_Alltoallv_new(keyBuf, toGoDM, send_dm_offset, tp, domain_key, toGetDM, recv_dm_offset, tp, Communicator, method);
  MPI_Type_free(&tp);
#else
  my_int_MPI_Alltoallv(partBuf, toGoSph, send_sph_offset, P, toGetSph, recv_sph_offset, sizeof(pdata), flag_big_all, Communicator);

  my_int_MPI_Alltoallv(sphBuf, toGoSph, send_sph_offset, SphP, toGetSph, recv_sph_offset, sizeof(sph_particle_data), flag_big_all,
                       Communicator);

  my_int_MPI_Alltoallv(keyBuf, toGoSph, send_sph_offset, domain_key, toGetSph, recv_sph_offset, sizeof(peanokey), flag_big_all,
                       Communicator);

  my_int_MPI_Alltoallv(partBuf, toGoDM, send_dm_offset, P, toGetDM, recv_dm_offset, sizeof(pdata), flag_big_all, Communicator);

  my_int_MPI_Alltoallv(keyBuf, toGoDM, send_dm_offset, domain_key, toGetDM, recv_dm_offset, sizeof(peanokey), flag_big_all,
                       Communicator);
#endif

  Tp->NumPart = count_get_dm + count_get_sph;
  Tp->NumGas  = count_get_sph;

  Mem.myfree(keyBuf);
  Mem.myfree(sphBuf);
  Mem.myfree(partBuf);

  Mem.myfree(recv_dm_offset);
  Mem.myfree(recv_sph_offset);
  Mem.myfree(send_dm_offset);
  Mem.myfree(send_sph_offset);

  Mem.myfree(toGetSph);
  Mem.myfree(toGetDM);
  Mem.myfree(toGoSph);
  Mem.myfree(toGoDM);

  double t1 = Logs.second();

  domain_printf("DOMAIN: particle exchange done. (took %g sec)\n", Logs.timediff(t0, t1));
}
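
/* Illustrative sketch (not part of the original file): stripped of the shared
 * SPH/DM buffer layout and of the chunked myMPI_Alltoallv_new() transport,
 * domain_exchange() follows the standard counts -> displacements -> all-to-all
 * pattern. The stand-alone version below, with 'item' standing in for pdata,
 * shows that core; recvcount would come from an MPI_Alltoall of sendcount,
 * exactly as toGet is obtained from toGo above.
 */
struct item
{
  double pos[3];
  long long id;
};

static void exchange_items(item *sendbuf, int *sendcount, item *recvbuf, int *recvcount, int ntask, MPI_Comm comm)
{
  int *sdispl = (int *)Mem.mymalloc("sdispl", ntask * sizeof(int));
  int *rdispl = (int *)Mem.mymalloc("rdispl", ntask * sizeof(int));

  /* exclusive prefix sums, the same construction as send_*_offset/recv_*_offset above */
  sdispl[0] = rdispl[0] = 0;
  for(int i = 1; i < ntask; i++)
    {
      sdispl[i] = sdispl[i - 1] + sendcount[i - 1];
      rdispl[i] = rdispl[i - 1] + recvcount[i - 1];
    }

  /* one MPI datatype per item keeps the counts small, instead of error-prone byte counts */
  MPI_Datatype tp;
  MPI_Type_contiguous(sizeof(item), MPI_CHAR, &tp);
  MPI_Type_commit(&tp);
  MPI_Alltoallv(sendbuf, sendcount, sdispl, tp, recvbuf, recvcount, rdispl, tp, comm);
  MPI_Type_free(&tp);

  Mem.myfree(rdispl);
  Mem.myfree(sdispl);
}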

/*! \brief Brings the local particles into Peano-Hilbert order, handling the
 *  gas particles and the remaining particle types separately.
 */
template <typename partset>
void domain<partset>::peano_hilbert_order(peanokey *key)
{
  mpi_printf("PEANO: Begin Peano-Hilbert order...\n");
  double t0 = Logs.second();

  if(Tp->NumGas)
    {
      peano_hilbert_data *pmp = (peano_hilbert_data *)Mem.mymalloc("pmp", sizeof(peano_hilbert_data) * Tp->NumGas);
      int *Id = (int *)Mem.mymalloc("Id", sizeof(int) * Tp->NumGas);

      for(int i = 0; i < Tp->NumGas; i++)
        {
          pmp[i].index = i;
          pmp[i].key = key[i];
        }

      mycxxsort(pmp, pmp + Tp->NumGas, compare_peano_hilbert_data);

      for(int i = 0; i < Tp->NumGas; i++)
        Id[pmp[i].index] = i;

      reorder_gas(Id);

      Mem.myfree(Id);
      Mem.myfree(pmp);
    }

  if(Tp->NumPart - Tp->NumGas > 0)
    {
      peano_hilbert_data *pmp = (peano_hilbert_data *)Mem.mymalloc("pmp", sizeof(peano_hilbert_data) * (Tp->NumPart - Tp->NumGas));
      int *Id = (int *)Mem.mymalloc("Id", sizeof(int) * (Tp->NumPart - Tp->NumGas));

      for(int i = Tp->NumGas; i < Tp->NumPart; i++)
        {
          pmp[i - Tp->NumGas].index = i;
          pmp[i - Tp->NumGas].key = key[i];
        }

      mycxxsort(pmp, pmp + Tp->NumPart - Tp->NumGas, compare_peano_hilbert_data);

      for(int i = Tp->NumGas; i < Tp->NumPart; i++)
        Id[pmp[i - Tp->NumGas].index - Tp->NumGas] = i;

      reorder_particles(Id - Tp->NumGas, Tp->NumGas, Tp->NumPart);

      Mem.myfree(Id);
      Mem.myfree(pmp);
    }

  mpi_printf("PEANO: done, took %g sec.\n", Logs.timediff(t0, Logs.second()));
}
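
/* Illustrative sketch (not part of the original file): peano_hilbert_order()
 * uses the sort-then-invert idiom. Sorting (index, key) pairs by key gives,
 * for each new position i, the old position sorted[i].index; inverting that
 * map yields Id[old_position] = new_position, the form the reorder_* routines
 * below expect. For toy keys {30, 10, 20}, the sorted indices are {1, 2, 0}
 * and the inversion gives Id = {2, 0, 1}: the element at old position 0 (key
 * 30) must move to new position 2.
 */
static void build_new_positions(const peano_hilbert_data *sorted, int n, int *Id)
{
  for(int i = 0; i < n; i++)
    Id[sorted[i].index] = i; /* Id[old] = new */
}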

/*! \brief Reorders the gas particles and their SPH data in place according to
 *  the permutation in Id[], where Id[i] is the new position of particle i.
 */
template <typename partset>
void domain<partset>::reorder_gas(int *Id)
{
  for(int i = 0; i < Tp->NumGas; i++)
    {
      if(Id[i] != i)
        {
          pdata Psource = Tp->P[i];
          sph_particle_data SphPsource = Tp->SphP[i];

          int idsource = Id[i];
          int dest = Id[i];

          do
            {
              pdata Psave = Tp->P[dest];
              sph_particle_data SphPsave = Tp->SphP[dest];
              int idsave = Id[dest];

              Tp->P[dest] = Psource;
              Tp->SphP[dest] = SphPsource;
              Id[dest] = idsource;

              if(dest == i)
                break;

              Psource = Psave;
              SphPsource = SphPsave;
              idsource = idsave;

              dest = idsource;
            }
          while(1);
        }
    }
}
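
/* Illustrative sketch (not part of the original file): reorder_gas() and the
 * reorder_* variants that follow all walk permutation cycles in place. The
 * element in hand is dropped at its destination, the evicted element becomes
 * the new element in hand, and the walk stops once the cycle closes at i, so
 * every element is copied exactly once with O(1) scratch space. The same loop
 * on a plain int array:
 */
static void reorder_in_place(int *data, int *Id, int n)
{
  for(int i = 0; i < n; i++)
    {
      if(Id[i] != i)
        {
          int source = data[i];
          int idsource = Id[i];
          int dest = Id[i];

          do
            {
              int save = data[dest];
              int idsave = Id[dest];

              data[dest] = source; /* drop the element in hand at its target */
              Id[dest] = idsource;

              if(dest == i)
                break; /* the cycle has closed */

              source = save; /* continue with the evicted element */
              idsource = idsave;
              dest = idsource;
            }
          while(1);
        }
    }
}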

/*! \brief Reorders the particle data of indices [Nstart, N) in place according
 *  to the permutation in Id[].
 */
template <typename partset>
void domain<partset>::reorder_particles(int *Id, int Nstart, int N)
{
  for(int i = Nstart; i < N; i++)
    {
      if(Id[i] != i)
        {
          pdata Psource = Tp->P[i];
          int idsource = Id[i];

          int dest = Id[i];

          do
            {
              pdata Psave = Tp->P[dest];
              int idsave = Id[dest];

              Tp->P[dest] = Psource;
              Id[dest] = idsource;

              if(dest == i)
                break;

              Psource = Psave;
              idsource = idsave;

              dest = idsource;
            }
          while(1);
        }
    }
}

/*! \brief Reorders the subfind data of indices [Nstart, N) in place according
 *  to the permutation in Id[].
 */
template <typename partset>
void domain<partset>::reorder_PS(int *Id, int Nstart, int N)
{
  for(int i = Nstart; i < N; i++)
    {
      if(Id[i] != i)
        {
          subfind_data PSsource = Tp->PS[i];

          int idsource = Id[i];
          int dest = Id[i];

          do
            {
              subfind_data PSsave = Tp->PS[dest];
              int idsave = Id[dest];

              Tp->PS[dest] = PSsource;
              Id[dest] = idsource;

              if(dest == i)
                break;

              PSsource = PSsave;
              idsource = idsave;

              dest = idsource;
            }
          while(1);
        }
    }
}

/*! \brief Reorders the particle data P[] together with the matching subfind
 *  data PS[] in place according to the permutation in Id[].
 */
template <typename partset>
void domain<partset>::reorder_P_and_PS(int *Id)
{
  for(int i = 0; i < Tp->NumPart; i++)
    {
      if(Id[i] != i)
        {
          pdata Psource = Tp->P[i];
          subfind_data PSsource = Tp->PS[i];

          int idsource = Id[i];
          int dest = Id[i];

          do
            {
              pdata Psave = Tp->P[dest];
              subfind_data PSsave = Tp->PS[dest];
              int idsave = Id[dest];

              Tp->P[dest] = Psource;
              Tp->PS[dest] = PSsource;
              Id[dest] = idsource;

              if(dest == i)
                break;

              Psource = Psave;
              PSsource = PSsave;
              idsource = idsave;

              dest = idsource;
            }
          while(1);
        }
    }
}

/*! \brief Sorts the particles in the index range [loc_numgas, loc_numpart) by
 *  PS[].TargetIndex and reorders P[] and PS[] accordingly.
 */
template <typename partset>
void domain<partset>::reorder_P_PS(int loc_numgas, int loc_numpart)
{
  local_sort_data *mp = (local_sort_data *)Mem.mymalloc("mp", sizeof(local_sort_data) * (loc_numpart - loc_numgas));
  mp -= loc_numgas; /* shift the base pointer so mp[] can be indexed with the global range [loc_numgas, loc_numpart) */

  int *Id = (int *)Mem.mymalloc("Id", sizeof(int) * (loc_numpart - loc_numgas));
  Id -= loc_numgas;

  for(int i = loc_numgas; i < loc_numpart; i++)
    {
      mp[i].index = i;
      mp[i].targetindex = Tp->PS[i].TargetIndex;
    }

  mycxxsort(mp + loc_numgas, mp + loc_numpart, compare_local_sort_data_targetindex);

  for(int i = loc_numgas; i < loc_numpart; i++)
    Id[mp[i].index] = i;

  reorder_particles(Id, loc_numgas, loc_numpart);

  /* reorder_particles() has scrambled Id[], so rebuild the permutation before reordering PS[] */
  for(int i = loc_numgas; i < loc_numpart; i++)
    Id[mp[i].index] = i;

  reorder_PS(Id, loc_numgas, loc_numpart);

  Id += loc_numgas; /* undo the pointer shifts before freeing */
  Mem.myfree(Id);
  mp += loc_numgas;
  Mem.myfree(mp);
}
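
/* Illustrative sketch (not part of the original file): the pointer shifts in
 * reorder_P_PS() let an array that covers only [first, first + count) be
 * addressed with the global particle indices, avoiding a '- loc_numgas' in
 * every subscript. In isolation (the helper name is invented; the shifted
 * pointer must never be dereferenced outside the valid range, and the shift
 * has to be undone before freeing):
 */
static int *alloc_shifted(int first, int count)
{
  int *buf = (int *)Mem.mymalloc("buf", count * sizeof(int));
  return buf - first; /* valid subscripts are now [first, first + count) */
}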

/*! \brief This function redistributes the particles according to what is
 *  stored in PS[].TargetTask and PS[].TargetIndex.
 */
template <typename partset>
void domain<partset>::particle_exchange_based_on_PS(MPI_Comm Communicator)
{
  int CommThisTask, CommNTask, CommPTask;
  MPI_Comm_size(Communicator, &CommNTask);
  MPI_Comm_rank(Communicator, &CommThisTask);

  /* determine the smallest power of two that is >= CommNTask */
  for(CommPTask = 0; CommNTask > (1 << CommPTask); CommPTask++)
    ;

  int *Send_count = (int *)Mem.mymalloc_movable(&Send_count, "Send_count", sizeof(int) * CommNTask);
  int *Send_offset = (int *)Mem.mymalloc_movable(&Send_offset, "Send_offset", sizeof(int) * CommNTask);
  int *Recv_count = (int *)Mem.mymalloc_movable(&Recv_count, "Recv_count", sizeof(int) * CommNTask);
  int *Recv_offset = (int *)Mem.mymalloc_movable(&Recv_offset, "Recv_offset", sizeof(int) * CommNTask);
  int nimport = 0, nexport = 0, nstay = 0, nlocal = 0;

  /* for type_select == 0, we process gas particles, otherwise all other particles */
  for(int type_select = 0; type_select < 2; type_select++)
    {
      /* In order to be able to later distribute the PS[] array, we need to temporarily save
       * the particle type array and the old particle number.
       */
      unsigned char *Ptype = (unsigned char *)Mem.mymalloc_movable(&Ptype, "Ptype", sizeof(unsigned char) * Tp->NumPart);
      int *Ptask = (int *)Mem.mymalloc_movable(&Ptask, "Ptask", sizeof(int) * Tp->NumPart);

      for(int i = 0; i < Tp->NumPart; i++)
        {
          Ptype[i] = Tp->P[i].getType();
          Ptask[i] = Tp->PS[i].TargetTask;

          if(Ptype[i] == 0 && i >= Tp->NumGas)
            Terminate("Bummer1");

          if(Ptype[i] != 0 && i < Tp->NumGas)
            Terminate("Bummer2");
        }

      int NumPart_saved = Tp->NumPart;

      /* distribute gas particles up front */
      if(type_select == 0)
        {
          sph_particle_data *sphBuf = NULL;

          /* the first pass (rep == 0) only counts, the second pass (rep == 1) fills the buffers */
          for(int rep = 0; rep < 2; rep++)
            {
              for(int n = 0; n < CommNTask; n++)
                Send_count[n] = 0;

              nstay = 0;

              for(int n = 0; n < Tp->NumGas; n++)
                {
                  int target = Ptask[n];

                  if(rep == 0)
                    {
                      if(target != CommThisTask)
                        Send_count[target]++;
                      else
                        nstay++;
                    }
                  else
                    {
                      if(target != CommThisTask)
                        sphBuf[Send_offset[target] + Send_count[target]++] = Tp->SphP[n];
                      else
                        Tp->SphP[nstay++] = Tp->SphP[n];
                    }
                }

              if(rep == 0)
                {
                  MPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, Communicator);

                  nimport = 0, nexport = 0;
                  Recv_offset[0] = Send_offset[0] = 0;
                  for(int j = 0; j < CommNTask; j++)
                    {
                      nexport += Send_count[j];
                      nimport += Recv_count[j];

                      if(j > 0)
                        {
                          Send_offset[j] = Send_offset[j - 1] + Send_count[j - 1];
                          Recv_offset[j] = Recv_offset[j - 1] + Recv_count[j - 1];
                        }
                    }

                  sphBuf = (sph_particle_data *)Mem.mymalloc_movable(&sphBuf, "sphBuf", nexport * sizeof(sph_particle_data));
                }
              else
                {
                  Tp->NumGas += (nimport - nexport);

                  int max_loadsph = Tp->NumGas;
                  MPI_Allreduce(MPI_IN_PLACE, &max_loadsph, 1, MPI_INT, MPI_MAX, Communicator);

                  if(max_loadsph > (1.0 - ALLOC_TOLERANCE) * Tp->MaxPartSph ||
                     max_loadsph < (1.0 - 3 * ALLOC_TOLERANCE) * Tp->MaxPartSph)
                    Tp->reallocate_memory_maxpartsph(max_loadsph / (1.0 - 2 * ALLOC_TOLERANCE));

                  /* pairwise hypercube exchange: in round ngrp, each task talks to CommThisTask ^ ngrp */
                  for(int ngrp = 1; ngrp < (1 << CommPTask); ngrp++)
                    {
                      int target = CommThisTask ^ ngrp;

                      if(target < CommNTask)
                        {
                          if(Send_count[target] > 0 || Recv_count[target] > 0)
                            {
                              MPI_Sendrecv(sphBuf + Send_offset[target], Send_count[target] * sizeof(sph_particle_data), MPI_BYTE,
                                           target, TAG_SPHDATA, Tp->SphP + Recv_offset[target] + nstay,
                                           Recv_count[target] * sizeof(sph_particle_data), MPI_BYTE, target, TAG_SPHDATA,
                                           Communicator, MPI_STATUS_IGNORE);
                            }
                        }
                    }

                  Mem.myfree(sphBuf);
                }
            }
        }

      pdata *partBuf = NULL;

      for(int rep = 0; rep < 2; rep++)
        {
          for(int n = 0; n < CommNTask; n++)
            Send_count[n] = 0;

          nstay = 0;
          nlocal = 0;

          for(int n = 0; n < NumPart_saved; n++)
            {
              if(Ptype[n] == type_select || (type_select != 0))
                {
                  int target = Ptask[n];

                  if(rep == 0)
                    {
                      if(target != CommThisTask)
                        Send_count[target]++;
                      else
                        {
                          nstay++;
                          nlocal++;
                        }
                    }
                  else
                    {
                      if(target != CommThisTask)
                        partBuf[Send_offset[target] + Send_count[target]++] = Tp->P[n];
                      else
                        {
                          Tp->P[nstay++] = Tp->P[n];
                          nlocal++;
                        }
                    }
                }
              else
                {
                  // this is only relevant for type_select == 0
                  if(rep == 0)
                    nstay++;
                  else
                    Tp->P[nstay++] = Tp->P[n];
                }
            }

          if(rep == 0)
            {
              MPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, Communicator);

              nimport = 0, nexport = 0;
              Recv_offset[0] = Send_offset[0] = 0;
              for(int j = 0; j < CommNTask; j++)
                {
                  nexport += Send_count[j];
                  nimport += Recv_count[j];

                  if(j > 0)
                    {
                      Send_offset[j] = Send_offset[j - 1] + Send_count[j - 1];
                      Recv_offset[j] = Recv_offset[j - 1] + Recv_count[j - 1];
                    }
                }

              partBuf = (pdata *)Mem.mymalloc_movable(&partBuf, "partBuf", nexport * sizeof(pdata));
            }
          else
            {
              Tp->NumPart += (nimport - nexport);

              int max_load = Tp->NumPart;
              MPI_Allreduce(MPI_IN_PLACE, &max_load, 1, MPI_INT, MPI_MAX, Communicator);

              if(max_load > (1.0 - ALLOC_TOLERANCE) * Tp->MaxPart || max_load < (1.0 - 3 * ALLOC_TOLERANCE) * Tp->MaxPart)
                Tp->reallocate_memory_maxpart(max_load / (1.0 - 2 * ALLOC_TOLERANCE));

              if(type_select == 0)
                {
                  // create a gap to place the incoming particles at the end of the already present gas particles
                  memmove(static_cast<void *>(Tp->P + nlocal + nimport), static_cast<void *>(Tp->P + nlocal),
                          (nstay - nlocal) * sizeof(pdata));
                }

              for(int ngrp = 1; ngrp < (1 << CommPTask); ngrp++)
                {
                  int target = CommThisTask ^ ngrp;

                  if(target < CommNTask)
                    {
                      if(Send_count[target] > 0 || Recv_count[target] > 0)
                        {
                          MPI_Sendrecv(partBuf + Send_offset[target], Send_count[target] * sizeof(pdata), MPI_BYTE, target,
                                       TAG_PDATA, Tp->P + Recv_offset[target] + nlocal, Recv_count[target] * sizeof(pdata),
                                       MPI_BYTE, target, TAG_PDATA, Communicator, MPI_STATUS_IGNORE);
                        }
                    }
                }

              Mem.myfree(partBuf);
            }
        }

      /* now deal with subfind data */

      subfind_data *subBuf = NULL;
      for(int rep = 0; rep < 2; rep++)
        {
          for(int n = 0; n < CommNTask; n++)
            Send_count[n] = 0;

          nstay = 0;
          nlocal = 0;

          for(int n = 0; n < NumPart_saved; n++)
            {
              if(Ptype[n] == type_select || (type_select != 0))
                {
                  int target = Ptask[n];

                  if(rep == 0)
                    {
                      if(target != CommThisTask)
                        Send_count[target]++;
                      else
                        {
                          nstay++;
                          nlocal++;
                        }
                    }
                  else
                    {
                      if(target != CommThisTask)
                        subBuf[Send_offset[target] + Send_count[target]++] = Tp->PS[n];
                      else
                        {
                          Tp->PS[nstay++] = Tp->PS[n];
                          nlocal++;
                        }
                    }
                }
              else
                {
                  // this is only relevant for type_select == 0
                  if(rep == 0)
                    nstay++;
                  else
                    Tp->PS[nstay++] = Tp->PS[n];
                }
            }

          if(rep == 0)
            {
              MPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, Communicator);

              nimport = 0, nexport = 0;
              Recv_offset[0] = Send_offset[0] = 0;
              for(int j = 0; j < CommNTask; j++)
                {
                  nexport += Send_count[j];
                  nimport += Recv_count[j];

                  if(j > 0)
                    {
                      Send_offset[j] = Send_offset[j - 1] + Send_count[j - 1];
                      Recv_offset[j] = Recv_offset[j - 1] + Recv_count[j - 1];
                    }
                }

              subBuf = (subfind_data *)Mem.mymalloc_movable(&subBuf, "subBuf", nexport * sizeof(subfind_data));
            }
          else
            {
              /* reallocate with new particle number */
              Tp->PS = (subfind_data *)Mem.myrealloc_movable(Tp->PS, Tp->NumPart * sizeof(subfind_data));

              if(type_select == 0)
                {
                  // create a gap to place the incoming particles at the end of the already present gas particles
                  memmove(Tp->PS + nlocal + nimport, Tp->PS + nlocal, (nstay - nlocal) * sizeof(subfind_data));
                }

              for(int ngrp = 1; ngrp < (1 << CommPTask); ngrp++)
                {
                  int target = CommThisTask ^ ngrp;

                  if(target < CommNTask)
                    {
                      if(Send_count[target] > 0 || Recv_count[target] > 0)
                        {
                          MPI_Sendrecv(subBuf + Send_offset[target], Send_count[target] * sizeof(subfind_data), MPI_BYTE, target,
                                       TAG_KEY, Tp->PS + Recv_offset[target] + nlocal, Recv_count[target] * sizeof(subfind_data),
                                       MPI_BYTE, target, TAG_KEY, Communicator, MPI_STATUS_IGNORE);
                        }
                    }
                }

              Mem.myfree(subBuf);
            }
        }

      Mem.myfree(Ptask);
      Mem.myfree(Ptype);
    }

  Mem.myfree(Recv_offset);
  Mem.myfree(Recv_count);
  Mem.myfree(Send_offset);
  Mem.myfree(Send_count);

  /* finally, let's also address the desired local order according to PS[].TargetIndex */

  if(Tp->NumGas)
    {
      local_sort_data *mp = (local_sort_data *)Mem.mymalloc("mp", sizeof(local_sort_data) * Tp->NumGas);
      int *Id = (int *)Mem.mymalloc("Id", sizeof(int) * Tp->NumGas);

      for(int i = 0; i < Tp->NumGas; i++)
        {
          mp[i].index = i;
          mp[i].targetindex = Tp->PS[i].TargetIndex;
        }

      mycxxsort(mp, mp + Tp->NumGas, compare_local_sort_data_targetindex);

      for(int i = 0; i < Tp->NumGas; i++)
        Id[mp[i].index] = i;

      reorder_gas(Id);

      /* reorder_gas() has scrambled Id[], so rebuild the permutation before reordering PS[] */
      for(int i = 0; i < Tp->NumGas; i++)
        Id[mp[i].index] = i;

      reorder_PS(Id, 0, Tp->NumGas);

      Mem.myfree(Id);
      Mem.myfree(mp);
    }

  if(Tp->NumPart - Tp->NumGas > 0)
    {
      reorder_P_PS(Tp->NumGas, Tp->NumPart);
    }
}
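
/* Illustrative sketch (not part of the original file): the communication loops
 * in particle_exchange_based_on_PS() pair the tasks via target = CommThisTask ^ ngrp
 * for ngrp = 1 .. 2^CommPTask - 1. Since XOR is its own inverse, in every round
 * each task's chosen partner has chosen it back, so one MPI_Sendrecv per round
 * serves both directions and all pairs are covered without deadlock. The bare
 * pattern, exchanging a single int per pair:
 */
static void hypercube_exchange(int thistask, int ntask, MPI_Comm comm)
{
  int ptask = 0;
  while(ntask > (1 << ptask)) /* smallest power of two >= ntask, as for CommPTask above */
    ptask++;

  for(int ngrp = 1; ngrp < (1 << ptask); ngrp++)
    {
      int target = thistask ^ ngrp; /* the pair (thistask, target) selects each other */

      if(target < ntask)
        {
          int sendval = thistask, recvval;
          MPI_Sendrecv(&sendval, 1, MPI_INT, target, 0, &recvval, 1, MPI_INT, target, 0, comm, MPI_STATUS_IGNORE);
        }
    }
}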

#include "../data/simparticles.h"
template class domain<simparticles>;

#ifdef LIGHTCONE_PARTICLES
#include "../data/lcparticles.h"
template class domain<lcparticles>;
#endif