GADGET-4
mpi_utils.h
Go to the documentation of this file.
1 /*******************************************************************************
2  * \copyright This file is part of the GADGET4 N-body/SPH code developed
3  * \copyright by Volker Springel. Copyright (C) 2014-2020 by Volker Springel
4  * \copyright (vspringel@mpa-garching.mpg.de) and all contributing authors.
5  *******************************************************************************/
6 
11 #ifndef MPI_UTILS_H
12 #define MPI_UTILS_H
13 
14 #include <mpi.h>
15 #include "../data/dtypes.h"
16 #include "../data/mymalloc.h"
17 
/* MPI message tags used throughout the code to keep unrelated point-to-point
 * exchanges from matching each other. Values are arbitrary but must be unique
 * per concurrently active communication pattern. */

/* domain decomposition / top-level tree construction */
#define TAG_TOPNODE_FREE 4
#define TAG_TOPNODE_OFFSET 5
#define TAG_TOPNODE_ALLOC 6
#define TAG_EWALD_ALLOC 7
#define TAG_TABLE_ALLOC 8
#define TAG_TABLE_FREE 9
#define TAG_N 10
#define TAG_HEADER 11
#define TAG_PDATA 12
#define TAG_SPHDATA 13
#define TAG_KEY 14
#define TAG_DMOM 15
#define TAG_NODELEN 16
#define TAG_HMAX 17

/* gravity and hydro force exchanges */
#define TAG_GRAV_A 18
#define TAG_GRAV_B 19
#define TAG_DIRECT_A 20
#define TAG_DIRECT_B 21
#define TAG_HYDRO_A 22
#define TAG_HYDRO_B 23
#define TAG_NFORTHISTASK 24
#define TAG_PERIODIC_A 25
#define TAG_PERIODIC_B 26
#define TAG_PERIODIC_C 27
#define TAG_PERIODIC_D 28
#define TAG_NONPERIOD_A 29
#define TAG_NONPERIOD_B 30
#define TAG_NONPERIOD_C 31
#define TAG_NONPERIOD_D 32
#define TAG_POTENTIAL_A 33
#define TAG_POTENTIAL_B 34
#define TAG_DENS_A 35
#define TAG_DENS_B 36
#define TAG_LOCALN 37

/* black holes, smoothing, enrichment, conduction */
#define TAG_BH_A 38
#define TAG_BH_B 39
#define TAG_SMOOTH_A 40
#define TAG_SMOOTH_B 41
#define TAG_ENRICH_A 42
#define TAG_CONDUCT_A 43
#define TAG_CONDUCT_B 44

/* friends-of-friends group finding */
#define TAG_FOF_A 45
#define TAG_FOF_B 46
#define TAG_FOF_C 47
#define TAG_FOF_D 48
#define TAG_FOF_E 49
#define TAG_FOF_F 50
#define TAG_FOF_G 51

#define TAG_HOTNGB_A 52
#define TAG_HOTNGB_B 53
#define TAG_GRAD_A 54
#define TAG_GRAD_B 55

#define TAG_SE 56

#define TAG_SEARCH_A 58
#define TAG_SEARCH_B 59

#define TAG_INJECT_A 61

/* per-particle-type payload/key transfers during domain exchange */
#define TAG_PDATA_SPH 70
#define TAG_KEY_SPH 71

#define TAG_PDATA_STAR 72
#define TAG_STARDATA 73
#define TAG_KEY_STAR 74

#define TAG_PDATA_BH 75
#define TAG_BHDATA 76
#define TAG_KEY_BH 77

#define TAG_GRAVCOST_A 79
#define TAG_GRAVCOST_B 80

#define TAG_PM_FOLD 81

#define TAG_BARRIER 82
#define TAG_PART_DATA 83
#define TAG_NODE_DATA 84
#define TAG_RESULTS 85
#define TAG_DRIFT_INIT 86
#define TAG_ALL_UPDATE 87
#define TAG_METDATA 500

/* tree-walk fetch requests; large bases leave room for per-iteration offsets */
#define TAG_FETCH_GRAVTREE 1000
#define TAG_FETCH_SPH_DENSITY 2000
#define TAG_FETCH_SPH_HYDRO 3000
#define TAG_FETCH_SPH_TREETIMESTEP 4000
106 
107 void my_mpi_types_init(void);
108 
109 int myMPI_Sendrecv(void *sendbuf, size_t sendcount, MPI_Datatype sendtype, int dest, int sendtag, void *recvbuf, size_t recvcount,
110  MPI_Datatype recvtype, int source, int recvtag, MPI_Comm comm, MPI_Status *status);
111 
112 int myMPI_Alltoallv_new_prep(int *sendcnt, int *recvcnt, int *rdispls, MPI_Comm comm, int method);
113 
114 void myMPI_Alltoallv_new(void *sendb, int *sendcounts, int *sdispls, MPI_Datatype sendtype, void *recvb, int *recvcounts, int *rdispls,
115  MPI_Datatype recvtype, MPI_Comm comm, int method);
116 
117 void myMPI_Alltoallv(void *sendbuf, size_t *sendcounts, size_t *sdispls, void *recvbuf, size_t *recvcounts, size_t *rdispls, int len,
118  int big_flag, MPI_Comm comm);
119 
120 void my_int_MPI_Alltoallv(void *sendb, int *sendcounts, int *sdispls, void *recvb, int *recvcounts, int *rdispls, int len,
121  int big_flag, MPI_Comm comm);
122 
123 int myMPI_Allreduce(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm);
124 
125 int MPI_hypercube_Allgatherv(void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int *recvcount, int *displs,
126  MPI_Datatype recvtype, MPI_Comm comm);
127 
128 void allreduce_sparse_double_sum(double *loc, double *glob, int N, MPI_Comm comm);
129 
130 void minimum_large_ints(int n, long long *src, long long *res, MPI_Comm comm);
131 void sumup_longs(int n, long long *src, long long *res, MPI_Comm comm);
132 void sumup_large_ints(int n, int *src, long long *res, MPI_Comm comm);
133 
134 extern MPI_Datatype MPI_MyIntPosType;
135 
136 extern MPI_Op MPI_MIN_MyIntPosType;
137 extern MPI_Op MPI_MAX_MyIntPosType;
138 extern MPI_Op MPI_MIN_MySignedIntPosType;
139 extern MPI_Op MPI_MAX_MySignedIntPosType;
140 
141 template <typename T>
142 void allreduce_sum(T *glob, int N, MPI_Comm Communicator)
143 {
144  int ntask, thistask, ptask;
145  MPI_Comm_size(Communicator, &ntask);
146  MPI_Comm_rank(Communicator, &thistask);
147 
148  for(ptask = 0; ntask > (1 << ptask); ptask++)
149  ;
150 
151  // we are responsible for a certain stretch of the result, namely the one starting at loc_first_n, of length blocksize[thistask]
152 
153  int *blocksize = (int *)Mem.mymalloc("blocksize", sizeof(int) * ntask);
154  int *blockstart = (int *)Mem.mymalloc("blockstart", sizeof(int) * ntask);
155 
156  int blk = N / ntask;
157  int rmd = N - blk * ntask; /* remainder */
158  int pivot_n = rmd * (blk + 1);
159 
160  int loc_first_n = 0;
161  blockstart[0] = 0;
162 
163  for(int task = 0; task < ntask; task++)
164  {
165  if(task < rmd)
166  blocksize[task] = blk + 1;
167  else
168  blocksize[task] = blk;
169 
170  if(task < thistask)
171  loc_first_n += blocksize[task];
172 
173  if(task > 0)
174  blockstart[task] = blockstart[task - 1] + blocksize[task - 1];
175  }
176 
177  /* here we store the local result */
178  T *loc_data = (T *)Mem.mymalloc_clear("loc_data", blocksize[thistask] * sizeof(T));
179 
180  int *send_count = (int *)Mem.mymalloc("send_count", sizeof(int) * ntask);
181  int *recv_count = (int *)Mem.mymalloc("recv_count", sizeof(int) * ntask);
182 
183  int *send_offset = (int *)Mem.mymalloc("send_offset", sizeof(int) * ntask);
184 
185  struct ind_data
186  {
187  int n;
188  T val;
189  };
190 
191  ind_data *export_data = NULL;
192  int nexport = 0;
193 
194  for(int rep = 0; rep < 2; rep++)
195  {
196  for(int j = 0; j < ntask; j++)
197  send_count[j] = 0;
198 
199  /* find for each non-zero element the processor where it should go for being summed */
200  for(int n = 0; n < N; n++)
201  {
202  if(glob[n] != 0)
203  {
204  int task;
205  if(n < pivot_n)
206  task = n / (blk + 1);
207  else
208  task = rmd + (n - pivot_n) / blk; /* note: if blk=0, then this case can not occur */
209 
210  if(rep == 0)
211  send_count[task]++;
212  else
213  {
214  int index = send_offset[task] + send_count[task]++;
215  export_data[index].n = n;
216  export_data[index].val = glob[n];
217  }
218  }
219  }
220 
221  if(rep == 0)
222  {
223  MPI_Alltoall(send_count, 1, MPI_INT, recv_count, 1, MPI_INT, Communicator);
224 
225  send_offset[0] = 0;
226 
227  for(int j = 0; j < ntask; j++)
228  {
229  nexport += send_count[j];
230 
231  if(j > 0)
232  send_offset[j] = send_offset[j - 1] + send_count[j - 1];
233  }
234 
235  export_data = (ind_data *)Mem.mymalloc("export_data", nexport * sizeof(ind_data));
236  }
237  else
238  {
239  for(int ngrp = 0; ngrp < (1 << ptask); ngrp++) /* note: here we also have a transfer from each task to itself (for ngrp=0) */
240  {
241  int recvTask = thistask ^ ngrp;
242  if(recvTask < ntask)
243  if(send_count[recvTask] > 0 || recv_count[recvTask] > 0)
244  {
245  int nimport = recv_count[recvTask];
246 
247  ind_data *import_data = (ind_data *)Mem.mymalloc("import_data", nimport * sizeof(ind_data));
248 
249  MPI_Sendrecv(&export_data[send_offset[recvTask]], send_count[recvTask] * sizeof(ind_data), MPI_BYTE, recvTask,
250  TAG_DENS_B, import_data, recv_count[recvTask] * sizeof(ind_data), MPI_BYTE, recvTask, TAG_DENS_B,
251  Communicator, MPI_STATUS_IGNORE);
252 
253  for(int i = 0; i < nimport; i++)
254  {
255  int j = import_data[i].n - loc_first_n;
256 
257  if(j < 0 || j >= blocksize[thistask])
258  Terminate("j=%d < 0 || j>= blocksize[thistask]=%d", j, blocksize[thistask]);
259 
260  loc_data[j] += import_data[i].val;
261  }
262 
263  Mem.myfree(import_data);
264  }
265  }
266 
267  Mem.myfree(export_data);
268  }
269  }
270 
271  Mem.myfree(send_offset);
272  Mem.myfree(recv_count);
273  Mem.myfree(send_count);
274 
275  /* now share the result across all processors */
276  for(int ngrp = 0; ngrp < (1 << ptask); ngrp++) /* note: here we also have a transfer from each task to itself (for ngrp=0) */
277  {
278  int recvTask = thistask ^ ngrp;
279  if(recvTask < ntask)
280  if(blocksize[thistask] > 0 || blocksize[recvTask] > 0)
281  MPI_Sendrecv(loc_data, blocksize[thistask] * sizeof(T), MPI_BYTE, recvTask, TAG_DENS_A, &glob[blockstart[recvTask]],
282  blocksize[recvTask] * sizeof(T), MPI_BYTE, recvTask, TAG_DENS_A, Communicator, MPI_STATUS_IGNORE);
283  }
284 
285  Mem.myfree(loc_data);
286  Mem.myfree(blockstart);
287  Mem.myfree(blocksize);
288 }
289 
290 #endif
#define Terminate(...)
Definition: macros.h:19
MPI_Datatype MPI_MyIntPosType
Definition: mpi_vars.cc:24
#define TAG_DENS_A
Definition: mpi_utils.h:50
void my_int_MPI_Alltoallv(void *sendb, int *sendcounts, int *sdispls, void *recvb, int *recvcounts, int *rdispls, int len, int big_flag, MPI_Comm comm)
Definition: myalltoall.cc:235
MPI_Op MPI_MAX_MySignedIntPosType
Definition: mpi_vars.cc:29
int MPI_hypercube_Allgatherv(void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int *recvcount, int *displs, MPI_Datatype recvtype, MPI_Comm comm)
MPI_Op MPI_MIN_MySignedIntPosType
Definition: mpi_vars.cc:28
void allreduce_sparse_double_sum(double *loc, double *glob, int N, MPI_Comm comm)
void myMPI_Alltoallv_new(void *sendb, int *sendcounts, int *sdispls, MPI_Datatype sendtype, void *recvb, int *recvcounts, int *rdispls, MPI_Datatype recvtype, MPI_Comm comm, int method)
Definition: myalltoall.cc:74
void my_mpi_types_init(void)
Definition: mpi_types.cc:72
void allreduce_sum(T *glob, int N, MPI_Comm Communicator)
Definition: mpi_utils.h:142
void sumup_large_ints(int n, int *src, long long *res, MPI_Comm comm)
int myMPI_Alltoallv_new_prep(int *sendcnt, int *recvcnt, int *rdispls, MPI_Comm comm, int method)
Definition: myalltoall.cc:36
MPI_Op MPI_MAX_MyIntPosType
Definition: mpi_vars.cc:27
int myMPI_Sendrecv(void *sendbuf, size_t sendcount, MPI_Datatype sendtype, int dest, int sendtag, void *recvbuf, size_t recvcount, MPI_Datatype recvtype, int source, int recvtag, MPI_Comm comm, MPI_Status *status)
int myMPI_Allreduce(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm)
void minimum_large_ints(int n, long long *src, long long *res, MPI_Comm comm)
MPI_Op MPI_MIN_MyIntPosType
Definition: mpi_vars.cc:26
void sumup_longs(int n, long long *src, long long *res, MPI_Comm comm)
void myMPI_Alltoallv(void *sendbuf, size_t *sendcounts, size_t *sdispls, void *recvbuf, size_t *recvcounts, size_t *rdispls, int len, int big_flag, MPI_Comm comm)
Definition: myalltoall.cc:177
#define TAG_DENS_B
Definition: mpi_utils.h:51
memory Mem
Definition: main.cc:44