15 #include "../data/dtypes.h"
16 #include "../data/mymalloc.h"
19 #define TAG_TOPNODE_FREE 4
20 #define TAG_TOPNODE_OFFSET 5
21 #define TAG_TOPNODE_ALLOC 6
22 #define TAG_EWALD_ALLOC 7
23 #define TAG_TABLE_ALLOC 8
24 #define TAG_TABLE_FREE 9
28 #define TAG_SPHDATA 13
31 #define TAG_NODELEN 16
35 #define TAG_DIRECT_A 20
36 #define TAG_DIRECT_B 21
37 #define TAG_HYDRO_A 22
38 #define TAG_HYDRO_B 23
39 #define TAG_NFORTHISTASK 24
40 #define TAG_PERIODIC_A 25
41 #define TAG_PERIODIC_B 26
42 #define TAG_PERIODIC_C 27
43 #define TAG_PERIODIC_D 28
44 #define TAG_NONPERIOD_A 29
45 #define TAG_NONPERIOD_B 30
46 #define TAG_NONPERIOD_C 31
47 #define TAG_NONPERIOD_D 32
48 #define TAG_POTENTIAL_A 33
49 #define TAG_POTENTIAL_B 34
55 #define TAG_SMOOTH_A 40
56 #define TAG_SMOOTH_B 41
57 #define TAG_ENRICH_A 42
58 #define TAG_CONDUCT_A 43
59 #define TAG_CONDUCT_B 44
67 #define TAG_HOTNGB_A 52
68 #define TAG_HOTNGB_B 53
74 #define TAG_SEARCH_A 58
75 #define TAG_SEARCH_B 59
77 #define TAG_INJECT_A 61
79 #define TAG_PDATA_SPH 70
80 #define TAG_KEY_SPH 71
82 #define TAG_PDATA_STAR 72
83 #define TAG_STARDATA 73
84 #define TAG_KEY_STAR 74
86 #define TAG_PDATA_BH 75
90 #define TAG_GRAVCOST_A 79
91 #define TAG_GRAVCOST_B 80
93 #define TAG_PM_FOLD 81
95 #define TAG_BARRIER 82
96 #define TAG_PART_DATA 83
97 #define TAG_NODE_DATA 84
98 #define TAG_RESULTS 85
99 #define TAG_DRIFT_INIT 86
100 #define TAG_ALL_UPDATE 87
101 #define TAG_METDATA 500
102 #define TAG_FETCH_GRAVTREE 1000
103 #define TAG_FETCH_SPH_DENSITY 2000
104 #define TAG_FETCH_SPH_HYDRO 3000
105 #define TAG_FETCH_SPH_TREETIMESTEP 4000
109 int myMPI_Sendrecv(
void *sendbuf,
size_t sendcount, MPI_Datatype sendtype,
int dest,
int sendtag,
void *recvbuf,
size_t recvcount,
110 MPI_Datatype recvtype,
int source,
int recvtag, MPI_Comm comm, MPI_Status *status);
114 void myMPI_Alltoallv_new(
void *sendb,
int *sendcounts,
int *sdispls, MPI_Datatype sendtype,
void *recvb,
int *recvcounts,
int *rdispls,
115 MPI_Datatype recvtype, MPI_Comm comm,
int method);
117 void myMPI_Alltoallv(
void *sendbuf,
size_t *sendcounts,
size_t *sdispls,
void *recvbuf,
size_t *recvcounts,
size_t *rdispls,
int len,
118 int big_flag, MPI_Comm comm);
120 void my_int_MPI_Alltoallv(
void *sendb,
int *sendcounts,
int *sdispls,
void *recvb,
int *recvcounts,
int *rdispls,
int len,
121 int big_flag, MPI_Comm comm);
123 int myMPI_Allreduce(
const void *sendbuf,
void *recvbuf,
int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm);
126 MPI_Datatype recvtype, MPI_Comm comm);
131 void sumup_longs(
int n,
long long *src,
long long *res, MPI_Comm comm);
141 template <
typename T>
144 int ntask, thistask, ptask;
145 MPI_Comm_size(Communicator, &ntask);
146 MPI_Comm_rank(Communicator, &thistask);
148 for(ptask = 0; ntask > (1 << ptask); ptask++)
153 int *blocksize = (
int *)
Mem.mymalloc(
"blocksize",
sizeof(
int) * ntask);
154 int *blockstart = (
int *)
Mem.mymalloc(
"blockstart",
sizeof(
int) * ntask);
157 int rmd = N - blk * ntask;
158 int pivot_n = rmd * (blk + 1);
163 for(
int task = 0; task < ntask; task++)
166 blocksize[task] = blk + 1;
168 blocksize[task] = blk;
171 loc_first_n += blocksize[task];
174 blockstart[task] = blockstart[task - 1] + blocksize[task - 1];
178 T *loc_data = (T *)
Mem.mymalloc_clear(
"loc_data", blocksize[thistask] *
sizeof(T));
180 int *send_count = (
int *)
Mem.mymalloc(
"send_count",
sizeof(
int) * ntask);
181 int *recv_count = (
int *)
Mem.mymalloc(
"recv_count",
sizeof(
int) * ntask);
183 int *send_offset = (
int *)
Mem.mymalloc(
"send_offset",
sizeof(
int) * ntask);
191 ind_data *export_data = NULL;
194 for(
int rep = 0; rep < 2; rep++)
196 for(
int j = 0; j < ntask; j++)
200 for(
int n = 0; n < N; n++)
206 task = n / (blk + 1);
208 task = rmd + (n - pivot_n) / blk;
214 int index = send_offset[task] + send_count[task]++;
215 export_data[index].n = n;
216 export_data[index].val = glob[n];
223 MPI_Alltoall(send_count, 1, MPI_INT, recv_count, 1, MPI_INT, Communicator);
227 for(
int j = 0; j < ntask; j++)
229 nexport += send_count[j];
232 send_offset[j] = send_offset[j - 1] + send_count[j - 1];
235 export_data = (ind_data *)
Mem.mymalloc(
"export_data", nexport *
sizeof(ind_data));
239 for(
int ngrp = 0; ngrp < (1 << ptask); ngrp++)
241 int recvTask = thistask ^ ngrp;
243 if(send_count[recvTask] > 0 || recv_count[recvTask] > 0)
245 int nimport = recv_count[recvTask];
247 ind_data *import_data = (ind_data *)
Mem.mymalloc(
"import_data", nimport *
sizeof(ind_data));
249 MPI_Sendrecv(&export_data[send_offset[recvTask]], send_count[recvTask] *
sizeof(ind_data), MPI_BYTE, recvTask,
250 TAG_DENS_B, import_data, recv_count[recvTask] *
sizeof(ind_data), MPI_BYTE, recvTask,
TAG_DENS_B,
251 Communicator, MPI_STATUS_IGNORE);
253 for(
int i = 0; i < nimport; i++)
255 int j = import_data[i].n - loc_first_n;
257 if(j < 0 || j >= blocksize[thistask])
258 Terminate(
"j=%d < 0 || j>= blocksize[thistask]=%d", j, blocksize[thistask]);
260 loc_data[j] += import_data[i].val;
263 Mem.myfree(import_data);
267 Mem.myfree(export_data);
271 Mem.myfree(send_offset);
272 Mem.myfree(recv_count);
273 Mem.myfree(send_count);
276 for(
int ngrp = 0; ngrp < (1 << ptask); ngrp++)
278 int recvTask = thistask ^ ngrp;
280 if(blocksize[thistask] > 0 || blocksize[recvTask] > 0)
281 MPI_Sendrecv(loc_data, blocksize[thistask] *
sizeof(T), MPI_BYTE, recvTask,
TAG_DENS_A, &glob[blockstart[recvTask]],
282 blocksize[recvTask] *
sizeof(T), MPI_BYTE, recvTask,
TAG_DENS_A, Communicator, MPI_STATUS_IGNORE);
285 Mem.myfree(loc_data);
286 Mem.myfree(blockstart);
287 Mem.myfree(blocksize);
MPI_Datatype MPI_MyIntPosType
void my_int_MPI_Alltoallv(void *sendb, int *sendcounts, int *sdispls, void *recvb, int *recvcounts, int *rdispls, int len, int big_flag, MPI_Comm comm)
MPI_Op MPI_MAX_MySignedIntPosType
int MPI_hypercube_Allgatherv(void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int *recvcount, int *displs, MPI_Datatype recvtype, MPI_Comm comm)
MPI_Op MPI_MIN_MySignedIntPosType
void allreduce_sparse_double_sum(double *loc, double *glob, int N, MPI_Comm comm)
void myMPI_Alltoallv_new(void *sendb, int *sendcounts, int *sdispls, MPI_Datatype sendtype, void *recvb, int *recvcounts, int *rdispls, MPI_Datatype recvtype, MPI_Comm comm, int method)
void my_mpi_types_init(void)
void allreduce_sum(T *glob, int N, MPI_Comm Communicator)
void sumup_large_ints(int n, int *src, long long *res, MPI_Comm comm)
int myMPI_Alltoallv_new_prep(int *sendcnt, int *recvcnt, int *rdispls, MPI_Comm comm, int method)
MPI_Op MPI_MAX_MyIntPosType
int myMPI_Sendrecv(void *sendbuf, size_t sendcount, MPI_Datatype sendtype, int dest, int sendtag, void *recvbuf, size_t recvcount, MPI_Datatype recvtype, int source, int recvtag, MPI_Comm comm, MPI_Status *status)
int myMPI_Allreduce(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm)
void minimum_large_ints(int n, long long *src, long long *res, MPI_Comm comm)
MPI_Op MPI_MIN_MyIntPosType
void sumup_longs(int n, long long *src, long long *res, MPI_Comm comm)
void myMPI_Alltoallv(void *sendbuf, size_t *sendcounts, size_t *sdispls, void *recvbuf, size_t *recvcounts, size_t *rdispls, int len, int big_flag, MPI_Comm comm)