GADGET-4
grav_direct.cc
Go to the documentation of this file.
1 /*******************************************************************************
2  * \copyright This file is part of the GADGET4 N-body/SPH code developed
3  * \copyright by Volker Springel. Copyright (C) 2014-2020 by Volker Springel
4  * \copyright (vspringel@mpa-garching.mpg.de) and all contributing authors.
5  *******************************************************************************/
6 
12 #include "gadgetconfig.h"
13 
14 #include <math.h>
15 #include <mpi.h>
16 #include <stdlib.h>
17 #include <string.h>
18 #include <algorithm>
19 
20 #include "../data/allvars.h"
21 #include "../data/dtypes.h"
22 #include "../data/intposconvert.h"
23 #include "../data/mymalloc.h"
24 #include "../domain/domain.h"
25 #include "../gravtree/gravtree.h"
26 #include "../logs/logs.h"
27 #include "../logs/timer.h"
28 #include "../main/simulation.h"
29 #include "../mpi_utils/mpi_utils.h"
30 #include "../pm/pm.h"
31 #include "../system/system.h"
32 #include "../time_integration/timestep.h"
33 
34 #ifdef ALLOW_DIRECT_SUMMATION
35 
39 template <>
41 {
42  TIMER_START(CPU_TREEDIRECT);
43 
44  D->mpi_printf("GRAVDIRECT: direct summation. (presently allocated=%g MB)\n", Mem.getAllocatedBytesInMB());
45 
46  double tstart = Logs.second();
47 
48  int *Send_count = (int *)Mem.mymalloc_movable(&Send_count, "Send_count", sizeof(int) * D->NTask);
49  int *Send_offset = (int *)Mem.mymalloc_movable(&Send_offset, "Send_offset", sizeof(int) * D->NTask);
50  int *Recv_count = (int *)Mem.mymalloc_movable(&Recv_count, "Recv_count", sizeof(int) * D->NTask);
51  int *Recv_offset = (int *)Mem.mymalloc_movable(&Recv_offset, "Recv_offset", sizeof(int) * D->NTask);
52 
53  struct directdata
54  {
55  MyIntPosType IntPos[3];
56  MyDouble Mass;
57  unsigned char Type;
58 #if NSOFTCLASSES > 1
59  unsigned char SofteningClass;
60 #endif
61 #if defined(PMGRID) && defined(PLACEHIGHRESREGION)
62  unsigned char InsideOutsideFlag : 1;
63 #endif
64  };
65  directdata *DirectDataIn, *DirectDataAll;
66 
67  struct accdata
68  {
69  MyFloat Acc[3];
70 #ifdef EVALPOTENTIAL
71  MyFloat Potential;
72 #endif
73  };
74  accdata *DirectAccOut, *DirectAccIn;
75 
76  DirectDataIn = (directdata *)Mem.mymalloc("DirectDataIn", Sp->TimeBinsGravity.NActiveParticles * sizeof(directdata));
77 
78  int nforces = 0;
79 
80  for(int idx = 0; idx < Sp->TimeBinsGravity.NActiveParticles; idx++)
81  {
82  int i = Sp->TimeBinsGravity.ActiveParticleList[idx];
83 
84  for(int k = 0; k < 3; k++)
85  DirectDataIn[nforces].IntPos[k] = Sp->P[i].IntPos[k];
86 
87  DirectDataIn[nforces].Mass = Sp->P[i].getMass();
88 
89  DirectDataIn[nforces].Type = Sp->P[i].getType();
90 #if NSOFTCLASSES > 1
91  DirectDataIn[nforces].SofteningClass = Sp->P[i].getSofteningClass();
92 #endif
93 #if defined(PMGRID) && defined(PLACEHIGHRESREGION)
94  DirectDataIn[nforces].InsideOutsideFlag = Sp->P[i].InsideOutsideFlag;
95 #endif
96  nforces++;
97  }
98 
99  MPI_Allgather(&nforces, 1, MPI_INT, Recv_count, 1, MPI_INT, D->Communicator);
100 
101  int nimport = 0;
102  Recv_offset[0] = 0;
103 
104  for(int j = 0; j < D->NTask; j++)
105  {
106  nimport += Recv_count[j];
107 
108  if(j > 0)
109  Recv_offset[j] = Recv_offset[j - 1] + Recv_count[j - 1];
110  }
111 
112  DirectDataAll = (directdata *)Mem.mymalloc("DirectDataAll", nimport * sizeof(directdata));
113 
114  for(int j = 0; j < D->NTask; j++)
115  {
116  Send_count[j] = Recv_count[j] * sizeof(directdata);
117  Send_offset[j] = Recv_offset[j] * sizeof(directdata);
118  }
119 
120  MPI_Allgatherv(DirectDataIn, nforces * sizeof(directdata), MPI_BYTE, DirectDataAll, Send_count, Send_offset, MPI_BYTE,
121  D->Communicator);
122 
123  /* subdivide the work evenly */
124  int first, count;
125  subdivide_evenly(nimport, D->NTask, D->ThisTask, &first, &count);
126 
127  DirectAccOut = (accdata *)Mem.mymalloc("DirectDataOut", count * sizeof(accdata));
128 
129  /* now calculate the forces */
130  for(int i = 0; i < count; i++)
131  {
132  int target = i + first;
133  int result_idx = i;
134 
135  vector<double> acc = 0.0;
136 #ifdef EVALPOTENTIAL
137  double pot = 0.0;
138 #endif
139 
140 #if NSOFTCLASSES > 1
141  double h_i = All.ForceSoftening[DirectDataAll[target].SofteningClass];
142 #else
143  double h_i = All.ForceSoftening[0];
144 #endif
145 
146  for(int j = 0; j < nimport; j++)
147  {
148 #if NSOFTCLASSES > 1
149  double h_j = All.ForceSoftening[DirectDataAll[j].SofteningClass];
150 #else
151  double h_j = All.ForceSoftening[0];
152 #endif
153  double hmax = (h_j > h_i) ? h_j : h_i;
154 
155  vector<double> dxyz;
156  Sp->nearest_image_intpos_to_pos(DirectDataAll[j].IntPos, DirectDataAll[target].IntPos,
157  dxyz.da); /* converts the integer distance to floating point */
158 
159  double r2 = dxyz[0] * dxyz[0] + dxyz[1] * dxyz[1] + dxyz[2] * dxyz[2];
160 
161  double mass = DirectDataAll[j].Mass;
162 
163  /* now evaluate the force component */
164 
165  double r = sqrt(r2);
166 
167  double rinv = (r > 0) ? 1.0 / r : 0;
168 
170 
171 #ifdef PMGRID
172  mesh_factors *mfp = &mf[LOW_MESH];
173 #if defined(PLACEHIGHRESREGION)
174  if((DoPM & TREE_ACTIVE_CUTTOFF_HIGHRES_PM))
175  {
176  if(DirectDataAll[j].InsideOutsideFlag == FLAG_INSIDE && DirectDataAll[target].InsideOutsideFlag == FLAG_INSIDE)
177  mfp = &mf[HIGH_MESH];
178  }
179 #endif
181  {
182  if(modify_gfactors_pm_monopole(gfac, r, rinv, mfp))
183  return; // if we are outside the cut-off radius, we have no interaction
184  }
185 #endif
186  get_gfactors_monopole(gfac, r, hmax, rinv);
187 
188 #ifdef EVALPOTENTIAL
189  pot -= mass * gfac.fac0;
190 #endif
191  acc -= (mass * gfac.fac1 * rinv) * dxyz;
192 
193  if(DoEwald)
194  {
195  // EWALD treatment, only done for periodic boundaries in case PM is not active
196 
197  ewald_data ew;
198  Ewald.ewald_gridlookup(DirectDataAll[j].IntPos, DirectDataAll[target].IntPos, ewald::POINTMASS, ew);
199 
200 #ifdef EVALPOTENTIAL
201  pot += mass * ew.D0phi;
202 #endif
203  acc += mass * ew.D1phi;
204  }
205  }
206 
207  DirectAccOut[result_idx].Acc[0] = acc[0];
208  DirectAccOut[result_idx].Acc[1] = acc[1];
209  DirectAccOut[result_idx].Acc[2] = acc[2];
210 #ifdef EVALPOTENTIAL
211  DirectAccOut[result_idx].Potential = pot;
212 #endif
213  }
214 
215  /* now send the forces to the right places */
216 
217  DirectAccIn = (accdata *)Mem.mymalloc("DirectDataIn", nforces * sizeof(accdata));
218 
219  MPI_Request *requests = (MPI_Request *)Mem.mymalloc_movable(&requests, "requests", 2 * D->NTask * sizeof(MPI_Request));
220  int n_requests = 0;
221 
222  int recvTask = 0;
223  int sendTask = 0;
224  int send_first, send_count;
225  subdivide_evenly(nimport, D->NTask, sendTask, &send_first, &send_count);
226 
227  while(recvTask < D->NTask && sendTask < D->NTask) /* go through both lists */
228  {
229  while(send_first + send_count < Recv_offset[recvTask])
230  {
231  if(sendTask >= D->NTask - 1)
232  Terminate("sendTask >= NTask recvTask=%d sendTask=%d", recvTask, sendTask);
233 
234  sendTask++;
235  subdivide_evenly(nimport, D->NTask, sendTask, &send_first, &send_count);
236  }
237 
238  while(Recv_offset[recvTask] + Recv_count[recvTask] < send_first)
239  {
240  if(recvTask >= D->NTask - 1)
241  Terminate("recvTask >= NTask recvTask=%d sendTask=%d", recvTask, sendTask);
242 
243  recvTask++;
244  }
245 
246  int start = std::max<int>(Recv_offset[recvTask], send_first);
247  int next = std::min<int>(Recv_offset[recvTask] + Recv_count[recvTask], send_first + send_count);
248 
249  if(next - start >= 1)
250  {
251  if(D->ThisTask == sendTask)
252  MPI_Isend(DirectAccOut + start - send_first, (next - start) * sizeof(accdata), MPI_BYTE, recvTask, TAG_PDATA_SPH,
253  D->Communicator, &requests[n_requests++]);
254 
255  if(D->ThisTask == recvTask)
256  MPI_Irecv(DirectAccIn + start - Recv_offset[recvTask], (next - start) * sizeof(accdata), MPI_BYTE, sendTask, TAG_PDATA_SPH,
257  D->Communicator, &requests[n_requests++]);
258  }
259 
260  if(next == Recv_offset[recvTask] + Recv_count[recvTask])
261  recvTask++;
262  else
263  {
264  sendTask++;
265  if(sendTask >= D->NTask)
266  break;
267 
268  subdivide_evenly(nimport, D->NTask, sendTask, &send_first, &send_count);
269  }
270  }
271 
272  MPI_Waitall(n_requests, requests, MPI_STATUSES_IGNORE);
273  Mem.myfree(requests);
274 
275  nforces = 0;
276 
277  for(int idx = 0; idx < Sp->TimeBinsGravity.NActiveParticles; idx++)
278  {
279  int i = Sp->TimeBinsGravity.ActiveParticleList[idx];
280 
281  for(int k = 0; k < 3; k++)
282  Sp->P[i].GravAccel[k] = DirectAccIn[nforces].Acc[k];
283 
284 #ifdef EVALPOTENTIAL
285  Sp->P[i].Potential = DirectAccIn[nforces].Potential;
286 #endif
287  nforces++;
288  }
289 
290  Mem.myfree(DirectAccIn);
291  Mem.myfree(DirectAccOut);
292  Mem.myfree(DirectDataAll);
293  Mem.myfree(DirectDataIn);
294 
295  Mem.myfree(Recv_offset);
296  Mem.myfree(Recv_count);
297  Mem.myfree(Send_offset);
298  Mem.myfree(Send_count);
299 
300  D->mpi_printf("GRAVDIRECT: force is done.\n");
301 
303 
304  double tend = Logs.second();
305 
306  double timedirect, sumt;
307  timedirect = tend - tstart;
308 
309  MPI_Reduce(&timedirect, &sumt, 1, MPI_DOUBLE, MPI_SUM, 0, D->Communicator);
310 
311  if(D->ThisTask == 0)
312  {
313  fprintf(Logs.FdTimings, "Nf=%9lld timebin=%d active part/task: avg=%g total-Nf=%lld\n",
316  fprintf(Logs.FdTimings, " (direct) took=%g sec part/sec: %g ia/sec: %g\n", timedirect,
317  Sp->TimeBinsGravity.GlobalNActiveParticles / (sumt + 1.0e-20),
320  }
321 
322  TIMER_STOP(CPU_TREEDIRECT);
323 }
324 
325 #endif
global_data_all_processes All
Definition: main.cc:40
@ POINTMASS
Definition: ewald.h:67
void ewald_gridlookup(const MyIntPosType *p_intpos, const MyIntPosType *target_intpos, enum interpolate_options flag, ewald_data &fper)
Definition: ewald.cc:304
mesh_factors mf[2]
Definition: gravtree.h:271
char DoEwald
Definition: gravtree.h:361
void get_gfactors_monopole(gfactors &res, const T r, const T h_max, const T rinv)
Definition: gravtree.h:525
void nearest_image_intpos_to_pos(const MyIntPosType *const a, const MyIntPosType *const b, T *posdiff)
FILE * FdTimings
Definition: logs.h:47
double second(void)
Definition: logs.cc:471
double getAllocatedBytesInMB(void)
Definition: mymalloc.h:68
TimeBinData TimeBinsGravity
Definition: simparticles.h:205
particle_data * P
Definition: simparticles.h:54
T da[3]
Definition: symtensors.h:106
#define FLAG_INSIDE
Definition: constants.h:30
#define LOW_MESH
Definition: constants.h:33
#define HIGH_MESH
Definition: constants.h:34
float MyDouble
Definition: dtypes.h:87
float MyFloat
Definition: dtypes.h:86
uint32_t MyIntPosType
Definition: dtypes.h:35
ewald Ewald
Definition: main.cc:42
#define TREE_ACTIVE_CUTTOFF_HIGHRES_PM
Definition: gravtree.h:24
#define TREE_ACTIVE_CUTTOFF_BASE_PM
Definition: gravtree.h:23
logs Logs
Definition: main.cc:43
#define TIMER_START(counter)
Starts the timer counter.
Definition: logs.h:197
#define TIMER_STOP(counter)
Stops the timer counter.
Definition: logs.h:220
#define Terminate(...)
Definition: macros.h:19
#define TAG_PDATA_SPH
Definition: mpi_utils.h:79
memory Mem
Definition: main.cc:44
expr sqrt(half arg)
Definition: half.hpp:2777
int NActiveParticles
Definition: timestep.h:18
int * ActiveParticleList
Definition: timestep.h:20
long long GlobalNActiveParticles
Definition: timestep.h:19
vector< MyReal > D1phi
Definition: gravtree.h:186
MyReal D0phi
Definition: gravtree.h:185
long long TotNumDirectForces
Definition: allvars.h:104
double ForceSoftening[NSOFTCLASSES+NSOFTCLASSES_HYDRO+2]
Definition: allvars.h:258
MyDouble getMass(void)
unsigned char getSofteningClass(void)
unsigned char getType(void)
vector< MyFloat > GravAccel
Definition: particle_data.h:55
MyIntPosType IntPos[3]
Definition: particle_data.h:53
void subdivide_evenly(long long N, int pieces, int index_bin, long long *first, int *count)
Definition: system.cc:51
void myflush(FILE *fstream)
Definition: system.cc:125