GADGET-4
gravtree.cc
Go to the documentation of this file.
1 /*******************************************************************************
2  * \copyright This file is part of the GADGET4 N-body/SPH code developed
3  * \copyright by Volker Springel. Copyright (C) 2014-2020 by Volker Springel
4  * \copyright (vspringel@mpa-garching.mpg.de) and all contributing authors.
5  *******************************************************************************/
6 
12 #include "gadgetconfig.h"
13 
14 #include <math.h>
15 #include <mpi.h>
16 #include <stdlib.h>
17 #include <string.h>
18 #include <atomic>
19 
20 #include "../data/allvars.h"
21 #include "../data/dtypes.h"
22 #include "../data/intposconvert.h"
23 #include "../data/mymalloc.h"
24 #include "../domain/domain.h"
25 #include "../gravity/ewald.h"
26 #include "../gravtree/gravtree.h"
27 #include "../gravtree/gwalk.h"
28 #include "../logs/logs.h"
29 #include "../logs/timer.h"
30 #include "../main/simulation.h"
31 #include "../mpi_utils/mpi_utils.h"
32 #include "../mpi_utils/shared_mem_handler.h"
33 #include "../pm/pm.h"
34 #include "../sort/cxxsort.h"
35 #include "../system/system.h"
36 #include "../time_integration/timestep.h"
37 
47 #ifdef PMGRID
48 
55 template <typename partset>
57 {
58  for(int i = 0; i <= NTAB; i++)
59  {
60  double u = (RCUT / 2.0) / NTAB * i;
61 
62  /*
63  * fac0 contains g = g(u) = (erfc(u) - 1)/u
64  * fac1 contains g'
65  * fac2 contains g'' - g'/u
66  * fac3 contains g''' - 3g''/u + 3g'/u^2
67  * fac4 contains g'''' - 6*g'''/u + 15*g''/u^2 - 15*g'/u^3
68  * fac5 contains g''''' - 10*g''''/u + 45*g'''/u^2 - 105*g''/u^3 + 105*g'/u^4
69  */
70 
71  shortrange_factors[i].fac0 = (u > 0) ? (erfc(u) - 1.0) / u : -2.0 / sqrt(M_PI);
72  shortrange_factors[i].fac1 = (u > 0) ? ((1.0 - erfc(u)) - 2.0 * u / sqrt(M_PI) * exp(-u * u)) / (u * u) : 0;
73 #if(MULTIPOLE_ORDER >= 2)
74  shortrange_factors[i].fac2 =
75  (u > 0) ? (-3.0 * (1.0 - erfc(u)) + (6 * u + 4 * pow(u, 3)) / sqrt(M_PI) * exp(-u * u)) / pow(u, 3) : 0;
76 #endif
77 #if(MULTIPOLE_ORDER >= 3)
78  shortrange_factors[i].fac3 =
79  (u > 0) ? (15.0 * (1.0 - erfc(u)) - (30 * u + 20 * pow(u, 3) + 8 * pow(u, 5)) / sqrt(M_PI) * exp(-u * u)) / pow(u, 4) : 0;
80 #endif
81 #if(MULTIPOLE_ORDER >= 4)
82  shortrange_factors[i].fac4 =
83  (u > 0) ? (-105.0 * (1.0 - erfc(u)) +
84  (210.0 * u + 140.0 * pow(u, 3) + 56.0 * pow(u, 5) + 16.0 * pow(u, 7)) / sqrt(M_PI) * exp(-u * u)) /
85  pow(u, 5)
86  : 0;
87 #endif
88 #if(MULTIPOLE_ORDER >= 5)
89  shortrange_factors[i].fac5 = (u > 0) ? (945.0 * (1.0 - erfc(u)) - (1890.0 * u + 1260.0 * pow(u, 3) + 504.0 * pow(u, 5) +
90  144.0 * pow(u, 7) + 32.0 * pow(u, 9)) /
91  sqrt(M_PI) * exp(-u * u)) /
92  pow(u, 6)
93  : 0;
94 #endif
95  }
96 }
97 
98 template <typename partset>
100 {
101  for(int i = 0; i < 2; i++)
102  {
103  mf[i].rcut2 = Tp->Rcut[i] * Tp->Rcut[i];
104  double dblrcut[3] = {Tp->Rcut[i], Tp->Rcut[i],
105  Tp->Rcut[i]}; /* for stretched boxes, the conversion may be different in each dimension */
106  Tp->pos_to_signedintpos(dblrcut, (MySignedIntPosType *)mf[i].intrcut);
107 
108  if(Tp->Asmth[i] > 0)
109  mf[i].asmthinv1 = 0.5 / Tp->Asmth[i];
110  else
111  mf[i].asmthinv1 = 0;
112 
113  mf[i].asmthinv2 = mf[i].asmthinv1 * mf[i].asmthinv1;
114 #if(MULTIPOLE_ORDER >= 2)
115  mf[i].asmthinv3 = mf[i].asmthinv2 * mf[i].asmthinv1;
116 #endif
117 #if(MULTIPOLE_ORDER >= 3)
118  mf[i].asmthinv4 = mf[i].asmthinv3 * mf[i].asmthinv1;
119 #endif
120 #if(MULTIPOLE_ORDER >= 4)
121  mf[i].asmthinv5 = mf[i].asmthinv4 * mf[i].asmthinv1;
122 #endif
123 #if(MULTIPOLE_ORDER >= 5)
124  mf[i].asmthinv6 = mf[i].asmthinv5 * mf[i].asmthinv1;
125 #endif
126 
127  mf[i].asmthfac = mf[i].asmthinv1 * (NTAB / (RCUT / 2.0));
128  }
129 }
130 
131 #endif
132 
133 template <typename partset>
135 {
136  int *send_count = (int *)Mem.mymalloc_movable(&send_count, "send_count", sizeof(int) * D->NTask);
137  int *send_offset = (int *)Mem.mymalloc_movable(&send_offset, "send_offset", sizeof(int) * D->NTask);
138  int *recv_count = (int *)Mem.mymalloc_movable(&recv_count, "recv_count", sizeof(int) * D->NTask);
139  int *recv_offset = (int *)Mem.mymalloc_movable(&recv_offset, "tecv_offset", sizeof(int) * D->NTask);
140 
141  /* now communicate the forces in ResultsActiveImported */
142  for(int j = 0; j < D->NTask; j++)
143  recv_count[j] = 0;
144 
145  int n = 0, k = 0;
146 
147  for(int i = 0; i < D->NTask; i++)
148  for(int j = 0; j < Recv_count[i]; j++, n++) /* Note that we access Tree.Recv_count here */
149  {
150 #ifndef HIERARCHICAL_GRAVITY
151  if(Points[n].ActiveFlag)
152 #endif
153  {
154  ResultsActiveImported[k].index = Points[n].index;
155  recv_count[i]++;
156  k++;
157  }
158  }
159  MPI_Alltoall(recv_count, 1, MPI_INT, send_count, 1, MPI_INT, D->Communicator);
160 
161  recv_offset[0] = 0;
162  send_offset[0] = 0;
163 
164  int Nexport = 0;
165  int Nimport = 0;
166 
167  for(int j = 0; j < D->NTask; j++)
168  {
169  Nexport += send_count[j];
170  Nimport += recv_count[j];
171  if(j > 0)
172  {
173  send_offset[j] = send_offset[j - 1] + send_count[j - 1];
174  recv_offset[j] = recv_offset[j - 1] + recv_count[j - 1];
175  }
176  }
177 
178  resultsactiveimported_data *tmp_results =
179  (resultsactiveimported_data *)Mem.mymalloc("tmp_results", Nexport * sizeof(resultsactiveimported_data));
180 
181  /* exchange data */
182  for(int ngrp = 1; ngrp < (1 << D->PTask); ngrp++)
183  {
184  int recvTask = D->ThisTask ^ ngrp;
185 
186  if(recvTask < D->NTask)
187  {
188  if(send_count[recvTask] > 0 || recv_count[recvTask] > 0)
189  {
190  MPI_Sendrecv(&ResultsActiveImported[recv_offset[recvTask]], recv_count[recvTask] * sizeof(resultsactiveimported_data),
191  MPI_BYTE, recvTask, TAG_FOF_A, &tmp_results[send_offset[recvTask]],
192  send_count[recvTask] * sizeof(resultsactiveimported_data), MPI_BYTE, recvTask, TAG_FOF_A, D->Communicator,
193  MPI_STATUS_IGNORE);
194  }
195  }
196  }
197  for(int i = 0; i < Nexport; i++)
198  {
199  int target = tmp_results[i].index;
200 
201  for(int k = 0; k < 3; k++)
202  Tp->P[target].GravAccel[k] += tmp_results[i].GravAccel[k];
203 #ifdef EVALPOTENTIAL
204  Tp->P[target].Potential += tmp_results[i].Potential;
205 #endif
206 
207  if(MeasureCostFlag)
208  Tp->P[target].GravCost += tmp_results[i].GravCost;
209  }
210  Mem.myfree(tmp_results);
211 
212  Mem.myfree(recv_offset);
213  Mem.myfree(recv_count);
214  Mem.myfree(send_offset);
215  Mem.myfree(send_count);
216 }
217 
218 /* make sure that we instantiate the template */
219 #include "../data/simparticles.h"
220 template class gravtree<simparticles>;
void gravity_exchange_forces(void)
Definition: gravtree.cc:134
int ThisTask
Definition: setcomm.h:33
#define RCUT
Definition: constants.h:293
#define M_PI
Definition: constants.h:56
int32_t MySignedIntPosType
Definition: dtypes.h:36
#define NTAB
Definition: gravtree.h:18
#define TAG_FOF_A
Definition: mpi_utils.h:60
memory Mem
Definition: main.cc:44
expr exp(half arg)
Definition: half.hpp:2724
expr pow(half base, half exp)
Definition: half.hpp:2803
expr erfc(half arg)
Definition: half.hpp:2925
expr sqrt(half arg)
Definition: half.hpp:2777