GADGET-4
ngenic.cc
Go to the documentation of this file.
1 /*******************************************************************************
2  * \copyright This file is part of the GADGET4 N-body/SPH code developed
3  * \copyright by Volker Springel. Copyright (C) 2014-2020 by Volker Springel
4  * \copyright (vspringel@mpa-garching.mpg.de) and all contributing authors.
5  *******************************************************************************/
6 
12 #include "gadgetconfig.h"
13 
14 #ifdef NGENIC
15 
16 #include <gsl/gsl_rng.h>
17 #include <math.h>
18 #include <mpi.h>
19 #include <stdlib.h>
20 #include <algorithm>
21 
22 #include "../data/allvars.h"
23 #include "../data/dtypes.h"
24 #include "../data/intposconvert.h"
25 #include "../data/mymalloc.h"
26 #include "../logs/logs.h"
27 #include "../main/simulation.h"
28 #include "../mpi_utils/mpi_utils.h"
29 #include "../mpi_utils/shared_mem_handler.h"
30 #include "../ngenic/ngenic.h"
31 #include "../pm/pm_mpi_fft.h"
32 #include "../system/system.h"
33 
34 #ifdef GRIDX
35 #undef GRIDX
36 #undef GRIDY
37 #undef GRIDZ
38 #undef INTCELL
39 #endif
40 
41 #define GRIDX (NGENIC)
42 #define GRIDY (NGENIC)
43 #define GRIDZ (NGENIC)
44 
45 #define INTCELL ((~((MyIntPosType)0)) / GRIDX + 1)
46 
47 #define GRIDz (GRIDZ / 2 + 1)
48 #define GRID2 (2 * GRIDz)
49 
50 #define FI(x, y, z) (((large_array_offset)GRID2) * (GRIDY * (x) + (y)) + (z))
51 #define FC(c, z) (((large_array_offset)GRID2) * ((c)-myplan.firstcol_XY) + (z))
52 
53 #if(GRIDZ > 1024)
54 typedef long long large_array_offset; /* use a larger data type in this case so that we can always address all cells of the 3D grid
55  with a single index */
56 #else
57 typedef unsigned int large_array_offset;
58 #endif
59 
60 #ifdef NUMPART_PER_TASK_LARGE
61 typedef long long large_numpart_type; /* if there is a risk that the local particle number times 8 overflows a 32-bit integer, this
62  data type should be used */
63 #else
64 typedef int large_numpart_type;
65 #endif
66 
67 void ngenic::ngenic_displace_particles(void)
68 {
69  TIMER_START(CPU_NGENIC);
70 
71  mpi_printf("NGENIC: computing displacement fields...\n");
72 
74 
75  double vel_prefac1 = All.cf_atime * All.cf_hubble_a * ngenic_f1_omega(All.cf_atime);
76  double vel_prefac2 = All.cf_atime * All.cf_hubble_a * ngenic_f2_omega(All.cf_atime);
77 
78  vel_prefac1 /= sqrt(All.cf_atime); /* converts to Gadget velocity */
79  vel_prefac2 /= sqrt(All.cf_atime); /* converts to Gadget velocity */
80 
81  mpi_printf("NGENIC: vel_prefac1= %g hubble_a=%g fom1=%g\n", vel_prefac1, All.cf_hubble_a, ngenic_f1_omega(All.cf_atime));
82  mpi_printf("NGENIC: vel_prefac2= %g hubble_a=%g fom2=%g\n", vel_prefac2, All.cf_hubble_a, ngenic_f2_omega(All.cf_atime));
83 
84  rnd_generator_conjugate = gsl_rng_alloc(gsl_rng_ranlxd1);
85  rnd_generator = gsl_rng_alloc(gsl_rng_ranlxd1);
86  gsl_rng_set(rnd_generator, All.NgenicSeed);
87 
88  ngenic_initialize_powerspectrum();
89 
90  ngenic_initialize_ffts();
91 
92  if(!(seedtable = (unsigned int *)Mem.mymalloc("seedtable", NGENIC * NGENIC * sizeof(unsigned int))))
93  Terminate("could not allocate seed table");
94 
95  for(int i = 0; i < NGENIC / 2; i++)
96  {
97  for(int j = 0; j < i; j++)
98  seedtable[i * NGENIC + j] = 0x7fffffff * gsl_rng_uniform(rnd_generator);
99 
100  for(int j = 0; j < i + 1; j++)
101  seedtable[j * NGENIC + i] = 0x7fffffff * gsl_rng_uniform(rnd_generator);
102 
103  for(int j = 0; j < i; j++)
104  seedtable[(NGENIC - 1 - i) * NGENIC + j] = 0x7fffffff * gsl_rng_uniform(rnd_generator);
105 
106  for(int j = 0; j < i + 1; j++)
107  seedtable[(NGENIC - 1 - j) * NGENIC + i] = 0x7fffffff * gsl_rng_uniform(rnd_generator);
108 
109  for(int j = 0; j < i; j++)
110  seedtable[i * NGENIC + (NGENIC - 1 - j)] = 0x7fffffff * gsl_rng_uniform(rnd_generator);
111 
112  for(int j = 0; j < i + 1; j++)
113  seedtable[j * NGENIC + (NGENIC - 1 - i)] = 0x7fffffff * gsl_rng_uniform(rnd_generator);
114 
115  for(int j = 0; j < i; j++)
116  seedtable[(NGENIC - 1 - i) * NGENIC + (NGENIC - 1 - j)] = 0x7fffffff * gsl_rng_uniform(rnd_generator);
117 
118  for(int j = 0; j < i + 1; j++)
119  seedtable[(NGENIC - 1 - j) * NGENIC + (NGENIC - 1 - i)] = 0x7fffffff * gsl_rng_uniform(rnd_generator);
120  }
121 
123  {
124  // We actually have multiple shared memory nodes in which we set aside one MPI rank for shared memory communictiona.
125  // In this casem move the seedtable to the communication rank in order to consume this memory only once on the node
126 
127  if(Shmem.Island_ThisTask == 0)
128  {
129  size_t tab_len = NGENIC * NGENIC * sizeof(unsigned int);
130 
131  MPI_Send(&tab_len, sizeof(tab_len), MPI_BYTE, Shmem.MyShmRankInGlobal, TAG_TABLE_ALLOC, MPI_COMM_WORLD);
132  MPI_Send(seedtable, tab_len, MPI_BYTE, Shmem.MyShmRankInGlobal, TAG_DMOM, MPI_COMM_WORLD);
133  }
134 
135  Mem.myfree(seedtable);
136 
137  ptrdiff_t off;
138  MPI_Bcast(&off, sizeof(ptrdiff_t), MPI_BYTE, Shmem.Island_NTask - 1, Shmem.SharedMemComm);
139 
140  seedtable = (unsigned int *)((char *)Shmem.SharedMemBaseAddr[Shmem.Island_NTask - 1] + off);
141  }
142 
143  ngenic_distribute_particles();
144 
145  /* allocate displacement vectors */
146  Pdisp = (disp_data *)Mem.mymalloc_clear("disp_data", Sp->NumPart * sizeof(disp_data));
147 
148 #if defined(MULTICOMPONENTGLASSFILE) && defined(DIFFERENT_TRANSFER_FUNC)
149  for(Type = MinType; Type <= MaxType; Type++)
150 #endif
151  {
152 #ifdef NGENIC_2LPT
153 
154  /* allocate temporary buffers for second derivatives */
155  fft_real *d2phi1[3];
156  for(int axes = 0; axes < 3; axes++)
157  d2phi1[axes] = (fft_real *)Mem.mymalloc_clear("d2Phi1", maxfftsize * sizeof(fft_real));
158 
159  for(int axes = 0; axes < 3; axes++)
160  {
161  mpi_printf("NGENIC_2LPT: Computing secondary source term, derivatices %d %d\n", axes, axes);
162 
163  fft_real *disp = (fft_real *)Mem.mymalloc("disp", maxfftsize * sizeof(fft_real));
164 
165  ngenic_setup_modes_in_kspace((fft_complex *)disp);
166  ngenic_get_derivate_from_fourier_field(axes, axes, (fft_complex *)disp);
167 
168  memcpy(d2phi1[axes], disp, maxfftsize * sizeof(fft_real));
169 
170  Mem.myfree(disp);
171  }
172 
173  /* allocate second source potential */
174  fft_real *Phi2 = (fft_real *)Mem.mymalloc_movable(&Phi2, "Phi2", maxfftsize * sizeof(fft_real));
175 
176  for(size_t n = 0; n < maxfftsize; n++)
177  Phi2[n] = d2phi1[0][n] * d2phi1[1][n] + d2phi1[0][n] * d2phi1[2][n] + d2phi1[1][n] * d2phi1[2][n];
178 
179  for(int axes = 2; axes >= 0; axes--)
180  Mem.myfree_movable(d2phi1[axes]);
181 
182  for(int i = 0; i < 3; i++)
183  for(int j = i + 1; j < 3; j++)
184  {
185  mpi_printf("NGENIC_2LPT: Computing secondary source term, derivatices %d %d\n", i, j);
186 
187  fft_real *disp = (fft_real *)Mem.mymalloc("disp", maxfftsize * sizeof(fft_real));
188 
189  ngenic_setup_modes_in_kspace((fft_complex *)disp);
190  ngenic_get_derivate_from_fourier_field(i, j, (fft_complex *)disp);
191 
192  for(size_t n = 0; n < maxfftsize; n++)
193  Phi2[n] -= disp[n] * disp[n];
194 
195  Mem.myfree(disp);
196  }
197 
198  mpi_printf("NGENIC_2LPT: Secondary source term computed in real space\n");
199 
200  /* Do a forward inplace-FFT to get Phi2 in Fourier space */
201  ngenic_compute_transform_of_source_potential(Phi2);
202 
203  mpi_printf("NGENIC_2LPT: Done transforming it to k-space\n");
204 
205  for(int axes = 0; axes < 3; axes++)
206  {
207  mpi_printf("NGENIC_2LPT: Obtaining second order displacements for axes=%d\n", axes);
208 
209  fft_real *disp = (fft_real *)Mem.mymalloc("disp", maxfftsize * sizeof(fft_real));
210 
211  memcpy(disp, Phi2, maxfftsize * sizeof(fft_real));
212 
213  ngenic_get_derivate_from_fourier_field(axes, -1, (fft_complex *)disp);
214 
215  ngenic_readout_disp(disp, axes, 3.0 / 7, 3.0 / 7 * vel_prefac2);
216 
217  Mem.myfree(disp);
218  }
219 
220  Mem.myfree(Phi2);
221 #endif
222 
223  /* now carry out Zeldovich approximation, yielding first order displacements */
224  for(int axes = 0; axes < 3; axes++)
225  {
226  mpi_printf("NGENIC_2LPT: Obtaining Zeldovich displacements for axes=%d\n", axes);
227 
228  fft_real *disp = (fft_real *)Mem.mymalloc("disp", maxfftsize * sizeof(fft_real));
229 
230  ngenic_setup_modes_in_kspace((fft_complex *)disp);
231 
232  ngenic_get_derivate_from_fourier_field(axes, -1, (fft_complex *)disp);
233 
234  ngenic_readout_disp(disp, axes, 1.0, vel_prefac1);
235 
236  Mem.myfree(disp);
237  }
238  }
239 
240  /* now add displacement to Lagrangian coordinates */
241  double maxdisp = 0;
242  double maxvel = 0;
243  for(int n = 0; n < Sp->NumPart; n++)
244  {
245  double posdiff[3] = {Pdisp[n].deltapos[0], Pdisp[n].deltapos[1], Pdisp[n].deltapos[2]};
246 
247  MyIntPosType delta[3];
248  Sp->pos_to_signedintpos(posdiff, (MySignedIntPosType *)delta);
249 
250  for(int axes = 0; axes < 3; axes++)
251  {
252  Sp->P[n].IntPos[axes] += delta[axes];
253 
254  if(Pdisp[n].deltapos[axes] > maxdisp)
255  maxdisp = Pdisp[n].deltapos[axes];
256 
257  if(Sp->P[n].Vel[axes] > maxvel)
258  maxvel = Sp->P[n].Vel[axes];
259  }
260  }
261 
262  double max_disp_global, maxvel_global;
263  MPI_Reduce(&maxdisp, &max_disp_global, 1, MPI_DOUBLE, MPI_MAX, 0, Communicator);
264  MPI_Reduce(&maxvel, &maxvel_global, 1, MPI_DOUBLE, MPI_MAX, 0, Communicator);
265 
266  mpi_printf("\nNGENIC: Maximum displacement: %g, in units of the part-spacing= %g\n\n", max_disp_global,
267  max_disp_global / (All.BoxSize / NGENIC));
268  mpi_printf("\nNGENIC: Maximum velocity component: %g\n\n", maxvel_global);
269 
270  Mem.myfree(Pdisp);
271 
272  Mem.myfree(partin);
273  Mem.myfree(Rcvpm_offset);
274  Mem.myfree(Rcvpm_count);
275  Mem.myfree(Sndpm_offset);
276  Mem.myfree(Sndpm_count);
277 
279  {
280  if(Shmem.Island_ThisTask == 0)
281  {
282  // need to send this flag to the correct processor rank (our shared memory handler) so that the table is freed there
283  size_t tab_len = NGENIC * NGENIC * sizeof(unsigned int);
284  MPI_Send(&tab_len, sizeof(tab_len), MPI_BYTE, Shmem.MyShmRankInGlobal, TAG_TABLE_FREE, MPI_COMM_WORLD);
285  }
286  }
287  else
288  {
289  Mem.myfree(seedtable);
290  }
291 
292 #ifndef FFT_COLUMN_BASED
293  my_slab_based_fft_free(&myplan);
294 #else
295  my_column_based_fft_free(&myplan);
296 #endif
297 
298  FFTW(destroy_plan)(myplan.forward_plan_zdir);
299  FFTW(destroy_plan)(myplan.forward_plan_ydir);
300  FFTW(destroy_plan)(myplan.forward_plan_xdir);
301 
302  FFTW(destroy_plan)(myplan.backward_plan_zdir);
303  FFTW(destroy_plan)(myplan.backward_plan_ydir);
304  FFTW(destroy_plan)(myplan.backward_plan_xdir);
305 
306  print_spec();
307 
308  if(All.PowerSpectrumType == 2)
309  free_power_table();
310 
311  gsl_rng_free(rnd_generator);
312  gsl_rng_free(rnd_generator_conjugate);
313 
314  TIMER_STOP(CPU_NGENIC);
315 }
316 
317 void ngenic::ngenic_distribute_particles(void)
318 {
319  Sndpm_count = (size_t *)Mem.mymalloc("Sndpm_count", NTask * sizeof(size_t));
320  Sndpm_offset = (size_t *)Mem.mymalloc("Sndpm_offset", NTask * sizeof(size_t));
321  Rcvpm_count = (size_t *)Mem.mymalloc("Rcvpm_count", NTask * sizeof(size_t));
322  Rcvpm_offset = (size_t *)Mem.mymalloc("Rcvpm_offset", NTask * sizeof(size_t));
323 
324 #ifdef FFT_COLUMN_BASED
325  int columns = GRIDX * GRIDY;
326  int avg = (columns - 1) / NTask + 1;
327  int exc = NTask * avg - columns;
328  int tasklastsection = NTask - exc;
329  int pivotcol = tasklastsection * avg;
330 #endif
331 
332  /* determine the slabs/columns each particles accesses */
333  {
334  size_t *send_count = Sndpm_count;
335 
336  for(int j = 0; j < NTask; j++)
337  send_count[j] = 0;
338 
339  for(int i = 0; i < Sp->NumPart; i++)
340  {
341  int slab_x = Sp->P[i].IntPos[0] / INTCELL;
342  int slab_xx = slab_x + 1;
343 
344  if(slab_xx >= GRIDX)
345  slab_xx = 0;
346 
347 #ifndef FFT_COLUMN_BASED
348  int task0 = myplan.slab_to_task[slab_x];
349  int task1 = myplan.slab_to_task[slab_xx];
350 
351  send_count[task0]++;
352  if(task0 != task1)
353  send_count[task1]++;
354 #else
355  int slab_y = Sp->P[i].IntPos[1] / INTCELL;
356  int slab_yy = slab_y + 1;
357 
358  if(slab_yy >= GRIDY)
359  slab_yy = 0;
360 
361  int column0 = slab_x * GRIDY + slab_y;
362  int column1 = slab_x * GRIDY + slab_yy;
363  int column2 = slab_xx * GRIDY + slab_y;
364  int column3 = slab_xx * GRIDY + slab_yy;
365 
366  int task0, task1, task2, task3;
367 
368  if(column0 < pivotcol)
369  task0 = column0 / avg;
370  else
371  task0 = (column0 - pivotcol) / (avg - 1) + tasklastsection;
372 
373  if(column1 < pivotcol)
374  task1 = column1 / avg;
375  else
376  task1 = (column1 - pivotcol) / (avg - 1) + tasklastsection;
377 
378  if(column2 < pivotcol)
379  task2 = column2 / avg;
380  else
381  task2 = (column2 - pivotcol) / (avg - 1) + tasklastsection;
382 
383  if(column3 < pivotcol)
384  task3 = column3 / avg;
385  else
386  task3 = (column3 - pivotcol) / (avg - 1) + tasklastsection;
387 
388  send_count[task0]++;
389  if(task1 != task0)
390  send_count[task1]++;
391  if(task2 != task1 && task2 != task0)
392  send_count[task2]++;
393  if(task3 != task0 && task3 != task1 && task3 != task2)
394  send_count[task3]++;
395 #endif
396  }
397  }
398 
399  /* collect thread-specific offset table and collect the results from the other threads */
400  Sndpm_offset[0] = 0;
401  for(int i = 1; i < NTask; i++)
402  {
403  int ind = i;
404  int ind_prev = i - 1;
405 
406  Sndpm_offset[ind] = Sndpm_offset[ind_prev] + Sndpm_count[ind_prev];
407  }
408 
409  MPI_Alltoall(Sndpm_count, sizeof(size_t), MPI_BYTE, Rcvpm_count, sizeof(size_t), MPI_BYTE, Communicator);
410 
411  nimport = 0, nexport = 0, Rcvpm_offset[0] = 0, Sndpm_offset[0] = 0;
412  for(int j = 0; j < NTask; j++)
413  {
414  nexport += Sndpm_count[j];
415  nimport += Rcvpm_count[j];
416 
417  if(j > 0)
418  {
419  Sndpm_offset[j] = Sndpm_offset[j - 1] + Sndpm_count[j - 1];
420  Rcvpm_offset[j] = Rcvpm_offset[j - 1] + Rcvpm_count[j - 1];
421  }
422  }
423 
424  /* allocate import and export buffer */
425  partin = (partbuf *)Mem.mymalloc("partin", nimport * sizeof(partbuf));
426  partout = (partbuf *)Mem.mymalloc("partout", nexport * sizeof(partbuf));
427 
428  {
429  size_t *send_count = Sndpm_count;
430  size_t *send_offset = Sndpm_offset;
431 
432  for(int j = 0; j < NTask; j++)
433  send_count[j] = 0;
434 
435  /* fill export buffer */
436  for(int i = 0; i < Sp->NumPart; i++)
437  {
438  int slab_x = Sp->P[i].IntPos[0] / INTCELL;
439  int slab_xx = slab_x + 1;
440 
441  if(slab_xx >= GRIDX)
442  slab_xx = 0;
443 
444 #ifndef FFT_COLUMN_BASED
445  int task0 = myplan.slab_to_task[slab_x];
446  int task1 = myplan.slab_to_task[slab_xx];
447 
448  size_t ind0 = send_offset[task0] + send_count[task0]++;
449  for(int j = 0; j < 3; j++)
450  partout[ind0].IntPos[j] = Sp->P[i].IntPos[j];
451 
452  if(task0 != task1)
453  {
454  size_t ind1 = send_offset[task1] + send_count[task1]++;
455  for(int j = 0; j < 3; j++)
456  partout[ind1].IntPos[j] = Sp->P[i].IntPos[j];
457  }
458 #else
459  int slab_y = Sp->P[i].IntPos[1] / INTCELL;
460  int slab_yy = slab_y + 1;
461 
462  if(slab_yy >= GRIDY)
463  slab_yy = 0;
464 
465  int column0 = slab_x * GRIDY + slab_y;
466  int column1 = slab_x * GRIDY + slab_yy;
467  int column2 = slab_xx * GRIDY + slab_y;
468  int column3 = slab_xx * GRIDY + slab_yy;
469 
470  int task0, task1, task2, task3;
471 
472  if(column0 < pivotcol)
473  task0 = column0 / avg;
474  else
475  task0 = (column0 - pivotcol) / (avg - 1) + tasklastsection;
476 
477  if(column1 < pivotcol)
478  task1 = column1 / avg;
479  else
480  task1 = (column1 - pivotcol) / (avg - 1) + tasklastsection;
481 
482  if(column2 < pivotcol)
483  task2 = column2 / avg;
484  else
485  task2 = (column2 - pivotcol) / (avg - 1) + tasklastsection;
486 
487  if(column3 < pivotcol)
488  task3 = column3 / avg;
489  else
490  task3 = (column3 - pivotcol) / (avg - 1) + tasklastsection;
491 
492  size_t ind0 = send_offset[task0] + send_count[task0]++;
493  for(int j = 0; j < 3; j++)
494  partout[ind0].IntPos[j] = Sp->P[i].IntPos[j];
495 
496  if(task1 != task0)
497  {
498  size_t ind1 = send_offset[task1] + send_count[task1]++;
499 
500  for(int j = 0; j < 3; j++)
501  partout[ind1].IntPos[j] = Sp->P[i].IntPos[j];
502  }
503  if(task2 != task1 && task2 != task0)
504  {
505  size_t ind2 = send_offset[task2] + send_count[task2]++;
506 
507  for(int j = 0; j < 3; j++)
508  partout[ind2].IntPos[j] = Sp->P[i].IntPos[j];
509  }
510  if(task3 != task0 && task3 != task1 && task3 != task2)
511  {
512  size_t ind3 = send_offset[task3] + send_count[task3]++;
513 
514  for(int j = 0; j < 3; j++)
515  partout[ind3].IntPos[j] = Sp->P[i].IntPos[j];
516  }
517 #endif
518  }
519  }
520 
521  int flag_big = 0, flag_big_all;
522  for(int i = 0; i < NTask; i++)
523  if(Sndpm_count[i] * sizeof(partbuf) > MPI_MESSAGE_SIZELIMIT_IN_BYTES)
524  flag_big = 1;
525 
526  /* produce a flag if any of the send sizes is above our transfer limit, in this case we will
527  * transfer the data in chunks.
528  */
529  MPI_Allreduce(&flag_big, &flag_big_all, 1, MPI_INT, MPI_MAX, Communicator);
530 
531  /* exchange particle data */
532  myMPI_Alltoallv(partout, Sndpm_count, Sndpm_offset, partin, Rcvpm_count, Rcvpm_offset, sizeof(partbuf), flag_big_all, Communicator);
533 
534  Mem.myfree(partout);
535 }
536 
537 void ngenic::ngenic_compute_transform_of_source_potential(fft_real *pot)
538 {
539  fft_real *workspace = (fft_real *)Mem.mymalloc("workspace", maxfftsize * sizeof(fft_real));
540 
541 #ifndef FFT_COLUMN_BASED
542  my_slab_based_fft(&myplan, &pot[0], &workspace[0], +1);
543 #else
544  my_column_based_fft(&myplan, pot, workspace, +1); // result is in workspace, not in Phi2
545  memcpy(pot, workspace, maxfftsize * sizeof(fft_real));
546 #endif
547 
548  Mem.myfree(workspace);
549 
550  double normfac = 1 / (((double)GRIDX) * GRIDY * GRIDZ);
551 
552  for(size_t n = 0; n < maxfftsize; n++)
553  pot[n] *= normfac;
554 }
555 
556 /* this function returns the component 'axes' (0, 1, or 2) of the gradient of a field phi,
557  * which is the solution of nabla^2 phi = grid.
558  * We input the Fourier transform of grid to the function, and this field is overwritten with
559  * the gradient.
560  */
561 void ngenic::ngenic_get_derivate_from_fourier_field(int axes1, int axes2, fft_complex *fft_of_grid)
562 {
563  double kfacx = 2.0 * M_PI / All.BoxSize;
564  double kfacy = 2.0 * M_PI / All.BoxSize;
565  double kfacz = 2.0 * M_PI / All.BoxSize;
566 
567 #ifdef FFT_COLUMN_BASED
568  for(large_array_offset ip = 0; ip < myplan.second_transposed_ncells; ip += GRIDX)
569  {
570  large_array_offset ipcell = ip + ((large_array_offset)myplan.second_transposed_firstcol) * GRIDX;
571  int y = ipcell / (GRIDX * GRIDz);
572  int yr = ipcell % (GRIDX * GRIDz);
573  int z = yr / GRIDX;
574  if(yr % GRIDX != 0) // Note: check that x-columns are really complete
575  Terminate("x-column seems incomplete. This is not expected");
576 #else
577  for(int y = myplan.slabstart_y; y < myplan.slabstart_y + myplan.nslab_y; y++)
578  for(int z = 0; z < GRIDz; z++)
579  {
580 #endif
581 
582  for(int x = 0; x < GRIDX; x++)
583  {
584  int xx, yy, zz;
585 
586  if(x >= (GRIDX / 2))
587  xx = x - GRIDX;
588  else
589  xx = x;
590  if(y >= (GRIDY / 2))
591  yy = y - GRIDY;
592  else
593  yy = y;
594  if(z >= (GRIDZ / 2))
595  zz = z - GRIDZ;
596  else
597  zz = z;
598 
599  double kvec[3];
600  kvec[0] = kfacx * xx;
601  kvec[1] = kfacy * yy;
602  kvec[2] = kfacz * zz;
603 
604  double kmag2 = kvec[0] * kvec[0] + kvec[1] * kvec[1] + kvec[2] * kvec[2];
605 
606  double smth = 1;
607 
608 #ifdef CORRECT_CIC
609  if(axes2 >= 0)
610  {
611  /* do deconvolution of CIC interpolation */
612  double fx = 1, fy = 1, fz = 1;
613  if(kvec[0] != 0)
614  {
615  fx = (kvec[0] * All.BoxSize / 2) / NGENIC;
616  fx = sin(fx) / fx;
617  }
618  if(kvec[1] != 0)
619  {
620  fy = (kvec[1] * All.BoxSize / 2) / NGENIC;
621  fy = sin(fy) / fy;
622  }
623  if(kvec[2] != 0)
624  {
625  fz = (kvec[2] * All.BoxSize / 2) / NGENIC;
626  fz = sin(fz) / fz;
627  }
628  double ff = 1 / (fx * fy * fz);
629  smth = ff * ff;
630  /* end deconvolution */
631  }
632 #endif
633 
634 #ifndef FFT_COLUMN_BASED
635  large_array_offset elem = ((large_array_offset)GRIDz) * (GRIDX * (y - myplan.slabstart_y) + x) + z;
636 #else
637  large_array_offset elem = ip + x;
638 #endif
639 
640  fft_real re = smth * fft_of_grid[elem][0];
641  fft_real im = smth * fft_of_grid[elem][1];
642 
643  if(axes2 < 0)
644  {
645  /* first derivative */
646  fft_of_grid[elem][0] = (kmag2 > 0.0 ? -kvec[axes1] / kmag2 * im : 0.0);
647  fft_of_grid[elem][1] = (kmag2 > 0.0 ? kvec[axes1] / kmag2 * re : 0.0);
648  }
649  else
650  {
651  /* second derivative */
652  fft_of_grid[elem][0] = (kmag2 > 0.0 ? kvec[axes1] * kvec[axes2] / kmag2 * re : 0.0);
653  fft_of_grid[elem][1] = (kmag2 > 0.0 ? kvec[axes1] * kvec[axes2] / kmag2 * im : 0.0);
654  }
655  }
656  }
657 
658 #ifdef FFT_COLUMN_BASED
659  if(myplan.second_transposed_firstcol == 0)
660  fft_of_grid[0][0] = fft_of_grid[0][1] = 0.0;
661 #else
662  if(myplan.slabstart_y == 0)
663  fft_of_grid[0][0] = fft_of_grid[0][1] = 0.0;
664 #endif
665 
666  /* Do the inverse FFT to get the displacement field */
667  fft_real *workspace = (fft_real *)Mem.mymalloc("workspace", maxfftsize * sizeof(fft_real));
668 
669 #ifndef FFT_COLUMN_BASED
670  my_slab_based_fft(&myplan, &fft_of_grid[0], &workspace[0], -1);
671 #else
672  my_column_based_fft(&myplan, fft_of_grid, workspace, -1); // result is in workspace
673  memcpy(fft_of_grid, workspace, maxfftsize * sizeof(fft_real));
674 #endif
675 
676  Mem.myfree(workspace);
677 }
678 
679 void ngenic::ngenic_setup_modes_in_kspace(fft_complex *fft_of_grid)
680 {
681  double fac = pow(2 * M_PI / All.BoxSize, 1.5);
682 
683  /* clear local FFT-mesh */
684  memset(fft_of_grid, 0, maxfftsize * sizeof(fft_real));
685 
686  mpi_printf("NGENIC: setting up modes in kspace...\n");
687 
688  double kfacx = 2.0 * M_PI / All.BoxSize;
689  double kfacy = 2.0 * M_PI / All.BoxSize;
690  double kfacz = 2.0 * M_PI / All.BoxSize;
691 
692 #ifdef FFT_COLUMN_BASED
693  for(large_array_offset ip = 0; ip < myplan.second_transposed_ncells; ip += GRIDX)
694  {
695  large_array_offset ipcell = ip + ((large_array_offset)myplan.second_transposed_firstcol) * GRIDX;
696  int y = ipcell / (GRIDX * GRIDz);
697  int yr = ipcell % (GRIDX * GRIDz);
698  int z = yr / GRIDX;
699  if(yr % GRIDX != 0) // Note: check that x-columns are really complete
700  Terminate("x-column seems incomplete. This is not expected");
701 #else
702  for(int y = myplan.slabstart_y; y < myplan.slabstart_y + myplan.nslab_y; y++)
703  for(int z = 0; z < GRIDz; z++)
704  {
705 #endif
706 
707  // let's use the y and z plane here, because the x-column is available in full for both FFT schemes
708  gsl_rng_set(rnd_generator, seedtable[y * NGENIC + z]);
709 
710  // we also create the modes for the conjugate column so that we can fulfill the reality constraint
711  // by using the conjugate of the corresponding mode if needed
712  int y_conj, z_conj;
713  if(y > 0)
714  y_conj = GRIDY - y;
715  else
716  y_conj = 0;
717 
718  if(z > 0)
719  z_conj = GRIDZ - z;
720  else
721  z_conj = 0;
722 
723  gsl_rng_set(rnd_generator_conjugate, seedtable[y_conj * NGENIC + z_conj]);
724 
725 #ifndef NGENIC_FIX_MODE_AMPLITUDES
726  double mode_ampl[GRIDX], mode_ampl_conj[GRIDX];
727 #endif
728  double mode_phase[GRIDX], mode_phase_conj[GRIDX];
729 
730  // in this loop we precompute the modes for both columns, from low-k to high-k,
731  // so that after an increase of resolution, one gets the same modes plus new ones
732  for(int xoff = 0; xoff < GRIDX / 2; xoff++)
733  for(int side = 0; side < 2; side++)
734  {
735  int x;
736  if(side == 0)
737  x = xoff;
738  else
739  x = GRIDX - 1 - xoff;
740 
741  double phase = gsl_rng_uniform(rnd_generator) * 2 * M_PI;
742  double phase_conj = gsl_rng_uniform(rnd_generator_conjugate) * 2 * M_PI;
743 
744 #ifdef NGENIC_MIRROR_PHASES
745  phase += M_PI;
746  if(phase >= 2 * M_PI)
747  phase -= 2 * M_PI;
748 
749  phase_conj += M_PI;
750  if(phase_conj >= 2 * M_PI)
751  phase_conj -= 2 * M_PI;
752 #endif
753  mode_phase[x] = phase;
754  mode_phase_conj[x] = phase_conj;
755 
756 #ifndef NGENIC_FIX_MODE_AMPLITUDES
757  double ampl;
758  do
759  {
760  ampl = gsl_rng_uniform(rnd_generator);
761  }
762  while(ampl == 0);
763 
764  double ampl_conj;
765  do
766  {
767  ampl_conj = gsl_rng_uniform(rnd_generator_conjugate);
768  }
769  while(ampl_conj == 0);
770 
771  mode_ampl[x] = ampl;
772 
773  mode_ampl_conj[x] = ampl_conj;
774 #endif
775  }
776 
777  // now let's populate the full x-column of modes
778  for(int x = 0; x < GRIDX; x++)
779  {
780  int xx, yy, zz;
781 
782  if(x >= (GRIDX / 2))
783  xx = x - GRIDX;
784  else
785  xx = x;
786  if(y >= (GRIDY / 2))
787  yy = y - GRIDY;
788  else
789  yy = y;
790  if(z >= (GRIDZ / 2))
791  zz = z - GRIDZ;
792  else
793  zz = z;
794 
795  double kvec[3];
796  kvec[0] = kfacx * xx;
797  kvec[1] = kfacy * yy;
798  kvec[2] = kfacz * zz;
799 
800  double kmag2 = kvec[0] * kvec[0] + kvec[1] * kvec[1] + kvec[2] * kvec[2];
801  double kmag = sqrt(kmag2);
802 
803  if(All.SphereMode == 1)
804  {
805  if(kmag * All.BoxSize / (2 * M_PI) > All.NSample / 2) /* select a sphere in k-space */
806  continue;
807  }
808  else
809  {
810  if(fabs(kvec[0]) * All.BoxSize / (2 * M_PI) > All.NSample / 2)
811  continue;
812  if(fabs(kvec[1]) * All.BoxSize / (2 * M_PI) > All.NSample / 2)
813  continue;
814  if(fabs(kvec[2]) * All.BoxSize / (2 * M_PI) > All.NSample / 2)
815  continue;
816  }
817 
818  double p_of_k = ngenic_power_spec(kmag);
819 
820  /* Note: kmag and p_of_k are unaffected by whether or not we use the conjugate mode */
821 
822  int conjugate_flag = 0;
823 
824  if(z == 0 || z == GRIDZ / 2)
825  {
826  if(x > GRIDX / 2 && x < GRIDX)
827  conjugate_flag = 1;
828  else if(x == 0 || x == GRIDX / 2)
829  {
830  if(y > GRIDY / 2 && y < GRIDY)
831  conjugate_flag = 1;
832  else if(y == 0 || y == GRIDX / 2)
833  {
834  continue;
835  }
836  }
837  }
838 
839  // determine location of conjugate mode in x column
840  int x_conj;
841 
842  if(x > 0)
843  x_conj = GRIDX - x;
844  else
845  x_conj = 0;
846 
847 #ifndef NGENIC_FIX_MODE_AMPLITUDES
848  if(conjugate_flag)
849  p_of_k *= -log(mode_ampl_conj[x_conj]);
850  else
851  p_of_k *= -log(mode_ampl[x]);
852 #endif
853 
854  double delta = fac * sqrt(p_of_k) / Dplus; /* scale back to starting redshift */
855 
856 #ifndef FFT_COLUMN_BASED
857  large_array_offset elem = ((large_array_offset)GRIDz) * (GRIDX * (y - myplan.slabstart_y) + x) + z;
858 #else
859  large_array_offset elem = ip + x;
860 #endif
861 
862  if(conjugate_flag)
863  {
864  fft_of_grid[elem][0] = delta * cos(mode_phase_conj[x_conj]);
865  fft_of_grid[elem][1] = -delta * sin(mode_phase_conj[x_conj]);
866  }
867  else
868  {
869  fft_of_grid[elem][0] = delta * cos(mode_phase[x]);
870  fft_of_grid[elem][1] = delta * sin(mode_phase[x]);
871  }
872  }
873  }
874 }
875 
876 void ngenic::ngenic_readout_disp(fft_real *grid, int axis, double pfac, double vfac)
877 {
878 #ifdef FFT_COLUMN_BASED
879  int columns = GRIDX * GRIDY;
880  int avg = (columns - 1) / NTask + 1;
881  int exc = NTask * avg - columns;
882  int tasklastsection = NTask - exc;
883  int pivotcol = tasklastsection * avg;
884 #endif
885 
886  double *flistin = (double *)Mem.mymalloc("flistin", nimport * sizeof(double));
887  double *flistout = (double *)Mem.mymalloc("flistout", nexport * sizeof(double));
888 
889  for(size_t i = 0; i < nimport; i++)
890  {
891  flistin[i] = 0;
892 
893  int slab_x = partin[i].IntPos[0] / INTCELL;
894  MyIntPosType rmd_x = partin[i].IntPos[0] % INTCELL;
895 
896  int slab_y = partin[i].IntPos[1] / INTCELL;
897  MyIntPosType rmd_y = partin[i].IntPos[1] % INTCELL;
898 
899  int slab_z = partin[i].IntPos[2] / INTCELL;
900  MyIntPosType rmd_z = partin[i].IntPos[2] % INTCELL;
901 
902  double dx = rmd_x * (1.0 / INTCELL);
903  double dy = rmd_y * (1.0 / INTCELL);
904  double dz = rmd_z * (1.0 / INTCELL);
905 
906  int slab_xx = slab_x + 1;
907  int slab_yy = slab_y + 1;
908  int slab_zz = slab_z + 1;
909 
910  if(slab_xx >= GRIDX)
911  slab_xx = 0;
912  if(slab_yy >= GRIDY)
913  slab_yy = 0;
914  if(slab_zz >= GRIDZ)
915  slab_zz = 0;
916 
917 #ifndef FFT_COLUMN_BASED
918  if(myplan.slab_to_task[slab_x] == ThisTask)
919  {
920  slab_x -= myplan.first_slab_x_of_task[ThisTask];
921 
922  flistin[i] += grid[FI(slab_x, slab_y, slab_z)] * (1.0 - dx) * (1.0 - dy) * (1.0 - dz) +
923  grid[FI(slab_x, slab_y, slab_zz)] * (1.0 - dx) * (1.0 - dy) * (dz) +
924  grid[FI(slab_x, slab_yy, slab_z)] * (1.0 - dx) * (dy) * (1.0 - dz) +
925  grid[FI(slab_x, slab_yy, slab_zz)] * (1.0 - dx) * (dy) * (dz);
926  }
927 
928  if(myplan.slab_to_task[slab_xx] == ThisTask)
929  {
930  slab_xx -= myplan.first_slab_x_of_task[ThisTask];
931 
932  flistin[i] += grid[FI(slab_xx, slab_y, slab_z)] * (dx) * (1.0 - dy) * (1.0 - dz) +
933  grid[FI(slab_xx, slab_y, slab_zz)] * (dx) * (1.0 - dy) * (dz) +
934  grid[FI(slab_xx, slab_yy, slab_z)] * (dx) * (dy) * (1.0 - dz) +
935  grid[FI(slab_xx, slab_yy, slab_zz)] * (dx) * (dy) * (dz);
936  }
937 #else
938  int column0 = slab_x * GRIDY + slab_y;
939  int column1 = slab_x * GRIDY + slab_yy;
940  int column2 = slab_xx * GRIDY + slab_y;
941  int column3 = slab_xx * GRIDY + slab_yy;
942 
943  if(column0 >= myplan.firstcol_XY && column0 <= myplan.lastcol_XY)
944  {
945  flistin[i] += grid[FC(column0, slab_z)] * (1.0 - dx) * (1.0 - dy) * (1.0 - dz) +
946  grid[FC(column0, slab_zz)] * (1.0 - dx) * (1.0 - dy) * (dz);
947  }
948  if(column1 >= myplan.firstcol_XY && column1 <= myplan.lastcol_XY)
949  {
950  flistin[i] +=
951  grid[FC(column1, slab_z)] * (1.0 - dx) * (dy) * (1.0 - dz) + grid[FC(column1, slab_zz)] * (1.0 - dx) * (dy) * (dz);
952  }
953 
954  if(column2 >= myplan.firstcol_XY && column2 <= myplan.lastcol_XY)
955  {
956  flistin[i] +=
957  grid[FC(column2, slab_z)] * (dx) * (1.0 - dy) * (1.0 - dz) + grid[FC(column2, slab_zz)] * (dx) * (1.0 - dy) * (dz);
958  }
959 
960  if(column3 >= myplan.firstcol_XY && column3 <= myplan.lastcol_XY)
961  {
962  flistin[i] += grid[FC(column3, slab_z)] * (dx) * (dy) * (1.0 - dz) + grid[FC(column3, slab_zz)] * (dx) * (dy) * (dz);
963  }
964 #endif
965  }
966 
967  /* exchange the potential component data */
968  int flag_big = 0, flag_big_all;
969  for(int i = 0; i < NTask; i++)
970  if(Sndpm_count[i] * sizeof(double) > MPI_MESSAGE_SIZELIMIT_IN_BYTES)
971  flag_big = 1;
972 
973  /* produce a flag if any of the send sizes is above our transfer limit, in this case we will
974  * transfer the data in chunks.
975  */
976  MPI_Allreduce(&flag_big, &flag_big_all, 1, MPI_INT, MPI_MAX, Communicator);
977 
978  /* exchange data */
979  myMPI_Alltoallv(flistin, Rcvpm_count, Rcvpm_offset, flistout, Sndpm_count, Sndpm_offset, sizeof(double), flag_big_all, Communicator);
980 
981  {
982  size_t *send_count = Sndpm_count;
983  size_t *send_offset = Sndpm_offset;
984 
985  for(int j = 0; j < NTask; j++)
986  send_count[j] = 0;
987 
988  for(int i = 0; i < Sp->NumPart; i++)
989  {
990  int slab_x = Sp->P[i].IntPos[0] / INTCELL;
991  int slab_xx = slab_x + 1;
992 
993  if(slab_xx >= GRIDX)
994  slab_xx = 0;
995 
996 #ifndef FFT_COLUMN_BASED
997  int task0 = myplan.slab_to_task[slab_x];
998  int task1 = myplan.slab_to_task[slab_xx];
999 
1000  double value = flistout[send_offset[task0] + send_count[task0]++];
1001 
1002  if(task0 != task1)
1003  value += flistout[send_offset[task1] + send_count[task1]++];
1004 #else
1005  int slab_y = Sp->P[i].IntPos[1] / INTCELL;
1006  int slab_yy = slab_y + 1;
1007 
1008  if(slab_yy >= GRIDY)
1009  slab_yy = 0;
1010 
1011  int column0 = slab_x * GRIDY + slab_y;
1012  int column1 = slab_x * GRIDY + slab_yy;
1013  int column2 = slab_xx * GRIDY + slab_y;
1014  int column3 = slab_xx * GRIDY + slab_yy;
1015 
1016  int task0, task1, task2, task3;
1017 
1018  if(column0 < pivotcol)
1019  task0 = column0 / avg;
1020  else
1021  task0 = (column0 - pivotcol) / (avg - 1) + tasklastsection;
1022 
1023  if(column1 < pivotcol)
1024  task1 = column1 / avg;
1025  else
1026  task1 = (column1 - pivotcol) / (avg - 1) + tasklastsection;
1027 
1028  if(column2 < pivotcol)
1029  task2 = column2 / avg;
1030  else
1031  task2 = (column2 - pivotcol) / (avg - 1) + tasklastsection;
1032 
1033  if(column3 < pivotcol)
1034  task3 = column3 / avg;
1035  else
1036  task3 = (column3 - pivotcol) / (avg - 1) + tasklastsection;
1037 
1038  double value = flistout[send_offset[task0] + send_count[task0]++];
1039 
1040  if(task1 != task0)
1041  value += flistout[send_offset[task1] + send_count[task1]++];
1042 
1043  if(task2 != task1 && task2 != task0)
1044  value += flistout[send_offset[task2] + send_count[task2]++];
1045 
1046  if(task3 != task0 && task3 != task1 && task3 != task2)
1047  value += flistout[send_offset[task3] + send_count[task3]++];
1048 #endif
1049 
1050  Pdisp[i].deltapos[axis] += pfac * value;
1051  Sp->P[i].Vel[axis] += vfac * value;
1052  }
1053  }
1054 
1055  Mem.myfree(flistout);
1056  Mem.myfree(flistin);
1057 }
1058 
1059 void ngenic::ngenic_initialize_ffts(void)
1060 {
1061 #ifdef LONG_X
1062  if(LONG_X != (int)(LONG_X))
1063  Terminate("LONG_X must be an integer if used with PMGRID");
1064 #endif
1065 
1066 #ifdef LONG_Y
1067  if(LONG_Y != (int)(LONG_Y))
1068  Terminate("LONG_Y must be an integer if used with PMGRID");
1069 #endif
1070 
1071 #ifdef LONG_Z
1072  if(LONG_Z != (int)(LONG_Z))
1073  Terminate("LONG_Z must be an integer if used with PMGRID");
1074 #endif
1075 
1076  /* Set up the FFTW-3 plan files. */
1077  int ndimx[1] = {GRIDX}; /* dimension of the 1D transforms */
1078  int ndimy[1] = {GRIDY}; /* dimension of the 1D transforms */
1079  int ndimz[1] = {GRIDZ}; /* dimension of the 1D transforms */
1080 
1081  int max_GRID2 = 2 * (std::max<int>(std::max<int>(GRIDX, GRIDY), GRIDZ) / 2 + 1);
1082 
1083  /* temporarily allocate some arrays to make sure that out-of-place plans are created */
1084  fft_real *DispGrid = (fft_real *)Mem.mymalloc("DispGrid", max_GRID2 * sizeof(fft_real));
1085  fft_complex *fft_of_DispGrid = (fft_complex *)Mem.mymalloc("DispGrid", max_GRID2 * sizeof(fft_real));
1086 
1087 #ifdef DOUBLEPRECISION_FFTW
1088  int alignflag = 0;
1089 #else
1090  /* for single precision, the start of our FFT columns is presently only guaranteed to be 8-byte aligned */
1091  int alignflag = FFTW_UNALIGNED;
1092 #endif
1093 
1094 #ifndef FFT_COLUMN_BASED
1095  int stride = GRIDz;
1096 #else
1097  int stride = 1;
1098 #endif
1099 
1100  myplan.backward_plan_xdir =
1101  FFTW(plan_many_dft)(1, ndimx, 1, (fft_complex *)DispGrid, 0, stride, GRIDz * GRIDX, fft_of_DispGrid, 0, stride, GRIDz * GRIDX,
1102  FFTW_BACKWARD, FFTW_ESTIMATE | FFTW_DESTROY_INPUT | alignflag);
1103 
1104  myplan.backward_plan_ydir =
1105  FFTW(plan_many_dft)(1, ndimy, 1, (fft_complex *)DispGrid, 0, stride, GRIDz * GRIDY, fft_of_DispGrid, 0, stride, GRIDz * GRIDY,
1106  FFTW_BACKWARD, FFTW_ESTIMATE | FFTW_DESTROY_INPUT | alignflag);
1107 
1108  myplan.backward_plan_zdir = FFTW(plan_many_dft_c2r)(1, ndimz, 1, (fft_complex *)DispGrid, 0, 1, GRIDz, (fft_real *)fft_of_DispGrid,
1109  0, 1, GRID2, FFTW_ESTIMATE | FFTW_DESTROY_INPUT | alignflag);
1110 
1111  myplan.forward_plan_xdir = FFTW(plan_many_dft)(1, ndimx, 1, (fft_complex *)DispGrid, 0, stride, GRIDz * GRIDX, fft_of_DispGrid, 0,
1112  stride, GRIDz * GRIDX, FFTW_FORWARD, FFTW_ESTIMATE | FFTW_DESTROY_INPUT | alignflag);
1113 
1114  myplan.forward_plan_ydir = FFTW(plan_many_dft)(1, ndimy, 1, (fft_complex *)DispGrid, 0, stride, GRIDz * GRIDY, fft_of_DispGrid, 0,
1115  stride, GRIDz * GRIDY, FFTW_FORWARD, FFTW_ESTIMATE | FFTW_DESTROY_INPUT | alignflag);
1116 
1117  myplan.forward_plan_zdir = FFTW(plan_many_dft_r2c)(1, ndimz, 1, DispGrid, 0, 1, GRID2, (fft_complex *)fft_of_DispGrid, 0, 1, GRIDz,
1118  FFTW_ESTIMATE | FFTW_DESTROY_INPUT | alignflag);
1119 
1120  Mem.myfree(fft_of_DispGrid);
1121  Mem.myfree(DispGrid);
1122 
1123 #ifndef FFT_COLUMN_BASED
1124 
1125  my_slab_based_fft_init(&myplan, GRIDX, GRIDY, GRIDZ);
1126 
1127  maxfftsize = std::max<int>(myplan.largest_x_slab * GRIDY, myplan.largest_y_slab * GRIDX) * ((size_t)GRID2);
1128 
1129 #else
1130 
1131  my_column_based_fft_init(&myplan, GRIDX, GRIDY, GRIDZ);
1132 
1133  maxfftsize = myplan.max_datasize;
1134 
1135 #endif
1136 }
1137 
1138 void ngenic::print_spec(void)
1139 {
1140  if(ThisTask == 0)
1141  {
1142  char buf[3 * MAXLEN_PATH];
1143  sprintf(buf, "%s/inputspec_%s.txt", All.OutputDir, All.SnapshotFileBase);
1144 
1145  FILE *fd = fopen(buf, "w");
1146 
1147  double gf = ngenic_growth_factor(0.001, 1.0) / (1.0 / 0.001);
1148 
1149  double DDD = ngenic_growth_factor(All.cf_atime, 1.0);
1150 
1151  fprintf(fd, "%12g %12g\n", All.cf_redshift, DDD); /* print actual starting redshift and
1152  linear growth factor for this cosmology */
1153 
1154  double kstart = 2 * M_PI / (1000.0 * (1e6 * PARSEC / All.UnitLength_in_cm)); /* 1000 Mpc/h */
1155  double kend = 2 * M_PI / (0.001 * (3.1e6 * PARSEC / All.UnitLength_in_cm)); /* 0.001 Mpc/h */
1156 
1157  for(double k = kstart; k < kend; k *= 1.025)
1158  {
1159  double po = ngenic_power_spec(k);
1160  double dl = 4.0 * M_PI * k * k * k * po;
1161 
1162  double kf = 0.5;
1163 
1164  double po2 = ngenic_power_spec(1.001 * k * kf);
1165  double po1 = ngenic_power_spec(k * kf);
1166  double dnl = 0, knl = 0;
1167 
1168  if(po != 0 && po1 != 0 && po2 != 0)
1169  {
1170  double neff = (log(po2) - log(po1)) / (log(1.001 * k * kf) - log(k * kf));
1171 
1172  if(1 + neff / 3 > 0)
1173  {
1174  double A = 0.482 * pow(1 + neff / 3, -0.947);
1175  double B = 0.226 * pow(1 + neff / 3, -1.778);
1176  double alpha = 3.310 * pow(1 + neff / 3, -0.244);
1177  double beta = 0.862 * pow(1 + neff / 3, -0.287);
1178  double V = 11.55 * pow(1 + neff / 3, -0.423) * 1.2;
1179 
1180  dnl = fnl(dl, A, B, alpha, beta, V, gf);
1181  knl = k * pow(1 + dnl, 1.0 / 3);
1182  }
1183  }
1184 
1185  fprintf(fd, "%12g %12g %12g %12g\n", k, dl, knl, dnl);
1186  }
1187  fclose(fd);
1188  }
1189 
1190  /* create an output file with the growth factor as a function of redshift, which
1191  * can be handy in analysis routines
1192  */
1193  if(ThisTask == 0)
1194  {
1195  if(All.cf_atime < 1.0)
1196  {
1197  char buf[3 * MAXLEN_PATH];
1198  sprintf(buf, "%s/growthfac.txt", All.OutputDir);
1199 
1200  FILE *fd = fopen(buf, "w");
1201 
1202  const int NSTEPS = 100;
1203 
1204  for(int i = 0; i <= NSTEPS; i++)
1205  {
1206  double a = exp(log(All.cf_atime) + ((log(1.0) - log(All.cf_atime)) / NSTEPS) * i);
1207 
1208  double d = ngenic_growth_factor(a, 1.0);
1209 
1210  fprintf(fd, "%12g %12g\n", a, 1.0 / d);
1211  }
1212  fclose(fd);
1213  }
1214  }
1215 
1216  /* also create an output file with the sigma(M), i.e. the variance as a function of the mass scale,
1217  * for simplifying analytic mass function plots, such as Press-Schechter
1218  */
1219 
1220  if(ThisTask == 0)
1221  {
1222  char buf[3 * MAXLEN_PATH];
1223  sprintf(buf, "%s/variance.txt", All.OutputDir);
1224 
1225  FILE *fd = fopen(buf, "w");
1226 
1227  double rhoback = All.Omega0 * 3 * All.Hubble * All.Hubble / (8 * M_PI * All.G);
1228 
1229  for(double M = 1.0e5; M <= 1.01e16; M *= pow(10.0, 1.0 / 16)) // mass in solar masses / h
1230  {
1231  double Mint = M * (SOLAR_MASS / All.UnitMass_in_g);
1232 
1233  double R = pow(3.0 * Mint / (4 * M_PI * rhoback), 1.0 / 3);
1234 
1235  double sigma2 = ngenic_tophat_sigma2(R);
1236 
1237  fprintf(fd, "%12g %12g %12g %12g\n", M, Mint, sigma2, sqrt(sigma2));
1238  }
1239 
1240  fclose(fd);
1241  }
1242 }
1243 
1244 #endif
global_data_all_processes All
Definition: main.cc:40
MPI_Comm SharedMemComm
int Island_ThisTask
int Island_NTask
void ** SharedMemBaseAddr
int MyShmRankInGlobal
#define SOLAR_MASS
Definition: constants.h:119
#define MPI_MESSAGE_SIZELIMIT_IN_BYTES
Definition: constants.h:53
#define MAXLEN_PATH
Definition: constants.h:300
#define PARSEC
Definition: constants.h:123
#define M_PI
Definition: constants.h:56
#define LONG_Y
Definition: dtypes.h:369
uint32_t MyIntPosType
Definition: dtypes.h:35
int32_t MySignedIntPosType
Definition: dtypes.h:36
#define LONG_X
Definition: dtypes.h:361
#define LONG_Z
Definition: dtypes.h:377
#define TIMER_START(counter)
Starts the timer counter.
Definition: logs.h:197
#define TIMER_STOP(counter)
Stops the timer counter.
Definition: logs.h:220
#define Terminate(...)
Definition: macros.h:19
shmem Shmem
Definition: main.cc:45
#define TAG_TABLE_FREE
Definition: mpi_utils.h:24
#define TAG_DMOM
Definition: mpi_utils.h:30
#define TAG_TABLE_ALLOC
Definition: mpi_utils.h:23
void myMPI_Alltoallv(void *sendbuf, size_t *sendcounts, size_t *sdispls, void *recvbuf, size_t *recvcounts, size_t *rdispls, int len, int big_flag, MPI_Comm comm)
Definition: myalltoall.cc:177
#define DDD(x)
Definition: mymalloc.h:24
memory Mem
Definition: main.cc:44
expr exp(half arg)
Definition: half.hpp:2724
expr cos(half arg)
Definition: half.hpp:2823
expr sin(half arg)
Definition: half.hpp:2816
half fabs(half arg)
Definition: half.hpp:2627
expr log(half arg)
Definition: half.hpp:2745
expr pow(half base, half exp)
Definition: half.hpp:2803
expr sqrt(half arg)
Definition: half.hpp:2777
#define FFTW(x)
Definition: pm_mpi_fft.h:22
char OutputDir[MAXLEN_PATH]
Definition: allvars.h:272
char SnapshotFileBase[MAXLEN_PATH]
Definition: allvars.h:272
void set_cosmo_factors_for_current_time(void)
Definition: allvars.cc:20