Line data Source code
1 : /*----------------------------------------------------------------------------*/
2 : /* CP2K: A general program to perform molecular dynamics simulations */
3 : /* Copyright 2000-2024 CP2K developers group <https://cp2k.org> */
4 : /* */
5 : /* SPDX-License-Identifier: BSD-3-Clause */
6 : /*----------------------------------------------------------------------------*/
7 :
8 : #include <assert.h>
9 : #include <limits.h>
10 : #include <math.h>
11 : #include <stdio.h>
12 : #include <stdlib.h>
13 : #include <string.h>
14 :
15 : #include "../common/grid_common.h"
16 : #include "../common/grid_library.h"
17 :
18 : #if (GRID_DO_COLLOCATE)
19 : #define GRID_CONST_WHEN_COLLOCATE const
20 : #define GRID_CONST_WHEN_INTEGRATE
21 : #else
22 : #define GRID_CONST_WHEN_COLLOCATE
23 : #define GRID_CONST_WHEN_INTEGRATE const
24 : #endif
25 :
26 : /*******************************************************************************
27 : * \brief Collocates coefficients C_x onto the grid for orthorhombic case.
28 : * \author Ole Schuett
29 : ******************************************************************************/
30 : static inline void
31 221940 : ortho_cx_to_grid(const int lp, const int k1, const int k2, const int j1,
32 : const int j2, const int cmax,
33 221940 : const double pol[3][lp + 1][2 * cmax + 1],
34 : const int map[3][2 * cmax + 1], const double dh[3][3],
35 : const double dh_inv[3][3], const double kremain,
36 : const int npts_local[3], GRID_CONST_WHEN_COLLOCATE double *cx,
37 : GRID_CONST_WHEN_INTEGRATE double *grid) {
38 :
39 221940 : const int kg1 = map[2][k1 + cmax];
40 221940 : const int kg2 = map[2][k2 + cmax];
41 221940 : const int jg1 = map[1][j1 + cmax];
42 221940 : const int jg2 = map[1][j2 + cmax];
43 :
44 : // For this innermost dimension we're going sequentially instead of in pairs.
45 : // Hence we're processing four points at once, which is well suited for AVX2.
46 221940 : const int jd = (2 * j1 - 1) / 2; // distance from center in grid points
47 221940 : const double jr = jd * dh[1][1]; // distance from center in a.u.
48 221940 : const double jremain = kremain - jr * jr;
49 221940 : const int istart = (int)ceil(-1e-8 - sqrt(fmax(0.0, jremain)) * dh_inv[0][0]);
50 221940 : const int iend = 1 - istart;
51 :
52 3833244 : for (int i = istart; i <= iend; i++) {
53 3611304 : const int ig = map[0][i + cmax];
54 :
55 3611304 : const int stride = npts_local[1] * npts_local[0];
56 3611304 : const int grid_index_0 = kg1 * stride + jg1 * npts_local[0] + ig;
57 3611304 : const int grid_index_1 = kg2 * stride + jg1 * npts_local[0] + ig;
58 3611304 : const int grid_index_2 = kg1 * stride + jg2 * npts_local[0] + ig;
59 3611304 : const int grid_index_3 = kg2 * stride + jg2 * npts_local[0] + ig;
60 :
61 3611304 : GRID_CONST_WHEN_INTEGRATE double *grid_0 = &grid[grid_index_0];
62 3611304 : GRID_CONST_WHEN_INTEGRATE double *grid_1 = &grid[grid_index_1];
63 3611304 : GRID_CONST_WHEN_INTEGRATE double *grid_2 = &grid[grid_index_2];
64 3611304 : GRID_CONST_WHEN_INTEGRATE double *grid_3 = &grid[grid_index_3];
65 :
66 : #if (GRID_DO_COLLOCATE)
67 : // collocate
68 1805652 : double reg[4] = {0.0, 0.0, 0.0, 0.0};
69 7897392 : for (int lxp = 0; lxp <= lp; lxp++) {
70 6091740 : const double p = pol[0][lxp][i + cmax];
71 6091740 : reg[0] += cx[lxp * 4 + 0] * p;
72 6091740 : reg[1] += cx[lxp * 4 + 1] * p;
73 6091740 : reg[2] += cx[lxp * 4 + 2] * p;
74 6091740 : reg[3] += cx[lxp * 4 + 3] * p;
75 : }
76 1805652 : *grid_0 += reg[0];
77 1805652 : *grid_1 += reg[1];
78 1805652 : *grid_2 += reg[2];
79 1805652 : *grid_3 += reg[3];
80 :
81 : #else
82 : // integrate
83 1805652 : const double reg[4] = {*grid_0, *grid_1, *grid_2, *grid_3};
84 8098020 : for (int lxp = 0; lxp <= lp; lxp++) {
85 6292368 : const double p = pol[0][lxp][i + cmax];
86 6292368 : cx[lxp * 4 + 0] += reg[0] * p;
87 6292368 : cx[lxp * 4 + 1] += reg[1] * p;
88 6292368 : cx[lxp * 4 + 2] += reg[2] * p;
89 6292368 : cx[lxp * 4 + 3] += reg[3] * p;
90 : }
91 : #endif
92 : }
93 221940 : }
94 :
95 : /*******************************************************************************
96 : * \brief Transforms coefficients C_xy into C_x by fixing grid index j.
97 : * \author Ole Schuett
98 : ******************************************************************************/
99 221940 : static inline void ortho_cxy_to_cx(const int lp, const int j1, const int j2,
100 : const int cmax,
101 221940 : const double pol[3][lp + 1][2 * cmax + 1],
102 : GRID_CONST_WHEN_COLLOCATE double *cxy,
103 : GRID_CONST_WHEN_INTEGRATE double *cx) {
104 :
105 973818 : for (int lyp = 0; lyp <= lp; lyp++) {
106 2441602 : for (int lxp = 0; lxp <= lp - lyp; lxp++) {
107 1689724 : const double p1 = pol[1][lyp][j1 + cmax];
108 1689724 : const double p2 = pol[1][lyp][j2 + cmax];
109 1689724 : const int cxy_index = lyp * (lp + 1) * 2 + lxp * 2; // [lyp, lxp, 0]
110 :
111 : #if (GRID_DO_COLLOCATE)
112 : // collocate
113 818154 : cx[lxp * 4 + 0] += cxy[cxy_index + 0] * p1;
114 818154 : cx[lxp * 4 + 1] += cxy[cxy_index + 1] * p1;
115 818154 : cx[lxp * 4 + 2] += cxy[cxy_index + 0] * p2;
116 818154 : cx[lxp * 4 + 3] += cxy[cxy_index + 1] * p2;
117 : #else
118 : // integrate
119 871570 : cxy[cxy_index + 0] += cx[lxp * 4 + 0] * p1;
120 871570 : cxy[cxy_index + 1] += cx[lxp * 4 + 1] * p1;
121 871570 : cxy[cxy_index + 0] += cx[lxp * 4 + 2] * p2;
122 871570 : cxy[cxy_index + 1] += cx[lxp * 4 + 3] * p2;
123 : #endif
124 : }
125 : }
126 221940 : }
127 :
128 : /*******************************************************************************
129 : * \brief Collocates coefficients C_xy onto the grid for orthorhombic case.
130 : * \author Ole Schuett
131 : ******************************************************************************/
132 28620 : static inline void ortho_cxy_to_grid(
133 : const int lp, const int k1, const int k2, const int cmax,
134 28620 : const double pol[3][lp + 1][2 * cmax + 1], const int map[3][2 * cmax + 1],
135 : const double dh[3][3], const double dh_inv[3][3], const double disr_radius,
136 : const int npts_local[3], GRID_CONST_WHEN_COLLOCATE double *cxy,
137 28620 : GRID_CONST_WHEN_INTEGRATE double *grid) {
138 :
139 : // The cube contains an even number of grid points in each direction and
140 : // collocation is always performed on a pair of two opposing grid points.
141 : // Hence, the points with index 0 and 1 are both assigned distance zero via
142 : // the formular distance=(2*index-1)/2.
143 :
144 28620 : const int kd = (2 * k1 - 1) / 2; // distance from center in grid points
145 28620 : const double kr = kd * dh[2][2]; // distance from center in a.u.
146 28620 : const double kremain = disr_radius * disr_radius - kr * kr;
147 28620 : const int jstart = (int)ceil(-1e-8 - sqrt(fmax(0.0, kremain)) * dh_inv[1][1]);
148 :
149 28620 : const size_t cx_size = (lp + 1) * 4;
150 28620 : double cx[cx_size];
151 250560 : for (int j1 = jstart; j1 <= 0; j1++) {
152 221940 : const int j2 = 1 - j1;
153 221940 : memset(cx, 0, cx_size * sizeof(double));
154 :
155 : #if (GRID_DO_COLLOCATE)
156 : // collocate
157 110970 : ortho_cxy_to_cx(lp, j1, j2, cmax, pol, cxy, cx);
158 110970 : ortho_cx_to_grid(lp, k1, k2, j1, j2, cmax, pol, map, dh, dh_inv, kremain,
159 : npts_local, cx, grid);
160 : #else
161 : // integrate
162 110970 : ortho_cx_to_grid(lp, k1, k2, j1, j2, cmax, pol, map, dh, dh_inv, kremain,
163 : npts_local, cx, grid);
164 110970 : ortho_cxy_to_cx(lp, j1, j2, cmax, pol, cxy, cx);
165 : #endif
166 : }
167 28620 : }
168 :
169 : /*******************************************************************************
170 : * \brief Transforms coefficients C_xyz into C_xz by fixing grid index k.
171 : * \author Ole Schuett
172 : ******************************************************************************/
173 28620 : static inline void ortho_cxyz_to_cxy(const int lp, const int k1, const int k2,
174 : const int cmax,
175 28620 : const double pol[3][lp + 1][2 * cmax + 1],
176 : GRID_CONST_WHEN_COLLOCATE double *cxyz,
177 : GRID_CONST_WHEN_INTEGRATE double *cxy) {
178 :
179 124350 : for (int lzp = 0; lzp <= lp; lzp++) {
180 308074 : for (int lyp = 0; lyp <= lp - lzp; lyp++) {
181 603002 : for (int lxp = 0; lxp <= lp - lzp - lyp; lxp++) {
182 390658 : const double p1 = pol[2][lzp][k1 + cmax];
183 390658 : const double p2 = pol[2][lzp][k2 + cmax];
184 390658 : const int cxyz_index =
185 390658 : lzp * (lp + 1) * (lp + 1) + lyp * (lp + 1) + lxp; // [lzp, lyp, lxp]
186 390658 : const int cxy_index = lyp * (lp + 1) * 2 + lxp * 2; // [lyp, lxp, 0]
187 :
188 : #if (GRID_DO_COLLOCATE)
189 : // collocate
190 186210 : cxy[cxy_index + 0] += cxyz[cxyz_index] * p1;
191 186210 : cxy[cxy_index + 1] += cxyz[cxyz_index] * p2;
192 : #else
193 : // integrate
194 204448 : cxyz[cxyz_index] += cxy[cxy_index + 0] * p1;
195 204448 : cxyz[cxyz_index] += cxy[cxy_index + 1] * p2;
196 : #endif
197 : }
198 : }
199 : }
200 28620 : }
201 :
202 : /*******************************************************************************
203 : * \brief Collocates coefficients C_xyz onto the grid for orthorhombic case.
204 : * \author Ole Schuett
205 : ******************************************************************************/
206 : static inline void
207 3276 : ortho_cxyz_to_grid(const int lp, const double zetp, const double dh[3][3],
208 : const double dh_inv[3][3], const double rp[3],
209 : const int npts_global[3], const int npts_local[3],
210 : const int shift_local[3], const double radius,
211 : GRID_CONST_WHEN_COLLOCATE double *cxyz,
212 3276 : GRID_CONST_WHEN_INTEGRATE double *grid) {
213 :
214 : // *** position of the gaussian product
215 : //
216 : // this is the actual definition of the position on the grid
217 : // i.e. a point rp(:) gets here grid coordinates
218 : // MODULO(rp(:)/dr(:),npts_global(:))+1
219 : // hence (0.0,0.0,0.0) in real space is rsgrid%lb on the rsgrid in Fortran
220 : // and (1,1,1) on grid here in C.
221 :
222 : // cubecenter(:) = FLOOR(MATMUL(dh_inv, rp))
223 3276 : int cubecenter[3];
224 13104 : for (int i = 0; i < 3; i++) {
225 : double dh_inv_rp = 0.0;
226 39312 : for (int j = 0; j < 3; j++) {
227 29484 : dh_inv_rp += dh_inv[j][i] * rp[j];
228 : }
229 9828 : cubecenter[i] = (int)floor(dh_inv_rp);
230 : }
231 :
232 : double roffset[3];
233 13104 : for (int i = 0; i < 3; i++) {
234 9828 : roffset[i] = rp[i] - ((double)cubecenter[i]) * dh[i][i];
235 : }
236 :
237 : // Historically, the radius gets discretized to make sphere bounds cacheable.
238 3276 : const double drmin = fmin(dh[0][0], fmin(dh[1][1], dh[2][2]));
239 3276 : const double disr_radius = drmin * fmax(1.0, ceil(radius / drmin));
240 :
241 : // Cube bounds.
242 3276 : int lb_cube[3], ub_cube[3];
243 13104 : for (int i = 0; i < 3; i++) {
244 9828 : lb_cube[i] = (int)ceil(-1e-8 - disr_radius * dh_inv[i][i]);
245 9828 : ub_cube[i] = 1 - lb_cube[i];
246 : // If grid is not period check that cube fits without wrapping.
247 9828 : if (npts_global[i] != npts_local[i]) {
248 0 : const int offset =
249 0 : modulo(cubecenter[i] + lb_cube[i] - shift_local[i], npts_global[i]) -
250 : lb_cube[i];
251 0 : assert(offset + ub_cube[i] < npts_local[i]);
252 0 : assert(offset + lb_cube[i] >= 0);
253 : }
254 : }
255 :
256 : // cmax = MAXVAL(ub_cube)
257 3276 : const int cmax = imax(imax(ub_cube[0], ub_cube[1]), ub_cube[2]);
258 :
259 : // Precompute (x-xp)**lp*exp(..) for each direction.
260 3276 : double pol_mutable[3][lp + 1][2 * cmax + 1];
261 13104 : for (int idir = 0; idir < 3; idir++) {
262 9828 : const double dr = dh[idir][idir];
263 9828 : const double ro = roffset[idir];
264 : // Reuse the result from the previous gridpoint to avoid to many exps:
265 : // exp( -a*(x+d)**2) = exp(-a*x**2)*exp(-2*a*x*d)*exp(-a*d**2)
266 : // exp(-2*a*(x+d)*d) = exp(-2*a*x*d)*exp(-2*a*d**2)
267 9828 : const double t_exp_1 = exp(-zetp * pow(dr, 2));
268 9828 : const double t_exp_2 = pow(t_exp_1, 2);
269 9828 : double t_exp_min_1 = exp(-zetp * pow(+dr - ro, 2));
270 9828 : double t_exp_min_2 = exp(-2 * zetp * (+dr - ro) * (-dr));
271 95688 : for (int ig = 0; ig >= lb_cube[idir]; ig--) {
272 85860 : const double rpg = ig * dr - ro;
273 85860 : t_exp_min_1 *= t_exp_min_2 * t_exp_1;
274 85860 : t_exp_min_2 *= t_exp_2;
275 85860 : double pg = t_exp_min_1;
276 373050 : for (int icoef = 0; icoef <= lp; icoef++) {
277 287190 : pol_mutable[idir][icoef][ig + cmax] = pg; // exp(-zetp*rpg**2)
278 287190 : pg *= rpg;
279 : }
280 : }
281 9828 : double t_exp_plus_1 = exp(-zetp * pow(-ro, 2));
282 9828 : double t_exp_plus_2 = exp(-2 * zetp * (-ro) * (+dr));
283 95688 : for (int ig = 0; ig >= lb_cube[idir]; ig--) {
284 85860 : const double rpg = (1 - ig) * dr - ro;
285 85860 : t_exp_plus_1 *= t_exp_plus_2 * t_exp_1;
286 85860 : t_exp_plus_2 *= t_exp_2;
287 85860 : double pg = t_exp_plus_1;
288 373050 : for (int icoef = 0; icoef <= lp; icoef++) {
289 287190 : pol_mutable[idir][icoef][1 - ig + cmax] = pg; // exp(-zetp*rpg**2)
290 287190 : pg *= rpg;
291 : }
292 : }
293 : }
294 3276 : const double(*pol)[lp + 1][2 * cmax + 1] =
295 3276 : (const double(*)[lp + 1][2 * cmax + 1]) pol_mutable;
296 :
297 : // Precompute mapping from cube to grid indices for each direction
298 3276 : int map_mutable[3][2 * cmax + 1];
299 13104 : for (int i = 0; i < 3; i++) {
300 191376 : for (int k = -cmax; k <= +cmax; k++) {
301 181548 : map_mutable[i][k + cmax] =
302 181548 : modulo(cubecenter[i] + k - shift_local[i], npts_global[i]);
303 : }
304 : }
305 3276 : const int(*map)[2 * cmax + 1] = (const int(*)[2 * cmax + 1]) map_mutable;
306 :
307 : // Loop over k dimension of the cube.
308 3276 : const int kstart = (int)ceil(-1e-8 - disr_radius * dh_inv[2][2]);
309 3276 : const size_t cxy_size = (lp + 1) * (lp + 1) * 2;
310 3276 : double cxy[cxy_size];
311 31896 : for (int k1 = kstart; k1 <= 0; k1++) {
312 28620 : const int k2 = 1 - k1;
313 28620 : memset(cxy, 0, cxy_size * sizeof(double));
314 :
315 : #if (GRID_DO_COLLOCATE)
316 : // collocate
317 14310 : ortho_cxyz_to_cxy(lp, k1, k2, cmax, pol, cxyz, cxy);
318 14310 : ortho_cxy_to_grid(lp, k1, k2, cmax, pol, map, dh, dh_inv, disr_radius,
319 : npts_local, cxy, grid);
320 : #else
321 : // integrate
322 14310 : ortho_cxy_to_grid(lp, k1, k2, cmax, pol, map, dh, dh_inv, disr_radius,
323 : npts_local, cxy, grid);
324 14310 : ortho_cxyz_to_cxy(lp, k1, k2, cmax, pol, cxyz, cxy);
325 : #endif
326 : }
327 3276 : }
328 :
329 : /*******************************************************************************
330 : * \brief Collocates coefficients C_i onto the grid for general case.
331 : * \author Ole Schuett
332 : ******************************************************************************/
333 : static inline void
334 2079910 : general_ci_to_grid(const int lp, const int jg, const int kg, const int ismin,
335 : const int ismax, const int npts_local[3],
336 : const int index_min[3], const int index_max[3],
337 : const int map_i[], const double gp[3], const int k,
338 : const int j, const double exp_ij[], const double exp_jk[],
339 : const double exp_ki[], GRID_CONST_WHEN_COLLOCATE double *ci,
340 : GRID_CONST_WHEN_INTEGRATE double *grid) {
341 :
342 2079910 : const int base = kg * npts_local[1] * npts_local[0] + jg * npts_local[0];
343 :
344 23376248 : for (int i = ismin; i <= ismax; i++) {
345 21296338 : const int ig = map_i[i - index_min[0]];
346 21296338 : if (ig < 0) {
347 0 : continue; // skip over out-of-bounds indicies
348 : }
349 21296338 : const double di = i - gp[0];
350 :
351 21296338 : const int stride_i = index_max[0] - index_min[0] + 1;
352 21296338 : const int stride_j = index_max[1] - index_min[1] + 1;
353 21296338 : const int stride_k = index_max[2] - index_min[2] + 1;
354 21296338 : const int idx_ij = (j - index_min[1]) * stride_i + i - index_min[0];
355 21296338 : const int idx_jk = (k - index_min[2]) * stride_j + j - index_min[1];
356 21296338 : const int idx_ki = (i - index_min[0]) * stride_k + k - index_min[2];
357 :
358 : // Mathieu's trick: Calculate 3D Gaussian from three precomputed 2D tables
359 : //
360 : // r = (i-gp[0])*dh[0,:] + (j-gp[1])*dh[1,:] + (k-gp[2])*dh[2,:]
361 : // = a + b + c
362 : //
363 : // r**2 = (a + b + c)**2 = a**2 + b**2 + c**2 + 2ab + 2bc + 2ca
364 : //
365 : // exp(-r**2) = exp(-a(a+2b)) * exp(-b*(b+2c)) * exp(-c*(c+2a))
366 : //
367 21296338 : const double gaussian = exp_ij[idx_ij] * exp_jk[idx_jk] * exp_ki[idx_ki];
368 :
369 21296338 : const int grid_index = base + ig; // [kg, jg, ig]
370 21296338 : double dip = gaussian;
371 :
372 : #if (GRID_DO_COLLOCATE)
373 : // collocate
374 12169336 : double reg = 0.0;
375 48677344 : for (int il = 0; il <= lp; il++) {
376 36508008 : reg += ci[il] * dip;
377 36508008 : dip *= di;
378 : }
379 12169336 : grid[grid_index] += reg;
380 : #else
381 : // integrate
382 9127002 : const double reg = grid[grid_index];
383 39550342 : for (int il = 0; il <= lp; il++) {
384 30423340 : ci[il] += reg * dip;
385 30423340 : dip *= di;
386 : }
387 : #endif
388 : }
389 2079910 : }
390 :
391 : /*******************************************************************************
392 : * \brief Transforms coefficients C_ij into C_i by fixing grid index j.
393 : * \author Ole Schuett
394 : ******************************************************************************/
395 2079910 : static inline void general_cij_to_ci(const int lp, const double dj,
396 : GRID_CONST_WHEN_COLLOCATE double *cij,
397 : GRID_CONST_WHEN_INTEGRATE double *ci) {
398 2079910 : double djp = 1.0;
399 8616770 : for (int jl = 0; jl <= lp; jl++) {
400 20204840 : for (int il = 0; il <= lp - jl; il++) {
401 13667980 : const int cij_index = jl * (lp + 1) + il; // [jl, il]
402 : #if (GRID_DO_COLLOCATE)
403 7131120 : ci[il] += cij[cij_index] * djp; // collocate
404 : #else
405 6536860 : cij[cij_index] += ci[il] * djp; // integrate
406 : #endif
407 : }
408 6536860 : djp *= dj;
409 : }
410 2079910 : }
411 :
412 : /*******************************************************************************
413 : * \brief Collocates coefficients C_ij onto the grid for general case.
414 : * \author Ole Schuett
415 : ******************************************************************************/
416 175910 : static inline void general_cij_to_grid(
417 : const int lp, const int k, const int kg, const int npts_local[3],
418 : const int index_min[3], const int index_max[3], const int map_i[],
419 : const int map_j[], const double dh[3][3], const double gp[3],
420 : const double radius, const double exp_ij[], const double exp_jk[],
421 : const double exp_ki[], GRID_CONST_WHEN_COLLOCATE double *cij,
422 : GRID_CONST_WHEN_INTEGRATE double *grid) {
423 :
424 4593960 : for (int j = index_min[1]; j <= index_max[1]; j++) {
425 4418050 : const int jg = map_j[j - index_min[1]];
426 4418050 : if (jg < 0) {
427 0 : continue; // skip over out-of-bounds indicies
428 : }
429 4418050 : const double dj = j - gp[1];
430 :
431 : //--------------------------------------------------------------------
432 : // Find bounds for the inner loop based on a quadratic equation in i.
433 : //
434 : // The real-space vector from the center of the gaussian to the
435 : // grid point i,j,k is given by:
436 : // r = (i-gp[0])*dh[0,:] + (j-gp[1])*dh[1,:] + (k-gp[2])*dh[2,:]
437 : //
438 : // Separating the term that depends on i:
439 : // r = i*dh[0,:] - gp[0]*dh[0,:] + (j-gp[1])*dh[1,:] + (k-gp[2])*dh[2,:]
440 : // = i*dh[0,:] + v
441 : //
442 : // The squared distance works out to:
443 : // r**2 = dh[0,:]**2 * i**2 + 2 * v * dh[0,:] * i + v**2
444 : // = a * i**2 + b * i + c
445 : //
446 : // Solving r**2==radius**2 for i yields:
447 : // d = b**2 - 4 * a * (c - radius**2)
448 : // i = (-b \pm sqrt(d)) / (2*a)
449 : //
450 4418050 : double a = 0.0, b = 0.0, c = 0.0;
451 17672200 : for (int i = 0; i < 3; i++) {
452 13254150 : const double v = (0 - gp[0]) * dh[0][i] + (j - gp[1]) * dh[1][i] +
453 13254150 : (k - gp[2]) * dh[2][i];
454 13254150 : a += dh[0][i] * dh[0][i];
455 13254150 : b += 2.0 * v * dh[0][i];
456 13254150 : c += v * v;
457 : }
458 4418050 : const double d = b * b - 4.0 * a * (c - radius * radius);
459 4418050 : if (0.0 < d) {
460 2079910 : const double sqrt_d = sqrt(d);
461 2079910 : const double inv_2a = 1.0 / (2.0 * a);
462 2079910 : const int ismin = (int)ceil((-b - sqrt_d) * inv_2a);
463 2079910 : const int ismax = (int)floor((-b + sqrt_d) * inv_2a);
464 :
465 2079910 : double ci[lp + 1];
466 2079910 : memset(ci, 0, sizeof(ci));
467 :
468 : #if (GRID_DO_COLLOCATE)
469 : // collocate
470 1188520 : general_cij_to_ci(lp, dj, cij, ci);
471 1188520 : general_ci_to_grid(lp, jg, kg, ismin, ismax, npts_local, index_min,
472 : index_max, map_i, gp, k, j, exp_ij, exp_jk, exp_ki, ci,
473 : grid);
474 : #else
475 : // integrate
476 891390 : general_ci_to_grid(lp, jg, kg, ismin, ismax, npts_local, index_min,
477 : index_max, map_i, gp, k, j, exp_ij, exp_jk, exp_ki, ci,
478 : grid);
479 891390 : general_cij_to_ci(lp, dj, cij, ci);
480 : #endif
481 : }
482 : }
483 175910 : }
484 :
485 : /*******************************************************************************
486 : * \brief Transforms coefficients C_ijk into C_ij by fixing grid index k.
487 : * \author Ole Schuett
488 : ******************************************************************************/
489 175910 : static inline void general_cijk_to_cij(const int lp, const double dk,
490 : GRID_CONST_WHEN_COLLOCATE double *cijk,
491 : GRID_CONST_WHEN_INTEGRATE double *cij) {
492 175910 : double dkp = 1.0;
493 728770 : for (int kl = 0; kl <= lp; kl++) {
494 1708840 : for (int jl = 0; jl <= lp - kl; jl++) {
495 3166380 : for (int il = 0; il <= lp - kl - jl; il++) {
496 2010400 : const int cij_index = jl * (lp + 1) + il; // [jl, il]
497 2010400 : const int cijk_index =
498 2010400 : kl * (lp + 1) * (lp + 1) + jl * (lp + 1) + il; // [kl, jl, il]
499 : #if (GRID_DO_COLLOCATE)
500 1005200 : cij[cij_index] += cijk[cijk_index] * dkp; // collocate
501 : #else
502 1005200 : cijk[cijk_index] += cij[cij_index] * dkp; // integrate
503 : #endif
504 : }
505 : }
506 552860 : dkp *= dk;
507 : }
508 175910 : }
509 :
510 : /*******************************************************************************
511 : * \brief Precompute mapping of grid indices for general case.
512 : * \author Ole Schuett
513 : ******************************************************************************/
514 32676 : static inline void general_precompute_mapping(const int index_min,
515 : const int index_max,
516 : const int shift_local,
517 : const int npts_global,
518 : const int bounds[2], int map[]) {
519 :
520 : // Precompute mapping from continous grid indices to pbc wraped.
521 736498 : for (int k = index_min; k <= index_max; k++) {
522 703822 : const int kg = modulo(k - shift_local, npts_global);
523 703822 : if (bounds[0] <= kg && kg <= bounds[1]) {
524 703822 : map[k - index_min] = kg;
525 : } else {
526 0 : map[k - index_min] = INT_MIN; // out of bounds - not mapped
527 : }
528 : }
529 32676 : }
530 :
531 : /*******************************************************************************
532 : * \brief Fill one of the 2D tables that speedup 3D Gaussian (Mathieu's trick).
533 : * \author Ole Schuett
534 : ******************************************************************************/
535 : static inline void
536 32676 : general_fill_exp_table(const int idir, const int jdir, const int index_min[3],
537 : const int index_max[3], const double zetp,
538 : const double dh[3][3], const double gp[3],
539 : double exp_table[]) {
540 :
541 32676 : const int stride_i = index_max[idir] - index_min[idir] + 1;
542 32676 : const double h_ii = dh[idir][0] * dh[idir][0] + dh[idir][1] * dh[idir][1] +
543 32676 : dh[idir][2] * dh[idir][2];
544 32676 : const double h_ij = dh[idir][0] * dh[jdir][0] + dh[idir][1] * dh[jdir][1] +
545 32676 : dh[idir][2] * dh[jdir][2];
546 :
547 736498 : for (int i = index_min[idir]; i <= index_max[idir]; i++) {
548 703822 : const double di = i - gp[idir];
549 703822 : const double rii = di * di * h_ii;
550 703822 : const double rij_unit = di * h_ij;
551 703822 : const double exp_ij_unit = exp(-zetp * 2.0 * rij_unit);
552 :
553 : // compute exponentials symmetrically around cube center
554 703822 : const int j_center = (int)gp[jdir];
555 703822 : const double dj_center = j_center - gp[jdir];
556 703822 : const double rij_center = dj_center * rij_unit;
557 703822 : const double exp_ij_center = exp(-zetp * (rii + 2.0 * rij_center));
558 :
559 : // above center
560 703822 : double exp_ij = exp_ij_center;
561 8769656 : for (int j = j_center; j <= index_max[jdir]; j++) {
562 8065834 : const int idx = (j - index_min[jdir]) * stride_i + i - index_min[idir];
563 8065834 : exp_table[idx] = exp_ij; // exp(-zetp * (di*di*h_ii + 2*di*dj*h_ij));
564 8065834 : exp_ij *= exp_ij_unit;
565 : }
566 :
567 : // below center
568 703822 : const double exp_ij_unit_inv = 1.0 / exp_ij_unit;
569 703822 : exp_ij = exp_ij_center * exp_ij_unit_inv;
570 8150996 : for (int j = j_center - 1; j >= index_min[jdir]; j--) {
571 7447174 : const int idx = (j - index_min[jdir]) * stride_i + i - index_min[idir];
572 7447174 : exp_table[idx] = exp_ij; // exp(-zetp * (di*di*h_ii + 2*di*dj*h_ij));
573 7447174 : exp_ij *= exp_ij_unit_inv;
574 : }
575 : }
576 32676 : }
577 :
578 : /*******************************************************************************
579 : * \brief Collocates coefficients C_ijk onto the grid for general case.
580 : * \author Ole Schuett
581 : ******************************************************************************/
582 : static inline void
583 10892 : general_cijk_to_grid(const int border_mask, const int lp, const double zetp,
584 : const double dh[3][3], const double dh_inv[3][3],
585 : const double rp[3], const int npts_global[3],
586 : const int npts_local[3], const int shift_local[3],
587 : const int border_width[3], const double radius,
588 : GRID_CONST_WHEN_COLLOCATE double *cijk,
589 10892 : GRID_CONST_WHEN_INTEGRATE double *grid) {
590 :
591 : // Default for border_mask == 0.
592 10892 : int bounds_i[2] = {0, npts_local[0] - 1};
593 10892 : int bounds_j[2] = {0, npts_local[1] - 1};
594 10892 : int bounds_k[2] = {0, npts_local[2] - 1};
595 :
596 : // See also rs_find_node() in task_list_methods.F.
597 : // If the bit is set then we need to exclude the border in that direction.
598 10892 : if (border_mask & (1 << 0))
599 0 : bounds_i[0] += border_width[0];
600 10892 : if (border_mask & (1 << 1))
601 0 : bounds_i[1] -= border_width[0];
602 10892 : if (border_mask & (1 << 2))
603 0 : bounds_j[0] += border_width[1];
604 10892 : if (border_mask & (1 << 3))
605 0 : bounds_j[1] -= border_width[1];
606 10892 : if (border_mask & (1 << 4))
607 0 : bounds_k[0] += border_width[2];
608 10892 : if (border_mask & (1 << 5))
609 0 : bounds_k[1] -= border_width[2];
610 :
611 : // center in grid coords
612 : // gp = MATMUL(dh_inv, rp)
613 10892 : double gp[3] = {0.0, 0.0, 0.0};
614 43568 : for (int i = 0; i < 3; i++) {
615 130704 : for (int j = 0; j < 3; j++) {
616 98028 : gp[i] += dh_inv[j][i] * rp[j];
617 : }
618 : }
619 :
620 : // Get the min max indices that contain at least the cube that contains a
621 : // sphere around rp of radius radius if the cell is very non-orthogonal this
622 : // implies that many useless points are included this estimate can be improved
623 : // (i.e. not box but sphere should be used)
624 10892 : int index_min[3] = {INT_MAX, INT_MAX, INT_MAX};
625 10892 : int index_max[3] = {INT_MIN, INT_MIN, INT_MIN};
626 43568 : for (int i = -1; i <= 1; i++) {
627 130704 : for (int j = -1; j <= 1; j++) {
628 392112 : for (int k = -1; k <= 1; k++) {
629 294084 : const double x = rp[0] + i * radius;
630 294084 : const double y = rp[1] + j * radius;
631 294084 : const double z = rp[2] + k * radius;
632 1176336 : for (int idir = 0; idir < 3; idir++) {
633 882252 : const double resc =
634 882252 : dh_inv[0][idir] * x + dh_inv[1][idir] * y + dh_inv[2][idir] * z;
635 882252 : index_min[idir] = imin(index_min[idir], (int)floor(resc));
636 882252 : index_max[idir] = imax(index_max[idir], (int)ceil(resc));
637 : }
638 : }
639 : }
640 : }
641 :
642 : // Precompute mappings
643 10892 : const int range_i = index_max[0] - index_min[0] + 1;
644 10892 : int map_i[range_i];
645 10892 : general_precompute_mapping(index_min[0], index_max[0], shift_local[0],
646 : npts_global[0], bounds_i, map_i);
647 10892 : const int range_j = index_max[1] - index_min[1] + 1;
648 10892 : int map_j[range_j];
649 10892 : general_precompute_mapping(index_min[1], index_max[1], shift_local[1],
650 : npts_global[1], bounds_j, map_j);
651 10892 : const int range_k = index_max[2] - index_min[2] + 1;
652 10892 : int map_k[range_k];
653 10892 : general_precompute_mapping(index_min[2], index_max[2], shift_local[2],
654 : npts_global[2], bounds_k, map_k);
655 :
656 : // Precompute exponentials
657 10892 : double exp_ij[range_i * range_j];
658 10892 : general_fill_exp_table(0, 1, index_min, index_max, zetp, dh, gp, exp_ij);
659 10892 : double exp_jk[range_j * range_k];
660 10892 : general_fill_exp_table(1, 2, index_min, index_max, zetp, dh, gp, exp_jk);
661 10892 : double exp_ki[range_k * range_i];
662 10892 : general_fill_exp_table(2, 0, index_min, index_max, zetp, dh, gp, exp_ki);
663 :
664 : // go over the grid, but cycle if the point is not within the radius
665 10892 : const int cij_size = (lp + 1) * (lp + 1);
666 10892 : double cij[cij_size];
667 186802 : for (int k = index_min[2]; k <= index_max[2]; k++) {
668 175910 : const int kg = map_k[k - index_min[2]];
669 175910 : if (kg < bounds_k[0] || bounds_k[1] < kg) {
670 0 : continue; // skip over out-of-bounds indicies
671 : }
672 175910 : const double dk = k - gp[2];
673 :
674 : // zero coef_xyt
675 175910 : memset(cij, 0, cij_size * sizeof(double));
676 :
677 : #if (GRID_DO_COLLOCATE)
678 : // collocate
679 100520 : general_cijk_to_cij(lp, dk, cijk, cij);
680 100520 : general_cij_to_grid(lp, k, kg, npts_local, index_min, index_max, map_i,
681 : map_j, dh, gp, radius, exp_ij, exp_jk, exp_ki, cij,
682 : grid);
683 : #else
684 : // integrate
685 75390 : general_cij_to_grid(lp, k, kg, npts_local, index_min, index_max, map_i,
686 : map_j, dh, gp, radius, exp_ij, exp_jk, exp_ki, cij,
687 : grid);
688 75390 : general_cijk_to_cij(lp, dk, cijk, cij);
689 : #endif
690 : }
691 10892 : }
692 :
693 : /*******************************************************************************
694 : * \brief Transforms coefficients C_xyz into C_ijk.
695 : * \author Ole Schuett
696 : ******************************************************************************/
697 : static inline void
698 10892 : general_cxyz_to_cijk(const int lp, const double dh[3][3],
699 : GRID_CONST_WHEN_COLLOCATE double *cxyz,
700 10892 : GRID_CONST_WHEN_INTEGRATE double *cijk) {
701 :
702 : // transform P_{lxp,lyp,lzp} into a P_{lip,ljp,lkp} such that
703 : // sum_{lxp,lyp,lzp} P_{lxp,lyp,lzp} (x-x_p)**lxp (y-y_p)**lyp (z-z_p)**lzp =
704 : // sum_{lip,ljp,lkp} P_{lip,ljp,lkp} (i-i_p)**lip (j-j_p)**ljp (k-k_p)**lkp
705 :
706 : // transform using multinomials
707 10892 : double hmatgridp[lp + 1][3][3];
708 43568 : for (int i = 0; i < 3; i++) {
709 130704 : for (int j = 0; j < 3; j++) {
710 98028 : hmatgridp[0][j][i] = 1.0;
711 308088 : for (int k = 1; k <= lp; k++) {
712 210060 : hmatgridp[k][j][i] = hmatgridp[k - 1][j][i] * dh[j][i];
713 : }
714 : }
715 : }
716 :
717 : const int lpx = lp;
718 45124 : for (int klx = 0; klx <= lpx; klx++) {
719 105808 : for (int jlx = 0; jlx <= lpx - klx; jlx++) {
720 196056 : for (int ilx = 0; ilx <= lpx - klx - jlx; ilx++) {
721 124480 : const int lx = ilx + jlx + klx;
722 124480 : const int lpy = lp - lx;
723 318980 : for (int kly = 0; kly <= lpy; kly++) {
724 477692 : for (int jly = 0; jly <= lpy - kly; jly++) {
725 675304 : for (int ily = 0; ily <= lpy - kly - jly; ily++) {
726 392112 : const int ly = ily + jly + kly;
727 392112 : const int lpz = lp - lx - ly;
728 914928 : for (int klz = 0; klz <= lpz; klz++) {
729 1199676 : for (int jlz = 0; jlz <= lpz - klz; jlz++) {
730 1532660 : for (int ilz = 0; ilz <= lpz - klz - jlz; ilz++) {
731 855800 : const int lz = ilz + jlz + klz;
732 855800 : const int il = ilx + ily + ilz;
733 855800 : const int jl = jlx + jly + jlz;
734 855800 : const int kl = klx + kly + klz;
735 855800 : const int lp1 = lp + 1;
736 855800 : const int cijk_index =
737 855800 : kl * lp1 * lp1 + jl * lp1 + il; // [kl,jl,il]
738 855800 : const int cxyz_index =
739 855800 : lz * lp1 * lp1 + ly * lp1 + lx; // [lz,ly,lx]
740 855800 : const double p =
741 855800 : hmatgridp[ilx][0][0] * hmatgridp[jlx][1][0] *
742 855800 : hmatgridp[klx][2][0] * hmatgridp[ily][0][1] *
743 855800 : hmatgridp[jly][1][1] * hmatgridp[kly][2][1] *
744 855800 : hmatgridp[ilz][0][2] * hmatgridp[jlz][1][2] *
745 855800 : hmatgridp[klz][2][2] * fac(lx) * fac(ly) * fac(lz) /
746 855800 : (fac(ilx) * fac(ily) * fac(ilz) * fac(jlx) * fac(jly) *
747 855800 : fac(jlz) * fac(klx) * fac(kly) * fac(klz));
748 : #if (GRID_DO_COLLOCATE)
749 342320 : cijk[cijk_index] += cxyz[cxyz_index] * p; // collocate
750 : #else
751 513480 : cxyz[cxyz_index] += cijk[cijk_index] * p; // integrate
752 : #endif
753 : }
754 : }
755 : }
756 : }
757 : }
758 : }
759 : }
760 : }
761 : }
762 10892 : }
763 :
764 : /*******************************************************************************
765 : * \brief Collocates coefficients C_xyz onto the grid for general case.
766 : * \author Ole Schuett
767 : ******************************************************************************/
768 : static inline void
769 10892 : general_cxyz_to_grid(const int border_mask, const int lp, const double zetp,
770 : const double dh[3][3], const double dh_inv[3][3],
771 : const double rp[3], const int npts_global[3],
772 : const int npts_local[3], const int shift_local[3],
773 : const int border_width[3], const double radius,
774 : GRID_CONST_WHEN_COLLOCATE double *cxyz,
775 10892 : GRID_CONST_WHEN_INTEGRATE double *grid) {
776 :
777 10892 : const size_t cijk_size = (lp + 1) * (lp + 1) * (lp + 1);
778 10892 : double cijk[cijk_size];
779 10892 : memset(cijk, 0, cijk_size * sizeof(double));
780 :
781 : #if (GRID_DO_COLLOCATE)
782 : // collocate
783 6224 : general_cxyz_to_cijk(lp, dh, cxyz, cijk);
784 6224 : general_cijk_to_grid(border_mask, lp, zetp, dh, dh_inv, rp, npts_global,
785 : npts_local, shift_local, border_width, radius, cijk,
786 : grid);
787 : #else
788 : // integrate
789 4668 : general_cijk_to_grid(border_mask, lp, zetp, dh, dh_inv, rp, npts_global,
790 : npts_local, shift_local, border_width, radius, cijk,
791 : grid);
792 4668 : general_cxyz_to_cijk(lp, dh, cxyz, cijk);
793 : #endif
794 10892 : }
795 :
796 : /*******************************************************************************
797 : * \brief Collocates coefficients C_xyz onto the grid.
798 : * \author Ole Schuett
799 : ******************************************************************************/
800 : static inline void
801 14168 : cxyz_to_grid(const bool orthorhombic, const int border_mask, const int lp,
802 : const double zetp, const double dh[3][3],
803 : const double dh_inv[3][3], const double rp[3],
804 : const int npts_global[3], const int npts_local[3],
805 : const int shift_local[3], const int border_width[3],
806 : const double radius, GRID_CONST_WHEN_COLLOCATE double *cxyz,
807 : GRID_CONST_WHEN_INTEGRATE double *grid) {
808 :
809 14168 : enum grid_library_kernel k;
810 14168 : if (orthorhombic && border_mask == 0) {
811 3276 : k = (GRID_DO_COLLOCATE) ? GRID_COLLOCATE_ORTHO : GRID_INTEGRATE_ORTHO;
812 3276 : ortho_cxyz_to_grid(lp, zetp, dh, dh_inv, rp, npts_global, npts_local,
813 : shift_local, radius, cxyz, grid);
814 : } else {
815 10892 : k = (GRID_DO_COLLOCATE) ? GRID_COLLOCATE_GENERAL : GRID_INTEGRATE_GENERAL;
816 10892 : general_cxyz_to_grid(border_mask, lp, zetp, dh, dh_inv, rp, npts_global,
817 : npts_local, shift_local, border_width, radius, cxyz,
818 : grid);
819 : }
820 14168 : grid_library_counter_add(lp, GRID_BACKEND_REF, k, 1);
821 14168 : }
822 :
823 : /*******************************************************************************
824 : * \brief Transforms coefficients C_ab into C_xyz.
825 : * \author Ole Schuett
826 : ******************************************************************************/
827 14168 : static inline void cab_to_cxyz(const int la_max, const int la_min,
828 : const int lb_max, const int lb_min,
829 : const double prefactor, const double ra[3],
830 : const double rb[3], const double rp[3],
831 : GRID_CONST_WHEN_COLLOCATE double *cab,
832 14168 : GRID_CONST_WHEN_INTEGRATE double *cxyz) {
833 :
834 : // Computes the polynomial expansion coefficients:
835 : // (x-a)**lxa (x-b)**lxb -> sum_{ls} alpha(ls,lxa,lxb,1)*(x-p)**ls
836 14168 : const int lp = la_max + lb_max;
837 14168 : double alpha[3][lb_max + 1][la_max + 1][lp + 1];
838 14168 : memset(alpha, 0, 3 * (lb_max + 1) * (la_max + 1) * (lp + 1) * sizeof(double));
839 :
840 56672 : for (int i = 0; i < 3; i++) {
841 42504 : const double drpa = rp[i] - ra[i];
842 42504 : const double drpb = rp[i] - rb[i];
843 133914 : for (int lxa = 0; lxa <= la_max; lxa++) {
844 276780 : for (int lxb = 0; lxb <= lb_max; lxb++) {
845 : double binomial_k_lxa = 1.0;
846 : double a = 1.0;
847 483030 : for (int k = 0; k <= lxa; k++) {
848 : double binomial_l_lxb = 1.0;
849 : double b = 1.0;
850 750288 : for (int l = 0; l <= lxb; l++) {
851 452628 : alpha[i][lxb][lxa][lxa - l + lxb - k] +=
852 452628 : binomial_k_lxa * binomial_l_lxb * a * b;
853 452628 : binomial_l_lxb *= ((double)(lxb - l)) / ((double)(l + 1));
854 452628 : b *= drpb;
855 : }
856 297660 : binomial_k_lxa *= ((double)(lxa - k)) / ((double)(k + 1));
857 297660 : a *= drpa;
858 : }
859 : }
860 : }
861 : }
862 :
863 : // *** initialise the coefficient matrix, we transform the sum
864 : //
865 : // sum_{lxa,lya,lza,lxb,lyb,lzb} P_{lxa,lya,lza,lxb,lyb,lzb} *
866 : // (x-a_x)**lxa (y-a_y)**lya (z-a_z)**lza (x-b_x)**lxb (y-a_y)**lya
867 : // (z-a_z)**lza
868 : //
869 : // into
870 : //
871 : // sum_{lxp,lyp,lzp} P_{lxp,lyp,lzp} (x-p_x)**lxp (y-p_y)**lyp (z-p_z)**lzp
872 : //
873 : // where p is center of the product gaussian, and lp = la_max + lb_max
874 : // (current implementation is l**7)
875 : //
876 :
877 42900 : for (int lzb = 0; lzb <= lb_max; lzb++) {
878 90522 : for (int lza = 0; lza <= la_max; lza++) {
879 155750 : for (int lyb = 0; lyb <= lb_max - lzb; lyb++) {
880 244836 : for (int lya = 0; lya <= la_max - lza; lya++) {
881 150876 : const int lxb_min = imax(lb_min - lzb - lyb, 0);
882 150876 : const int lxa_min = imax(la_min - lza - lya, 0);
883 349736 : for (int lxb = lxb_min; lxb <= lb_max - lzb - lyb; lxb++) {
884 475362 : for (int lxa = lxa_min; lxa <= la_max - lza - lya; lxa++) {
885 276502 : const int ico = coset(lxa, lya, lza);
886 276502 : const int jco = coset(lxb, lyb, lzb);
887 276502 : const int cab_index = jco * ncoset(la_max) + ico; // [jco, ico]
888 718144 : for (int lzp = 0; lzp <= lza + lzb; lzp++) {
889 1537350 : for (int lyp = 0; lyp <= lp - lza - lzb; lyp++) {
890 3148618 : for (int lxp = 0; lxp <= lp - lza - lzb - lyp; lxp++) {
891 2052910 : const double p = alpha[0][lxb][lxa][lxp] *
892 2052910 : alpha[1][lyb][lya][lyp] *
893 2052910 : alpha[2][lzb][lza][lzp] * prefactor;
894 2052910 : const int lp1 = lp + 1;
895 2052910 : const int cxyz_index =
896 2052910 : lzp * lp1 * lp1 + lyp * lp1 + lxp; // [lzp, lyp, lxp]
897 : #if (GRID_DO_COLLOCATE)
898 795876 : cxyz[cxyz_index] += cab[cab_index] * p; // collocate
899 : #else
900 1257034 : cab[cab_index] += cxyz[cxyz_index] * p; // integrate
901 : #endif
902 : }
903 : }
904 : }
905 : }
906 : }
907 : }
908 : }
909 : }
910 : }
911 14168 : }
912 :
913 : /*******************************************************************************
914 : * \brief Collocates coefficients C_ab onto the grid.
915 : * \author Ole Schuett
916 : ******************************************************************************/
917 : static inline void
918 14168 : cab_to_grid(const bool orthorhombic, const int border_mask, const int la_max,
919 : const int la_min, const int lb_max, const int lb_min,
920 : const double zeta, const double zetb, const double rscale,
921 : const double dh[3][3], const double dh_inv[3][3],
922 : const double ra[3], const double rab[3], const int npts_global[3],
923 : const int npts_local[3], const int shift_local[3],
924 : const int border_width[3], const double radius,
925 : GRID_CONST_WHEN_COLLOCATE double *cab,
926 14168 : GRID_CONST_WHEN_INTEGRATE double *grid) {
927 :
928 : // Check if radius is too small to be mapped onto grid of given resolution.
929 14168 : double dh_max = 0.0;
930 56672 : for (int i = 0; i < 3; i++) {
931 170016 : for (int j = 0; j < 3; j++) {
932 127512 : dh_max = fmax(dh_max, fabs(dh[i][j]));
933 : }
934 : }
935 14168 : if (2.0 * radius < dh_max) {
936 0 : return;
937 : }
938 :
939 14168 : const double zetp = zeta + zetb;
940 14168 : const double f = zetb / zetp;
941 14168 : const double rab2 = rab[0] * rab[0] + rab[1] * rab[1] + rab[2] * rab[2];
942 14168 : const double prefactor = rscale * exp(-zeta * f * rab2);
943 14168 : double rp[3], rb[3];
944 56672 : for (int i = 0; i < 3; i++) {
945 42504 : rp[i] = ra[i] + f * rab[i];
946 42504 : rb[i] = ra[i] + rab[i];
947 : }
948 :
949 14168 : const int lp = la_max + lb_max;
950 14168 : const size_t cxyz_size = (lp + 1) * (lp + 1) * (lp + 1);
951 14168 : double cxyz[cxyz_size];
952 14168 : memset(cxyz, 0, cxyz_size * sizeof(double));
953 :
954 : #if (GRID_DO_COLLOCATE)
955 : // collocate
956 7862 : cab_to_cxyz(la_max, la_min, lb_max, lb_min, prefactor, ra, rb, rp, cab, cxyz);
957 7862 : cxyz_to_grid(orthorhombic, border_mask, lp, zetp, dh, dh_inv, rp, npts_global,
958 : npts_local, shift_local, border_width, radius, cxyz, grid);
959 : #else
960 : // integrate
961 6306 : cxyz_to_grid(orthorhombic, border_mask, lp, zetp, dh, dh_inv, rp, npts_global,
962 : npts_local, shift_local, border_width, radius, cxyz, grid);
963 6306 : cab_to_cxyz(la_max, la_min, lb_max, lb_min, prefactor, ra, rb, rp, cab, cxyz);
964 : #endif
965 : }
966 :
967 : // EOF
|