Line data Source code
1 : /*----------------------------------------------------------------------------*/
2 : /* CP2K: A general program to perform molecular dynamics simulations */
3 : /* Copyright 2000-2024 CP2K developers group <https://cp2k.org> */
4 : /* */
5 : /* SPDX-License-Identifier: BSD-3-Clause */
6 : /*----------------------------------------------------------------------------*/
7 :
8 : #include <assert.h>
9 : #include <stdio.h>
10 : #include <stdlib.h>
11 : #include <string.h>
12 : #if defined(__LIBXSMM)
13 : #include <libxsmm.h>
14 : #endif
15 : #include "../common/grid_common.h"
16 : #include "grid_dgemm_coefficients.h"
17 : #include "grid_dgemm_private_header.h"
18 : #include "grid_dgemm_tensor_local.h"
19 :
20 0 : void transform_xyz_to_triangular(const tensor *const coef,
21 : double *const coef_xyz) {
22 0 : assert(coef != NULL);
23 0 : assert(coef_xyz != NULL);
24 :
25 0 : int lxyz = 0;
26 0 : const int lp = (coef->size[0] - 1);
27 0 : for (int lzp = 0; lzp <= lp; lzp++) {
28 0 : for (int lyp = 0; lyp <= lp - lzp; lyp++) {
29 0 : for (int lxp = 0; lxp <= lp - lzp - lyp; lxp++, lxyz++) {
30 0 : coef_xyz[lxyz] = idx3(coef[0], lzp, lyp, lxp);
31 : }
32 : }
33 : }
34 0 : }
35 :
36 0 : void transform_yxz_to_triangular(const tensor *const coef,
37 : double *const coef_xyz) {
38 0 : assert(coef != NULL);
39 0 : assert(coef_xyz != NULL);
40 0 : int lxyz = 0;
41 0 : const int lp = (coef->size[0] - 1);
42 0 : for (int lzp = 0; lzp <= lp; lzp++) {
43 0 : for (int lyp = 0; lyp <= lp - lzp; lyp++) {
44 0 : for (int lxp = 0; lxp <= lp - lzp - lyp; lxp++, lxyz++) {
45 0 : coef_xyz[lxyz] = idx3(coef[0], lyp, lxp, lzp);
46 : }
47 : }
48 : }
49 0 : }
50 :
51 0 : void transform_triangular_to_xyz(const double *const coef_xyz,
52 : tensor *const coef) {
53 0 : assert(coef != NULL);
54 0 : assert(coef_xyz != NULL);
55 0 : int lxyz = 0;
56 0 : const int lp = coef->size[0] - 1;
57 0 : for (int lzp = 0; lzp <= lp; lzp++) {
58 0 : for (int lyp = 0; lyp <= lp - lzp; lyp++) {
59 0 : for (int lxp = 0; lxp <= lp - lzp - lyp; lxp++, lxyz++) {
60 0 : idx3(coef[0], lzp, lyp, lxp) = coef_xyz[lxyz];
61 : }
62 : /* initialize the remaining coefficients to zero */
63 0 : for (int lxp = lp - lzp - lyp + 1; lxp <= lp; lxp++)
64 0 : idx3(coef[0], lzp, lyp, lxp) = 0.0;
65 : }
66 : }
67 0 : }
68 :
69 : /* Rotate from the (x - x_1) ^ alpha_1 (x - x_2) ^ \alpha_2 to (x - x_{12}) ^ k
70 : * in all three directions */
71 :
72 44389 : void grid_compute_coefficients_dgemm(
73 : const int *lmin, const int *lmax, const int lp, const double prefactor,
74 : const tensor *const alpha, // [3][lb_max+1][la_max+1][lp+1]
75 : const tensor *const pab,
76 : tensor *coef_xyz) //[lp+1][lp+1][lp+1]
77 : {
78 : /* can be done with dgemms as well, since it is a change of basis from (x -
79 : * x1) (x - x2) to (x - x12)^alpha */
80 :
81 44389 : assert(alpha != NULL);
82 44389 : assert(coef_xyz != NULL);
83 44389 : assert(coef_xyz->data != NULL);
84 44389 : memset(coef_xyz->data, 0, coef_xyz->alloc_size_ * sizeof(double));
85 : // we need a proper fix for that. We can use the tensor structure for this
86 :
87 117327 : for (int lzb = 0; lzb <= lmax[1]; lzb++) {
88 177890 : for (int lyb = 0; lyb <= lmax[1] - lzb; lyb++) {
89 104952 : const int lxb_min = imax(lmin[1] - lzb - lyb, 0);
90 225253 : for (int lxb = lxb_min; lxb <= lmax[1] - lzb - lyb; lxb++) {
91 120301 : const int jco = coset(lxb, lyb, lzb);
92 326469 : for (int lza = 0; lza <= lmax[0]; lza++) {
93 508451 : for (int lya = 0; lya <= lmax[0] - lza; lya++) {
94 302283 : const int lxa_min = imax(lmin[0] - lza - lya, 0);
95 655153 : for (int lxa = lxa_min; lxa <= lmax[0] - lza - lya; lxa++) {
96 352870 : const int ico = coset(lxa, lya, lza);
97 352870 : const double pab_ = idx2(pab[0], jco, ico);
98 918466 : for (int lxp = 0; lxp <= lxa + lxb; lxp++) {
99 565596 : const double p1 =
100 565596 : idx4(alpha[0], 0, lxb, lxa, lxp) * pab_ * prefactor;
101 1858758 : for (int lzp = 0; lzp <= lp - lxa - lxb; lzp++) {
102 3623778 : for (int lyp = 0; lyp <= lp - lxa - lxb - lzp; lyp++) {
103 2330616 : const double p2 = idx4(alpha[0], 1, lyb, lya, lyp) *
104 2330616 : idx4(alpha[0], 2, lzb, lza, lzp) * p1;
105 2330616 : idx3(coef_xyz[0], lxp, lzp, lyp) += p2;
106 : }
107 : }
108 : }
109 : }
110 : }
111 : }
112 : }
113 : }
114 : }
115 44389 : }
116 :
117 : /* Rotate from (x - x_{12}) ^ k to (x - x_1) ^ alpha_1 (x - x_2) ^ \alpha_2 in
118 : * all three directions */
119 :
120 43611 : void grid_compute_vab(
121 : const int *const lmin, const int *const lmax, const int lp,
122 : const double prefactor,
123 : const tensor *const alpha, // transformation parameters (x - x_1)^n (x -
124 : // x_2)^m -> (x - x_12) ^ l
125 : const tensor *const coef_xyz, tensor *vab) {
126 : /* can be done with dgemms as well, since it is a change of basis from (x -
127 : * x1) (x - x2) to (x - x12)^alpha */
128 :
129 43611 : assert(alpha != NULL);
130 43611 : assert(coef_xyz != NULL);
131 43611 : assert(coef_xyz->data != NULL);
132 :
133 43611 : memset(vab->data, 0, sizeof(double) * vab->alloc_size_);
134 : // we need a proper fix for that. We can use the tensor structure for this
135 :
136 119897 : for (int lzb = 0; lzb <= lmax[1]; lzb++) {
137 191684 : for (int lyb = 0; lyb <= lmax[1] - lzb; lyb++) {
138 115398 : const int lxb_min = imax(lmin[1] - lzb - lyb, 0);
139 258640 : for (int lxb = lxb_min; lxb <= lmax[1] - lzb - lyb; lxb++) {
140 143242 : const int jco = coset(lxb, lyb, lzb);
141 466762 : for (int lza = 0; lza <= lmax[0]; lza++) {
142 932165 : for (int lya = 0; lya <= lmax[0] - lza; lya++) {
143 608645 : const int lxa_min = imax(lmin[0] - lza - lya, 0);
144 1586726 : for (int lxa = lxa_min; lxa <= lmax[0] - lza - lya; lxa++) {
145 978081 : const int ico = coset(lxa, lya, lza);
146 978081 : double pab_ = 0.0;
147 :
148 : /* this can be done with 3 dgemms actually but need to
149 : * set coef accordingly (triangular along the second
150 : * diagonal) */
151 :
152 2967826 : for (int lxp = 0; lxp <= lxa + lxb; lxp++) {
153 9357873 : for (int lzp = 0; lzp <= lp - lxa - lxb; lzp++) {
154 26851891 : for (int lyp = 0; lyp <= lp - lxa - lxb - lzp; lyp++) {
155 19483763 : const double p2 = idx4(alpha[0], 1, lyb, lya, lyp) *
156 19483763 : idx4(alpha[0], 2, lzb, lza, lzp) *
157 19483763 : idx4(alpha[0], 0, lxb, lxa, lxp) *
158 : prefactor;
159 19483763 : pab_ += idx3(coef_xyz[0], lyp, lxp, lzp) * p2;
160 : }
161 : }
162 : }
163 978081 : idx2(vab[0], jco, ico) += pab_;
164 : }
165 : }
166 : }
167 : }
168 : }
169 : }
170 43611 : }
171 :
172 : // *****************************************************************************
173 88000 : void grid_prepare_alpha_dgemm(const double ra[3], const double rb[3],
174 : const double rp[3], const int *lmax,
175 : tensor *alpha) {
176 88000 : assert(alpha != NULL);
177 : // Initialize with zeros.
178 88000 : memset(alpha->data, 0, alpha->alloc_size_ * sizeof(double));
179 :
180 : //
181 : // compute polynomial expansion coefs -> (x-a)**lxa (x-b)**lxb -> sum_{ls}
182 : // alpha(ls,lxa,lxb,1)*(x-p)**ls
183 : //
184 :
185 352000 : for (int iaxis = 0; iaxis < 3; iaxis++) {
186 264000 : const double drpa = rp[iaxis] - ra[iaxis];
187 264000 : const double drpb = rp[iaxis] - rb[iaxis];
188 728991 : for (int lxa = 0; lxa <= lmax[0]; lxa++) {
189 1296336 : for (int lxb = 0; lxb <= lmax[1]; lxb++) {
190 : double binomial_k_lxa = 1.0;
191 : double a = 1.0;
192 2174889 : for (int k = 0; k <= lxa; k++) {
193 : double binomial_l_lxb = 1.0;
194 : double b = 1.0;
195 3447798 : for (int l = 0; l <= lxb; l++) {
196 2104254 : idx4(alpha[0], iaxis, lxb, lxa, lxa - l + lxb - k) +=
197 2104254 : binomial_k_lxa * binomial_l_lxb * a * b;
198 2104254 : binomial_l_lxb *= ((double)(lxb - l)) / ((double)(l + 1));
199 2104254 : b *= drpb;
200 : }
201 1343544 : binomial_k_lxa *= ((double)(lxa - k)) / ((double)(k + 1));
202 1343544 : a *= drpa;
203 : }
204 : }
205 : }
206 : }
207 88000 : }
208 :
209 : /* this function computes the coefficients initially expressed in the cartesian
210 : * space to the grid space. It is inplane and can also be done with
211 : * matrix-matrix multiplication. It is in fact a tensor reduction. */
212 :
213 23275 : void grid_transform_coef_xzy_to_ikj(const double dh[3][3],
214 : const tensor *coef_xyz) {
215 23275 : assert(coef_xyz != NULL);
216 23275 : const int lp = coef_xyz->size[0] - 1;
217 23275 : tensor coef_ijk;
218 :
219 : /* this tensor corresponds to the term
220 : * $v_{11}^{k_{11}}v_{12}^{k_{12}}v_{13}^{k_{13}}
221 : * v_{21}^{k_{21}}v_{22}^{k_{22}}v_{23}^{k_{23}}
222 : * v_{31}^{k_{31}}v_{32}^{k_{32}} v_{33}^{k_{33}}$ in Eq.26 found section
223 : * III.A of the notes */
224 23275 : tensor hmatgridp;
225 :
226 46550 : initialize_tensor_3(&coef_ijk, coef_xyz->size[0], coef_xyz->size[1],
227 23275 : coef_xyz->size[2]);
228 :
229 23275 : coef_ijk.data = grid_allocate_scratch(sizeof(double) * coef_ijk.alloc_size_);
230 :
231 23275 : if (coef_ijk.data == NULL)
232 0 : abort();
233 :
234 23275 : memset(coef_ijk.data, 0, sizeof(double) * coef_ijk.alloc_size_);
235 23275 : initialize_tensor_3(&hmatgridp, coef_xyz->size[0], 3, 3);
236 :
237 46550 : hmatgridp.data =
238 23275 : grid_allocate_scratch(sizeof(double) * hmatgridp.alloc_size_);
239 :
240 : // transform using multinomials
241 93100 : for (int i = 0; i < 3; i++) {
242 279300 : for (int j = 0; j < 3; j++) {
243 209475 : idx3(hmatgridp, 0, j, i) = 1.0;
244 485865 : for (int k = 1; k <= lp; k++) {
245 276390 : idx3(hmatgridp, k, j, i) = idx3(hmatgridp, k - 1, j, i) * dh[j][i];
246 : }
247 : }
248 : }
249 :
250 : const int lpx = lp;
251 77260 : for (int klx = 0; klx <= lpx; klx++) {
252 153606 : for (int jlx = 0; jlx <= lpx - klx; jlx++) {
253 262180 : for (int ilx = 0; ilx <= lpx - klx - jlx; ilx++) {
254 162559 : const int lx = ilx + jlx + klx;
255 162559 : const int lpy = lp - lx;
256 162559 : const double tx = idx3(hmatgridp, ilx, 0, 0) *
257 162559 : idx3(hmatgridp, jlx, 1, 0) *
258 162559 : idx3(hmatgridp, klx, 2, 0) * fac(lx) * inv_fac[klx] *
259 162559 : inv_fac[jlx] * inv_fac[ilx];
260 :
261 407866 : for (int kly = 0; kly <= lpy; kly++) {
262 595812 : for (int jly = 0; jly <= lpy - kly; jly++) {
263 831430 : for (int ily = 0; ily <= lpy - kly - jly; ily++) {
264 480925 : const int ly = ily + jly + kly;
265 480925 : const int lpz = lp - lx - ly;
266 480925 : const double ty = tx * idx3(hmatgridp, ily, 0, 1) *
267 480925 : idx3(hmatgridp, jly, 1, 1) *
268 480925 : idx3(hmatgridp, kly, 2, 1) * fac(ly) *
269 480925 : inv_fac[kly] * inv_fac[jly] * inv_fac[ily];
270 1120396 : for (int klz = 0; klz <= lpz; klz++) {
271 1468650 : for (int jlz = 0; jlz <= lpz - klz; jlz++) {
272 1882396 : for (int ilz = 0; ilz <= lpz - klz - jlz; ilz++) {
273 1053217 : const int lz = ilz + jlz + klz;
274 1053217 : const int il = ilx + ily + ilz;
275 1053217 : const int jl = jlx + jly + jlz;
276 1053217 : const int kl = klx + kly + klz;
277 : // const int lijk= coef_map[kl][jl][il];
278 : /* the fac table is the factorial. It
279 : * would be better to use the
280 : * multinomials. */
281 1053217 : idx3(coef_ijk, il, kl, jl) +=
282 1053217 : idx3(coef_xyz[0], lx, lz, ly) * ty *
283 1053217 : idx3(hmatgridp, ilz, 0, 2) *
284 1053217 : idx3(hmatgridp, jlz, 1, 2) *
285 1053217 : idx3(hmatgridp, klz, 2, 2) * fac(lz) * inv_fac[klz] *
286 1053217 : inv_fac[jlz] * inv_fac[ilz];
287 : }
288 : }
289 : }
290 : }
291 : }
292 : }
293 : }
294 : }
295 : }
296 :
297 23275 : memcpy(coef_xyz->data, coef_ijk.data, sizeof(double) * coef_ijk.alloc_size_);
298 23275 : grid_free_scratch(coef_ijk.data);
299 23275 : grid_free_scratch(hmatgridp.data);
300 23275 : }
301 :
302 : /* Rotate the coefficients computed in the local grid coordinates to the
303 : * cartesians coorinates. The order of the indices indicates how the
304 : * coefficients are stored */
305 22497 : void grid_transform_coef_jik_to_yxz(const double dh[3][3],
306 : const tensor *coef_xyz) {
307 22497 : assert(coef_xyz);
308 22497 : const int lp = coef_xyz->size[0] - 1;
309 22497 : tensor coef_ijk;
310 :
311 : /* this tensor corresponds to the term
312 : * $v_{11}^{k_{11}}v_{12}^{k_{12}}v_{13}^{k_{13}}
313 : * v_{21}^{k_{21}}v_{22}^{k_{22}}v_{23}^{k_{23}}
314 : * v_{31}^{k_{31}}v_{32}^{k_{32}} v_{33}^{k_{33}}$ in Eq.26 found section
315 : * III.A of the notes */
316 22497 : tensor hmatgridp;
317 :
318 44994 : initialize_tensor_3(&coef_ijk, coef_xyz->size[0], coef_xyz->size[1],
319 22497 : coef_xyz->size[2]);
320 :
321 22497 : coef_ijk.data = grid_allocate_scratch(sizeof(double) * coef_ijk.alloc_size_);
322 22497 : if (coef_ijk.data == NULL)
323 0 : abort();
324 :
325 22497 : memset(coef_ijk.data, 0, sizeof(double) * coef_ijk.alloc_size_);
326 22497 : initialize_tensor_3(&hmatgridp, coef_xyz->size[0], 3, 3);
327 :
328 44994 : hmatgridp.data =
329 22497 : grid_allocate_scratch(sizeof(double) * hmatgridp.alloc_size_);
330 :
331 : // transform using multinomials
332 89988 : for (int i = 0; i < 3; i++) {
333 269964 : for (int j = 0; j < 3; j++) {
334 202473 : idx3(hmatgridp, 0, j, i) = 1.0;
335 537849 : for (int k = 1; k <= lp; k++) {
336 335376 : idx3(hmatgridp, k, j, i) = idx3(hmatgridp, k - 1, j, i) * dh[j][i];
337 : }
338 : }
339 : }
340 :
341 : const int lpx = lp;
342 82258 : for (int klx = 0; klx <= lpx; klx++) {
343 188726 : for (int jlx = 0; jlx <= lpx - klx; jlx++) {
344 377872 : for (int ilx = 0; ilx <= lpx - klx - jlx; ilx++) {
345 248907 : const int lx = ilx + jlx + klx;
346 248907 : const int lpy = lp - lx;
347 695552 : for (int kly = 0; kly <= lpy; kly++) {
348 1206486 : for (int jly = 0; jly <= lpy - kly; jly++) {
349 1999282 : for (int ily = 0; ily <= lpy - kly - jly; ily++) {
350 1239441 : const int ly = ily + jly + kly;
351 1239441 : const int lpz = lp - lx - ly;
352 3192148 : for (int klz = 0; klz <= lpz; klz++) {
353 4939324 : for (int jlz = 0; jlz <= lpz - klz; jlz++) {
354 7438266 : for (int ilz = 0; ilz <= lpz - klz - jlz; ilz++) {
355 4451649 : const int lz = ilz + jlz + klz;
356 4451649 : const int il = ilx + ily + ilz;
357 4451649 : const int jl = jlx + jly + jlz;
358 4451649 : const int kl = klx + kly + klz;
359 : // const int lijk= coef_map[kl][jl][il];
360 : /* the fac table is the factorial. It
361 : * would be better to use the
362 : * multinomials. */
363 4451649 : idx3(coef_ijk, ly, lx, lz) +=
364 4451649 : idx3(coef_xyz[0], jl, il, kl) *
365 4451649 : idx3(hmatgridp, ilx, 0, 0) *
366 4451649 : idx3(hmatgridp, jlx, 1, 0) *
367 4451649 : idx3(hmatgridp, klx, 2, 0) *
368 4451649 : idx3(hmatgridp, ily, 0, 1) *
369 4451649 : idx3(hmatgridp, jly, 1, 1) *
370 4451649 : idx3(hmatgridp, kly, 2, 1) *
371 4451649 : idx3(hmatgridp, ilz, 0, 2) *
372 4451649 : idx3(hmatgridp, jlz, 1, 2) *
373 4451649 : idx3(hmatgridp, klz, 2, 2) * fac(lx) * fac(ly) *
374 4451649 : fac(lz) /
375 4451649 : (fac(ilx) * fac(ily) * fac(ilz) * fac(jlx) * fac(jly) *
376 4451649 : fac(jlz) * fac(klx) * fac(kly) * fac(klz));
377 : }
378 : }
379 : }
380 : }
381 : }
382 : }
383 : }
384 : }
385 : }
386 22497 : memcpy(coef_xyz->data, coef_ijk.data, sizeof(double) * coef_ijk.alloc_size_);
387 22497 : grid_free_scratch(coef_ijk.data);
388 22497 : grid_free_scratch(hmatgridp.data);
389 22497 : }
|