Line data Source code
1 : /*----------------------------------------------------------------------------*/
2 : /* CP2K: A general program to perform molecular dynamics simulations */
3 : /* Copyright 2000-2024 CP2K developers group <https://cp2k.org> */
4 : /* */
5 : /* SPDX-License-Identifier: BSD-3-Clause */
6 : /*----------------------------------------------------------------------------*/
7 :
8 : #include <assert.h>
9 : #include <math.h>
10 : #include <stddef.h>
11 : #include <stdio.h>
12 : #include <stdlib.h>
13 : #include <string.h>
14 :
15 : #include "common/grid_common.h"
16 : #include "common/grid_constants.h"
17 : #include "common/grid_library.h"
18 : #include "grid_task_list.h"
19 :
20 : /*******************************************************************************
21 : * \brief Allocates a task list which can be passed to grid_collocate_task_list.
22 : * See grid_task_list.h for details.
23 : * \author Ole Schuett
24 : ******************************************************************************/
25 13462 : void grid_create_task_list(
26 : const bool orthorhombic, const int ntasks, const int nlevels,
27 : const int natoms, const int nkinds, const int nblocks,
28 : const int block_offsets[nblocks], const double atom_positions[natoms][3],
29 : const int atom_kinds[natoms], const grid_basis_set *basis_sets[nkinds],
30 : const int level_list[ntasks], const int iatom_list[ntasks],
31 : const int jatom_list[ntasks], const int iset_list[ntasks],
32 : const int jset_list[ntasks], const int ipgf_list[ntasks],
33 : const int jpgf_list[ntasks], const int border_mask_list[ntasks],
34 : const int block_num_list[ntasks], const double radius_list[ntasks],
35 : const double rab_list[ntasks][3], const int npts_global[nlevels][3],
36 : const int npts_local[nlevels][3], const int shift_local[nlevels][3],
37 : const int border_width[nlevels][3], const double dh[nlevels][3][3],
38 : const double dh_inv[nlevels][3][3], grid_task_list **task_list_out) {
39 :
40 13462 : const grid_library_config config = grid_library_get_config();
41 :
42 13462 : grid_task_list *task_list = NULL;
43 :
44 13462 : if (*task_list_out == NULL) {
45 8068 : task_list = malloc(sizeof(grid_task_list));
46 8068 : assert(task_list != NULL);
47 8068 : memset(task_list, 0, sizeof(grid_task_list));
48 :
49 : // Resolve AUTO to a concrete backend.
50 8068 : if (config.backend == GRID_BACKEND_AUTO) {
51 :
52 : #if defined(__OFFLOAD_HIP) && !defined(__NO_OFFLOAD_GRID)
53 : task_list->backend = GRID_BACKEND_HIP;
54 : #elif defined(__OFFLOAD) && !defined(__NO_OFFLOAD_GRID)
55 : task_list->backend = GRID_BACKEND_GPU;
56 : #else
57 8048 : task_list->backend = GRID_BACKEND_CPU;
58 : #endif
59 : } else {
60 20 : task_list->backend = config.backend;
61 : }
62 : } else {
63 : // Reuse existing task list.
64 5394 : task_list = *task_list_out;
65 5394 : free(task_list->npts_local);
66 : }
67 :
68 : // Store npts_local for bounds checking and validation.
69 13462 : task_list->nlevels = nlevels;
70 13462 : size_t size = nlevels * 3 * sizeof(int);
71 13462 : task_list->npts_local = malloc(size);
72 13462 : assert(task_list->npts_local != NULL);
73 13462 : memcpy(task_list->npts_local, npts_local, size);
74 :
75 : // Always create reference backend because it might be needed for validation.
76 13462 : grid_ref_create_task_list(
77 : orthorhombic, ntasks, nlevels, natoms, nkinds, nblocks, block_offsets,
78 : atom_positions, atom_kinds, basis_sets, level_list, iatom_list,
79 : jatom_list, iset_list, jset_list, ipgf_list, jpgf_list, border_mask_list,
80 : block_num_list, radius_list, rab_list, npts_global, npts_local,
81 : shift_local, border_width, dh, dh_inv, &task_list->ref);
82 :
83 : // Create other backend, if selected.
84 13462 : switch (task_list->backend) {
85 : case GRID_BACKEND_REF:
86 : break; // was already created above
87 13438 : case GRID_BACKEND_CPU:
88 13438 : grid_cpu_create_task_list(
89 : orthorhombic, ntasks, nlevels, natoms, nkinds, nblocks, block_offsets,
90 : atom_positions, atom_kinds, basis_sets, level_list, iatom_list,
91 : jatom_list, iset_list, jset_list, ipgf_list, jpgf_list,
92 : border_mask_list, block_num_list, radius_list, rab_list, npts_global,
93 : npts_local, shift_local, border_width, dh, dh_inv, &task_list->cpu);
94 13438 : break;
95 20 : case GRID_BACKEND_DGEMM:
96 20 : grid_dgemm_create_task_list(
97 : orthorhombic, ntasks, nlevels, natoms, nkinds, nblocks, block_offsets,
98 : atom_positions, atom_kinds, basis_sets, level_list, iatom_list,
99 : jatom_list, iset_list, jset_list, ipgf_list, jpgf_list,
100 : border_mask_list, block_num_list, radius_list, rab_list, npts_global,
101 : npts_local, shift_local, border_width, dh, dh_inv, &task_list->dgemm);
102 20 : break;
103 :
104 0 : case GRID_BACKEND_GPU:
105 : #if (defined(__OFFLOAD) && !defined(__NO_OFFLOAD_GRID))
106 : grid_gpu_create_task_list(
107 : orthorhombic, ntasks, nlevels, natoms, nkinds, nblocks, block_offsets,
108 : atom_positions, atom_kinds, basis_sets, level_list, iatom_list,
109 : jatom_list, iset_list, jset_list, ipgf_list, jpgf_list,
110 : border_mask_list, block_num_list, radius_list, rab_list, npts_global,
111 : npts_local, shift_local, border_width, dh, dh_inv, &task_list->gpu);
112 : #else
113 0 : fprintf(stderr,
114 : "Error: The GPU grid backend is not available. "
115 : "Please re-compile with -D__OFFLOAD_CUDA or -D__OFFLOAD_HIP");
116 0 : abort();
117 : #endif
118 0 : break;
119 :
120 0 : case GRID_BACKEND_HIP:
121 : #if defined(__OFFLOAD_HIP) && !defined(__NO_OFFLOAD_GRID)
122 : grid_hip_create_task_list(
123 : orthorhombic, ntasks, nlevels, natoms, nkinds, nblocks, block_offsets,
124 : &atom_positions[0][0], atom_kinds, basis_sets, level_list, iatom_list,
125 : jatom_list, iset_list, jset_list, ipgf_list, jpgf_list,
126 : border_mask_list, block_num_list, radius_list, &rab_list[0][0],
127 : &npts_global[0][0], &npts_local[0][0], &shift_local[0][0],
128 : &border_width[0][0], &dh[0][0][0], &dh_inv[0][0][0], &task_list->hip);
129 : #else
130 0 : fprintf(stderr, "Error: The HIP grid backend is not available. "
131 : "Please re-compile with -D__OFFLOAD_HIP");
132 0 : abort();
133 : #endif
134 0 : break;
135 :
136 0 : default:
137 0 : printf("Error: Unknown grid backend: %i.\n", config.backend);
138 0 : abort();
139 13462 : break;
140 : }
141 :
142 13462 : *task_list_out = task_list;
143 13462 : }
144 :
145 : /*******************************************************************************
146 : * \brief Deallocates given task list, basis_sets have to be freed separately.
147 : * \author Ole Schuett
148 : ******************************************************************************/
149 8068 : void grid_free_task_list(grid_task_list *task_list) {
150 :
151 8068 : if (task_list->ref != NULL) {
152 8068 : grid_ref_free_task_list(task_list->ref);
153 8068 : task_list->ref = NULL;
154 : }
155 8068 : if (task_list->cpu != NULL) {
156 8056 : grid_cpu_free_task_list(task_list->cpu);
157 8056 : task_list->cpu = NULL;
158 : }
159 8068 : if (task_list->dgemm != NULL) {
160 8 : grid_dgemm_free_task_list(task_list->dgemm);
161 8 : task_list->dgemm = NULL;
162 : }
163 : #if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_GRID)
164 : if (task_list->gpu != NULL) {
165 : grid_gpu_free_task_list(task_list->gpu);
166 : task_list->gpu = NULL;
167 : }
168 : #endif
169 : #if defined(__OFFLOAD_HIP) && !defined(__NO_OFFLOAD_GRID)
170 : if (task_list->hip != NULL) {
171 : grid_hip_free_task_list(task_list->hip);
172 : task_list->hip = NULL;
173 : }
174 : #endif
175 :
176 8068 : free(task_list->npts_local);
177 8068 : free(task_list);
178 8068 : }
179 :
180 : /*******************************************************************************
181 : * \brief Collocate all tasks of in given list onto given grids.
182 : * See grid_task_list.h for details.
183 : * \author Ole Schuett
184 : ******************************************************************************/
185 193883 : void grid_collocate_task_list(const grid_task_list *task_list,
186 : const enum grid_func func, const int nlevels,
187 : const int npts_local[nlevels][3],
188 : const offload_buffer *pab_blocks,
189 : offload_buffer *grids[nlevels]) {
190 :
191 : // Bounds check.
192 193883 : assert(task_list->nlevels == nlevels);
193 961331 : for (int ilevel = 0; ilevel < nlevels; ilevel++) {
194 767448 : assert(task_list->npts_local[ilevel][0] == npts_local[ilevel][0]);
195 767448 : assert(task_list->npts_local[ilevel][1] == npts_local[ilevel][1]);
196 767448 : assert(task_list->npts_local[ilevel][2] == npts_local[ilevel][2]);
197 : }
198 :
199 193883 : switch (task_list->backend) {
200 26 : case GRID_BACKEND_REF:
201 26 : grid_ref_collocate_task_list(task_list->ref, func, nlevels, pab_blocks,
202 : grids);
203 26 : break;
204 193699 : case GRID_BACKEND_CPU:
205 193699 : grid_cpu_collocate_task_list(task_list->cpu, func, nlevels, pab_blocks,
206 : grids);
207 193699 : break;
208 158 : case GRID_BACKEND_DGEMM:
209 158 : grid_dgemm_collocate_task_list(task_list->dgemm, func, nlevels, pab_blocks,
210 : grids);
211 158 : break;
212 : #if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_GRID)
213 : case GRID_BACKEND_GPU:
214 : grid_gpu_collocate_task_list(task_list->gpu, func, nlevels, pab_blocks,
215 : grids);
216 : break;
217 : #endif
218 : #if defined(__OFFLOAD_HIP) && !defined(__NO_OFFLOAD_GRID)
219 : case GRID_BACKEND_HIP:
220 : grid_hip_collocate_task_list(task_list->hip, func, nlevels, pab_blocks,
221 : grids);
222 : break;
223 : #endif
224 0 : default:
225 0 : printf("Error: Unknown grid backend: %i.\n", task_list->backend);
226 0 : abort();
227 193883 : break;
228 : }
229 :
230 : // Perform validation if enabled.
231 193883 : if (grid_library_get_config().validate) {
232 : // Allocate space for reference results.
233 26 : offload_buffer *grids_ref[nlevels];
234 130 : for (int level = 0; level < nlevels; level++) {
235 104 : const int npts_local_total =
236 104 : npts_local[level][0] * npts_local[level][1] * npts_local[level][2];
237 104 : grids_ref[level] = NULL;
238 104 : offload_create_buffer(npts_local_total, &grids_ref[level]);
239 : }
240 :
241 : // Call reference implementation.
242 26 : grid_ref_collocate_task_list(task_list->ref, func, nlevels, pab_blocks,
243 : grids_ref);
244 :
245 : // Compare results.
246 26 : const double tolerance = 1e-12;
247 26 : double max_rel_diff = 0.0;
248 130 : for (int level = 0; level < nlevels; level++) {
249 2374 : for (int i = 0; i < npts_local[level][0]; i++) {
250 81112 : for (int j = 0; j < npts_local[level][1]; j++) {
251 3619188 : for (int k = 0; k < npts_local[level][2]; k++) {
252 3540346 : const int idx = k * npts_local[level][1] * npts_local[level][0] +
253 3540346 : j * npts_local[level][0] + i;
254 3540346 : const double ref_value = grids_ref[level]->host_buffer[idx];
255 3540346 : const double test_value = grids[level]->host_buffer[idx];
256 3540346 : const double diff = fabs(test_value - ref_value);
257 3540346 : const double rel_diff = diff / fmax(1.0, fabs(ref_value));
258 3540346 : max_rel_diff = fmax(max_rel_diff, rel_diff);
259 3540346 : if (rel_diff > tolerance) {
260 0 : fprintf(stderr, "Error: Validation failure in grid collocate\n");
261 0 : fprintf(stderr, " diff: %le\n", diff);
262 0 : fprintf(stderr, " rel_diff: %le\n", rel_diff);
263 0 : fprintf(stderr, " value: %le\n", ref_value);
264 0 : fprintf(stderr, " level: %i\n", level);
265 0 : fprintf(stderr, " ijk: %i %i %i\n", i, j, k);
266 0 : abort();
267 : }
268 : }
269 : }
270 : }
271 104 : offload_free_buffer(grids_ref[level]);
272 104 : printf("Validated grid collocate, max rel. diff: %le\n", max_rel_diff);
273 : }
274 : }
275 193883 : }
276 :
277 : /*******************************************************************************
278 : * \brief Integrate all tasks of in given list from given grids.
279 : * See grid_task_list.h for details.
280 : * \author Ole Schuett
281 : ******************************************************************************/
282 181981 : void grid_integrate_task_list(
283 : const grid_task_list *task_list, const bool compute_tau, const int natoms,
284 : const int nlevels, const int npts_local[nlevels][3],
285 : const offload_buffer *pab_blocks, const offload_buffer *grids[nlevels],
286 : offload_buffer *hab_blocks, double forces[natoms][3], double virial[3][3]) {
287 :
288 : // Bounds check.
289 181981 : assert(task_list->nlevels == nlevels);
290 901911 : for (int ilevel = 0; ilevel < nlevels; ilevel++) {
291 719930 : assert(task_list->npts_local[ilevel][0] == npts_local[ilevel][0]);
292 719930 : assert(task_list->npts_local[ilevel][1] == npts_local[ilevel][1]);
293 719930 : assert(task_list->npts_local[ilevel][2] == npts_local[ilevel][2]);
294 : }
295 :
296 181981 : assert(forces == NULL || pab_blocks != NULL);
297 181981 : assert(virial == NULL || pab_blocks != NULL);
298 :
299 181981 : switch (task_list->backend) {
300 : #if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_GRID)
301 : case GRID_BACKEND_GPU:
302 : grid_gpu_integrate_task_list(task_list->gpu, compute_tau, natoms, nlevels,
303 : pab_blocks, grids, hab_blocks, forces, virial);
304 : break;
305 : #endif
306 : #if defined(__OFFLOAD_HIP) && !defined(__NO_OFFLOAD_GRID)
307 : case GRID_BACKEND_HIP:
308 : grid_hip_integrate_task_list(task_list->hip, compute_tau, nlevels,
309 : pab_blocks, grids, hab_blocks, &forces[0][0],
310 : &virial[0][0]);
311 : break;
312 : #endif
313 156 : case GRID_BACKEND_DGEMM:
314 156 : grid_dgemm_integrate_task_list(task_list->dgemm, compute_tau, natoms,
315 : nlevels, pab_blocks, grids, hab_blocks,
316 : forces, virial);
317 156 : break;
318 181801 : case GRID_BACKEND_CPU:
319 181801 : grid_cpu_integrate_task_list(task_list->cpu, compute_tau, natoms, nlevels,
320 : pab_blocks, grids, hab_blocks, forces, virial);
321 181801 : break;
322 24 : case GRID_BACKEND_REF:
323 24 : grid_ref_integrate_task_list(task_list->ref, compute_tau, natoms, nlevels,
324 : pab_blocks, grids, hab_blocks, forces, virial);
325 24 : break;
326 0 : default:
327 0 : printf("Error: Unknown grid backend: %i.\n", task_list->backend);
328 0 : abort();
329 181981 : break;
330 : }
331 :
332 : // Perform validation if enabled.
333 181981 : if (grid_library_get_config().validate) {
334 : // Allocate space for reference results.
335 24 : const int hab_length = hab_blocks->size / sizeof(double);
336 24 : offload_buffer *hab_blocks_ref = NULL;
337 24 : offload_create_buffer(hab_length, &hab_blocks_ref);
338 24 : double forces_ref[natoms][3], virial_ref[3][3];
339 :
340 : // Call reference implementation.
341 48 : grid_ref_integrate_task_list(task_list->ref, compute_tau, natoms, nlevels,
342 : pab_blocks, grids, hab_blocks_ref,
343 : (forces != NULL) ? forces_ref : NULL,
344 : (virial != NULL) ? virial_ref : NULL);
345 :
346 : // Compare hab.
347 24 : const double hab_tolerance = 1e-12;
348 24 : double hab_max_rel_diff = 0.0;
349 2025 : for (int i = 0; i < hab_length; i++) {
350 2001 : const double ref_value = hab_blocks_ref->host_buffer[i];
351 2001 : const double test_value = hab_blocks->host_buffer[i];
352 2001 : const double diff = fabs(test_value - ref_value);
353 2001 : const double rel_diff = diff / fmax(1.0, fabs(ref_value));
354 2001 : hab_max_rel_diff = fmax(hab_max_rel_diff, rel_diff);
355 2001 : if (rel_diff > hab_tolerance) {
356 0 : fprintf(stderr, "Error: Validation failure in grid integrate\n");
357 0 : fprintf(stderr, " hab diff: %le\n", diff);
358 0 : fprintf(stderr, " hab rel_diff: %le\n", rel_diff);
359 0 : fprintf(stderr, " hab value: %le\n", ref_value);
360 0 : fprintf(stderr, " hab i: %i\n", i);
361 0 : abort();
362 : }
363 : }
364 :
365 : // Compare forces.
366 24 : const double forces_tolerance = 1e-8; // account for higher numeric noise
367 24 : double forces_max_rel_diff = 0.0;
368 24 : if (forces != NULL) {
369 14 : for (int iatom = 0; iatom < natoms; iatom++) {
370 40 : for (int idir = 0; idir < 3; idir++) {
371 30 : const double ref_value = forces_ref[iatom][idir];
372 30 : const double test_value = forces[iatom][idir];
373 30 : const double diff = fabs(test_value - ref_value);
374 30 : const double rel_diff = diff / fmax(1.0, fabs(ref_value));
375 30 : forces_max_rel_diff = fmax(forces_max_rel_diff, rel_diff);
376 30 : if (rel_diff > forces_tolerance) {
377 0 : fprintf(stderr, "Error: Validation failure in grid integrate\n");
378 0 : fprintf(stderr, " forces diff: %le\n", diff);
379 0 : fprintf(stderr, " forces rel_diff: %le\n", rel_diff);
380 0 : fprintf(stderr, " forces value: %le\n", ref_value);
381 0 : fprintf(stderr, " forces atom: %i\n", iatom);
382 0 : fprintf(stderr, " forces dir: %i\n", idir);
383 0 : abort();
384 : }
385 : }
386 : }
387 : }
388 :
389 : // Compare virial.
390 24 : const double virial_tolerance = 1e-8; // account for higher numeric noise
391 24 : double virial_max_rel_diff = 0.0;
392 24 : if (virial != NULL) {
393 0 : for (int i = 0; i < 3; i++) {
394 0 : for (int j = 0; j < 3; j++) {
395 0 : const double ref_value = virial_ref[i][j];
396 0 : const double test_value = virial[i][j];
397 0 : const double diff = fabs(test_value - ref_value);
398 0 : const double rel_diff = diff / fmax(1.0, fabs(ref_value));
399 0 : virial_max_rel_diff = fmax(virial_max_rel_diff, rel_diff);
400 0 : if (rel_diff > virial_tolerance) {
401 0 : fprintf(stderr, "Error: Validation failure in grid integrate\n");
402 0 : fprintf(stderr, " virial diff: %le\n", diff);
403 0 : fprintf(stderr, " virial rel_diff: %le\n", rel_diff);
404 0 : fprintf(stderr, " virial value: %le\n", ref_value);
405 0 : fprintf(stderr, " virial ij: %i %i\n", i, j);
406 0 : abort();
407 : }
408 : }
409 : }
410 : }
411 :
412 24 : printf("Validated grid_integrate, max rel. diff: %le %le %le\n",
413 : hab_max_rel_diff, forces_max_rel_diff, virial_max_rel_diff);
414 24 : offload_free_buffer(hab_blocks_ref);
415 : }
416 181981 : }
417 :
418 : // EOF
|