Line data Source code
1 : /*----------------------------------------------------------------------------*/
2 : /* CP2K: A general program to perform molecular dynamics simulations */
3 : /* Copyright 2000-2024 CP2K developers group <https://cp2k.org> */
4 : /* */
5 : /* SPDX-License-Identifier: BSD-3-Clause */
6 : /*----------------------------------------------------------------------------*/
7 :
8 : #include <assert.h>
9 : #include <fenv.h>
10 : #include <limits.h>
11 : #include <math.h>
12 : #include <omp.h>
13 : #include <stdarg.h>
14 : #include <stdbool.h>
15 : #include <stdio.h>
16 : #include <stdlib.h>
17 : #include <string.h>
18 :
19 : #include "../offload/offload_buffer.h"
20 : #include "common/grid_common.h"
21 : #include "grid_replay.h"
22 :
23 : #include "cpu/grid_cpu_collocate.h"
24 : #include "cpu/grid_cpu_integrate.h"
25 : #include "grid_task_list.h"
26 :
27 : /*******************************************************************************
28 : * \brief Reads next line from given filehandle and handles errors.
29 : * \author Ole Schuett
30 : ******************************************************************************/
31 655456 : static void read_next_line(char line[], int length, FILE *fp) {
32 1310912 : if (fgets(line, length, fp) == NULL) {
33 0 : fprintf(stderr, "Error: Could not read line.\n");
34 0 : abort();
35 : }
36 655456 : }
37 :
38 : /*******************************************************************************
39 : * \brief Parses next line from file, expecting it to match "${key} ${format}".
40 : * \author Ole Schuett
41 : ******************************************************************************/
42 655248 : static void parse_next_line(const char key[], FILE *fp, const char format[],
43 : const int nargs, ...) {
44 655248 : char line[100];
45 655248 : read_next_line(line, sizeof(line), fp);
46 :
47 655248 : char full_format[100];
48 655248 : strcpy(full_format, key);
49 655248 : strcat(full_format, " ");
50 655248 : strcat(full_format, format);
51 :
52 655248 : va_list varargs;
53 655248 : va_start(varargs, nargs);
54 655248 : if (vsscanf(line, full_format, varargs) != nargs) {
55 0 : fprintf(stderr, "Error: Could not parse line.\n");
56 0 : fprintf(stderr, "Line: %s\n", line);
57 0 : fprintf(stderr, "Format: %s\n", full_format);
58 0 : abort();
59 : }
60 655248 : va_end(varargs);
61 655248 : }
62 :
63 : /*******************************************************************************
64 : * \brief Shorthand for parsing a single integer value.
65 : * \author Ole Schuett
66 : ******************************************************************************/
67 1248 : static int parse_int(const char key[], FILE *fp) {
68 1248 : int value;
69 1248 : parse_next_line(key, fp, "%i", 1, &value);
70 1248 : return value;
71 : }
72 :
73 : /*******************************************************************************
74 : * \brief Shorthand for parsing a vector of three integer values.
75 : * \author Ole Schuett
76 : ******************************************************************************/
77 416 : static void parse_int3(const char key[], FILE *fp, int vec[3]) {
78 416 : parse_next_line(key, fp, "%i %i %i", 3, &vec[0], &vec[1], &vec[2]);
79 416 : }
80 :
81 : /*******************************************************************************
82 : * \brief Shorthand for parsing a single double value.
83 : * \author Ole Schuett
84 : ******************************************************************************/
85 416 : static double parse_double(const char key[], FILE *fp) {
86 416 : double value;
87 416 : parse_next_line(key, fp, "%le", 1, &value);
88 416 : return value;
89 : }
90 :
91 : /*******************************************************************************
92 : * \brief Shorthand for parsing a vector of three double values.
93 : * \author Ole Schuett
94 : ******************************************************************************/
95 416 : static void parse_double3(const char key[], FILE *fp, double vec[3]) {
96 416 : parse_next_line(key, fp, "%le %le %le", 3, &vec[0], &vec[1], &vec[2]);
97 416 : }
98 :
99 : /*******************************************************************************
100 : * \brief Shorthand for parsing a 3x3 matrix of doubles.
101 : * \author Ole Schuett
102 : ******************************************************************************/
103 312 : static void parse_double3x3(const char key[], FILE *fp, double mat[3][3]) {
104 312 : char format[100];
105 1248 : for (int i = 0; i < 3; i++) {
106 936 : sprintf(format, "%i %%le %%le %%le", i);
107 936 : parse_next_line(key, fp, format, 3, &mat[i][0], &mat[i][1], &mat[i][2]);
108 : }
109 312 : }
110 :
111 : /*******************************************************************************
112 : * \brief Creates mock basis set using the identity as decontraction matrix.
113 : * \author Ole Schuett
114 : ******************************************************************************/
115 104 : static void create_dummy_basis_set(const int size, const int lmin,
116 : const int lmax, const double zet,
117 104 : grid_basis_set **basis_set) {
118 :
119 104 : double sphi_mutable[size][size];
120 1788 : for (int i = 0; i < size; i++) {
121 47808 : for (int j = 0; j < size; j++) {
122 90564 : sphi_mutable[i][j] = (i == j) ? 1.0 : 0.0; // identity matrix
123 : }
124 : }
125 104 : const double(*sphi)[size] = (const double(*)[size])sphi_mutable;
126 :
127 104 : const int npgf = size / ncoset(lmax);
128 104 : assert(size == npgf * ncoset(lmax));
129 :
130 104 : const int first_sgf[1] = {1};
131 :
132 104 : double zet_array_mutable[1][npgf];
133 808 : for (int i = 0; i < npgf; i++) {
134 704 : zet_array_mutable[0][i] = zet;
135 : }
136 104 : const double(*zet_array)[npgf] = (const double(*)[npgf])zet_array_mutable;
137 :
138 104 : grid_create_basis_set(/*nset=*/1,
139 : /*nsgf=*/size,
140 : /*maxco=*/size,
141 : /*maxpgf=*/npgf,
142 : /*lmin=*/&lmin,
143 : /*lmax=*/&lmax,
144 : /*npgf=*/&npgf,
145 : /*nsgf_set=*/&size,
146 : /*first_sgf=*/first_sgf,
147 : /*sphi=*/sphi,
148 : /*zet=*/zet_array, basis_set);
149 104 : }
150 :
151 : /*******************************************************************************
152 : * \brief Creates mock task list with one task per cycle.
153 : * \author Ole Schuett
154 : ******************************************************************************/
155 52 : static void create_dummy_task_list(
156 : const bool orthorhombic, const int border_mask, const double ra[3],
157 : const double rab[3], const double radius, const grid_basis_set *basis_set_a,
158 : const grid_basis_set *basis_set_b, const int o1, const int o2,
159 : const int la_max, const int lb_max, const int cycles,
160 : const int cycles_per_block, const int npts_global[][3],
161 : const int npts_local[][3], const int shift_local[][3],
162 : const int border_width[][3], const double dh[][3][3],
163 52 : const double dh_inv[][3][3], grid_task_list **task_list) {
164 :
165 52 : const int ntasks = cycles;
166 52 : const int nlevels = 1;
167 52 : const int natoms = 2;
168 52 : const int nkinds = 2;
169 52 : int nblocks = cycles / cycles_per_block + 1;
170 :
171 : /* we can not have more blocks than the number of tasks */
172 52 : if (cycles == 1) {
173 52 : nblocks = 1;
174 : }
175 :
176 52 : int block_offsets[nblocks];
177 52 : memset(block_offsets, 0, nblocks * sizeof(int)); // all point to same data
178 52 : const double atom_positions[2][3] = {
179 52 : {ra[0], ra[1], ra[2]}, {rab[0] + ra[0], rab[1] + ra[1], rab[2] + ra[2]}};
180 52 : const int atom_kinds[2] = {1, 2};
181 52 : const grid_basis_set *basis_sets[2] = {basis_set_a, basis_set_b};
182 52 : const int ipgf = o1 / ncoset(la_max) + 1;
183 52 : const int jpgf = o2 / ncoset(lb_max) + 1;
184 52 : assert(o1 == (ipgf - 1) * ncoset(la_max));
185 52 : assert(o2 == (jpgf - 1) * ncoset(lb_max));
186 :
187 52 : int level_list[ntasks], iatom_list[ntasks], jatom_list[ntasks];
188 52 : int iset_list[ntasks], jset_list[ntasks], ipgf_list[ntasks],
189 52 : jpgf_list[ntasks];
190 52 : int border_mask_list[ntasks], block_num_list[ntasks];
191 52 : double radius_list[ntasks], rab_list_mutable[ntasks][3];
192 104 : for (int i = 0; i < cycles; i++) {
193 52 : level_list[i] = 1;
194 52 : iatom_list[i] = 1;
195 52 : jatom_list[i] = 2;
196 52 : iset_list[i] = 1;
197 52 : jset_list[i] = 1;
198 52 : ipgf_list[i] = ipgf;
199 52 : jpgf_list[i] = jpgf;
200 52 : border_mask_list[i] = border_mask;
201 52 : block_num_list[i] = i / cycles_per_block + 1;
202 52 : radius_list[i] = radius;
203 52 : rab_list_mutable[i][0] = rab[0];
204 52 : rab_list_mutable[i][1] = rab[1];
205 52 : rab_list_mutable[i][2] = rab[2];
206 : }
207 52 : const double(*rab_list)[3] = (const double(*)[3])rab_list_mutable;
208 :
209 52 : grid_create_task_list(
210 : orthorhombic, ntasks, nlevels, natoms, nkinds, nblocks, block_offsets,
211 : atom_positions, atom_kinds, basis_sets, level_list, iatom_list,
212 : jatom_list, iset_list, jset_list, ipgf_list, jpgf_list, border_mask_list,
213 : block_num_list, radius_list, rab_list, npts_global, npts_local,
214 : shift_local, border_width, dh, dh_inv, task_list);
215 52 : }
216 :
217 : /*******************************************************************************
218 : * \brief Reads a .task file, collocates/integrates it, and compares results.
219 : * See grid_replay.h for details.
220 : * \author Ole Schuett
221 : ******************************************************************************/
222 104 : bool grid_replay(const char *filename, const int cycles, const bool collocate,
223 : const bool batch, const int cycles_per_block,
224 104 : const double tolerance) {
225 :
226 104 : if (cycles < 1) {
227 0 : fprintf(stderr, "Error: Cycles have to be greater than zero.\n");
228 0 : exit(1);
229 : }
230 :
231 104 : if (cycles_per_block < 1 || cycles_per_block > cycles) {
232 0 : fprintf(stderr,
233 : "Error: Cycles per block has to be between 1 and cycles.\n");
234 0 : exit(1);
235 : }
236 :
237 104 : FILE *fp = fopen(filename, "r");
238 104 : if (fp == NULL) {
239 0 : fprintf(stderr, "Could not open task file: %s\n", filename);
240 0 : exit(1);
241 : }
242 :
243 104 : char header_line[100];
244 104 : read_next_line(header_line, sizeof(header_line), fp);
245 104 : if (strcmp(header_line, "#Grid task v10\n") != 0) {
246 0 : fprintf(stderr, "Error: Wrong file header.\n");
247 0 : abort();
248 : }
249 :
250 104 : const bool orthorhombic = parse_int("orthorhombic", fp);
251 104 : const int border_mask = parse_int("border_mask", fp);
252 104 : const enum grid_func func = (enum grid_func)parse_int("func", fp);
253 104 : const bool compute_tau = (func == GRID_FUNC_DADB);
254 104 : const int la_max = parse_int("la_max", fp);
255 104 : const int la_min = parse_int("la_min", fp);
256 104 : const int lb_max = parse_int("lb_max", fp);
257 104 : const int lb_min = parse_int("lb_min", fp);
258 104 : const double zeta = parse_double("zeta", fp);
259 104 : const double zetb = parse_double("zetb", fp);
260 104 : const double rscale = parse_double("rscale", fp);
261 :
262 104 : double dh_mutable[3][3], dh_inv_mutable[3][3], ra[3], rab[3];
263 104 : parse_double3x3("dh", fp, dh_mutable);
264 104 : parse_double3x3("dh_inv", fp, dh_inv_mutable);
265 104 : parse_double3("ra", fp, ra);
266 104 : parse_double3("rab", fp, rab);
267 104 : const double(*dh)[3] = (const double(*)[3])dh_mutable;
268 104 : const double(*dh_inv)[3] = (const double(*)[3])dh_inv_mutable;
269 :
270 104 : int npts_global[3], npts_local[3], shift_local[3], border_width[3];
271 104 : parse_int3("npts_global", fp, npts_global);
272 104 : parse_int3("npts_local", fp, npts_local);
273 104 : parse_int3("shift_local", fp, shift_local);
274 104 : parse_int3("border_width", fp, border_width);
275 :
276 104 : const double radius = parse_double("radius", fp);
277 104 : const int o1 = parse_int("o1", fp);
278 104 : const int o2 = parse_int("o2", fp);
279 104 : const int n1 = parse_int("n1", fp);
280 104 : const int n2 = parse_int("n2", fp);
281 :
282 104 : double pab_mutable[n2][n1];
283 104 : char format[100];
284 1688 : for (int i = 0; i < n2; i++) {
285 46120 : for (int j = 0; j < n1; j++) {
286 44536 : sprintf(format, "%i %i %%le", i, j);
287 44536 : parse_next_line("pab", fp, format, 1, &pab_mutable[i][j]);
288 : }
289 : }
290 104 : const double(*pab)[n1] = (const double(*)[n1])pab_mutable;
291 :
292 104 : const int npts_local_total = npts_local[0] * npts_local[1] * npts_local[2];
293 104 : offload_buffer *grid_ref = NULL;
294 104 : offload_create_buffer(npts_local_total, &grid_ref);
295 104 : memset(grid_ref->host_buffer, 0, npts_local_total * sizeof(double));
296 :
297 104 : const int ngrid_nonzero = parse_int("ngrid_nonzero", fp);
298 578328 : for (int n = 0; n < ngrid_nonzero; n++) {
299 578224 : int i, j, k;
300 578224 : double value;
301 578224 : parse_next_line("grid", fp, "%i %i %i %le", 4, &i, &j, &k, &value);
302 578224 : grid_ref->host_buffer[k * npts_local[1] * npts_local[0] +
303 578224 : j * npts_local[0] + i] = value;
304 : }
305 :
306 104 : double hab_ref[n2][n1];
307 104 : memset(hab_ref, 0, n2 * n1 * sizeof(double));
308 896 : for (int i = o2; i < ncoset(lb_max) + o2; i++) {
309 29848 : for (int j = o1; j < ncoset(la_max) + o1; j++) {
310 29056 : sprintf(format, "%i %i %%le", i, j);
311 29056 : parse_next_line("hab", fp, format, 1, &hab_ref[i][j]);
312 : }
313 : }
314 :
315 104 : double forces_ref[2][3];
316 104 : parse_double3("force_a", fp, forces_ref[0]);
317 104 : parse_double3("force_b", fp, forces_ref[1]);
318 :
319 104 : double virial_ref[3][3];
320 104 : parse_double3x3("virial", fp, virial_ref);
321 :
322 104 : char footer_line[100];
323 104 : read_next_line(footer_line, sizeof(footer_line), fp);
324 104 : if (strcmp(footer_line, "#THE_END\n") != 0) {
325 0 : fprintf(stderr, "Error: Wrong footer line.\n");
326 0 : abort();
327 : }
328 :
329 104 : offload_buffer *grid_test = NULL;
330 104 : offload_create_buffer(npts_local_total, &grid_test);
331 104 : double hab_test[n2][n1];
332 104 : double forces_test[2][3];
333 104 : double virial_test[3][3];
334 104 : double start_time, end_time;
335 :
336 104 : if (batch) {
337 52 : grid_basis_set *basisa = NULL, *basisb = NULL;
338 52 : create_dummy_basis_set(n1, la_min, la_max, zeta, &basisa);
339 52 : create_dummy_basis_set(n2, lb_min, lb_max, zetb, &basisb);
340 52 : grid_task_list *task_list = NULL;
341 52 : create_dummy_task_list(
342 : orthorhombic, border_mask, ra, rab, radius, basisa, basisb, o1, o2,
343 : la_max, lb_max, cycles, cycles_per_block, (const int(*)[3])npts_global,
344 : (const int(*)[3])npts_local, (const int(*)[3])shift_local,
345 : (const int(*)[3])border_width, (const double(*)[3][3])dh,
346 : (const double(*)[3][3])dh_inv, &task_list);
347 52 : offload_buffer *pab_blocks = NULL, *hab_blocks = NULL;
348 52 : offload_create_buffer(n1 * n2, &pab_blocks);
349 52 : offload_create_buffer(n1 * n2, &hab_blocks);
350 52 : const double f = (collocate) ? rscale : 1.0;
351 944 : for (int i = 0; i < n1; i++) {
352 23160 : for (int j = 0; j < n2; j++) {
353 22268 : pab_blocks->host_buffer[j * n1 + i] = 0.5 * f * pab[j][i];
354 : }
355 : }
356 52 : start_time = omp_get_wtime();
357 52 : const int nlevels = 1;
358 52 : const int natoms = 2;
359 52 : if (collocate) {
360 : // collocate
361 26 : offload_buffer *grids[1] = {grid_test};
362 26 : grid_collocate_task_list(task_list, func, nlevels,
363 : (const int(*)[3])npts_local, pab_blocks, grids);
364 : } else {
365 : // integrate
366 26 : const offload_buffer *grids[1] = {grid_ref};
367 26 : grid_integrate_task_list(task_list, compute_tau, natoms, nlevels,
368 : (const int(*)[3])npts_local, pab_blocks, grids,
369 : hab_blocks, forces_test, virial_test);
370 422 : for (int i = 0; i < n2; i++) {
371 11530 : for (int j = 0; j < n1; j++) {
372 11134 : hab_test[i][j] = hab_blocks->host_buffer[i * n1 + j];
373 : }
374 : }
375 : }
376 52 : end_time = omp_get_wtime();
377 52 : grid_free_basis_set(basisa);
378 52 : grid_free_basis_set(basisb);
379 52 : grid_free_task_list(task_list);
380 52 : offload_free_buffer(pab_blocks);
381 52 : offload_free_buffer(hab_blocks);
382 : } else {
383 52 : start_time = omp_get_wtime();
384 52 : if (collocate) {
385 : // collocate
386 26 : memset(grid_test->host_buffer, 0, npts_local_total * sizeof(double));
387 52 : for (int i = 0; i < cycles; i++) {
388 26 : grid_cpu_collocate_pgf_product(
389 : orthorhombic, border_mask, func, la_max, la_min, lb_max, lb_min,
390 : zeta, zetb, rscale, dh, dh_inv, ra, rab, npts_global, npts_local,
391 : shift_local, border_width, radius, o1, o2, n1, n2, pab,
392 26 : grid_test->host_buffer);
393 : }
394 : } else {
395 : // integrate
396 26 : memset(hab_test, 0, n2 * n1 * sizeof(double));
397 26 : memset(forces_test, 0, 2 * 3 * sizeof(double));
398 26 : double virials_test[2][3][3] = {0};
399 52 : for (int i = 0; i < cycles; i++) {
400 26 : grid_cpu_integrate_pgf_product(
401 : orthorhombic, compute_tau, border_mask, la_max, la_min, lb_max,
402 : lb_min, zeta, zetb, dh, dh_inv, ra, rab, npts_global, npts_local,
403 : shift_local, border_width, radius, o1, o2, n1, n2,
404 26 : grid_ref->host_buffer, hab_test, pab, forces_test, virials_test,
405 : NULL, NULL, NULL);
406 : }
407 104 : for (int i = 0; i < 3; i++) {
408 312 : for (int j = 0; j < 3; j++) {
409 234 : virial_test[i][j] = virials_test[0][i][j] + virials_test[1][i][j];
410 : }
411 : }
412 : }
413 52 : end_time = omp_get_wtime();
414 : }
415 :
416 104 : double max_value = 0.0;
417 104 : double max_rel_diff = 0.0;
418 104 : const double derivatives_precision = 1e-4; // account for higher numeric noise
419 104 : if (collocate) {
420 : // collocate
421 : // compare grid
422 7151464 : for (int i = 0; i < npts_local_total; i++) {
423 7151412 : const double ref_value = cycles * grid_ref->host_buffer[i];
424 7151412 : const double test_value = grid_test->host_buffer[i];
425 7151412 : const double diff = fabs(test_value - ref_value);
426 7151412 : const double rel_diff = diff / fmax(1.0, fabs(ref_value));
427 7151412 : max_rel_diff = fmax(max_rel_diff, rel_diff);
428 7151412 : max_value = fmax(max_value, fabs(test_value));
429 : }
430 : } else {
431 : // integrate
432 : // compare hab
433 844 : for (int i = 0; i < n2; i++) {
434 23060 : for (int j = 0; j < n1; j++) {
435 22268 : const double ref_value = cycles * hab_ref[i][j];
436 22268 : const double test_value = hab_test[i][j];
437 22268 : const double diff = fabs(test_value - ref_value);
438 22268 : const double rel_diff = diff / fmax(1.0, fabs(ref_value));
439 22268 : max_rel_diff = fmax(max_rel_diff, rel_diff);
440 22268 : max_value = fmax(max_value, fabs(test_value));
441 22268 : if (rel_diff > tolerance) {
442 0 : printf("hab[%i, %i] ref: %le test: %le diff:%le rel_diff: %le\n", i,
443 : j, ref_value, test_value, diff, rel_diff);
444 : }
445 : }
446 : }
447 : // compare forces
448 156 : for (int i = 0; i < 2; i++) {
449 416 : for (int j = 0; j < 3; j++) {
450 312 : const double ref_value = cycles * forces_ref[i][j];
451 312 : const double test_value = forces_test[i][j];
452 312 : const double diff = fabs(test_value - ref_value);
453 312 : const double rel_diff = diff / fmax(1.0, fabs(ref_value));
454 312 : max_rel_diff = fmax(max_rel_diff, rel_diff * derivatives_precision);
455 312 : if (rel_diff * derivatives_precision > tolerance) {
456 0 : printf("forces[%i, %i] ref: %le test: %le diff:%le rel_diff: %le\n",
457 : i, j, ref_value, test_value, diff, rel_diff);
458 : }
459 : }
460 : }
461 : // compare virial
462 208 : for (int i = 0; i < 3; i++) {
463 624 : for (int j = 0; j < 3; j++) {
464 468 : const double ref_value = cycles * virial_ref[i][j];
465 468 : const double test_value = virial_test[i][j];
466 468 : const double diff = fabs(test_value - ref_value);
467 468 : const double rel_diff = diff / fmax(1.0, fabs(ref_value));
468 468 : max_rel_diff = fmax(max_rel_diff, rel_diff * derivatives_precision);
469 468 : if (rel_diff * derivatives_precision > tolerance) {
470 0 : printf("virial[ %i, %i] ref: %le test: %le diff:%le rel_diff: %le\n",
471 : i, j, ref_value, test_value, diff, rel_diff);
472 : }
473 : }
474 : }
475 : }
476 104 : printf("Task: %-55s %9s %-7s Cycles: %e Max value: %le "
477 : "Max rel diff: %le Time: %le sec\n",
478 : filename, collocate ? "Collocate" : "Integrate",
479 104 : batch ? "Batched" : "PGF-CPU", (float)cycles, max_value, max_rel_diff,
480 : end_time - start_time);
481 :
482 104 : offload_free_buffer(grid_ref);
483 104 : offload_free_buffer(grid_test);
484 :
485 : // Check floating point exceptions.
486 104 : if (fetestexcept(FE_DIVBYZERO) != 0) {
487 0 : fprintf(stderr, "Error: Floating point exception FE_DIVBYZERO.\n");
488 0 : exit(1);
489 : }
490 104 : if (fetestexcept(FE_OVERFLOW) != 0) {
491 0 : fprintf(stderr, "Error: Floating point exception FE_OVERFLOW.\n");
492 0 : exit(1);
493 : }
494 :
495 104 : return max_rel_diff < tolerance;
496 : }
497 :
498 : // EOF
|