LCOV - code coverage report
Current view: top level - src/grid - grid_task_list.c (source / functions) Hit Total Coverage
Test: CP2K Regtests (git:262480d) Lines: 134 185 72.4 %
Date: 2024-11-22 07:00:40 Functions: 4 4 100.0 %

          Line data    Source code
       1             : /*----------------------------------------------------------------------------*/
       2             : /*  CP2K: A general program to perform molecular dynamics simulations         */
       3             : /*  Copyright 2000-2024 CP2K developers group <https://cp2k.org>              */
       4             : /*                                                                            */
       5             : /*  SPDX-License-Identifier: BSD-3-Clause                                     */
       6             : /*----------------------------------------------------------------------------*/
       7             : 
       8             : #include <assert.h>
       9             : #include <math.h>
      10             : #include <stddef.h>
      11             : #include <stdio.h>
      12             : #include <stdlib.h>
      13             : #include <string.h>
      14             : 
      15             : #include "common/grid_common.h"
      16             : #include "common/grid_constants.h"
      17             : #include "common/grid_library.h"
      18             : #include "grid_task_list.h"
      19             : 
      20             : /*******************************************************************************
      21             :  * \brief Allocates a task list which can be passed to grid_collocate_task_list.
      22             :  *        See grid_task_list.h for details.
      23             :  * \author Ole Schuett
      24             :  ******************************************************************************/
      25       13462 : void grid_create_task_list(
      26             :     const bool orthorhombic, const int ntasks, const int nlevels,
      27             :     const int natoms, const int nkinds, const int nblocks,
      28             :     const int block_offsets[nblocks], const double atom_positions[natoms][3],
      29             :     const int atom_kinds[natoms], const grid_basis_set *basis_sets[nkinds],
      30             :     const int level_list[ntasks], const int iatom_list[ntasks],
      31             :     const int jatom_list[ntasks], const int iset_list[ntasks],
      32             :     const int jset_list[ntasks], const int ipgf_list[ntasks],
      33             :     const int jpgf_list[ntasks], const int border_mask_list[ntasks],
      34             :     const int block_num_list[ntasks], const double radius_list[ntasks],
      35             :     const double rab_list[ntasks][3], const int npts_global[nlevels][3],
      36             :     const int npts_local[nlevels][3], const int shift_local[nlevels][3],
      37             :     const int border_width[nlevels][3], const double dh[nlevels][3][3],
      38             :     const double dh_inv[nlevels][3][3], grid_task_list **task_list_out) {
      39             : 
      40       13462 :   const grid_library_config config = grid_library_get_config();
      41             : 
      42       13462 :   grid_task_list *task_list = NULL;
      43             : 
      44       13462 :   if (*task_list_out == NULL) {
      45        8068 :     task_list = malloc(sizeof(grid_task_list));
      46        8068 :     assert(task_list != NULL);
      47        8068 :     memset(task_list, 0, sizeof(grid_task_list));
      48             : 
      49             :     // Resolve AUTO to a concrete backend.
      50        8068 :     if (config.backend == GRID_BACKEND_AUTO) {
      51             : 
      52             : #if defined(__OFFLOAD_HIP) && !defined(__NO_OFFLOAD_GRID)
      53             :       task_list->backend = GRID_BACKEND_HIP;
      54             : #elif defined(__OFFLOAD) && !defined(__NO_OFFLOAD_GRID)
      55             :       task_list->backend = GRID_BACKEND_GPU;
      56             : #else
      57        8048 :       task_list->backend = GRID_BACKEND_CPU;
      58             : #endif
      59             :     } else {
      60          20 :       task_list->backend = config.backend;
      61             :     }
      62             :   } else {
      63             :     // Reuse existing task list.
      64        5394 :     task_list = *task_list_out;
      65        5394 :     free(task_list->npts_local);
      66             :   }
      67             : 
      68             :   // Store npts_local for bounds checking and validation.
      69       13462 :   task_list->nlevels = nlevels;
      70       13462 :   size_t size = nlevels * 3 * sizeof(int);
      71       13462 :   task_list->npts_local = malloc(size);
      72       13462 :   assert(task_list->npts_local != NULL);
      73       13462 :   memcpy(task_list->npts_local, npts_local, size);
      74             : 
      75             :   // Always create reference backend because it might be needed for validation.
      76       13462 :   grid_ref_create_task_list(
      77             :       orthorhombic, ntasks, nlevels, natoms, nkinds, nblocks, block_offsets,
      78             :       atom_positions, atom_kinds, basis_sets, level_list, iatom_list,
      79             :       jatom_list, iset_list, jset_list, ipgf_list, jpgf_list, border_mask_list,
      80             :       block_num_list, radius_list, rab_list, npts_global, npts_local,
      81             :       shift_local, border_width, dh, dh_inv, &task_list->ref);
      82             : 
      83             :   // Create other backend, if selected.
      84       13462 :   switch (task_list->backend) {
      85             :   case GRID_BACKEND_REF:
      86             :     break; // was already created above
      87       13438 :   case GRID_BACKEND_CPU:
      88       13438 :     grid_cpu_create_task_list(
      89             :         orthorhombic, ntasks, nlevels, natoms, nkinds, nblocks, block_offsets,
      90             :         atom_positions, atom_kinds, basis_sets, level_list, iatom_list,
      91             :         jatom_list, iset_list, jset_list, ipgf_list, jpgf_list,
      92             :         border_mask_list, block_num_list, radius_list, rab_list, npts_global,
      93             :         npts_local, shift_local, border_width, dh, dh_inv, &task_list->cpu);
      94       13438 :     break;
      95          20 :   case GRID_BACKEND_DGEMM:
      96          20 :     grid_dgemm_create_task_list(
      97             :         orthorhombic, ntasks, nlevels, natoms, nkinds, nblocks, block_offsets,
      98             :         atom_positions, atom_kinds, basis_sets, level_list, iatom_list,
      99             :         jatom_list, iset_list, jset_list, ipgf_list, jpgf_list,
     100             :         border_mask_list, block_num_list, radius_list, rab_list, npts_global,
     101             :         npts_local, shift_local, border_width, dh, dh_inv, &task_list->dgemm);
     102          20 :     break;
     103             : 
     104           0 :   case GRID_BACKEND_GPU:
     105             : #if (defined(__OFFLOAD) && !defined(__NO_OFFLOAD_GRID))
     106             :     grid_gpu_create_task_list(
     107             :         orthorhombic, ntasks, nlevels, natoms, nkinds, nblocks, block_offsets,
     108             :         atom_positions, atom_kinds, basis_sets, level_list, iatom_list,
     109             :         jatom_list, iset_list, jset_list, ipgf_list, jpgf_list,
     110             :         border_mask_list, block_num_list, radius_list, rab_list, npts_global,
     111             :         npts_local, shift_local, border_width, dh, dh_inv, &task_list->gpu);
     112             : #else
     113           0 :     fprintf(stderr,
     114             :             "Error: The GPU grid backend is not available. "
     115             :             "Please re-compile with -D__OFFLOAD_CUDA or -D__OFFLOAD_HIP");
     116           0 :     abort();
     117             : #endif
     118           0 :     break;
     119             : 
     120           0 :   case GRID_BACKEND_HIP:
     121             : #if defined(__OFFLOAD_HIP) && !defined(__NO_OFFLOAD_GRID)
     122             :     grid_hip_create_task_list(
     123             :         orthorhombic, ntasks, nlevels, natoms, nkinds, nblocks, block_offsets,
     124             :         &atom_positions[0][0], atom_kinds, basis_sets, level_list, iatom_list,
     125             :         jatom_list, iset_list, jset_list, ipgf_list, jpgf_list,
     126             :         border_mask_list, block_num_list, radius_list, &rab_list[0][0],
     127             :         &npts_global[0][0], &npts_local[0][0], &shift_local[0][0],
     128             :         &border_width[0][0], &dh[0][0][0], &dh_inv[0][0][0], &task_list->hip);
     129             : #else
     130           0 :     fprintf(stderr, "Error: The HIP grid backend is not available. "
     131             :                     "Please re-compile with -D__OFFLOAD_HIP");
     132           0 :     abort();
     133             : #endif
     134           0 :     break;
     135             : 
     136           0 :   default:
     137           0 :     printf("Error: Unknown grid backend: %i.\n", config.backend);
     138           0 :     abort();
     139       13462 :     break;
     140             :   }
     141             : 
     142       13462 :   *task_list_out = task_list;
     143       13462 : }
     144             : 
     145             : /*******************************************************************************
     146             :  * \brief Deallocates given task list, basis_sets have to be freed separately.
     147             :  * \author Ole Schuett
     148             :  ******************************************************************************/
     149        8068 : void grid_free_task_list(grid_task_list *task_list) {
     150             : 
     151        8068 :   if (task_list->ref != NULL) {
     152        8068 :     grid_ref_free_task_list(task_list->ref);
     153        8068 :     task_list->ref = NULL;
     154             :   }
     155        8068 :   if (task_list->cpu != NULL) {
     156        8056 :     grid_cpu_free_task_list(task_list->cpu);
     157        8056 :     task_list->cpu = NULL;
     158             :   }
     159        8068 :   if (task_list->dgemm != NULL) {
     160           8 :     grid_dgemm_free_task_list(task_list->dgemm);
     161           8 :     task_list->dgemm = NULL;
     162             :   }
     163             : #if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_GRID)
     164             :   if (task_list->gpu != NULL) {
     165             :     grid_gpu_free_task_list(task_list->gpu);
     166             :     task_list->gpu = NULL;
     167             :   }
     168             : #endif
     169             : #if defined(__OFFLOAD_HIP) && !defined(__NO_OFFLOAD_GRID)
     170             :   if (task_list->hip != NULL) {
     171             :     grid_hip_free_task_list(task_list->hip);
     172             :     task_list->hip = NULL;
     173             :   }
     174             : #endif
     175             : 
     176        8068 :   free(task_list->npts_local);
     177        8068 :   free(task_list);
     178        8068 : }
     179             : 
     180             : /*******************************************************************************
     181             :  * \brief Collocate all tasks of in given list onto given grids.
     182             :  *        See grid_task_list.h for details.
     183             :  * \author Ole Schuett
     184             :  ******************************************************************************/
     185      193883 : void grid_collocate_task_list(const grid_task_list *task_list,
     186             :                               const enum grid_func func, const int nlevels,
     187             :                               const int npts_local[nlevels][3],
     188             :                               const offload_buffer *pab_blocks,
     189             :                               offload_buffer *grids[nlevels]) {
     190             : 
     191             :   // Bounds check.
     192      193883 :   assert(task_list->nlevels == nlevels);
     193      961331 :   for (int ilevel = 0; ilevel < nlevels; ilevel++) {
     194      767448 :     assert(task_list->npts_local[ilevel][0] == npts_local[ilevel][0]);
     195      767448 :     assert(task_list->npts_local[ilevel][1] == npts_local[ilevel][1]);
     196      767448 :     assert(task_list->npts_local[ilevel][2] == npts_local[ilevel][2]);
     197             :   }
     198             : 
     199      193883 :   switch (task_list->backend) {
     200          26 :   case GRID_BACKEND_REF:
     201          26 :     grid_ref_collocate_task_list(task_list->ref, func, nlevels, pab_blocks,
     202             :                                  grids);
     203          26 :     break;
     204      193699 :   case GRID_BACKEND_CPU:
     205      193699 :     grid_cpu_collocate_task_list(task_list->cpu, func, nlevels, pab_blocks,
     206             :                                  grids);
     207      193699 :     break;
     208         158 :   case GRID_BACKEND_DGEMM:
     209         158 :     grid_dgemm_collocate_task_list(task_list->dgemm, func, nlevels, pab_blocks,
     210             :                                    grids);
     211         158 :     break;
     212             : #if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_GRID)
     213             :   case GRID_BACKEND_GPU:
     214             :     grid_gpu_collocate_task_list(task_list->gpu, func, nlevels, pab_blocks,
     215             :                                  grids);
     216             :     break;
     217             : #endif
     218             : #if defined(__OFFLOAD_HIP) && !defined(__NO_OFFLOAD_GRID)
     219             :   case GRID_BACKEND_HIP:
     220             :     grid_hip_collocate_task_list(task_list->hip, func, nlevels, pab_blocks,
     221             :                                  grids);
     222             :     break;
     223             : #endif
     224           0 :   default:
     225           0 :     printf("Error: Unknown grid backend: %i.\n", task_list->backend);
     226           0 :     abort();
     227      193883 :     break;
     228             :   }
     229             : 
     230             :   // Perform validation if enabled.
     231      193883 :   if (grid_library_get_config().validate) {
     232             :     // Allocate space for reference results.
     233          26 :     offload_buffer *grids_ref[nlevels];
     234         130 :     for (int level = 0; level < nlevels; level++) {
     235         104 :       const int npts_local_total =
     236         104 :           npts_local[level][0] * npts_local[level][1] * npts_local[level][2];
     237         104 :       grids_ref[level] = NULL;
     238         104 :       offload_create_buffer(npts_local_total, &grids_ref[level]);
     239             :     }
     240             : 
     241             :     // Call reference implementation.
     242          26 :     grid_ref_collocate_task_list(task_list->ref, func, nlevels, pab_blocks,
     243             :                                  grids_ref);
     244             : 
     245             :     // Compare results.
     246          26 :     const double tolerance = 1e-12;
     247          26 :     double max_rel_diff = 0.0;
     248         130 :     for (int level = 0; level < nlevels; level++) {
     249        2374 :       for (int i = 0; i < npts_local[level][0]; i++) {
     250       81112 :         for (int j = 0; j < npts_local[level][1]; j++) {
     251     3619188 :           for (int k = 0; k < npts_local[level][2]; k++) {
     252     3540346 :             const int idx = k * npts_local[level][1] * npts_local[level][0] +
     253     3540346 :                             j * npts_local[level][0] + i;
     254     3540346 :             const double ref_value = grids_ref[level]->host_buffer[idx];
     255     3540346 :             const double test_value = grids[level]->host_buffer[idx];
     256     3540346 :             const double diff = fabs(test_value - ref_value);
     257     3540346 :             const double rel_diff = diff / fmax(1.0, fabs(ref_value));
     258     3540346 :             max_rel_diff = fmax(max_rel_diff, rel_diff);
     259     3540346 :             if (rel_diff > tolerance) {
     260           0 :               fprintf(stderr, "Error: Validation failure in grid collocate\n");
     261           0 :               fprintf(stderr, "   diff:     %le\n", diff);
     262           0 :               fprintf(stderr, "   rel_diff: %le\n", rel_diff);
     263           0 :               fprintf(stderr, "   value:    %le\n", ref_value);
     264           0 :               fprintf(stderr, "   level:    %i\n", level);
     265           0 :               fprintf(stderr, "   ijk:      %i  %i  %i\n", i, j, k);
     266           0 :               abort();
     267             :             }
     268             :           }
     269             :         }
     270             :       }
     271         104 :       offload_free_buffer(grids_ref[level]);
     272         104 :       printf("Validated grid collocate, max rel. diff: %le\n", max_rel_diff);
     273             :     }
     274             :   }
     275      193883 : }
     276             : 
     277             : /*******************************************************************************
     278             :  * \brief Integrate all tasks of in given list from given grids.
     279             :  *        See grid_task_list.h for details.
     280             :  * \author Ole Schuett
     281             :  ******************************************************************************/
     282      181981 : void grid_integrate_task_list(
     283             :     const grid_task_list *task_list, const bool compute_tau, const int natoms,
     284             :     const int nlevels, const int npts_local[nlevels][3],
     285             :     const offload_buffer *pab_blocks, const offload_buffer *grids[nlevels],
     286             :     offload_buffer *hab_blocks, double forces[natoms][3], double virial[3][3]) {
     287             : 
     288             :   // Bounds check.
     289      181981 :   assert(task_list->nlevels == nlevels);
     290      901911 :   for (int ilevel = 0; ilevel < nlevels; ilevel++) {
     291      719930 :     assert(task_list->npts_local[ilevel][0] == npts_local[ilevel][0]);
     292      719930 :     assert(task_list->npts_local[ilevel][1] == npts_local[ilevel][1]);
     293      719930 :     assert(task_list->npts_local[ilevel][2] == npts_local[ilevel][2]);
     294             :   }
     295             : 
     296      181981 :   assert(forces == NULL || pab_blocks != NULL);
     297      181981 :   assert(virial == NULL || pab_blocks != NULL);
     298             : 
     299      181981 :   switch (task_list->backend) {
     300             : #if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_GRID)
     301             :   case GRID_BACKEND_GPU:
     302             :     grid_gpu_integrate_task_list(task_list->gpu, compute_tau, natoms, nlevels,
     303             :                                  pab_blocks, grids, hab_blocks, forces, virial);
     304             :     break;
     305             : #endif
     306             : #if defined(__OFFLOAD_HIP) && !defined(__NO_OFFLOAD_GRID)
     307             :   case GRID_BACKEND_HIP:
     308             :     grid_hip_integrate_task_list(task_list->hip, compute_tau, nlevels,
     309             :                                  pab_blocks, grids, hab_blocks, &forces[0][0],
     310             :                                  &virial[0][0]);
     311             :     break;
     312             : #endif
     313         156 :   case GRID_BACKEND_DGEMM:
     314         156 :     grid_dgemm_integrate_task_list(task_list->dgemm, compute_tau, natoms,
     315             :                                    nlevels, pab_blocks, grids, hab_blocks,
     316             :                                    forces, virial);
     317         156 :     break;
     318      181801 :   case GRID_BACKEND_CPU:
     319      181801 :     grid_cpu_integrate_task_list(task_list->cpu, compute_tau, natoms, nlevels,
     320             :                                  pab_blocks, grids, hab_blocks, forces, virial);
     321      181801 :     break;
     322          24 :   case GRID_BACKEND_REF:
     323          24 :     grid_ref_integrate_task_list(task_list->ref, compute_tau, natoms, nlevels,
     324             :                                  pab_blocks, grids, hab_blocks, forces, virial);
     325          24 :     break;
     326           0 :   default:
     327           0 :     printf("Error: Unknown grid backend: %i.\n", task_list->backend);
     328           0 :     abort();
     329      181981 :     break;
     330             :   }
     331             : 
     332             :   // Perform validation if enabled.
     333      181981 :   if (grid_library_get_config().validate) {
     334             :     // Allocate space for reference results.
     335          24 :     const int hab_length = hab_blocks->size / sizeof(double);
     336          24 :     offload_buffer *hab_blocks_ref = NULL;
     337          24 :     offload_create_buffer(hab_length, &hab_blocks_ref);
     338          24 :     double forces_ref[natoms][3], virial_ref[3][3];
     339             : 
     340             :     // Call reference implementation.
     341          48 :     grid_ref_integrate_task_list(task_list->ref, compute_tau, natoms, nlevels,
     342             :                                  pab_blocks, grids, hab_blocks_ref,
     343             :                                  (forces != NULL) ? forces_ref : NULL,
     344             :                                  (virial != NULL) ? virial_ref : NULL);
     345             : 
     346             :     // Compare hab.
     347          24 :     const double hab_tolerance = 1e-12;
     348          24 :     double hab_max_rel_diff = 0.0;
     349        2025 :     for (int i = 0; i < hab_length; i++) {
     350        2001 :       const double ref_value = hab_blocks_ref->host_buffer[i];
     351        2001 :       const double test_value = hab_blocks->host_buffer[i];
     352        2001 :       const double diff = fabs(test_value - ref_value);
     353        2001 :       const double rel_diff = diff / fmax(1.0, fabs(ref_value));
     354        2001 :       hab_max_rel_diff = fmax(hab_max_rel_diff, rel_diff);
     355        2001 :       if (rel_diff > hab_tolerance) {
     356           0 :         fprintf(stderr, "Error: Validation failure in grid integrate\n");
     357           0 :         fprintf(stderr, "   hab diff:     %le\n", diff);
     358           0 :         fprintf(stderr, "   hab rel_diff: %le\n", rel_diff);
     359           0 :         fprintf(stderr, "   hab value:    %le\n", ref_value);
     360           0 :         fprintf(stderr, "   hab i:        %i\n", i);
     361           0 :         abort();
     362             :       }
     363             :     }
     364             : 
     365             :     // Compare forces.
     366          24 :     const double forces_tolerance = 1e-8; // account for higher numeric noise
     367          24 :     double forces_max_rel_diff = 0.0;
     368          24 :     if (forces != NULL) {
     369          14 :       for (int iatom = 0; iatom < natoms; iatom++) {
     370          40 :         for (int idir = 0; idir < 3; idir++) {
     371          30 :           const double ref_value = forces_ref[iatom][idir];
     372          30 :           const double test_value = forces[iatom][idir];
     373          30 :           const double diff = fabs(test_value - ref_value);
     374          30 :           const double rel_diff = diff / fmax(1.0, fabs(ref_value));
     375          30 :           forces_max_rel_diff = fmax(forces_max_rel_diff, rel_diff);
     376          30 :           if (rel_diff > forces_tolerance) {
     377           0 :             fprintf(stderr, "Error: Validation failure in grid integrate\n");
     378           0 :             fprintf(stderr, "   forces diff:     %le\n", diff);
     379           0 :             fprintf(stderr, "   forces rel_diff: %le\n", rel_diff);
     380           0 :             fprintf(stderr, "   forces value:    %le\n", ref_value);
     381           0 :             fprintf(stderr, "   forces atom:     %i\n", iatom);
     382           0 :             fprintf(stderr, "   forces dir:      %i\n", idir);
     383           0 :             abort();
     384             :           }
     385             :         }
     386             :       }
     387             :     }
     388             : 
     389             :     // Compare virial.
     390          24 :     const double virial_tolerance = 1e-8; // account for higher numeric noise
     391          24 :     double virial_max_rel_diff = 0.0;
     392          24 :     if (virial != NULL) {
     393           0 :       for (int i = 0; i < 3; i++) {
     394           0 :         for (int j = 0; j < 3; j++) {
     395           0 :           const double ref_value = virial_ref[i][j];
     396           0 :           const double test_value = virial[i][j];
     397           0 :           const double diff = fabs(test_value - ref_value);
     398           0 :           const double rel_diff = diff / fmax(1.0, fabs(ref_value));
     399           0 :           virial_max_rel_diff = fmax(virial_max_rel_diff, rel_diff);
     400           0 :           if (rel_diff > virial_tolerance) {
     401           0 :             fprintf(stderr, "Error: Validation failure in grid integrate\n");
     402           0 :             fprintf(stderr, "   virial diff:     %le\n", diff);
     403           0 :             fprintf(stderr, "   virial rel_diff: %le\n", rel_diff);
     404           0 :             fprintf(stderr, "   virial value:    %le\n", ref_value);
     405           0 :             fprintf(stderr, "   virial ij:       %i  %i\n", i, j);
     406           0 :             abort();
     407             :           }
     408             :         }
     409             :       }
     410             :     }
     411             : 
     412          24 :     printf("Validated grid_integrate, max rel. diff: %le %le %le\n",
     413             :            hab_max_rel_diff, forces_max_rel_diff, virial_max_rel_diff);
     414          24 :     offload_free_buffer(hab_blocks_ref);
     415             :   }
     416      181981 : }
     417             : 
     418             : // EOF

Generated by: LCOV version 1.15