LCOV - code coverage report
Current view: top level - src/grid/common - grid_library.c (source / functions) Hit Total Coverage
Test: CP2K Regtests (git:69d170a) Lines: 76 82 92.7 %
Date: 2024-12-23 07:26:16 Functions: 8 8 100.0 %

          Line data    Source code
       1             : /*----------------------------------------------------------------------------*/
       2             : /*  CP2K: A general program to perform molecular dynamics simulations         */
       3             : /*  Copyright 2000-2024 CP2K developers group <https://cp2k.org>              */
       4             : /*                                                                            */
       5             : /*  SPDX-License-Identifier: BSD-3-Clause                                     */
       6             : /*----------------------------------------------------------------------------*/
       7             : 
       8             : #include <assert.h>
       9             : #include <omp.h>
      10             : #include <stddef.h>
      11             : #include <stdio.h>
      12             : #include <stdlib.h>
      13             : #include <string.h>
      14             : 
      15             : #include "../../offload/offload_runtime.h"
      16             : #include "grid_common.h"
      17             : #include "grid_constants.h"
      18             : #include "grid_library.h"
      19             : 
      20             : // counter dimensions
      21             : #define GRID_NBACKENDS 5
      22             : #define GRID_NKERNELS 4
      23             : #define GRID_MAX_LP 20
      24             : 
      25             : typedef struct {
      26             :   grid_sphere_cache sphere_cache;
      27             :   long counters[GRID_NBACKENDS * GRID_NKERNELS * GRID_MAX_LP];
      28             : } grid_library_globals;
      29             : 
      30             : static grid_library_globals **per_thread_globals = NULL;
      31             : static bool library_initialized = false;
      32             : static int max_threads = 0;
      33             : static grid_library_config config = {
      34             :     .backend = GRID_BACKEND_AUTO, .validate = false, .apply_cutoff = false};
      35             : 
      36             : #if !defined(_OPENMP)
      37             : #error "OpenMP is required. Please add -fopenmp to your C compiler flags."
      38             : #endif
      39             : 
      40             : #if defined(NDEBUG)
      41             : #error                                                                         \
      42             :     "Please do not build CP2K with NDEBUG. There is no performance advantage and asserts will save your neck."
      43             : #endif
      44             : 
      45             : /*******************************************************************************
      46             :  * \brief Initializes the grid library.
      47             :  * \author Ole Schuett
      48             :  ******************************************************************************/
      49        9176 : void grid_library_init(void) {
      50        9176 :   if (library_initialized) {
      51           0 :     printf("Error: Grid library was already initialized.\n");
      52           0 :     abort();
      53             :   }
      54             : 
      55             : #if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_GRID)
      56             :   // Reserve global GPU memory for storing the intermediate Cab matrix blocks.
      57             :   // CUDA does not allow to increase this limit after a kernel was launched.
      58             :   // Unfortunately, the required memory is hard to predict because we neither
      59             :   // know which tasks will be run nor how many thread blocks the available GPU
      60             :   // can execute in parallel... 64 MiB ought to be enough for anybody ;-)
      61             :   offloadEnsureMallocHeapSize(64 * 1024 * 1024);
      62             : #endif
      63             : 
      64        9176 :   max_threads = omp_get_max_threads();
      65        9176 :   per_thread_globals = malloc(max_threads * sizeof(grid_library_globals *));
      66        9176 :   assert(per_thread_globals != NULL);
      67             : 
      68             : // Using parallel regions to ensure memory is allocated near a thread's core.
      69             : #pragma omp parallel default(none) shared(per_thread_globals)                  \
      70             :     num_threads(max_threads)
      71             :   {
      72             :     const int ithread = omp_get_thread_num();
      73             :     per_thread_globals[ithread] = malloc(sizeof(grid_library_globals));
      74             :     assert(per_thread_globals[ithread] != NULL);
      75             :     memset(per_thread_globals[ithread], 0, sizeof(grid_library_globals));
      76             :   }
      77             : 
      78        9176 :   library_initialized = true;
      79        9176 : }
      80             : 
      81             : /*******************************************************************************
      82             :  * \brief Finalizes the grid library.
      83             :  * \author Ole Schuett
      84             :  ******************************************************************************/
      85        9176 : void grid_library_finalize(void) {
      86        9176 :   if (!library_initialized) {
      87           0 :     printf("Error: Grid library is not initialized.\n");
      88           0 :     abort();
      89             :   }
      90             : 
      91       18352 :   for (int i = 0; i < max_threads; i++) {
      92        9176 :     grid_sphere_cache_free(&per_thread_globals[i]->sphere_cache);
      93        9176 :     free(per_thread_globals[i]);
      94             :   }
      95        9176 :   free(per_thread_globals);
      96        9176 :   per_thread_globals = NULL;
      97        9176 :   library_initialized = false;
      98        9176 : }
      99             : 
     100             : /*******************************************************************************
     101             :  * \brief Returns a pointer to the thread local sphere cache.
     102             :  * \author Ole Schuett
     103             :  ******************************************************************************/
     104   123087524 : grid_sphere_cache *grid_library_get_sphere_cache(void) {
     105   123087524 :   const int ithread = omp_get_thread_num();
     106   123087524 :   assert(ithread < max_threads);
     107   123087524 :   return &per_thread_globals[ithread]->sphere_cache;
     108             : }
     109             : 
     110             : /*******************************************************************************
     111             :  * \brief Configures the grid library.
     112             :  * \author Ole Schuett
     113             :  ******************************************************************************/
     114        9292 : void grid_library_set_config(const enum grid_backend backend,
     115             :                              const bool validate, const bool apply_cutoff) {
     116        9292 :   config.backend = backend;
     117        9292 :   config.validate = validate;
     118        9292 :   config.apply_cutoff = apply_cutoff;
     119        9292 : }
     120             : 
     121             : /*******************************************************************************
     122             :  * \brief Returns the library config.
     123             :  * \author Ole Schuett
     124             :  ******************************************************************************/
     125      387444 : grid_library_config grid_library_get_config(void) { return config; }
     126             : 
     127             : /*******************************************************************************
     128             :  * \brief Adds given increment to counter specified by lp, backend, and kernel.
     129             :  * \author Ole Schuett
     130             :  ******************************************************************************/
     131   128388866 : void grid_library_counter_add(const int lp, const enum grid_backend backend,
     132             :                               const enum grid_library_kernel kernel,
     133             :                               const int increment) {
     134   128388866 :   assert(lp >= 0);
     135   128388866 :   assert(kernel < GRID_NKERNELS);
     136   128388866 :   const int back = backend - GRID_BACKEND_REF;
     137   128388866 :   assert(back < GRID_NBACKENDS);
     138   128388866 :   const int idx = back * GRID_NKERNELS * GRID_MAX_LP + kernel * GRID_MAX_LP +
     139   128388866 :                   imin(lp, GRID_MAX_LP - 1);
     140   128388866 :   const int ithread = omp_get_thread_num();
     141   128388866 :   assert(ithread < max_threads);
     142   128388866 :   per_thread_globals[ithread]->counters[idx] += increment;
     143   128388866 : }
     144             : 
     145             : /*******************************************************************************
     146             :  * \brief Comperator passed to qsort to compare two counters.
     147             :  * \author Ole Schuett
     148             :  ******************************************************************************/
     149    15601000 : static int compare_counters(const void *a, const void *b) {
     150    15601000 :   return *(long *)b - *(long *)a;
     151             : }
     152             : 
     153             : /*******************************************************************************
     154             :  * \brief Prints statistics gathered by the grid library.
     155             :  * \author Ole Schuett
     156             :  ******************************************************************************/
     157        9294 : void grid_library_print_stats(void (*mpi_sum_func)(long *, int),
     158             :                               const int mpi_comm,
     159             :                               void (*print_func)(char *, int),
     160        9294 :                               const int output_unit) {
     161        9294 :   if (!library_initialized) {
     162           0 :     printf("Error: Grid library is not initialized.\n");
     163           0 :     abort();
     164             :   }
     165             : 
     166             :   // Sum all counters across threads and mpi ranks.
     167        9294 :   const int ncounters = GRID_NBACKENDS * GRID_NKERNELS * GRID_MAX_LP;
     168        9294 :   long counters[ncounters][2];
     169        9294 :   memset(counters, 0, ncounters * 2 * sizeof(long));
     170        9294 :   double total = 0.0;
     171     3726894 :   for (int i = 0; i < ncounters; i++) {
     172     3717600 :     counters[i][1] = i; // needed as inverse index after qsort
     173     7435200 :     for (int j = 0; j < max_threads; j++) {
     174     3717600 :       counters[i][0] += per_thread_globals[j]->counters[i];
     175             :     }
     176     3717600 :     mpi_sum_func(&counters[i][0], mpi_comm);
     177     3717600 :     total += counters[i][0];
     178             :   }
     179             : 
     180             :   // Sort counters.
     181        9294 :   qsort(counters, ncounters, 2 * sizeof(long), &compare_counters);
     182             : 
     183             :   // Print counters.
     184        9294 :   print_func("\n", output_unit);
     185        9294 :   print_func(" ----------------------------------------------------------------"
     186             :              "---------------\n",
     187             :              output_unit);
     188        9294 :   print_func(" -                                                               "
     189             :              "              -\n",
     190             :              output_unit);
     191        9294 :   print_func(" -                                GRID STATISTICS                "
     192             :              "              -\n",
     193             :              output_unit);
     194        9294 :   print_func(" -                                                               "
     195             :              "              -\n",
     196             :              output_unit);
     197        9294 :   print_func(" ----------------------------------------------------------------"
     198             :              "---------------\n",
     199             :              output_unit);
     200        9294 :   print_func(" LP    KERNEL             BACKEND                              "
     201             :              "COUNT     PERCENT\n",
     202             :              output_unit);
     203             : 
     204        9294 :   const char *kernel_names[] = {"collocate ortho", "integrate ortho",
     205             :                                 "collocate general", "integrate general"};
     206        9294 :   const char *backend_names[] = {"REF", "CPU", "DGEMM", "GPU", "HIP"};
     207             : 
     208     3726894 :   for (int i = 0; i < ncounters; i++) {
     209     3717600 :     if (counters[i][0] == 0)
     210     3667338 :       continue; // skip empty counters
     211       50262 :     const double percent = 100.0 * counters[i][0] / total;
     212       50262 :     const int idx = counters[i][1];
     213       50262 :     const int backend_stride = GRID_NKERNELS * GRID_MAX_LP;
     214       50262 :     const int back = idx / backend_stride;
     215       50262 :     const int kern = (idx % backend_stride) / GRID_MAX_LP;
     216       50262 :     const int lp = (idx % backend_stride) % GRID_MAX_LP;
     217       50262 :     char buffer[100];
     218       50262 :     snprintf(buffer, sizeof(buffer), " %-5i %-17s  %-6s  %34li %10.2f%%\n", lp,
     219             :              kernel_names[kern], backend_names[back], counters[i][0], percent);
     220       50262 :     print_func(buffer, output_unit);
     221             :   }
     222             : 
     223        9294 :   print_func(" ----------------------------------------------------------------"
     224             :              "---------------\n",
     225             :              output_unit);
     226        9294 : }
     227             : 
     228             : // EOF

Generated by: LCOV version 1.15