LCOV - code coverage report
Current view: top level - src/dbm - dbm_library.c (source / functions) Hit Total Coverage
Test: CP2K Regtests (git:4c33f95) Lines: 80 91 87.9 %
Date: 2025-01-30 06:53:08 Functions: 6 6 100.0 %

          Line data    Source code
       1             : /*----------------------------------------------------------------------------*/
       2             : /*  CP2K: A general program to perform molecular dynamics simulations         */
       3             : /*  Copyright 2000-2025 CP2K developers group <https://cp2k.org>              */
       4             : /*                                                                            */
       5             : /*  SPDX-License-Identifier: BSD-3-Clause                                     */
       6             : /*----------------------------------------------------------------------------*/
       7             : 
       8             : #include <assert.h>
       9             : #include <inttypes.h>
      10             : #include <omp.h>
      11             : #include <stdio.h>
      12             : #include <stdlib.h>
      13             : #include <string.h>
      14             : 
      15             : #include "dbm_library.h"
      16             : #include "dbm_mempool.h"
      17             : #include "dbm_mpi.h"
      18             : 
      19             : #define DBM_NUM_COUNTERS 64
      20             : 
      21             : static int64_t **per_thread_counters = NULL;
      22             : static bool library_initialized = false;
      23             : static int max_threads = 0;
      24             : 
      25             : #if !defined(_OPENMP)
      26             : #error "OpenMP is required. Please add -fopenmp to your C compiler flags."
      27             : #endif
      28             : 
      29             : /*******************************************************************************
      30             :  * \brief Initializes the DBM library.
      31             :  * \author Ole Schuett
      32             :  ******************************************************************************/
      33        9208 : void dbm_library_init(void) {
      34        9208 :   assert(omp_get_num_threads() == 1);
      35             : 
      36        9208 :   if (library_initialized) {
      37           0 :     fprintf(stderr, "DBM library was already initialized.\n");
      38           0 :     abort();
      39             :   }
      40             : 
      41        9208 :   max_threads = omp_get_max_threads();
      42        9208 :   per_thread_counters = malloc(max_threads * sizeof(int64_t *));
      43        9208 :   assert(per_thread_counters != NULL);
      44             : 
      45             :   // Using parallel regions to ensure memory is allocated near a thread's core.
      46             : #pragma omp parallel default(none) shared(per_thread_counters)                 \
      47             :     num_threads(max_threads)
      48             :   {
      49             :     const int ithread = omp_get_thread_num();
      50             :     const size_t counters_size = DBM_NUM_COUNTERS * sizeof(int64_t);
      51             :     per_thread_counters[ithread] = malloc(counters_size);
      52             :     assert(per_thread_counters[ithread] != NULL);
      53             :     memset(per_thread_counters[ithread], 0, counters_size);
      54             :   }
      55             : 
      56        9208 :   library_initialized = true;
      57        9208 : }
      58             : 
      59             : /*******************************************************************************
      60             :  * \brief Finalizes the DBM library.
      61             :  * \author Ole Schuett
      62             :  ******************************************************************************/
      63        9208 : void dbm_library_finalize(void) {
      64        9208 :   assert(omp_get_num_threads() == 1);
      65             : 
      66        9208 :   if (!library_initialized) {
      67           0 :     fprintf(stderr, "Error: DBM library is not initialized.\n");
      68           0 :     abort();
      69             :   }
      70             : 
      71       18416 :   for (int i = 0; i < max_threads; i++) {
      72        9208 :     free(per_thread_counters[i]);
      73             :   }
      74        9208 :   free(per_thread_counters);
      75        9208 :   per_thread_counters = NULL;
      76             : 
      77        9208 :   dbm_mempool_clear();
      78        9208 :   library_initialized = false;
      79        9208 : }
      80             : 
      81             : /*******************************************************************************
      82             :  * \brief Computes min(3, floor(log10(x))).
      83             :  * \author Ole Schuett
      84             :  ******************************************************************************/
      85    65010777 : static int floorlog10(const int x) {
      86    65010777 :   if (x >= 1000) {
      87             :     return 3;
      88             :   }
      89    65010393 :   if (x >= 100) {
      90             :     return 2;
      91             :   }
      92    62425394 :   if (x >= 10) {
      93    18550700 :     return 1;
      94             :   }
      95             :   return 0;
      96             : }
      97             : 
      98             : /*******************************************************************************
      99             :  * \brief Add given block multiplication to stats. This routine is thread-safe.
     100             :  * \author Ole Schuett
     101             :  ******************************************************************************/
     102    21670259 : void dbm_library_counter_increment(const int m, const int n, const int k) {
     103    21670259 :   const int ithread = omp_get_thread_num();
     104    21670259 :   assert(ithread < max_threads);
     105    21670259 :   const int idx = 16 * floorlog10(m) + 4 * floorlog10(n) + floorlog10(k);
     106    21670259 :   per_thread_counters[ithread][idx]++;
     107    21670259 : }
     108             : 
     109             : /*******************************************************************************
     110             :  * \brief Comperator passed to qsort to compare two counters.
     111             :  * \author Ole Schuett
     112             :  ******************************************************************************/
     113     1793420 : static int compare_counters(const void *a, const void *b) {
     114     1793420 :   return *(const int64_t *)b - *(const int64_t *)a;
     115             : }
     116             : 
     117             : /*******************************************************************************
     118             :  * \brief Prints statistics gathered by the DBM library.
     119             :  * \author Ole Schuett
     120             :  ******************************************************************************/
     121        9326 : void dbm_library_print_stats(const int fortran_comm,
     122             :                              void (*print_func)(char *, int),
     123             :                              const int output_unit) {
     124        9326 :   assert(omp_get_num_threads() == 1);
     125             : 
     126        9326 :   if (!library_initialized) {
     127           0 :     fprintf(stderr, "Error: DBM library is not initialized.\n");
     128           0 :     abort();
     129             :   }
     130             : 
     131        9326 :   const dbm_mpi_comm_t comm = dbm_mpi_comm_f2c(fortran_comm);
     132             :   // Sum all counters across threads and mpi ranks.
     133        9326 :   int64_t counters[DBM_NUM_COUNTERS][2] = {{0}};
     134        9326 :   double total = 0.0;
     135      606190 :   for (int i = 0; i < DBM_NUM_COUNTERS; i++) {
     136      596864 :     counters[i][1] = i; // needed as inverse index after qsort
     137     1193728 :     for (int j = 0; j < max_threads; j++) {
     138      596864 :       counters[i][0] += per_thread_counters[j][i];
     139             :     }
     140      596864 :     dbm_mpi_sum_int64(&counters[i][0], 1, comm);
     141      596864 :     total += counters[i][0];
     142             :   }
     143             : 
     144             :   // Sort counters.
     145        9326 :   qsort(counters, DBM_NUM_COUNTERS, 2 * sizeof(int64_t), &compare_counters);
     146             : 
     147             :   // Print counters.
     148        9326 :   print_func("\n", output_unit);
     149        9326 :   print_func(" ----------------------------------------------------------------"
     150             :              "---------------\n",
     151             :              output_unit);
     152        9326 :   print_func(" -                                                               "
     153             :              "              -\n",
     154             :              output_unit);
     155        9326 :   print_func(" -                                DBM STATISTICS                 "
     156             :              "              -\n",
     157             :              output_unit);
     158        9326 :   print_func(" -                                                               "
     159             :              "              -\n",
     160             :              output_unit);
     161        9326 :   print_func(" ----------------------------------------------------------------"
     162             :              "---------------\n",
     163             :              output_unit);
     164        9326 :   print_func("    M  x    N  x    K                                          "
     165             :              "COUNT     PERCENT\n",
     166             :              output_unit);
     167             : 
     168        9326 :   const char *labels[] = {"?", "??", "???", ">999"};
     169        9326 :   char buffer[100];
     170      606190 :   for (int i = 0; i < DBM_NUM_COUNTERS; i++) {
     171      596864 :     if (counters[i][0] == 0) {
     172      594088 :       continue; // skip empty counters
     173             :     }
     174        2776 :     const double percent = 100.0 * counters[i][0] / total;
     175        2776 :     const int idx = counters[i][1];
     176        2776 :     const int m = (idx % 64) / 16;
     177        2776 :     const int n = (idx % 16) / 4;
     178        2776 :     const int k = (idx % 4) / 1;
     179        2776 :     snprintf(buffer, sizeof(buffer),
     180             :              " %4s  x %4s  x %4s %46" PRId64 " %10.2f%%\n", labels[m],
     181             :              labels[n], labels[k], counters[i][0], percent);
     182        2776 :     print_func(buffer, output_unit);
     183             :   }
     184             : 
     185        9326 :   print_func(" ----------------------------------------------------------------"
     186             :              "---------------\n",
     187             :              output_unit);
     188             : 
     189        9326 :   dbm_memstats_t memstats;
     190        9326 :   dbm_mempool_statistics(&memstats);
     191        9326 :   dbm_mpi_max_uint64(&memstats.device_mallocs, 1, comm);
     192        9326 :   dbm_mpi_max_uint64(&memstats.host_mallocs, 1, comm);
     193             : 
     194        9326 :   if (0 != memstats.device_mallocs || 0 != memstats.host_mallocs) {
     195         360 :     print_func(" Memory consumption                           "
     196             :                " Number of allocations  Size [MiB]\n",
     197             :                output_unit);
     198             :   }
     199        9326 :   if (0 < memstats.device_mallocs) {
     200           0 :     dbm_mpi_max_uint64(&memstats.device_size, 1, comm);
     201           0 :     snprintf(buffer, sizeof(buffer),
     202             :              " Device                                        "
     203             :              " %20" PRIuPTR "  %10" PRIuPTR "\n",
     204           0 :              (uintptr_t)memstats.device_mallocs,
     205           0 :              (uintptr_t)((memstats.device_size + (512U << 10)) >> 20));
     206           0 :     print_func(buffer, output_unit);
     207             :   }
     208        9326 :   if (0 < memstats.host_mallocs) {
     209         360 :     dbm_mpi_max_uint64(&memstats.host_size, 1, comm);
     210         360 :     snprintf(buffer, sizeof(buffer),
     211             :              " Host                                          "
     212             :              " %20" PRIuPTR "  %10" PRIuPTR "\n",
     213         360 :              (uintptr_t)memstats.host_mallocs,
     214         360 :              (uintptr_t)((memstats.host_size + (512U << 10)) >> 20));
     215         360 :     print_func(buffer, output_unit);
     216             :   }
     217        9326 :   if (0 < memstats.device_mallocs || 0 < memstats.host_mallocs) {
     218         360 :     print_func(
     219             :         " ----------------------------------------------------------------"
     220             :         "---------------\n",
     221             :         output_unit);
     222             :   }
     223        9326 : }
     224             : 
     225             : // EOF

Generated by: LCOV version 1.15