Line data Source code
1 : /*----------------------------------------------------------------------------*/ 2 : /* CP2K: A general program to perform molecular dynamics simulations */ 3 : /* Copyright 2000-2024 CP2K developers group <https://cp2k.org> */ 4 : /* */ 5 : /* SPDX-License-Identifier: BSD-3-Clause */ 6 : /*----------------------------------------------------------------------------*/ 7 : 8 : #include <assert.h> 9 : #include <inttypes.h> 10 : #include <omp.h> 11 : #include <stdbool.h> 12 : #include <stdio.h> 13 : #include <stdlib.h> 14 : #include <string.h> 15 : 16 : #include "dbm_library.h" 17 : #include "dbm_mempool.h" 18 : #include "dbm_mpi.h" 19 : 20 : #define DBM_NUM_COUNTERS 64 21 : 22 : static int64_t **per_thread_counters = NULL; 23 : static bool library_initialized = false; 24 : static int max_threads = 0; 25 : 26 : #if !defined(_OPENMP) 27 : #error "OpenMP is required. Please add -fopenmp to your C compiler flags." 28 : #endif 29 : 30 : /******************************************************************************* 31 : * \brief Initializes the DBM library. 32 : * \author Ole Schuett 33 : ******************************************************************************/ 34 8534 : void dbm_library_init(void) { 35 8534 : assert(omp_get_num_threads() == 1); 36 : 37 8534 : if (library_initialized) { 38 0 : fprintf(stderr, "DBM library was already initialized.\n"); 39 0 : abort(); 40 : } 41 : 42 8534 : max_threads = omp_get_max_threads(); 43 8534 : per_thread_counters = malloc(max_threads * sizeof(int64_t *)); 44 8534 : assert(per_thread_counters != NULL); 45 : 46 : // Using parallel regions to ensure memory is allocated near a thread's core. 47 : #pragma omp parallel default(none) shared(per_thread_counters) \ 48 : num_threads(max_threads) 49 : { 50 : const int ithread = omp_get_thread_num(); 51 : const size_t counters_size = DBM_NUM_COUNTERS * sizeof(int64_t); 52 : per_thread_counters[ithread] = malloc(counters_size); 53 : assert(per_thread_counters[ithread] != NULL); 54 : memset(per_thread_counters[ithread], 0, counters_size); 55 : } 56 : 57 8534 : library_initialized = true; 58 8534 : } 59 : 60 : /******************************************************************************* 61 : * \brief Finalizes the DBM library. 62 : * \author Ole Schuett 63 : ******************************************************************************/ 64 8534 : void dbm_library_finalize(void) { 65 8534 : assert(omp_get_num_threads() == 1); 66 : 67 8534 : if (!library_initialized) { 68 0 : fprintf(stderr, "Error: DBM library is not initialized.\n"); 69 0 : abort(); 70 : } 71 : 72 17068 : for (int i = 0; i < max_threads; i++) { 73 8534 : free(per_thread_counters[i]); 74 : } 75 8534 : free(per_thread_counters); 76 8534 : per_thread_counters = NULL; 77 : 78 8534 : dbm_mempool_clear(); 79 8534 : library_initialized = false; 80 8534 : } 81 : 82 : /******************************************************************************* 83 : * \brief Computes min(3, floor(log10(x))). 84 : * \author Ole Schuett 85 : ******************************************************************************/ 86 65164005 : static int floorlog10(const int x) { 87 65164005 : if (x >= 1000) { 88 : return 3; 89 : } 90 65163621 : if (x >= 100) { 91 : return 2; 92 : } 93 62578359 : if (x >= 10) { 94 18548837 : return 1; 95 : } 96 : return 0; 97 : } 98 : 99 : /******************************************************************************* 100 : * \brief Add given block multiplication to stats. This routine is thread-safe. 101 : * \author Ole Schuett 102 : ******************************************************************************/ 103 21721335 : void dbm_library_counter_increment(const int m, const int n, const int k) { 104 21721335 : const int ithread = omp_get_thread_num(); 105 21721335 : assert(ithread < max_threads); 106 21721335 : const int idx = 16 * floorlog10(m) + 4 * floorlog10(n) + floorlog10(k); 107 21721335 : per_thread_counters[ithread][idx]++; 108 21721335 : } 109 : 110 : /******************************************************************************* 111 : * \brief Comperator passed to qsort to compare two counters. 112 : * \author Ole Schuett 113 : ******************************************************************************/ 114 1663952 : static int compare_counters(const void *a, const void *b) { 115 1663952 : return *(const int64_t *)b - *(const int64_t *)a; 116 : } 117 : 118 : /******************************************************************************* 119 : * \brief Prints statistics gathered by the DBM library. 120 : * \author Ole Schuett 121 : ******************************************************************************/ 122 8652 : void dbm_library_print_stats(const int fortran_comm, 123 : void (*print_func)(char *, int), 124 : const int output_unit) { 125 8652 : assert(omp_get_num_threads() == 1); 126 : 127 8652 : if (!library_initialized) { 128 0 : fprintf(stderr, "Error: DBM library is not initialized.\n"); 129 0 : abort(); 130 : } 131 : 132 8652 : const dbm_mpi_comm_t comm = dbm_mpi_comm_f2c(fortran_comm); 133 : // Sum all counters across threads and mpi ranks. 134 8652 : int64_t counters[DBM_NUM_COUNTERS][2]; 135 8652 : memset(counters, 0, DBM_NUM_COUNTERS * 2 * sizeof(int64_t)); 136 8652 : double total = 0.0; 137 562380 : for (int i = 0; i < DBM_NUM_COUNTERS; i++) { 138 553728 : counters[i][1] = i; // needed as inverse index after qsort 139 1107456 : for (int j = 0; j < max_threads; j++) { 140 553728 : counters[i][0] += per_thread_counters[j][i]; 141 : } 142 553728 : dbm_mpi_sum_int64(&counters[i][0], 1, comm); 143 553728 : total += counters[i][0]; 144 : } 145 : 146 : // Sort counters. 147 8652 : qsort(counters, DBM_NUM_COUNTERS, 2 * sizeof(int64_t), &compare_counters); 148 : 149 : // Print counters. 150 8652 : print_func("\n", output_unit); 151 8652 : print_func(" ----------------------------------------------------------------" 152 : "---------------\n", 153 : output_unit); 154 8652 : print_func(" - " 155 : " -\n", 156 : output_unit); 157 8652 : print_func(" - DBM STATISTICS " 158 : " -\n", 159 : output_unit); 160 8652 : print_func(" - " 161 : " -\n", 162 : output_unit); 163 8652 : print_func(" ----------------------------------------------------------------" 164 : "---------------\n", 165 : output_unit); 166 8652 : print_func(" M x N x K " 167 : "COUNT PERCENT\n", 168 : output_unit); 169 : 170 8652 : const char *labels[] = {"?", "??", "???", ">999"}; 171 562380 : for (int i = 0; i < DBM_NUM_COUNTERS; i++) { 172 553728 : if (counters[i][0] == 0) { 173 551002 : continue; // skip empty counters 174 : } 175 2726 : const double percent = 100.0 * counters[i][0] / total; 176 2726 : const int idx = counters[i][1]; 177 2726 : const int m = (idx % 64) / 16; 178 2726 : const int n = (idx % 16) / 4; 179 2726 : const int k = (idx % 4) / 1; 180 2726 : char buffer[100]; 181 2726 : snprintf(buffer, sizeof(buffer), 182 : " %4s x %4s x %4s %46" PRId64 " %10.2f%%\n", labels[m], 183 : labels[n], labels[k], counters[i][0], percent); 184 2726 : print_func(buffer, output_unit); 185 : } 186 : 187 8652 : print_func(" ----------------------------------------------------------------" 188 : "---------------\n", 189 : output_unit); 190 8652 : } 191 : 192 : // EOF