Line data Source code
1 : /*----------------------------------------------------------------------------*/ 2 : /* CP2K: A general program to perform molecular dynamics simulations */ 3 : /* Copyright 2000-2025 CP2K developers group <https://cp2k.org> */ 4 : /* */ 5 : /* SPDX-License-Identifier: BSD-3-Clause */ 6 : /*----------------------------------------------------------------------------*/ 7 : #include <assert.h> 8 : #include <omp.h> 9 : #include <stdbool.h> 10 : #include <stddef.h> 11 : #include <stdlib.h> 12 : #include <string.h> 13 : 14 : #include "dbm_hyperparams.h" 15 : #include "dbm_shard.h" 16 : 17 : /******************************************************************************* 18 : * \brief Internal routine for finding a power of two greater than given number. 19 : * \author Ole Schuett 20 : ******************************************************************************/ 21 5986039 : static int next_power2(const int start) { 22 5986039 : int candidate = 2; 23 20968564 : while (candidate < start) { 24 14982525 : candidate *= 2; 25 : } 26 5986039 : return candidate; 27 : } 28 : 29 : /******************************************************************************* 30 : * \brief Internal routine for finding a prime greater equal than given number. 31 : * \author Ole Schuett 32 : ******************************************************************************/ 33 5986039 : static int next_prime(const int start) { 34 5986039 : int candidate = start, divisor = 0; 35 23813942 : while (divisor < candidate) { 36 363308643 : for (divisor = 2; divisor < candidate; divisor++) { 37 357322604 : if (candidate % divisor == 0) { 38 11841864 : candidate++; 39 11841864 : break; 40 : } 41 : } 42 : } 43 5986039 : return candidate; 44 : } 45 : 46 : /******************************************************************************* 47 : * \brief Internal routine for initializing a shard's hashtable. 48 : * \author Ole Schuett 49 : ******************************************************************************/ 50 5986039 : static void hashtable_init(dbm_shard_t *shard) { 51 : // Choosing size as power of two allows to replace modulo with bitwise AND. 52 11972078 : shard->hashtable_size = 53 5986039 : next_power2(DBM_HASHTABLE_FACTOR * shard->nblocks_allocated); 54 5986039 : shard->hashtable_prime = next_prime(shard->hashtable_size); 55 5986039 : shard->hashtable = calloc(shard->hashtable_size, sizeof(int)); 56 5986039 : assert(shard->hashtable != NULL); 57 5986039 : } 58 : 59 : /******************************************************************************* 60 : * \brief Internal routine for initializing a shard. 61 : * \author Ole Schuett 62 : ******************************************************************************/ 63 1707391 : void dbm_shard_init(dbm_shard_t *shard) { 64 1707391 : shard->nblocks = 0; 65 1707391 : shard->nblocks_allocated = 0; 66 1707391 : shard->blocks = NULL; 67 1707391 : hashtable_init(shard); 68 1707391 : shard->data_size = 0; 69 1707391 : shard->data_promised = 0; 70 1707391 : shard->data_allocated = 0; 71 1707391 : shard->data = NULL; 72 1707391 : omp_init_lock(&shard->lock); 73 1707391 : } 74 : 75 : /******************************************************************************* 76 : * \brief Internal routine for copying content of shard_b into shard_a. 77 : * \author Ole Schuett 78 : ******************************************************************************/ 79 428240 : void dbm_shard_copy(dbm_shard_t *shard_a, const dbm_shard_t *shard_b) { 80 428240 : assert(shard_a != NULL && shard_b != NULL); 81 : 82 428240 : if (shard_a->nblocks_allocated < shard_b->nblocks) { 83 401937 : free(shard_a->blocks); 84 401937 : shard_a->blocks = malloc(shard_b->nblocks * sizeof(dbm_block_t)); 85 401937 : shard_a->nblocks_allocated = shard_b->nblocks; 86 401937 : assert(shard_a->blocks != NULL); 87 : } 88 428240 : shard_a->nblocks = shard_b->nblocks; 89 : 90 428240 : if (shard_a->hashtable_size < shard_b->hashtable_size) { 91 404652 : free(shard_a->hashtable); 92 404652 : shard_a->hashtable = malloc(shard_b->hashtable_size * sizeof(int)); 93 404652 : assert(shard_a->hashtable != NULL); 94 : } 95 428240 : shard_a->hashtable_size = shard_b->hashtable_size; 96 428240 : shard_a->hashtable_prime = shard_b->hashtable_prime; 97 : 98 428240 : if (shard_a->data_allocated < shard_b->data_size) { 99 401937 : free(shard_a->data); 100 401937 : shard_a->data = malloc(shard_b->data_size * sizeof(double)); 101 401937 : shard_a->data_allocated = shard_b->data_size; 102 401937 : assert(shard_a->data != NULL); 103 : } 104 428240 : shard_a->data_size = shard_b->data_size; 105 : 106 428240 : if (shard_a->blocks != NULL) { 107 401967 : memcpy(shard_a->blocks, shard_b->blocks, 108 401967 : shard_b->nblocks * sizeof(dbm_block_t)); 109 : } 110 428240 : if (shard_a->hashtable != NULL) { 111 428240 : memcpy(shard_a->hashtable, shard_b->hashtable, 112 428240 : shard_b->hashtable_size * sizeof(int)); 113 : } 114 428240 : if (shard_a->data != NULL) { 115 401967 : memcpy(shard_a->data, shard_b->data, shard_b->data_size * sizeof(double)); 116 : } 117 428240 : } 118 : 119 : /******************************************************************************* 120 : * \brief Internal routine for releasing a shard. 121 : * \author Ole Schuett 122 : ******************************************************************************/ 123 1707391 : void dbm_shard_release(dbm_shard_t *shard) { 124 1707391 : free(shard->blocks); 125 1707391 : free(shard->hashtable); 126 1707391 : free(shard->data); 127 1707391 : omp_destroy_lock(&shard->lock); 128 1707391 : } 129 : 130 : /******************************************************************************* 131 : * \brief Private hash function based on Cantor pairing function. 132 : * https://en.wikipedia.org/wiki/Pairing_function#Cantor_pairing_function 133 : * Szudzik's elegant pairing proved to be too asymmetric wrt. row / col. 134 : * Using unsigned int to return a positive number even after overflow. 135 : * \author Ole Schuett 136 : ******************************************************************************/ 137 208800905 : static inline unsigned int hash(const unsigned int row, 138 : const unsigned int col) { 139 208800905 : return (row + col) * (row + col + 1) / 2 + row; // Division by 2 is cheap. 140 : } 141 : 142 : /******************************************************************************* 143 : * \brief Internal routine for masking a slot in the hash-table. 144 : * \author Hans Pabst 145 : ******************************************************************************/ 146 208800905 : static inline int hashtable_mask(const dbm_shard_t *shard) { 147 208800905 : return shard->hashtable_size - 1; 148 : } 149 : 150 : /******************************************************************************* 151 : * \brief Private routine for inserting a block into a shard's hashtable. 152 : * \author Ole Schuett 153 : ******************************************************************************/ 154 97710431 : static void hashtable_insert(dbm_shard_t *shard, const int block_idx) { 155 97710431 : assert(0 <= block_idx && block_idx < shard->nblocks); 156 97710431 : const dbm_block_t *blk = &shard->blocks[block_idx]; 157 97710431 : const unsigned int h = hash(blk->row, blk->col); 158 97710431 : int slot = (shard->hashtable_prime * h) & hashtable_mask(shard); 159 97759511 : for (;; slot = (slot + 1) & hashtable_mask(shard)) { // linear probing 160 102952734 : for (int i = slot; i < shard->hashtable_size; ++i) { 161 102928194 : if (shard->hashtable[i] == 0) { // 0 means empty 162 97710431 : shard->hashtable[i] = block_idx + 1; // 1-based 163 97710431 : return; 164 : } 165 : } 166 : } 167 : } 168 : 169 : /******************************************************************************* 170 : * \brief Internal routine for looking up a block from a shard. 171 : * \author Ole Schuett 172 : ******************************************************************************/ 173 111090474 : dbm_block_t *dbm_shard_lookup(const dbm_shard_t *shard, const int row, 174 : const int col) { 175 111090474 : int slot = (shard->hashtable_prime * hash(row, col)) & hashtable_mask(shard); 176 70737 : for (;; slot = (slot + 1) & hashtable_mask(shard)) { // linear probing 177 117337324 : for (int i = slot; i < shard->hashtable_size; ++i) { 178 117266587 : const int block_idx = shard->hashtable[i]; 179 117266587 : if (block_idx == 0) { // 1-based, 0 means empty 180 : return NULL; // block not found 181 : } 182 74369988 : assert(0 < block_idx && block_idx <= shard->nblocks); 183 74369988 : dbm_block_t *blk = &shard->blocks[block_idx - 1]; 184 74369988 : if (blk->row == row && blk->col == col) { 185 : return blk; 186 : } 187 : } 188 : } 189 : } 190 : 191 : /******************************************************************************* 192 : * \brief Internal routine for allocating the metadata of a new block. 193 : * \author Ole Schuett 194 : ******************************************************************************/ 195 50745646 : dbm_block_t *dbm_shard_promise_new_block(dbm_shard_t *shard, const int row, 196 : const int col, const int block_size) { 197 : // Grow blocks array if necessary. 198 50745646 : if (shard->nblocks_allocated < shard->nblocks + 1) { 199 4278648 : shard->nblocks_allocated = DBM_ALLOCATION_FACTOR * (shard->nblocks + 1); 200 4278648 : shard->blocks = 201 4278648 : realloc(shard->blocks, shard->nblocks_allocated * sizeof(dbm_block_t)); 202 4278648 : assert(shard->blocks != NULL); 203 : 204 : // rebuild hashtable 205 4278648 : free(shard->hashtable); 206 4278648 : hashtable_init(shard); 207 51243433 : for (int i = 0; i < shard->nblocks; i++) { 208 46964785 : hashtable_insert(shard, i); 209 : } 210 : } 211 : 212 50745646 : const int new_block_idx = shard->nblocks; 213 50745646 : shard->nblocks++; 214 50745646 : dbm_block_t *new_block = &shard->blocks[new_block_idx]; 215 50745646 : new_block->row = row; 216 50745646 : new_block->col = col; 217 50745646 : new_block->offset = shard->data_promised; 218 50745646 : shard->data_promised += block_size; 219 : // The data_size will be increased after the memory is allocated and zeroed. 220 50745646 : hashtable_insert(shard, new_block_idx); 221 50745646 : return new_block; 222 : } 223 : 224 : /******************************************************************************* 225 : * \brief Internal routine for allocating and zeroing any promised block's data. 226 : * \author Ole Schuett 227 : ******************************************************************************/ 228 12738633 : void dbm_shard_allocate_promised_blocks(dbm_shard_t *shard) { 229 : 230 : // Reallocate data array if necessary. 231 12738633 : if (shard->data_promised > shard->data_allocated) { 232 1397893 : shard->data_allocated = DBM_ALLOCATION_FACTOR * shard->data_promised; 233 1397893 : shard->data = realloc(shard->data, shard->data_allocated * sizeof(double)); 234 1397893 : assert(shard->data != NULL); 235 : } 236 : 237 : // Zero new blocks. 238 : // The following memset is usually the first touch of the memory, which leads 239 : // to frequent page faults. The executing thread determines the NUMA location 240 12738633 : if (shard->data_promised > shard->data_size) { 241 12421959 : const int tail = shard->data_promised - shard->data_size; 242 12421959 : memset(&shard->data[shard->data_size], 0, tail * sizeof(double)); 243 12421959 : shard->data_size = shard->data_promised; 244 : } 245 12738633 : } 246 : 247 : /******************************************************************************* 248 : * \brief Internal routine for getting block or promising a new one. 249 : * \author Ole Schuett 250 : ******************************************************************************/ 251 27905596 : dbm_block_t *dbm_shard_get_or_promise_block(dbm_shard_t *shard, const int row, 252 : const int col, 253 : const int block_size) { 254 27905596 : dbm_block_t *existing_blk = dbm_shard_lookup(shard, row, col); 255 27905596 : if (existing_blk != NULL) { 256 : return existing_blk; 257 : } else { 258 26073627 : return dbm_shard_promise_new_block(shard, row, col, block_size); 259 : } 260 : } 261 : 262 : /******************************************************************************* 263 : * \brief Internal routine for getting block or allocating a new one. 264 : * \author Ole Schuett 265 : ******************************************************************************/ 266 38933982 : dbm_block_t *dbm_shard_get_or_allocate_block(dbm_shard_t *shard, const int row, 267 : const int col, 268 : const int block_size) { 269 38933982 : dbm_block_t *existing_blk = dbm_shard_lookup(shard, row, col); 270 38933982 : if (existing_blk != NULL) { 271 : return existing_blk; 272 : } 273 : 274 : // Create a new block. 275 11169329 : dbm_block_t *new_blk = 276 11169329 : dbm_shard_promise_new_block(shard, row, col, block_size); 277 11169329 : dbm_shard_allocate_promised_blocks(shard); 278 : 279 11169329 : return new_blk; 280 : } 281 : 282 : // EOF