Line data Source code
1 : /*----------------------------------------------------------------------------*/ 2 : /* CP2K: A general program to perform molecular dynamics simulations */ 3 : /* Copyright 2000-2025 CP2K developers group <https://cp2k.org> */ 4 : /* */ 5 : /* SPDX-License-Identifier: BSD-3-Clause */ 6 : /*----------------------------------------------------------------------------*/ 7 : #include <assert.h> 8 : #include <omp.h> 9 : #include <stdbool.h> 10 : #include <stddef.h> 11 : #include <stdlib.h> 12 : #include <string.h> 13 : 14 : #include "dbm_hyperparams.h" 15 : #include "dbm_shard.h" 16 : 17 : /******************************************************************************* 18 : * \brief Internal routine for finding a power of two greater than given number. 19 : * \author Ole Schuett 20 : ******************************************************************************/ 21 5879896 : static int next_power2(const int start) { 22 5879896 : int candidate = 2; 23 20724919 : while (candidate < start) { 24 14845023 : candidate *= 2; 25 : } 26 5879896 : return candidate; 27 : } 28 : 29 : /******************************************************************************* 30 : * \brief Internal routine for finding a prime greater equal than given number. 31 : * \author Ole Schuett 32 : ******************************************************************************/ 33 5879896 : static int next_prime(const int start) { 34 5879896 : int candidate = start, divisor = 0; 35 23466949 : while (divisor < candidate) { 36 361495243 : for (divisor = 2; divisor < candidate; divisor++) { 37 355615347 : if (candidate % divisor == 0) { 38 11707157 : candidate++; 39 11707157 : break; 40 : } 41 : } 42 : } 43 5879896 : return candidate; 44 : } 45 : 46 : /******************************************************************************* 47 : * \brief Internal routine for initializing a shard's hashtable. 48 : * \author Ole Schuett 49 : ******************************************************************************/ 50 5879896 : static void hashtable_init(dbm_shard_t *shard) { 51 : // Choosing size as power of two allows to replace modulo with bitwise AND. 52 11759792 : shard->hashtable_size = 53 5879896 : next_power2(HASHTABLE_FACTOR * shard->nblocks_allocated); 54 5879896 : shard->hashtable_mask = shard->hashtable_size - 1; 55 5879896 : shard->hashtable_prime = next_prime(shard->hashtable_size); 56 5879896 : shard->hashtable = calloc(shard->hashtable_size, sizeof(int)); 57 5879896 : assert(shard->hashtable != NULL); 58 5879896 : } 59 : 60 : /******************************************************************************* 61 : * \brief Internal routine for initializing a shard. 62 : * \author Ole Schuett 63 : ******************************************************************************/ 64 1645937 : void dbm_shard_init(dbm_shard_t *shard) { 65 1645937 : shard->nblocks = 0; 66 1645937 : shard->nblocks_allocated = 0; 67 1645937 : shard->blocks = NULL; 68 1645937 : hashtable_init(shard); 69 1645937 : shard->data_size = 0; 70 1645937 : shard->data_promised = 0; 71 1645937 : shard->data_allocated = 0; 72 1645937 : shard->data = NULL; 73 1645937 : omp_init_lock(&shard->lock); 74 1645937 : } 75 : 76 : /******************************************************************************* 77 : * \brief Internal routine for copying content of shard_b into shard_a. 78 : * \author Ole Schuett 79 : ******************************************************************************/ 80 408576 : void dbm_shard_copy(dbm_shard_t *shard_a, const dbm_shard_t *shard_b) { 81 408576 : assert(shard_a != NULL && shard_b != NULL); 82 : 83 408576 : if (shard_a->nblocks_allocated < shard_b->nblocks) { 84 381583 : free(shard_a->blocks); 85 381583 : shard_a->blocks = malloc(shard_b->nblocks * sizeof(dbm_block_t)); 86 381583 : shard_a->nblocks_allocated = shard_b->nblocks; 87 381583 : assert(shard_a->blocks != NULL); 88 : } 89 408576 : shard_a->nblocks = shard_b->nblocks; 90 : 91 408576 : if (shard_a->hashtable_size < shard_b->hashtable_size) { 92 384298 : free(shard_a->hashtable); 93 384298 : shard_a->hashtable = malloc(shard_b->hashtable_size * sizeof(int)); 94 384298 : assert(shard_a->hashtable != NULL); 95 : } 96 408576 : shard_a->hashtable_size = shard_b->hashtable_size; 97 408576 : shard_a->hashtable_mask = shard_b->hashtable_mask; 98 408576 : shard_a->hashtable_prime = shard_b->hashtable_prime; 99 : 100 408576 : if (shard_a->data_allocated < shard_b->data_size) { 101 381583 : free(shard_a->data); 102 381583 : shard_a->data = malloc(shard_b->data_size * sizeof(double)); 103 381583 : shard_a->data_allocated = shard_b->data_size; 104 381583 : assert(shard_a->data != NULL); 105 : } 106 408576 : shard_a->data_size = shard_b->data_size; 107 : 108 408576 : if (shard_a->blocks != NULL) { 109 381613 : memcpy(shard_a->blocks, shard_b->blocks, 110 381613 : shard_b->nblocks * sizeof(dbm_block_t)); 111 : } 112 408576 : if (shard_a->hashtable != NULL) { 113 408576 : memcpy(shard_a->hashtable, shard_b->hashtable, 114 408576 : shard_b->hashtable_size * sizeof(int)); 115 : } 116 408576 : if (shard_a->data != NULL) { 117 381613 : memcpy(shard_a->data, shard_b->data, shard_b->data_size * sizeof(double)); 118 : } 119 408576 : } 120 : 121 : /******************************************************************************* 122 : * \brief Internal routine for releasing a shard. 123 : * \author Ole Schuett 124 : ******************************************************************************/ 125 1645937 : void dbm_shard_release(dbm_shard_t *shard) { 126 1645937 : free(shard->blocks); 127 1645937 : free(shard->hashtable); 128 1645937 : free(shard->data); 129 1645937 : omp_destroy_lock(&shard->lock); 130 1645937 : } 131 : 132 : /******************************************************************************* 133 : * \brief Private hash function based on Cantor pairing function. 134 : * https://en.wikipedia.org/wiki/Pairing_function#Cantor_pairing_function 135 : * Szudzik's elegant pairing proved to be too asymmetric wrt. row / col. 136 : * Using unsigned int to return a positive number even after overflow. 137 : * \author Ole Schuett 138 : ******************************************************************************/ 139 207939612 : static inline unsigned int hash(const unsigned int row, 140 : const unsigned int col) { 141 207939612 : return (row + col) * (row + col + 1) / 2 + row; // Division by 2 is cheap. 142 : } 143 : 144 : /******************************************************************************* 145 : * \brief Private routine for inserting a block into a shard's hashtable. 146 : * \author Ole Schuett 147 : ******************************************************************************/ 148 97225041 : static void hashtable_insert(dbm_shard_t *shard, const int block_idx) { 149 97225041 : assert(0 <= block_idx && block_idx < shard->nblocks); 150 97225041 : const dbm_block_t *blk = &shard->blocks[block_idx]; 151 97225041 : const int row = blk->row, col = blk->col; 152 97225041 : int slot = (shard->hashtable_prime * hash(row, col)) & shard->hashtable_mask; 153 107630889 : while (true) { 154 102427965 : if (shard->hashtable[slot] == 0) { 155 97225041 : shard->hashtable[slot] = block_idx + 1; // 1-based because 0 means empty 156 97225041 : return; 157 : } 158 : // linear probing 159 5202924 : slot = (slot + 1) & shard->hashtable_mask; 160 : } 161 : } 162 : 163 : /******************************************************************************* 164 : * \brief Internal routine for looking up a block from a shard. 165 : * \author Ole Schuett 166 : ******************************************************************************/ 167 110714571 : dbm_block_t *dbm_shard_lookup(const dbm_shard_t *shard, const int row, 168 : const int col) { 169 110714571 : int slot = (shard->hashtable_prime * hash(row, col)) & shard->hashtable_mask; 170 123033871 : while (true) { 171 116874221 : const int block_idx = shard->hashtable[slot] - 1; // 1-based, 0 means empty. 172 116874221 : if (block_idx < 0) { 173 : return NULL; // block not found 174 : } 175 74161897 : assert(0 <= block_idx && block_idx < shard->nblocks); 176 74161897 : dbm_block_t *blk = &shard->blocks[block_idx]; 177 74161897 : if (blk->row == row && blk->col == col) { 178 : return blk; 179 : } 180 : // linear probing 181 6159650 : slot = (slot + 1) & shard->hashtable_mask; 182 : } 183 : } 184 : 185 : /******************************************************************************* 186 : * \brief Internal routine for allocating the metadata of a new block. 187 : * \author Ole Schuett 188 : ******************************************************************************/ 189 50491303 : dbm_block_t *dbm_shard_promise_new_block(dbm_shard_t *shard, const int row, 190 : const int col, const int block_size) { 191 : // Grow blocks array if necessary. 192 50491303 : if (shard->nblocks_allocated < shard->nblocks + 1) { 193 4233959 : shard->nblocks_allocated = ALLOCATION_FACTOR * (shard->nblocks + 1); 194 4233959 : shard->blocks = 195 4233959 : realloc(shard->blocks, shard->nblocks_allocated * sizeof(dbm_block_t)); 196 4233959 : assert(shard->blocks != NULL); 197 : 198 : // rebuild hashtable 199 4233959 : free(shard->hashtable); 200 4233959 : hashtable_init(shard); 201 50967697 : for (int i = 0; i < shard->nblocks; i++) { 202 46733738 : hashtable_insert(shard, i); 203 : } 204 : } 205 : 206 50491303 : const int new_block_idx = shard->nblocks; 207 50491303 : shard->nblocks++; 208 50491303 : dbm_block_t *new_block = &shard->blocks[new_block_idx]; 209 50491303 : new_block->row = row; 210 50491303 : new_block->col = col; 211 50491303 : new_block->offset = shard->data_promised; 212 50491303 : shard->data_promised += block_size; 213 : // The data_size will be increase after the memory is allocated and zeroed. 214 50491303 : hashtable_insert(shard, new_block_idx); 215 50491303 : return new_block; 216 : } 217 : 218 : /******************************************************************************* 219 : * \brief Internal routine for allocating and zeroing any promised block's data. 220 : * \author Ole Schuett 221 : ******************************************************************************/ 222 12726731 : void dbm_shard_allocate_promised_blocks(dbm_shard_t *shard) { 223 : 224 : // Reallocate data array if necessary. 225 12726731 : if (shard->data_promised > shard->data_allocated) { 226 1400511 : shard->data_allocated = ALLOCATION_FACTOR * shard->data_promised; 227 1400511 : shard->data = realloc(shard->data, shard->data_allocated * sizeof(double)); 228 1400511 : assert(shard->data != NULL); 229 : } 230 : 231 : // Zero new blocks. 232 : // The following memset is usually the first touch of the memory, which leads 233 : // to frequent page faults. The executing thread determines the NUMA location 234 12726731 : if (shard->data_promised > shard->data_size) { 235 12428181 : const int tail = shard->data_promised - shard->data_size; 236 12428181 : memset(&shard->data[shard->data_size], 0, tail * sizeof(double)); 237 12428181 : shard->data_size = shard->data_promised; 238 : } 239 12726731 : } 240 : 241 : /******************************************************************************* 242 : * \brief Internal routine for getting block or promising a new one. 243 : * \author Ole Schuett 244 : ******************************************************************************/ 245 27702018 : dbm_block_t *dbm_shard_get_or_promise_block(dbm_shard_t *shard, const int row, 246 : const int col, 247 : const int block_size) { 248 27702018 : dbm_block_t *existing_blk = dbm_shard_lookup(shard, row, col); 249 27702018 : if (existing_blk != NULL) { 250 : return existing_blk; 251 : } else { 252 25847896 : return dbm_shard_promise_new_block(shard, row, col, block_size); 253 : } 254 : } 255 : 256 : /******************************************************************************* 257 : * \brief Internal routine for getting block or allocating a new one. 258 : * \author Ole Schuett 259 : ******************************************************************************/ 260 38736975 : dbm_block_t *dbm_shard_get_or_allocate_block(dbm_shard_t *shard, const int row, 261 : const int col, 262 : const int block_size) { 263 38736975 : dbm_block_t *existing_blk = dbm_shard_lookup(shard, row, col); 264 38736975 : if (existing_blk != NULL) { 265 : return existing_blk; 266 : } 267 : 268 : // Create a new block. 269 11193107 : dbm_block_t *new_blk = 270 11193107 : dbm_shard_promise_new_block(shard, row, col, block_size); 271 11193107 : dbm_shard_allocate_promised_blocks(shard); 272 : 273 11193107 : return new_blk; 274 : } 275 : 276 : // EOF