Line data Source code
1 : /*----------------------------------------------------------------------------*/ 2 : /* CP2K: A general program to perform molecular dynamics simulations */ 3 : /* Copyright 2000-2024 CP2K developers group <https://cp2k.org> */ 4 : /* */ 5 : /* SPDX-License-Identifier: BSD-3-Clause */ 6 : /*----------------------------------------------------------------------------*/ 7 : #include <assert.h> 8 : #include <omp.h> 9 : #include <stdbool.h> 10 : #include <stddef.h> 11 : #include <stdlib.h> 12 : #include <string.h> 13 : 14 : #include "dbm_hyperparams.h" 15 : #include "dbm_shard.h" 16 : 17 : /******************************************************************************* 18 : * \brief Internal routine for finding a power of two greater than given number. 19 : * \author Ole Schuett 20 : ******************************************************************************/ 21 1226751 : static int next_power2(const int start) { 22 1226751 : int candidate = 2; 23 11135984 : while (candidate < start) { 24 9909233 : candidate *= 2; 25 : } 26 1226751 : return candidate; 27 : } 28 : 29 : /******************************************************************************* 30 : * \brief Internal routine for finding a prime greater equal than given number. 31 : * \author Ole Schuett 32 : ******************************************************************************/ 33 1226751 : static int next_prime(const int start) { 34 1226751 : int candidate = start, divisor = 0; 35 13341397 : while (divisor < candidate) { 36 767944245 : for (divisor = 2; divisor < candidate; divisor++) { 37 766717494 : if (candidate % divisor == 0) { 38 10887895 : candidate++; 39 10887895 : break; 40 : } 41 : } 42 : } 43 1226751 : return candidate; 44 : } 45 : 46 : /******************************************************************************* 47 : * \brief Internal routine for initializing a shard's hashtable. 48 : * \author Ole Schuett 49 : ******************************************************************************/ 50 1226751 : static void hashtable_init(dbm_shard_t *shard) { 51 : // Choosing size as power of two allows to replace modulo with bitwise AND. 52 2453502 : shard->hashtable_size = 53 1226751 : next_power2(HASHTABLE_FACTOR * shard->nblocks_allocated); 54 1226751 : shard->hashtable_mask = shard->hashtable_size - 1; 55 1226751 : shard->hashtable_prime = next_prime(shard->hashtable_size); 56 1226751 : shard->hashtable = calloc(shard->hashtable_size, sizeof(int)); 57 1226751 : } 58 : 59 : /******************************************************************************* 60 : * \brief Internal routine for initializing a shard. 61 : * \author Ole Schuett 62 : ******************************************************************************/ 63 1138271 : void dbm_shard_init(dbm_shard_t *shard) { 64 1138271 : shard->nblocks = 0; 65 1138271 : shard->nblocks_allocated = INITIAL_NBLOCKS_ALLOCATED; 66 1138271 : shard->blocks = malloc(shard->nblocks_allocated * sizeof(dbm_block_t)); 67 1138271 : hashtable_init(shard); 68 1138271 : shard->data_size = 0; 69 1138271 : shard->data_promised = 0; 70 1138271 : shard->data_allocated = INITIAL_DATA_ALLOCATED; 71 1138271 : shard->data = malloc(shard->data_allocated * sizeof(double)); 72 : 73 1138271 : omp_init_lock(&shard->lock); 74 1138271 : } 75 : 76 : /******************************************************************************* 77 : * \brief Internal routine for copying content of shard_b into shard_a. 78 : * \author Ole Schuett 79 : ******************************************************************************/ 80 265848 : void dbm_shard_copy(dbm_shard_t *shard_a, const dbm_shard_t *shard_b) { 81 265848 : free(shard_a->blocks); 82 265848 : shard_a->nblocks = shard_b->nblocks; 83 265848 : shard_a->nblocks_allocated = shard_b->nblocks_allocated; 84 265848 : shard_a->blocks = malloc(shard_b->nblocks_allocated * sizeof(dbm_block_t)); 85 265848 : memcpy(shard_a->blocks, shard_b->blocks, 86 265848 : shard_b->nblocks * sizeof(dbm_block_t)); 87 : 88 265848 : free(shard_a->hashtable); 89 265848 : shard_a->hashtable_size = shard_b->hashtable_size; 90 265848 : shard_a->hashtable_mask = shard_b->hashtable_mask; 91 265848 : shard_a->hashtable_prime = shard_b->hashtable_prime; 92 265848 : shard_a->hashtable = malloc(shard_b->hashtable_size * sizeof(int)); 93 265848 : memcpy(shard_a->hashtable, shard_b->hashtable, 94 265848 : shard_b->hashtable_size * sizeof(int)); 95 : 96 265848 : free(shard_a->data); 97 265848 : shard_a->data_allocated = shard_b->data_allocated; 98 265848 : shard_a->data = malloc(shard_b->data_allocated * sizeof(double)); 99 265848 : shard_a->data_size = shard_b->data_size; 100 265848 : memcpy(shard_a->data, shard_b->data, shard_b->data_size * sizeof(double)); 101 265848 : } 102 : 103 : /******************************************************************************* 104 : * \brief Internal routine for releasing a shard. 105 : * \author Ole Schuett 106 : ******************************************************************************/ 107 1138271 : void dbm_shard_release(dbm_shard_t *shard) { 108 1138271 : free(shard->blocks); 109 1138271 : free(shard->hashtable); 110 1138271 : free(shard->data); 111 1138271 : omp_destroy_lock(&shard->lock); 112 1138271 : } 113 : 114 : /******************************************************************************* 115 : * \brief Private hash function based on Cantor pairing function. 116 : * https://en.wikipedia.org/wiki/Pairing_function#Cantor_pairing_function 117 : * Szudzik's elegant pairing proved to be too asymmetric wrt. row / col. 118 : * Using unsigned int to return a positive number even after overflow. 119 : * \author Ole Schuett 120 : ******************************************************************************/ 121 182648617 : static inline unsigned int hash(const unsigned int row, 122 : const unsigned int col) { 123 182648617 : return (row + col) * (row + col + 1) / 2 + row; // Division by 2 is cheap. 124 : } 125 : 126 : /******************************************************************************* 127 : * \brief Private routine for inserting a block into a shard's hashtable. 128 : * \author Ole Schuett 129 : ******************************************************************************/ 130 72319673 : static void hashtable_insert(dbm_shard_t *shard, const int block_idx) { 131 72319673 : assert(0 <= block_idx && block_idx < shard->nblocks); 132 72319673 : const dbm_block_t *blk = &shard->blocks[block_idx]; 133 72319673 : const int row = blk->row, col = blk->col; 134 72319673 : int slot = (shard->hashtable_prime * hash(row, col)) & shard->hashtable_mask; 135 79336471 : while (true) { 136 75828072 : if (shard->hashtable[slot] == 0) { 137 72319673 : shard->hashtable[slot] = block_idx + 1; // 1-based because 0 means empty 138 72319673 : return; 139 : } 140 : // linear probing 141 3508399 : slot = (slot + 1) & shard->hashtable_mask; 142 : } 143 : } 144 : 145 : /******************************************************************************* 146 : * \brief Internal routine for looking up a block from a shard. 147 : * \author Ole Schuett 148 : ******************************************************************************/ 149 110328944 : dbm_block_t *dbm_shard_lookup(const dbm_shard_t *shard, const int row, 150 : const int col) { 151 110328944 : int slot = (shard->hashtable_prime * hash(row, col)) & shard->hashtable_mask; 152 118811042 : while (true) { 153 114569993 : const int block_idx = shard->hashtable[slot] - 1; // 1-based, 0 means empty. 154 114569993 : if (block_idx < 0) { 155 : return NULL; // block not found 156 : } 157 72248243 : assert(0 <= block_idx && block_idx < shard->nblocks); 158 72248243 : dbm_block_t *blk = &shard->blocks[block_idx]; 159 72248243 : if (blk->row == row && blk->col == col) { 160 : return blk; 161 : } 162 : // linear probing 163 4241049 : slot = (slot + 1) & shard->hashtable_mask; 164 : } 165 : } 166 : 167 : /******************************************************************************* 168 : * \brief Internal routine for allocating the metadata of a new block. 169 : * \author Ole Schuett 170 : ******************************************************************************/ 171 50129904 : dbm_block_t *dbm_shard_promise_new_block(dbm_shard_t *shard, const int row, 172 : const int col, const int block_size) { 173 : // Grow blocks array if necessary. 174 50129904 : if (shard->nblocks_allocated < shard->nblocks + 1) { 175 88480 : shard->nblocks_allocated = ALLOCATION_FACTOR * (shard->nblocks + 1); 176 88480 : shard->blocks = (dbm_block_t *)realloc( 177 88480 : shard->blocks, shard->nblocks_allocated * sizeof(dbm_block_t)); 178 : 179 : // rebuild hashtable 180 88480 : free(shard->hashtable); 181 88480 : hashtable_init(shard); 182 22278249 : for (int i = 0; i < shard->nblocks; i++) { 183 22189769 : hashtable_insert(shard, i); 184 : } 185 : } 186 : 187 50129904 : const int new_block_idx = shard->nblocks; 188 50129904 : shard->nblocks++; 189 50129904 : dbm_block_t *new_block = &shard->blocks[new_block_idx]; 190 50129904 : new_block->row = row; 191 50129904 : new_block->col = col; 192 50129904 : new_block->offset = shard->data_promised; 193 50129904 : shard->data_promised += block_size; 194 : // The data_size will be increase after the memory is allocated and zeroed. 195 50129904 : hashtable_insert(shard, new_block_idx); 196 50129904 : return new_block; 197 : } 198 : 199 : /******************************************************************************* 200 : * \brief Internal routine for allocating and zeroing any promised block's data. 201 : * \author Ole Schuett 202 : ******************************************************************************/ 203 12537614 : void dbm_shard_allocate_promised_blocks(dbm_shard_t *shard) { 204 : 205 : // Reallocate data array if necessary. 206 12537614 : if (shard->data_promised > shard->data_allocated) { 207 357461 : shard->data_allocated = ALLOCATION_FACTOR * shard->data_promised; 208 357461 : shard->data = 209 357461 : (double *)realloc(shard->data, shard->data_allocated * sizeof(double)); 210 : } 211 : 212 : // Zero new blocks. 213 : // The following memset is usually the first touch of the memory, which leads 214 : // to frequent page faults. The executing thread determines the NUMA location 215 12537614 : if (shard->data_promised > shard->data_size) { 216 12272363 : const int tail = shard->data_promised - shard->data_size; 217 12272363 : memset(&shard->data[shard->data_size], 0, tail * sizeof(double)); 218 12272363 : shard->data_size = shard->data_promised; 219 : } 220 12537614 : } 221 : 222 : /******************************************************************************* 223 : * \brief Internal routine for getting block or promising a new one. 224 : * \author Ole Schuett 225 : ******************************************************************************/ 226 27392646 : dbm_block_t *dbm_shard_get_or_promise_block(dbm_shard_t *shard, const int row, 227 : const int col, 228 : const int block_size) { 229 27392646 : dbm_block_t *existing_blk = dbm_shard_lookup(shard, row, col); 230 27392646 : if (existing_blk != NULL) { 231 : return existing_blk; 232 : } else { 233 25543310 : return dbm_shard_promise_new_block(shard, row, col, block_size); 234 : } 235 : } 236 : 237 : /******************************************************************************* 238 : * \brief Internal routine for getting block or allocating a new one. 239 : * \author Ole Schuett 240 : ******************************************************************************/ 241 38562638 : dbm_block_t *dbm_shard_get_or_allocate_block(dbm_shard_t *shard, const int row, 242 : const int col, 243 : const int block_size) { 244 38562638 : dbm_block_t *existing_blk = dbm_shard_lookup(shard, row, col); 245 38562638 : if (existing_blk != NULL) { 246 : return existing_blk; 247 : } 248 : 249 : // Create a new block. 250 11347111 : dbm_block_t *new_blk = 251 11347111 : dbm_shard_promise_new_block(shard, row, col, block_size); 252 11347111 : dbm_shard_allocate_promised_blocks(shard); 253 : 254 11347111 : return new_blk; 255 : } 256 : 257 : // EOF