Line data Source code
1 : /*----------------------------------------------------------------------------*/ 2 : /* CP2K: A general program to perform molecular dynamics simulations */ 3 : /* Copyright 2000-2024 CP2K developers group <https://cp2k.org> */ 4 : /* */ 5 : /* SPDX-License-Identifier: BSD-3-Clause */ 6 : /*----------------------------------------------------------------------------*/ 7 : #include <assert.h> 8 : #include <omp.h> 9 : #include <stdbool.h> 10 : #include <stddef.h> 11 : #include <stdlib.h> 12 : #include <string.h> 13 : 14 : #include "dbm_hyperparams.h" 15 : #include "dbm_shard.h" 16 : 17 : /******************************************************************************* 18 : * \brief Internal routine for finding a power of two greater than given number. 19 : * \author Ole Schuett 20 : ******************************************************************************/ 21 1739752 : static int next_power2(const int start) { 22 1739752 : int candidate = 2; 23 15751157 : while (candidate < start) { 24 14011405 : candidate *= 2; 25 : } 26 1739752 : return candidate; 27 : } 28 : 29 : /******************************************************************************* 30 : * \brief Internal routine for finding a prime greater equal than given number. 31 : * \author Ole Schuett 32 : ******************************************************************************/ 33 1739752 : static int next_prime(const int start) { 34 1739752 : int candidate = start, divisor = 0; 35 18987776 : while (divisor < candidate) { 36 1045042710 : for (divisor = 2; divisor < candidate; divisor++) { 37 1043302958 : if (candidate % divisor == 0) { 38 15508272 : candidate++; 39 15508272 : break; 40 : } 41 : } 42 : } 43 1739752 : return candidate; 44 : } 45 : 46 : /******************************************************************************* 47 : * \brief Internal routine for initializing a shard's hashtable. 48 : * \author Ole Schuett 49 : ******************************************************************************/ 50 1739752 : static void hashtable_init(dbm_shard_t *shard) { 51 : // Choosing size as power of two allows to replace modulo with bitwise AND. 52 3479504 : shard->hashtable_size = 53 1739752 : next_power2(HASHTABLE_FACTOR * shard->nblocks_allocated); 54 1739752 : shard->hashtable_mask = shard->hashtable_size - 1; 55 1739752 : shard->hashtable_prime = next_prime(shard->hashtable_size); 56 1739752 : shard->hashtable = calloc(shard->hashtable_size, sizeof(int)); 57 1739752 : assert(shard->hashtable != NULL); 58 1739752 : } 59 : 60 : /******************************************************************************* 61 : * \brief Internal routine for initializing a shard. 62 : * \author Ole Schuett 63 : ******************************************************************************/ 64 1653121 : void dbm_shard_init(dbm_shard_t *shard) { 65 1653121 : shard->nblocks = 0; 66 1653121 : shard->nblocks_allocated = INITIAL_NBLOCKS_ALLOCATED; 67 1653121 : shard->blocks = malloc(shard->nblocks_allocated * sizeof(dbm_block_t)); 68 1653121 : assert(shard->blocks != NULL); 69 1653121 : hashtable_init(shard); 70 1653121 : shard->data_size = 0; 71 1653121 : shard->data_promised = 0; 72 1653121 : shard->data_allocated = INITIAL_DATA_ALLOCATED; 73 1653121 : shard->data = malloc(shard->data_allocated * sizeof(double)); 74 1653121 : assert(shard->data != NULL); 75 : 76 1653121 : omp_init_lock(&shard->lock); 77 1653121 : } 78 : 79 : /******************************************************************************* 80 : * \brief Internal routine for copying content of shard_b into shard_a. 81 : * \author Ole Schuett 82 : ******************************************************************************/ 83 410736 : void dbm_shard_copy(dbm_shard_t *shard_a, const dbm_shard_t *shard_b) { 84 410736 : free(shard_a->blocks); 85 410736 : shard_a->nblocks = shard_b->nblocks; 86 410736 : shard_a->nblocks_allocated = shard_b->nblocks_allocated; 87 410736 : shard_a->blocks = malloc(shard_b->nblocks_allocated * sizeof(dbm_block_t)); 88 410736 : assert(shard_a->blocks != NULL); 89 410736 : memcpy(shard_a->blocks, shard_b->blocks, 90 410736 : shard_b->nblocks * sizeof(dbm_block_t)); 91 : 92 410736 : free(shard_a->hashtable); 93 410736 : shard_a->hashtable_size = shard_b->hashtable_size; 94 410736 : shard_a->hashtable_mask = shard_b->hashtable_mask; 95 410736 : shard_a->hashtable_prime = shard_b->hashtable_prime; 96 410736 : shard_a->hashtable = malloc(shard_b->hashtable_size * sizeof(int)); 97 410736 : assert(shard_a->hashtable != NULL); 98 410736 : memcpy(shard_a->hashtable, shard_b->hashtable, 99 410736 : shard_b->hashtable_size * sizeof(int)); 100 : 101 410736 : free(shard_a->data); 102 410736 : shard_a->data_allocated = shard_b->data_allocated; 103 410736 : shard_a->data = malloc(shard_b->data_allocated * sizeof(double)); 104 410736 : assert(shard_a->data != NULL); 105 410736 : shard_a->data_size = shard_b->data_size; 106 410736 : memcpy(shard_a->data, shard_b->data, shard_b->data_size * sizeof(double)); 107 410736 : } 108 : 109 : /******************************************************************************* 110 : * \brief Internal routine for releasing a shard. 111 : * \author Ole Schuett 112 : ******************************************************************************/ 113 1653121 : void dbm_shard_release(dbm_shard_t *shard) { 114 1653121 : free(shard->blocks); 115 1653121 : free(shard->hashtable); 116 1653121 : free(shard->data); 117 1653121 : omp_destroy_lock(&shard->lock); 118 1653121 : } 119 : 120 : /******************************************************************************* 121 : * \brief Private hash function based on Cantor pairing function. 122 : * https://en.wikipedia.org/wiki/Pairing_function#Cantor_pairing_function 123 : * Szudzik's elegant pairing proved to be too asymmetric wrt. row / col. 124 : * Using unsigned int to return a positive number even after overflow. 125 : * \author Ole Schuett 126 : ******************************************************************************/ 127 183171260 : static inline unsigned int hash(const unsigned int row, 128 : const unsigned int col) { 129 183171260 : return (row + col) * (row + col + 1) / 2 + row; // Division by 2 is cheap. 130 : } 131 : 132 : /******************************************************************************* 133 : * \brief Private routine for inserting a block into a shard's hashtable. 134 : * \author Ole Schuett 135 : ******************************************************************************/ 136 72363664 : static void hashtable_insert(dbm_shard_t *shard, const int block_idx) { 137 72363664 : assert(0 <= block_idx && block_idx < shard->nblocks); 138 72363664 : const dbm_block_t *blk = &shard->blocks[block_idx]; 139 72363664 : const int row = blk->row, col = blk->col; 140 72363664 : int slot = (shard->hashtable_prime * hash(row, col)) & shard->hashtable_mask; 141 79258120 : while (true) { 142 75810892 : if (shard->hashtable[slot] == 0) { 143 72363664 : shard->hashtable[slot] = block_idx + 1; // 1-based because 0 means empty 144 72363664 : return; 145 : } 146 : // linear probing 147 3447228 : slot = (slot + 1) & shard->hashtable_mask; 148 : } 149 : } 150 : 151 : /******************************************************************************* 152 : * \brief Internal routine for looking up a block from a shard. 153 : * \author Ole Schuett 154 : ******************************************************************************/ 155 110807596 : dbm_block_t *dbm_shard_lookup(const dbm_shard_t *shard, const int row, 156 : const int col) { 157 110807596 : int slot = (shard->hashtable_prime * hash(row, col)) & shard->hashtable_mask; 158 119135448 : while (true) { 159 114971522 : const int block_idx = shard->hashtable[slot] - 1; // 1-based, 0 means empty. 160 114971522 : if (block_idx < 0) { 161 : return NULL; // block not found 162 : } 163 72221751 : assert(0 <= block_idx && block_idx < shard->nblocks); 164 72221751 : dbm_block_t *blk = &shard->blocks[block_idx]; 165 72221751 : if (blk->row == row && blk->col == col) { 166 : return blk; 167 : } 168 : // linear probing 169 4163926 : slot = (slot + 1) & shard->hashtable_mask; 170 : } 171 : } 172 : 173 : /******************************************************************************* 174 : * \brief Internal routine for allocating the metadata of a new block. 175 : * \author Ole Schuett 176 : ******************************************************************************/ 177 50537538 : dbm_block_t *dbm_shard_promise_new_block(dbm_shard_t *shard, const int row, 178 : const int col, const int block_size) { 179 : // Grow blocks array if necessary. 180 50537538 : if (shard->nblocks_allocated < shard->nblocks + 1) { 181 86631 : shard->nblocks_allocated = ALLOCATION_FACTOR * (shard->nblocks + 1); 182 86631 : shard->blocks = 183 86631 : realloc(shard->blocks, shard->nblocks_allocated * sizeof(dbm_block_t)); 184 86631 : assert(shard->blocks != NULL); 185 : 186 : // rebuild hashtable 187 86631 : free(shard->hashtable); 188 86631 : hashtable_init(shard); 189 21912757 : for (int i = 0; i < shard->nblocks; i++) { 190 21826126 : hashtable_insert(shard, i); 191 : } 192 : } 193 : 194 50537538 : const int new_block_idx = shard->nblocks; 195 50537538 : shard->nblocks++; 196 50537538 : dbm_block_t *new_block = &shard->blocks[new_block_idx]; 197 50537538 : new_block->row = row; 198 50537538 : new_block->col = col; 199 50537538 : new_block->offset = shard->data_promised; 200 50537538 : shard->data_promised += block_size; 201 : // The data_size will be increase after the memory is allocated and zeroed. 202 50537538 : hashtable_insert(shard, new_block_idx); 203 50537538 : return new_block; 204 : } 205 : 206 : /******************************************************************************* 207 : * \brief Internal routine for allocating and zeroing any promised block's data. 208 : * \author Ole Schuett 209 : ******************************************************************************/ 210 12739210 : void dbm_shard_allocate_promised_blocks(dbm_shard_t *shard) { 211 : 212 : // Reallocate data array if necessary. 213 12739210 : if (shard->data_promised > shard->data_allocated) { 214 354254 : shard->data_allocated = ALLOCATION_FACTOR * shard->data_promised; 215 354254 : shard->data = realloc(shard->data, shard->data_allocated * sizeof(double)); 216 354254 : assert(shard->data != NULL); 217 : } 218 : 219 : // Zero new blocks. 220 : // The following memset is usually the first touch of the memory, which leads 221 : // to frequent page faults. The executing thread determines the NUMA location 222 12739210 : if (shard->data_promised > shard->data_size) { 223 12437504 : const int tail = shard->data_promised - shard->data_size; 224 12437504 : memset(&shard->data[shard->data_size], 0, tail * sizeof(double)); 225 12437504 : shard->data_size = shard->data_promised; 226 : } 227 12739210 : } 228 : 229 : /******************************************************************************* 230 : * \brief Internal routine for getting block or promising a new one. 231 : * \author Ole Schuett 232 : ******************************************************************************/ 233 27734424 : dbm_block_t *dbm_shard_get_or_promise_block(dbm_shard_t *shard, const int row, 234 : const int col, 235 : const int block_size) { 236 27734424 : dbm_block_t *existing_blk = dbm_shard_lookup(shard, row, col); 237 27734424 : if (existing_blk != NULL) { 238 : return existing_blk; 239 : } else { 240 25876014 : return dbm_shard_promise_new_block(shard, row, col, block_size); 241 : } 242 : } 243 : 244 : /******************************************************************************* 245 : * \brief Internal routine for getting block or allocating a new one. 246 : * \author Ole Schuett 247 : ******************************************************************************/ 248 38774720 : dbm_block_t *dbm_shard_get_or_allocate_block(dbm_shard_t *shard, const int row, 249 : const int col, 250 : const int block_size) { 251 38774720 : dbm_block_t *existing_blk = dbm_shard_lookup(shard, row, col); 252 38774720 : if (existing_blk != NULL) { 253 : return existing_blk; 254 : } 255 : 256 : // Create a new block. 257 11197612 : dbm_block_t *new_blk = 258 11197612 : dbm_shard_promise_new_block(shard, row, col, block_size); 259 11197612 : dbm_shard_allocate_promised_blocks(shard); 260 : 261 11197612 : return new_blk; 262 : } 263 : 264 : // EOF