LCOV - code coverage report
Current view: top level - src/dbm - dbm_shard.c (source / functions) Hit Total Coverage
Test: CP2K Regtests (git:4c33f95) Lines: 129 129 100.0 %
Date: 2025-01-30 06:53:08 Functions: 12 12 100.0 %

          Line data    Source code
       1             : /*----------------------------------------------------------------------------*/
       2             : /*  CP2K: A general program to perform molecular dynamics simulations         */
       3             : /*  Copyright 2000-2025 CP2K developers group <https://cp2k.org>              */
       4             : /*                                                                            */
       5             : /*  SPDX-License-Identifier: BSD-3-Clause                                     */
       6             : /*----------------------------------------------------------------------------*/
       7             : #include <assert.h>
       8             : #include <omp.h>
       9             : #include <stdbool.h>
      10             : #include <stddef.h>
      11             : #include <stdlib.h>
      12             : #include <string.h>
      13             : 
      14             : #include "dbm_hyperparams.h"
      15             : #include "dbm_shard.h"
      16             : 
      17             : /*******************************************************************************
      18             :  * \brief Internal routine for finding a power of two greater than given number.
      19             :  * \author Ole Schuett
      20             :  ******************************************************************************/
      21     5879896 : static int next_power2(const int start) {
      22     5879896 :   int candidate = 2;
      23    20724919 :   while (candidate < start) {
      24    14845023 :     candidate *= 2;
      25             :   }
      26     5879896 :   return candidate;
      27             : }
      28             : 
      29             : /*******************************************************************************
      30             :  * \brief Internal routine for finding a prime greater equal than given number.
      31             :  * \author Ole Schuett
      32             :  ******************************************************************************/
      33     5879896 : static int next_prime(const int start) {
      34     5879896 :   int candidate = start, divisor = 0;
      35    23466949 :   while (divisor < candidate) {
      36   361495243 :     for (divisor = 2; divisor < candidate; divisor++) {
      37   355615347 :       if (candidate % divisor == 0) {
      38    11707157 :         candidate++;
      39    11707157 :         break;
      40             :       }
      41             :     }
      42             :   }
      43     5879896 :   return candidate;
      44             : }
      45             : 
      46             : /*******************************************************************************
      47             :  * \brief Internal routine for initializing a shard's hashtable.
      48             :  * \author Ole Schuett
      49             :  ******************************************************************************/
      50     5879896 : static void hashtable_init(dbm_shard_t *shard) {
      51             :   // Choosing size as power of two allows to replace modulo with bitwise AND.
      52    11759792 :   shard->hashtable_size =
      53     5879896 :       next_power2(HASHTABLE_FACTOR * shard->nblocks_allocated);
      54     5879896 :   shard->hashtable_mask = shard->hashtable_size - 1;
      55     5879896 :   shard->hashtable_prime = next_prime(shard->hashtable_size);
      56     5879896 :   shard->hashtable = calloc(shard->hashtable_size, sizeof(int));
      57     5879896 :   assert(shard->hashtable != NULL);
      58     5879896 : }
      59             : 
      60             : /*******************************************************************************
      61             :  * \brief Internal routine for initializing a shard.
      62             :  * \author Ole Schuett
      63             :  ******************************************************************************/
      64     1645937 : void dbm_shard_init(dbm_shard_t *shard) {
      65     1645937 :   shard->nblocks = 0;
      66     1645937 :   shard->nblocks_allocated = 0;
      67     1645937 :   shard->blocks = NULL;
      68     1645937 :   hashtable_init(shard);
      69     1645937 :   shard->data_size = 0;
      70     1645937 :   shard->data_promised = 0;
      71     1645937 :   shard->data_allocated = 0;
      72     1645937 :   shard->data = NULL;
      73     1645937 :   omp_init_lock(&shard->lock);
      74     1645937 : }
      75             : 
      76             : /*******************************************************************************
      77             :  * \brief Internal routine for copying content of shard_b into shard_a.
      78             :  * \author Ole Schuett
      79             :  ******************************************************************************/
      80      408576 : void dbm_shard_copy(dbm_shard_t *shard_a, const dbm_shard_t *shard_b) {
      81      408576 :   assert(shard_a != NULL && shard_b != NULL);
      82             : 
      83      408576 :   if (shard_a->nblocks_allocated < shard_b->nblocks) {
      84      381583 :     free(shard_a->blocks);
      85      381583 :     shard_a->blocks = malloc(shard_b->nblocks * sizeof(dbm_block_t));
      86      381583 :     shard_a->nblocks_allocated = shard_b->nblocks;
      87      381583 :     assert(shard_a->blocks != NULL);
      88             :   }
      89      408576 :   shard_a->nblocks = shard_b->nblocks;
      90             : 
      91      408576 :   if (shard_a->hashtable_size < shard_b->hashtable_size) {
      92      384298 :     free(shard_a->hashtable);
      93      384298 :     shard_a->hashtable = malloc(shard_b->hashtable_size * sizeof(int));
      94      384298 :     assert(shard_a->hashtable != NULL);
      95             :   }
      96      408576 :   shard_a->hashtable_size = shard_b->hashtable_size;
      97      408576 :   shard_a->hashtable_mask = shard_b->hashtable_mask;
      98      408576 :   shard_a->hashtable_prime = shard_b->hashtable_prime;
      99             : 
     100      408576 :   if (shard_a->data_allocated < shard_b->data_size) {
     101      381583 :     free(shard_a->data);
     102      381583 :     shard_a->data = malloc(shard_b->data_size * sizeof(double));
     103      381583 :     shard_a->data_allocated = shard_b->data_size;
     104      381583 :     assert(shard_a->data != NULL);
     105             :   }
     106      408576 :   shard_a->data_size = shard_b->data_size;
     107             : 
     108      408576 :   if (shard_a->blocks != NULL) {
     109      381613 :     memcpy(shard_a->blocks, shard_b->blocks,
     110      381613 :            shard_b->nblocks * sizeof(dbm_block_t));
     111             :   }
     112      408576 :   if (shard_a->hashtable != NULL) {
     113      408576 :     memcpy(shard_a->hashtable, shard_b->hashtable,
     114      408576 :            shard_b->hashtable_size * sizeof(int));
     115             :   }
     116      408576 :   if (shard_a->data != NULL) {
     117      381613 :     memcpy(shard_a->data, shard_b->data, shard_b->data_size * sizeof(double));
     118             :   }
     119      408576 : }
     120             : 
     121             : /*******************************************************************************
     122             :  * \brief Internal routine for releasing a shard.
     123             :  * \author Ole Schuett
     124             :  ******************************************************************************/
     125     1645937 : void dbm_shard_release(dbm_shard_t *shard) {
     126     1645937 :   free(shard->blocks);
     127     1645937 :   free(shard->hashtable);
     128     1645937 :   free(shard->data);
     129     1645937 :   omp_destroy_lock(&shard->lock);
     130     1645937 : }
     131             : 
     132             : /*******************************************************************************
     133             :  * \brief Private hash function based on Cantor pairing function.
     134             :  *        https://en.wikipedia.org/wiki/Pairing_function#Cantor_pairing_function
     135             :  *        Szudzik's elegant pairing proved to be too asymmetric wrt. row / col.
     136             :  *        Using unsigned int to return a positive number even after overflow.
     137             :  * \author Ole Schuett
     138             :  ******************************************************************************/
     139   207939612 : static inline unsigned int hash(const unsigned int row,
     140             :                                 const unsigned int col) {
     141   207939612 :   return (row + col) * (row + col + 1) / 2 + row; // Division by 2 is cheap.
     142             : }
     143             : 
     144             : /*******************************************************************************
     145             :  * \brief Private routine for inserting a block into a shard's hashtable.
     146             :  * \author Ole Schuett
     147             :  ******************************************************************************/
     148    97225041 : static void hashtable_insert(dbm_shard_t *shard, const int block_idx) {
     149    97225041 :   assert(0 <= block_idx && block_idx < shard->nblocks);
     150    97225041 :   const dbm_block_t *blk = &shard->blocks[block_idx];
     151    97225041 :   const int row = blk->row, col = blk->col;
     152    97225041 :   int slot = (shard->hashtable_prime * hash(row, col)) & shard->hashtable_mask;
     153   107630889 :   while (true) {
     154   102427965 :     if (shard->hashtable[slot] == 0) {
     155    97225041 :       shard->hashtable[slot] = block_idx + 1; // 1-based because 0 means empty
     156    97225041 :       return;
     157             :     }
     158             :     // linear probing
     159     5202924 :     slot = (slot + 1) & shard->hashtable_mask;
     160             :   }
     161             : }
     162             : 
     163             : /*******************************************************************************
     164             :  * \brief Internal routine for looking up a block from a shard.
     165             :  * \author Ole Schuett
     166             :  ******************************************************************************/
     167   110714571 : dbm_block_t *dbm_shard_lookup(const dbm_shard_t *shard, const int row,
     168             :                               const int col) {
     169   110714571 :   int slot = (shard->hashtable_prime * hash(row, col)) & shard->hashtable_mask;
     170   123033871 :   while (true) {
     171   116874221 :     const int block_idx = shard->hashtable[slot] - 1; // 1-based, 0 means empty.
     172   116874221 :     if (block_idx < 0) {
     173             :       return NULL; // block not found
     174             :     }
     175    74161897 :     assert(0 <= block_idx && block_idx < shard->nblocks);
     176    74161897 :     dbm_block_t *blk = &shard->blocks[block_idx];
     177    74161897 :     if (blk->row == row && blk->col == col) {
     178             :       return blk;
     179             :     }
     180             :     // linear probing
     181     6159650 :     slot = (slot + 1) & shard->hashtable_mask;
     182             :   }
     183             : }
     184             : 
     185             : /*******************************************************************************
     186             :  * \brief Internal routine for allocating the metadata of a new block.
     187             :  * \author Ole Schuett
     188             :  ******************************************************************************/
     189    50491303 : dbm_block_t *dbm_shard_promise_new_block(dbm_shard_t *shard, const int row,
     190             :                                          const int col, const int block_size) {
     191             :   // Grow blocks array if necessary.
     192    50491303 :   if (shard->nblocks_allocated < shard->nblocks + 1) {
     193     4233959 :     shard->nblocks_allocated = ALLOCATION_FACTOR * (shard->nblocks + 1);
     194     4233959 :     shard->blocks =
     195     4233959 :         realloc(shard->blocks, shard->nblocks_allocated * sizeof(dbm_block_t));
     196     4233959 :     assert(shard->blocks != NULL);
     197             : 
     198             :     // rebuild hashtable
     199     4233959 :     free(shard->hashtable);
     200     4233959 :     hashtable_init(shard);
     201    50967697 :     for (int i = 0; i < shard->nblocks; i++) {
     202    46733738 :       hashtable_insert(shard, i);
     203             :     }
     204             :   }
     205             : 
     206    50491303 :   const int new_block_idx = shard->nblocks;
     207    50491303 :   shard->nblocks++;
     208    50491303 :   dbm_block_t *new_block = &shard->blocks[new_block_idx];
     209    50491303 :   new_block->row = row;
     210    50491303 :   new_block->col = col;
     211    50491303 :   new_block->offset = shard->data_promised;
     212    50491303 :   shard->data_promised += block_size;
     213             :   // The data_size will be increase after the memory is allocated and zeroed.
     214    50491303 :   hashtable_insert(shard, new_block_idx);
     215    50491303 :   return new_block;
     216             : }
     217             : 
     218             : /*******************************************************************************
     219             :  * \brief Internal routine for allocating and zeroing any promised block's data.
     220             :  * \author Ole Schuett
     221             :  ******************************************************************************/
     222    12726731 : void dbm_shard_allocate_promised_blocks(dbm_shard_t *shard) {
     223             : 
     224             :   // Reallocate data array if necessary.
     225    12726731 :   if (shard->data_promised > shard->data_allocated) {
     226     1400511 :     shard->data_allocated = ALLOCATION_FACTOR * shard->data_promised;
     227     1400511 :     shard->data = realloc(shard->data, shard->data_allocated * sizeof(double));
     228     1400511 :     assert(shard->data != NULL);
     229             :   }
     230             : 
     231             :   // Zero new blocks.
     232             :   // The following memset is usually the first touch of the memory, which leads
     233             :   // to frequent page faults. The executing thread determines the NUMA location
     234    12726731 :   if (shard->data_promised > shard->data_size) {
     235    12428181 :     const int tail = shard->data_promised - shard->data_size;
     236    12428181 :     memset(&shard->data[shard->data_size], 0, tail * sizeof(double));
     237    12428181 :     shard->data_size = shard->data_promised;
     238             :   }
     239    12726731 : }
     240             : 
     241             : /*******************************************************************************
     242             :  * \brief Internal routine for getting block or promising a new one.
     243             :  * \author Ole Schuett
     244             :  ******************************************************************************/
     245    27702018 : dbm_block_t *dbm_shard_get_or_promise_block(dbm_shard_t *shard, const int row,
     246             :                                             const int col,
     247             :                                             const int block_size) {
     248    27702018 :   dbm_block_t *existing_blk = dbm_shard_lookup(shard, row, col);
     249    27702018 :   if (existing_blk != NULL) {
     250             :     return existing_blk;
     251             :   } else {
     252    25847896 :     return dbm_shard_promise_new_block(shard, row, col, block_size);
     253             :   }
     254             : }
     255             : 
     256             : /*******************************************************************************
     257             :  * \brief Internal routine for getting block or allocating a new one.
     258             :  * \author Ole Schuett
     259             :  ******************************************************************************/
     260    38736975 : dbm_block_t *dbm_shard_get_or_allocate_block(dbm_shard_t *shard, const int row,
     261             :                                              const int col,
     262             :                                              const int block_size) {
     263    38736975 :   dbm_block_t *existing_blk = dbm_shard_lookup(shard, row, col);
     264    38736975 :   if (existing_blk != NULL) {
     265             :     return existing_blk;
     266             :   }
     267             : 
     268             :   // Create a new block.
     269    11193107 :   dbm_block_t *new_blk =
     270    11193107 :       dbm_shard_promise_new_block(shard, row, col, block_size);
     271    11193107 :   dbm_shard_allocate_promised_blocks(shard);
     272             : 
     273    11193107 :   return new_blk;
     274             : }
     275             : 
     276             : // EOF

Generated by: LCOV version 1.15