LCOV - code coverage report
Current view: top level - src/dbm - dbm_shard.c (source / functions) Hit Total Coverage
Test: CP2K Regtests (git:b4bd748) Lines: 129 129 100.0 %
Date: 2025-03-09 07:56:22 Functions: 12 12 100.0 %

          Line data    Source code
       1             : /*----------------------------------------------------------------------------*/
       2             : /*  CP2K: A general program to perform molecular dynamics simulations         */
       3             : /*  Copyright 2000-2025 CP2K developers group <https://cp2k.org>              */
       4             : /*                                                                            */
       5             : /*  SPDX-License-Identifier: BSD-3-Clause                                     */
       6             : /*----------------------------------------------------------------------------*/
       7             : #include <assert.h>
       8             : #include <omp.h>
       9             : #include <stdbool.h>
      10             : #include <stddef.h>
      11             : #include <stdlib.h>
      12             : #include <string.h>
      13             : 
      14             : #include "dbm_hyperparams.h"
      15             : #include "dbm_shard.h"
      16             : 
      17             : /*******************************************************************************
      18             :  * \brief Internal routine for finding a power of two greater than given number.
      19             :  * \author Ole Schuett
      20             :  ******************************************************************************/
      21     5986039 : static int next_power2(const int start) {
      22     5986039 :   int candidate = 2;
      23    20968564 :   while (candidate < start) {
      24    14982525 :     candidate *= 2;
      25             :   }
      26     5986039 :   return candidate;
      27             : }
      28             : 
      29             : /*******************************************************************************
      30             :  * \brief Internal routine for finding a prime greater equal than given number.
      31             :  * \author Ole Schuett
      32             :  ******************************************************************************/
      33     5986039 : static int next_prime(const int start) {
      34     5986039 :   int candidate = start, divisor = 0;
      35    23813942 :   while (divisor < candidate) {
      36   363308643 :     for (divisor = 2; divisor < candidate; divisor++) {
      37   357322604 :       if (candidate % divisor == 0) {
      38    11841864 :         candidate++;
      39    11841864 :         break;
      40             :       }
      41             :     }
      42             :   }
      43     5986039 :   return candidate;
      44             : }
      45             : 
      46             : /*******************************************************************************
      47             :  * \brief Internal routine for initializing a shard's hashtable.
      48             :  * \author Ole Schuett
      49             :  ******************************************************************************/
      50     5986039 : static void hashtable_init(dbm_shard_t *shard) {
      51             :   // Choosing size as power of two allows to replace modulo with bitwise AND.
      52    11972078 :   shard->hashtable_size =
      53     5986039 :       next_power2(DBM_HASHTABLE_FACTOR * shard->nblocks_allocated);
      54     5986039 :   shard->hashtable_prime = next_prime(shard->hashtable_size);
      55     5986039 :   shard->hashtable = calloc(shard->hashtable_size, sizeof(int));
      56     5986039 :   assert(shard->hashtable != NULL);
      57     5986039 : }
      58             : 
      59             : /*******************************************************************************
      60             :  * \brief Internal routine for initializing a shard.
      61             :  * \author Ole Schuett
      62             :  ******************************************************************************/
      63     1707391 : void dbm_shard_init(dbm_shard_t *shard) {
      64     1707391 :   shard->nblocks = 0;
      65     1707391 :   shard->nblocks_allocated = 0;
      66     1707391 :   shard->blocks = NULL;
      67     1707391 :   hashtable_init(shard);
      68     1707391 :   shard->data_size = 0;
      69     1707391 :   shard->data_promised = 0;
      70     1707391 :   shard->data_allocated = 0;
      71     1707391 :   shard->data = NULL;
      72     1707391 :   omp_init_lock(&shard->lock);
      73     1707391 : }
      74             : 
      75             : /*******************************************************************************
      76             :  * \brief Internal routine for copying content of shard_b into shard_a.
      77             :  * \author Ole Schuett
      78             :  ******************************************************************************/
      79      428240 : void dbm_shard_copy(dbm_shard_t *shard_a, const dbm_shard_t *shard_b) {
      80      428240 :   assert(shard_a != NULL && shard_b != NULL);
      81             : 
      82      428240 :   if (shard_a->nblocks_allocated < shard_b->nblocks) {
      83      401937 :     free(shard_a->blocks);
      84      401937 :     shard_a->blocks = malloc(shard_b->nblocks * sizeof(dbm_block_t));
      85      401937 :     shard_a->nblocks_allocated = shard_b->nblocks;
      86      401937 :     assert(shard_a->blocks != NULL);
      87             :   }
      88      428240 :   shard_a->nblocks = shard_b->nblocks;
      89             : 
      90      428240 :   if (shard_a->hashtable_size < shard_b->hashtable_size) {
      91      404652 :     free(shard_a->hashtable);
      92      404652 :     shard_a->hashtable = malloc(shard_b->hashtable_size * sizeof(int));
      93      404652 :     assert(shard_a->hashtable != NULL);
      94             :   }
      95      428240 :   shard_a->hashtable_size = shard_b->hashtable_size;
      96      428240 :   shard_a->hashtable_prime = shard_b->hashtable_prime;
      97             : 
      98      428240 :   if (shard_a->data_allocated < shard_b->data_size) {
      99      401937 :     free(shard_a->data);
     100      401937 :     shard_a->data = malloc(shard_b->data_size * sizeof(double));
     101      401937 :     shard_a->data_allocated = shard_b->data_size;
     102      401937 :     assert(shard_a->data != NULL);
     103             :   }
     104      428240 :   shard_a->data_size = shard_b->data_size;
     105             : 
     106      428240 :   if (shard_a->blocks != NULL) {
     107      401967 :     memcpy(shard_a->blocks, shard_b->blocks,
     108      401967 :            shard_b->nblocks * sizeof(dbm_block_t));
     109             :   }
     110      428240 :   if (shard_a->hashtable != NULL) {
     111      428240 :     memcpy(shard_a->hashtable, shard_b->hashtable,
     112      428240 :            shard_b->hashtable_size * sizeof(int));
     113             :   }
     114      428240 :   if (shard_a->data != NULL) {
     115      401967 :     memcpy(shard_a->data, shard_b->data, shard_b->data_size * sizeof(double));
     116             :   }
     117      428240 : }
     118             : 
     119             : /*******************************************************************************
     120             :  * \brief Internal routine for releasing a shard.
     121             :  * \author Ole Schuett
     122             :  ******************************************************************************/
     123     1707391 : void dbm_shard_release(dbm_shard_t *shard) {
     124     1707391 :   free(shard->blocks);
     125     1707391 :   free(shard->hashtable);
     126     1707391 :   free(shard->data);
     127     1707391 :   omp_destroy_lock(&shard->lock);
     128     1707391 : }
     129             : 
     130             : /*******************************************************************************
     131             :  * \brief Private hash function based on Cantor pairing function.
     132             :  *        https://en.wikipedia.org/wiki/Pairing_function#Cantor_pairing_function
     133             :  *        Szudzik's elegant pairing proved to be too asymmetric wrt. row / col.
     134             :  *        Using unsigned int to return a positive number even after overflow.
     135             :  * \author Ole Schuett
     136             :  ******************************************************************************/
     137   208800905 : static inline unsigned int hash(const unsigned int row,
     138             :                                 const unsigned int col) {
     139   208800905 :   return (row + col) * (row + col + 1) / 2 + row; // Division by 2 is cheap.
     140             : }
     141             : 
     142             : /*******************************************************************************
     143             :  * \brief Internal routine for masking a slot in the hash-table.
     144             :  * \author Hans Pabst
     145             :  ******************************************************************************/
     146   208800905 : static inline int hashtable_mask(const dbm_shard_t *shard) {
     147   208800905 :   return shard->hashtable_size - 1;
     148             : }
     149             : 
     150             : /*******************************************************************************
     151             :  * \brief Private routine for inserting a block into a shard's hashtable.
     152             :  * \author Ole Schuett
     153             :  ******************************************************************************/
     154    97710431 : static void hashtable_insert(dbm_shard_t *shard, const int block_idx) {
     155    97710431 :   assert(0 <= block_idx && block_idx < shard->nblocks);
     156    97710431 :   const dbm_block_t *blk = &shard->blocks[block_idx];
     157    97710431 :   const unsigned int h = hash(blk->row, blk->col);
     158    97710431 :   int slot = (shard->hashtable_prime * h) & hashtable_mask(shard);
     159    97759511 :   for (;; slot = (slot + 1) & hashtable_mask(shard)) { // linear probing
     160   102952734 :     for (int i = slot; i < shard->hashtable_size; ++i) {
     161   102928194 :       if (shard->hashtable[i] == 0) {        // 0 means empty
     162    97710431 :         shard->hashtable[i] = block_idx + 1; // 1-based
     163    97710431 :         return;
     164             :       }
     165             :     }
     166             :   }
     167             : }
     168             : 
     169             : /*******************************************************************************
     170             :  * \brief Internal routine for looking up a block from a shard.
     171             :  * \author Ole Schuett
     172             :  ******************************************************************************/
     173   111090474 : dbm_block_t *dbm_shard_lookup(const dbm_shard_t *shard, const int row,
     174             :                               const int col) {
     175   111090474 :   int slot = (shard->hashtable_prime * hash(row, col)) & hashtable_mask(shard);
     176       70737 :   for (;; slot = (slot + 1) & hashtable_mask(shard)) { // linear probing
     177   117337324 :     for (int i = slot; i < shard->hashtable_size; ++i) {
     178   117266587 :       const int block_idx = shard->hashtable[i];
     179   117266587 :       if (block_idx == 0) { // 1-based, 0 means empty
     180             :         return NULL;        // block not found
     181             :       }
     182    74369988 :       assert(0 < block_idx && block_idx <= shard->nblocks);
     183    74369988 :       dbm_block_t *blk = &shard->blocks[block_idx - 1];
     184    74369988 :       if (blk->row == row && blk->col == col) {
     185             :         return blk;
     186             :       }
     187             :     }
     188             :   }
     189             : }
     190             : 
     191             : /*******************************************************************************
     192             :  * \brief Internal routine for allocating the metadata of a new block.
     193             :  * \author Ole Schuett
     194             :  ******************************************************************************/
     195    50745646 : dbm_block_t *dbm_shard_promise_new_block(dbm_shard_t *shard, const int row,
     196             :                                          const int col, const int block_size) {
     197             :   // Grow blocks array if necessary.
     198    50745646 :   if (shard->nblocks_allocated < shard->nblocks + 1) {
     199     4278648 :     shard->nblocks_allocated = DBM_ALLOCATION_FACTOR * (shard->nblocks + 1);
     200     4278648 :     shard->blocks =
     201     4278648 :         realloc(shard->blocks, shard->nblocks_allocated * sizeof(dbm_block_t));
     202     4278648 :     assert(shard->blocks != NULL);
     203             : 
     204             :     // rebuild hashtable
     205     4278648 :     free(shard->hashtable);
     206     4278648 :     hashtable_init(shard);
     207    51243433 :     for (int i = 0; i < shard->nblocks; i++) {
     208    46964785 :       hashtable_insert(shard, i);
     209             :     }
     210             :   }
     211             : 
     212    50745646 :   const int new_block_idx = shard->nblocks;
     213    50745646 :   shard->nblocks++;
     214    50745646 :   dbm_block_t *new_block = &shard->blocks[new_block_idx];
     215    50745646 :   new_block->row = row;
     216    50745646 :   new_block->col = col;
     217    50745646 :   new_block->offset = shard->data_promised;
     218    50745646 :   shard->data_promised += block_size;
     219             :   // The data_size will be increased after the memory is allocated and zeroed.
     220    50745646 :   hashtable_insert(shard, new_block_idx);
     221    50745646 :   return new_block;
     222             : }
     223             : 
     224             : /*******************************************************************************
     225             :  * \brief Internal routine for allocating and zeroing any promised block's data.
     226             :  * \author Ole Schuett
     227             :  ******************************************************************************/
     228    12738633 : void dbm_shard_allocate_promised_blocks(dbm_shard_t *shard) {
     229             : 
     230             :   // Reallocate data array if necessary.
     231    12738633 :   if (shard->data_promised > shard->data_allocated) {
     232     1397893 :     shard->data_allocated = DBM_ALLOCATION_FACTOR * shard->data_promised;
     233     1397893 :     shard->data = realloc(shard->data, shard->data_allocated * sizeof(double));
     234     1397893 :     assert(shard->data != NULL);
     235             :   }
     236             : 
     237             :   // Zero new blocks.
     238             :   // The following memset is usually the first touch of the memory, which leads
     239             :   // to frequent page faults. The executing thread determines the NUMA location
     240    12738633 :   if (shard->data_promised > shard->data_size) {
     241    12421959 :     const int tail = shard->data_promised - shard->data_size;
     242    12421959 :     memset(&shard->data[shard->data_size], 0, tail * sizeof(double));
     243    12421959 :     shard->data_size = shard->data_promised;
     244             :   }
     245    12738633 : }
     246             : 
     247             : /*******************************************************************************
     248             :  * \brief Internal routine for getting block or promising a new one.
     249             :  * \author Ole Schuett
     250             :  ******************************************************************************/
     251    27905596 : dbm_block_t *dbm_shard_get_or_promise_block(dbm_shard_t *shard, const int row,
     252             :                                             const int col,
     253             :                                             const int block_size) {
     254    27905596 :   dbm_block_t *existing_blk = dbm_shard_lookup(shard, row, col);
     255    27905596 :   if (existing_blk != NULL) {
     256             :     return existing_blk;
     257             :   } else {
     258    26073627 :     return dbm_shard_promise_new_block(shard, row, col, block_size);
     259             :   }
     260             : }
     261             : 
     262             : /*******************************************************************************
     263             :  * \brief Internal routine for getting block or allocating a new one.
     264             :  * \author Ole Schuett
     265             :  ******************************************************************************/
     266    38933982 : dbm_block_t *dbm_shard_get_or_allocate_block(dbm_shard_t *shard, const int row,
     267             :                                              const int col,
     268             :                                              const int block_size) {
     269    38933982 :   dbm_block_t *existing_blk = dbm_shard_lookup(shard, row, col);
     270    38933982 :   if (existing_blk != NULL) {
     271             :     return existing_blk;
     272             :   }
     273             : 
     274             :   // Create a new block.
     275    11169329 :   dbm_block_t *new_blk =
     276    11169329 :       dbm_shard_promise_new_block(shard, row, col, block_size);
     277    11169329 :   dbm_shard_allocate_promised_blocks(shard);
     278             : 
     279    11169329 :   return new_blk;
     280             : }
     281             : 
     282             : // EOF

Generated by: LCOV version 1.15