LCOV - code coverage report
Current view: top level - src/dbm - dbm_matrix.c (source / functions) Hit Total Coverage
Test: CP2K Regtests (git:b4bd748) Lines: 291 293 99.3 %
Date: 2025-03-09 07:56:22 Functions: 29 29 100.0 %

          Line data    Source code
       1             : /*----------------------------------------------------------------------------*/
       2             : /*  CP2K: A general program to perform molecular dynamics simulations         */
       3             : /*  Copyright 2000-2025 CP2K developers group <https://cp2k.org>              */
       4             : /*                                                                            */
       5             : /*  SPDX-License-Identifier: BSD-3-Clause                                     */
       6             : /*----------------------------------------------------------------------------*/
       7             : 
       8             : #include <assert.h>
       9             : #include <math.h>
      10             : #include <omp.h>
      11             : #include <stdbool.h>
      12             : #include <stddef.h>
      13             : #include <stdlib.h>
      14             : #include <string.h>
      15             : 
      16             : #include "dbm_hyperparams.h"
      17             : #include "dbm_matrix.h"
      18             : 
      19             : /*******************************************************************************
      20             :  * \brief Creates a new matrix.
      21             :  * \author Ole Schuett
      22             :  ******************************************************************************/
      23     1707391 : void dbm_create(dbm_matrix_t **matrix_out, dbm_distribution_t *dist,
      24             :                 const char name[], const int nrows, const int ncols,
      25             :                 const int row_sizes[nrows], const int col_sizes[ncols]) {
      26     1707391 :   assert(omp_get_num_threads() == 1);
      27             : 
      28     1707391 :   dbm_matrix_t *matrix = calloc(1, sizeof(dbm_matrix_t));
      29             : 
      30     1707391 :   assert(dist->rows.length == nrows);
      31     1707391 :   assert(dist->cols.length == ncols);
      32     1707391 :   dbm_distribution_hold(dist);
      33     1707391 :   matrix->dist = dist;
      34             : 
      35     1707391 :   size_t size = (strlen(name) + 1) * sizeof(char);
      36     1707391 :   matrix->name = malloc(size);
      37     1707391 :   assert(matrix->name != NULL);
      38     1707391 :   memcpy(matrix->name, name, size);
      39             : 
      40     1707391 :   matrix->nrows = nrows;
      41     1707391 :   matrix->ncols = ncols;
      42             : 
      43     1707391 :   size = nrows * sizeof(int);
      44     1707391 :   matrix->row_sizes = malloc(size);
      45     1707391 :   assert(matrix->row_sizes != NULL);
      46     1707391 :   memcpy(matrix->row_sizes, row_sizes, size);
      47             : 
      48     1707391 :   size = ncols * sizeof(int);
      49     1707391 :   matrix->col_sizes = malloc(size);
      50     1707391 :   assert(matrix->col_sizes != NULL);
      51     1707391 :   memcpy(matrix->col_sizes, col_sizes, size);
      52             : 
      53     1707391 :   matrix->shards = malloc(dbm_get_num_shards(matrix) * sizeof(dbm_shard_t));
      54     1707391 :   assert(matrix->shards != NULL);
      55     1707391 : #pragma omp parallel for
      56             :   for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
      57             :     dbm_shard_init(&matrix->shards[ishard]);
      58             :   }
      59             : 
      60     1707391 :   assert(*matrix_out == NULL);
      61     1707391 :   *matrix_out = matrix;
      62     1707391 : }
      63             : 
      64             : /*******************************************************************************
      65             :  * \brief Releases a matrix and all its ressources.
      66             :  * \author Ole Schuett
      67             :  ******************************************************************************/
      68     1707391 : void dbm_release(dbm_matrix_t *matrix) {
      69     1707391 :   assert(omp_get_num_threads() == 1);
      70     3414782 :   for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
      71     1707391 :     dbm_shard_release(&matrix->shards[ishard]);
      72             :   }
      73     1707391 :   dbm_distribution_release(matrix->dist);
      74     1707391 :   free(matrix->row_sizes);
      75     1707391 :   free(matrix->col_sizes);
      76     1707391 :   free(matrix->shards);
      77     1707391 :   free(matrix->name);
      78     1707391 :   free(matrix);
      79     1707391 : }
      80             : 
      81             : /*******************************************************************************
      82             :  * \brief Copies content of matrix_b into matrix_a.
      83             :  *        Matrices must have the same row/col block sizes and distribution.
      84             :  * \author Ole Schuett
      85             :  ******************************************************************************/
      86      428240 : void dbm_copy(dbm_matrix_t *matrix_a, const dbm_matrix_t *matrix_b) {
      87      428240 :   assert(omp_get_num_threads() == 1);
      88             : 
      89      428240 :   assert(matrix_b->nrows == matrix_a->nrows);
      90     4991453 :   for (int i = 0; i < matrix_b->nrows; i++) {
      91     4563213 :     assert(matrix_b->row_sizes[i] == matrix_a->row_sizes[i]);
      92             :   }
      93      428240 :   assert(matrix_b->ncols == matrix_a->ncols);
      94    33683339 :   for (int i = 0; i < matrix_b->ncols; i++) {
      95    33255099 :     assert(matrix_b->col_sizes[i] == matrix_a->col_sizes[i]);
      96             :   }
      97             : 
      98      428240 :   assert(matrix_a->dist == matrix_b->dist);
      99             : 
     100      428240 : #pragma omp parallel for DBM_OMP_SCHEDULE
     101             :   for (int ishard = 0; ishard < dbm_get_num_shards(matrix_a); ishard++) {
     102             :     dbm_shard_copy(&matrix_a->shards[ishard], &matrix_b->shards[ishard]);
     103             :   }
     104      428240 : }
     105             : 
     106             : /*******************************************************************************
     107             :  * \brief Copies content of matrix_b into matrix_a.
     108             :  *        Matrices may have different distributions.
     109             :  * \author Ole Schuett
     110             :  ******************************************************************************/
     111         144 : void dbm_redistribute(const dbm_matrix_t *matrix, dbm_matrix_t *redist) {
     112         144 :   assert(omp_get_num_threads() == 1);
     113         144 :   assert(matrix->nrows == redist->nrows);
     114        6384 :   for (int i = 0; i < matrix->nrows; i++) {
     115        6240 :     assert(matrix->row_sizes[i] == redist->row_sizes[i]);
     116             :   }
     117         144 :   assert(matrix->ncols == redist->ncols);
     118        6384 :   for (int i = 0; i < matrix->ncols; i++) {
     119        6240 :     assert(matrix->col_sizes[i] == redist->col_sizes[i]);
     120             :   }
     121             : 
     122         144 :   assert(dbm_mpi_comms_are_similar(matrix->dist->comm, redist->dist->comm));
     123         144 :   const dbm_mpi_comm_t comm = redist->dist->comm;
     124         144 :   const int nranks = dbm_mpi_comm_size(comm);
     125             : 
     126             :   // 1st pass: Compute send_count.
     127         144 :   int send_count[nranks];
     128         144 :   memset(send_count, 0, nranks * sizeof(int));
     129         288 :   for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
     130         144 :     dbm_shard_t *shard = &matrix->shards[ishard];
     131        8936 :     for (int iblock = 0; iblock < shard->nblocks; iblock++) {
     132        8792 :       const dbm_block_t *blk = &shard->blocks[iblock];
     133        8792 :       const int row_size = matrix->row_sizes[blk->row];
     134        8792 :       const int col_size = matrix->col_sizes[blk->col];
     135        8792 :       const int block_size = row_size * col_size;
     136        8792 :       const int rank = dbm_get_stored_coordinates(redist, blk->row, blk->col);
     137        8792 :       assert(0 <= rank && rank < nranks);
     138        8792 :       send_count[rank] += 2 + block_size;
     139             :     }
     140             :   }
     141             : 
     142             :   // 1st communication: Exchange counts.
     143         144 :   int recv_count[nranks];
     144         144 :   dbm_mpi_alltoall_int(send_count, 1, recv_count, 1, comm);
     145             : 
     146             :   // Compute displacements and allocate data buffers.
     147         144 :   int send_displ[nranks + 1], recv_displ[nranks + 1];
     148         144 :   send_displ[0] = recv_displ[0] = 0;
     149         432 :   for (int irank = 1; irank <= nranks; irank++) {
     150         288 :     send_displ[irank] = send_displ[irank - 1] + send_count[irank - 1];
     151         288 :     recv_displ[irank] = recv_displ[irank - 1] + recv_count[irank - 1];
     152             :   }
     153         144 :   const int total_send_count = send_displ[nranks];
     154         144 :   const int total_recv_count = recv_displ[nranks];
     155         144 :   double *data_send = dbm_mpi_alloc_mem(total_send_count * sizeof(double));
     156         144 :   double *data_recv = dbm_mpi_alloc_mem(total_recv_count * sizeof(double));
     157             : 
     158             :   // 2nd pass: Fill send_data.
     159         144 :   int send_data_positions[nranks];
     160         144 :   memcpy(send_data_positions, send_displ, nranks * sizeof(int));
     161         288 :   for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
     162         144 :     dbm_shard_t *shard = &matrix->shards[ishard];
     163        8936 :     for (int iblock = 0; iblock < shard->nblocks; iblock++) {
     164        8792 :       const dbm_block_t *blk = &shard->blocks[iblock];
     165        8792 :       const double *blk_data = &shard->data[blk->offset];
     166        8792 :       const int row_size = matrix->row_sizes[blk->row];
     167        8792 :       const int col_size = matrix->col_sizes[blk->col];
     168        8792 :       const int block_size = row_size * col_size;
     169        8792 :       const int rank = dbm_get_stored_coordinates(redist, blk->row, blk->col);
     170        8792 :       const int pos = send_data_positions[rank];
     171        8792 :       data_send[pos + 0] = blk->row; // send integers as doubles
     172        8792 :       data_send[pos + 1] = blk->col;
     173        8792 :       memcpy(&data_send[pos + 2], blk_data, block_size * sizeof(double));
     174        8792 :       send_data_positions[rank] += 2 + block_size;
     175             :     }
     176             :   }
     177         432 :   for (int irank = 0; irank < nranks; irank++) {
     178         288 :     assert(send_data_positions[irank] == send_displ[irank + 1]);
     179             :   }
     180             : 
     181             :   // 2nd communication: Exchange data.
     182         144 :   dbm_mpi_alltoallv_double(data_send, send_count, send_displ, data_recv,
     183             :                            recv_count, recv_displ, comm);
     184         144 :   dbm_mpi_free_mem(data_send);
     185             : 
     186             :   // 3rd pass: Unpack data.
     187         144 :   dbm_clear(redist);
     188         144 :   int recv_data_pos = 0;
     189        8936 :   while (recv_data_pos < total_recv_count) {
     190        8792 :     const int row = (int)data_recv[recv_data_pos + 0];
     191        8792 :     const int col = (int)data_recv[recv_data_pos + 1];
     192        8792 :     assert(data_recv[recv_data_pos + 0] - (double)row == 0.0);
     193        8792 :     assert(data_recv[recv_data_pos + 1] - (double)col == 0.0);
     194        8792 :     dbm_put_block(redist, row, col, false, &data_recv[recv_data_pos + 2]);
     195        8792 :     const int row_size = matrix->row_sizes[row];
     196        8792 :     const int col_size = matrix->col_sizes[col];
     197        8792 :     const int block_size = row_size * col_size;
     198        8792 :     recv_data_pos += 2 + block_size;
     199             :   }
     200         144 :   assert(recv_data_pos == total_recv_count);
     201         144 :   dbm_mpi_free_mem(data_recv);
     202         144 : }
     203             : 
     204             : /*******************************************************************************
     205             :  * \brief Looks up a block from given matrics. This routine is thread-safe.
     206             :  *        If the block is not found then a null pointer is returned.
     207             :  * \author Ole Schuett
     208             :  ******************************************************************************/
     209    22475441 : void dbm_get_block_p(dbm_matrix_t *matrix, const int row, const int col,
     210             :                      double **block, int *row_size, int *col_size) {
     211    22475441 :   assert(0 <= row && row < matrix->nrows);
     212    22475441 :   assert(0 <= col && col < matrix->ncols);
     213    22475441 :   assert(dbm_get_stored_coordinates(matrix, row, col) == matrix->dist->my_rank);
     214    22475441 :   *row_size = matrix->row_sizes[row];
     215    22475441 :   *col_size = matrix->col_sizes[col];
     216    22475441 :   *block = NULL;
     217             : 
     218    22475441 :   const int ishard = dbm_get_shard_index(matrix, row, col);
     219    22475441 :   const dbm_shard_t *shard = &matrix->shards[ishard];
     220    22475441 :   dbm_block_t *blk = dbm_shard_lookup(shard, row, col);
     221    22475441 :   if (blk != NULL) {
     222    21224246 :     *block = &shard->data[blk->offset];
     223             :   }
     224    22475441 : }
     225             : 
     226             : /*******************************************************************************
     227             :  * \brief Adds a block to given matrix. This routine is thread-safe.
     228             :  *        If block already exist then it gets overwritten (or summed).
     229             :  * \author Ole Schuett
     230             :  ******************************************************************************/
     231    35197196 : void dbm_put_block(dbm_matrix_t *matrix, const int row, const int col,
     232             :                    const bool summation, const double *block) {
     233    35197196 :   assert(0 <= row && row < matrix->nrows);
     234    35197196 :   assert(0 <= col && col < matrix->ncols);
     235    35197196 :   assert(dbm_get_stored_coordinates(matrix, row, col) == matrix->dist->my_rank);
     236    35197196 :   const int row_size = matrix->row_sizes[row];
     237    35197196 :   const int col_size = matrix->col_sizes[col];
     238    35197196 :   const int block_size = row_size * col_size;
     239             : 
     240    35197196 :   const int ishard = dbm_get_shard_index(matrix, row, col);
     241    35197196 :   dbm_shard_t *shard = &matrix->shards[ishard];
     242    35197196 :   omp_set_lock(&shard->lock);
     243    35197196 :   dbm_block_t *blk =
     244    35197196 :       dbm_shard_get_or_allocate_block(shard, row, col, block_size);
     245    35197196 :   double *blk_data = &shard->data[blk->offset];
     246    35197196 :   if (summation) {
     247  1091563258 :     for (int i = 0; i < block_size; i++) {
     248  1088493651 :       blk_data[i] += block[i];
     249             :     }
     250             :   } else {
     251    32127589 :     memcpy(blk_data, block, block_size * sizeof(double));
     252             :   }
     253    35197196 :   omp_unset_lock(&shard->lock);
     254    35197196 : }
     255             : 
     256             : /*******************************************************************************
     257             :  * \brief Remove all blocks from matrix, but does not release underlying memory.
     258             :  * \author Ole Schuett
     259             :  ******************************************************************************/
     260     2057065 : void dbm_clear(dbm_matrix_t *matrix) {
     261     2057065 :   assert(omp_get_num_threads() == 1);
     262             : 
     263     2057065 : #pragma omp parallel for DBM_OMP_SCHEDULE
     264             :   for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
     265             :     dbm_shard_t *shard = &matrix->shards[ishard];
     266             :     shard->nblocks = 0;
     267             :     shard->data_size = 0;
     268             :     shard->data_promised = 0;
     269             :     // Does not deallocate memory, hence data_allocated remains unchanged.
     270             :     memset(shard->hashtable, 0, shard->hashtable_size * sizeof(int));
     271             :   }
     272     2057065 : }
     273             : 
     274             : /*******************************************************************************
     275             :  * \brief Removes all blocks from the matrix whose norm is below the threshold.
     276             :  *        Blocks of size zero are always kept.
     277             :  * \author Ole Schuett
     278             :  ******************************************************************************/
     279      582237 : void dbm_filter(dbm_matrix_t *matrix, const double eps) {
     280      582237 :   assert(omp_get_num_threads() == 1);
     281             : 
     282      582237 :   if (eps == 0.0) {
     283             :     return;
     284             :   }
     285      542245 :   const double eps2 = eps * eps;
     286             : 
     287      542245 : #pragma omp parallel for DBM_OMP_SCHEDULE
     288             :   for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
     289             :     dbm_shard_t *shard = &matrix->shards[ishard];
     290             :     const int old_nblocks = shard->nblocks;
     291             :     shard->nblocks = 0;
     292             :     shard->data_promised = 0;
     293             :     memset(shard->hashtable, 0, shard->hashtable_size * sizeof(int));
     294             : 
     295             :     for (int iblock = 0; iblock < old_nblocks; iblock++) {
     296             :       const dbm_block_t old_blk = shard->blocks[iblock];
     297             :       const double *old_blk_data = &shard->data[old_blk.offset];
     298             :       const int row_size = matrix->row_sizes[old_blk.row];
     299             :       const int col_size = matrix->col_sizes[old_blk.col];
     300             :       const int block_size = row_size * col_size;
     301             :       double norm = 0.0;
     302             :       for (int i = 0; i < block_size; i++) {
     303             :         const double value = old_blk_data[i];
     304             :         norm += value * value;
     305             :         if (eps2 <= norm) {
     306             :           break;
     307             :         }
     308             :       }
     309             :       // For historic reasons zero-sized blocks are never filtered.
     310             :       if (block_size > 0 && norm < eps2) {
     311             :         continue; // filter the block
     312             :       }
     313             :       // Re-create block.
     314             :       dbm_block_t *new_blk = dbm_shard_promise_new_block(
     315             :           shard, old_blk.row, old_blk.col, block_size);
     316             :       assert(new_blk->offset <= old_blk.offset);
     317             :       if (new_blk->offset != old_blk.offset) {
     318             :         // Using memmove because it can handle overlapping buffers.
     319             :         double *new_blk_data = &shard->data[new_blk->offset];
     320             :         memmove(new_blk_data, old_blk_data, block_size * sizeof(double));
     321             :       }
     322             :     }
     323             :     shard->data_size = shard->data_promised;
     324             :     // TODO: Could call realloc to release excess memory.
     325             :   }
     326             : }
     327             : 
     328             : /*******************************************************************************
     329             :  * \brief Adds list of blocks efficiently. The blocks will be filled with zeros.
     330             :  *        This routine must always be called within an OpenMP parallel region.
     331             :  * \author Ole Schuett
     332             :  ******************************************************************************/
     333     1390980 : void dbm_reserve_blocks(dbm_matrix_t *matrix, const int nblocks,
     334             :                         const int rows[], const int cols[]) {
     335     1390980 :   assert(omp_get_num_threads() == omp_get_max_threads() &&
     336             :          "Please call dbm_reserve_blocks within an OpenMP parallel region.");
     337     1390980 :   const int my_rank = matrix->dist->my_rank;
     338    29296576 :   for (int i = 0; i < nblocks; i++) {
     339    27905596 :     const int ishard = dbm_get_shard_index(matrix, rows[i], cols[i]);
     340    27905596 :     dbm_shard_t *shard = &matrix->shards[ishard];
     341    27905596 :     omp_set_lock(&shard->lock);
     342    27905596 :     assert(0 <= rows[i] && rows[i] < matrix->nrows);
     343    27905596 :     assert(0 <= cols[i] && cols[i] < matrix->ncols);
     344    27905596 :     assert(dbm_get_stored_coordinates(matrix, rows[i], cols[i]) == my_rank);
     345    27905596 :     const int row_size = matrix->row_sizes[rows[i]];
     346    27905596 :     const int col_size = matrix->col_sizes[cols[i]];
     347    27905596 :     const int block_size = row_size * col_size;
     348    27905596 :     dbm_shard_get_or_promise_block(shard, rows[i], cols[i], block_size);
     349    27905596 :     omp_unset_lock(&shard->lock);
     350             :   }
     351     1390980 : #pragma omp barrier
     352             : 
     353             : #pragma omp for DBM_OMP_SCHEDULE
     354     1390980 :   for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
     355     1390980 :     dbm_shard_t *shard = &matrix->shards[ishard];
     356     1390980 :     dbm_shard_allocate_promised_blocks(shard);
     357             :   }
     358     1390980 : }
     359             : 
     360             : /*******************************************************************************
     361             :  * \brief Multiplies all entries in the given matrix by the given factor alpha.
     362             :  * \author Ole Schuett
     363             :  ******************************************************************************/
     364      474379 : void dbm_scale(dbm_matrix_t *matrix, const double alpha) {
     365      474379 :   assert(omp_get_num_threads() == 1);
     366      474379 :   if (alpha == 1.0) {
     367             :     return;
     368             :   }
     369      341889 :   if (alpha == 0.0) {
     370      333445 :     dbm_zero(matrix);
     371      333445 :     return;
     372             :   }
     373             : 
     374        8444 : #pragma omp parallel for DBM_OMP_SCHEDULE
     375             :   for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
     376             :     dbm_shard_t *shard = &matrix->shards[ishard];
     377             :     for (int i = 0; i < shard->data_size; i++) {
     378             :       shard->data[i] *= alpha;
     379             :     }
     380             :   }
     381             : }
     382             : 
     383             : /*******************************************************************************
     384             :  * \brief Sets all blocks in the given matrix to zero.
     385             :  * \author Ole Schuett
     386             :  ******************************************************************************/
     387      333445 : void dbm_zero(dbm_matrix_t *matrix) {
     388      333445 :   assert(omp_get_num_threads() == 1);
     389             : 
     390      333445 : #pragma omp parallel for DBM_OMP_SCHEDULE
     391             :   for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
     392             :     dbm_shard_t *shard = &matrix->shards[ishard];
     393             :     memset(shard->data, 0, shard->data_size * sizeof(double));
     394             :   }
     395      333445 : }
     396             : 
     397             : /*******************************************************************************
     398             :  * \brief Adds matrix_b to matrix_a.
     399             :  * \author Ole Schuett
     400             :  ******************************************************************************/
     401      211765 : void dbm_add(dbm_matrix_t *matrix_a, const dbm_matrix_t *matrix_b) {
     402      211765 :   assert(omp_get_num_threads() == 1);
     403      211765 :   assert(matrix_a->dist == matrix_b->dist);
     404             : 
     405      211765 : #pragma omp parallel for DBM_OMP_SCHEDULE
     406             :   for (int ishard = 0; ishard < dbm_get_num_shards(matrix_b); ishard++) {
     407             :     dbm_shard_t *shard_a = &matrix_a->shards[ishard];
     408             :     const dbm_shard_t *shard_b = &matrix_b->shards[ishard];
     409             :     for (int iblock = 0; iblock < shard_b->nblocks; iblock++) {
     410             :       const dbm_block_t blk_b = shard_b->blocks[iblock];
     411             : 
     412             :       const int row_size = matrix_b->row_sizes[blk_b.row];
     413             :       const int col_size = matrix_b->col_sizes[blk_b.col];
     414             :       assert(row_size == matrix_a->row_sizes[blk_b.row]);
     415             :       assert(col_size == matrix_a->col_sizes[blk_b.col]);
     416             :       const int block_size = row_size * col_size;
     417             :       dbm_block_t *blk_a = dbm_shard_get_or_allocate_block(
     418             :           shard_a, blk_b.row, blk_b.col, block_size);
     419             :       double *data_a = &shard_a->data[blk_a->offset];
     420             :       const double *data_b = &shard_b->data[blk_b.offset];
     421             :       for (int i = 0; i < block_size; i++) {
     422             :         data_a[i] += data_b[i];
     423             :       }
     424             :     }
     425             :   }
     426      211765 : }
     427             : 
     428             : /*******************************************************************************
     429             :  * \brief Creates an iterator for the blocks of the given matrix.
     430             :  *        The iteration order is not stable.
     431             :  *        This routine must always be called within an OpenMP parallel region.
     432             :  * \author Ole Schuett
     433             :  ******************************************************************************/
     434     3226000 : void dbm_iterator_start(dbm_iterator_t **iter_out, const dbm_matrix_t *matrix) {
     435     3226000 :   assert(omp_get_num_threads() == omp_get_max_threads() &&
     436             :          "Please call dbm_iterator_start within an OpenMP parallel region.");
     437     3226000 :   dbm_iterator_t *iter = malloc(sizeof(dbm_iterator_t));
     438     3226000 :   assert(iter != NULL);
     439     3226000 :   iter->matrix = matrix;
     440     3226000 :   iter->next_block = 0;
     441     3226000 :   iter->next_shard = omp_get_thread_num();
     442     3798813 :   while (iter->next_shard < dbm_get_num_shards(matrix) &&
     443     3226000 :          matrix->shards[iter->next_shard].nblocks == 0) {
     444      572813 :     iter->next_shard += omp_get_num_threads();
     445             :   }
     446     3226000 :   assert(*iter_out == NULL);
     447     3226000 :   *iter_out = iter;
     448     3226000 : }
     449             : 
     450             : /*******************************************************************************
     451             :  * \brief Returns number of blocks the iterator will provide to calling thread.
     452             :  * \author Ole Schuett
     453             :  ******************************************************************************/
     454      602915 : int dbm_iterator_num_blocks(const dbm_iterator_t *iter) {
     455      602915 :   int num_blocks = 0;
     456      602915 :   for (int ishard = omp_get_thread_num();
     457     1205830 :        ishard < dbm_get_num_shards(iter->matrix);
     458      602915 :        ishard += omp_get_num_threads()) {
     459      602915 :     num_blocks += iter->matrix->shards[ishard].nblocks;
     460             :   }
     461      602915 :   return num_blocks;
     462             : }
     463             : 
     464             : /*******************************************************************************
     465             :  * \brief Tests whether the given iterator has any block left.
     466             :  * \author Ole Schuett
     467             :  ******************************************************************************/
     468    57739890 : bool dbm_iterator_blocks_left(const dbm_iterator_t *iter) {
     469    57739890 :   return iter->next_shard < dbm_get_num_shards(iter->matrix);
     470             : }
     471             : 
     472             : /*******************************************************************************
     473             :  * \brief Returns the next block from the given iterator.
     474             :  * \author Ole Schuett
     475             :  ******************************************************************************/
     476    66124770 : void dbm_iterator_next_block(dbm_iterator_t *iter, int *row, int *col,
     477             :                              double **block, int *row_size, int *col_size) {
     478    66124770 :   const dbm_matrix_t *matrix = iter->matrix;
     479    66124770 :   assert(iter->next_shard < dbm_get_num_shards(matrix));
     480    66124770 :   const dbm_shard_t *shard = &matrix->shards[iter->next_shard];
     481    66124770 :   assert(iter->next_block < shard->nblocks);
     482    66124770 :   dbm_block_t *blk = &shard->blocks[iter->next_block];
     483             : 
     484    66124770 :   *row = blk->row;
     485    66124770 :   *col = blk->col;
     486    66124770 :   *row_size = matrix->row_sizes[blk->row];
     487    66124770 :   *col_size = matrix->col_sizes[blk->col];
     488    66124770 :   *block = &shard->data[blk->offset];
     489             : 
     490    66124770 :   iter->next_block++;
     491    66124770 :   if (iter->next_block >= shard->nblocks) {
     492             :     // Advance to the next non-empty shard...
     493     2653187 :     iter->next_shard += omp_get_num_threads();
     494     2653187 :     while (iter->next_shard < dbm_get_num_shards(matrix) &&
     495           0 :            matrix->shards[iter->next_shard].nblocks == 0) {
     496           0 :       iter->next_shard += omp_get_num_threads();
     497             :     }
     498     2653187 :     iter->next_block = 0; // ...and continue with its first block.
     499             :   }
     500    66124770 : }
     501             : 
     502             : /*******************************************************************************
     503             :  * \brief Releases the given iterator.
     504             :  * \author Ole Schuett
     505             :  ******************************************************************************/
     506     3226000 : void dbm_iterator_stop(dbm_iterator_t *iter) { free(iter); }
     507             : 
     508             : /*******************************************************************************
     509             :  * \brief Private routine to accumulate using Kahan's summation.
     510             :  * \author Hans Pabst
     511             :  ******************************************************************************/
     512    14383468 : static double kahan_sum(double value, double *accumulator,
     513             :                         double *compensation) {
     514    14383468 :   double r, c;
     515    14383468 :   assert(NULL != accumulator && NULL != compensation);
     516    14383468 :   c = value - *compensation;
     517    14383468 :   r = *accumulator + c;
     518    14383468 :   *compensation = (r - *accumulator) - c;
     519    14383468 :   *accumulator = r;
     520    14383468 :   return r;
     521             : }
     522             : 
     523             : /*******************************************************************************
     524             :  * \brief Computes a checksum of the given matrix.
     525             :  * \author Ole Schuett
     526             :  ******************************************************************************/
     527         190 : double dbm_checksum(const dbm_matrix_t *matrix) {
     528         190 :   double checksum = 0.0, compensation = 0.0;
     529         380 :   for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
     530         190 :     const dbm_shard_t *shard = &matrix->shards[ishard];
     531    14383658 :     for (int i = 0; i < shard->data_size; i++) {
     532    14383468 :       const double value = shard->data[i];
     533    14383468 :       kahan_sum(value * value, &checksum, &compensation);
     534             :     }
     535             :   }
     536         190 :   dbm_mpi_sum_double(&checksum, 1, matrix->dist->comm);
     537         190 :   return checksum;
     538             : }
     539             : 
     540             : /*******************************************************************************
     541             :  * \brief Returns the largest absolute value of all matrix elements.
     542             :  * \author Ole Schuett
     543             :  ******************************************************************************/
     544          48 : double dbm_maxabs(const dbm_matrix_t *matrix) {
     545          48 :   double maxabs = 0.0;
     546          96 :   for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
     547          48 :     const dbm_shard_t *shard = &matrix->shards[ishard];
     548     1599050 :     for (int i = 0; i < shard->data_size; i++) {
     549     1599002 :       maxabs = fmax(maxabs, fabs(shard->data[i]));
     550             :     }
     551             :   }
     552          48 :   dbm_mpi_max_double(&maxabs, 1, matrix->dist->comm);
     553          48 :   return maxabs;
     554             : }
     555             : 
     556             : /*******************************************************************************
     557             :  * \brief Returns the name of the matrix of the given matrix.
     558             :  * \author Ole Schuett
     559             :  ******************************************************************************/
     560     1778307 : const char *dbm_get_name(const dbm_matrix_t *matrix) { return matrix->name; }
     561             : 
     562             : /*******************************************************************************
     563             :  * \brief Returns the number of local Non-Zero Elements of the given matrix.
     564             :  * \author Ole Schuett
     565             :  ******************************************************************************/
     566     1776141 : int dbm_get_nze(const dbm_matrix_t *matrix) {
     567     1776141 :   int nze = 0;
     568     3552282 :   for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
     569     1776141 :     nze += matrix->shards[ishard].data_size;
     570             :   }
     571     1776141 :   return nze;
     572             : }
     573             : 
     574             : /*******************************************************************************
     575             :  * \brief Returns the number of local blocks of the given matrix.
     576             :  * \author Ole Schuett
     577             :  ******************************************************************************/
     578      972164 : int dbm_get_num_blocks(const dbm_matrix_t *matrix) {
     579      972164 :   int nblocks = 0;
     580     1944328 :   for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
     581      972164 :     nblocks += matrix->shards[ishard].nblocks;
     582             :   }
     583      972164 :   return nblocks;
     584             : }
     585             : 
     586             : /*******************************************************************************
     587             :  * \brief Returns the row block sizes of the given matrix.
     588             :  * \author Ole Schuett
     589             :  ******************************************************************************/
     590     4527363 : void dbm_get_row_sizes(const dbm_matrix_t *matrix, int *nrows,
     591             :                        const int **row_sizes) {
     592     4527363 :   *nrows = matrix->nrows;
     593     4527363 :   *row_sizes = matrix->row_sizes;
     594     4527363 : }
     595             : 
     596             : /*******************************************************************************
     597             :  * \brief Returns the column block sizes of the given matrix.
     598             :  * \author Ole Schuett
     599             :  ******************************************************************************/
     600     3298597 : void dbm_get_col_sizes(const dbm_matrix_t *matrix, int *ncols,
     601             :                        const int **col_sizes) {
     602     3298597 :   *ncols = matrix->ncols;
     603     3298597 :   *col_sizes = matrix->col_sizes;
     604     3298597 : }
     605             : 
     606             : /*******************************************************************************
     607             :  * \brief Returns the local row block sizes of the given matrix.
     608             :  * \author Ole Schuett
     609             :  ******************************************************************************/
     610      287280 : void dbm_get_local_rows(const dbm_matrix_t *matrix, int *nlocal_rows,
     611             :                         const int **local_rows) {
     612      287280 :   *nlocal_rows = matrix->dist->rows.nlocals;
     613      287280 :   *local_rows = matrix->dist->rows.local_indicies;
     614      287280 : }
     615             : 
     616             : /*******************************************************************************
     617             :  * \brief Returns the local column block sizes of the given matrix.
     618             :  * \author Ole Schuett
     619             :  ******************************************************************************/
     620      105118 : void dbm_get_local_cols(const dbm_matrix_t *matrix, int *nlocal_cols,
     621             :                         const int **local_cols) {
     622      105118 :   *nlocal_cols = matrix->dist->cols.nlocals;
     623      105118 :   *local_cols = matrix->dist->cols.local_indicies;
     624      105118 : }
     625             : 
     626             : /*******************************************************************************
     627             :  * \brief Returns the MPI rank on which the given block should be stored.
     628             :  * \author Ole Schuett
     629             :  ******************************************************************************/
     630    92074402 : int dbm_get_stored_coordinates(const dbm_matrix_t *matrix, const int row,
     631             :                                const int col) {
     632    92074402 :   return dbm_distribution_stored_coords(matrix->dist, row, col);
     633             : }
     634             : 
     635             : /*******************************************************************************
     636             :  * \brief Returns the distribution of the given matrix.
     637             :  * \author Ole Schuett
     638             :  ******************************************************************************/
     639     1229842 : const dbm_distribution_t *dbm_get_distribution(const dbm_matrix_t *matrix) {
     640     1229842 :   return matrix->dist;
     641             : }
     642             : 
     643             : // EOF

Generated by: LCOV version 1.15