LCOV - code coverage report
Current view: top level - src/dbm - dbm_matrix.c (source / functions) Hit Total Coverage
Test: CP2K Regtests (git:96bff0e) Lines: 277 279 99.3 %
Date: 2024-07-27 06:51:10 Functions: 28 28 100.0 %

          Line data    Source code
       1             : /*----------------------------------------------------------------------------*/
       2             : /*  CP2K: A general program to perform molecular dynamics simulations         */
       3             : /*  Copyright 2000-2024 CP2K developers group <https://cp2k.org>              */
       4             : /*                                                                            */
       5             : /*  SPDX-License-Identifier: BSD-3-Clause                                     */
       6             : /*----------------------------------------------------------------------------*/
       7             : 
       8             : #include <assert.h>
       9             : #include <math.h>
      10             : #include <omp.h>
      11             : #include <stdbool.h>
      12             : #include <stddef.h>
      13             : #include <stdlib.h>
      14             : #include <string.h>
      15             : 
      16             : #include "dbm_hyperparams.h"
      17             : #include "dbm_matrix.h"
      18             : 
      19             : /*******************************************************************************
      20             :  * \brief Creates a new matrix.
      21             :  * \author Ole Schuett
      22             :  ******************************************************************************/
      23     1138271 : void dbm_create(dbm_matrix_t **matrix_out, dbm_distribution_t *dist,
      24             :                 const char name[], const int nrows, const int ncols,
      25             :                 const int row_sizes[nrows], const int col_sizes[ncols]) {
      26     1138271 :   assert(omp_get_num_threads() == 1);
      27             : 
      28     1138271 :   dbm_matrix_t *matrix = calloc(1, sizeof(dbm_matrix_t));
      29             : 
      30     1138271 :   assert(dist->rows.length == nrows);
      31     1138271 :   assert(dist->cols.length == ncols);
      32     1138271 :   dbm_distribution_hold(dist);
      33     1138271 :   matrix->dist = dist;
      34             : 
      35     1138271 :   size_t size = (strlen(name) + 1) * sizeof(char);
      36     1138271 :   matrix->name = malloc(size);
      37     1138271 :   memcpy(matrix->name, name, size);
      38             : 
      39     1138271 :   matrix->nrows = nrows;
      40     1138271 :   matrix->ncols = ncols;
      41             : 
      42     1138271 :   size = nrows * sizeof(int);
      43     1138271 :   matrix->row_sizes = malloc(size);
      44     1138271 :   memcpy(matrix->row_sizes, row_sizes, size);
      45             : 
      46     1138271 :   size = ncols * sizeof(int);
      47     1138271 :   matrix->col_sizes = malloc(size);
      48     1138271 :   memcpy(matrix->col_sizes, col_sizes, size);
      49             : 
      50     1138271 :   matrix->shards = malloc(dbm_get_num_shards(matrix) * sizeof(dbm_shard_t));
      51     1138271 : #pragma omp parallel for
      52             :   for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
      53             :     dbm_shard_init(&matrix->shards[ishard]);
      54             :   }
      55             : 
      56     1138271 :   assert(*matrix_out == NULL);
      57     1138271 :   *matrix_out = matrix;
      58     1138271 : }
      59             : 
      60             : /*******************************************************************************
      61             :  * \brief Releases a matrix and all its ressources.
      62             :  * \author Ole Schuett
      63             :  ******************************************************************************/
      64     1138271 : void dbm_release(dbm_matrix_t *matrix) {
      65     1138271 :   assert(omp_get_num_threads() == 1);
      66     2276542 :   for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
      67     1138271 :     dbm_shard_release(&matrix->shards[ishard]);
      68             :   }
      69     1138271 :   dbm_distribution_release(matrix->dist);
      70     1138271 :   free(matrix->row_sizes);
      71     1138271 :   free(matrix->col_sizes);
      72     1138271 :   free(matrix->shards);
      73     1138271 :   free(matrix->name);
      74     1138271 :   free(matrix);
      75     1138271 : }
      76             : 
      77             : /*******************************************************************************
      78             :  * \brief Copies content of matrix_b into matrix_a.
      79             :  *        Matrices must have the same row/col block sizes and distribution.
      80             :  * \author Ole Schuett
      81             :  ******************************************************************************/
      82      265848 : void dbm_copy(dbm_matrix_t *matrix_a, const dbm_matrix_t *matrix_b) {
      83      265848 :   assert(omp_get_num_threads() == 1);
      84             : 
      85      265848 :   assert(matrix_b->nrows == matrix_a->nrows);
      86     4110263 :   for (int i = 0; i < matrix_b->nrows; i++) {
      87     3844415 :     assert(matrix_b->row_sizes[i] == matrix_a->row_sizes[i]);
      88             :   }
      89      265848 :   assert(matrix_b->ncols == matrix_a->ncols);
      90    34368874 :   for (int i = 0; i < matrix_b->ncols; i++) {
      91    34103026 :     assert(matrix_b->col_sizes[i] == matrix_a->col_sizes[i]);
      92             :   }
      93             : 
      94      265848 :   assert(matrix_a->dist == matrix_b->dist);
      95             : 
      96      265848 : #pragma omp parallel for schedule(dynamic)
      97             :   for (int ishard = 0; ishard < dbm_get_num_shards(matrix_a); ishard++) {
      98             :     dbm_shard_copy(&matrix_a->shards[ishard], &matrix_b->shards[ishard]);
      99             :   }
     100      265848 : }
     101             : 
     102             : /*******************************************************************************
     103             :  * \brief Copies content of matrix_b into matrix_a.
     104             :  *        Matrices may have different distributions.
     105             :  * \author Ole Schuett
     106             :  ******************************************************************************/
     107         144 : void dbm_redistribute(const dbm_matrix_t *matrix, dbm_matrix_t *redist) {
     108         144 :   assert(omp_get_num_threads() == 1);
     109         144 :   assert(matrix->nrows == redist->nrows);
     110        6384 :   for (int i = 0; i < matrix->nrows; i++) {
     111        6240 :     assert(matrix->row_sizes[i] == redist->row_sizes[i]);
     112             :   }
     113         144 :   assert(matrix->ncols == redist->ncols);
     114        6384 :   for (int i = 0; i < matrix->ncols; i++) {
     115        6240 :     assert(matrix->col_sizes[i] == redist->col_sizes[i]);
     116             :   }
     117             : 
     118         144 :   assert(dbm_mpi_comms_are_similar(matrix->dist->comm, redist->dist->comm));
     119         144 :   const dbm_mpi_comm_t comm = redist->dist->comm;
     120         144 :   const int nranks = dbm_mpi_comm_size(comm);
     121             : 
     122             :   // 1st pass: Compute send_count.
     123         144 :   int send_count[nranks];
     124         144 :   memset(send_count, 0, nranks * sizeof(int));
     125         288 :   for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
     126         144 :     dbm_shard_t *shard = &matrix->shards[ishard];
     127        8936 :     for (int iblock = 0; iblock < shard->nblocks; iblock++) {
     128        8792 :       const dbm_block_t *blk = &shard->blocks[iblock];
     129        8792 :       const int row_size = matrix->row_sizes[blk->row];
     130        8792 :       const int col_size = matrix->col_sizes[blk->col];
     131        8792 :       const int block_size = row_size * col_size;
     132        8792 :       const int rank = dbm_get_stored_coordinates(redist, blk->row, blk->col);
     133        8792 :       assert(0 <= rank && rank < nranks);
     134        8792 :       send_count[rank] += 2 + block_size;
     135             :     }
     136             :   }
     137             : 
     138             :   // 1st communication: Exchange counts.
     139         144 :   int recv_count[nranks];
     140         144 :   dbm_mpi_alltoall_int(send_count, 1, recv_count, 1, comm);
     141             : 
     142             :   // Compute displacements and allocate data buffers.
     143         144 :   int send_displ[nranks + 1], recv_displ[nranks + 1];
     144         144 :   send_displ[0] = recv_displ[0] = 0;
     145         432 :   for (int irank = 1; irank <= nranks; irank++) {
     146         288 :     send_displ[irank] = send_displ[irank - 1] + send_count[irank - 1];
     147         288 :     recv_displ[irank] = recv_displ[irank - 1] + recv_count[irank - 1];
     148             :   }
     149         144 :   const int total_send_count = send_displ[nranks];
     150         144 :   const int total_recv_count = recv_displ[nranks];
     151         144 :   double *data_send = dbm_mpi_alloc_mem(total_send_count * sizeof(double));
     152         144 :   double *data_recv = dbm_mpi_alloc_mem(total_recv_count * sizeof(double));
     153             : 
     154             :   // 2nd pass: Fill send_data.
     155         144 :   int send_data_positions[nranks];
     156         144 :   memcpy(send_data_positions, send_displ, nranks * sizeof(int));
     157         288 :   for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
     158         144 :     dbm_shard_t *shard = &matrix->shards[ishard];
     159        8936 :     for (int iblock = 0; iblock < shard->nblocks; iblock++) {
     160        8792 :       const dbm_block_t *blk = &shard->blocks[iblock];
     161        8792 :       const double *blk_data = &shard->data[blk->offset];
     162        8792 :       const int row_size = matrix->row_sizes[blk->row];
     163        8792 :       const int col_size = matrix->col_sizes[blk->col];
     164        8792 :       const int block_size = row_size * col_size;
     165        8792 :       const int rank = dbm_get_stored_coordinates(redist, blk->row, blk->col);
     166        8792 :       const int pos = send_data_positions[rank];
     167        8792 :       data_send[pos + 0] = blk->row; // send integers as doubles
     168        8792 :       data_send[pos + 1] = blk->col;
     169        8792 :       memcpy(&data_send[pos + 2], blk_data, block_size * sizeof(double));
     170        8792 :       send_data_positions[rank] += 2 + block_size;
     171             :     }
     172             :   }
     173         432 :   for (int irank = 0; irank < nranks; irank++) {
     174         288 :     assert(send_data_positions[irank] == send_displ[irank + 1]);
     175             :   }
     176             : 
     177             :   // 2nd communication: Exchange data.
     178         144 :   dbm_mpi_alltoallv_double(data_send, send_count, send_displ, data_recv,
     179             :                            recv_count, recv_displ, comm);
     180         144 :   dbm_mpi_free_mem(data_send);
     181             : 
     182             :   // 3rd pass: Unpack data.
     183         144 :   dbm_clear(redist);
     184         144 :   int recv_data_pos = 0;
     185        8936 :   while (recv_data_pos < total_recv_count) {
     186        8792 :     const int row = (int)data_recv[recv_data_pos + 0];
     187        8792 :     const int col = (int)data_recv[recv_data_pos + 1];
     188        8792 :     assert(data_recv[recv_data_pos + 0] - (double)row == 0.0);
     189        8792 :     assert(data_recv[recv_data_pos + 1] - (double)col == 0.0);
     190        8792 :     dbm_put_block(redist, row, col, false, &data_recv[recv_data_pos + 2]);
     191        8792 :     const int row_size = matrix->row_sizes[row];
     192        8792 :     const int col_size = matrix->col_sizes[col];
     193        8792 :     const int block_size = row_size * col_size;
     194        8792 :     recv_data_pos += 2 + block_size;
     195             :   }
     196         144 :   assert(recv_data_pos == total_recv_count);
     197         144 :   dbm_mpi_free_mem(data_recv);
     198         144 : }
     199             : 
     200             : /*******************************************************************************
     201             :  * \brief Looks up a block from given matrics. This routine is thread-safe.
     202             :  *        If the block is not found then a null pointer is returned.
     203             :  * \author Ole Schuett
     204             :  ******************************************************************************/
     205    22816472 : void dbm_get_block_p(dbm_matrix_t *matrix, const int row, const int col,
     206             :                      double **block, int *row_size, int *col_size) {
     207    22816472 :   assert(0 <= row && row < matrix->nrows);
     208    22816472 :   assert(0 <= col && col < matrix->ncols);
     209    22816472 :   assert(dbm_get_stored_coordinates(matrix, row, col) == matrix->dist->my_rank);
     210    22816472 :   *row_size = matrix->row_sizes[row];
     211    22816472 :   *col_size = matrix->col_sizes[col];
     212    22816472 :   *block = NULL;
     213             : 
     214    22816472 :   const int ishard = dbm_get_shard_index(matrix, row, col);
     215    22816472 :   const dbm_shard_t *shard = &matrix->shards[ishard];
     216    22816472 :   dbm_block_t *blk = dbm_shard_lookup(shard, row, col);
     217    22816472 :   if (blk != NULL) {
     218    21597289 :     *block = &shard->data[blk->offset];
     219             :   }
     220    22816472 : }
     221             : 
     222             : /*******************************************************************************
     223             :  * \brief Adds a block to given matrix. This routine is thread-safe.
     224             :  *        If block already exist then it gets overwritten (or summed).
     225             :  * \author Ole Schuett
     226             :  ******************************************************************************/
     227    35014148 : void dbm_put_block(dbm_matrix_t *matrix, const int row, const int col,
     228             :                    const bool summation, const double *block) {
     229    35014148 :   assert(0 <= row && row < matrix->nrows);
     230    35014148 :   assert(0 <= col && col < matrix->ncols);
     231    35014148 :   assert(dbm_get_stored_coordinates(matrix, row, col) == matrix->dist->my_rank);
     232    35014148 :   const int row_size = matrix->row_sizes[row];
     233    35014148 :   const int col_size = matrix->col_sizes[col];
     234    35014148 :   const int block_size = row_size * col_size;
     235             : 
     236    35014148 :   const int ishard = dbm_get_shard_index(matrix, row, col);
     237    35014148 :   dbm_shard_t *shard = &matrix->shards[ishard];
     238    35014148 :   omp_set_lock(&shard->lock);
     239    35014148 :   dbm_block_t *blk =
     240    35014148 :       dbm_shard_get_or_allocate_block(shard, row, col, block_size);
     241    35014148 :   double *blk_data = &shard->data[blk->offset];
     242    35014148 :   if (summation) {
     243  1102120389 :     for (int i = 0; i < block_size; i++) {
     244  1099148095 :       blk_data[i] += block[i];
     245             :     }
     246             :   } else {
     247    32041854 :     memcpy(blk_data, block, block_size * sizeof(double));
     248             :   }
     249    35014148 :   omp_unset_lock(&shard->lock);
     250    35014148 : }
     251             : 
     252             : /*******************************************************************************
     253             :  * \brief Remove all blocks from matrix, but does not release underlying memory.
     254             :  * \author Ole Schuett
     255             :  ******************************************************************************/
     256     1466120 : void dbm_clear(dbm_matrix_t *matrix) {
     257     1466120 :   assert(omp_get_num_threads() == 1);
     258             : 
     259     1466120 : #pragma omp parallel for schedule(dynamic)
     260             :   for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
     261             :     dbm_shard_t *shard = &matrix->shards[ishard];
     262             :     shard->nblocks = 0;
     263             :     shard->data_size = 0;
     264             :     shard->data_promised = 0;
     265             :     // Does not deallocate memory, hence data_allocated remains unchanged.
     266             :     memset(shard->hashtable, 0, shard->hashtable_size * sizeof(int));
     267             :   }
     268     1466120 : }
     269             : 
     270             : /*******************************************************************************
     271             :  * \brief Removes all blocks from the matrix whose norm is below the threshold.
     272             :  *        Blocks of size zero are always kept.
     273             :  * \author Ole Schuett
     274             :  ******************************************************************************/
     275      437523 : void dbm_filter(dbm_matrix_t *matrix, const double eps) {
     276      437523 :   assert(omp_get_num_threads() == 1);
     277             : 
     278      437523 :   if (eps == 0.0) {
     279             :     return;
     280             :   }
     281      430459 :   const double eps2 = eps * eps;
     282             : 
     283      430459 : #pragma omp parallel for schedule(dynamic)
     284             :   for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
     285             :     dbm_shard_t *shard = &matrix->shards[ishard];
     286             :     const int old_nblocks = shard->nblocks;
     287             :     shard->nblocks = 0;
     288             :     shard->data_promised = 0;
     289             :     memset(shard->hashtable, 0, shard->hashtable_size * sizeof(int));
     290             : 
     291             :     for (int iblock = 0; iblock < old_nblocks; iblock++) {
     292             :       const dbm_block_t old_blk = shard->blocks[iblock];
     293             :       const double *old_blk_data = &shard->data[old_blk.offset];
     294             :       const int row_size = matrix->row_sizes[old_blk.row];
     295             :       const int col_size = matrix->col_sizes[old_blk.col];
     296             :       const int block_size = row_size * col_size;
     297             :       double norm = 0.0;
     298             :       for (int i = 0; i < block_size; i++) {
     299             :         norm += old_blk_data[i] * old_blk_data[i];
     300             :       }
     301             :       // For historic reasons zero-sized blocks are never filtered.
     302             :       if (block_size > 0 && norm < eps2) {
     303             :         continue; // filter the block
     304             :       }
     305             :       // Re-create block.
     306             :       dbm_block_t *new_blk = dbm_shard_promise_new_block(
     307             :           shard, old_blk.row, old_blk.col, block_size);
     308             :       assert(new_blk->offset <= old_blk.offset);
     309             :       if (new_blk->offset != old_blk.offset) {
     310             :         // Using memmove instead of memcpy because it handles overlap correctly.
     311             :         double *new_blk_data = &shard->data[new_blk->offset];
     312             :         memmove(new_blk_data, old_blk_data, block_size * sizeof(double));
     313             :       }
     314             :     }
     315             :     shard->data_size = shard->data_promised;
     316             :     // TODO: Could call realloc to release excess memory.
     317             :   }
     318             : }
     319             : 
     320             : /*******************************************************************************
     321             :  * \brief Adds list of blocks efficiently. The blocks will be filled with zeros.
     322             :  *        This routine must always be called within an OpenMP parallel region.
     323             :  * \author Ole Schuett
     324             :  ******************************************************************************/
     325     1077929 : void dbm_reserve_blocks(dbm_matrix_t *matrix, const int nblocks,
     326             :                         const int rows[], const int cols[]) {
     327     1077929 :   assert(omp_get_num_threads() == omp_get_max_threads() &&
     328             :          "Please call dbm_reserve_blocks within an OpenMP parallel region.");
     329     1077929 :   const int my_rank = matrix->dist->my_rank;
     330    28470575 :   for (int i = 0; i < nblocks; i++) {
     331    27392646 :     const int ishard = dbm_get_shard_index(matrix, rows[i], cols[i]);
     332    27392646 :     dbm_shard_t *shard = &matrix->shards[ishard];
     333    27392646 :     omp_set_lock(&shard->lock);
     334    27392646 :     assert(0 <= rows[i] && rows[i] < matrix->nrows);
     335    27392646 :     assert(0 <= cols[i] && cols[i] < matrix->ncols);
     336    27392646 :     assert(dbm_get_stored_coordinates(matrix, rows[i], cols[i]) == my_rank);
     337    27392646 :     const int row_size = matrix->row_sizes[rows[i]];
     338    27392646 :     const int col_size = matrix->col_sizes[cols[i]];
     339    27392646 :     const int block_size = row_size * col_size;
     340    27392646 :     dbm_shard_get_or_promise_block(shard, rows[i], cols[i], block_size);
     341    27392646 :     omp_unset_lock(&shard->lock);
     342             :   }
     343     1077929 : #pragma omp barrier
     344             : 
     345             : #pragma omp for schedule(dynamic)
     346     1077929 :   for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
     347     1077929 :     dbm_shard_t *shard = &matrix->shards[ishard];
     348     1077929 :     dbm_shard_allocate_promised_blocks(shard);
     349             :   }
     350     1077929 : }
     351             : 
     352             : /*******************************************************************************
     353             :  * \brief Multiplies all entries in the given matrix by the given factor alpha.
     354             :  * \author Ole Schuett
     355             :  ******************************************************************************/
     356      278501 : void dbm_scale(dbm_matrix_t *matrix, const double alpha) {
     357      278501 :   assert(omp_get_num_threads() == 1);
     358      278501 :   if (alpha == 1.0) {
     359             :     return;
     360             :   }
     361      192403 :   if (alpha == 0.0) {
     362      183371 :     dbm_zero(matrix);
     363      183371 :     return;
     364             :   }
     365             : 
     366        9032 : #pragma omp parallel for schedule(dynamic)
     367             :   for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
     368             :     dbm_shard_t *shard = &matrix->shards[ishard];
     369             :     for (int i = 0; i < shard->data_size; i++) {
     370             :       shard->data[i] *= alpha;
     371             :     }
     372             :   }
     373             : }
     374             : 
     375             : /*******************************************************************************
     376             :  * \brief Sets all blocks in the given matrix to zero.
     377             :  * \author Ole Schuett
     378             :  ******************************************************************************/
     379      183371 : void dbm_zero(dbm_matrix_t *matrix) {
     380      183371 :   assert(omp_get_num_threads() == 1);
     381             : 
     382      183371 : #pragma omp parallel for schedule(dynamic)
     383             :   for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
     384             :     dbm_shard_t *shard = &matrix->shards[ishard];
     385             :     memset(shard->data, 0, shard->data_size * sizeof(double));
     386             :   }
     387      183371 : }
     388             : 
     389             : /*******************************************************************************
     390             :  * \brief Adds matrix_b to matrix_a.
     391             :  * \author Ole Schuett
     392             :  ******************************************************************************/
     393      130569 : void dbm_add(dbm_matrix_t *matrix_a, const dbm_matrix_t *matrix_b) {
     394      130569 :   assert(omp_get_num_threads() == 1);
     395      130569 :   assert(matrix_a->dist == matrix_b->dist);
     396             : 
     397      130569 : #pragma omp parallel for schedule(dynamic)
     398             :   for (int ishard = 0; ishard < dbm_get_num_shards(matrix_b); ishard++) {
     399             :     dbm_shard_t *shard_a = &matrix_a->shards[ishard];
     400             :     const dbm_shard_t *shard_b = &matrix_b->shards[ishard];
     401             :     for (int iblock = 0; iblock < shard_b->nblocks; iblock++) {
     402             :       const dbm_block_t blk_b = shard_b->blocks[iblock];
     403             : 
     404             :       const int row_size = matrix_b->row_sizes[blk_b.row];
     405             :       const int col_size = matrix_b->col_sizes[blk_b.col];
     406             :       assert(row_size == matrix_a->row_sizes[blk_b.row]);
     407             :       assert(col_size == matrix_a->col_sizes[blk_b.col]);
     408             :       const int block_size = row_size * col_size;
     409             :       dbm_block_t *blk_a = dbm_shard_get_or_allocate_block(
     410             :           shard_a, blk_b.row, blk_b.col, block_size);
     411             :       double *data_a = &shard_a->data[blk_a->offset];
     412             :       const double *data_b = &shard_b->data[blk_b.offset];
     413             :       for (int i = 0; i < block_size; i++) {
     414             :         data_a[i] += data_b[i];
     415             :       }
     416             :     }
     417             :   }
     418      130569 : }
     419             : 
     420             : /*******************************************************************************
     421             :  * \brief Creates an iterator for the blocks of the given matrix.
     422             :  *        The iteration order is not stable.
     423             :  *        This routine must always be called within an OpenMP parallel region.
     424             :  * \author Ole Schuett
     425             :  ******************************************************************************/
     426     2551901 : void dbm_iterator_start(dbm_iterator_t **iter_out, const dbm_matrix_t *matrix) {
     427     2551901 :   assert(omp_get_num_threads() == omp_get_max_threads() &&
     428             :          "Please call dbm_iterator_start within an OpenMP parallel region.");
     429     2551901 :   dbm_iterator_t *iter = malloc(sizeof(dbm_iterator_t));
     430     2551901 :   iter->matrix = matrix;
     431     2551901 :   iter->next_block = 0;
     432     2551901 :   iter->next_shard = omp_get_thread_num();
     433     3012883 :   while (iter->next_shard < dbm_get_num_shards(matrix) &&
     434     2551901 :          matrix->shards[iter->next_shard].nblocks == 0) {
     435      460982 :     iter->next_shard += omp_get_num_threads();
     436             :   }
     437     2551901 :   assert(*iter_out == NULL);
     438     2551901 :   *iter_out = iter;
     439     2551901 : }
     440             : 
     441             : /*******************************************************************************
     442             :  * \brief Returns number of blocks the iterator will provide to calling thread.
     443             :  * \author Ole Schuett
     444             :  ******************************************************************************/
     445      505430 : int dbm_iterator_num_blocks(const dbm_iterator_t *iter) {
     446      505430 :   int num_blocks = 0;
     447      505430 :   for (int ishard = omp_get_thread_num();
     448     1010860 :        ishard < dbm_get_num_shards(iter->matrix);
     449      505430 :        ishard += omp_get_num_threads()) {
     450      505430 :     num_blocks += iter->matrix->shards[ishard].nblocks;
     451             :   }
     452      505430 :   return num_blocks;
     453             : }
     454             : 
     455             : /*******************************************************************************
     456             :  * \brief Tests whether the given iterator has any block left.
     457             :  * \author Ole Schuett
     458             :  ******************************************************************************/
     459    56396241 : bool dbm_iterator_blocks_left(const dbm_iterator_t *iter) {
     460    56396241 :   return iter->next_shard < dbm_get_num_shards(iter->matrix);
     461             : }
     462             : 
     463             : /*******************************************************************************
     464             :  * \brief Returns the next block from the given iterator.
     465             :  * \author Ole Schuett
     466             :  ******************************************************************************/
     467    65474452 : void dbm_iterator_next_block(dbm_iterator_t *iter, int *row, int *col,
     468             :                              double **block, int *row_size, int *col_size) {
     469    65474452 :   const dbm_matrix_t *matrix = iter->matrix;
     470    65474452 :   assert(iter->next_shard < dbm_get_num_shards(matrix));
     471    65474452 :   const dbm_shard_t *shard = &matrix->shards[iter->next_shard];
     472    65474452 :   assert(iter->next_block < shard->nblocks);
     473    65474452 :   dbm_block_t *blk = &shard->blocks[iter->next_block];
     474             : 
     475    65474452 :   *row = blk->row;
     476    65474452 :   *col = blk->col;
     477    65474452 :   *row_size = matrix->row_sizes[blk->row];
     478    65474452 :   *col_size = matrix->col_sizes[blk->col];
     479    65474452 :   *block = &shard->data[blk->offset];
     480             : 
     481    65474452 :   iter->next_block++;
     482    65474452 :   if (iter->next_block >= shard->nblocks) {
     483             :     // Advance to the next non-empty shard...
     484     2090919 :     iter->next_shard += omp_get_num_threads();
     485     2090919 :     while (iter->next_shard < dbm_get_num_shards(matrix) &&
     486           0 :            matrix->shards[iter->next_shard].nblocks == 0) {
     487           0 :       iter->next_shard += omp_get_num_threads();
     488             :     }
     489     2090919 :     iter->next_block = 0; // ...and continue with its first block.
     490             :   }
     491    65474452 : }
     492             : 
     493             : /*******************************************************************************
     494             :  * \brief Releases the given iterator.
     495             :  * \author Ole Schuett
     496             :  ******************************************************************************/
     497     2551901 : void dbm_iterator_stop(dbm_iterator_t *iter) { free(iter); }
     498             : 
     499             : /*******************************************************************************
     500             :  * \brief Computes a checksum of the given matrix.
     501             :  * \author Ole Schuett
     502             :  ******************************************************************************/
     503         190 : double dbm_checksum(const dbm_matrix_t *matrix) {
     504         190 :   double checksum = 0.0;
     505         380 :   for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
     506         190 :     const dbm_shard_t *shard = &matrix->shards[ishard];
     507    14383658 :     for (int i = 0; i < shard->data_size; i++) {
     508    14383468 :       checksum += shard->data[i] * shard->data[i];
     509             :     }
     510             :   }
     511         190 :   dbm_mpi_sum_double(&checksum, 1, matrix->dist->comm);
     512         190 :   return checksum;
     513             : }
     514             : 
     515             : /*******************************************************************************
     516             :  * \brief Returns the absolute value of the larges element of the entire matrix.
     517             :  * \author Ole Schuett
     518             :  ******************************************************************************/
     519          48 : double dbm_maxabs(const dbm_matrix_t *matrix) {
     520          48 :   double maxabs = 0.0;
     521          96 :   for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
     522          48 :     dbm_shard_t *shard = &matrix->shards[ishard];
     523     1599050 :     for (int i = 0; i < shard->data_size; i++) {
     524     1599002 :       maxabs = fmax(maxabs, fabs(shard->data[i]));
     525             :     }
     526             :   }
     527          48 :   dbm_mpi_max_double(&maxabs, 1, matrix->dist->comm);
     528          48 :   return maxabs;
     529             : }
     530             : 
     531             : /*******************************************************************************
     532             :  * \brief Returns the name of the matrix of the given matrix.
     533             :  * \author Ole Schuett
     534             :  ******************************************************************************/
     535     1127774 : const char *dbm_get_name(const dbm_matrix_t *matrix) { return matrix->name; }
     536             : 
     537             : /*******************************************************************************
     538             :  * \brief Returns the number of local Non-Zero Elements of the given matrix.
     539             :  * \author Ole Schuett
     540             :  ******************************************************************************/
     541     1357738 : int dbm_get_nze(const dbm_matrix_t *matrix) {
     542     1357738 :   int nze = 0;
     543     2715476 :   for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
     544     1357738 :     nze += matrix->shards[ishard].data_size;
     545             :   }
     546     1357738 :   return nze;
     547             : }
     548             : 
     549             : /*******************************************************************************
     550             :  * \brief Returns the number of local blocks of the given matrix.
     551             :  * \author Ole Schuett
     552             :  ******************************************************************************/
     553      855880 : int dbm_get_num_blocks(const dbm_matrix_t *matrix) {
     554      855880 :   int nblocks = 0;
     555     1711760 :   for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
     556      855880 :     nblocks += matrix->shards[ishard].nblocks;
     557             :   }
     558      855880 :   return nblocks;
     559             : }
     560             : 
     561             : /*******************************************************************************
     562             :  * \brief Returns the row block sizes of the given matrix.
     563             :  * \author Ole Schuett
     564             :  ******************************************************************************/
     565     3724722 : void dbm_get_row_sizes(const dbm_matrix_t *matrix, int *nrows,
     566             :                        const int **row_sizes) {
     567     3724722 :   *nrows = matrix->nrows;
     568     3724722 :   *row_sizes = matrix->row_sizes;
     569     3724722 : }
     570             : 
     571             : /*******************************************************************************
     572             :  * \brief Returns the column block sizes of the given matrix.
     573             :  * \author Ole Schuett
     574             :  ******************************************************************************/
     575     2860630 : void dbm_get_col_sizes(const dbm_matrix_t *matrix, int *ncols,
     576             :                        const int **col_sizes) {
     577     2860630 :   *ncols = matrix->ncols;
     578     2860630 :   *col_sizes = matrix->col_sizes;
     579     2860630 : }
     580             : 
     581             : /*******************************************************************************
     582             :  * \brief Returns the local row block sizes of the given matrix.
     583             :  * \author Ole Schuett
     584             :  ******************************************************************************/
     585      170348 : void dbm_get_local_rows(const dbm_matrix_t *matrix, int *nlocal_rows,
     586             :                         const int **local_rows) {
     587      170348 :   *nlocal_rows = matrix->dist->rows.nlocals;
     588      170348 :   *local_rows = matrix->dist->rows.local_indicies;
     589      170348 : }
     590             : 
     591             : /*******************************************************************************
     592             :  * \brief Returns the local column block sizes of the given matrix.
     593             :  * \author Ole Schuett
     594             :  ******************************************************************************/
     595       74274 : void dbm_get_local_cols(const dbm_matrix_t *matrix, int *nlocal_cols,
     596             :                         const int **local_cols) {
     597       74274 :   *nlocal_cols = matrix->dist->cols.nlocals;
     598       74274 :   *local_cols = matrix->dist->cols.local_indicies;
     599       74274 : }
     600             : 
     601             : /*******************************************************************************
     602             :  * \brief Returns the MPI rank on which the given block should be stored.
     603             :  * \author Ole Schuett
     604             :  ******************************************************************************/
     605    91415565 : int dbm_get_stored_coordinates(const dbm_matrix_t *matrix, const int row,
     606             :                                const int col) {
     607    91415565 :   return dbm_distribution_stored_coords(matrix->dist, row, col);
     608             : }
     609             : 
     610             : /*******************************************************************************
     611             :  * \brief Returns the distribution of the given matrix.
     612             :  * \author Ole Schuett
     613             :  ******************************************************************************/
     614      897525 : const dbm_distribution_t *dbm_get_distribution(const dbm_matrix_t *matrix) {
     615      897525 :   return matrix->dist;
     616             : }
     617             : 
     618             : // EOF

Generated by: LCOV version 1.15