Line data Source code
1 : /*----------------------------------------------------------------------------*/
2 : /* CP2K: A general program to perform molecular dynamics simulations */
3 : /* Copyright 2000-2024 CP2K developers group <https://cp2k.org> */
4 : /* */
5 : /* SPDX-License-Identifier: BSD-3-Clause */
6 : /*----------------------------------------------------------------------------*/
7 :
8 : #include <assert.h>
9 : #include <math.h>
10 : #include <omp.h>
11 : #include <stdbool.h>
12 : #include <stddef.h>
13 : #include <stdlib.h>
14 : #include <string.h>
15 :
16 : #include "dbm_hyperparams.h"
17 : #include "dbm_matrix.h"
18 :
19 : /*******************************************************************************
20 : * \brief Creates a new matrix.
21 : * \author Ole Schuett
22 : ******************************************************************************/
23 1653121 : void dbm_create(dbm_matrix_t **matrix_out, dbm_distribution_t *dist,
24 : const char name[], const int nrows, const int ncols,
25 : const int row_sizes[nrows], const int col_sizes[ncols]) {
26 1653121 : assert(omp_get_num_threads() == 1);
27 :
28 1653121 : dbm_matrix_t *matrix = calloc(1, sizeof(dbm_matrix_t));
29 :
30 1653121 : assert(dist->rows.length == nrows);
31 1653121 : assert(dist->cols.length == ncols);
32 1653121 : dbm_distribution_hold(dist);
33 1653121 : matrix->dist = dist;
34 :
35 1653121 : size_t size = (strlen(name) + 1) * sizeof(char);
36 1653121 : matrix->name = malloc(size);
37 1653121 : assert(matrix->name != NULL);
38 1653121 : memcpy(matrix->name, name, size);
39 :
40 1653121 : matrix->nrows = nrows;
41 1653121 : matrix->ncols = ncols;
42 :
43 1653121 : size = nrows * sizeof(int);
44 1653121 : matrix->row_sizes = malloc(size);
45 1653121 : assert(matrix->row_sizes != NULL);
46 1653121 : memcpy(matrix->row_sizes, row_sizes, size);
47 :
48 1653121 : size = ncols * sizeof(int);
49 1653121 : matrix->col_sizes = malloc(size);
50 1653121 : assert(matrix->col_sizes != NULL);
51 1653121 : memcpy(matrix->col_sizes, col_sizes, size);
52 :
53 1653121 : matrix->shards = malloc(dbm_get_num_shards(matrix) * sizeof(dbm_shard_t));
54 1653121 : assert(matrix->shards != NULL);
55 1653121 : #pragma omp parallel for
56 : for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
57 : dbm_shard_init(&matrix->shards[ishard]);
58 : }
59 :
60 1653121 : assert(*matrix_out == NULL);
61 1653121 : *matrix_out = matrix;
62 1653121 : }
63 :
64 : /*******************************************************************************
65 : * \brief Releases a matrix and all its ressources.
66 : * \author Ole Schuett
67 : ******************************************************************************/
68 1653121 : void dbm_release(dbm_matrix_t *matrix) {
69 1653121 : assert(omp_get_num_threads() == 1);
70 3306242 : for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
71 1653121 : dbm_shard_release(&matrix->shards[ishard]);
72 : }
73 1653121 : dbm_distribution_release(matrix->dist);
74 1653121 : free(matrix->row_sizes);
75 1653121 : free(matrix->col_sizes);
76 1653121 : free(matrix->shards);
77 1653121 : free(matrix->name);
78 1653121 : free(matrix);
79 1653121 : }
80 :
81 : /*******************************************************************************
82 : * \brief Copies content of matrix_b into matrix_a.
83 : * Matrices must have the same row/col block sizes and distribution.
84 : * \author Ole Schuett
85 : ******************************************************************************/
86 410736 : void dbm_copy(dbm_matrix_t *matrix_a, const dbm_matrix_t *matrix_b) {
87 410736 : assert(omp_get_num_threads() == 1);
88 :
89 410736 : assert(matrix_b->nrows == matrix_a->nrows);
90 4757977 : for (int i = 0; i < matrix_b->nrows; i++) {
91 4347241 : assert(matrix_b->row_sizes[i] == matrix_a->row_sizes[i]);
92 : }
93 410736 : assert(matrix_b->ncols == matrix_a->ncols);
94 33566210 : for (int i = 0; i < matrix_b->ncols; i++) {
95 33155474 : assert(matrix_b->col_sizes[i] == matrix_a->col_sizes[i]);
96 : }
97 :
98 410736 : assert(matrix_a->dist == matrix_b->dist);
99 :
100 410736 : #pragma omp parallel for schedule(dynamic)
101 : for (int ishard = 0; ishard < dbm_get_num_shards(matrix_a); ishard++) {
102 : dbm_shard_copy(&matrix_a->shards[ishard], &matrix_b->shards[ishard]);
103 : }
104 410736 : }
105 :
106 : /*******************************************************************************
107 : * \brief Copies content of matrix_b into matrix_a.
108 : * Matrices may have different distributions.
109 : * \author Ole Schuett
110 : ******************************************************************************/
111 144 : void dbm_redistribute(const dbm_matrix_t *matrix, dbm_matrix_t *redist) {
112 144 : assert(omp_get_num_threads() == 1);
113 144 : assert(matrix->nrows == redist->nrows);
114 6384 : for (int i = 0; i < matrix->nrows; i++) {
115 6240 : assert(matrix->row_sizes[i] == redist->row_sizes[i]);
116 : }
117 144 : assert(matrix->ncols == redist->ncols);
118 6384 : for (int i = 0; i < matrix->ncols; i++) {
119 6240 : assert(matrix->col_sizes[i] == redist->col_sizes[i]);
120 : }
121 :
122 144 : assert(dbm_mpi_comms_are_similar(matrix->dist->comm, redist->dist->comm));
123 144 : const dbm_mpi_comm_t comm = redist->dist->comm;
124 144 : const int nranks = dbm_mpi_comm_size(comm);
125 :
126 : // 1st pass: Compute send_count.
127 144 : int send_count[nranks];
128 144 : memset(send_count, 0, nranks * sizeof(int));
129 288 : for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
130 144 : dbm_shard_t *shard = &matrix->shards[ishard];
131 8936 : for (int iblock = 0; iblock < shard->nblocks; iblock++) {
132 8792 : const dbm_block_t *blk = &shard->blocks[iblock];
133 8792 : const int row_size = matrix->row_sizes[blk->row];
134 8792 : const int col_size = matrix->col_sizes[blk->col];
135 8792 : const int block_size = row_size * col_size;
136 8792 : const int rank = dbm_get_stored_coordinates(redist, blk->row, blk->col);
137 8792 : assert(0 <= rank && rank < nranks);
138 8792 : send_count[rank] += 2 + block_size;
139 : }
140 : }
141 :
142 : // 1st communication: Exchange counts.
143 144 : int recv_count[nranks];
144 144 : dbm_mpi_alltoall_int(send_count, 1, recv_count, 1, comm);
145 :
146 : // Compute displacements and allocate data buffers.
147 144 : int send_displ[nranks + 1], recv_displ[nranks + 1];
148 144 : send_displ[0] = recv_displ[0] = 0;
149 432 : for (int irank = 1; irank <= nranks; irank++) {
150 288 : send_displ[irank] = send_displ[irank - 1] + send_count[irank - 1];
151 288 : recv_displ[irank] = recv_displ[irank - 1] + recv_count[irank - 1];
152 : }
153 144 : const int total_send_count = send_displ[nranks];
154 144 : const int total_recv_count = recv_displ[nranks];
155 144 : double *data_send = dbm_mpi_alloc_mem(total_send_count * sizeof(double));
156 144 : double *data_recv = dbm_mpi_alloc_mem(total_recv_count * sizeof(double));
157 :
158 : // 2nd pass: Fill send_data.
159 144 : int send_data_positions[nranks];
160 144 : memcpy(send_data_positions, send_displ, nranks * sizeof(int));
161 288 : for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
162 144 : dbm_shard_t *shard = &matrix->shards[ishard];
163 8936 : for (int iblock = 0; iblock < shard->nblocks; iblock++) {
164 8792 : const dbm_block_t *blk = &shard->blocks[iblock];
165 8792 : const double *blk_data = &shard->data[blk->offset];
166 8792 : const int row_size = matrix->row_sizes[blk->row];
167 8792 : const int col_size = matrix->col_sizes[blk->col];
168 8792 : const int block_size = row_size * col_size;
169 8792 : const int rank = dbm_get_stored_coordinates(redist, blk->row, blk->col);
170 8792 : const int pos = send_data_positions[rank];
171 8792 : data_send[pos + 0] = blk->row; // send integers as doubles
172 8792 : data_send[pos + 1] = blk->col;
173 8792 : memcpy(&data_send[pos + 2], blk_data, block_size * sizeof(double));
174 8792 : send_data_positions[rank] += 2 + block_size;
175 : }
176 : }
177 432 : for (int irank = 0; irank < nranks; irank++) {
178 288 : assert(send_data_positions[irank] == send_displ[irank + 1]);
179 : }
180 :
181 : // 2nd communication: Exchange data.
182 144 : dbm_mpi_alltoallv_double(data_send, send_count, send_displ, data_recv,
183 : recv_count, recv_displ, comm);
184 144 : dbm_mpi_free_mem(data_send);
185 :
186 : // 3rd pass: Unpack data.
187 144 : dbm_clear(redist);
188 144 : int recv_data_pos = 0;
189 8936 : while (recv_data_pos < total_recv_count) {
190 8792 : const int row = (int)data_recv[recv_data_pos + 0];
191 8792 : const int col = (int)data_recv[recv_data_pos + 1];
192 8792 : assert(data_recv[recv_data_pos + 0] - (double)row == 0.0);
193 8792 : assert(data_recv[recv_data_pos + 1] - (double)col == 0.0);
194 8792 : dbm_put_block(redist, row, col, false, &data_recv[recv_data_pos + 2]);
195 8792 : const int row_size = matrix->row_sizes[row];
196 8792 : const int col_size = matrix->col_sizes[col];
197 8792 : const int block_size = row_size * col_size;
198 8792 : recv_data_pos += 2 + block_size;
199 : }
200 144 : assert(recv_data_pos == total_recv_count);
201 144 : dbm_mpi_free_mem(data_recv);
202 144 : }
203 :
204 : /*******************************************************************************
205 : * \brief Looks up a block from given matrics. This routine is thread-safe.
206 : * If the block is not found then a null pointer is returned.
207 : * \author Ole Schuett
208 : ******************************************************************************/
209 22489946 : void dbm_get_block_p(dbm_matrix_t *matrix, const int row, const int col,
210 : double **block, int *row_size, int *col_size) {
211 22489946 : assert(0 <= row && row < matrix->nrows);
212 22489946 : assert(0 <= col && col < matrix->ncols);
213 22489946 : assert(dbm_get_stored_coordinates(matrix, row, col) == matrix->dist->my_rank);
214 22489946 : *row_size = matrix->row_sizes[row];
215 22489946 : *col_size = matrix->col_sizes[col];
216 22489946 : *block = NULL;
217 :
218 22489946 : const int ishard = dbm_get_shard_index(matrix, row, col);
219 22489946 : const dbm_shard_t *shard = &matrix->shards[ishard];
220 22489946 : dbm_block_t *blk = dbm_shard_lookup(shard, row, col);
221 22489946 : if (blk != NULL) {
222 21238751 : *block = &shard->data[blk->offset];
223 : }
224 22489946 : }
225 :
226 : /*******************************************************************************
227 : * \brief Adds a block to given matrix. This routine is thread-safe.
228 : * If block already exist then it gets overwritten (or summed).
229 : * \author Ole Schuett
230 : ******************************************************************************/
231 35012631 : void dbm_put_block(dbm_matrix_t *matrix, const int row, const int col,
232 : const bool summation, const double *block) {
233 35012631 : assert(0 <= row && row < matrix->nrows);
234 35012631 : assert(0 <= col && col < matrix->ncols);
235 35012631 : assert(dbm_get_stored_coordinates(matrix, row, col) == matrix->dist->my_rank);
236 35012631 : const int row_size = matrix->row_sizes[row];
237 35012631 : const int col_size = matrix->col_sizes[col];
238 35012631 : const int block_size = row_size * col_size;
239 :
240 35012631 : const int ishard = dbm_get_shard_index(matrix, row, col);
241 35012631 : dbm_shard_t *shard = &matrix->shards[ishard];
242 35012631 : omp_set_lock(&shard->lock);
243 35012631 : dbm_block_t *blk =
244 35012631 : dbm_shard_get_or_allocate_block(shard, row, col, block_size);
245 35012631 : double *blk_data = &shard->data[blk->offset];
246 35012631 : if (summation) {
247 1092479168 : for (int i = 0; i < block_size; i++) {
248 1089360477 : blk_data[i] += block[i];
249 : }
250 : } else {
251 31893940 : memcpy(blk_data, block, block_size * sizeof(double));
252 : }
253 35012631 : omp_unset_lock(&shard->lock);
254 35012631 : }
255 :
256 : /*******************************************************************************
257 : * \brief Remove all blocks from matrix, but does not release underlying memory.
258 : * \author Ole Schuett
259 : ******************************************************************************/
260 1947446 : void dbm_clear(dbm_matrix_t *matrix) {
261 1947446 : assert(omp_get_num_threads() == 1);
262 :
263 1947446 : #pragma omp parallel for schedule(dynamic)
264 : for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
265 : dbm_shard_t *shard = &matrix->shards[ishard];
266 : shard->nblocks = 0;
267 : shard->data_size = 0;
268 : shard->data_promised = 0;
269 : // Does not deallocate memory, hence data_allocated remains unchanged.
270 : memset(shard->hashtable, 0, shard->hashtable_size * sizeof(int));
271 : }
272 1947446 : }
273 :
274 : /*******************************************************************************
275 : * \brief Removes all blocks from the matrix whose norm is below the threshold.
276 : * Blocks of size zero are always kept.
277 : * \author Ole Schuett
278 : ******************************************************************************/
279 545489 : void dbm_filter(dbm_matrix_t *matrix, const double eps) {
280 545489 : assert(omp_get_num_threads() == 1);
281 :
282 545489 : if (eps == 0.0) {
283 : return;
284 : }
285 494209 : const double eps2 = eps * eps;
286 :
287 494209 : #pragma omp parallel for schedule(dynamic)
288 : for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
289 : dbm_shard_t *shard = &matrix->shards[ishard];
290 : const int old_nblocks = shard->nblocks;
291 : shard->nblocks = 0;
292 : shard->data_promised = 0;
293 : memset(shard->hashtable, 0, shard->hashtable_size * sizeof(int));
294 :
295 : for (int iblock = 0; iblock < old_nblocks; iblock++) {
296 : const dbm_block_t old_blk = shard->blocks[iblock];
297 : const double *old_blk_data = &shard->data[old_blk.offset];
298 : const int row_size = matrix->row_sizes[old_blk.row];
299 : const int col_size = matrix->col_sizes[old_blk.col];
300 : const int block_size = row_size * col_size;
301 : double norm = 0.0;
302 : for (int i = 0; i < block_size; i++) {
303 : norm += old_blk_data[i] * old_blk_data[i];
304 : }
305 : // For historic reasons zero-sized blocks are never filtered.
306 : if (block_size > 0 && norm < eps2) {
307 : continue; // filter the block
308 : }
309 : // Re-create block.
310 : dbm_block_t *new_blk = dbm_shard_promise_new_block(
311 : shard, old_blk.row, old_blk.col, block_size);
312 : assert(new_blk->offset <= old_blk.offset);
313 : if (new_blk->offset != old_blk.offset) {
314 : // Using memmove instead of memcpy because it handles overlap correctly.
315 : double *new_blk_data = &shard->data[new_blk->offset];
316 : memmove(new_blk_data, old_blk_data, block_size * sizeof(double));
317 : }
318 : }
319 : shard->data_size = shard->data_promised;
320 : // TODO: Could call realloc to release excess memory.
321 : }
322 : }
323 :
324 : /*******************************************************************************
325 : * \brief Adds list of blocks efficiently. The blocks will be filled with zeros.
326 : * This routine must always be called within an OpenMP parallel region.
327 : * \author Ole Schuett
328 : ******************************************************************************/
329 1355499 : void dbm_reserve_blocks(dbm_matrix_t *matrix, const int nblocks,
330 : const int rows[], const int cols[]) {
331 1355499 : assert(omp_get_num_threads() == omp_get_max_threads() &&
332 : "Please call dbm_reserve_blocks within an OpenMP parallel region.");
333 1355499 : const int my_rank = matrix->dist->my_rank;
334 29089923 : for (int i = 0; i < nblocks; i++) {
335 27734424 : const int ishard = dbm_get_shard_index(matrix, rows[i], cols[i]);
336 27734424 : dbm_shard_t *shard = &matrix->shards[ishard];
337 27734424 : omp_set_lock(&shard->lock);
338 27734424 : assert(0 <= rows[i] && rows[i] < matrix->nrows);
339 27734424 : assert(0 <= cols[i] && cols[i] < matrix->ncols);
340 27734424 : assert(dbm_get_stored_coordinates(matrix, rows[i], cols[i]) == my_rank);
341 27734424 : const int row_size = matrix->row_sizes[rows[i]];
342 27734424 : const int col_size = matrix->col_sizes[cols[i]];
343 27734424 : const int block_size = row_size * col_size;
344 27734424 : dbm_shard_get_or_promise_block(shard, rows[i], cols[i], block_size);
345 27734424 : omp_unset_lock(&shard->lock);
346 : }
347 1355499 : #pragma omp barrier
348 :
349 : #pragma omp for schedule(dynamic)
350 1355499 : for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
351 1355499 : dbm_shard_t *shard = &matrix->shards[ishard];
352 1355499 : dbm_shard_allocate_promised_blocks(shard);
353 : }
354 1355499 : }
355 :
356 : /*******************************************************************************
357 : * \brief Multiplies all entries in the given matrix by the given factor alpha.
358 : * \author Ole Schuett
359 : ******************************************************************************/
360 459608 : void dbm_scale(dbm_matrix_t *matrix, const double alpha) {
361 459608 : assert(omp_get_num_threads() == 1);
362 459608 : if (alpha == 1.0) {
363 : return;
364 : }
365 346716 : if (alpha == 0.0) {
366 338228 : dbm_zero(matrix);
367 338228 : return;
368 : }
369 :
370 8488 : #pragma omp parallel for schedule(dynamic)
371 : for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
372 : dbm_shard_t *shard = &matrix->shards[ishard];
373 : for (int i = 0; i < shard->data_size; i++) {
374 : shard->data[i] *= alpha;
375 : }
376 : }
377 : }
378 :
379 : /*******************************************************************************
380 : * \brief Sets all blocks in the given matrix to zero.
381 : * \author Ole Schuett
382 : ******************************************************************************/
383 338228 : void dbm_zero(dbm_matrix_t *matrix) {
384 338228 : assert(omp_get_num_threads() == 1);
385 :
386 338228 : #pragma omp parallel for schedule(dynamic)
387 : for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
388 : dbm_shard_t *shard = &matrix->shards[ishard];
389 : memset(shard->data, 0, shard->data_size * sizeof(double));
390 : }
391 338228 : }
392 :
393 : /*******************************************************************************
394 : * \brief Adds matrix_b to matrix_a.
395 : * \author Ole Schuett
396 : ******************************************************************************/
397 203013 : void dbm_add(dbm_matrix_t *matrix_a, const dbm_matrix_t *matrix_b) {
398 203013 : assert(omp_get_num_threads() == 1);
399 203013 : assert(matrix_a->dist == matrix_b->dist);
400 :
401 203013 : #pragma omp parallel for schedule(dynamic)
402 : for (int ishard = 0; ishard < dbm_get_num_shards(matrix_b); ishard++) {
403 : dbm_shard_t *shard_a = &matrix_a->shards[ishard];
404 : const dbm_shard_t *shard_b = &matrix_b->shards[ishard];
405 : for (int iblock = 0; iblock < shard_b->nblocks; iblock++) {
406 : const dbm_block_t blk_b = shard_b->blocks[iblock];
407 :
408 : const int row_size = matrix_b->row_sizes[blk_b.row];
409 : const int col_size = matrix_b->col_sizes[blk_b.col];
410 : assert(row_size == matrix_a->row_sizes[blk_b.row]);
411 : assert(col_size == matrix_a->col_sizes[blk_b.col]);
412 : const int block_size = row_size * col_size;
413 : dbm_block_t *blk_a = dbm_shard_get_or_allocate_block(
414 : shard_a, blk_b.row, blk_b.col, block_size);
415 : double *data_a = &shard_a->data[blk_a->offset];
416 : const double *data_b = &shard_b->data[blk_b.offset];
417 : for (int i = 0; i < block_size; i++) {
418 : data_a[i] += data_b[i];
419 : }
420 : }
421 : }
422 203013 : }
423 :
424 : /*******************************************************************************
425 : * \brief Creates an iterator for the blocks of the given matrix.
426 : * The iteration order is not stable.
427 : * This routine must always be called within an OpenMP parallel region.
428 : * \author Ole Schuett
429 : ******************************************************************************/
430 3148365 : void dbm_iterator_start(dbm_iterator_t **iter_out, const dbm_matrix_t *matrix) {
431 3148365 : assert(omp_get_num_threads() == omp_get_max_threads() &&
432 : "Please call dbm_iterator_start within an OpenMP parallel region.");
433 3148365 : dbm_iterator_t *iter = malloc(sizeof(dbm_iterator_t));
434 3148365 : assert(iter != NULL);
435 3148365 : iter->matrix = matrix;
436 3148365 : iter->next_block = 0;
437 3148365 : iter->next_shard = omp_get_thread_num();
438 3687288 : while (iter->next_shard < dbm_get_num_shards(matrix) &&
439 3148365 : matrix->shards[iter->next_shard].nblocks == 0) {
440 538923 : iter->next_shard += omp_get_num_threads();
441 : }
442 3148365 : assert(*iter_out == NULL);
443 3148365 : *iter_out = iter;
444 3148365 : }
445 :
446 : /*******************************************************************************
447 : * \brief Returns number of blocks the iterator will provide to calling thread.
448 : * \author Ole Schuett
449 : ******************************************************************************/
450 592305 : int dbm_iterator_num_blocks(const dbm_iterator_t *iter) {
451 592305 : int num_blocks = 0;
452 592305 : for (int ishard = omp_get_thread_num();
453 1184610 : ishard < dbm_get_num_shards(iter->matrix);
454 592305 : ishard += omp_get_num_threads()) {
455 592305 : num_blocks += iter->matrix->shards[ishard].nblocks;
456 : }
457 592305 : return num_blocks;
458 : }
459 :
460 : /*******************************************************************************
461 : * \brief Tests whether the given iterator has any block left.
462 : * \author Ole Schuett
463 : ******************************************************************************/
464 57360136 : bool dbm_iterator_blocks_left(const dbm_iterator_t *iter) {
465 57360136 : return iter->next_shard < dbm_get_num_shards(iter->matrix);
466 : }
467 :
468 : /*******************************************************************************
469 : * \brief Returns the next block from the given iterator.
470 : * \author Ole Schuett
471 : ******************************************************************************/
472 65739674 : void dbm_iterator_next_block(dbm_iterator_t *iter, int *row, int *col,
473 : double **block, int *row_size, int *col_size) {
474 65739674 : const dbm_matrix_t *matrix = iter->matrix;
475 65739674 : assert(iter->next_shard < dbm_get_num_shards(matrix));
476 65739674 : const dbm_shard_t *shard = &matrix->shards[iter->next_shard];
477 65739674 : assert(iter->next_block < shard->nblocks);
478 65739674 : dbm_block_t *blk = &shard->blocks[iter->next_block];
479 :
480 65739674 : *row = blk->row;
481 65739674 : *col = blk->col;
482 65739674 : *row_size = matrix->row_sizes[blk->row];
483 65739674 : *col_size = matrix->col_sizes[blk->col];
484 65739674 : *block = &shard->data[blk->offset];
485 :
486 65739674 : iter->next_block++;
487 65739674 : if (iter->next_block >= shard->nblocks) {
488 : // Advance to the next non-empty shard...
489 2609442 : iter->next_shard += omp_get_num_threads();
490 2609442 : while (iter->next_shard < dbm_get_num_shards(matrix) &&
491 0 : matrix->shards[iter->next_shard].nblocks == 0) {
492 0 : iter->next_shard += omp_get_num_threads();
493 : }
494 2609442 : iter->next_block = 0; // ...and continue with its first block.
495 : }
496 65739674 : }
497 :
498 : /*******************************************************************************
499 : * \brief Releases the given iterator.
500 : * \author Ole Schuett
501 : ******************************************************************************/
502 3148365 : void dbm_iterator_stop(dbm_iterator_t *iter) { free(iter); }
503 :
504 : /*******************************************************************************
505 : * \brief Computes a checksum of the given matrix.
506 : * \author Ole Schuett
507 : ******************************************************************************/
508 190 : double dbm_checksum(const dbm_matrix_t *matrix) {
509 190 : double checksum = 0.0;
510 380 : for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
511 190 : const dbm_shard_t *shard = &matrix->shards[ishard];
512 14383658 : for (int i = 0; i < shard->data_size; i++) {
513 14383468 : checksum += shard->data[i] * shard->data[i];
514 : }
515 : }
516 190 : dbm_mpi_sum_double(&checksum, 1, matrix->dist->comm);
517 190 : return checksum;
518 : }
519 :
520 : /*******************************************************************************
521 : * \brief Returns the absolute value of the larges element of the entire matrix.
522 : * \author Ole Schuett
523 : ******************************************************************************/
524 48 : double dbm_maxabs(const dbm_matrix_t *matrix) {
525 48 : double maxabs = 0.0;
526 96 : for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
527 48 : dbm_shard_t *shard = &matrix->shards[ishard];
528 1599050 : for (int i = 0; i < shard->data_size; i++) {
529 1599002 : maxabs = fmax(maxabs, fabs(shard->data[i]));
530 : }
531 : }
532 48 : dbm_mpi_max_double(&maxabs, 1, matrix->dist->comm);
533 48 : return maxabs;
534 : }
535 :
536 : /*******************************************************************************
537 : * \brief Returns the name of the matrix of the given matrix.
538 : * \author Ole Schuett
539 : ******************************************************************************/
540 1708539 : const char *dbm_get_name(const dbm_matrix_t *matrix) { return matrix->name; }
541 :
542 : /*******************************************************************************
543 : * \brief Returns the number of local Non-Zero Elements of the given matrix.
544 : * \author Ole Schuett
545 : ******************************************************************************/
546 1742495 : int dbm_get_nze(const dbm_matrix_t *matrix) {
547 1742495 : int nze = 0;
548 3484990 : for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
549 1742495 : nze += matrix->shards[ishard].data_size;
550 : }
551 1742495 : return nze;
552 : }
553 :
554 : /*******************************************************************************
555 : * \brief Returns the number of local blocks of the given matrix.
556 : * \author Ole Schuett
557 : ******************************************************************************/
558 959404 : int dbm_get_num_blocks(const dbm_matrix_t *matrix) {
559 959404 : int nblocks = 0;
560 1918808 : for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
561 959404 : nblocks += matrix->shards[ishard].nblocks;
562 : }
563 959404 : return nblocks;
564 : }
565 :
566 : /*******************************************************************************
567 : * \brief Returns the row block sizes of the given matrix.
568 : * \author Ole Schuett
569 : ******************************************************************************/
570 4350948 : void dbm_get_row_sizes(const dbm_matrix_t *matrix, int *nrows,
571 : const int **row_sizes) {
572 4350948 : *nrows = matrix->nrows;
573 4350948 : *row_sizes = matrix->row_sizes;
574 4350948 : }
575 :
576 : /*******************************************************************************
577 : * \brief Returns the column block sizes of the given matrix.
578 : * \author Ole Schuett
579 : ******************************************************************************/
580 3297010 : void dbm_get_col_sizes(const dbm_matrix_t *matrix, int *ncols,
581 : const int **col_sizes) {
582 3297010 : *ncols = matrix->ncols;
583 3297010 : *col_sizes = matrix->col_sizes;
584 3297010 : }
585 :
586 : /*******************************************************************************
587 : * \brief Returns the local row block sizes of the given matrix.
588 : * \author Ole Schuett
589 : ******************************************************************************/
590 272240 : void dbm_get_local_rows(const dbm_matrix_t *matrix, int *nlocal_rows,
591 : const int **local_rows) {
592 272240 : *nlocal_rows = matrix->dist->rows.nlocals;
593 272240 : *local_rows = matrix->dist->rows.local_indicies;
594 272240 : }
595 :
596 : /*******************************************************************************
597 : * \brief Returns the local column block sizes of the given matrix.
598 : * \author Ole Schuett
599 : ******************************************************************************/
600 109102 : void dbm_get_local_cols(const dbm_matrix_t *matrix, int *nlocal_cols,
601 : const int **local_cols) {
602 109102 : *nlocal_cols = matrix->dist->cols.nlocals;
603 109102 : *local_cols = matrix->dist->cols.local_indicies;
604 109102 : }
605 :
606 : /*******************************************************************************
607 : * \brief Returns the MPI rank on which the given block should be stored.
608 : * \author Ole Schuett
609 : ******************************************************************************/
610 91788356 : int dbm_get_stored_coordinates(const dbm_matrix_t *matrix, const int row,
611 : const int col) {
612 91788356 : return dbm_distribution_stored_coords(matrix->dist, row, col);
613 : }
614 :
615 : /*******************************************************************************
616 : * \brief Returns the distribution of the given matrix.
617 : * \author Ole Schuett
618 : ******************************************************************************/
619 1199065 : const dbm_distribution_t *dbm_get_distribution(const dbm_matrix_t *matrix) {
620 1199065 : return matrix->dist;
621 : }
622 :
623 : // EOF
|