Line data Source code
1 : /*----------------------------------------------------------------------------*/
2 : /* CP2K: A general program to perform molecular dynamics simulations */
3 : /* Copyright 2000-2024 CP2K developers group <https://cp2k.org> */
4 : /* */
5 : /* SPDX-License-Identifier: BSD-3-Clause */
6 : /*----------------------------------------------------------------------------*/
7 :
8 : #include <assert.h>
9 : #include <math.h>
10 : #include <omp.h>
11 : #include <stdbool.h>
12 : #include <stddef.h>
13 : #include <stdlib.h>
14 : #include <string.h>
15 :
16 : #include "dbm_hyperparams.h"
17 : #include "dbm_matrix.h"
18 :
19 : /*******************************************************************************
20 : * \brief Creates a new matrix.
21 : * \author Ole Schuett
22 : ******************************************************************************/
23 1138271 : void dbm_create(dbm_matrix_t **matrix_out, dbm_distribution_t *dist,
24 : const char name[], const int nrows, const int ncols,
25 : const int row_sizes[nrows], const int col_sizes[ncols]) {
26 1138271 : assert(omp_get_num_threads() == 1);
27 :
28 1138271 : dbm_matrix_t *matrix = calloc(1, sizeof(dbm_matrix_t));
29 :
30 1138271 : assert(dist->rows.length == nrows);
31 1138271 : assert(dist->cols.length == ncols);
32 1138271 : dbm_distribution_hold(dist);
33 1138271 : matrix->dist = dist;
34 :
35 1138271 : size_t size = (strlen(name) + 1) * sizeof(char);
36 1138271 : matrix->name = malloc(size);
37 1138271 : memcpy(matrix->name, name, size);
38 :
39 1138271 : matrix->nrows = nrows;
40 1138271 : matrix->ncols = ncols;
41 :
42 1138271 : size = nrows * sizeof(int);
43 1138271 : matrix->row_sizes = malloc(size);
44 1138271 : memcpy(matrix->row_sizes, row_sizes, size);
45 :
46 1138271 : size = ncols * sizeof(int);
47 1138271 : matrix->col_sizes = malloc(size);
48 1138271 : memcpy(matrix->col_sizes, col_sizes, size);
49 :
50 1138271 : matrix->shards = malloc(dbm_get_num_shards(matrix) * sizeof(dbm_shard_t));
51 1138271 : #pragma omp parallel for
52 : for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
53 : dbm_shard_init(&matrix->shards[ishard]);
54 : }
55 :
56 1138271 : assert(*matrix_out == NULL);
57 1138271 : *matrix_out = matrix;
58 1138271 : }
59 :
60 : /*******************************************************************************
61 : * \brief Releases a matrix and all its ressources.
62 : * \author Ole Schuett
63 : ******************************************************************************/
64 1138271 : void dbm_release(dbm_matrix_t *matrix) {
65 1138271 : assert(omp_get_num_threads() == 1);
66 2276542 : for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
67 1138271 : dbm_shard_release(&matrix->shards[ishard]);
68 : }
69 1138271 : dbm_distribution_release(matrix->dist);
70 1138271 : free(matrix->row_sizes);
71 1138271 : free(matrix->col_sizes);
72 1138271 : free(matrix->shards);
73 1138271 : free(matrix->name);
74 1138271 : free(matrix);
75 1138271 : }
76 :
77 : /*******************************************************************************
78 : * \brief Copies content of matrix_b into matrix_a.
79 : * Matrices must have the same row/col block sizes and distribution.
80 : * \author Ole Schuett
81 : ******************************************************************************/
82 265848 : void dbm_copy(dbm_matrix_t *matrix_a, const dbm_matrix_t *matrix_b) {
83 265848 : assert(omp_get_num_threads() == 1);
84 :
85 265848 : assert(matrix_b->nrows == matrix_a->nrows);
86 4110263 : for (int i = 0; i < matrix_b->nrows; i++) {
87 3844415 : assert(matrix_b->row_sizes[i] == matrix_a->row_sizes[i]);
88 : }
89 265848 : assert(matrix_b->ncols == matrix_a->ncols);
90 34368874 : for (int i = 0; i < matrix_b->ncols; i++) {
91 34103026 : assert(matrix_b->col_sizes[i] == matrix_a->col_sizes[i]);
92 : }
93 :
94 265848 : assert(matrix_a->dist == matrix_b->dist);
95 :
96 265848 : #pragma omp parallel for schedule(dynamic)
97 : for (int ishard = 0; ishard < dbm_get_num_shards(matrix_a); ishard++) {
98 : dbm_shard_copy(&matrix_a->shards[ishard], &matrix_b->shards[ishard]);
99 : }
100 265848 : }
101 :
102 : /*******************************************************************************
103 : * \brief Copies content of matrix_b into matrix_a.
104 : * Matrices may have different distributions.
105 : * \author Ole Schuett
106 : ******************************************************************************/
107 144 : void dbm_redistribute(const dbm_matrix_t *matrix, dbm_matrix_t *redist) {
108 144 : assert(omp_get_num_threads() == 1);
109 144 : assert(matrix->nrows == redist->nrows);
110 6384 : for (int i = 0; i < matrix->nrows; i++) {
111 6240 : assert(matrix->row_sizes[i] == redist->row_sizes[i]);
112 : }
113 144 : assert(matrix->ncols == redist->ncols);
114 6384 : for (int i = 0; i < matrix->ncols; i++) {
115 6240 : assert(matrix->col_sizes[i] == redist->col_sizes[i]);
116 : }
117 :
118 144 : assert(dbm_mpi_comms_are_similar(matrix->dist->comm, redist->dist->comm));
119 144 : const dbm_mpi_comm_t comm = redist->dist->comm;
120 144 : const int nranks = dbm_mpi_comm_size(comm);
121 :
122 : // 1st pass: Compute send_count.
123 144 : int send_count[nranks];
124 144 : memset(send_count, 0, nranks * sizeof(int));
125 288 : for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
126 144 : dbm_shard_t *shard = &matrix->shards[ishard];
127 8936 : for (int iblock = 0; iblock < shard->nblocks; iblock++) {
128 8792 : const dbm_block_t *blk = &shard->blocks[iblock];
129 8792 : const int row_size = matrix->row_sizes[blk->row];
130 8792 : const int col_size = matrix->col_sizes[blk->col];
131 8792 : const int block_size = row_size * col_size;
132 8792 : const int rank = dbm_get_stored_coordinates(redist, blk->row, blk->col);
133 8792 : assert(0 <= rank && rank < nranks);
134 8792 : send_count[rank] += 2 + block_size;
135 : }
136 : }
137 :
138 : // 1st communication: Exchange counts.
139 144 : int recv_count[nranks];
140 144 : dbm_mpi_alltoall_int(send_count, 1, recv_count, 1, comm);
141 :
142 : // Compute displacements and allocate data buffers.
143 144 : int send_displ[nranks + 1], recv_displ[nranks + 1];
144 144 : send_displ[0] = recv_displ[0] = 0;
145 432 : for (int irank = 1; irank <= nranks; irank++) {
146 288 : send_displ[irank] = send_displ[irank - 1] + send_count[irank - 1];
147 288 : recv_displ[irank] = recv_displ[irank - 1] + recv_count[irank - 1];
148 : }
149 144 : const int total_send_count = send_displ[nranks];
150 144 : const int total_recv_count = recv_displ[nranks];
151 144 : double *data_send = dbm_mpi_alloc_mem(total_send_count * sizeof(double));
152 144 : double *data_recv = dbm_mpi_alloc_mem(total_recv_count * sizeof(double));
153 :
154 : // 2nd pass: Fill send_data.
155 144 : int send_data_positions[nranks];
156 144 : memcpy(send_data_positions, send_displ, nranks * sizeof(int));
157 288 : for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
158 144 : dbm_shard_t *shard = &matrix->shards[ishard];
159 8936 : for (int iblock = 0; iblock < shard->nblocks; iblock++) {
160 8792 : const dbm_block_t *blk = &shard->blocks[iblock];
161 8792 : const double *blk_data = &shard->data[blk->offset];
162 8792 : const int row_size = matrix->row_sizes[blk->row];
163 8792 : const int col_size = matrix->col_sizes[blk->col];
164 8792 : const int block_size = row_size * col_size;
165 8792 : const int rank = dbm_get_stored_coordinates(redist, blk->row, blk->col);
166 8792 : const int pos = send_data_positions[rank];
167 8792 : data_send[pos + 0] = blk->row; // send integers as doubles
168 8792 : data_send[pos + 1] = blk->col;
169 8792 : memcpy(&data_send[pos + 2], blk_data, block_size * sizeof(double));
170 8792 : send_data_positions[rank] += 2 + block_size;
171 : }
172 : }
173 432 : for (int irank = 0; irank < nranks; irank++) {
174 288 : assert(send_data_positions[irank] == send_displ[irank + 1]);
175 : }
176 :
177 : // 2nd communication: Exchange data.
178 144 : dbm_mpi_alltoallv_double(data_send, send_count, send_displ, data_recv,
179 : recv_count, recv_displ, comm);
180 144 : dbm_mpi_free_mem(data_send);
181 :
182 : // 3rd pass: Unpack data.
183 144 : dbm_clear(redist);
184 144 : int recv_data_pos = 0;
185 8936 : while (recv_data_pos < total_recv_count) {
186 8792 : const int row = (int)data_recv[recv_data_pos + 0];
187 8792 : const int col = (int)data_recv[recv_data_pos + 1];
188 8792 : assert(data_recv[recv_data_pos + 0] - (double)row == 0.0);
189 8792 : assert(data_recv[recv_data_pos + 1] - (double)col == 0.0);
190 8792 : dbm_put_block(redist, row, col, false, &data_recv[recv_data_pos + 2]);
191 8792 : const int row_size = matrix->row_sizes[row];
192 8792 : const int col_size = matrix->col_sizes[col];
193 8792 : const int block_size = row_size * col_size;
194 8792 : recv_data_pos += 2 + block_size;
195 : }
196 144 : assert(recv_data_pos == total_recv_count);
197 144 : dbm_mpi_free_mem(data_recv);
198 144 : }
199 :
200 : /*******************************************************************************
201 : * \brief Looks up a block from given matrics. This routine is thread-safe.
202 : * If the block is not found then a null pointer is returned.
203 : * \author Ole Schuett
204 : ******************************************************************************/
205 22816472 : void dbm_get_block_p(dbm_matrix_t *matrix, const int row, const int col,
206 : double **block, int *row_size, int *col_size) {
207 22816472 : assert(0 <= row && row < matrix->nrows);
208 22816472 : assert(0 <= col && col < matrix->ncols);
209 22816472 : assert(dbm_get_stored_coordinates(matrix, row, col) == matrix->dist->my_rank);
210 22816472 : *row_size = matrix->row_sizes[row];
211 22816472 : *col_size = matrix->col_sizes[col];
212 22816472 : *block = NULL;
213 :
214 22816472 : const int ishard = dbm_get_shard_index(matrix, row, col);
215 22816472 : const dbm_shard_t *shard = &matrix->shards[ishard];
216 22816472 : dbm_block_t *blk = dbm_shard_lookup(shard, row, col);
217 22816472 : if (blk != NULL) {
218 21597289 : *block = &shard->data[blk->offset];
219 : }
220 22816472 : }
221 :
222 : /*******************************************************************************
223 : * \brief Adds a block to given matrix. This routine is thread-safe.
224 : * If block already exist then it gets overwritten (or summed).
225 : * \author Ole Schuett
226 : ******************************************************************************/
227 35014148 : void dbm_put_block(dbm_matrix_t *matrix, const int row, const int col,
228 : const bool summation, const double *block) {
229 35014148 : assert(0 <= row && row < matrix->nrows);
230 35014148 : assert(0 <= col && col < matrix->ncols);
231 35014148 : assert(dbm_get_stored_coordinates(matrix, row, col) == matrix->dist->my_rank);
232 35014148 : const int row_size = matrix->row_sizes[row];
233 35014148 : const int col_size = matrix->col_sizes[col];
234 35014148 : const int block_size = row_size * col_size;
235 :
236 35014148 : const int ishard = dbm_get_shard_index(matrix, row, col);
237 35014148 : dbm_shard_t *shard = &matrix->shards[ishard];
238 35014148 : omp_set_lock(&shard->lock);
239 35014148 : dbm_block_t *blk =
240 35014148 : dbm_shard_get_or_allocate_block(shard, row, col, block_size);
241 35014148 : double *blk_data = &shard->data[blk->offset];
242 35014148 : if (summation) {
243 1102120389 : for (int i = 0; i < block_size; i++) {
244 1099148095 : blk_data[i] += block[i];
245 : }
246 : } else {
247 32041854 : memcpy(blk_data, block, block_size * sizeof(double));
248 : }
249 35014148 : omp_unset_lock(&shard->lock);
250 35014148 : }
251 :
252 : /*******************************************************************************
253 : * \brief Remove all blocks from matrix, but does not release underlying memory.
254 : * \author Ole Schuett
255 : ******************************************************************************/
256 1466120 : void dbm_clear(dbm_matrix_t *matrix) {
257 1466120 : assert(omp_get_num_threads() == 1);
258 :
259 1466120 : #pragma omp parallel for schedule(dynamic)
260 : for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
261 : dbm_shard_t *shard = &matrix->shards[ishard];
262 : shard->nblocks = 0;
263 : shard->data_size = 0;
264 : shard->data_promised = 0;
265 : // Does not deallocate memory, hence data_allocated remains unchanged.
266 : memset(shard->hashtable, 0, shard->hashtable_size * sizeof(int));
267 : }
268 1466120 : }
269 :
270 : /*******************************************************************************
271 : * \brief Removes all blocks from the matrix whose norm is below the threshold.
272 : * Blocks of size zero are always kept.
273 : * \author Ole Schuett
274 : ******************************************************************************/
275 437523 : void dbm_filter(dbm_matrix_t *matrix, const double eps) {
276 437523 : assert(omp_get_num_threads() == 1);
277 :
278 437523 : if (eps == 0.0) {
279 : return;
280 : }
281 430459 : const double eps2 = eps * eps;
282 :
283 430459 : #pragma omp parallel for schedule(dynamic)
284 : for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
285 : dbm_shard_t *shard = &matrix->shards[ishard];
286 : const int old_nblocks = shard->nblocks;
287 : shard->nblocks = 0;
288 : shard->data_promised = 0;
289 : memset(shard->hashtable, 0, shard->hashtable_size * sizeof(int));
290 :
291 : for (int iblock = 0; iblock < old_nblocks; iblock++) {
292 : const dbm_block_t old_blk = shard->blocks[iblock];
293 : const double *old_blk_data = &shard->data[old_blk.offset];
294 : const int row_size = matrix->row_sizes[old_blk.row];
295 : const int col_size = matrix->col_sizes[old_blk.col];
296 : const int block_size = row_size * col_size;
297 : double norm = 0.0;
298 : for (int i = 0; i < block_size; i++) {
299 : norm += old_blk_data[i] * old_blk_data[i];
300 : }
301 : // For historic reasons zero-sized blocks are never filtered.
302 : if (block_size > 0 && norm < eps2) {
303 : continue; // filter the block
304 : }
305 : // Re-create block.
306 : dbm_block_t *new_blk = dbm_shard_promise_new_block(
307 : shard, old_blk.row, old_blk.col, block_size);
308 : assert(new_blk->offset <= old_blk.offset);
309 : if (new_blk->offset != old_blk.offset) {
310 : // Using memmove instead of memcpy because it handles overlap correctly.
311 : double *new_blk_data = &shard->data[new_blk->offset];
312 : memmove(new_blk_data, old_blk_data, block_size * sizeof(double));
313 : }
314 : }
315 : shard->data_size = shard->data_promised;
316 : // TODO: Could call realloc to release excess memory.
317 : }
318 : }
319 :
320 : /*******************************************************************************
321 : * \brief Adds list of blocks efficiently. The blocks will be filled with zeros.
322 : * This routine must always be called within an OpenMP parallel region.
323 : * \author Ole Schuett
324 : ******************************************************************************/
325 1077929 : void dbm_reserve_blocks(dbm_matrix_t *matrix, const int nblocks,
326 : const int rows[], const int cols[]) {
327 1077929 : assert(omp_get_num_threads() == omp_get_max_threads() &&
328 : "Please call dbm_reserve_blocks within an OpenMP parallel region.");
329 1077929 : const int my_rank = matrix->dist->my_rank;
330 28470575 : for (int i = 0; i < nblocks; i++) {
331 27392646 : const int ishard = dbm_get_shard_index(matrix, rows[i], cols[i]);
332 27392646 : dbm_shard_t *shard = &matrix->shards[ishard];
333 27392646 : omp_set_lock(&shard->lock);
334 27392646 : assert(0 <= rows[i] && rows[i] < matrix->nrows);
335 27392646 : assert(0 <= cols[i] && cols[i] < matrix->ncols);
336 27392646 : assert(dbm_get_stored_coordinates(matrix, rows[i], cols[i]) == my_rank);
337 27392646 : const int row_size = matrix->row_sizes[rows[i]];
338 27392646 : const int col_size = matrix->col_sizes[cols[i]];
339 27392646 : const int block_size = row_size * col_size;
340 27392646 : dbm_shard_get_or_promise_block(shard, rows[i], cols[i], block_size);
341 27392646 : omp_unset_lock(&shard->lock);
342 : }
343 1077929 : #pragma omp barrier
344 :
345 : #pragma omp for schedule(dynamic)
346 1077929 : for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
347 1077929 : dbm_shard_t *shard = &matrix->shards[ishard];
348 1077929 : dbm_shard_allocate_promised_blocks(shard);
349 : }
350 1077929 : }
351 :
352 : /*******************************************************************************
353 : * \brief Multiplies all entries in the given matrix by the given factor alpha.
354 : * \author Ole Schuett
355 : ******************************************************************************/
356 278501 : void dbm_scale(dbm_matrix_t *matrix, const double alpha) {
357 278501 : assert(omp_get_num_threads() == 1);
358 278501 : if (alpha == 1.0) {
359 : return;
360 : }
361 192403 : if (alpha == 0.0) {
362 183371 : dbm_zero(matrix);
363 183371 : return;
364 : }
365 :
366 9032 : #pragma omp parallel for schedule(dynamic)
367 : for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
368 : dbm_shard_t *shard = &matrix->shards[ishard];
369 : for (int i = 0; i < shard->data_size; i++) {
370 : shard->data[i] *= alpha;
371 : }
372 : }
373 : }
374 :
375 : /*******************************************************************************
376 : * \brief Sets all blocks in the given matrix to zero.
377 : * \author Ole Schuett
378 : ******************************************************************************/
379 183371 : void dbm_zero(dbm_matrix_t *matrix) {
380 183371 : assert(omp_get_num_threads() == 1);
381 :
382 183371 : #pragma omp parallel for schedule(dynamic)
383 : for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
384 : dbm_shard_t *shard = &matrix->shards[ishard];
385 : memset(shard->data, 0, shard->data_size * sizeof(double));
386 : }
387 183371 : }
388 :
389 : /*******************************************************************************
390 : * \brief Adds matrix_b to matrix_a.
391 : * \author Ole Schuett
392 : ******************************************************************************/
393 130569 : void dbm_add(dbm_matrix_t *matrix_a, const dbm_matrix_t *matrix_b) {
394 130569 : assert(omp_get_num_threads() == 1);
395 130569 : assert(matrix_a->dist == matrix_b->dist);
396 :
397 130569 : #pragma omp parallel for schedule(dynamic)
398 : for (int ishard = 0; ishard < dbm_get_num_shards(matrix_b); ishard++) {
399 : dbm_shard_t *shard_a = &matrix_a->shards[ishard];
400 : const dbm_shard_t *shard_b = &matrix_b->shards[ishard];
401 : for (int iblock = 0; iblock < shard_b->nblocks; iblock++) {
402 : const dbm_block_t blk_b = shard_b->blocks[iblock];
403 :
404 : const int row_size = matrix_b->row_sizes[blk_b.row];
405 : const int col_size = matrix_b->col_sizes[blk_b.col];
406 : assert(row_size == matrix_a->row_sizes[blk_b.row]);
407 : assert(col_size == matrix_a->col_sizes[blk_b.col]);
408 : const int block_size = row_size * col_size;
409 : dbm_block_t *blk_a = dbm_shard_get_or_allocate_block(
410 : shard_a, blk_b.row, blk_b.col, block_size);
411 : double *data_a = &shard_a->data[blk_a->offset];
412 : const double *data_b = &shard_b->data[blk_b.offset];
413 : for (int i = 0; i < block_size; i++) {
414 : data_a[i] += data_b[i];
415 : }
416 : }
417 : }
418 130569 : }
419 :
420 : /*******************************************************************************
421 : * \brief Creates an iterator for the blocks of the given matrix.
422 : * The iteration order is not stable.
423 : * This routine must always be called within an OpenMP parallel region.
424 : * \author Ole Schuett
425 : ******************************************************************************/
426 2551901 : void dbm_iterator_start(dbm_iterator_t **iter_out, const dbm_matrix_t *matrix) {
427 2551901 : assert(omp_get_num_threads() == omp_get_max_threads() &&
428 : "Please call dbm_iterator_start within an OpenMP parallel region.");
429 2551901 : dbm_iterator_t *iter = malloc(sizeof(dbm_iterator_t));
430 2551901 : iter->matrix = matrix;
431 2551901 : iter->next_block = 0;
432 2551901 : iter->next_shard = omp_get_thread_num();
433 3012883 : while (iter->next_shard < dbm_get_num_shards(matrix) &&
434 2551901 : matrix->shards[iter->next_shard].nblocks == 0) {
435 460982 : iter->next_shard += omp_get_num_threads();
436 : }
437 2551901 : assert(*iter_out == NULL);
438 2551901 : *iter_out = iter;
439 2551901 : }
440 :
441 : /*******************************************************************************
442 : * \brief Returns number of blocks the iterator will provide to calling thread.
443 : * \author Ole Schuett
444 : ******************************************************************************/
445 505430 : int dbm_iterator_num_blocks(const dbm_iterator_t *iter) {
446 505430 : int num_blocks = 0;
447 505430 : for (int ishard = omp_get_thread_num();
448 1010860 : ishard < dbm_get_num_shards(iter->matrix);
449 505430 : ishard += omp_get_num_threads()) {
450 505430 : num_blocks += iter->matrix->shards[ishard].nblocks;
451 : }
452 505430 : return num_blocks;
453 : }
454 :
455 : /*******************************************************************************
456 : * \brief Tests whether the given iterator has any block left.
457 : * \author Ole Schuett
458 : ******************************************************************************/
459 56396241 : bool dbm_iterator_blocks_left(const dbm_iterator_t *iter) {
460 56396241 : return iter->next_shard < dbm_get_num_shards(iter->matrix);
461 : }
462 :
463 : /*******************************************************************************
464 : * \brief Returns the next block from the given iterator.
465 : * \author Ole Schuett
466 : ******************************************************************************/
467 65474452 : void dbm_iterator_next_block(dbm_iterator_t *iter, int *row, int *col,
468 : double **block, int *row_size, int *col_size) {
469 65474452 : const dbm_matrix_t *matrix = iter->matrix;
470 65474452 : assert(iter->next_shard < dbm_get_num_shards(matrix));
471 65474452 : const dbm_shard_t *shard = &matrix->shards[iter->next_shard];
472 65474452 : assert(iter->next_block < shard->nblocks);
473 65474452 : dbm_block_t *blk = &shard->blocks[iter->next_block];
474 :
475 65474452 : *row = blk->row;
476 65474452 : *col = blk->col;
477 65474452 : *row_size = matrix->row_sizes[blk->row];
478 65474452 : *col_size = matrix->col_sizes[blk->col];
479 65474452 : *block = &shard->data[blk->offset];
480 :
481 65474452 : iter->next_block++;
482 65474452 : if (iter->next_block >= shard->nblocks) {
483 : // Advance to the next non-empty shard...
484 2090919 : iter->next_shard += omp_get_num_threads();
485 2090919 : while (iter->next_shard < dbm_get_num_shards(matrix) &&
486 0 : matrix->shards[iter->next_shard].nblocks == 0) {
487 0 : iter->next_shard += omp_get_num_threads();
488 : }
489 2090919 : iter->next_block = 0; // ...and continue with its first block.
490 : }
491 65474452 : }
492 :
493 : /*******************************************************************************
494 : * \brief Releases the given iterator.
495 : * \author Ole Schuett
496 : ******************************************************************************/
497 2551901 : void dbm_iterator_stop(dbm_iterator_t *iter) { free(iter); }
498 :
499 : /*******************************************************************************
500 : * \brief Computes a checksum of the given matrix.
501 : * \author Ole Schuett
502 : ******************************************************************************/
503 190 : double dbm_checksum(const dbm_matrix_t *matrix) {
504 190 : double checksum = 0.0;
505 380 : for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
506 190 : const dbm_shard_t *shard = &matrix->shards[ishard];
507 14383658 : for (int i = 0; i < shard->data_size; i++) {
508 14383468 : checksum += shard->data[i] * shard->data[i];
509 : }
510 : }
511 190 : dbm_mpi_sum_double(&checksum, 1, matrix->dist->comm);
512 190 : return checksum;
513 : }
514 :
515 : /*******************************************************************************
516 : * \brief Returns the absolute value of the larges element of the entire matrix.
517 : * \author Ole Schuett
518 : ******************************************************************************/
519 48 : double dbm_maxabs(const dbm_matrix_t *matrix) {
520 48 : double maxabs = 0.0;
521 96 : for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
522 48 : dbm_shard_t *shard = &matrix->shards[ishard];
523 1599050 : for (int i = 0; i < shard->data_size; i++) {
524 1599002 : maxabs = fmax(maxabs, fabs(shard->data[i]));
525 : }
526 : }
527 48 : dbm_mpi_max_double(&maxabs, 1, matrix->dist->comm);
528 48 : return maxabs;
529 : }
530 :
531 : /*******************************************************************************
532 : * \brief Returns the name of the matrix of the given matrix.
533 : * \author Ole Schuett
534 : ******************************************************************************/
535 1127774 : const char *dbm_get_name(const dbm_matrix_t *matrix) { return matrix->name; }
536 :
537 : /*******************************************************************************
538 : * \brief Returns the number of local Non-Zero Elements of the given matrix.
539 : * \author Ole Schuett
540 : ******************************************************************************/
541 1357738 : int dbm_get_nze(const dbm_matrix_t *matrix) {
542 1357738 : int nze = 0;
543 2715476 : for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
544 1357738 : nze += matrix->shards[ishard].data_size;
545 : }
546 1357738 : return nze;
547 : }
548 :
549 : /*******************************************************************************
550 : * \brief Returns the number of local blocks of the given matrix.
551 : * \author Ole Schuett
552 : ******************************************************************************/
553 855880 : int dbm_get_num_blocks(const dbm_matrix_t *matrix) {
554 855880 : int nblocks = 0;
555 1711760 : for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
556 855880 : nblocks += matrix->shards[ishard].nblocks;
557 : }
558 855880 : return nblocks;
559 : }
560 :
561 : /*******************************************************************************
562 : * \brief Returns the row block sizes of the given matrix.
563 : * \author Ole Schuett
564 : ******************************************************************************/
565 3724722 : void dbm_get_row_sizes(const dbm_matrix_t *matrix, int *nrows,
566 : const int **row_sizes) {
567 3724722 : *nrows = matrix->nrows;
568 3724722 : *row_sizes = matrix->row_sizes;
569 3724722 : }
570 :
571 : /*******************************************************************************
572 : * \brief Returns the column block sizes of the given matrix.
573 : * \author Ole Schuett
574 : ******************************************************************************/
575 2860630 : void dbm_get_col_sizes(const dbm_matrix_t *matrix, int *ncols,
576 : const int **col_sizes) {
577 2860630 : *ncols = matrix->ncols;
578 2860630 : *col_sizes = matrix->col_sizes;
579 2860630 : }
580 :
581 : /*******************************************************************************
582 : * \brief Returns the local row block sizes of the given matrix.
583 : * \author Ole Schuett
584 : ******************************************************************************/
585 170348 : void dbm_get_local_rows(const dbm_matrix_t *matrix, int *nlocal_rows,
586 : const int **local_rows) {
587 170348 : *nlocal_rows = matrix->dist->rows.nlocals;
588 170348 : *local_rows = matrix->dist->rows.local_indicies;
589 170348 : }
590 :
591 : /*******************************************************************************
592 : * \brief Returns the local column block sizes of the given matrix.
593 : * \author Ole Schuett
594 : ******************************************************************************/
595 74274 : void dbm_get_local_cols(const dbm_matrix_t *matrix, int *nlocal_cols,
596 : const int **local_cols) {
597 74274 : *nlocal_cols = matrix->dist->cols.nlocals;
598 74274 : *local_cols = matrix->dist->cols.local_indicies;
599 74274 : }
600 :
601 : /*******************************************************************************
602 : * \brief Returns the MPI rank on which the given block should be stored.
603 : * \author Ole Schuett
604 : ******************************************************************************/
605 91415565 : int dbm_get_stored_coordinates(const dbm_matrix_t *matrix, const int row,
606 : const int col) {
607 91415565 : return dbm_distribution_stored_coords(matrix->dist, row, col);
608 : }
609 :
610 : /*******************************************************************************
611 : * \brief Returns the distribution of the given matrix.
612 : * \author Ole Schuett
613 : ******************************************************************************/
614 897525 : const dbm_distribution_t *dbm_get_distribution(const dbm_matrix_t *matrix) {
615 897525 : return matrix->dist;
616 : }
617 :
618 : // EOF
|