Line data Source code
1 : /*----------------------------------------------------------------------------*/
2 : /* CP2K: A general program to perform molecular dynamics simulations */
3 : /* Copyright 2000-2025 CP2K developers group <https://cp2k.org> */
4 : /* */
5 : /* SPDX-License-Identifier: BSD-3-Clause */
6 : /*----------------------------------------------------------------------------*/
7 :
8 : #include <assert.h>
9 : #include <math.h>
10 : #include <omp.h>
11 : #include <stdbool.h>
12 : #include <stddef.h>
13 : #include <stdlib.h>
14 : #include <string.h>
15 :
16 : #include "dbm_hyperparams.h"
17 : #include "dbm_matrix.h"
18 :
19 : /*******************************************************************************
20 : * \brief Creates a new matrix.
21 : * \author Ole Schuett
22 : ******************************************************************************/
23 1707391 : void dbm_create(dbm_matrix_t **matrix_out, dbm_distribution_t *dist,
24 : const char name[], const int nrows, const int ncols,
25 : const int row_sizes[nrows], const int col_sizes[ncols]) {
26 1707391 : assert(omp_get_num_threads() == 1);
27 :
28 1707391 : dbm_matrix_t *matrix = calloc(1, sizeof(dbm_matrix_t));
29 :
30 1707391 : assert(dist->rows.length == nrows);
31 1707391 : assert(dist->cols.length == ncols);
32 1707391 : dbm_distribution_hold(dist);
33 1707391 : matrix->dist = dist;
34 :
35 1707391 : size_t size = (strlen(name) + 1) * sizeof(char);
36 1707391 : matrix->name = malloc(size);
37 1707391 : assert(matrix->name != NULL);
38 1707391 : memcpy(matrix->name, name, size);
39 :
40 1707391 : matrix->nrows = nrows;
41 1707391 : matrix->ncols = ncols;
42 :
43 1707391 : size = nrows * sizeof(int);
44 1707391 : matrix->row_sizes = malloc(size);
45 1707391 : assert(matrix->row_sizes != NULL);
46 1707391 : memcpy(matrix->row_sizes, row_sizes, size);
47 :
48 1707391 : size = ncols * sizeof(int);
49 1707391 : matrix->col_sizes = malloc(size);
50 1707391 : assert(matrix->col_sizes != NULL);
51 1707391 : memcpy(matrix->col_sizes, col_sizes, size);
52 :
53 1707391 : matrix->shards = malloc(dbm_get_num_shards(matrix) * sizeof(dbm_shard_t));
54 1707391 : assert(matrix->shards != NULL);
55 1707391 : #pragma omp parallel for
56 : for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
57 : dbm_shard_init(&matrix->shards[ishard]);
58 : }
59 :
60 1707391 : assert(*matrix_out == NULL);
61 1707391 : *matrix_out = matrix;
62 1707391 : }
63 :
64 : /*******************************************************************************
65 : * \brief Releases a matrix and all its ressources.
66 : * \author Ole Schuett
67 : ******************************************************************************/
68 1707391 : void dbm_release(dbm_matrix_t *matrix) {
69 1707391 : assert(omp_get_num_threads() == 1);
70 3414782 : for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
71 1707391 : dbm_shard_release(&matrix->shards[ishard]);
72 : }
73 1707391 : dbm_distribution_release(matrix->dist);
74 1707391 : free(matrix->row_sizes);
75 1707391 : free(matrix->col_sizes);
76 1707391 : free(matrix->shards);
77 1707391 : free(matrix->name);
78 1707391 : free(matrix);
79 1707391 : }
80 :
81 : /*******************************************************************************
82 : * \brief Copies content of matrix_b into matrix_a.
83 : * Matrices must have the same row/col block sizes and distribution.
84 : * \author Ole Schuett
85 : ******************************************************************************/
86 428240 : void dbm_copy(dbm_matrix_t *matrix_a, const dbm_matrix_t *matrix_b) {
87 428240 : assert(omp_get_num_threads() == 1);
88 :
89 428240 : assert(matrix_b->nrows == matrix_a->nrows);
90 4991453 : for (int i = 0; i < matrix_b->nrows; i++) {
91 4563213 : assert(matrix_b->row_sizes[i] == matrix_a->row_sizes[i]);
92 : }
93 428240 : assert(matrix_b->ncols == matrix_a->ncols);
94 33683339 : for (int i = 0; i < matrix_b->ncols; i++) {
95 33255099 : assert(matrix_b->col_sizes[i] == matrix_a->col_sizes[i]);
96 : }
97 :
98 428240 : assert(matrix_a->dist == matrix_b->dist);
99 :
100 428240 : #pragma omp parallel for DBM_OMP_SCHEDULE
101 : for (int ishard = 0; ishard < dbm_get_num_shards(matrix_a); ishard++) {
102 : dbm_shard_copy(&matrix_a->shards[ishard], &matrix_b->shards[ishard]);
103 : }
104 428240 : }
105 :
106 : /*******************************************************************************
107 : * \brief Copies content of matrix_b into matrix_a.
108 : * Matrices may have different distributions.
109 : * \author Ole Schuett
110 : ******************************************************************************/
111 144 : void dbm_redistribute(const dbm_matrix_t *matrix, dbm_matrix_t *redist) {
112 144 : assert(omp_get_num_threads() == 1);
113 144 : assert(matrix->nrows == redist->nrows);
114 6384 : for (int i = 0; i < matrix->nrows; i++) {
115 6240 : assert(matrix->row_sizes[i] == redist->row_sizes[i]);
116 : }
117 144 : assert(matrix->ncols == redist->ncols);
118 6384 : for (int i = 0; i < matrix->ncols; i++) {
119 6240 : assert(matrix->col_sizes[i] == redist->col_sizes[i]);
120 : }
121 :
122 144 : assert(dbm_mpi_comms_are_similar(matrix->dist->comm, redist->dist->comm));
123 144 : const dbm_mpi_comm_t comm = redist->dist->comm;
124 144 : const int nranks = dbm_mpi_comm_size(comm);
125 :
126 : // 1st pass: Compute send_count.
127 144 : int send_count[nranks];
128 144 : memset(send_count, 0, nranks * sizeof(int));
129 288 : for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
130 144 : dbm_shard_t *shard = &matrix->shards[ishard];
131 8936 : for (int iblock = 0; iblock < shard->nblocks; iblock++) {
132 8792 : const dbm_block_t *blk = &shard->blocks[iblock];
133 8792 : const int row_size = matrix->row_sizes[blk->row];
134 8792 : const int col_size = matrix->col_sizes[blk->col];
135 8792 : const int block_size = row_size * col_size;
136 8792 : const int rank = dbm_get_stored_coordinates(redist, blk->row, blk->col);
137 8792 : assert(0 <= rank && rank < nranks);
138 8792 : send_count[rank] += 2 + block_size;
139 : }
140 : }
141 :
142 : // 1st communication: Exchange counts.
143 144 : int recv_count[nranks];
144 144 : dbm_mpi_alltoall_int(send_count, 1, recv_count, 1, comm);
145 :
146 : // Compute displacements and allocate data buffers.
147 144 : int send_displ[nranks + 1], recv_displ[nranks + 1];
148 144 : send_displ[0] = recv_displ[0] = 0;
149 432 : for (int irank = 1; irank <= nranks; irank++) {
150 288 : send_displ[irank] = send_displ[irank - 1] + send_count[irank - 1];
151 288 : recv_displ[irank] = recv_displ[irank - 1] + recv_count[irank - 1];
152 : }
153 144 : const int total_send_count = send_displ[nranks];
154 144 : const int total_recv_count = recv_displ[nranks];
155 144 : double *data_send = dbm_mpi_alloc_mem(total_send_count * sizeof(double));
156 144 : double *data_recv = dbm_mpi_alloc_mem(total_recv_count * sizeof(double));
157 :
158 : // 2nd pass: Fill send_data.
159 144 : int send_data_positions[nranks];
160 144 : memcpy(send_data_positions, send_displ, nranks * sizeof(int));
161 288 : for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
162 144 : dbm_shard_t *shard = &matrix->shards[ishard];
163 8936 : for (int iblock = 0; iblock < shard->nblocks; iblock++) {
164 8792 : const dbm_block_t *blk = &shard->blocks[iblock];
165 8792 : const double *blk_data = &shard->data[blk->offset];
166 8792 : const int row_size = matrix->row_sizes[blk->row];
167 8792 : const int col_size = matrix->col_sizes[blk->col];
168 8792 : const int block_size = row_size * col_size;
169 8792 : const int rank = dbm_get_stored_coordinates(redist, blk->row, blk->col);
170 8792 : const int pos = send_data_positions[rank];
171 8792 : data_send[pos + 0] = blk->row; // send integers as doubles
172 8792 : data_send[pos + 1] = blk->col;
173 8792 : memcpy(&data_send[pos + 2], blk_data, block_size * sizeof(double));
174 8792 : send_data_positions[rank] += 2 + block_size;
175 : }
176 : }
177 432 : for (int irank = 0; irank < nranks; irank++) {
178 288 : assert(send_data_positions[irank] == send_displ[irank + 1]);
179 : }
180 :
181 : // 2nd communication: Exchange data.
182 144 : dbm_mpi_alltoallv_double(data_send, send_count, send_displ, data_recv,
183 : recv_count, recv_displ, comm);
184 144 : dbm_mpi_free_mem(data_send);
185 :
186 : // 3rd pass: Unpack data.
187 144 : dbm_clear(redist);
188 144 : int recv_data_pos = 0;
189 8936 : while (recv_data_pos < total_recv_count) {
190 8792 : const int row = (int)data_recv[recv_data_pos + 0];
191 8792 : const int col = (int)data_recv[recv_data_pos + 1];
192 8792 : assert(data_recv[recv_data_pos + 0] - (double)row == 0.0);
193 8792 : assert(data_recv[recv_data_pos + 1] - (double)col == 0.0);
194 8792 : dbm_put_block(redist, row, col, false, &data_recv[recv_data_pos + 2]);
195 8792 : const int row_size = matrix->row_sizes[row];
196 8792 : const int col_size = matrix->col_sizes[col];
197 8792 : const int block_size = row_size * col_size;
198 8792 : recv_data_pos += 2 + block_size;
199 : }
200 144 : assert(recv_data_pos == total_recv_count);
201 144 : dbm_mpi_free_mem(data_recv);
202 144 : }
203 :
204 : /*******************************************************************************
205 : * \brief Looks up a block from given matrics. This routine is thread-safe.
206 : * If the block is not found then a null pointer is returned.
207 : * \author Ole Schuett
208 : ******************************************************************************/
209 22475441 : void dbm_get_block_p(dbm_matrix_t *matrix, const int row, const int col,
210 : double **block, int *row_size, int *col_size) {
211 22475441 : assert(0 <= row && row < matrix->nrows);
212 22475441 : assert(0 <= col && col < matrix->ncols);
213 22475441 : assert(dbm_get_stored_coordinates(matrix, row, col) == matrix->dist->my_rank);
214 22475441 : *row_size = matrix->row_sizes[row];
215 22475441 : *col_size = matrix->col_sizes[col];
216 22475441 : *block = NULL;
217 :
218 22475441 : const int ishard = dbm_get_shard_index(matrix, row, col);
219 22475441 : const dbm_shard_t *shard = &matrix->shards[ishard];
220 22475441 : dbm_block_t *blk = dbm_shard_lookup(shard, row, col);
221 22475441 : if (blk != NULL) {
222 21224246 : *block = &shard->data[blk->offset];
223 : }
224 22475441 : }
225 :
226 : /*******************************************************************************
227 : * \brief Adds a block to given matrix. This routine is thread-safe.
228 : * If block already exist then it gets overwritten (or summed).
229 : * \author Ole Schuett
230 : ******************************************************************************/
231 35197196 : void dbm_put_block(dbm_matrix_t *matrix, const int row, const int col,
232 : const bool summation, const double *block) {
233 35197196 : assert(0 <= row && row < matrix->nrows);
234 35197196 : assert(0 <= col && col < matrix->ncols);
235 35197196 : assert(dbm_get_stored_coordinates(matrix, row, col) == matrix->dist->my_rank);
236 35197196 : const int row_size = matrix->row_sizes[row];
237 35197196 : const int col_size = matrix->col_sizes[col];
238 35197196 : const int block_size = row_size * col_size;
239 :
240 35197196 : const int ishard = dbm_get_shard_index(matrix, row, col);
241 35197196 : dbm_shard_t *shard = &matrix->shards[ishard];
242 35197196 : omp_set_lock(&shard->lock);
243 35197196 : dbm_block_t *blk =
244 35197196 : dbm_shard_get_or_allocate_block(shard, row, col, block_size);
245 35197196 : double *blk_data = &shard->data[blk->offset];
246 35197196 : if (summation) {
247 1091563258 : for (int i = 0; i < block_size; i++) {
248 1088493651 : blk_data[i] += block[i];
249 : }
250 : } else {
251 32127589 : memcpy(blk_data, block, block_size * sizeof(double));
252 : }
253 35197196 : omp_unset_lock(&shard->lock);
254 35197196 : }
255 :
256 : /*******************************************************************************
257 : * \brief Remove all blocks from matrix, but does not release underlying memory.
258 : * \author Ole Schuett
259 : ******************************************************************************/
260 2057065 : void dbm_clear(dbm_matrix_t *matrix) {
261 2057065 : assert(omp_get_num_threads() == 1);
262 :
263 2057065 : #pragma omp parallel for DBM_OMP_SCHEDULE
264 : for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
265 : dbm_shard_t *shard = &matrix->shards[ishard];
266 : shard->nblocks = 0;
267 : shard->data_size = 0;
268 : shard->data_promised = 0;
269 : // Does not deallocate memory, hence data_allocated remains unchanged.
270 : memset(shard->hashtable, 0, shard->hashtable_size * sizeof(int));
271 : }
272 2057065 : }
273 :
274 : /*******************************************************************************
275 : * \brief Removes all blocks from the matrix whose norm is below the threshold.
276 : * Blocks of size zero are always kept.
277 : * \author Ole Schuett
278 : ******************************************************************************/
279 582237 : void dbm_filter(dbm_matrix_t *matrix, const double eps) {
280 582237 : assert(omp_get_num_threads() == 1);
281 :
282 582237 : if (eps == 0.0) {
283 : return;
284 : }
285 542245 : const double eps2 = eps * eps;
286 :
287 542245 : #pragma omp parallel for DBM_OMP_SCHEDULE
288 : for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
289 : dbm_shard_t *shard = &matrix->shards[ishard];
290 : const int old_nblocks = shard->nblocks;
291 : shard->nblocks = 0;
292 : shard->data_promised = 0;
293 : memset(shard->hashtable, 0, shard->hashtable_size * sizeof(int));
294 :
295 : for (int iblock = 0; iblock < old_nblocks; iblock++) {
296 : const dbm_block_t old_blk = shard->blocks[iblock];
297 : const double *old_blk_data = &shard->data[old_blk.offset];
298 : const int row_size = matrix->row_sizes[old_blk.row];
299 : const int col_size = matrix->col_sizes[old_blk.col];
300 : const int block_size = row_size * col_size;
301 : double norm = 0.0;
302 : for (int i = 0; i < block_size; i++) {
303 : const double value = old_blk_data[i];
304 : norm += value * value;
305 : if (eps2 <= norm) {
306 : break;
307 : }
308 : }
309 : // For historic reasons zero-sized blocks are never filtered.
310 : if (block_size > 0 && norm < eps2) {
311 : continue; // filter the block
312 : }
313 : // Re-create block.
314 : dbm_block_t *new_blk = dbm_shard_promise_new_block(
315 : shard, old_blk.row, old_blk.col, block_size);
316 : assert(new_blk->offset <= old_blk.offset);
317 : if (new_blk->offset != old_blk.offset) {
318 : // Using memmove because it can handle overlapping buffers.
319 : double *new_blk_data = &shard->data[new_blk->offset];
320 : memmove(new_blk_data, old_blk_data, block_size * sizeof(double));
321 : }
322 : }
323 : shard->data_size = shard->data_promised;
324 : // TODO: Could call realloc to release excess memory.
325 : }
326 : }
327 :
328 : /*******************************************************************************
329 : * \brief Adds list of blocks efficiently. The blocks will be filled with zeros.
330 : * This routine must always be called within an OpenMP parallel region.
331 : * \author Ole Schuett
332 : ******************************************************************************/
333 1390980 : void dbm_reserve_blocks(dbm_matrix_t *matrix, const int nblocks,
334 : const int rows[], const int cols[]) {
335 1390980 : assert(omp_get_num_threads() == omp_get_max_threads() &&
336 : "Please call dbm_reserve_blocks within an OpenMP parallel region.");
337 1390980 : const int my_rank = matrix->dist->my_rank;
338 29296576 : for (int i = 0; i < nblocks; i++) {
339 27905596 : const int ishard = dbm_get_shard_index(matrix, rows[i], cols[i]);
340 27905596 : dbm_shard_t *shard = &matrix->shards[ishard];
341 27905596 : omp_set_lock(&shard->lock);
342 27905596 : assert(0 <= rows[i] && rows[i] < matrix->nrows);
343 27905596 : assert(0 <= cols[i] && cols[i] < matrix->ncols);
344 27905596 : assert(dbm_get_stored_coordinates(matrix, rows[i], cols[i]) == my_rank);
345 27905596 : const int row_size = matrix->row_sizes[rows[i]];
346 27905596 : const int col_size = matrix->col_sizes[cols[i]];
347 27905596 : const int block_size = row_size * col_size;
348 27905596 : dbm_shard_get_or_promise_block(shard, rows[i], cols[i], block_size);
349 27905596 : omp_unset_lock(&shard->lock);
350 : }
351 1390980 : #pragma omp barrier
352 :
353 : #pragma omp for DBM_OMP_SCHEDULE
354 1390980 : for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
355 1390980 : dbm_shard_t *shard = &matrix->shards[ishard];
356 1390980 : dbm_shard_allocate_promised_blocks(shard);
357 : }
358 1390980 : }
359 :
360 : /*******************************************************************************
361 : * \brief Multiplies all entries in the given matrix by the given factor alpha.
362 : * \author Ole Schuett
363 : ******************************************************************************/
364 474379 : void dbm_scale(dbm_matrix_t *matrix, const double alpha) {
365 474379 : assert(omp_get_num_threads() == 1);
366 474379 : if (alpha == 1.0) {
367 : return;
368 : }
369 341889 : if (alpha == 0.0) {
370 333445 : dbm_zero(matrix);
371 333445 : return;
372 : }
373 :
374 8444 : #pragma omp parallel for DBM_OMP_SCHEDULE
375 : for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
376 : dbm_shard_t *shard = &matrix->shards[ishard];
377 : for (int i = 0; i < shard->data_size; i++) {
378 : shard->data[i] *= alpha;
379 : }
380 : }
381 : }
382 :
383 : /*******************************************************************************
384 : * \brief Sets all blocks in the given matrix to zero.
385 : * \author Ole Schuett
386 : ******************************************************************************/
387 333445 : void dbm_zero(dbm_matrix_t *matrix) {
388 333445 : assert(omp_get_num_threads() == 1);
389 :
390 333445 : #pragma omp parallel for DBM_OMP_SCHEDULE
391 : for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
392 : dbm_shard_t *shard = &matrix->shards[ishard];
393 : memset(shard->data, 0, shard->data_size * sizeof(double));
394 : }
395 333445 : }
396 :
397 : /*******************************************************************************
398 : * \brief Adds matrix_b to matrix_a.
399 : * \author Ole Schuett
400 : ******************************************************************************/
401 211765 : void dbm_add(dbm_matrix_t *matrix_a, const dbm_matrix_t *matrix_b) {
402 211765 : assert(omp_get_num_threads() == 1);
403 211765 : assert(matrix_a->dist == matrix_b->dist);
404 :
405 211765 : #pragma omp parallel for DBM_OMP_SCHEDULE
406 : for (int ishard = 0; ishard < dbm_get_num_shards(matrix_b); ishard++) {
407 : dbm_shard_t *shard_a = &matrix_a->shards[ishard];
408 : const dbm_shard_t *shard_b = &matrix_b->shards[ishard];
409 : for (int iblock = 0; iblock < shard_b->nblocks; iblock++) {
410 : const dbm_block_t blk_b = shard_b->blocks[iblock];
411 :
412 : const int row_size = matrix_b->row_sizes[blk_b.row];
413 : const int col_size = matrix_b->col_sizes[blk_b.col];
414 : assert(row_size == matrix_a->row_sizes[blk_b.row]);
415 : assert(col_size == matrix_a->col_sizes[blk_b.col]);
416 : const int block_size = row_size * col_size;
417 : dbm_block_t *blk_a = dbm_shard_get_or_allocate_block(
418 : shard_a, blk_b.row, blk_b.col, block_size);
419 : double *data_a = &shard_a->data[blk_a->offset];
420 : const double *data_b = &shard_b->data[blk_b.offset];
421 : for (int i = 0; i < block_size; i++) {
422 : data_a[i] += data_b[i];
423 : }
424 : }
425 : }
426 211765 : }
427 :
428 : /*******************************************************************************
429 : * \brief Creates an iterator for the blocks of the given matrix.
430 : * The iteration order is not stable.
431 : * This routine must always be called within an OpenMP parallel region.
432 : * \author Ole Schuett
433 : ******************************************************************************/
434 3226000 : void dbm_iterator_start(dbm_iterator_t **iter_out, const dbm_matrix_t *matrix) {
435 3226000 : assert(omp_get_num_threads() == omp_get_max_threads() &&
436 : "Please call dbm_iterator_start within an OpenMP parallel region.");
437 3226000 : dbm_iterator_t *iter = malloc(sizeof(dbm_iterator_t));
438 3226000 : assert(iter != NULL);
439 3226000 : iter->matrix = matrix;
440 3226000 : iter->next_block = 0;
441 3226000 : iter->next_shard = omp_get_thread_num();
442 3798813 : while (iter->next_shard < dbm_get_num_shards(matrix) &&
443 3226000 : matrix->shards[iter->next_shard].nblocks == 0) {
444 572813 : iter->next_shard += omp_get_num_threads();
445 : }
446 3226000 : assert(*iter_out == NULL);
447 3226000 : *iter_out = iter;
448 3226000 : }
449 :
450 : /*******************************************************************************
451 : * \brief Returns number of blocks the iterator will provide to calling thread.
452 : * \author Ole Schuett
453 : ******************************************************************************/
454 602915 : int dbm_iterator_num_blocks(const dbm_iterator_t *iter) {
455 602915 : int num_blocks = 0;
456 602915 : for (int ishard = omp_get_thread_num();
457 1205830 : ishard < dbm_get_num_shards(iter->matrix);
458 602915 : ishard += omp_get_num_threads()) {
459 602915 : num_blocks += iter->matrix->shards[ishard].nblocks;
460 : }
461 602915 : return num_blocks;
462 : }
463 :
464 : /*******************************************************************************
465 : * \brief Tests whether the given iterator has any block left.
466 : * \author Ole Schuett
467 : ******************************************************************************/
468 57739890 : bool dbm_iterator_blocks_left(const dbm_iterator_t *iter) {
469 57739890 : return iter->next_shard < dbm_get_num_shards(iter->matrix);
470 : }
471 :
472 : /*******************************************************************************
473 : * \brief Returns the next block from the given iterator.
474 : * \author Ole Schuett
475 : ******************************************************************************/
476 66124770 : void dbm_iterator_next_block(dbm_iterator_t *iter, int *row, int *col,
477 : double **block, int *row_size, int *col_size) {
478 66124770 : const dbm_matrix_t *matrix = iter->matrix;
479 66124770 : assert(iter->next_shard < dbm_get_num_shards(matrix));
480 66124770 : const dbm_shard_t *shard = &matrix->shards[iter->next_shard];
481 66124770 : assert(iter->next_block < shard->nblocks);
482 66124770 : dbm_block_t *blk = &shard->blocks[iter->next_block];
483 :
484 66124770 : *row = blk->row;
485 66124770 : *col = blk->col;
486 66124770 : *row_size = matrix->row_sizes[blk->row];
487 66124770 : *col_size = matrix->col_sizes[blk->col];
488 66124770 : *block = &shard->data[blk->offset];
489 :
490 66124770 : iter->next_block++;
491 66124770 : if (iter->next_block >= shard->nblocks) {
492 : // Advance to the next non-empty shard...
493 2653187 : iter->next_shard += omp_get_num_threads();
494 2653187 : while (iter->next_shard < dbm_get_num_shards(matrix) &&
495 0 : matrix->shards[iter->next_shard].nblocks == 0) {
496 0 : iter->next_shard += omp_get_num_threads();
497 : }
498 2653187 : iter->next_block = 0; // ...and continue with its first block.
499 : }
500 66124770 : }
501 :
502 : /*******************************************************************************
503 : * \brief Releases the given iterator.
504 : * \author Ole Schuett
505 : ******************************************************************************/
506 3226000 : void dbm_iterator_stop(dbm_iterator_t *iter) { free(iter); }
507 :
508 : /*******************************************************************************
509 : * \brief Private routine to accumulate using Kahan's summation.
510 : * \author Hans Pabst
511 : ******************************************************************************/
512 14383468 : static double kahan_sum(double value, double *accumulator,
513 : double *compensation) {
514 14383468 : double r, c;
515 14383468 : assert(NULL != accumulator && NULL != compensation);
516 14383468 : c = value - *compensation;
517 14383468 : r = *accumulator + c;
518 14383468 : *compensation = (r - *accumulator) - c;
519 14383468 : *accumulator = r;
520 14383468 : return r;
521 : }
522 :
523 : /*******************************************************************************
524 : * \brief Computes a checksum of the given matrix.
525 : * \author Ole Schuett
526 : ******************************************************************************/
527 190 : double dbm_checksum(const dbm_matrix_t *matrix) {
528 190 : double checksum = 0.0, compensation = 0.0;
529 380 : for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
530 190 : const dbm_shard_t *shard = &matrix->shards[ishard];
531 14383658 : for (int i = 0; i < shard->data_size; i++) {
532 14383468 : const double value = shard->data[i];
533 14383468 : kahan_sum(value * value, &checksum, &compensation);
534 : }
535 : }
536 190 : dbm_mpi_sum_double(&checksum, 1, matrix->dist->comm);
537 190 : return checksum;
538 : }
539 :
540 : /*******************************************************************************
541 : * \brief Returns the largest absolute value of all matrix elements.
542 : * \author Ole Schuett
543 : ******************************************************************************/
544 48 : double dbm_maxabs(const dbm_matrix_t *matrix) {
545 48 : double maxabs = 0.0;
546 96 : for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
547 48 : const dbm_shard_t *shard = &matrix->shards[ishard];
548 1599050 : for (int i = 0; i < shard->data_size; i++) {
549 1599002 : maxabs = fmax(maxabs, fabs(shard->data[i]));
550 : }
551 : }
552 48 : dbm_mpi_max_double(&maxabs, 1, matrix->dist->comm);
553 48 : return maxabs;
554 : }
555 :
556 : /*******************************************************************************
557 : * \brief Returns the name of the matrix of the given matrix.
558 : * \author Ole Schuett
559 : ******************************************************************************/
560 1778307 : const char *dbm_get_name(const dbm_matrix_t *matrix) { return matrix->name; }
561 :
562 : /*******************************************************************************
563 : * \brief Returns the number of local Non-Zero Elements of the given matrix.
564 : * \author Ole Schuett
565 : ******************************************************************************/
566 1776141 : int dbm_get_nze(const dbm_matrix_t *matrix) {
567 1776141 : int nze = 0;
568 3552282 : for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
569 1776141 : nze += matrix->shards[ishard].data_size;
570 : }
571 1776141 : return nze;
572 : }
573 :
574 : /*******************************************************************************
575 : * \brief Returns the number of local blocks of the given matrix.
576 : * \author Ole Schuett
577 : ******************************************************************************/
578 972164 : int dbm_get_num_blocks(const dbm_matrix_t *matrix) {
579 972164 : int nblocks = 0;
580 1944328 : for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
581 972164 : nblocks += matrix->shards[ishard].nblocks;
582 : }
583 972164 : return nblocks;
584 : }
585 :
586 : /*******************************************************************************
587 : * \brief Returns the row block sizes of the given matrix.
588 : * \author Ole Schuett
589 : ******************************************************************************/
590 4527363 : void dbm_get_row_sizes(const dbm_matrix_t *matrix, int *nrows,
591 : const int **row_sizes) {
592 4527363 : *nrows = matrix->nrows;
593 4527363 : *row_sizes = matrix->row_sizes;
594 4527363 : }
595 :
596 : /*******************************************************************************
597 : * \brief Returns the column block sizes of the given matrix.
598 : * \author Ole Schuett
599 : ******************************************************************************/
600 3298597 : void dbm_get_col_sizes(const dbm_matrix_t *matrix, int *ncols,
601 : const int **col_sizes) {
602 3298597 : *ncols = matrix->ncols;
603 3298597 : *col_sizes = matrix->col_sizes;
604 3298597 : }
605 :
606 : /*******************************************************************************
607 : * \brief Returns the local row block sizes of the given matrix.
608 : * \author Ole Schuett
609 : ******************************************************************************/
610 287280 : void dbm_get_local_rows(const dbm_matrix_t *matrix, int *nlocal_rows,
611 : const int **local_rows) {
612 287280 : *nlocal_rows = matrix->dist->rows.nlocals;
613 287280 : *local_rows = matrix->dist->rows.local_indicies;
614 287280 : }
615 :
616 : /*******************************************************************************
617 : * \brief Returns the local column block sizes of the given matrix.
618 : * \author Ole Schuett
619 : ******************************************************************************/
620 105118 : void dbm_get_local_cols(const dbm_matrix_t *matrix, int *nlocal_cols,
621 : const int **local_cols) {
622 105118 : *nlocal_cols = matrix->dist->cols.nlocals;
623 105118 : *local_cols = matrix->dist->cols.local_indicies;
624 105118 : }
625 :
626 : /*******************************************************************************
627 : * \brief Returns the MPI rank on which the given block should be stored.
628 : * \author Ole Schuett
629 : ******************************************************************************/
630 92074402 : int dbm_get_stored_coordinates(const dbm_matrix_t *matrix, const int row,
631 : const int col) {
632 92074402 : return dbm_distribution_stored_coords(matrix->dist, row, col);
633 : }
634 :
635 : /*******************************************************************************
636 : * \brief Returns the distribution of the given matrix.
637 : * \author Ole Schuett
638 : ******************************************************************************/
639 1229842 : const dbm_distribution_t *dbm_get_distribution(const dbm_matrix_t *matrix) {
640 1229842 : return matrix->dist;
641 : }
642 :
643 : // EOF
|