LCOV - code coverage report
Current view: top level - src/dbt/tas - dbt_tas_reshape_ops.F (source / functions) Hit Total Coverage
Test: CP2K Regtests (git:4dc10b3) Lines: 355 359 98.9 %
Date: 2024-11-21 06:45:46 Functions: 10 12 83.3 %

          Line data    Source code
       1             : !--------------------------------------------------------------------------------------------------!
       2             : !   CP2K: A general program to perform molecular dynamics simulations                              !
       3             : !   Copyright 2000-2024 CP2K developers group <https://cp2k.org>                                   !
       4             : !                                                                                                  !
       5             : !   SPDX-License-Identifier: GPL-2.0-or-later                                                      !
       6             : !--------------------------------------------------------------------------------------------------!
       7             : 
       8             : ! **************************************************************************************************
       9             : !> \brief communication routines to reshape / replicate / merge tall-and-skinny matrices.
      10             : !> \author Patrick Seewald
      11             : ! **************************************************************************************************
      12             : MODULE dbt_tas_reshape_ops
      13             :    USE OMP_LIB,                         ONLY: omp_get_num_threads,&
      14             :                                               omp_get_thread_num,&
      15             :                                               omp_init_lock,&
      16             :                                               omp_lock_kind,&
      17             :                                               omp_set_lock,&
      18             :                                               omp_unset_lock
      19             :    USE dbm_api,                         ONLY: &
      20             :         dbm_clear, dbm_distribution_col_dist, dbm_distribution_obj, dbm_distribution_row_dist, &
      21             :         dbm_finalize, dbm_get_col_block_sizes, dbm_get_distribution, dbm_get_name, &
      22             :         dbm_get_row_block_sizes, dbm_get_stored_coordinates, dbm_iterator, &
      23             :         dbm_iterator_blocks_left, dbm_iterator_next_block, dbm_iterator_start, dbm_iterator_stop, &
      24             :         dbm_put_block, dbm_reserve_blocks, dbm_type
      25             :    USE dbt_tas_base,                    ONLY: &
      26             :         dbt_repl_get_stored_coordinates, dbt_tas_blk_sizes, dbt_tas_clear, dbt_tas_create, &
      27             :         dbt_tas_distribution_new, dbt_tas_finalize, dbt_tas_get_stored_coordinates, dbt_tas_info, &
      28             :         dbt_tas_iterator_blocks_left, dbt_tas_iterator_next_block, dbt_tas_iterator_start, &
      29             :         dbt_tas_iterator_stop, dbt_tas_put_block, dbt_tas_reserve_blocks
      30             :    USE dbt_tas_global,                  ONLY: dbt_tas_blk_size_arb,&
      31             :                                               dbt_tas_blk_size_repl,&
      32             :                                               dbt_tas_dist_arb,&
      33             :                                               dbt_tas_dist_repl,&
      34             :                                               dbt_tas_distribution,&
      35             :                                               dbt_tas_rowcol_data
      36             :    USE dbt_tas_split,                   ONLY: colsplit,&
      37             :                                               dbt_tas_get_split_info,&
      38             :                                               rowsplit
      39             :    USE dbt_tas_types,                   ONLY: dbt_tas_distribution_type,&
      40             :                                               dbt_tas_iterator,&
      41             :                                               dbt_tas_split_info,&
      42             :                                               dbt_tas_type
      43             :    USE dbt_tas_util,                    ONLY: swap
      44             :    USE kinds,                           ONLY: dp,&
      45             :                                               int_8
      46             :    USE message_passing,                 ONLY: mp_cart_type,&
      47             :                                               mp_comm_type,&
      48             :                                               mp_request_type,&
      49             :                                               mp_waitall
      50             : #include "../../base/base_uses.f90"
      51             : 
      52             :    IMPLICIT NONE
      53             :    PRIVATE
      54             : 
      55             :    CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'dbt_tas_reshape_ops'
      56             : 
      57             :    PUBLIC :: &
      58             :       dbt_tas_merge, &
      59             :       dbt_tas_replicate, &
      60             :       dbt_tas_reshape
      61             : 
      62             :    TYPE dbt_buffer_type
      63             :       INTEGER :: nblock = -1
      64             :       INTEGER(KIND=int_8), DIMENSION(:, :), ALLOCATABLE :: indx
      65             :       REAL(dp), DIMENSION(:), ALLOCATABLE :: msg
      66             :       INTEGER :: endpos = -1
      67             :    END TYPE
      68             : 
      69             : CONTAINS
      70             : 
      71             : ! **************************************************************************************************
      72             : !> \brief copy data (involves reshape)
      73             : !> \param matrix_in ...
      74             : !> \param matrix_out ...
      75             : !> \param summation whether matrix_out = matrix_out + matrix_in
      76             : !> \param transposed ...
      77             : !> \param move_data memory optimization: move data to matrix_out such that matrix_in is empty on return
      78             : !> \author Patrick Seewald
      79             : ! **************************************************************************************************
      80      192410 :    RECURSIVE SUBROUTINE dbt_tas_reshape(matrix_in, matrix_out, summation, transposed, move_data)
      81             :       TYPE(dbt_tas_type), INTENT(INOUT)                  :: matrix_in, matrix_out
      82             :       LOGICAL, INTENT(IN), OPTIONAL                      :: summation, transposed, move_data
      83             : 
      84             :       CHARACTER(LEN=*), PARAMETER                        :: routineN = 'dbt_tas_reshape'
      85             : 
      86             :       INTEGER                                            :: a, b, bcount, handle, handle2, iproc, &
      87             :                                                             nblk, nblk_per_thread, ndata, numnodes
      88      192410 :       INTEGER(KIND=int_8), ALLOCATABLE, DIMENSION(:, :)  :: blks_to_allocate, index_recv
      89             :       INTEGER(KIND=int_8), DIMENSION(2)                  :: blk_index
      90             :       INTEGER(kind=omp_lock_kind), ALLOCATABLE, &
      91      192410 :          DIMENSION(:)                                    :: locks
      92      192410 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: num_blocks_recv, num_blocks_send, &
      93      192410 :                                                             num_entries_recv, num_entries_send, &
      94      192410 :                                                             num_rec, num_send
      95             :       INTEGER, DIMENSION(2)                              :: blk_size
      96             :       LOGICAL                                            :: move_prv, tr_in
      97      192410 :       REAL(KIND=dp), DIMENSION(:, :), POINTER            :: block
      98      192410 :       TYPE(dbt_buffer_type), ALLOCATABLE, DIMENSION(:)   :: buffer_recv, buffer_send
      99             :       TYPE(dbt_tas_iterator)                             :: iter
     100      962050 :       TYPE(dbt_tas_split_info)                           :: info
     101             :       TYPE(mp_comm_type)                                 :: mp_comm
     102             :       TYPE(mp_request_type), ALLOCATABLE, &
     103      192410 :          DIMENSION(:, :)                                 :: req_array
     104             : 
     105      192410 :       CALL timeset(routineN, handle)
     106             : 
     107      192410 :       IF (PRESENT(summation)) THEN
     108       65827 :          IF (.NOT. summation) CALL dbm_clear(matrix_out%matrix)
     109             :       ELSE
     110      126583 :          CALL dbm_clear(matrix_out%matrix)
     111             :       END IF
     112             : 
     113      192410 :       IF (PRESENT(move_data)) THEN
     114      192410 :          move_prv = move_data
     115             :       ELSE
     116             :          move_prv = .FALSE.
     117             :       END IF
     118             : 
     119      192410 :       IF (PRESENT(transposed)) THEN
     120      192410 :          tr_in = transposed
     121             :       ELSE
     122           0 :          tr_in = .FALSE.
     123             :       END IF
     124             : 
     125      192410 :       IF (.NOT. matrix_out%valid) THEN
     126           0 :          CPABORT("can not reshape into invalid matrix")
     127             :       END IF
     128             : 
     129      192410 :       info = dbt_tas_info(matrix_in)
     130      192410 :       mp_comm = info%mp_comm
     131      192410 :       numnodes = mp_comm%num_pe
     132      898136 :       ALLOCATE (buffer_send(0:numnodes - 1))
     133      705726 :       ALLOCATE (buffer_recv(0:numnodes - 1))
     134      577230 :       ALLOCATE (num_blocks_recv(0:numnodes - 1))
     135      384820 :       ALLOCATE (num_blocks_send(0:numnodes - 1))
     136      384820 :       ALLOCATE (num_entries_recv(0:numnodes - 1))
     137      384820 :       ALLOCATE (num_entries_send(0:numnodes - 1))
     138      577230 :       ALLOCATE (num_rec(0:2*numnodes - 1))
     139      384820 :       ALLOCATE (num_send(0:2*numnodes - 1))
     140      834222 :       num_send(:) = 0
     141     2630494 :       ALLOCATE (req_array(1:numnodes, 4))
     142      384820 :       ALLOCATE (locks(0:numnodes - 1))
     143      513316 :       DO iproc = 0, numnodes - 1
     144      513316 :          CALL omp_init_lock(locks(iproc))
     145             :       END DO
     146             : 
     147      192410 :       CALL timeset(routineN//"_get_coord", handle2)
     148             : !$OMP PARALLEL DEFAULT(NONE) SHARED(matrix_in,matrix_out,tr_in,num_send) &
     149      192410 : !$OMP PRIVATE(iter,blk_index,blk_size,iproc)
     150             :       CALL dbt_tas_iterator_start(iter, matrix_in)
     151             :       DO WHILE (dbt_tas_iterator_blocks_left(iter))
     152             :          CALL dbt_tas_iterator_next_block(iter, blk_index(1), blk_index(2), &
     153             :                                           row_size=blk_size(1), col_size=blk_size(2))
     154             :          IF (tr_in) THEN
     155             :             CALL dbt_tas_get_stored_coordinates(matrix_out, blk_index(2), blk_index(1), iproc)
     156             :          ELSE
     157             :             CALL dbt_tas_get_stored_coordinates(matrix_out, blk_index(1), blk_index(2), iproc)
     158             :          END IF
     159             : !$OMP ATOMIC
     160             :          num_send(2*iproc) = num_send(2*iproc) + PRODUCT(blk_size)
     161             : !$OMP ATOMIC
     162             :          num_send(2*iproc + 1) = num_send(2*iproc + 1) + 1
     163             :       END DO
     164             :       CALL dbt_tas_iterator_stop(iter)
     165             : !$OMP END PARALLEL
     166      192410 :       CALL timestop(handle2)
     167             : 
     168      192410 :       CALL timeset(routineN//"_alltoall", handle2)
     169      192410 :       CALL mp_comm%alltoall(num_send, num_rec, 2)
     170      192410 :       CALL timestop(handle2)
     171             : 
     172      192410 :       CALL timeset(routineN//"_buffer_fill", handle2)
     173      513316 :       DO iproc = 0, numnodes - 1
     174      320906 :          num_entries_recv(iproc) = num_rec(2*iproc)
     175      320906 :          num_blocks_recv(iproc) = num_rec(2*iproc + 1)
     176      320906 :          num_entries_send(iproc) = num_send(2*iproc)
     177      320906 :          num_blocks_send(iproc) = num_send(2*iproc + 1)
     178             : 
     179      320906 :          CALL dbt_buffer_create(buffer_send(iproc), num_blocks_send(iproc), num_entries_send(iproc))
     180             : 
     181      513316 :          CALL dbt_buffer_create(buffer_recv(iproc), num_blocks_recv(iproc), num_entries_recv(iproc))
     182             : 
     183             :       END DO
     184             : 
     185             : !$OMP PARALLEL DEFAULT(NONE) SHARED(matrix_in,matrix_out,tr_in,buffer_send,locks) &
     186      192410 : !$OMP PRIVATE(iter,blk_index,blk_size,block,iproc)
     187             :       CALL dbt_tas_iterator_start(iter, matrix_in)
     188             :       DO WHILE (dbt_tas_iterator_blocks_left(iter))
     189             :          CALL dbt_tas_iterator_next_block(iter, blk_index(1), blk_index(2), block, &
     190             :                                           row_size=blk_size(1), col_size=blk_size(2))
     191             :          IF (tr_in) THEN
     192             :             CALL dbt_tas_get_stored_coordinates(matrix_out, blk_index(2), blk_index(1), iproc)
     193             :          ELSE
     194             :             CALL dbt_tas_get_stored_coordinates(matrix_out, blk_index(1), blk_index(2), iproc)
     195             :          END IF
     196             :          CALL omp_set_lock(locks(iproc))
     197             :          CALL dbt_buffer_add_block(buffer_send(iproc), blk_index, block, transposed=tr_in)
     198             :          CALL omp_unset_lock(locks(iproc))
     199             :       END DO
     200             :       CALL dbt_tas_iterator_stop(iter)
     201             : !$OMP END PARALLEL
     202             : 
     203      192410 :       IF (move_prv) CALL dbt_tas_clear(matrix_in)
     204             : 
     205      192410 :       CALL timestop(handle2)
     206             : 
     207      192410 :       CALL timeset(routineN//"_communicate_buffer", handle2)
     208      192410 :       CALL dbt_tas_communicate_buffer(mp_comm, buffer_recv, buffer_send, req_array)
     209             : 
     210      513316 :       DO iproc = 0, numnodes - 1
     211      513316 :          CALL dbt_buffer_destroy(buffer_send(iproc))
     212             :       END DO
     213             : 
     214      192410 :       CALL timestop(handle2)
     215             : 
     216      192410 :       CALL timeset(routineN//"_buffer_obtain", handle2)
     217             : 
     218             :       ! TODO Add OpenMP to the buffer unpacking.
     219      513316 :       nblk = SUM(num_blocks_recv)
     220      557449 :       ALLOCATE (blks_to_allocate(nblk, 2))
     221             : 
     222      192410 :       bcount = 0
     223      513316 :       DO iproc = 0, numnodes - 1
     224      320906 :          CALL dbt_buffer_get_index(buffer_recv(iproc), index_recv)
     225     5271702 :          blks_to_allocate(bcount + 1:bcount + SIZE(index_recv, 1), :) = index_recv(:, :)
     226      320906 :          bcount = bcount + SIZE(index_recv, 1)
     227      834222 :          DEALLOCATE (index_recv)
     228             :       END DO
     229             : 
     230             : !TODO: Parallelize creation of block list.
     231      192410 : !$OMP PARALLEL DEFAULT(NONE) SHARED(matrix_out,nblk,blks_to_allocate) PRIVATE(nblk_per_thread,A,b)
     232             :       nblk_per_thread = nblk/omp_get_num_threads() + 1
     233             :       a = omp_get_thread_num()*nblk_per_thread + 1
     234             :       b = MIN(a + nblk_per_thread, nblk)
     235             :       CALL dbt_tas_reserve_blocks(matrix_out, blks_to_allocate(a:b, 1), blks_to_allocate(a:b, 2))
     236             : !$OMP END PARALLEL
     237      192410 :       DEALLOCATE (blks_to_allocate)
     238             : 
     239      513316 :       DO iproc = 0, numnodes - 1
     240             :          ! First, we need to get the index to create block
     241     2475398 :          DO WHILE (dbt_buffer_blocks_left(buffer_recv(iproc)))
     242     2154492 :             CALL dbt_buffer_get_next_block(buffer_recv(iproc), ndata, blk_index)
     243     2154492 :             CALL dbt_tas_blk_sizes(matrix_out, blk_index(1), blk_index(2), blk_size(1), blk_size(2))
     244     8617968 :             ALLOCATE (block(blk_size(1), blk_size(2)))
     245     2154492 :             CALL dbt_buffer_get_next_block(buffer_recv(iproc), ndata, blk_index, block)
     246     2154492 :             CALL dbt_tas_put_block(matrix_out, blk_index(1), blk_index(2), block, summation=summation)
     247     6784382 :             DEALLOCATE (block)
     248             :          END DO
     249      513316 :          CALL dbt_buffer_destroy(buffer_recv(iproc))
     250             :       END DO
     251             : 
     252      192410 :       CALL timestop(handle2)
     253             : 
     254      192410 :       CALL dbt_tas_finalize(matrix_out)
     255             : 
     256      192410 :       CALL timestop(handle)
     257     2181092 :    END SUBROUTINE
     258             : 
     259             : ! **************************************************************************************************
     260             : !> \brief Replicate matrix_in such that each submatrix of matrix_out is an exact copy of matrix_in
     261             : !> \param matrix_in ...
     262             : !> \param info ...
     263             : !> \param matrix_out ...
     264             : !> \param nodata Don't copy data but create matrix_out
     265             : !> \param move_data memory optimization: move data to matrix_out such that matrix_in is empty on return
     266             : !> \author Patrick Seewald
     267             : ! **************************************************************************************************
     268     1080264 :    SUBROUTINE dbt_tas_replicate(matrix_in, info, matrix_out, nodata, move_data)
     269             :       TYPE(dbm_type), INTENT(INOUT)                      :: matrix_in
     270             :       TYPE(dbt_tas_split_info), INTENT(IN)               :: info
     271             :       TYPE(dbt_tas_type), INTENT(OUT)                    :: matrix_out
     272             :       LOGICAL, INTENT(IN), OPTIONAL                      :: nodata, move_data
     273             : 
     274             :       INTEGER                                            :: a, b, nblk_per_thread, nblkcols, nblkrows
     275             :       INTEGER, DIMENSION(2)                              :: pdims
     276      360088 :       INTEGER, DIMENSION(:), POINTER                     :: col_blk_size, col_dist, row_blk_size, &
     277      180044 :                                                             row_dist
     278             :       TYPE(dbm_distribution_obj)                         :: dbm_dist
     279      180044 :       TYPE(dbt_tas_dist_arb), TARGET                     :: dir_dist
     280      180044 :       TYPE(dbt_tas_dist_repl), TARGET                    :: repl_dist
     281             : 
     282      360088 :       CLASS(dbt_tas_distribution), ALLOCATABLE :: col_dist_obj, row_dist_obj
     283      360088 :       CLASS(dbt_tas_rowcol_data), ALLOCATABLE :: row_bsize_obj, col_bsize_obj
     284      180044 :       TYPE(dbt_tas_blk_size_repl), TARGET :: repl_blksize
     285      180044 :       TYPE(dbt_tas_blk_size_arb), TARGET :: dir_blksize
     286      900220 :       TYPE(dbt_tas_distribution_type) :: dist
     287             :       INTEGER :: numnodes, ngroup
     288      180044 :       INTEGER(kind=omp_lock_kind), ALLOCATABLE, DIMENSION(:) :: locks
     289      180044 :       TYPE(dbt_buffer_type), ALLOCATABLE, DIMENSION(:) :: buffer_recv, buffer_send
     290      180044 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: num_blocks_recv, num_blocks_send, &
     291      180044 :                                                             num_entries_recv, num_entries_send, &
     292      180044 :                                                             num_rec, num_send
     293      180044 :       TYPE(mp_request_type), ALLOCATABLE, DIMENSION(:, :) :: req_array
     294      180044 :       INTEGER, ALLOCATABLE, DIMENSION(:, :) :: blks_to_allocate
     295             :       INTEGER, DIMENSION(2) :: blk_size
     296             :       INTEGER, DIMENSION(2) :: blk_index
     297             :       INTEGER(KIND=int_8), DIMENSION(2) :: blk_index_i8
     298             :       TYPE(dbm_iterator) :: iter
     299             :       INTEGER :: i, iproc, bcount, nblk
     300      180044 :       INTEGER, DIMENSION(:), ALLOCATABLE :: iprocs
     301             :       LOGICAL :: nodata_prv, move_prv
     302      180044 :       INTEGER(KIND=int_8), ALLOCATABLE, DIMENSION(:, :) :: index_recv
     303             :       INTEGER :: ndata
     304      180044 :       TYPE(mp_cart_type) :: mp_comm
     305             : 
     306      180044 :       REAL(KIND=dp), DIMENSION(:, :), POINTER :: block
     307             : 
     308             :       CHARACTER(LEN=*), PARAMETER :: routineN = 'dbt_tas_replicate'
     309             : 
     310             :       INTEGER :: handle, handle2
     311             : 
     312      180044 :       NULLIFY (col_blk_size, row_blk_size)
     313             : 
     314      180044 :       CALL timeset(routineN, handle)
     315             : 
     316      180044 :       IF (PRESENT(nodata)) THEN
     317       54829 :          nodata_prv = nodata
     318             :       ELSE
     319             :          nodata_prv = .FALSE.
     320             :       END IF
     321             : 
     322      180044 :       IF (PRESENT(move_data)) THEN
     323      125215 :          move_prv = move_data
     324             :       ELSE
     325             :          move_prv = .FALSE.
     326             :       END IF
     327             : 
     328      180044 :       row_blk_size => dbm_get_row_block_sizes(matrix_in)
     329      180044 :       col_blk_size => dbm_get_col_block_sizes(matrix_in)
     330      180044 :       nblkrows = SIZE(row_blk_size)
     331      180044 :       nblkcols = SIZE(col_blk_size)
     332      180044 :       dbm_dist = dbm_get_distribution(matrix_in)
     333      180044 :       row_dist => dbm_distribution_row_dist(dbm_dist)
     334      180044 :       col_dist => dbm_distribution_col_dist(dbm_dist)
     335             : 
     336      180044 :       mp_comm = info%mp_comm
     337      180044 :       ngroup = info%ngroup
     338             : 
     339      180044 :       numnodes = mp_comm%num_pe
     340      540132 :       pdims = mp_comm%num_pe_cart
     341             : 
     342      306686 :       SELECT CASE (info%split_rowcol)
     343             :       CASE (rowsplit)
     344      126642 :          repl_dist = dbt_tas_dist_repl(row_dist, pdims(1), nblkrows, info%ngroup, info%pgrid_split_size)
     345      126642 :          dir_dist = dbt_tas_dist_arb(col_dist, pdims(2), INT(nblkcols, KIND=int_8))
     346      126642 :          repl_blksize = dbt_tas_blk_size_repl(row_blk_size, info%ngroup)
     347      126642 :          dir_blksize = dbt_tas_blk_size_arb(col_blk_size)
     348      126642 :          ALLOCATE (row_dist_obj, source=repl_dist)
     349      126642 :          ALLOCATE (col_dist_obj, source=dir_dist)
     350      126642 :          ALLOCATE (row_bsize_obj, source=repl_blksize)
     351      253284 :          ALLOCATE (col_bsize_obj, source=dir_blksize)
     352             :       CASE (colsplit)
     353       53402 :          dir_dist = dbt_tas_dist_arb(row_dist, pdims(1), INT(nblkrows, KIND=int_8))
     354       53402 :          repl_dist = dbt_tas_dist_repl(col_dist, pdims(2), nblkcols, info%ngroup, info%pgrid_split_size)
     355       53402 :          dir_blksize = dbt_tas_blk_size_arb(row_blk_size)
     356       53402 :          repl_blksize = dbt_tas_blk_size_repl(col_blk_size, info%ngroup)
     357       53402 :          ALLOCATE (row_dist_obj, source=dir_dist)
     358       53402 :          ALLOCATE (col_dist_obj, source=repl_dist)
     359       53402 :          ALLOCATE (row_bsize_obj, source=dir_blksize)
     360      846818 :          ALLOCATE (col_bsize_obj, source=repl_blksize)
     361             :       END SELECT
     362             : 
     363      180044 :       CALL dbt_tas_distribution_new(dist, mp_comm, row_dist_obj, col_dist_obj, split_info=info)
     364             :       CALL dbt_tas_create(matrix_out, TRIM(dbm_get_name(matrix_in))//" replicated", &
     365      180044 :                           dist, row_bsize_obj, col_bsize_obj, own_dist=.TRUE.)
     366             : 
     367      180044 :       IF (nodata_prv) THEN
     368       54829 :          CALL dbt_tas_finalize(matrix_out)
     369       54829 :          CALL timestop(handle)
     370       54829 :          RETURN
     371             :       END IF
     372             : 
     373      583446 :       ALLOCATE (buffer_send(0:numnodes - 1))
     374      458231 :       ALLOCATE (buffer_recv(0:numnodes - 1))
     375      375645 :       ALLOCATE (num_blocks_recv(0:numnodes - 1))
     376      250430 :       ALLOCATE (num_blocks_send(0:numnodes - 1))
     377      250430 :       ALLOCATE (num_entries_recv(0:numnodes - 1))
     378      250430 :       ALLOCATE (num_entries_send(0:numnodes - 1))
     379      375645 :       ALLOCATE (num_rec(0:2*numnodes - 1))
     380      250430 :       ALLOCATE (num_send(0:2*numnodes - 1))
     381      540817 :       num_send(:) = 0
     382     1707709 :       ALLOCATE (req_array(1:numnodes, 4))
     383      250430 :       ALLOCATE (locks(0:numnodes - 1))
     384      333016 :       DO iproc = 0, numnodes - 1
     385      333016 :          CALL omp_init_lock(locks(iproc))
     386             :       END DO
     387             : 
     388             : !$OMP PARALLEL DEFAULT(NONE) SHARED(matrix_in,matrix_out,num_send,ngroup) &
     389      125215 : !$OMP PRIVATE(iter,blk_index,blk_size,iprocs)
     390             :       ALLOCATE (iprocs(ngroup))
     391             :       CALL dbm_iterator_start(iter, matrix_in)
     392             :       DO WHILE (dbm_iterator_blocks_left(iter))
     393             :          CALL dbm_iterator_next_block(iter, blk_index(1), blk_index(2), &
     394             :                                       row_size=blk_size(1), col_size=blk_size(2))
     395             :          CALL dbt_repl_get_stored_coordinates(matrix_out, blk_index(1), blk_index(2), iprocs)
     396             :          DO i = 1, SIZE(iprocs)
     397             : !$OMP ATOMIC
     398             :             num_send(2*iprocs(i)) = num_send(2*iprocs(i)) + PRODUCT(blk_size)
     399             : !$OMP ATOMIC
     400             :             num_send(2*iprocs(i) + 1) = num_send(2*iprocs(i) + 1) + 1
     401             :          END DO
     402             :       END DO
     403             :       CALL dbm_iterator_stop(iter)
     404             :       DEALLOCATE (iprocs)
     405             : !$OMP END PARALLEL
     406             : 
     407      125215 :       CALL timeset(routineN//"_alltoall", handle2)
     408      125215 :       CALL mp_comm%alltoall(num_send, num_rec, 2)
     409      125215 :       CALL timestop(handle2)
     410             : 
     411      333016 :       DO iproc = 0, numnodes - 1
     412      207801 :          num_entries_recv(iproc) = num_rec(2*iproc)
     413      207801 :          num_blocks_recv(iproc) = num_rec(2*iproc + 1)
     414      207801 :          num_entries_send(iproc) = num_send(2*iproc)
     415      207801 :          num_blocks_send(iproc) = num_send(2*iproc + 1)
     416             : 
     417      207801 :          CALL dbt_buffer_create(buffer_send(iproc), num_blocks_send(iproc), num_entries_send(iproc))
     418             : 
     419      333016 :          CALL dbt_buffer_create(buffer_recv(iproc), num_blocks_recv(iproc), num_entries_recv(iproc))
     420             : 
     421             :       END DO
     422             : 
     423             : !$OMP PARALLEL DEFAULT(NONE) SHARED(matrix_in,matrix_out,buffer_send,locks,ngroup) &
     424      125215 : !$OMP PRIVATE(iter,blk_index,blk_size,block,iprocs)
     425             :       ALLOCATE (iprocs(ngroup))
     426             :       CALL dbm_iterator_start(iter, matrix_in)
     427             :       DO WHILE (dbm_iterator_blocks_left(iter))
     428             :          CALL dbm_iterator_next_block(iter, blk_index(1), blk_index(2), block, &
     429             :                                       row_size=blk_size(1), col_size=blk_size(2))
     430             :          CALL dbt_repl_get_stored_coordinates(matrix_out, blk_index(1), blk_index(2), iprocs)
     431             :          DO i = 1, SIZE(iprocs)
     432             :             CALL omp_set_lock(locks(iprocs(i)))
     433             :             CALL dbt_buffer_add_block(buffer_send(iprocs(i)), INT(blk_index, KIND=int_8), block)
     434             :             CALL omp_unset_lock(locks(iprocs(i)))
     435             :          END DO
     436             :       END DO
     437             :       CALL dbm_iterator_stop(iter)
     438             :       DEALLOCATE (iprocs)
     439             : !$OMP END PARALLEL
     440             : 
     441      125215 :       IF (move_prv) CALL dbm_clear(matrix_in)
     442             : 
     443      125215 :       CALL timeset(routineN//"_communicate_buffer", handle2)
     444      125215 :       CALL dbt_tas_communicate_buffer(mp_comm, buffer_recv, buffer_send, req_array)
     445             : 
     446      333016 :       DO iproc = 0, numnodes - 1
     447      333016 :          CALL dbt_buffer_destroy(buffer_send(iproc))
     448             :       END DO
     449             : 
     450      125215 :       CALL timestop(handle2)
     451             : 
     452             :       ! TODO Add OpenMP to the buffer unpacking.
     453      333016 :       nblk = SUM(num_blocks_recv)
     454      375179 :       ALLOCATE (blks_to_allocate(nblk, 2))
     455             : 
     456      125215 :       bcount = 0
     457      333016 :       DO iproc = 0, numnodes - 1
     458      207801 :          CALL dbt_buffer_get_index(buffer_recv(iproc), index_recv)
     459     4613953 :          blks_to_allocate(bcount + 1:bcount + SIZE(index_recv, 1), :) = INT(index_recv(:, :))
     460      207801 :          bcount = bcount + SIZE(index_recv, 1)
     461      540817 :          DEALLOCATE (index_recv)
     462             :       END DO
     463             : 
     464             : !TODO: Parallelize creation of block list.
     465      125215 : !$OMP PARALLEL DEFAULT(NONE) SHARED(matrix_out,nblk,blks_to_allocate) PRIVATE(nblk_per_thread,A,b)
     466             :       nblk_per_thread = nblk/omp_get_num_threads() + 1
     467             :       a = omp_get_thread_num()*nblk_per_thread + 1
     468             :       b = MIN(a + nblk_per_thread, nblk)
     469             :       CALL dbm_reserve_blocks(matrix_out%matrix, blks_to_allocate(a:b, 1), blks_to_allocate(a:b, 2))
     470             : !$OMP END PARALLEL
     471      125215 :       DEALLOCATE (blks_to_allocate)
     472             : 
     473      333016 :       DO iproc = 0, numnodes - 1
     474             :          ! First, we need to get the index to create block
     475     2203076 :          DO WHILE (dbt_buffer_blocks_left(buffer_recv(iproc)))
     476     1995275 :             CALL dbt_buffer_get_next_block(buffer_recv(iproc), ndata, blk_index_i8)
     477     1995275 :             CALL dbt_tas_blk_sizes(matrix_out, blk_index_i8(1), blk_index_i8(2), blk_size(1), blk_size(2))
     478     7981100 :             ALLOCATE (block(blk_size(1), blk_size(2)))
     479     1995275 :             CALL dbt_buffer_get_next_block(buffer_recv(iproc), ndata, blk_index_i8, block)
     480   306411593 :             CALL dbm_put_block(matrix_out%matrix, INT(blk_index_i8(1)), INT(blk_index_i8(2)), block)
     481     6193626 :             DEALLOCATE (block)
     482             :          END DO
     483             : 
     484      333016 :          CALL dbt_buffer_destroy(buffer_recv(iproc))
     485             :       END DO
     486             : 
     487      125215 :       CALL dbt_tas_finalize(matrix_out)
     488             : 
     489      125215 :       CALL timestop(handle)
     490             : 
     491     2106384 :    END SUBROUTINE
     492             : 
     493             : ! **************************************************************************************************
     494             : !> \brief Merge submatrices of matrix_in to matrix_out by sum
     495             : !> \param matrix_out ...
     496             : !> \param matrix_in ...
     497             : !> \param summation ...
     498             : !> \param move_data memory optimization: move data to matrix_out such that matrix_in is empty on return
     499             : !> \author Patrick Seewald
     500             : ! **************************************************************************************************
     501       54829 :    SUBROUTINE dbt_tas_merge(matrix_out, matrix_in, summation, move_data)
     502             :       TYPE(dbm_type), INTENT(INOUT)                      :: matrix_out
     503             :       TYPE(dbt_tas_type), INTENT(INOUT)                  :: matrix_in
     504             :       LOGICAL, INTENT(IN), OPTIONAL                      :: summation, move_data
     505             : 
     506             :       CHARACTER(LEN=*), PARAMETER                        :: routineN = 'dbt_tas_merge'
     507             : 
     508             :       INTEGER                                            :: a, b, bcount, handle, handle2, iproc, &
     509             :                                                             nblk, nblk_per_thread, ndata, numnodes
     510       54829 :       INTEGER(KIND=int_8), ALLOCATABLE, DIMENSION(:, :)  :: index_recv
     511             :       INTEGER(KIND=int_8), DIMENSION(2)                  :: blk_index_i8
     512             :       INTEGER(kind=omp_lock_kind), ALLOCATABLE, &
     513       54829 :          DIMENSION(:)                                    :: locks
     514       54829 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: num_blocks_recv, num_blocks_send, &
     515       54829 :                                                             num_entries_recv, num_entries_send, &
     516       54829 :                                                             num_rec, num_send
     517       54829 :       INTEGER, ALLOCATABLE, DIMENSION(:, :)              :: blks_to_allocate
     518             :       INTEGER, DIMENSION(2)                              :: blk_index, blk_size
     519      109658 :       INTEGER, DIMENSION(:), POINTER                     :: col_block_sizes, row_block_sizes
     520             :       LOGICAL                                            :: move_prv
     521       54829 :       REAL(dp), DIMENSION(:, :), POINTER                 :: block
     522             :       TYPE(dbm_iterator)                                 :: iter
     523       54829 :       TYPE(dbt_buffer_type), ALLOCATABLE, DIMENSION(:)   :: buffer_recv, buffer_send
     524      274145 :       TYPE(dbt_tas_split_info)                           :: info
     525       54829 :       TYPE(mp_cart_type)                                 :: mp_comm
     526             :       TYPE(mp_request_type), ALLOCATABLE, &
     527       54829 :          DIMENSION(:, :)                                 :: req_array
     528             : 
     529             : !!
     530             : 
     531       54829 :       CALL timeset(routineN, handle)
     532             : 
     533       54829 :       IF (PRESENT(summation)) THEN
     534           0 :          IF (.NOT. summation) CALL dbm_clear(matrix_out)
     535             :       ELSE
     536       54829 :          CALL dbm_clear(matrix_out)
     537             :       END IF
     538             : 
     539       54829 :       IF (PRESENT(move_data)) THEN
     540       54829 :          move_prv = move_data
     541             :       ELSE
     542             :          move_prv = .FALSE.
     543             :       END IF
     544             : 
     545       54829 :       info = dbt_tas_info(matrix_in)
     546       54829 :       CALL dbt_tas_get_split_info(info, mp_comm=mp_comm)
     547       54829 :       numnodes = mp_comm%num_pe
     548             : 
     549      262950 :       ALLOCATE (buffer_send(0:numnodes - 1))
     550      208121 :       ALLOCATE (buffer_recv(0:numnodes - 1))
     551      164487 :       ALLOCATE (num_blocks_recv(0:numnodes - 1))
     552      109658 :       ALLOCATE (num_blocks_send(0:numnodes - 1))
     553      109658 :       ALLOCATE (num_entries_recv(0:numnodes - 1))
     554      109658 :       ALLOCATE (num_entries_send(0:numnodes - 1))
     555      164487 :       ALLOCATE (num_rec(0:2*numnodes - 1))
     556      109658 :       ALLOCATE (num_send(0:2*numnodes - 1))
     557      251755 :       num_send(:) = 0
     558      777655 :       ALLOCATE (req_array(1:numnodes, 4))
     559      109658 :       ALLOCATE (locks(0:numnodes - 1))
     560      153292 :       DO iproc = 0, numnodes - 1
     561      153292 :          CALL omp_init_lock(locks(iproc))
     562             :       END DO
     563             : 
     564             : !$OMP PARALLEL DEFAULT(NONE) SHARED(matrix_in,matrix_out,num_send) &
     565       54829 : !$OMP PRIVATE(iter,blk_index,blk_size,iproc)
     566             :       CALL dbm_iterator_start(iter, matrix_in%matrix)
     567             :       DO WHILE (dbm_iterator_blocks_left(iter))
     568             :          CALL dbm_iterator_next_block(iter, blk_index(1), blk_index(2), &
     569             :                                       row_size=blk_size(1), col_size=blk_size(2))
     570             :          CALL dbm_get_stored_coordinates(matrix_out, blk_index(1), blk_index(2), iproc)
     571             : !$OMP ATOMIC
     572             :          num_send(2*iproc) = num_send(2*iproc) + PRODUCT(blk_size)
     573             : !$OMP ATOMIC
     574             :          num_send(2*iproc + 1) = num_send(2*iproc + 1) + 1
     575             :       END DO
     576             :       CALL dbm_iterator_stop(iter)
     577             : !$OMP END PARALLEL
     578             : 
     579       54829 :       CALL timeset(routineN//"_alltoall", handle2)
     580       54829 :       CALL mp_comm%alltoall(num_send, num_rec, 2)
     581       54829 :       CALL timestop(handle2)
     582             : 
     583      153292 :       DO iproc = 0, numnodes - 1
     584       98463 :          num_entries_recv(iproc) = num_rec(2*iproc)
     585       98463 :          num_blocks_recv(iproc) = num_rec(2*iproc + 1)
     586       98463 :          num_entries_send(iproc) = num_send(2*iproc)
     587       98463 :          num_blocks_send(iproc) = num_send(2*iproc + 1)
     588             : 
     589       98463 :          CALL dbt_buffer_create(buffer_send(iproc), num_blocks_send(iproc), num_entries_send(iproc))
     590             : 
     591      153292 :          CALL dbt_buffer_create(buffer_recv(iproc), num_blocks_recv(iproc), num_entries_recv(iproc))
     592             : 
     593             :       END DO
     594             : 
     595             : !$OMP PARALLEL DEFAULT(NONE) SHARED(matrix_in,matrix_out,buffer_send,locks) &
     596       54829 : !$OMP PRIVATE(iter,blk_index,blk_size,block,iproc)
     597             :       CALL dbm_iterator_start(iter, matrix_in%matrix)
     598             :       DO WHILE (dbm_iterator_blocks_left(iter))
     599             :          CALL dbm_iterator_next_block(iter, blk_index(1), blk_index(2), block, &
     600             :                                       row_size=blk_size(1), col_size=blk_size(2))
     601             :          CALL dbm_get_stored_coordinates(matrix_out, blk_index(1), blk_index(2), iproc)
     602             :          CALL omp_set_lock(locks(iproc))
     603             :          CALL dbt_buffer_add_block(buffer_send(iproc), INT(blk_index, KIND=int_8), block)
     604             :          CALL omp_unset_lock(locks(iproc))
     605             :       END DO
     606             :       CALL dbm_iterator_stop(iter)
     607             : !$OMP END PARALLEL
     608             : 
     609       54829 :       IF (move_prv) CALL dbt_tas_clear(matrix_in)
     610             : 
     611       54829 :       CALL timeset(routineN//"_communicate_buffer", handle2)
     612       54829 :       CALL dbt_tas_communicate_buffer(mp_comm, buffer_recv, buffer_send, req_array)
     613             : 
     614      153292 :       DO iproc = 0, numnodes - 1
     615      153292 :          CALL dbt_buffer_destroy(buffer_send(iproc))
     616             :       END DO
     617             : 
     618       54829 :       CALL timestop(handle2)
     619             : 
     620             :       ! TODO Add OpenMP to the buffer unpacking.
     621      153292 :       nblk = SUM(num_blocks_recv)
     622      155886 :       ALLOCATE (blks_to_allocate(nblk, 2))
     623             : 
     624       54829 :       bcount = 0
     625      153292 :       DO iproc = 0, numnodes - 1
     626       98463 :          CALL dbt_buffer_get_index(buffer_recv(iproc), index_recv)
     627     1834771 :          blks_to_allocate(bcount + 1:bcount + SIZE(index_recv, 1), :) = INT(index_recv(:, :))
     628       98463 :          bcount = bcount + SIZE(index_recv, 1)
     629      251755 :          DEALLOCATE (index_recv)
     630             :       END DO
     631             : 
     632             : !TODO: Parallelize creation of block list.
     633       54829 : !$OMP PARALLEL DEFAULT(NONE) SHARED(matrix_out,nblk,blks_to_allocate) PRIVATE(nblk_per_thread,A,b)
     634             :       nblk_per_thread = nblk/omp_get_num_threads() + 1
     635             :       a = omp_get_thread_num()*nblk_per_thread + 1
     636             :       b = MIN(a + nblk_per_thread, nblk)
     637             :       CALL dbm_reserve_blocks(matrix_out, blks_to_allocate(a:b, 1), blks_to_allocate(a:b, 2))
     638             : !$OMP END PARALLEL
     639       54829 :       DEALLOCATE (blks_to_allocate)
     640             : 
     641      153292 :       DO iproc = 0, numnodes - 1
     642             :          ! First, we need to get the index to create block
     643      868154 :          DO WHILE (dbt_buffer_blocks_left(buffer_recv(iproc)))
     644      769691 :             CALL dbt_buffer_get_next_block(buffer_recv(iproc), ndata, blk_index_i8)
     645      769691 :             row_block_sizes => dbm_get_row_block_sizes(matrix_out)
     646      769691 :             col_block_sizes => dbm_get_col_block_sizes(matrix_out)
     647      769691 :             blk_size(1) = row_block_sizes(INT(blk_index_i8(1)))
     648      769691 :             blk_size(2) = col_block_sizes(INT(blk_index_i8(2)))
     649     3078764 :             ALLOCATE (block(blk_size(1), blk_size(2)))
     650      769691 :             CALL dbt_buffer_get_next_block(buffer_recv(iproc), ndata, blk_index_i8, block)
     651    77988338 :             CALL dbm_put_block(matrix_out, INT(blk_index_i8(1)), INT(blk_index_i8(2)), block, summation=.TRUE.)
     652     2407536 :             DEALLOCATE (block)
     653             :          END DO
     654      153292 :          CALL dbt_buffer_destroy(buffer_recv(iproc))
     655             :       END DO
     656             : 
     657       54829 :       CALL dbm_finalize(matrix_out)
     658             : 
     659       54829 :       CALL timestop(handle)
     660      471071 :    END SUBROUTINE
     661             : 
     662             : ! **************************************************************************************************
     663             : !> \brief get all indices from buffer
     664             : !> \param buffer ...
     665             : !> \param index ...
     666             : !> \author Patrick Seewald
     667             : ! **************************************************************************************************
     668      627170 :    SUBROUTINE dbt_buffer_get_index(buffer, index)
     669             :       TYPE(dbt_buffer_type), INTENT(IN)                  :: buffer
     670             :       INTEGER(KIND=int_8), ALLOCATABLE, &
     671             :          DIMENSION(:, :), INTENT(OUT)                    :: index
     672             : 
     673             :       CHARACTER(LEN=*), PARAMETER :: routineN = 'dbt_buffer_get_index'
     674             : 
     675             :       INTEGER                                            :: handle
     676             :       INTEGER, DIMENSION(2)                              :: indx_shape
     677             : 
     678      627170 :       CALL timeset(routineN, handle)
     679             : 
     680     3763020 :       indx_shape = SHAPE(buffer%indx) - [0, 1]
     681     2331451 :       ALLOCATE (INDEX(indx_shape(1), indx_shape(2)))
     682    11720426 :       INDEX(:, :) = buffer%indx(1:indx_shape(1), 1:indx_shape(2))
     683      627170 :       CALL timestop(handle)
     684      627170 :    END SUBROUTINE
     685             : 
     686             : ! **************************************************************************************************
     687             : !> \brief how many blocks left in iterator
     688             : !> \param buffer ...
     689             : !> \return ...
     690             : !> \author Patrick Seewald
     691             : ! **************************************************************************************************
     692     5546628 :    PURE FUNCTION dbt_buffer_blocks_left(buffer)
     693             :       TYPE(dbt_buffer_type), INTENT(IN)                  :: buffer
     694             :       LOGICAL                                            :: dbt_buffer_blocks_left
     695             : 
     696     5546628 :       dbt_buffer_blocks_left = buffer%endpos .LT. buffer%nblock
     697     5546628 :    END FUNCTION
     698             : 
     699             : ! **************************************************************************************************
     700             : !> \brief Create block buffer for MPI communication.
     701             : !> \param buffer block buffer
     702             : !> \param nblock number of blocks
     703             : !> \param ndata total number of block entries
     704             : !> \author Patrick Seewald
     705             : ! **************************************************************************************************
     706     1254340 :    SUBROUTINE dbt_buffer_create(buffer, nblock, ndata)
     707             :       TYPE(dbt_buffer_type), INTENT(OUT)                 :: buffer
     708             :       INTEGER, INTENT(IN)                                :: nblock, ndata
     709             : 
     710     1254340 :       buffer%nblock = nblock
     711     1254340 :       buffer%endpos = 0
     712     3408562 :       ALLOCATE (buffer%msg(ndata))
     713     3408562 :       ALLOCATE (buffer%indx(nblock, 3))
     714     1254340 :    END SUBROUTINE
     715             : 
     716             : ! **************************************************************************************************
     717             : !> \brief ...
     718             : !> \param buffer ...
     719             : !> \author Patrick Seewald
     720             : ! **************************************************************************************************
     721     1254340 :    SUBROUTINE dbt_buffer_destroy(buffer)
     722             :       TYPE(dbt_buffer_type), INTENT(INOUT)               :: buffer
     723             : 
     724     1254340 :       DEALLOCATE (buffer%msg)
     725     1254340 :       DEALLOCATE (buffer%indx)
     726     1254340 :       buffer%nblock = -1
     727     1254340 :       buffer%endpos = -1
     728     1254340 :    END SUBROUTINE dbt_buffer_destroy
     729             : 
     730             : ! **************************************************************************************************
     731             : !> \brief insert a block into block buffer (at current iterator position)
     732             : !> \param buffer ...
     733             : !> \param index index of block
     734             : !> \param block ...
     735             : !> \param transposed ...
     736             : !> \author Patrick Seewald
     737             : ! **************************************************************************************************
     738     4919458 :    SUBROUTINE dbt_buffer_add_block(buffer, index, block, transposed)
     739             :       TYPE(dbt_buffer_type), INTENT(INOUT)               :: buffer
     740             :       INTEGER(KIND=int_8), DIMENSION(2), INTENT(IN)      :: index
     741             :       REAL(dp), DIMENSION(:, :), INTENT(IN)              :: block
     742             :       LOGICAL, INTENT(IN), OPTIONAL                      :: transposed
     743             : 
     744             :       INTEGER                                            :: ndata, p, p_data
     745             :       INTEGER(KIND=int_8), DIMENSION(2)                  :: index_prv
     746             :       LOGICAL                                            :: tr
     747             : 
     748     4919458 :       IF (PRESENT(transposed)) THEN
     749     2154492 :          tr = transposed
     750             :       ELSE
     751             :          tr = .FALSE.
     752             :       END IF
     753             : 
     754     4919458 :       index_prv(:) = INDEX(:)
     755     4919458 :       IF (tr) THEN
     756      631278 :          CALL swap(index_prv)
     757             :       END IF
     758    14758374 :       ndata = PRODUCT(SHAPE(block))
     759             : 
     760     4919458 :       p = buffer%endpos
     761     4919458 :       IF (p .EQ. 0) THEN
     762             :          p_data = 0
     763             :       ELSE
     764     4469517 :          p_data = INT(buffer%indx(p, 3))
     765             :       END IF
     766             : 
     767     4919458 :       IF (tr) THEN
     768    78197877 :          buffer%msg(p_data + 1:p_data + ndata) = RESHAPE(TRANSPOSE(block), [ndata])
     769             :       ELSE
     770   638970071 :          buffer%msg(p_data + 1:p_data + ndata) = RESHAPE(block, [ndata])
     771             :       END IF
     772             : 
     773    14758374 :       buffer%indx(p + 1, 1:2) = index_prv(:)
     774     4919458 :       IF (p > 0) THEN
     775     4469517 :          buffer%indx(p + 1, 3) = buffer%indx(p, 3) + INT(ndata, KIND=int_8)
     776             :       ELSE
     777      449941 :          buffer%indx(p + 1, 3) = INT(ndata, KIND=int_8)
     778             :       END IF
     779     4919458 :       buffer%endpos = buffer%endpos + 1
     780     4919458 :    END SUBROUTINE
     781             : 
     782             : ! **************************************************************************************************
     783             : !> \brief get next block from buffer. Iterator is advanced only if block is retrieved or advance_iter.
     784             : !> \param buffer ...
     785             : !> \param ndata ...
     786             : !> \param index ...
     787             : !> \param block ...
     788             : !> \param advance_iter ...
     789             : !> \author Patrick Seewald
     790             : ! **************************************************************************************************
     791     9838916 :    SUBROUTINE dbt_buffer_get_next_block(buffer, ndata, index, block, advance_iter)
     792             :       TYPE(dbt_buffer_type), INTENT(INOUT)               :: buffer
     793             :       INTEGER, INTENT(OUT)                               :: ndata
     794             :       INTEGER(KIND=int_8), DIMENSION(2), INTENT(OUT)     :: index
     795             :       REAL(dp), DIMENSION(:, :), INTENT(OUT), OPTIONAL   :: block
     796             :       LOGICAL, INTENT(IN), OPTIONAL                      :: advance_iter
     797             : 
     798             :       INTEGER                                            :: p, p_data
     799             :       LOGICAL                                            :: do_advance
     800             : 
     801     9838916 :       do_advance = .FALSE.
     802     9838916 :       IF (PRESENT(advance_iter)) THEN
     803           0 :          do_advance = advance_iter
     804     9838916 :       ELSE IF (PRESENT(block)) THEN
     805     4919458 :          do_advance = .TRUE.
     806             :       END IF
     807             : 
     808     9838916 :       p = buffer%endpos
     809     9838916 :       IF (p .EQ. 0) THEN
     810             :          p_data = 0
     811             :       ELSE
     812     8939034 :          p_data = INT(buffer%indx(p, 3))
     813             :       END IF
     814             : 
     815     8939034 :       IF (p > 0) THEN
     816     8939034 :          ndata = INT(buffer%indx(p + 1, 3) - buffer%indx(p, 3))
     817             :       ELSE
     818      899882 :          ndata = INT(buffer%indx(p + 1, 3))
     819             :       END IF
     820    29516748 :       INDEX(:) = buffer%indx(p + 1, 1:2)
     821             : 
     822     9838916 :       IF (PRESENT(block)) THEN
     823    14758374 :          block(:, :) = RESHAPE(buffer%msg(p_data + 1:p_data + ndata), SHAPE(block))
     824             :       END IF
     825             : 
     826     9838916 :       IF (do_advance) buffer%endpos = buffer%endpos + 1
     827     9838916 :    END SUBROUTINE
     828             : 
     829             : ! **************************************************************************************************
     830             : !> \brief communicate buffer
     831             : !> \param mp_comm ...
     832             : !> \param buffer_recv ...
     833             : !> \param buffer_send ...
     834             : !> \param req_array ...
     835             : !> \author Patrick Seewald
     836             : ! **************************************************************************************************
     837     4370950 :    SUBROUTINE dbt_tas_communicate_buffer(mp_comm, buffer_recv, buffer_send, req_array)
     838             :       CLASS(mp_comm_type), INTENT(IN)                     :: mp_comm
     839             :       TYPE(dbt_buffer_type), DIMENSION(0:), &
     840             :          INTENT(INOUT)                                   :: buffer_recv, buffer_send
     841             :       TYPE(mp_request_type), DIMENSION(:, :), &
     842             :          INTENT(OUT)                                     :: req_array
     843             : 
     844             :       CHARACTER(LEN=*), PARAMETER :: routineN = 'dbt_tas_communicate_buffer'
     845             : 
     846             :       INTEGER                                            :: handle, iproc, numnodes, &
     847             :                                                             rec_counter, send_counter
     848             : 
     849      372454 :       CALL timeset(routineN, handle)
     850      372454 :       numnodes = mp_comm%num_pe
     851             : 
     852      372454 :       IF (numnodes > 1) THEN
     853             : 
     854      254716 :          send_counter = 0
     855      254716 :          rec_counter = 0
     856             : 
     857      764148 :          DO iproc = 0, numnodes - 1
     858      764148 :             IF (buffer_recv(iproc)%nblock > 0) THEN
     859      349371 :                rec_counter = rec_counter + 1
     860      349371 :                CALL mp_comm%irecv(buffer_recv(iproc)%indx, iproc, req_array(rec_counter, 3), tag=4)
     861      349371 :                CALL mp_comm%irecv(buffer_recv(iproc)%msg, iproc, req_array(rec_counter, 4), tag=7)
     862             :             END IF
     863             :          END DO
     864             : 
     865      764148 :          DO iproc = 0, numnodes - 1
     866      764148 :             IF (buffer_send(iproc)%nblock > 0) THEN
     867      349371 :                send_counter = send_counter + 1
     868      349371 :                CALL mp_comm%isend(buffer_send(iproc)%indx, iproc, req_array(send_counter, 1), tag=4)
     869      349371 :                CALL mp_comm%isend(buffer_send(iproc)%msg, iproc, req_array(send_counter, 2), tag=7)
     870             :             END IF
     871             :          END DO
     872             : 
     873      254716 :          IF (send_counter > 0) THEN
     874      222340 :             CALL mp_waitall(req_array(1:send_counter, 1:2))
     875             :          END IF
     876      254716 :          IF (rec_counter > 0) THEN
     877      243036 :             CALL mp_waitall(req_array(1:rec_counter, 3:4))
     878             :          END IF
     879             : 
     880             :       ELSE
     881      117738 :          IF (buffer_recv(0)%nblock > 0) THEN
     882     4053901 :             buffer_recv(0)%indx(:, :) = buffer_send(0)%indx(:, :)
     883   425183014 :             buffer_recv(0)%msg(:) = buffer_send(0)%msg(:)
     884             :          END IF
     885             :       END IF
     886      372454 :       CALL timestop(handle)
     887      372454 :    END SUBROUTINE
     888             : 
     889      247239 : END MODULE

Generated by: LCOV version 1.15