LCOV - code coverage report
Current view: top level - src - qs_fb_atomic_matrix_methods.F (source / functions) Hit Total Coverage
Test: CP2K Regtests (git:4dc10b3) Lines: 246 251 98.0 %
Date: 2024-11-21 06:45:46 Functions: 5 5 100.0 %

          Line data    Source code
       1             : !--------------------------------------------------------------------------------------------------!
       2             : !   CP2K: A general program to perform molecular dynamics simulations                              !
       3             : !   Copyright 2000-2024 CP2K developers group <https://cp2k.org>                                   !
       4             : !                                                                                                  !
       5             : !   SPDX-License-Identifier: GPL-2.0-or-later                                                      !
       6             : !--------------------------------------------------------------------------------------------------!
       7             : 
       8             : MODULE qs_fb_atomic_matrix_methods
       9             : 
      10             :    USE cp_dbcsr_api,                    ONLY: dbcsr_get_block_p,&
      11             :                                               dbcsr_get_info,&
      12             :                                               dbcsr_get_stored_coordinates,&
      13             :                                               dbcsr_type
      14             :    USE kinds,                           ONLY: dp,&
      15             :                                               int_8
      16             :    USE message_passing,                 ONLY: mp_para_env_type
      17             :    USE qs_fb_atomic_halo_types,         ONLY: fb_atomic_halo_atom_global2halo,&
      18             :                                               fb_atomic_halo_get,&
      19             :                                               fb_atomic_halo_has_data,&
      20             :                                               fb_atomic_halo_list_get,&
      21             :                                               fb_atomic_halo_list_obj,&
      22             :                                               fb_atomic_halo_obj
      23             :    USE qs_fb_com_tasks_types,           ONLY: &
      24             :         TASK_COST, TASK_DEST, TASK_N_RECORDS, TASK_PAIR, TASK_SRC, &
      25             :         fb_com_atom_pairs_calc_buffer_sizes, fb_com_atom_pairs_create, fb_com_atom_pairs_decode, &
      26             :         fb_com_atom_pairs_get, fb_com_atom_pairs_has_data, fb_com_atom_pairs_init, &
      27             :         fb_com_atom_pairs_nullify, fb_com_atom_pairs_obj, fb_com_atom_pairs_release, &
      28             :         fb_com_tasks_build_atom_pairs, fb_com_tasks_create, fb_com_tasks_decode_pair, &
      29             :         fb_com_tasks_encode_pair, fb_com_tasks_get, fb_com_tasks_nullify, fb_com_tasks_obj, &
      30             :         fb_com_tasks_release, fb_com_tasks_set, fb_com_tasks_transpose_dest_src
      31             :    USE qs_fb_matrix_data_types,         ONLY: fb_matrix_data_get,&
      32             :                                               fb_matrix_data_has_data,&
      33             :                                               fb_matrix_data_obj
      34             : #include "./base/base_uses.f90"
      35             : 
      36             :    IMPLICIT NONE
      37             : 
      38             :    PRIVATE
      39             : 
      40             :    CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'qs_fb_atomic_matrix_methods'
      41             : 
      42             :    PUBLIC :: fb_atmatrix_calc_size, &
      43             :              fb_atmatrix_construct, &
      44             :              fb_atmatrix_construct_2, &
      45             :              fb_atmatrix_generate_com_pairs_2
      46             : 
      47             : CONTAINS
      48             : 
      49             : ! **********************************************************************
      50             : !> \brief Calculates the atomic matrix size from a given DBCSR matrix
      51             : !>        and atomic halo. It also calculates the first row (col) or the
      52             : !>        row (col) atomic blocks in the atomic matrix
      53             : !> \param dbcsr_mat : pointer to the DBCSR matrix the atomic matrix is
      54             : !>                    to be constructed from
      55             : !> \param atomic_halo : the atomic halo used for defining the atomic
      56             : !>                      matrix from the DBCSR matrix
      57             : !> \param nrows : outputs total number of rows in the atomic matrix
      58             : !> \param ncols : outputs total number of cols in the atomic matrix
      59             : !> \param blk_row_start : first row in each atomic blk row in the
      60             : !>                        atomic matrix
      61             : !> \param blk_col_start : first col in each atomic blk col in the
      62             : !>                        atomic matrix
      63             : !> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
      64             : ! **************************************************************************************************
      65        1280 :    SUBROUTINE fb_atmatrix_calc_size(dbcsr_mat, &
      66             :                                     atomic_halo, &
      67             :                                     nrows, &
      68             :                                     ncols, &
      69         640 :                                     blk_row_start, &
      70         640 :                                     blk_col_start)
      71             :       TYPE(dbcsr_type), POINTER                          :: dbcsr_mat
      72             :       TYPE(fb_atomic_halo_obj), INTENT(IN)               :: atomic_halo
      73             :       INTEGER, INTENT(OUT)                               :: nrows, ncols
      74             :       INTEGER, DIMENSION(:), INTENT(OUT)                 :: blk_row_start, blk_col_start
      75             : 
      76             :       INTEGER                                            :: ii, natoms_in_halo
      77         640 :       INTEGER, DIMENSION(:), POINTER                     :: col_block_size_data, halo_atoms, &
      78         640 :                                                             row_block_size_data
      79             :       LOGICAL                                            :: check_ok
      80             : 
      81         640 :       NULLIFY (halo_atoms, row_block_size_data, col_block_size_data)
      82             : 
      83         640 :       CALL dbcsr_get_info(dbcsr_mat, row_blk_size=row_block_size_data, col_blk_size=col_block_size_data)
      84             :       CALL fb_atomic_halo_get(atomic_halo=atomic_halo, &
      85             :                               natoms=natoms_in_halo, &
      86         640 :                               halo_atoms=halo_atoms)
      87         640 :       check_ok = SIZE(blk_row_start) .GE. (natoms_in_halo + 1)
      88         640 :       CPASSERT(check_ok)
      89         640 :       check_ok = SIZE(blk_col_start) .GE. (natoms_in_halo + 1)
      90         640 :       CPASSERT(check_ok)
      91        6400 :       blk_row_start = 0
      92        6400 :       blk_col_start = 0
      93         640 :       nrows = 0
      94         640 :       ncols = 0
      95        5760 :       DO ii = 1, natoms_in_halo
      96        5120 :          blk_row_start(ii) = nrows + 1
      97        5120 :          blk_col_start(ii) = ncols + 1
      98        5120 :          nrows = nrows + row_block_size_data(halo_atoms(ii))
      99        5760 :          ncols = ncols + col_block_size_data(halo_atoms(ii))
     100             :       END DO
     101         640 :       blk_row_start(natoms_in_halo + 1) = nrows + 1
     102         640 :       blk_col_start(natoms_in_halo + 1) = ncols + 1
     103         640 :    END SUBROUTINE fb_atmatrix_calc_size
     104             : 
     105             : ! ****************************************************************************
     106             : !> \brief Constructs atomic matrix for filter basis method from a given
     107             : !>        DBCSR matrix and a set of atomic send and recv pairs
     108             : !>        corresponding to the matrix blocks that needs to be included
     109             : !>        in the atomic matrix. This version is for when we do MPI
     110             : !>        communications at every step, for each atomic matrix.
     111             : !> \param dbcsr_mat : the DBCSR matrix the atomic matrix is to be
     112             : !>                    constructed from
     113             : !> \param atomic_halo : the atomic halo conrresponding to this atomic
     114             : !>                      matrix
     115             : !> \param para_env : cp2k parallel environment
     116             : !> \param atomic_matrix : the atomic matrix to be constructed, it should
     117             : !>                        have already been allocated prior entering
     118             : !>                        this subroutine
     119             : !> \param blk_row_start : first row in each atomic blk row in the
     120             : !>                        atomic matrix
     121             : !> \param blk_col_start : first col in each atomic blk col in the
     122             : !>                        atomic matrix
     123             : !> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
     124             : ! **************************************************************************************************
     125         512 :    SUBROUTINE fb_atmatrix_construct(dbcsr_mat, &
     126             :                                     atomic_halo, &
     127             :                                     para_env, &
     128         512 :                                     atomic_matrix, &
     129         512 :                                     blk_row_start, &
     130         512 :                                     blk_col_start)
     131             :       TYPE(dbcsr_type), POINTER                          :: dbcsr_mat
     132             :       TYPE(fb_atomic_halo_obj), INTENT(IN)               :: atomic_halo
     133             :       TYPE(mp_para_env_type), POINTER                    :: para_env
     134             :       REAL(KIND=dp), DIMENSION(:, :), INTENT(OUT)        :: atomic_matrix
     135             :       INTEGER, DIMENSION(:), INTENT(IN)                  :: blk_row_start, blk_col_start
     136             : 
     137             :       CHARACTER(LEN=*), PARAMETER :: routineN = 'fb_atmatrix_construct'
     138             : 
     139             :       INTEGER :: handle, iatom, iatom_in_halo, ii, ind, ipair, ipe, jatom, jatom_in_halo, jj, &
     140             :          ncols_blk, npairs_recv, npairs_send, nrows_blk, numprocs, pe, recv_encode, send_encode
     141         512 :       INTEGER(KIND=int_8), DIMENSION(:), POINTER         :: pairs_recv, pairs_send
     142         512 :       INTEGER, ALLOCATABLE, DIMENSION(:) :: recv_disps, recv_pair_count, recv_pair_disps, &
     143         512 :          recv_sizes, send_disps, send_pair_count, send_pair_disps, send_sizes
     144         512 :       INTEGER, DIMENSION(:), POINTER                     :: col_block_size_data, row_block_size_data
     145             :       LOGICAL                                            :: found
     146         512 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: recv_buf, send_buf
     147         512 :       REAL(KIND=dp), DIMENSION(:, :), POINTER            :: mat_block
     148             :       TYPE(fb_com_atom_pairs_obj)                        :: atom_pairs_recv, atom_pairs_send
     149             : 
     150         512 :       CALL timeset(routineN, handle)
     151             : 
     152         512 :       NULLIFY (pairs_send, pairs_recv, mat_block, &
     153         512 :                row_block_size_data, col_block_size_data)
     154         512 :       CALL fb_com_atom_pairs_nullify(atom_pairs_send)
     155         512 :       CALL fb_com_atom_pairs_nullify(atom_pairs_recv)
     156             : 
     157             :       ! initialise atomic matrix
     158         512 :       IF (SIZE(atomic_matrix, 1) > 0 .AND. SIZE(atomic_matrix, 2) > 0) THEN
     159     5591552 :          atomic_matrix = 0.0_dp
     160             :       END IF
     161             : 
     162             :       ! generate send and receive atomic pairs
     163         512 :       CALL fb_com_atom_pairs_create(atom_pairs_send)
     164         512 :       CALL fb_com_atom_pairs_create(atom_pairs_recv)
     165             :       CALL fb_atmatrix_generate_com_pairs(dbcsr_mat, &
     166             :                                           atomic_halo, &
     167             :                                           para_env, &
     168             :                                           atom_pairs_send, &
     169         512 :                                           atom_pairs_recv)
     170             : 
     171             :       ! get com pair informations
     172             :       CALL fb_com_atom_pairs_get(atom_pairs=atom_pairs_send, &
     173             :                                  pairs=pairs_send, &
     174             :                                  npairs=npairs_send, &
     175         512 :                                  natoms_encode=send_encode)
     176             :       CALL fb_com_atom_pairs_get(atom_pairs=atom_pairs_recv, &
     177             :                                  pairs=pairs_recv, &
     178             :                                  npairs=npairs_recv, &
     179         512 :                                  natoms_encode=recv_encode)
     180             : 
     181             :       ! get para_env info
     182         512 :       numprocs = para_env%num_pe
     183             : 
     184             :       ! get dbcsr row and col block sizes
     185         512 :       CALL dbcsr_get_info(dbcsr_mat, row_blk_size=row_block_size_data, col_blk_size=col_block_size_data)
     186             : 
     187             :       ! allocate temporary arrays for send
     188        1536 :       ALLOCATE (send_sizes(numprocs))
     189        1024 :       ALLOCATE (send_disps(numprocs))
     190        1024 :       ALLOCATE (send_pair_count(numprocs))
     191        1024 :       ALLOCATE (send_pair_disps(numprocs))
     192             : 
     193             :       ! setup send buffer sizes
     194             :       CALL fb_com_atom_pairs_calc_buffer_sizes(atom_pairs_send, &
     195             :                                                numprocs, &
     196             :                                                row_block_size_data, &
     197             :                                                col_block_size_data, &
     198             :                                                send_sizes, &
     199             :                                                send_disps, &
     200             :                                                send_pair_count, &
     201         512 :                                                send_pair_disps)
     202             :       ! allocate send buffer
     203        2560 :       ALLOCATE (send_buf(SUM(send_sizes)))
     204             : 
     205             :       ! allocate temporary arrays for recv
     206        1024 :       ALLOCATE (recv_sizes(numprocs))
     207        1024 :       ALLOCATE (recv_disps(numprocs))
     208        1024 :       ALLOCATE (recv_pair_count(numprocs))
     209        1024 :       ALLOCATE (recv_pair_disps(numprocs))
     210             : 
     211             :       ! setup recv buffer sizes
     212             :       CALL fb_com_atom_pairs_calc_buffer_sizes(atom_pairs_recv, &
     213             :                                                numprocs, &
     214             :                                                row_block_size_data, &
     215             :                                                col_block_size_data, &
     216             :                                                recv_sizes, &
     217             :                                                recv_disps, &
     218             :                                                recv_pair_count, &
     219         512 :                                                recv_pair_disps)
     220             :       ! allocate recv buffer
     221        2560 :       ALLOCATE (recv_buf(SUM(recv_sizes)))
     222             :       ! do packing
     223        1536 :       DO ipe = 1, numprocs
     224             :          ! need to reuse send_sizes as an accumulative displacement, so recalculate
     225        1024 :          send_sizes(ipe) = 0
     226       19968 :          DO ipair = 1, send_pair_count(ipe)
     227             :             CALL fb_com_atom_pairs_decode(pairs_send(send_pair_disps(ipe) + ipair), &
     228       18432 :                                           pe, iatom, jatom, send_encode)
     229       18432 :             nrows_blk = row_block_size_data(iatom)
     230       18432 :             ncols_blk = col_block_size_data(jatom)
     231             :             CALL dbcsr_get_block_p(matrix=dbcsr_mat, &
     232             :                                    row=iatom, col=jatom, block=mat_block, &
     233       18432 :                                    found=found)
     234       19456 :             IF (.NOT. found) THEN
     235           0 :                CPABORT("Matrix block not found")
     236             :             ELSE
     237             :                ! we have found the matrix block
     238      258048 :                DO jj = 1, ncols_blk
     239     3373056 :                   DO ii = 1, nrows_blk
     240             :                      ! column major format in blocks
     241     3115008 :                      ind = send_disps(ipe) + send_sizes(ipe) + ii + (jj - 1)*nrows_blk
     242     3354624 :                      send_buf(ind) = mat_block(ii, jj)
     243             :                   END DO ! ii
     244             :                END DO ! jj
     245       18432 :                send_sizes(ipe) = send_sizes(ipe) + nrows_blk*ncols_blk
     246             :             END IF
     247             :          END DO ! ipair
     248             :       END DO ! ipe
     249             : 
     250             :       ! do communication
     251             :       CALL para_env%alltoall(send_buf, send_sizes, send_disps, &
     252         512 :                              recv_buf, recv_sizes, recv_disps)
     253             : 
     254             :       ! cleanup temporary arrays no longer needed
     255         512 :       DEALLOCATE (send_buf)
     256         512 :       DEALLOCATE (send_sizes)
     257         512 :       DEALLOCATE (send_disps)
     258         512 :       DEALLOCATE (send_pair_count)
     259         512 :       DEALLOCATE (send_pair_disps)
     260             : 
     261             :       ! do unpacking
     262        1536 :       DO ipe = 1, numprocs
     263        1024 :          recv_sizes(ipe) = 0
     264       19968 :          DO ipair = 1, recv_pair_count(ipe)
     265             :             CALL fb_com_atom_pairs_decode(pairs_recv(recv_pair_disps(ipe) + ipair), &
     266       18432 :                                           pe, iatom, jatom, recv_encode)
     267             :             ! nrows_blk = last_row(iatom) - first_row(iatom) + 1
     268             :             ! ncols_blk = last_col(jatom) - first_col(jatom) + 1
     269       18432 :             nrows_blk = row_block_size_data(iatom)
     270       18432 :             ncols_blk = col_block_size_data(jatom)
     271             :             ! get the corresponding atom indices in halo
     272             :             ! the atoms from the recv_pairs should be in the atomic_halo, because
     273             :             ! the recv_pairs are the matrix blocks requested by the local proc for
     274             :             ! this particular atomic_halo
     275             :             CALL fb_atomic_halo_atom_global2halo(atomic_halo, &
     276             :                                                  iatom, iatom_in_halo, &
     277       18432 :                                                  found)
     278       18432 :             CPASSERT(found)
     279             :             CALL fb_atomic_halo_atom_global2halo(atomic_halo, &
     280             :                                                  jatom, jatom_in_halo, &
     281       18432 :                                                  found)
     282       18432 :             CPASSERT(found)
     283             :             ! put block into the full conventional matrix
     284      258048 :             DO jj = 1, ncols_blk
     285     3373056 :                DO ii = 1, nrows_blk
     286             :                   ! column major format in blocks
     287     3115008 :                   ind = recv_disps(ipe) + recv_sizes(ipe) + ii + (jj - 1)*nrows_blk
     288             :                   atomic_matrix(blk_row_start(iatom_in_halo) + ii - 1, &
     289     3354624 :                                 blk_col_start(jatom_in_halo) + jj - 1) = recv_buf(ind)
     290             : 
     291             :                END DO ! ii
     292             :             END DO ! jj
     293       56320 :             recv_sizes(ipe) = recv_sizes(ipe) + nrows_blk*ncols_blk
     294             :          END DO ! ipair
     295             :       END DO ! ipe
     296             : 
     297             :       ! the constructed matrix is upper triangular, fill it up to full
     298       53248 :       DO ii = 2, SIZE(atomic_matrix, 1)
     299     2795520 :          DO jj = 1, ii - 1
     300     2795008 :             atomic_matrix(ii, jj) = atomic_matrix(jj, ii)
     301             :          END DO
     302             :       END DO
     303             : 
     304             :       ! cleanup rest of the temporary arrays
     305         512 :       DEALLOCATE (recv_buf)
     306         512 :       DEALLOCATE (recv_sizes)
     307         512 :       DEALLOCATE (recv_disps)
     308         512 :       DEALLOCATE (recv_pair_count)
     309         512 :       DEALLOCATE (recv_pair_disps)
     310         512 :       CALL fb_com_atom_pairs_release(atom_pairs_send)
     311         512 :       CALL fb_com_atom_pairs_release(atom_pairs_recv)
     312             : 
     313         512 :       CALL timestop(handle)
     314             : 
     315        1024 :    END SUBROUTINE fb_atmatrix_construct
     316             : 
     317             : ! ****************************************************************************
     318             : !> \brief Constructs atomic matrix for filter basis method from a given
     319             : !>        DBCSR matrix and a set of atomic send and recv pairs
     320             : !>        corresponding to the matrix blocks that needs to be included
     321             : !>        in the atomic matrix. This version is for when we do MPI
     322             : !>        communications collectively in one go at the beginning.
     323             : !> \param matrix_storage : data storing the relevant DBCSR matrix blocks
     324             : !>                         needed for constructing the atomic matrix
     325             : !> \param atomic_halo : the atomic halo conrresponding to this atomic
     326             : !>                      matrix
     327             : !> \param atomic_matrix : the atomic matrix to be constructed, it should
     328             : !>                        have already been allocated prior entering
     329             : !>                        this subroutine
     330             : !> \param blk_row_start : first row in each atomic blk row in the
     331             : !>                        atomic matrix
     332             : !> \param blk_col_start : first col in each atomic blk col in the
     333             : !>                        atomic matrix
     334             : !> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
     335             : ! **************************************************************************************************
     336         384 :    SUBROUTINE fb_atmatrix_construct_2(matrix_storage, &
     337             :                                       atomic_halo, &
     338         128 :                                       atomic_matrix, &
     339         256 :                                       blk_row_start, &
     340         128 :                                       blk_col_start)
     341             :       TYPE(fb_matrix_data_obj), INTENT(IN)               :: matrix_storage
     342             :       TYPE(fb_atomic_halo_obj), INTENT(IN)               :: atomic_halo
     343             :       REAL(KIND=dp), DIMENSION(:, :), INTENT(OUT)        :: atomic_matrix
     344             :       INTEGER, DIMENSION(:), INTENT(IN)                  :: blk_row_start, blk_col_start
     345             : 
     346             :       CHARACTER(LEN=*), PARAMETER :: routineN = 'fb_atmatrix_construct_2'
     347             : 
     348             :       INTEGER                                            :: handle, iatom, iatom_global, icol, ii, &
     349             :                                                             irow, jatom, jatom_global, jj, &
     350             :                                                             natoms_in_halo
     351         128 :       INTEGER, DIMENSION(:), POINTER                     :: halo_atoms
     352             :       LOGICAL                                            :: check_ok, found
     353         128 :       REAL(KIND=dp), DIMENSION(:, :), POINTER            :: blk_p
     354             : 
     355         128 :       CALL timeset(routineN, handle)
     356             : 
     357         128 :       check_ok = fb_matrix_data_has_data(matrix_storage)
     358         128 :       CPASSERT(check_ok)
     359         128 :       check_ok = fb_atomic_halo_has_data(atomic_halo)
     360         128 :       CPASSERT(check_ok)
     361             : 
     362         128 :       NULLIFY (halo_atoms, blk_p)
     363             : 
     364             :       ! initialise atomic matrix
     365         128 :       IF (SIZE(atomic_matrix, 1) > 0 .AND. SIZE(atomic_matrix, 2) > 0) THEN
     366     1397888 :          atomic_matrix = 0.0_dp
     367             :       END IF
     368             : 
     369             :       ! get atomic halo information
     370             :       CALL fb_atomic_halo_get(atomic_halo=atomic_halo, &
     371             :                               natoms=natoms_in_halo, &
     372         128 :                               halo_atoms=halo_atoms)
     373             : 
     374             :       ! construct atomic matrix using data from matrix_storage
     375        1152 :       DO iatom = 1, natoms_in_halo
     376        1024 :          iatom_global = halo_atoms(iatom)
     377        9344 :          DO jatom = 1, natoms_in_halo
     378        8192 :             jatom_global = halo_atoms(jatom)
     379             :             ! atomic matrices are symmetric, fill only the top
     380             :             ! triangular part
     381        9216 :             IF (jatom_global .GE. iatom_global) THEN
     382             :                CALL fb_matrix_data_get(matrix_storage, &
     383             :                                        iatom_global, &
     384             :                                        jatom_global, &
     385             :                                        blk_p, &
     386        4608 :                                        found)
     387             :                ! copy data to atomic_matrix if found
     388        4608 :                IF (found) THEN
     389       64512 :                   DO jj = 1, SIZE(blk_p, 2)
     390       59904 :                      icol = blk_col_start(jatom) + jj - 1
     391      843264 :                      DO ii = 1, SIZE(blk_p, 1)
     392      778752 :                         irow = blk_row_start(iatom) + ii - 1
     393      838656 :                         atomic_matrix(irow, icol) = blk_p(ii, jj)
     394             :                      END DO ! ii
     395             :                   END DO ! jj
     396             :                END IF
     397             :             END IF
     398             :          END DO ! jatom
     399             :       END DO ! iatom
     400             : 
     401             :       ! the constructed matrix is upper triangular, fill it up to full
     402       13312 :       DO ii = 2, SIZE(atomic_matrix, 1)
     403      698880 :          DO jj = 1, ii - 1
     404      698752 :             atomic_matrix(ii, jj) = atomic_matrix(jj, ii)
     405             :          END DO
     406             :       END DO
     407             : 
     408         128 :       CALL timestop(handle)
     409             : 
     410         128 :    END SUBROUTINE fb_atmatrix_construct_2
     411             : 
     412             : ! ****************************************************************************
     413             : !> \brief generate list of blocks (atom pairs) of a DBCSR matrix to be
     414             : !>        sent and received in order to construct an atomic matrix
     415             : !>        corresponding to a given atomic halo. This version is for the case
     416             : !>        when we do MPI communications at each step, for each atomic matrix.
     417             : !> \param dbcsr_mat : The DBCSR matrix the atom blocks come from
     418             : !> \param atomic_halo : the atomic halo used to construct the atomic
     419             : !>                      matrix
     420             : !> \param para_env : cp2k parallel environment
     421             : !> \param atom_pairs_send : list of atom blocks from local DBCSR matrix
     422             : !>                          data to be sent
     423             : !> \param atom_pairs_recv : list of atom blocks from remote DBCSR matrix
     424             : !>                          data to be recveived
     425             : !> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
     426             : ! **************************************************************************************************
     427         512 :    SUBROUTINE fb_atmatrix_generate_com_pairs(dbcsr_mat, &
     428             :                                              atomic_halo, &
     429             :                                              para_env, &
     430             :                                              atom_pairs_send, &
     431             :                                              atom_pairs_recv)
     432             :       TYPE(dbcsr_type), POINTER                          :: dbcsr_mat
     433             :       TYPE(fb_atomic_halo_obj), INTENT(IN)               :: atomic_halo
     434             :       TYPE(mp_para_env_type), POINTER                    :: para_env
     435             :       TYPE(fb_com_atom_pairs_obj), INTENT(INOUT)         :: atom_pairs_send, atom_pairs_recv
     436             : 
     437             :       CHARACTER(LEN=*), PARAMETER :: routineN = 'fb_atmatrix_generate_com_pairs'
     438             : 
     439             :       INTEGER :: counter, handle, iatom, iatom_global, itask, jatom, jatom_global, natoms_in_halo, &
     440             :          nblkrows_total, nencode, ntasks_recv, ntasks_send, src
     441             :       INTEGER(KIND=int_8)                                :: pair
     442         512 :       INTEGER(KIND=int_8), DIMENSION(:, :), POINTER      :: tasks_recv, tasks_send
     443         512 :       INTEGER, DIMENSION(:), POINTER                     :: halo_atoms
     444             :       LOGICAL                                            :: found
     445         512 :       REAL(KIND=dp), DIMENSION(:, :), POINTER            :: mat_block
     446             :       TYPE(fb_com_tasks_obj)                             :: com_tasks_recv, com_tasks_send
     447             : 
     448         512 :       CALL timeset(routineN, handle)
     449             : 
     450         512 :       NULLIFY (halo_atoms, tasks_send, tasks_recv)
     451         512 :       CALL fb_com_tasks_nullify(com_tasks_send)
     452         512 :       CALL fb_com_tasks_nullify(com_tasks_recv)
     453             : 
     454             :       ! initialise atom_pairs_send and atom_pairs_receive
     455         512 :       IF (fb_com_atom_pairs_has_data(atom_pairs_send)) THEN
     456         512 :          CALL fb_com_atom_pairs_init(atom_pairs_send)
     457             :       ELSE
     458           0 :          CALL fb_com_atom_pairs_create(atom_pairs_send)
     459             :       END IF
     460         512 :       IF (fb_com_atom_pairs_has_data(atom_pairs_recv)) THEN
     461         512 :          CALL fb_com_atom_pairs_init(atom_pairs_recv)
     462             :       ELSE
     463           0 :          CALL fb_com_atom_pairs_create(atom_pairs_recv)
     464             :       END IF
     465             : 
     466             :       ! get atomic halo information
     467             :       CALL fb_atomic_halo_get(atomic_halo=atomic_halo, &
     468             :                               natoms=natoms_in_halo, &
     469         512 :                               halo_atoms=halo_atoms)
     470             : 
     471             :       ! get the total number of atoms, we can obtain this directly
     472             :       ! from the global block row dimension of the dbcsr matrix
     473             :       CALL dbcsr_get_info(matrix=dbcsr_mat, &
     474         512 :                           nblkrows_total=nblkrows_total)
     475             : 
     476             :       ! generate recv task list (tasks_recv)
     477             : 
     478             :       ! a recv task corresponds to the copying or transferring of a
     479             :       ! matrix block in the part of the DBCSR matrix owned by the src
     480             :       ! proc to this proc in order to construct the atomic matrix
     481             :       ! corresponding to the given atomic halo. As an upper-bound, the
     482             :       ! number of matrix blocks required do not exceed natoms_in_halo**2
     483         512 :       ntasks_recv = natoms_in_halo*natoms_in_halo
     484             : 
     485        1536 :       ALLOCATE (tasks_recv(TASK_N_RECORDS, ntasks_recv))
     486             : 
     487             :       ! destination proc is always the local processor
     488             :       ASSOCIATE (dest => para_env%mepos)
     489             :          ! now that tasks_recv has been allocated, generate the tasks
     490         512 :          itask = 1
     491        4608 :          DO iatom = 1, natoms_in_halo
     492        4096 :             iatom_global = halo_atoms(iatom)
     493       37376 :             DO jatom = 1, natoms_in_halo
     494       32768 :                jatom_global = halo_atoms(jatom)
     495             :                ! atomic matrix is symmetric, and only upper triangular part
     496             :                ! is stored in DBCSR matrix
     497       36864 :                IF (jatom_global .GE. iatom_global) THEN
     498             :                   ! find the source proc that supposed to own the block
     499             :                   ! (iatom_global, jatom_global)
     500             :                   CALL dbcsr_get_stored_coordinates(dbcsr_mat, &
     501             :                                                     iatom_global, &
     502             :                                                     jatom_global, &
     503       18432 :                                                     processor=src)
     504             :                   ! we must encode the global atom indices rather the halo
     505             :                   ! atomic indices in each task, because halo atomic
     506             :                   ! indices are local to each halo, and each processor is
     507             :                   ! working on a different halo local to them. So one
     508             :                   ! processor would not have the information about the halo
     509             :                   ! on another processor, rendering the halo atomic indices
     510             :                   ! rather useless outside the local processor.
     511       18432 :                   tasks_recv(TASK_DEST, itask) = dest
     512       18432 :                   tasks_recv(TASK_SRC, itask) = src
     513             : 
     514             :                   CALL fb_com_tasks_encode_pair(tasks_recv(TASK_PAIR, itask), &
     515             :                                                 iatom_global, jatom_global, &
     516       18432 :                                                 nblkrows_total)
     517             :                   ! calculation of cost not implemented at the moment
     518       18432 :                   tasks_recv(TASK_COST, itask) = 0
     519       18432 :                   itask = itask + 1
     520             :                END IF
     521             :             END DO ! jatom
     522             :          END DO ! iatom
     523             :       END ASSOCIATE
     524             : 
     525             :       ! get the actual number of tasks
     526         512 :       ntasks_recv = itask - 1
     527             : 
     528             :       ! create tasks
     529         512 :       CALL fb_com_tasks_create(com_tasks_recv)
     530         512 :       CALL fb_com_tasks_create(com_tasks_send)
     531             : 
     532             :       CALL fb_com_tasks_set(com_tasks=com_tasks_recv, &
     533             :                             task_dim=TASK_N_RECORDS, &
     534             :                             ntasks=ntasks_recv, &
     535             :                             nencode=nblkrows_total, &
     536         512 :                             tasks=tasks_recv)
     537             : 
     538             :       ! genearte the send task list (tasks_send) from the recv task list
     539             :       CALL fb_com_tasks_transpose_dest_src(com_tasks_recv, ">", com_tasks_send, &
     540         512 :                                            para_env)
     541             : 
     542             :       CALL fb_com_tasks_get(com_tasks=com_tasks_send, &
     543             :                             ntasks=ntasks_send, &
     544             :                             tasks=tasks_send, &
     545         512 :                             nencode=nencode)
     546             : 
     547             :       ! because the atomic_halos and the neighbor_list_set used to
     548             :       ! generate the sparse structure of the DBCSR matrix do not
     549             :       ! necessarily have to coincide, we must check of the blocks in
     550             :       ! tasks_send (these should be local to the processor) do indeed
     551             :       ! exist in the DBCSR matrix, if not, then we need to prune these
     552             :       ! out of the task list
     553             : 
     554         512 :       counter = 0
     555       18944 :       DO itask = 1, ntasks_send
     556       18432 :          pair = tasks_send(TASK_PAIR, itask)
     557       18432 :          CALL fb_com_tasks_decode_pair(pair, iatom_global, jatom_global, nencode)
     558             :          ! check if block exists in DBCSR matrix
     559             :          CALL dbcsr_get_block_p(matrix=dbcsr_mat, &
     560             :                                 row=iatom_global, col=jatom_global, block=mat_block, &
     561       18432 :                                 found=found)
     562       18944 :          IF (found) THEN
     563       18432 :             counter = counter + 1
     564             :             ! we can do this here, because essencially we are inspecting
     565             :             ! the send tasks one by one, and then omit ones which the
     566             :             ! block is not found in the DBCSR matrix. itask is always
     567             :             ! .GE. counter
     568       92160 :             tasks_send(1:TASK_N_RECORDS, counter) = tasks_send(1:TASK_N_RECORDS, itask)
     569             :          END IF
     570             :       END DO
     571             :       ! the new send task list should have size counter. counter
     572             :       ! .LE. the old ntasks_send, thus the task list does not really
     573             :       ! need to be reallocated (as it is just a temporary array), and
     574             :       ! the useful data will cutoff at counter, and the rest of the
     575             :       ! array will just be garbage
     576         512 :       ntasks_send = counter
     577             : 
     578             :       ! tasks_send is set through the pointer already
     579             :       CALL fb_com_tasks_set(com_tasks=com_tasks_send, &
     580         512 :                             ntasks=ntasks_send)
     581             : 
     582             :       ! now, re-distribute the new send tasks list to other processors
     583             :       ! to build the updated recv tasks list
     584             :       CALL fb_com_tasks_transpose_dest_src(com_tasks_recv, "<", com_tasks_send, &
     585         512 :                                            para_env)
     586             : 
     587             :       ! task lists are now complete, now construct the atom_pairs_send
     588             :       ! and atom_pairs_recv from the tasks lists
     589             :       CALL fb_com_tasks_build_atom_pairs(com_tasks=com_tasks_send, &
     590             :                                          atom_pairs=atom_pairs_send, &
     591             :                                          natoms_encode=nencode, &
     592         512 :                                          send_or_recv="send")
     593             :       CALL fb_com_tasks_build_atom_pairs(com_tasks=com_tasks_recv, &
     594             :                                          atom_pairs=atom_pairs_recv, &
     595             :                                          natoms_encode=nencode, &
     596         512 :                                          send_or_recv="recv")
     597             : 
     598             :       ! cleanup
     599         512 :       CALL fb_com_tasks_release(com_tasks_recv)
     600         512 :       CALL fb_com_tasks_release(com_tasks_send)
     601             : 
     602         512 :       CALL timestop(handle)
     603             : 
     604        1536 :    END SUBROUTINE fb_atmatrix_generate_com_pairs
     605             : 
     606             : ! ****************************************************************************
     607             : !> \brief generate list of blocks (atom pairs) of a DBCSR matrix to be
     608             : !>        sent and received in order to construct all local atomic matrices
     609             : !>        corresponding to the atomic halos. This version is for the case
     610             : !>        when we do MPI communications collectively in one go at the
     611             : !>        beginning.
     612             : !> \param dbcsr_mat : The DBCSR matrix the atom blocks come from
     613             : !> \param atomic_halos : the list of all atomic halos local to the process
     614             : !> \param para_env : cp2k parallel environment
     615             : !> \param atom_pairs_send : list of atom blocks from local DBCSR matrix
     616             : !>                          data to be sent
     617             : !> \param atom_pairs_recv : list of atom blocks from remote DBCSR matrix
     618             : !>                          data to be recveived
     619             : !> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
     620             : ! **************************************************************************************************
     621          32 :    SUBROUTINE fb_atmatrix_generate_com_pairs_2(dbcsr_mat, &
     622             :                                                atomic_halos, &
     623             :                                                para_env, &
     624             :                                                atom_pairs_send, &
     625             :                                                atom_pairs_recv)
     626             :       TYPE(dbcsr_type), POINTER                          :: dbcsr_mat
     627             :       TYPE(fb_atomic_halo_list_obj), INTENT(IN)          :: atomic_halos
     628             :       TYPE(mp_para_env_type), POINTER                    :: para_env
     629             :       TYPE(fb_com_atom_pairs_obj), INTENT(INOUT)         :: atom_pairs_send, atom_pairs_recv
     630             : 
     631             :       CHARACTER(LEN=*), PARAMETER :: routineN = 'fb_atmatrix_generate_com_pairs_2'
     632             : 
     633             :       INTEGER :: counter, handle, iatom, iatom_global, ihalo, itask, jatom, jatom_global, &
     634             :          natoms_in_halo, nblkrows_total, nencode, nhalos, ntasks_recv, ntasks_send, src
     635             :       INTEGER(KIND=int_8)                                :: pair
     636          32 :       INTEGER(KIND=int_8), DIMENSION(:, :), POINTER      :: tasks_recv, tasks_send
     637          32 :       INTEGER, DIMENSION(:), POINTER                     :: halo_atoms
     638             :       LOGICAL                                            :: found
     639          32 :       REAL(KIND=dp), DIMENSION(:, :), POINTER            :: mat_block
     640          32 :       TYPE(fb_atomic_halo_obj), DIMENSION(:), POINTER    :: halos
     641             :       TYPE(fb_com_tasks_obj)                             :: com_tasks_recv, com_tasks_send
     642             : 
     643          32 :       CALL timeset(routineN, handle)
     644             : 
     645          32 :       NULLIFY (halo_atoms, tasks_send, tasks_recv)
     646          32 :       CALL fb_com_tasks_nullify(com_tasks_send)
     647          32 :       CALL fb_com_tasks_nullify(com_tasks_recv)
     648             : 
     649             :       ! initialise atom_pairs_send and atom_pairs_receive
     650          32 :       IF (fb_com_atom_pairs_has_data(atom_pairs_send)) THEN
     651          32 :          CALL fb_com_atom_pairs_init(atom_pairs_send)
     652             :       ELSE
     653           0 :          CALL fb_com_atom_pairs_create(atom_pairs_send)
     654             :       END IF
     655          32 :       IF (fb_com_atom_pairs_has_data(atom_pairs_recv)) THEN
     656          32 :          CALL fb_com_atom_pairs_init(atom_pairs_recv)
     657             :       ELSE
     658           0 :          CALL fb_com_atom_pairs_create(atom_pairs_recv)
     659             :       END IF
     660             : 
     661             :       ! get atomic halo list information
     662             :       CALL fb_atomic_halo_list_get(atomic_halos=atomic_halos, &
     663             :                                    nhalos=nhalos, &
     664          32 :                                    halos=halos)
     665             :       ! get the total number of atoms, we can obtain this directly
     666             :       ! from the global block row dimension of the dbcsr matrix
     667             :       CALL dbcsr_get_info(matrix=dbcsr_mat, &
     668          32 :                           nblkrows_total=nblkrows_total)
     669             : 
     670             :       ! estimate the maximum number of blocks to be received
     671          32 :       ntasks_recv = 0
     672         160 :       DO ihalo = 1, nhalos
     673             :          CALL fb_atomic_halo_get(atomic_halo=halos(ihalo), &
     674         128 :                                  natoms=natoms_in_halo)
     675         160 :          ntasks_recv = ntasks_recv + natoms_in_halo*natoms_in_halo
     676             :       END DO
     677          96 :       ALLOCATE (tasks_recv(TASK_N_RECORDS, ntasks_recv))
     678             : 
     679             :       ! now that tasks_recv has been allocated, generate the tasks
     680             : 
     681             :       ! destination proc is always the local process
     682             :       ASSOCIATE (dest => para_env%mepos)
     683          32 :          itask = 1
     684         160 :          DO ihalo = 1, nhalos
     685             :             CALL fb_atomic_halo_get(atomic_halo=halos(ihalo), &
     686             :                                     natoms=natoms_in_halo, &
     687         128 :                                     halo_atoms=halo_atoms)
     688        1184 :             DO iatom = 1, natoms_in_halo
     689        1024 :                iatom_global = halo_atoms(iatom)
     690        9344 :                DO jatom = 1, natoms_in_halo
     691        8192 :                   jatom_global = halo_atoms(jatom)
     692             :                   ! atomic matrices are always symmetric, treat it as such.
     693             :                   ! so only deal with upper triangular parts
     694        9216 :                   IF (jatom_global .GE. iatom_global) THEN
     695             :                      ! find the source proc that supposed to own the block
     696             :                      ! (iatom_global, jatom_global)
     697             :                      CALL dbcsr_get_stored_coordinates(dbcsr_mat, &
     698             :                                                        iatom_global, &
     699             :                                                        jatom_global, &
     700        4608 :                                                        processor=src)
     701             :                      ! we must encode the global atom indices rather the halo
     702             :                      ! atomic indices in each task, because halo atomic indices
     703             :                      ! are local to each halo, and each processor is working on a
     704             :                      ! different halo local to them. So one processor would not
     705             :                      ! have the information about the halo on another processor,
     706             :                      ! rendering the halo atomic indices rather useless outside
     707             :                      ! the local processor.
     708        4608 :                      tasks_recv(TASK_DEST, itask) = dest
     709        4608 :                      tasks_recv(TASK_SRC, itask) = src
     710             :                      CALL fb_com_tasks_encode_pair(tasks_recv(TASK_PAIR, itask), &
     711             :                                                    iatom_global, jatom_global, &
     712        4608 :                                                    nblkrows_total)
     713             :                      ! calculation of cost not implemented at the moment
     714        4608 :                      tasks_recv(TASK_COST, itask) = 0
     715        4608 :                      itask = itask + 1
     716             :                   END IF
     717             :                END DO ! jatom
     718             :             END DO ! iatom
     719             :          END DO ! ihalo
     720             :       END ASSOCIATE
     721             : 
     722             :       ! set the actual number of tasks obtained
     723          32 :       ntasks_recv = itask - 1
     724             : 
     725             :       ! create tasks
     726          32 :       CALL fb_com_tasks_create(com_tasks_recv)
     727          32 :       CALL fb_com_tasks_create(com_tasks_send)
     728             : 
     729             :       CALL fb_com_tasks_set(com_tasks=com_tasks_recv, &
     730             :                             task_dim=TASK_N_RECORDS, &
     731             :                             ntasks=ntasks_recv, &
     732             :                             nencode=nblkrows_total, &
     733          32 :                             tasks=tasks_recv)
     734             : 
     735             :       ! genearte the send task list (tasks_send) from the recv task list
     736             :       CALL fb_com_tasks_transpose_dest_src(com_tasks_recv, ">", com_tasks_send, &
     737          32 :                                            para_env)
     738             : 
     739             :       CALL fb_com_tasks_get(com_tasks=com_tasks_send, &
     740             :                             ntasks=ntasks_send, &
     741             :                             tasks=tasks_send, &
     742          32 :                             nencode=nencode)
     743             : 
     744             :       ! because the atomic_halos and the neighbor_list_set used to
     745             :       ! generate the sparse structure of the DBCSR matrix do not
     746             :       ! necessarily have to coincide, we must check of the blocks in
     747             :       ! tasks_send (these should be local to the processor) do indeed
     748             :       ! exist in the DBCSR matrix, if not, then we need to prune these
     749             :       ! out of the task list
     750             : 
     751          32 :       counter = 0
     752        4640 :       DO itask = 1, ntasks_send
     753        4608 :          pair = tasks_send(TASK_PAIR, itask)
     754        4608 :          CALL fb_com_tasks_decode_pair(pair, iatom_global, jatom_global, nencode)
     755             :          ! check if block exists in DBCSR matrix
     756             :          CALL dbcsr_get_block_p(matrix=dbcsr_mat, row=iatom_global, &
     757             :                                 col=jatom_global, block=mat_block, &
     758        4608 :                                 found=found)
     759        4640 :          IF (found) THEN
     760        4608 :             counter = counter + 1
     761             :             ! we can do this here, because essencially we are inspecting
     762             :             ! the send tasks one by one, and then omit ones which the
     763             :             ! block is not found in the DBCSR matrix. itask is always
     764             :             ! .GE. counter
     765       23040 :             tasks_send(1:TASK_N_RECORDS, counter) = tasks_send(1:TASK_N_RECORDS, itask)
     766             :          END IF
     767             :       END DO
     768             :       ! the new send task list should have size counter. counter
     769             :       ! .LE. the old ntasks_send, thus the task list does not really
     770             :       ! need to be reallocated (as it is just a temporary array), and
     771             :       ! the useful data will cutoff at counter, and the rest of the
     772             :       ! array will just be garbage
     773          32 :       ntasks_send = counter
     774             : 
     775             :       ! tasks_send is set through the pointer already
     776             :       CALL fb_com_tasks_set(com_tasks=com_tasks_send, &
     777          32 :                             ntasks=ntasks_send)
     778             : 
     779             :       ! now, re-distribute the new send tasks list to other processors
     780             :       ! to build the updated recv tasks list
     781             :       CALL fb_com_tasks_transpose_dest_src(com_tasks_recv, "<", com_tasks_send, &
     782          32 :                                            para_env)
     783             : 
     784             :       ! task lists are now complete, now construct the atom_pairs_send
     785             :       ! and atom_pairs_recv from the tasks lists
     786             :       CALL fb_com_tasks_build_atom_pairs(com_tasks=com_tasks_send, &
     787             :                                          atom_pairs=atom_pairs_send, &
     788             :                                          natoms_encode=nencode, &
     789          32 :                                          send_or_recv="send")
     790             :       CALL fb_com_tasks_build_atom_pairs(com_tasks=com_tasks_recv, &
     791             :                                          atom_pairs=atom_pairs_recv, &
     792             :                                          natoms_encode=nencode, &
     793          32 :                                          send_or_recv="recv")
     794             : 
     795             :       ! cleanup
     796          32 :       CALL fb_com_tasks_release(com_tasks_recv)
     797          32 :       CALL fb_com_tasks_release(com_tasks_send)
     798             : 
     799          32 :       CALL timestop(handle)
     800             : 
     801          96 :    END SUBROUTINE fb_atmatrix_generate_com_pairs_2
     802             : 
     803             : END MODULE qs_fb_atomic_matrix_methods

Generated by: LCOV version 1.15