LCOV - code coverage report
Current view: top level - src - hfx_communication.F (source / functions) Hit Total Coverage
Test: CP2K Regtests (git:4dc10b3) Lines: 241 255 94.5 %
Date: 2024-11-21 06:45:46 Functions: 4 5 80.0 %

          Line data    Source code
       1             : !--------------------------------------------------------------------------------------------------!
       2             : !   CP2K: A general program to perform molecular dynamics simulations                              !
       3             : !   Copyright 2000-2024 CP2K developers group <https://cp2k.org>                                   !
       4             : !                                                                                                  !
       5             : !   SPDX-License-Identifier: GPL-2.0-or-later                                                      !
       6             : !--------------------------------------------------------------------------------------------------!
       7             : 
       8             : ! **************************************************************************************************
       9             : !> \brief Routines for data exchange between MPI processes
      10             : !> \par History
      11             : !>      04.2008 created [Manuel Guidon]
      12             : !> \author Manuel Guidon
      13             : ! **************************************************************************************************
      14             : MODULE hfx_communication
      15             :    USE atomic_kind_types,               ONLY: atomic_kind_type,&
      16             :                                               get_atomic_kind_set
      17             :    USE cp_control_types,                ONLY: dft_control_type
      18             :    USE cp_dbcsr_api,                    ONLY: dbcsr_get_block_p,&
      19             :                                               dbcsr_iterator_blocks_left,&
      20             :                                               dbcsr_iterator_next_block,&
      21             :                                               dbcsr_iterator_start,&
      22             :                                               dbcsr_iterator_stop,&
      23             :                                               dbcsr_iterator_type,&
      24             :                                               dbcsr_p_type,&
      25             :                                               dbcsr_type
      26             :    USE hfx_types,                       ONLY: hfx_2D_map,&
      27             :                                               hfx_basis_type,&
      28             :                                               hfx_type
      29             :    USE kinds,                           ONLY: dp,&
      30             :                                               int_8
      31             :    USE message_passing,                 ONLY: mp_para_env_type,&
      32             :                                               mp_request_type,&
      33             :                                               mp_waitall
      34             :    USE particle_types,                  ONLY: particle_type
      35             :    USE qs_environment_types,            ONLY: get_qs_env,&
      36             :                                               qs_environment_type
      37             : #include "./base/base_uses.f90"
      38             : 
      39             :    IMPLICIT NONE
      40             :    PRIVATE
      41             : 
      42             :    PUBLIC :: get_full_density, &
      43             :              distribute_ks_matrix, &
      44             :              scale_and_add_fock_to_ks_matrix, &
      45             :              get_atomic_block_maps
      46             :    CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'hfx_communication'
      47             : 
      48             : !***
      49             : 
      50             : CONTAINS
      51             : 
      52             : ! **************************************************************************************************
      53             : !> \brief - Collects full density matrix from all CPUs
      54             : !> \param para_env ...
      55             : !> \param full_density The full Density matrix
      56             : !> \param rho Distributed density
      57             : !> \param number_of_p_entries Maximal buffer size
      58             : !> \param block_offset ...
      59             : !> \param kind_of ...
      60             : !> \param basis_parameter ...
      61             : !> \param get_max_vals_spin ...
      62             : !> \param rho_beta ...
      63             : !> \param antisymmetric ...
      64             : !> \par History
      65             : !>      11.2007 created [Manuel Guidon]
      66             : !> \author Manuel Guidon
      67             : !> \note
      68             : !>      - Communication with left/right node only
      69             : !>        added a mpi_sync before and after the ring of isendrecv. This *speed up* the
      70             : !>        communication, and might protect against idle neighbors flooding a busy node
      71             : !>        with messages [Joost]
      72             : ! **************************************************************************************************
      73       42971 :    SUBROUTINE get_full_density(para_env, full_density, rho, number_of_p_entries, &
      74             :                                block_offset, kind_of, basis_parameter, &
      75             :                                get_max_vals_spin, rho_beta, antisymmetric)
      76             : 
      77             :       TYPE(mp_para_env_type), POINTER                    :: para_env
      78             :       REAL(dp), DIMENSION(:)                             :: full_density
      79             :       TYPE(dbcsr_type), POINTER                          :: rho
      80             :       INTEGER, INTENT(IN)                                :: number_of_p_entries
      81             :       INTEGER, DIMENSION(:), POINTER                     :: block_offset
      82             :       INTEGER                                            :: kind_of(*)
      83             :       TYPE(hfx_basis_type), DIMENSION(:), POINTER        :: basis_parameter
      84             :       LOGICAL, INTENT(IN)                                :: get_max_vals_spin
      85             :       TYPE(dbcsr_type), OPTIONAL, POINTER                :: rho_beta
      86             :       LOGICAL, INTENT(IN)                                :: antisymmetric
      87             : 
      88             :       INTEGER :: blk, block_size, data_from, dest, i, iatom, icpu, ikind, iset, jatom, jkind, &
      89             :          jset, mepos, ncpu, nseta, nsetb, pa, pa1, pb, pb1, source, source_cpu
      90       42971 :       INTEGER, DIMENSION(:), POINTER                     :: nsgfa, nsgfb
      91             :       LOGICAL                                            :: found
      92             :       REAL(dp)                                           :: symmfac
      93       42971 :       REAL(dp), DIMENSION(:), POINTER                    :: recbuffer, sendbuffer, swapbuffer
      94       42971 :       REAL(dp), DIMENSION(:, :), POINTER                 :: sparse_block, sparse_block_beta
      95             :       TYPE(dbcsr_iterator_type)                          :: iter
      96      128913 :       TYPE(mp_request_type), DIMENSION(2)                :: req
      97             : 
      98    12707036 :       full_density = 0.0_dp
      99      128913 :       ALLOCATE (sendbuffer(number_of_p_entries))
     100       85942 :       ALLOCATE (recbuffer(number_of_p_entries))
     101             : 
     102       42971 :       i = 1
     103       42971 :       CALL dbcsr_iterator_start(iter, rho, shared=.FALSE.)
     104      176478 :       DO WHILE (dbcsr_iterator_blocks_left(iter))
     105      133507 :          CALL dbcsr_iterator_next_block(iter, iatom, jatom, sparse_block, blk)
     106             :          ! the resulting vector will eb only the upper triangle.
     107             :          ! in case of antisymmetry take care to change signs if a lower block gets copied
     108      133507 :          symmfac = 1.0_dp
     109      133507 :          IF (antisymmetric .AND. iatom > jatom) symmfac = -1.0_dp
     110      133507 :          ikind = kind_of(iatom)
     111      133507 :          nseta = basis_parameter(ikind)%nset
     112      133507 :          nsgfa => basis_parameter(ikind)%nsgf
     113      133507 :          jkind = kind_of(jatom)
     114      133507 :          nsetb = basis_parameter(jkind)%nset
     115      133507 :          nsgfb => basis_parameter(jkind)%nsgf
     116      176478 :          IF (get_max_vals_spin) THEN
     117             :             CALL dbcsr_get_block_p(rho_beta, &
     118           0 :                                    row=iatom, col=jatom, BLOCK=sparse_block_beta, found=found)
     119           0 :             pa = 0
     120           0 :             DO iset = 1, nseta
     121             :                pb = 0
     122           0 :                DO jset = 1, nsetb
     123           0 :                   DO pa1 = pa + 1, pa + nsgfa(iset)
     124           0 :                      DO pb1 = pb + 1, pb + nsgfb(jset)
     125           0 :                         sendbuffer(i) = MAX(ABS(sparse_block(pa1, pb1)), ABS(sparse_block_beta(pa1, pb1)))
     126           0 :                         i = i + 1
     127             :                      END DO
     128             :                   END DO
     129           0 :                   pb = pb + nsgfb(jset)
     130             :                END DO
     131           0 :                pa = pa + nsgfa(iset)
     132             :             END DO
     133             :          ELSE
     134             :             pa = 0
     135      523306 :             DO iset = 1, nseta
     136             :                pb = 0
     137     1619473 :                DO jset = 1, nsetb
     138     4048259 :                   DO pa1 = pa + 1, pa + nsgfa(iset)
     139    10370537 :                      DO pb1 = pb + 1, pb + nsgfb(jset)
     140     6322278 :                         sendbuffer(i) = sparse_block(pa1, pb1)*symmfac
     141     9140863 :                         i = i + 1
     142             :                      END DO
     143             :                   END DO
     144     1619473 :                   pb = pb + nsgfb(jset)
     145             :                END DO
     146      523306 :                pa = pa + nsgfa(iset)
     147             :             END DO
     148             :          END IF
     149             :       END DO
     150       42971 :       CALL dbcsr_iterator_stop(iter)
     151             : 
     152             :       ! sync before/after a ring of isendrecv
     153       42971 :       CALL para_env%sync()
     154       42971 :       ncpu = para_env%num_pe
     155       42971 :       mepos = para_env%mepos
     156       42971 :       dest = MODULO(mepos + 1, ncpu)
     157       42971 :       source = MODULO(mepos - 1, ncpu)
     158      128830 :       DO icpu = 0, ncpu - 1
     159       85859 :          IF (icpu .NE. ncpu - 1) THEN
     160             :             CALL para_env%isendrecv(sendbuffer, dest, recbuffer, source, &
     161       42888 :                                     req(1), req(2), 13)
     162             :          END IF
     163       85859 :          data_from = MODULO(mepos - icpu, ncpu)
     164       85859 :          source_cpu = MODULO(data_from, ncpu) + 1
     165       85859 :          block_size = block_offset(source_cpu + 1) - block_offset(source_cpu)
     166    12706953 :          full_density(block_offset(source_cpu):block_offset(source_cpu) + block_size - 1) = sendbuffer(1:block_size)
     167             : 
     168       85859 :          IF (icpu .NE. ncpu - 1) THEN
     169       42888 :             CALL mp_waitall(req)
     170             :          END IF
     171       85859 :          swapbuffer => sendbuffer
     172       85859 :          sendbuffer => recbuffer
     173      128830 :          recbuffer => swapbuffer
     174             :       END DO
     175       42971 :       DEALLOCATE (sendbuffer, recbuffer)
     176             :       ! sync before/after a ring of isendrecv
     177       42971 :       CALL para_env%sync()
     178             : 
     179       85942 :    END SUBROUTINE get_full_density
     180             : 
     181             : ! **************************************************************************************************
     182             : !> \brief - Distributes the local full Kohn-Sham matrix to all CPUS
     183             : !> \param para_env ...
     184             : !> \param full_ks The full Kohn-Sham matrix
     185             : !> \param ks_matrix Distributed Kohn-Sham matrix
     186             : !> \param number_of_p_entries Maximal buffer size
     187             : !> \param block_offset ...
     188             : !> \param kind_of ...
     189             : !> \param basis_parameter ...
     190             : !> \param off_diag_fac ...
     191             : !> \param diag_fac ...
     192             : !> \par History
     193             : !>      11.2007 created [Manuel Guidon]
     194             : !> \author Manuel Guidon
     195             : !> \note
     196             : !>      - Communication with left/right node only
     197             : ! **************************************************************************************************
     198       40205 :    SUBROUTINE distribute_ks_matrix(para_env, full_ks, ks_matrix, number_of_p_entries, &
     199             :                                    block_offset, kind_of, basis_parameter, &
     200             :                                    off_diag_fac, diag_fac)
     201             : 
     202             :       TYPE(mp_para_env_type), POINTER                    :: para_env
     203             :       REAL(dp), DIMENSION(:)                             :: full_ks
     204             :       TYPE(dbcsr_type), POINTER                          :: ks_matrix
     205             :       INTEGER, INTENT(IN)                                :: number_of_p_entries
     206             :       INTEGER, DIMENSION(:), POINTER                     :: block_offset
     207             :       INTEGER                                            :: kind_of(*)
     208             :       TYPE(hfx_basis_type), DIMENSION(:), POINTER        :: basis_parameter
     209             :       REAL(dp), INTENT(IN), OPTIONAL                     :: off_diag_fac, diag_fac
     210             : 
     211             :       INTEGER :: blk, block_size, data_to, dest, dest_cpu, i, iatom, icpu, ikind, iset, jatom, &
     212             :          jkind, jset, mepos, ncpu, nseta, nsetb, pa, pa1, pb, pb1, source
     213       40205 :       INTEGER, DIMENSION(:), POINTER                     :: nsgfa, nsgfb
     214             :       REAL(dp)                                           :: my_fac, myd_fac
     215       40205 :       REAL(dp), DIMENSION(:), POINTER                    :: recbuffer, sendbuffer, swapbuffer
     216       40205 :       REAL(dp), DIMENSION(:, :), POINTER                 :: sparse_block
     217             :       TYPE(dbcsr_iterator_type)                          :: iter
     218      120615 :       TYPE(mp_request_type), DIMENSION(2)                :: req
     219             : 
     220       40205 :       my_fac = 1.0_dp; myd_fac = 1.0_dp
     221       40205 :       IF (PRESENT(off_diag_fac)) my_fac = off_diag_fac
     222       40205 :       IF (PRESENT(diag_fac)) myd_fac = diag_fac
     223             : 
     224      120615 :       ALLOCATE (sendbuffer(number_of_p_entries))
     225     8081455 :       sendbuffer = 0.0_dp
     226       80410 :       ALLOCATE (recbuffer(number_of_p_entries))
     227     8081455 :       recbuffer = 0.0_dp
     228             : 
     229       40205 :       ncpu = para_env%num_pe
     230       40205 :       mepos = para_env%mepos
     231       40205 :       dest = MODULO(mepos + 1, ncpu)
     232       40205 :       source = MODULO(mepos - 1, ncpu)
     233             : 
     234             :       ! sync before/after a ring of isendrecv
     235       40205 :       CALL para_env%sync()
     236       80327 :       DO icpu = 1, ncpu
     237       80327 :          i = 1
     238       80327 :          data_to = mepos - icpu
     239       80327 :          dest_cpu = MODULO(data_to, ncpu) + 1
     240       80327 :          block_size = block_offset(dest_cpu + 1) - block_offset(dest_cpu)
     241    11945867 :        sendbuffer(1:block_size) = sendbuffer(1:block_size) + full_ks(block_offset(dest_cpu):block_offset(dest_cpu) + block_size - 1)
     242       80327 :          IF (icpu .EQ. ncpu) EXIT
     243             :          CALL para_env%isendrecv(sendbuffer, dest, recbuffer, source, &
     244       40122 :                                  req(1), req(2), 13)
     245             : 
     246       40122 :          CALL mp_waitall(req)
     247       40122 :          swapbuffer => sendbuffer
     248       40122 :          sendbuffer => recbuffer
     249       80327 :          recbuffer => swapbuffer
     250             :       END DO
     251             :       ! sync before/after a ring of isendrecv
     252       40205 :       CALL para_env%sync()
     253             : 
     254       40205 :       i = 1
     255       40205 :       CALL dbcsr_iterator_start(iter, ks_matrix, shared=.FALSE.)
     256      165627 :       DO WHILE (dbcsr_iterator_blocks_left(iter))
     257      125422 :          CALL dbcsr_iterator_next_block(iter, iatom, jatom, sparse_block, blk)
     258             : 
     259      125422 :          ikind = kind_of(iatom)
     260      125422 :          nseta = basis_parameter(ikind)%nset
     261      125422 :          nsgfa => basis_parameter(ikind)%nsgf
     262      125422 :          jkind = kind_of(jatom)
     263      125422 :          nsetb = basis_parameter(jkind)%nset
     264      125422 :          nsgfb => basis_parameter(jkind)%nsgf
     265      125422 :          pa = 0
     266      533041 :          DO iset = 1, nseta
     267             :             pb = 0
     268     1529771 :             DO jset = 1, nsetb
     269     3821230 :                DO pa1 = pa + 1, pa + nsgfa(iset)
     270     9769521 :                   DO pb1 = pb + 1, pb + nsgfb(jset)
     271     5948291 :                      IF (iatom == jatom .AND. pa1 == pb1) THEN
     272      374114 :                         sparse_block(pa1, pb1) = sendbuffer(i)*myd_fac + sparse_block(pa1, pb1)
     273             :                      ELSE
     274     5574177 :                         sparse_block(pa1, pb1) = sendbuffer(i)*my_fac + sparse_block(pa1, pb1)
     275             :                      END IF
     276     8607164 :                      i = i + 1
     277             :                   END DO
     278             :                END DO
     279     1529771 :                pb = pb + nsgfb(jset)
     280             :             END DO
     281      492836 :             pa = pa + nsgfa(iset)
     282             :          END DO
     283             :       END DO
     284       40205 :       CALL dbcsr_iterator_stop(iter)
     285             : 
     286       40205 :       DEALLOCATE (sendbuffer, recbuffer)
     287             : 
     288       80410 :    END SUBROUTINE distribute_ks_matrix
     289             : 
     290             : ! **************************************************************************************************
     291             : !> \brief - Distributes the local full Kohn-Sham matrix to all CPUS. Is called in
     292             : !>        case of adiabatic rescaling. This is just a refactored version of
     293             : !>        distribute_ks_matrix
     294             : !> \param para_env ...
     295             : !> \param qs_env ...
     296             : !> \param ks_matrix Distributed Kohn-Sham matrix
     297             : !> \param irep ...
     298             : !> \param scaling_factor ...
     299             : !> \par History
     300             : !>      11.2007 created [Manuel Guidon]
     301             : !> \author Manuel Guidon
     302             : !> \note
     303             : !>      - Communication with left/right node only
     304             : ! **************************************************************************************************
     305         112 :    SUBROUTINE scale_and_add_fock_to_ks_matrix(para_env, qs_env, ks_matrix, irep, &
     306             :                                               scaling_factor)
     307             : 
     308             :       TYPE(mp_para_env_type), POINTER                    :: para_env
     309             :       TYPE(qs_environment_type), POINTER                 :: qs_env
     310             :       TYPE(dbcsr_p_type), DIMENSION(:, :), POINTER       :: ks_matrix
     311             :       INTEGER, INTENT(IN)                                :: irep
     312             :       REAL(dp), INTENT(IN)                               :: scaling_factor
     313             : 
     314             :       INTEGER                                            :: iatom, ikind, img, natom, nimages, nspins
     315         112 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: kind_of, last_sgf_global
     316         112 :       REAL(dp), DIMENSION(:, :), POINTER                 :: full_ks
     317         112 :       TYPE(atomic_kind_type), DIMENSION(:), POINTER      :: atomic_kind_set
     318             :       TYPE(dft_control_type), POINTER                    :: dft_control
     319             :       TYPE(hfx_basis_type), DIMENSION(:), POINTER        :: basis_parameter
     320             :       TYPE(hfx_type), POINTER                            :: actual_x_data
     321         112 :       TYPE(particle_type), DIMENSION(:), POINTER         :: particle_set
     322             : 
     323             : !! All shared data is saved in i_thread = 1!
     324             : 
     325         112 :       NULLIFY (dft_control)
     326         112 :       actual_x_data => qs_env%x_data(irep, 1)
     327         112 :       basis_parameter => actual_x_data%basis_parameter
     328             : 
     329             :       CALL get_qs_env(qs_env=qs_env, &
     330             :                       atomic_kind_set=atomic_kind_set, &
     331             :                       particle_set=particle_set, &
     332         112 :                       dft_control=dft_control)
     333             : 
     334         112 :       nspins = dft_control%nspins
     335         112 :       nimages = dft_control%nimages
     336         112 :       CPASSERT(nimages == 1)
     337             : 
     338         112 :       CALL get_atomic_kind_set(atomic_kind_set=atomic_kind_set, kind_of=kind_of)
     339             : 
     340         112 :       natom = SIZE(particle_set, 1)
     341         336 :       ALLOCATE (last_sgf_global(0:natom))
     342         112 :       last_sgf_global(0) = 0
     343         224 :       DO iatom = 1, natom
     344         112 :          ikind = kind_of(iatom)
     345         224 :          last_sgf_global(iatom) = last_sgf_global(iatom - 1) + basis_parameter(ikind)%nsgf_total
     346             :       END DO
     347         112 :       full_ks => actual_x_data%full_ks_alpha
     348         112 :       IF (scaling_factor /= 1.0_dp) THEN
     349       22288 :          full_ks = full_ks*scaling_factor
     350             :       END IF
     351         224 :       DO img = 1, nimages
     352             :          CALL distribute_ks_matrix(para_env, full_ks(:, img), ks_matrix(1, img)%matrix, actual_x_data%number_of_p_entries, &
     353             :                                    actual_x_data%block_offset, kind_of, basis_parameter, &
     354         224 :                                    off_diag_fac=0.5_dp)
     355             :       END DO
     356         112 :       DEALLOCATE (actual_x_data%full_ks_alpha)
     357             : 
     358         112 :       IF (nspins == 2) THEN
     359          60 :          full_ks => actual_x_data%full_ks_beta
     360          60 :          IF (scaling_factor /= 1.0_dp) THEN
     361       11940 :             full_ks = full_ks*scaling_factor
     362             :          END IF
     363         120 :          DO img = 1, nimages
     364             :             CALL distribute_ks_matrix(para_env, full_ks(:, img), ks_matrix(2, img)%matrix, actual_x_data%number_of_p_entries, &
     365             :                                       actual_x_data%block_offset, kind_of, basis_parameter, &
     366         120 :                                       off_diag_fac=0.5_dp)
     367             :          END DO
     368          60 :          DEALLOCATE (actual_x_data%full_ks_beta)
     369             :       END IF
     370             : 
     371         112 :       DEALLOCATE (last_sgf_global)
     372             : 
     373         224 :    END SUBROUTINE scale_and_add_fock_to_ks_matrix
     374             : 
     375             : ! **************************************************************************************************
     376             : !> \brief Given a 2d index pair, this function returns a 1d index pair for
     377             : !>        a symmetric upper triangle NxN matrix
     378             : !>        The compiler should inline this function, therefore it appears in
     379             : !>        several modules
     380             : !> \param i 2d index
     381             : !> \param j 2d index
     382             : !> \param N matrix size
     383             : !> \return ...
     384             : !> \par History
     385             : !>      03.2009 created [Manuel Guidon]
     386             : !> \author Manuel Guidon
     387             : ! **************************************************************************************************
     388           0 :    PURE FUNCTION get_1D_idx(i, j, N)
     389             :       INTEGER, INTENT(IN)                                :: i, j
     390             :       INTEGER(int_8), INTENT(IN)                         :: N
     391             :       INTEGER(int_8)                                     :: get_1D_idx
     392             : 
     393             :       INTEGER(int_8)                                     :: min_ij
     394             : 
     395           0 :       min_ij = MIN(i, j)
     396           0 :       get_1D_idx = min_ij*N + MAX(i, j) - (min_ij - 1)*min_ij/2 - N
     397             : 
     398           0 :    END FUNCTION get_1D_idx
     399             : 
     400             : ! **************************************************************************************************
     401             : !> \brief create a several maps array that reflects the ks matrix sparsity
     402             : !> \param matrix ...
     403             : !> \param basis_parameter ...
     404             : !> \param kind_of ...
     405             : !> \param is_assoc_atomic_block ...
     406             : !> \param number_of_p_entries ...
     407             : !> \param para_env ...
     408             : !> \param atomic_block_offset ...
     409             : !> \param set_offset ...
     410             : !> \param block_offset ...
     411             : !> \param map_atoms_to_cpus ...
     412             : !> \param nkind ...
     413             : !> \par History
     414             : !>      11.2007 refactored [Joost VandeVondele]
     415             : !>      07.2009 add new maps
     416             : !> \author Manuel Guidon
     417             : !> \notes
     418             : !>      is_assoc_atomic_block returns the mpi rank + 1 for associated blocks,
     419             : !>      zero for unassiated blocks
     420             : ! **************************************************************************************************
     421        2098 :    SUBROUTINE get_atomic_block_maps(matrix, basis_parameter, kind_of, &
     422        2098 :                                     is_assoc_atomic_block, number_of_p_entries, &
     423             :                                     para_env, atomic_block_offset, set_offset, &
     424             :                                     block_offset, map_atoms_to_cpus, nkind)
     425             : 
     426             :       TYPE(dbcsr_type), POINTER                          :: matrix
     427             :       TYPE(hfx_basis_type), DIMENSION(:)                 :: basis_parameter
     428             :       INTEGER, DIMENSION(:)                              :: kind_of
     429             :       INTEGER, DIMENSION(:, :), INTENT(OUT)              :: is_assoc_atomic_block
     430             :       INTEGER, INTENT(OUT)                               :: number_of_p_entries
     431             :       TYPE(mp_para_env_type), POINTER                    :: para_env
     432             :       INTEGER, DIMENSION(:, :), POINTER                  :: atomic_block_offset
     433             :       INTEGER, DIMENSION(:, :, :, :), POINTER            :: set_offset
     434             :       INTEGER, DIMENSION(:), POINTER                     :: block_offset
     435             :       TYPE(hfx_2D_map), DIMENSION(:), POINTER            :: map_atoms_to_cpus
     436             :       INTEGER                                            :: nkind
     437             : 
     438             :       CHARACTER(LEN=*), PARAMETER :: routineN = 'get_atomic_block_maps'
     439             : 
     440             :       INTEGER :: blk, handle, iatom, ibuf, icpu, ikind, ilist, iset, itask, jatom, jkind, jset, &
     441             :          natom, ncpu, nseta, nsetb, number_of_p_blocks, offset, tmp(2)
     442        2098 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: buffer_in, buffer_out, counter, rcount, &
     443        2098 :                                                             rdispl
     444        2098 :       INTEGER, DIMENSION(:), POINTER                     :: iatom_list, jatom_list, nsgfa, nsgfb
     445        2098 :       REAL(KIND=dp), DIMENSION(:, :), POINTER            :: sparse_block
     446             :       TYPE(dbcsr_iterator_type)                          :: iter
     447             : 
     448        2098 :       CALL timeset(routineN, handle)
     449             : 
     450       31606 :       is_assoc_atomic_block = 0
     451        2098 :       number_of_p_entries = 0
     452        2098 :       number_of_p_blocks = 0
     453             : 
     454             :       !
     455             :       ! count number_of_p_blocks and number_of_p_entries
     456             :       !
     457        2098 :       CALL dbcsr_iterator_start(iter, matrix, shared=.FALSE.)
     458        9432 :       DO WHILE (dbcsr_iterator_blocks_left(iter))
     459        7334 :          CALL dbcsr_iterator_next_block(iter, iatom, jatom, sparse_block, blk)
     460        7334 :          ikind = kind_of(iatom)
     461        7334 :          jkind = kind_of(jatom)
     462        7334 :          number_of_p_blocks = number_of_p_blocks + 1
     463             :          number_of_p_entries = number_of_p_entries + &
     464        7334 :                                basis_parameter(ikind)%nsgf_total*basis_parameter(jkind)%nsgf_total
     465             :       END DO
     466        2098 :       CALL dbcsr_iterator_stop(iter)
     467             : 
     468        6294 :       tmp = (/number_of_p_entries, number_of_p_blocks/)
     469        2098 :       CALL para_env%max(tmp)
     470        2098 :       number_of_p_entries = tmp(1)
     471        2098 :       number_of_p_blocks = tmp(2)
     472             :       !
     473             :       ! send this info around, so we can construct is_assoc_atomic_block
     474             :       ! pack all data in buffers and use allgatherv
     475             :       !
     476        6294 :       ALLOCATE (buffer_in(3*number_of_p_blocks))
     477        6294 :       ALLOCATE (buffer_out(3*number_of_p_blocks*para_env%num_pe))
     478        8392 :       ALLOCATE (rcount(para_env%num_pe), rdispl(para_env%num_pe))
     479             : 
     480       30148 :       buffer_in = 0
     481        2098 :       ibuf = 0
     482             : 
     483        2098 :       CALL dbcsr_iterator_start(iter, matrix, shared=.FALSE.)
     484        9432 :       DO WHILE (dbcsr_iterator_blocks_left(iter))
     485        7334 :          CALL dbcsr_iterator_next_block(iter, iatom, jatom, sparse_block, blk)
     486             : 
     487        7334 :          buffer_in(ibuf + 1) = iatom
     488        7334 :          buffer_in(ibuf + 2) = jatom
     489        7334 :          buffer_in(ibuf + 3) = para_env%mepos + 1
     490        7334 :          ibuf = ibuf + 3
     491             :       END DO
     492        2098 :       CALL dbcsr_iterator_stop(iter)
     493             : 
     494        6292 :       rcount = SIZE(buffer_in)
     495        2098 :       rdispl(1) = 0
     496        4194 :       DO icpu = 2, para_env%num_pe
     497        4194 :          rdispl(icpu) = rdispl(icpu - 1) + rcount(icpu - 1)
     498             :       END DO
     499        2098 :       CALL para_env%allgatherv(buffer_in, buffer_out, rcount, rdispl)
     500             : 
     501       20786 :       DO ibuf = 0, number_of_p_blocks*para_env%num_pe*3 - 3, 3
     502       18688 :          itask = buffer_out(ibuf + 3)
     503             :          ! buffer_out can be 0 if buffer_in contained less elements than the max number of atom pairs
     504             :          ! is_assoc_atomic_block is a map for atom pairs to a processor (assumes symmetry, i,j on the ame as j,i)
     505       20786 :          IF (itask .NE. 0) THEN
     506       14656 :             iatom = buffer_out(ibuf + 1)
     507       14656 :             jatom = buffer_out(ibuf + 2)
     508       14656 :             is_assoc_atomic_block(iatom, jatom) = itask
     509       14656 :             is_assoc_atomic_block(jatom, iatom) = itask
     510             :          END IF
     511             :       END DO
     512             : 
     513        2098 :       IF (ASSOCIATED(map_atoms_to_cpus)) THEN
     514        2640 :          DO icpu = 1, para_env%num_pe
     515        1760 :             DEALLOCATE (map_atoms_to_cpus(icpu)%iatom_list)
     516        2640 :             DEALLOCATE (map_atoms_to_cpus(icpu)%jatom_list)
     517             :          END DO
     518         880 :          DEALLOCATE (map_atoms_to_cpus)
     519             :       END IF
     520             : 
     521        2098 :       natom = SIZE(is_assoc_atomic_block, 1)
     522       10488 :       ALLOCATE (map_atoms_to_cpus(para_env%num_pe))
     523        6294 :       ALLOCATE (counter(para_env%num_pe))
     524        6292 :       counter = 0
     525             : 
     526        8424 :       DO iatom = 1, natom
     527       23178 :          DO jatom = iatom, natom
     528       14754 :             icpu = is_assoc_atomic_block(jatom, iatom)
     529       21080 :             IF (icpu > 0) counter(icpu) = counter(icpu) + 1
     530             :          END DO
     531             :       END DO
     532        6292 :       DO icpu = 1, para_env%num_pe
     533       12526 :          ALLOCATE (map_atoms_to_cpus(icpu)%iatom_list(counter(icpu)))
     534       10430 :          ALLOCATE (map_atoms_to_cpus(icpu)%jatom_list(counter(icpu)))
     535             :       END DO
     536        6292 :       counter = 0
     537        8424 :       DO iatom = 1, natom
     538       23178 :          DO jatom = iatom, natom
     539       14754 :             icpu = is_assoc_atomic_block(jatom, iatom)
     540       21080 :             IF (icpu > 0) THEN
     541       14656 :                counter(icpu) = counter(icpu) + 1
     542       14656 :                map_atoms_to_cpus(icpu)%jatom_list(counter(icpu)) = jatom
     543       14656 :                map_atoms_to_cpus(icpu)%iatom_list(counter(icpu)) = iatom
     544             :             END IF
     545             :          END DO
     546             :       END DO
     547             : 
     548        2098 :       DEALLOCATE (counter)
     549             : 
     550        2098 :       ncpu = para_env%num_pe
     551        2098 :       offset = 1
     552       31606 :       atomic_block_offset = 0
     553        8390 :       block_offset = 0
     554        6292 :       DO icpu = 1, ncpu
     555        4194 :          iatom_list => map_atoms_to_cpus(icpu)%iatom_list
     556        4194 :          jatom_list => map_atoms_to_cpus(icpu)%jatom_list
     557        4194 :          block_offset(icpu) = offset
     558       20948 :          DO ilist = 1, SIZE(iatom_list)
     559       14656 :             iatom = iatom_list(ilist)
     560       14656 :             ikind = kind_of(iatom)
     561       14656 :             jatom = jatom_list(ilist)
     562       14656 :             jkind = kind_of(jatom)
     563       14656 :             atomic_block_offset(iatom, jatom) = offset
     564       14656 :             atomic_block_offset(jatom, iatom) = offset
     565       18850 :             offset = offset + basis_parameter(ikind)%nsgf_total*basis_parameter(jkind)%nsgf_total
     566             :          END DO
     567             :       END DO
     568        2098 :       block_offset(ncpu + 1) = offset
     569      147578 :       set_offset = 0
     570        5894 :       DO ikind = 1, nkind
     571        3796 :          nseta = basis_parameter(ikind)%nset
     572        3796 :          nsgfa => basis_parameter(ikind)%nsgf
     573       14286 :          DO jkind = 1, nkind
     574        8392 :             nsetb = basis_parameter(jkind)%nset
     575        8392 :             nsgfb => basis_parameter(jkind)%nsgf
     576        8392 :             offset = 1
     577       34356 :             DO iset = 1, nseta
     578      105698 :                DO jset = 1, nsetb
     579       75138 :                   set_offset(jset, iset, jkind, ikind) = offset
     580       97306 :                   offset = offset + nsgfa(iset)*nsgfb(jset)
     581             :                END DO
     582             :             END DO
     583             :          END DO
     584             :       END DO
     585             : 
     586        2098 :       CALL timestop(handle)
     587             : 
     588        4196 :    END SUBROUTINE get_atomic_block_maps
     589             : 
     590             : END MODULE hfx_communication

Generated by: LCOV version 1.15