LCOV - code coverage report
Current view: top level - src - xas_tdp_kernel.F (source / functions) Hit Total Coverage
Test: CP2K Regtests (git:4dc10b3) Lines: 382 438 87.2 %
Date: 2024-11-21 06:45:46 Functions: 14 15 93.3 %

          Line data    Source code
       1             : !--------------------------------------------------------------------------------------------------!
       2             : !   CP2K: A general program to perform molecular dynamics simulations                              !
       3             : !   Copyright 2000-2024 CP2K developers group <https://cp2k.org>                                   !
       4             : !                                                                                                  !
       5             : !   SPDX-License-Identifier: GPL-2.0-or-later                                                      !
       6             : !--------------------------------------------------------------------------------------------------!
       7             : 
       8             : ! **************************************************************************************************
       9             : !> \brief All the kernel specific subroutines for XAS TDP calculations
      10             : !> \author A. Bussy (03.2019)
      11             : ! **************************************************************************************************
      12             : 
      13             : MODULE xas_tdp_kernel
      14             :    USE basis_set_types,                 ONLY: gto_basis_set_p_type
      15             :    USE cp_dbcsr_api,                    ONLY: &
      16             :         dbcsr_add, dbcsr_complete_redistribute, dbcsr_copy, dbcsr_create, dbcsr_desymmetrize, &
      17             :         dbcsr_distribution_get, dbcsr_distribution_new, dbcsr_distribution_release, &
      18             :         dbcsr_distribution_type, dbcsr_filter, dbcsr_finalize, dbcsr_get_block_p, dbcsr_get_info, &
      19             :         dbcsr_get_stored_coordinates, dbcsr_iterator_blocks_left, dbcsr_iterator_next_block, &
      20             :         dbcsr_iterator_start, dbcsr_iterator_stop, dbcsr_iterator_type, dbcsr_multiply, &
      21             :         dbcsr_p_type, dbcsr_put_block, dbcsr_release, dbcsr_reserve_block2d, dbcsr_set, &
      22             :         dbcsr_transposed, dbcsr_type, dbcsr_type_no_symmetry, dbcsr_type_symmetric
      23             :    USE cp_dbcsr_operations,             ONLY: cp_dbcsr_dist2d_to_dist,&
      24             :                                               dbcsr_deallocate_matrix_set
      25             :    USE dbt_api,                         ONLY: dbt_get_block,&
      26             :                                               dbt_iterator_blocks_left,&
      27             :                                               dbt_iterator_next_block,&
      28             :                                               dbt_iterator_start,&
      29             :                                               dbt_iterator_stop,&
      30             :                                               dbt_iterator_type,&
      31             :                                               dbt_type
      32             :    USE distribution_2d_types,           ONLY: distribution_2d_type
      33             :    USE kinds,                           ONLY: dp
      34             :    USE message_passing,                 ONLY: mp_para_env_type
      35             :    USE particle_methods,                ONLY: get_particle_set
      36             :    USE particle_types,                  ONLY: particle_type
      37             :    USE qs_environment_types,            ONLY: get_qs_env,&
      38             :                                               qs_environment_type
      39             :    USE qs_integral_utils,               ONLY: basis_set_list_setup
      40             :    USE qs_kind_types,                   ONLY: qs_kind_type
      41             :    USE util,                            ONLY: get_limit
      42             :    USE xas_tdp_types,                   ONLY: donor_state_type,&
      43             :                                               get_proc_batch_sizes,&
      44             :                                               xas_tdp_control_type,&
      45             :                                               xas_tdp_env_type
      46             : 
      47             : !$ USE OMP_LIB, ONLY: omp_get_max_threads, omp_get_thread_num
      48             : #include "./base/base_uses.f90"
      49             : 
      50             :    IMPLICIT NONE
      51             :    PRIVATE
      52             : 
      53             :    CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'xas_tdp_kernel'
      54             : 
      55             :    PUBLIC :: kernel_coulomb_xc, kernel_exchange, contract2_AO_to_doMO, &
      56             :              reserve_contraction_blocks, ri_all_blocks_mm
      57             : 
      58             : CONTAINS
      59             : 
      60             : ! **************************************************************************************************
      61             : !> \brief Computes, if asked for it, the Coulomb and XC kernel matrices, in the usuall matrix format
      62             : !> \param coul_ker pointer the the Coulomb kernel matrix (can be void pointer)
      63             : !> \param xc_ker array of pointer to the different xc kernels (5 of them):
      64             : !>               1) the restricted closed-shell singlet kernel
      65             : !>               2) the restricted closed-shell triplet kernel
      66             : !>               3) the spin-conserving open-shell xc kernel
      67             : !>               4) the on-diagonal spin-flip open-shell xc kernel
      68             : !> \param donor_state ...
      69             : !> \param xas_tdp_env ...
      70             : !> \param xas_tdp_control ...
      71             : !> \param qs_env ...
      72             : !> \note Coulomb and xc kernel are put together in the same routine because they use the same RI
      73             : !>       Coulomb: (aI|Jb) = (aI|P) (P|Q)^-1 (Q|Jb)
      74             : !>       XC : (aI|fxc|Jb) = (aI|P) (P|Q)^-1 (Q|fxc|R) (R|S)^-1 (S|Jb)
      75             : !>       In the above formula, a,b label the sgfs
      76             : !>       The routine analyses the xas_tdp_control to know which kernel must be computed and how
      77             : !>       (open-shell, singlet, triplet, ROKS, LSD, etc...)
      78             : !>       On entry, the pointers should be allocated
      79             : ! **************************************************************************************************
      80         112 :    SUBROUTINE kernel_coulomb_xc(coul_ker, xc_ker, donor_state, xas_tdp_env, xas_tdp_control, qs_env)
      81             : 
      82             :       TYPE(dbcsr_type), INTENT(INOUT)                    :: coul_ker
      83             :       TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: xc_ker
      84             :       TYPE(donor_state_type), POINTER                    :: donor_state
      85             :       TYPE(xas_tdp_env_type), POINTER                    :: xas_tdp_env
      86             :       TYPE(xas_tdp_control_type), POINTER                :: xas_tdp_control
      87             :       TYPE(qs_environment_type), POINTER                 :: qs_env
      88             : 
      89             :       CHARACTER(len=*), PARAMETER                        :: routineN = 'kernel_coulomb_xc'
      90             : 
      91             :       INTEGER                                            :: batch_size, bo(2), handle, i, ibatch, &
      92             :                                                             iex, lb, natom, nbatch, ndo_mo, &
      93             :                                                             ndo_so, nex_atom, nsgfp, ri_atom, &
      94             :                                                             source, ub
      95          56 :       INTEGER, DIMENSION(:), POINTER                     :: blk_size
      96             :       LOGICAL                                            :: do_coulomb, do_sc, do_sf, do_sg, do_tp, &
      97             :                                                             do_xc, found
      98          56 :       REAL(dp), DIMENSION(:, :), POINTER                 :: PQ
      99             :       TYPE(dbcsr_distribution_type), POINTER             :: dist
     100          56 :       TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: contr1_int
     101             :       TYPE(mp_para_env_type), POINTER                    :: para_env
     102             : 
     103          56 :       NULLIFY (contr1_int, PQ, para_env, dist, blk_size)
     104             : 
     105             : !  Initialization
     106          56 :       ndo_mo = donor_state%ndo_mo
     107          56 :       do_xc = xas_tdp_control%do_xc
     108          56 :       do_sg = xas_tdp_control%do_singlet
     109          56 :       do_tp = xas_tdp_control%do_triplet
     110          56 :       do_sc = xas_tdp_control%do_spin_cons
     111          56 :       do_sf = xas_tdp_control%do_spin_flip
     112          56 :       ndo_so = ndo_mo; IF (xas_tdp_control%do_uks) ndo_so = 2*ndo_mo
     113          56 :       ri_atom = donor_state%at_index
     114          56 :       CALL get_qs_env(qs_env, natom=natom, para_env=para_env)
     115          56 :       do_coulomb = xas_tdp_control%do_coulomb
     116          56 :       dist => donor_state%dbcsr_dist
     117          56 :       blk_size => donor_state%blk_size
     118             : 
     119             : !  If no Coulomb nor xc, simply exit
     120          56 :       IF ((.NOT. do_coulomb) .AND. (.NOT. do_xc)) RETURN
     121             : 
     122          56 :       CALL timeset(routineN, handle)
     123             : 
     124             : !  Contract the RI 3-center integrals once to get (aI|P)
     125          56 :       CALL contract2_AO_to_doMO(contr1_int, "COULOMB", donor_state, xas_tdp_env, xas_tdp_control, qs_env)
     126             : 
     127             : !  Deal with the Coulomb case
     128          56 :       IF (do_coulomb) CALL coulomb(coul_ker, contr1_int, dist, blk_size, xas_tdp_env, &
     129          56 :                                    xas_tdp_control, qs_env)
     130             : 
     131             : !  Deal with the XC case
     132          56 :       IF (do_xc) THEN
     133             : 
     134             :          ! In the end, we compute: (aI|fxc|Jb) = (aI|P) (P|Q)^-1 (Q|fxc|R) (R|S)^-1 (S|Jb)
     135             :          ! where fxc can take different spin contributions.
     136             : 
     137             :          ! Precompute the product (aI|P) * (P|Q)^-1 and store it in contr1_int
     138          48 :          PQ => xas_tdp_env%ri_inv_coul
     139          48 :          CALL ri_all_blocks_mm(contr1_int, PQ)
     140             : 
     141             :          ! If not already done (e.g. when multpile donor states for a given excited atom), broadcast
     142             :          ! the RI matrix (Q|fxc|R) on all procs
     143          48 :          IF (.NOT. xas_tdp_env%fxc_avail) THEN
     144             :             ! Find on which processor the integrals (Q|fxc|R) for this atom are stored
     145          46 :             nsgfp = SIZE(PQ, 1)
     146          46 :             CALL get_qs_env(qs_env, para_env=para_env)
     147          46 :             found = .FALSE.
     148          46 :             nex_atom = SIZE(xas_tdp_env%ex_atom_indices)
     149          46 :             CALL get_proc_batch_sizes(batch_size, nbatch, nex_atom, para_env%num_pe)
     150             : 
     151          54 :             DO ibatch = 0, nbatch - 1
     152             : 
     153          54 :                bo = get_limit(nex_atom, nbatch, ibatch)
     154          62 :                DO iex = bo(1), bo(2)
     155             : 
     156          62 :                   IF (xas_tdp_env%ex_atom_indices(iex) == ri_atom) THEN
     157          46 :                      source = ibatch*batch_size
     158             :                      found = .TRUE. !but simply take the first
     159             :                      EXIT
     160             :                   END IF
     161             :                END DO !iex
     162           8 :                IF (found) EXIT
     163             :             END DO !ip
     164             : 
     165             :             ! Broadcast the integrals to all procs (deleted after all donor states for this atoms are treated)
     166          46 :             lb = 1; IF (do_sf .AND. .NOT. do_sc) lb = 4
     167          46 :             ub = 2; IF (do_sc) ub = 3; IF (do_sf) ub = 4
     168         140 :             DO i = lb, ub
     169          94 :                IF (.NOT. ASSOCIATED(xas_tdp_env%ri_fxc(ri_atom, i)%array)) THEN
     170          64 :                   ALLOCATE (xas_tdp_env%ri_fxc(ri_atom, i)%array(nsgfp, nsgfp))
     171             :                END IF
     172     1157332 :                CALL para_env%bcast(xas_tdp_env%ri_fxc(ri_atom, i)%array, source)
     173             :             END DO
     174             : 
     175          46 :             xas_tdp_env%fxc_avail = .TRUE.
     176             :          END IF
     177             : 
     178             :          ! Case study on the calculation type
     179          48 :          IF (do_sg .OR. do_tp) THEN
     180             :             CALL rcs_xc(xc_ker(1)%matrix, xc_ker(2)%matrix, contr1_int, dist, blk_size, &
     181          46 :                         donor_state, xas_tdp_env, xas_tdp_control, qs_env)
     182             :          END IF
     183             : 
     184          48 :          IF (do_sc) THEN
     185             :             CALL sc_os_xc(xc_ker(3)%matrix, contr1_int, dist, blk_size, donor_state, &
     186           2 :                           xas_tdp_env, xas_tdp_control, qs_env)
     187             :          END IF
     188             : 
     189          48 :          IF (do_sf) THEN
     190             :             CALL ondiag_sf_os_xc(xc_ker(4)%matrix, contr1_int, dist, blk_size, donor_state, &
     191           0 :                                  xas_tdp_env, xas_tdp_control, qs_env)
     192             :          END IF
     193             : 
     194             :       END IF ! do_xc
     195             : 
     196             : !  Clean-up
     197          56 :       CALL dbcsr_deallocate_matrix_set(contr1_int)
     198             : 
     199          56 :       CALL timestop(handle)
     200             : 
     201          56 :    END SUBROUTINE kernel_coulomb_xc
     202             : 
     203             : ! **************************************************************************************************
     204             : !> \brief Create the matrix containing the Coulomb kernel, which is:
     205             : !>        (aI_sigma|J_tau b) ~= (aI_sigma|P) * (P|Q) * (Q|J_tau b)
     206             : !> \param coul_ker the Coulomb kernel
     207             : !> \param contr1_int the once contracted RI integrals (aI|P)
     208             : !> \param dist the inherited dbcsr ditribution
     209             : !> \param blk_size the inherited block sizes
     210             : !> \param xas_tdp_env ...
     211             : !> \param xas_tdp_control ...
     212             : !> \param qs_env ...
     213             : ! **************************************************************************************************
     214          56 :    SUBROUTINE coulomb(coul_ker, contr1_int, dist, blk_size, xas_tdp_env, xas_tdp_control, qs_env)
     215             : 
     216             :       TYPE(dbcsr_type), INTENT(INOUT)                    :: coul_ker
     217             :       TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: contr1_int
     218             :       TYPE(dbcsr_distribution_type), POINTER             :: dist
     219             :       INTEGER, DIMENSION(:), POINTER                     :: blk_size
     220             :       TYPE(xas_tdp_env_type), POINTER                    :: xas_tdp_env
     221             :       TYPE(xas_tdp_control_type), POINTER                :: xas_tdp_control
     222             :       TYPE(qs_environment_type), POINTER                 :: qs_env
     223             : 
     224             :       LOGICAL                                            :: quadrants(3)
     225             :       REAL(dp), DIMENSION(:, :), POINTER                 :: PQ
     226          56 :       TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: lhs_int, rhs_int
     227             :       TYPE(dbcsr_type)                                   :: work_mat
     228             : 
     229          56 :       NULLIFY (PQ, rhs_int, lhs_int)
     230             : 
     231             :       ! Get the inver RI coulomb
     232          56 :       PQ => xas_tdp_env%ri_inv_coul
     233             : 
     234             :       ! Create a normal type work matrix
     235             :       CALL dbcsr_create(work_mat, name="WORK", matrix_type=dbcsr_type_no_symmetry, dist=dist, &
     236          56 :                         row_blk_size=blk_size, col_blk_size=blk_size)
     237             : 
     238             :       ! Compute the product (aI|P) * (P|Q)^-1 * (Q|Jb) = (aI|Jb)
     239          56 :       rhs_int => contr1_int ! the incoming contr1_int is not modified
     240         244 :       ALLOCATE (lhs_int(SIZE(contr1_int)))
     241          56 :       CALL copy_ri_contr_int(lhs_int, rhs_int) ! RHS containts (Q|JB)^T
     242          56 :       CALL ri_all_blocks_mm(lhs_int, PQ) ! LHS contatins (aI|P)*(P|Q)^-1
     243             : 
     244             :       !In the special case of ROKS, same MOs for each spin => put same (aI|Jb) product on the
     245             :       !alpha-alpha, alpha-beta and beta-beta quadrants of the kernel matrix.
     246          56 :       IF (xas_tdp_control%do_roks) THEN
     247           0 :          quadrants = [.TRUE., .TRUE., .TRUE.]
     248             :       ELSE
     249          56 :          quadrants = [.TRUE., .FALSE., .FALSE.]
     250             :       END IF
     251             :       CALL ri_int_product(work_mat, lhs_int, rhs_int, quadrants, qs_env, &
     252          56 :                           eps_filter=xas_tdp_control%eps_filter)
     253          56 :       CALL dbcsr_finalize(work_mat)
     254             : 
     255             :       !Create the symmetric kernel matrix and redistribute work_mat into it
     256             :       CALL dbcsr_create(coul_ker, name="COULOMB KERNEL", matrix_type=dbcsr_type_symmetric, dist=dist, &
     257          56 :                         row_blk_size=blk_size, col_blk_size=blk_size)
     258          56 :       CALL dbcsr_complete_redistribute(work_mat, coul_ker)
     259             : 
     260             :       !clean-up
     261          56 :       CALL dbcsr_release(work_mat)
     262          56 :       CALL dbcsr_deallocate_matrix_set(lhs_int)
     263             : 
     264          56 :    END SUBROUTINE coulomb
     265             : 
     266             : ! **************************************************************************************************
     267             : !> \brief Create the matrix containing the XC kenrel in the spin-conserving open-shell case:
     268             : !>        (aI_sigma|fxc|J_tau b) ~= (aI_sigma|P) (P|Q)^-1 (Q|fxc|R) (R|S)^-1 (S|J_tau b)
     269             : !> \param xc_ker the kernel matrix
     270             : !> \param contr1_int_PQ the once contracted RI integrals, with inverse coulomb: (aI_sigma|P) (P|Q)^-1
     271             : !> \param dist inherited dbcsr dist
     272             : !> \param blk_size inherited block sizes
     273             : !> \param donor_state ...
     274             : !> \param xas_tdp_env ...
     275             : !> \param xas_tdp_control ...
     276             : !> \param qs_env ...
     277             : !> note Prior to calling this function, the (Q|fxc|R) integral must be brodcasted to all procs
     278             : ! **************************************************************************************************
     279           2 :    SUBROUTINE sc_os_xc(xc_ker, contr1_int_PQ, dist, blk_size, donor_state, xas_tdp_env, &
     280             :                        xas_tdp_control, qs_env)
     281             : 
     282             :       TYPE(dbcsr_type), INTENT(INOUT)                    :: xc_ker
     283             :       TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: contr1_int_PQ
     284             :       TYPE(dbcsr_distribution_type), POINTER             :: dist
     285             :       INTEGER, DIMENSION(:), POINTER                     :: blk_size
     286             :       TYPE(donor_state_type), POINTER                    :: donor_state
     287             :       TYPE(xas_tdp_env_type), POINTER                    :: xas_tdp_env
     288             :       TYPE(xas_tdp_control_type), POINTER                :: xas_tdp_control
     289             :       TYPE(qs_environment_type), POINTER                 :: qs_env
     290             : 
     291             :       INTEGER                                            :: ndo_mo, ri_atom
     292             :       LOGICAL                                            :: quadrants(3)
     293           2 :       TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: lhs_int, rhs_int
     294             :       TYPE(dbcsr_type)                                   :: work_mat
     295             : 
     296           2 :       NULLIFY (lhs_int, rhs_int)
     297             : 
     298             :       ! Initialization
     299           2 :       ndo_mo = donor_state%ndo_mo
     300           2 :       ri_atom = donor_state%at_index
     301             :       !normal type work matrix such that distribution of all spin quadrants match
     302             :       CALL dbcsr_create(work_mat, name="WORK", matrix_type=dbcsr_type_no_symmetry, dist=dist, &
     303           2 :                         row_blk_size=blk_size, col_blk_size=blk_size)
     304             : 
     305           2 :       rhs_int => contr1_int_PQ ! contains [ (aI|P)*(P|Q)^-1 ]^T
     306          10 :       ALLOCATE (lhs_int(SIZE(contr1_int_PQ))) ! will contain (aI|P)*(P|Q)^-1 * (Q|fxc|R)
     307             : 
     308             :       ! Case study: UKS or ROKS ?
     309           2 :       IF (xas_tdp_control%do_uks) THEN
     310             : 
     311             :          ! In the case of UKS, donor MOs might be different for different spins. Moreover, the
     312             :          ! fxc itself might change since fxc = fxc_sigma,tau
     313             :          ! => Carfully treat each spin-quadrant separately
     314             : 
     315             :          ! alpha-alpha spin quadrant (upper-lefet)
     316           2 :          quadrants = [.TRUE., .FALSE., .FALSE.]
     317             : 
     318             :          ! Copy the alpha part into lhs_int, multiply by the alpha-alpha (Q|fxc|R) and then
     319             :          ! by the alpha part of rhs_int
     320           2 :          CALL copy_ri_contr_int(lhs_int(1:ndo_mo), rhs_int(1:ndo_mo))
     321           2 :          CALL ri_all_blocks_mm(lhs_int(1:ndo_mo), xas_tdp_env%ri_fxc(ri_atom, 1)%array)
     322             :          CALL ri_int_product(work_mat, lhs_int(1:ndo_mo), rhs_int(1:ndo_mo), quadrants, qs_env, &
     323           2 :                              eps_filter=xas_tdp_control%eps_filter)
     324             : 
     325             :          ! alpha-beta spin quadrant (upper-right)
     326           2 :          quadrants = [.FALSE., .TRUE., .FALSE.]
     327             : 
     328             :          !Copy the alpha part into LHS, multiply by the alpha-beta kernel and the beta part of RHS
     329           2 :          CALL copy_ri_contr_int(lhs_int(1:ndo_mo), rhs_int(1:ndo_mo))
     330           2 :          CALL ri_all_blocks_mm(lhs_int(1:ndo_mo), xas_tdp_env%ri_fxc(ri_atom, 2)%array)
     331             :          CALL ri_int_product(work_mat, lhs_int(1:ndo_mo), rhs_int(ndo_mo + 1:2*ndo_mo), &
     332           2 :                              quadrants, qs_env, eps_filter=xas_tdp_control%eps_filter)
     333             : 
     334             :          ! beta-beta spin quadrant (lower-right)
     335           2 :          quadrants = [.FALSE., .FALSE., .TRUE.]
     336             : 
     337             :          !Copy the beta part into LHS, multiply by the beta-beta kernel and the beta part of RHS
     338           2 :          CALL copy_ri_contr_int(lhs_int(ndo_mo + 1:2*ndo_mo), rhs_int(ndo_mo + 1:2*ndo_mo))
     339           2 :          CALL ri_all_blocks_mm(lhs_int(ndo_mo + 1:2*ndo_mo), xas_tdp_env%ri_fxc(ri_atom, 3)%array)
     340             :          CALL ri_int_product(work_mat, lhs_int(ndo_mo + 1:2*ndo_mo), rhs_int(ndo_mo + 1:2*ndo_mo), &
     341           2 :                              quadrants, qs_env, eps_filter=xas_tdp_control%eps_filter)
     342             : 
     343           0 :       ELSE IF (xas_tdp_control%do_roks) THEN
     344             : 
     345             :          ! In the case of ROKS, fxc = fxc_sigma,tau is different for each spin quadrant, but the
     346             :          ! donor MOs remain the same
     347             : 
     348             :          ! alpha-alpha kernel in the upper left quadrant
     349           0 :          quadrants = [.TRUE., .FALSE., .FALSE.]
     350             : 
     351             :          !Copy the LHS and multiply by alpha-alpha kernel
     352           0 :          CALL copy_ri_contr_int(lhs_int, rhs_int)
     353           0 :          CALL ri_all_blocks_mm(lhs_int, xas_tdp_env%ri_fxc(ri_atom, 1)%array)
     354             :          CALL ri_int_product(work_mat, lhs_int, rhs_int, quadrants, qs_env, &
     355           0 :                              eps_filter=xas_tdp_control%eps_filter)
     356             : 
     357             :          ! alpha-beta kernel in the upper-right quadrant
     358           0 :          quadrants = [.FALSE., .TRUE., .FALSE.]
     359             : 
     360             :          !Copy LHS and multiply by the alpha-beta kernel
     361           0 :          CALL copy_ri_contr_int(lhs_int, rhs_int)
     362           0 :          CALL ri_all_blocks_mm(lhs_int, xas_tdp_env%ri_fxc(ri_atom, 2)%array)
     363             :          CALL ri_int_product(work_mat, lhs_int, rhs_int, quadrants, qs_env, &
     364           0 :                              eps_filter=xas_tdp_control%eps_filter)
     365             : 
     366             :          ! beta-beta kernel in the lower-right quadrant
     367           0 :          quadrants = [.FALSE., .FALSE., .TRUE.]
     368             : 
     369             :          !Copy the LHS and multiply by the beta-beta kernel
     370           0 :          CALL copy_ri_contr_int(lhs_int, rhs_int)
     371           0 :          CALL ri_all_blocks_mm(lhs_int, xas_tdp_env%ri_fxc(ri_atom, 3)%array)
     372             :          CALL ri_int_product(work_mat, lhs_int, rhs_int, quadrants, qs_env, &
     373           0 :                              eps_filter=xas_tdp_control%eps_filter)
     374             : 
     375             :       END IF
     376           2 :       CALL dbcsr_finalize(work_mat)
     377             : 
     378             :       ! Create a symmetric kernel matrix and redistribute the normal work matrix into it
     379             :       CALL dbcsr_create(xc_ker, name="SC OS XC KERNEL", matrix_type=dbcsr_type_symmetric, dist=dist, &
     380           2 :                         row_blk_size=blk_size, col_blk_size=blk_size)
     381           2 :       CALL dbcsr_complete_redistribute(work_mat, xc_ker)
     382             : 
     383             :       !clean-up
     384           2 :       CALL dbcsr_deallocate_matrix_set(lhs_int)
     385           2 :       CALL dbcsr_release(work_mat)
     386             : 
     387           2 :    END SUBROUTINE sc_os_xc
     388             : 
     389             : ! **************************************************************************************************
     390             : !> \brief Create the matrix containing the on-diagonal spin-flip XC kernel (open-shell), which is:
     391             : !>        (a I_sigma|fxc|J_tau b) * delta_sigma,tau, fxc = 1/(rhoa-rhob) * (dE/drhoa - dE/drhob)
     392             : !>        with RI: (a I_sigma|fxc|J_tau b) ~= (aI_sigma|P) (P|Q)^-1 (Q|fxc|R) (R|S)^-1 (S|J_tau b)
     393             : !> \param xc_ker the kernel matrix
     394             : !> \param contr1_int_PQ the once contracted RI integrals with coulomb product: (aI_sigma|P) (P|Q)^-1
     395             : !> \param dist inherited dbcsr dist
     396             : !> \param blk_size inherited block sizes
     397             : !> \param donor_state ...
     398             : !> \param xas_tdp_env ...
     399             : !> \param xas_tdp_control ...
     400             : !> \param qs_env ...
     401             : !> \note  It must be later on multiplied by the spin-swapped Q projector
     402             : !>        Prior to calling this function, the (Q|fxc|R) integral must be brodcasted to all procs
     403             : ! **************************************************************************************************
     404           0 :    SUBROUTINE ondiag_sf_os_xc(xc_ker, contr1_int_PQ, dist, blk_size, donor_state, xas_tdp_env, &
     405             :                               xas_tdp_control, qs_env)
     406             : 
     407             :       TYPE(dbcsr_type), INTENT(INOUT)                    :: xc_ker
     408             :       TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: contr1_int_PQ
     409             :       TYPE(dbcsr_distribution_type), POINTER             :: dist
     410             :       INTEGER, DIMENSION(:), POINTER                     :: blk_size
     411             :       TYPE(donor_state_type), POINTER                    :: donor_state
     412             :       TYPE(xas_tdp_env_type), POINTER                    :: xas_tdp_env
     413             :       TYPE(xas_tdp_control_type), POINTER                :: xas_tdp_control
     414             :       TYPE(qs_environment_type), POINTER                 :: qs_env
     415             : 
     416             :       INTEGER                                            :: ndo_mo, ri_atom
     417             :       LOGICAL                                            :: quadrants(3)
     418           0 :       TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: lhs_int, rhs_int
     419             :       TYPE(dbcsr_type)                                   :: work_mat
     420             : 
     421           0 :       NULLIFY (lhs_int, rhs_int)
     422             : 
     423             :       ! Initialization
     424           0 :       ndo_mo = donor_state%ndo_mo
     425           0 :       ri_atom = donor_state%at_index
     426             :       !normal type work matrix such that distribution of all spin quadrants match
     427             :       CALL dbcsr_create(work_mat, name="WORK", matrix_type=dbcsr_type_no_symmetry, dist=dist, &
     428           0 :                         row_blk_size=blk_size, col_blk_size=blk_size)
     429             : 
     430             :       !Create a lhs_int, in which the whole (aI_sigma|P) (P|Q)^-1 (Q|fxc|R) will be put
     431             :       !because in spin-flip fxc is spin-independent, can take the product once and for all
     432           0 :       rhs_int => contr1_int_PQ
     433           0 :       ALLOCATE (lhs_int(SIZE(contr1_int_PQ)))
     434           0 :       CALL copy_ri_contr_int(lhs_int, rhs_int)
     435           0 :       CALL ri_all_blocks_mm(lhs_int, xas_tdp_env%ri_fxc(ri_atom, 4)%array)
     436             : 
     437             :       ! Case study: UKS or ROKS ?
     438           0 :       IF (xas_tdp_control%do_uks) THEN
     439             : 
     440             :          ! In the case of UKS, donor MOs might be different for different spins
     441             :          ! => Carfully treat each spin-quadrant separately
     442             :          ! NO alpha-beta because of the delta_sigma,tau
     443             : 
     444             :          ! alpha-alpha spin quadrant (upper-lefet)
     445           0 :          quadrants = [.TRUE., .FALSE., .FALSE.]
     446             :          CALL ri_int_product(work_mat, lhs_int(1:ndo_mo), rhs_int(1:ndo_mo), quadrants, qs_env, &
     447           0 :                              eps_filter=xas_tdp_control%eps_filter)
     448             : 
     449             :          ! beta-beta spin quadrant (lower-right)
     450           0 :          quadrants = [.FALSE., .FALSE., .TRUE.]
     451             :          CALL ri_int_product(work_mat, lhs_int(ndo_mo + 1:2*ndo_mo), rhs_int(ndo_mo + 1:2*ndo_mo), &
     452           0 :                              quadrants, qs_env, eps_filter=xas_tdp_control%eps_filter)
     453             : 
     454           0 :       ELSE IF (xas_tdp_control%do_roks) THEN
     455             : 
     456             :          ! In the case of ROKS, same donor MOs for both spins => can do it all at once
     457             :          ! But NOT the alpha-beta quadrant because of delta_sigma,tau
     458             : 
     459           0 :          quadrants = [.TRUE., .FALSE., .TRUE.]
     460             :          CALL ri_int_product(work_mat, lhs_int, rhs_int, quadrants, qs_env, &
     461           0 :                              eps_filter=xas_tdp_control%eps_filter)
     462             : 
     463             :       END IF
     464           0 :       CALL dbcsr_finalize(work_mat)
     465             : 
     466             :       ! Create a symmetric kernel matrix and redistribute the normal work matrix into it
     467             :       CALL dbcsr_create(xc_ker, name="ON-DIAG SF OS XC KERNEL", matrix_type=dbcsr_type_symmetric, &
     468           0 :                         dist=dist, row_blk_size=blk_size, col_blk_size=blk_size)
     469           0 :       CALL dbcsr_complete_redistribute(work_mat, xc_ker)
     470             : 
     471             :       !clean-up
     472           0 :       CALL dbcsr_deallocate_matrix_set(lhs_int)
     473           0 :       CALL dbcsr_release(work_mat)
     474             : 
     475           0 :    END SUBROUTINE ondiag_sf_os_xc
     476             : 
     477             : ! **************************************************************************************************
     478             : !> \brief Create the matrix containing the XC kernel in the restricted closed-shell case, for
     479             : !>        singlets: (aI|fxc|Jb) = (aI|P) (P|Q)^-1 (Q|fxc_alp,alp+fxc_alp,bet|R) (R|S)^-1 (S|Jb)
     480             : !>        triplets: (aI|fxc|Jb) = (aI|P) (P|Q)^-1 (Q|fxc_alp,alp-fxc_alp,bet|R) (R|S)^-1 (S|Jb)
     481             : !> \param sg_xc_ker the singlet kernel matrix
     482             : !> \param tp_xc_ker the triplet kernel matrix
     483             : !> \param contr1_int_PQ the once contracted RI integrals including inverse 2-center Coulomb prodcut:
     484             : !>                      (aI|P)*(P|Q)^-1
     485             : !> \param dist inherited dbcsr dist
     486             : !> \param blk_size inherited block sizes
     487             : !> \param donor_state ...
     488             : !> \param xas_tdp_env ...
     489             : !> \param xas_tdp_control ...
     490             : !> \param qs_env ...
     491             : ! **************************************************************************************************
     492          46 :    SUBROUTINE rcs_xc(sg_xc_ker, tp_xc_ker, contr1_int_PQ, dist, blk_size, donor_state, &
     493             :                      xas_tdp_env, xas_tdp_control, qs_env)
     494             : 
     495             :       TYPE(dbcsr_type), INTENT(INOUT)                    :: sg_xc_ker, tp_xc_ker
     496             :       TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: contr1_int_PQ
     497             :       TYPE(dbcsr_distribution_type), POINTER             :: dist
     498             :       INTEGER, DIMENSION(:), POINTER                     :: blk_size
     499             :       TYPE(donor_state_type), POINTER                    :: donor_state
     500             :       TYPE(xas_tdp_env_type), POINTER                    :: xas_tdp_env
     501             :       TYPE(xas_tdp_control_type), POINTER                :: xas_tdp_control
     502             :       TYPE(qs_environment_type), POINTER                 :: qs_env
     503             : 
     504             :       INTEGER                                            :: nsgfp, ri_atom
     505             :       LOGICAL                                            :: quadrants(3)
     506             :       REAL(dp), ALLOCATABLE, DIMENSION(:, :)             :: fxc
     507          46 :       TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: lhs_int, rhs_int
     508             :       TYPE(dbcsr_type)                                   :: work_mat
     509             : 
     510          46 :       NULLIFY (lhs_int, rhs_int)
     511             : 
     512             :       ! Initialization
     513          46 :       ri_atom = donor_state%at_index
     514          46 :       nsgfp = SIZE(xas_tdp_env%ri_fxc(ri_atom, 1)%array, 1)
     515          46 :       rhs_int => contr1_int_PQ ! RHS contains [ (aI|P)*(P|Q)^-1 ]^T
     516         184 :       ALLOCATE (lhs_int(SIZE(contr1_int_PQ))) ! LHS will contatin (aI|P)*(P|Q)^-1 * (Q|fxc|R)
     517             : 
     518             :       ! Work structures
     519         184 :       ALLOCATE (fxc(nsgfp, nsgfp))
     520             :       CALL dbcsr_create(work_mat, name="WORK", matrix_type=dbcsr_type_no_symmetry, dist=dist, &
     521          46 :                         row_blk_size=blk_size, col_blk_size=blk_size)
     522             : 
     523             :       ! Case study: singlet and/or triplet ?
     524          46 :       IF (xas_tdp_control%do_singlet) THEN
     525             : 
     526             :          ! Take the sum of fxc for alpha-alpha and alpha-beta
     527          46 :          CALL dcopy(nsgfp*nsgfp, xas_tdp_env%ri_fxc(ri_atom, 1)%array, 1, fxc, 1)
     528          46 :          CALL daxpy(nsgfp*nsgfp, 1.0_dp, xas_tdp_env%ri_fxc(ri_atom, 2)%array, 1, fxc, 1)
     529             : 
     530             :          ! Copy the fresh lhs_int = (aI|P) (P|Q)^-1 and multiply by (Q|fxc|R)
     531          46 :          CALL copy_ri_contr_int(lhs_int, rhs_int)
     532          46 :          CALL ri_all_blocks_mm(lhs_int, fxc)
     533             : 
     534             :          ! Compute the final LHS RHS product => spin-restricted, only upper-left quadrant
     535          46 :          quadrants = [.TRUE., .FALSE., .FALSE.]
     536             :          CALL ri_int_product(work_mat, lhs_int, rhs_int, quadrants, qs_env, &
     537          46 :                              eps_filter=xas_tdp_control%eps_filter)
     538          46 :          CALL dbcsr_finalize(work_mat)
     539             : 
     540             :          !Create the symmetric kernel matrix and redistribute work_mat into it
     541             :          CALL dbcsr_create(sg_xc_ker, name="XC SINGLET KERNEL", matrix_type=dbcsr_type_symmetric, &
     542          46 :                            dist=dist, row_blk_size=blk_size, col_blk_size=blk_size)
     543          46 :          CALL dbcsr_complete_redistribute(work_mat, sg_xc_ker)
     544             : 
     545             :       END IF
     546             : 
     547          46 :       IF (xas_tdp_control%do_triplet) THEN
     548             : 
     549             :          ! Take the difference of fxc for alpha-alpha and alpha-beta
     550           0 :          CALL dcopy(nsgfp*nsgfp, xas_tdp_env%ri_fxc(ri_atom, 1)%array, 1, fxc, 1)
     551           0 :          CALL daxpy(nsgfp*nsgfp, -1.0_dp, xas_tdp_env%ri_fxc(ri_atom, 2)%array, 1, fxc, 1)
     552             : 
     553             :          ! Copy the fresh lhs_int = (aI|P) (P|Q)^-1 and multiply by (Q|fxc|R)
     554           0 :          CALL copy_ri_contr_int(lhs_int, rhs_int)
     555           0 :          CALL ri_all_blocks_mm(lhs_int, fxc)
     556             : 
     557             :          ! Compute the final LHS RHS product => spin-restricted, only upper-left quadrant
     558           0 :          quadrants = [.TRUE., .FALSE., .FALSE.]
     559             :          CALL ri_int_product(work_mat, lhs_int, rhs_int, quadrants, qs_env, &
     560           0 :                              eps_filter=xas_tdp_control%eps_filter)
     561           0 :          CALL dbcsr_finalize(work_mat)
     562             : 
     563             :          !Create the symmetric kernel matrix and redistribute work_mat into it
     564             :          CALL dbcsr_create(tp_xc_ker, name="XC TRIPLET KERNEL", matrix_type=dbcsr_type_symmetric, &
     565           0 :                            dist=dist, row_blk_size=blk_size, col_blk_size=blk_size)
     566           0 :          CALL dbcsr_complete_redistribute(work_mat, tp_xc_ker)
     567             : 
     568             :       END IF
     569             : 
     570             :       ! clean-up
     571          46 :       CALL dbcsr_deallocate_matrix_set(lhs_int)
     572          46 :       CALL dbcsr_release(work_mat)
     573          46 :       DEALLOCATE (fxc)
     574             : 
     575          46 :    END SUBROUTINE rcs_xc
     576             : 
     577             : ! **************************************************************************************************
     578             : !> \brief Computes the exact exchange kernel matrix using RI. Returns an array of 2 matrices,
     579             : !>        which are:
     580             : !>        1) the on-diagonal kernel: (ab|I_sigma J_tau) * delta_sigma,tau
     581             : !>        2) the off-diagonal spin-conserving kernel: (aJ_sigma|I_tau b) * delta_sigma,tau
     582             : !>        An internal analysis determines which of the above are computed (can range from 0 to 2),
     583             : !> \param ex_ker ...
     584             : !> \param donor_state ...
     585             : !> \param xas_tdp_env ...
     586             : !> \param xas_tdp_control ...
     587             : !> \param qs_env ...
     588             : !> \note In the case of spin-conserving excitation, the kernel must later be multiplied by the
     589             : !>       usual Q projector. In the case of spin-flip, one needs to project the excitations coming
     590             : !>       from alpha donor MOs on the unoccupied beta MOs. This is done by multiplying by a Q
     591             : !>       projector where the alpha-alpha and beta-beta quadrants are swapped
     592             : !>       The ex_ker array should be allocated on entry (not the internals)
     593             : ! **************************************************************************************************
     594          98 :    SUBROUTINE kernel_exchange(ex_ker, donor_state, xas_tdp_env, xas_tdp_control, qs_env)
     595             : 
     596             :       TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: ex_ker
     597             :       TYPE(donor_state_type), POINTER                    :: donor_state
     598             :       TYPE(xas_tdp_env_type), POINTER                    :: xas_tdp_env
     599             :       TYPE(xas_tdp_control_type), POINTER                :: xas_tdp_control
     600             :       TYPE(qs_environment_type), POINTER                 :: qs_env
     601             : 
     602             :       CHARACTER(len=*), PARAMETER                        :: routineN = 'kernel_exchange'
     603             : 
     604             :       INTEGER                                            :: handle
     605          56 :       INTEGER, DIMENSION(:), POINTER                     :: blk_size
     606             :       LOGICAL                                            :: do_off_sc
     607             :       TYPE(dbcsr_distribution_type), POINTER             :: dist
     608          56 :       TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: contr1_int
     609             : 
     610          56 :       NULLIFY (contr1_int, dist, blk_size)
     611             : 
     612             :       !Don't do anything if no hfx
     613          14 :       IF (.NOT. xas_tdp_control%do_hfx) RETURN
     614             : 
     615          42 :       CALL timeset(routineN, handle)
     616             : 
     617          42 :       dist => donor_state%dbcsr_dist
     618          42 :       blk_size => donor_state%blk_size
     619             : 
     620             :       !compute the off-diag spin-conserving only if not TDA and anything that is spin-conserving
     621             :       do_off_sc = (.NOT. xas_tdp_control%tamm_dancoff) .AND. &
     622          42 :                   (xas_tdp_control%do_spin_cons .OR. xas_tdp_control%do_singlet .OR. xas_tdp_control%do_triplet)
     623             : 
     624             :       ! Need the once contracted integrals (aI|P)
     625          42 :       CALL contract2_AO_to_doMO(contr1_int, "EXCHANGE", donor_state, xas_tdp_env, xas_tdp_control, qs_env)
     626             : 
     627             : !  The on-diagonal exchange : (ab|P) * (P|Q)^-1 * (Q|I_sigma J_tau) * delta_sigma,tau
     628             :       CALL ondiag_ex(ex_ker(1)%matrix, contr1_int, dist, blk_size, donor_state, xas_tdp_env, &
     629          42 :                      xas_tdp_control, qs_env)
     630             : 
     631             : !  The off-diag spin-conserving case: (aJ_sigma|P) * (P|Q)^-1 * (Q|I_tau b) * delta_sigma,tau
     632          42 :       IF (do_off_sc) THEN
     633             :          CALL offdiag_ex_sc(ex_ker(2)%matrix, contr1_int, dist, blk_size, donor_state, &
     634           6 :                             xas_tdp_env, xas_tdp_control, qs_env)
     635             :       END IF
     636             : 
     637             :       !clean-up
     638          42 :       CALL dbcsr_deallocate_matrix_set(contr1_int)
     639             : 
     640          42 :       CALL timestop(handle)
     641             : 
     642          56 :    END SUBROUTINE kernel_exchange
     643             : 
     644             : ! **************************************************************************************************
     645             : !> \brief Create the matrix containing the on-diagonal exact exchange kernel, which is:
     646             : !>        (ab|I_sigma J_tau) * delta_sigma,tau, where a,b are AOs, I_sigma and J_tau are the donor
     647             : !>        spin-orbitals. A RI is done: (ab|I_sigma J_tau) = (ab|P) * (P|Q)^-1 * (Q|I_sigma J_tau)
     648             : !> \param ondiag_ex_ker the on-diagonal exchange kernel in dbcsr format
     649             : !> \param contr1_int the already once-contracted RI 3-center integrals (aI_sigma|P)
     650             : !>        where each matrix of the array contains the contraction for the donor spin-orbital I_sigma
     651             : !> \param dist the inherited dbcsr distribution
     652             : !> \param blk_size the inherited dbcsr block sizes
     653             : !> \param donor_state ...
     654             : !> \param xas_tdp_env ...
     655             : !> \param xas_tdp_control ...
     656             : !> \param qs_env ...
     657             : !> \note In the presence of a RI metric, we have instead M^-1 * (P|Q) * M^-1
     658             : ! **************************************************************************************************
     659          42 :    SUBROUTINE ondiag_ex(ondiag_ex_ker, contr1_int, dist, blk_size, donor_state, xas_tdp_env, &
     660             :                         xas_tdp_control, qs_env)
     661             : 
     662             :       TYPE(dbcsr_type), INTENT(INOUT)                    :: ondiag_ex_ker
     663             :       TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: contr1_int
     664             :       TYPE(dbcsr_distribution_type), POINTER             :: dist
     665             :       INTEGER, DIMENSION(:), POINTER                     :: blk_size
     666             :       TYPE(donor_state_type), POINTER                    :: donor_state
     667             :       TYPE(xas_tdp_env_type), POINTER                    :: xas_tdp_env
     668             :       TYPE(xas_tdp_control_type), POINTER                :: xas_tdp_control
     669             :       TYPE(qs_environment_type), POINTER                 :: qs_env
     670             : 
     671             :       INTEGER                                            :: blk, group, iblk, iso, jblk, jso, nblk, &
     672             :                                                             ndo_mo, ndo_so, nsgfa, nsgfp, ri_atom, &
     673             :                                                             source
     674          42 :       INTEGER, DIMENSION(:), POINTER                     :: col_dist, col_dist_work, row_dist, &
     675          42 :                                                             row_dist_work
     676          42 :       INTEGER, DIMENSION(:, :), POINTER                  :: pgrid
     677             :       LOGICAL                                            :: do_roks, do_uks, found
     678          42 :       REAL(dp), ALLOCATABLE, DIMENSION(:, :)             :: coeffs, ri_coeffs
     679          42 :       REAL(dp), DIMENSION(:, :), POINTER                 :: aIQ, pblock, PQ
     680             :       TYPE(dbcsr_distribution_type)                      :: opt_dbcsr_dist, work_dbcsr_dist
     681             :       TYPE(dbcsr_iterator_type)                          :: iter
     682          42 :       TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: matrix_s
     683             :       TYPE(dbcsr_type)                                   :: abIJ, mats_desymm, work_mat
     684             :       TYPE(mp_para_env_type), POINTER                    :: para_env
     685             : 
     686          42 :       NULLIFY (para_env, matrix_s, pblock, aIQ, row_dist, col_dist, row_dist_work, col_dist_work, pgrid)
     687             : 
     688             :       ! We want to compute (ab|I_sigma J_tau) = (ab|P) * (P|Q)^-1 * (Q|I_sigma J_tau)
     689             :       ! Already have (cJ_tau|P)  stored in contr1_int. Need to further contract the
     690             :       ! AOs with the coeff of the I_alpha spin-orbital.
     691             : 
     692             :       ! Initialization
     693          42 :       ndo_mo = donor_state%ndo_mo
     694          42 :       ri_atom = donor_state%at_index
     695          42 :       do_roks = xas_tdp_control%do_roks
     696          42 :       do_uks = xas_tdp_control%do_uks
     697           6 :       ndo_so = ndo_mo; IF (do_uks) ndo_so = 2*ndo_mo !if not UKS, same donor MOs for both spins
     698          42 :       PQ => xas_tdp_env%ri_inv_ex
     699             : 
     700          42 :       CALL get_qs_env(qs_env, para_env=para_env, matrix_s=matrix_s, natom=nblk)
     701          42 :       nsgfp = SIZE(PQ, 1)
     702          42 :       nsgfa = SIZE(donor_state%contract_coeffs, 1)
     703         252 :       ALLOCATE (coeffs(nsgfp, ndo_so), ri_coeffs(nsgfp, ndo_so))
     704             : 
     705             :       ! a and b need to overlap for non-zero (ab|IJ) => same block structure as overlap S
     706             :       ! need compatible distribution_2d with 3c tensor + normal type
     707          42 :       CALL cp_dbcsr_dist2d_to_dist(xas_tdp_env%opt_dist2d_ex, opt_dbcsr_dist)
     708             : 
     709          42 :       CALL dbcsr_desymmetrize(matrix_s(1)%matrix, mats_desymm)
     710             : 
     711          42 :       CALL dbcsr_create(abIJ, template=mats_desymm, name="(ab|IJ)", dist=opt_dbcsr_dist)
     712          42 :       CALL dbcsr_complete_redistribute(mats_desymm, abIJ)
     713             : 
     714          42 :       CALL dbcsr_release(mats_desymm)
     715             : 
     716             :       ! Create a work distribution based on opt_dbcsr_dist, but for full size matrices
     717             :       CALL dbcsr_distribution_get(opt_dbcsr_dist, row_dist=row_dist, col_dist=col_dist, group=group, &
     718          42 :                                   pgrid=pgrid)
     719             : 
     720         126 :       ALLOCATE (row_dist_work(ndo_so*nblk))
     721          84 :       ALLOCATE (col_dist_work(ndo_so*nblk))
     722         102 :       DO iso = 1, ndo_so
     723         484 :          row_dist_work((iso - 1)*nblk + 1:iso*nblk) = row_dist(:)
     724         526 :          col_dist_work((iso - 1)*nblk + 1:iso*nblk) = col_dist(:)
     725             :       END DO
     726             : 
     727             :       CALL dbcsr_distribution_new(work_dbcsr_dist, group=group, pgrid=pgrid, row_dist=row_dist_work, &
     728          42 :                                   col_dist=col_dist_work)
     729             : 
     730             :       CALL dbcsr_create(work_mat, name="WORK", matrix_type=dbcsr_type_no_symmetry, dist=work_dbcsr_dist, &
     731          42 :                         row_blk_size=blk_size, col_blk_size=blk_size)
     732             : 
     733             :       ! Loop over donor spin-orbitals. End matrix is symmetric => span only upper half
     734         102 :       DO iso = 1, ndo_so
     735             : 
     736             :          ! take the (aI|Q) block in contr1_int that has a centered on the excited atom
     737          60 :          CALL dbcsr_get_stored_coordinates(contr1_int(iso)%matrix, ri_atom, ri_atom, source)
     738          60 :          IF (para_env%mepos == source) THEN
     739          30 :             CALL dbcsr_get_block_p(contr1_int(iso)%matrix, ri_atom, ri_atom, aIQ, found)
     740             :          ELSE
     741         120 :             ALLOCATE (aIQ(nsgfa, nsgfp))
     742             :          END IF
     743      166224 :          CALL para_env%bcast(aIQ, source)
     744             : 
     745             :          ! get the contraction (Q|IJ) by taking (Q|Ia)*contract_coeffs and put it in coeffs
     746             :          CALL dgemm('T', 'N', nsgfp, ndo_so, nsgfa, 1.0_dp, aIQ, nsgfa, donor_state%contract_coeffs, &
     747          60 :                     nsgfa, 0.0_dp, coeffs, nsgfp)
     748             : 
     749             :          ! take (P|Q)^-1 * (Q|IJ) and put that in ri_coeffs
     750             :          CALL dgemm('N', 'N', nsgfp, ndo_so, nsgfp, 1.0_dp, PQ, nsgfp, coeffs, nsgfp, 0.0_dp, &
     751          60 :                     ri_coeffs, nsgfp)
     752             : 
     753          60 :          IF (.NOT. para_env%mepos == source) DEALLOCATE (aIQ)
     754             : 
     755         262 :          DO jso = iso, ndo_so
     756             : 
     757             :             ! There is no alpha-beta exchange. In case of UKS, iso,jso span all spin-orbitals
     758             :             ! => CYCLE if iso and jso are indexing MOs with different spin (and we have UKS)
     759         100 :             IF (do_uks .AND. (iso <= ndo_mo .AND. jso > ndo_mo)) CYCLE
     760             : 
     761             :             ! compute (ab|IJ) = sum_P (ab|P) * (P|Q)^-1 * (Q|IJ)
     762          78 :             CALL dbcsr_set(abIJ, 0.0_dp)
     763          78 :             CALL contract3_RI_to_doMOs(xas_tdp_env%ri_3c_ex, ri_coeffs(:, jso), abIJ, ri_atom)
     764             : 
     765             :             ! Loop over (ab|IJ) and copy into work. OK because dist are made to match
     766          78 :             CALL dbcsr_iterator_start(iter, abIJ)
     767         623 :             DO WHILE (dbcsr_iterator_blocks_left(iter))
     768             : 
     769         545 :                CALL dbcsr_iterator_next_block(iter, row=iblk, column=jblk, blk=blk)
     770         545 :                IF (iso == jso .AND. jblk < iblk) CYCLE
     771             : 
     772         330 :                CALL dbcsr_get_block_p(abIJ, iblk, jblk, pblock, found)
     773             : 
     774         408 :                IF (found) THEN
     775         330 :                   CALL dbcsr_put_block(work_mat, (iso - 1)*nblk + iblk, (jso - 1)*nblk + jblk, pblock)
     776             : 
     777             :                   !In case of ROKS, we have (ab|IJ) for alpha-alpha spin, but it is the same for
     778             :                   !beta-beta => replicate the blocks (alpha-beta is zero)
     779         330 :                   IF (do_roks) THEN
     780             :                      !the beta-beta block
     781             :                      CALL dbcsr_put_block(work_mat, (ndo_so + iso - 1)*nblk + iblk, &
     782           0 :                                           (ndo_so + jso - 1)*nblk + jblk, pblock)
     783             :                   END IF
     784             :                END IF
     785             : 
     786             :             END DO !iterator
     787         238 :             CALL dbcsr_iterator_stop(iter)
     788             : 
     789             :          END DO !jso
     790             :       END DO !iso
     791             : 
     792          42 :       CALL dbcsr_finalize(work_mat)
     793             :       CALL dbcsr_create(ondiag_ex_ker, name="ONDIAG EX KERNEL", matrix_type=dbcsr_type_symmetric, &
     794          42 :                         dist=dist, row_blk_size=blk_size, col_blk_size=blk_size)
     795          42 :       CALL dbcsr_complete_redistribute(work_mat, ondiag_ex_ker)
     796             : 
     797             :       !Clean-up
     798          42 :       CALL dbcsr_release(work_mat)
     799          42 :       CALL dbcsr_release(abIJ)
     800          42 :       CALL dbcsr_distribution_release(opt_dbcsr_dist)
     801          42 :       CALL dbcsr_distribution_release(work_dbcsr_dist)
     802          42 :       DEALLOCATE (col_dist_work, row_dist_work)
     803             : 
     804         210 :    END SUBROUTINE ondiag_ex
     805             : 
     806             : ! **************************************************************************************************
     807             : !> \brief Create the matrix containing the off-diagonal exact exchange kernel in the spin-conserving
     808             : !>        case (which also includes excitations from the closed=shell ref state ) This matrix reads:
     809             : !>        (aJ_sigma|I_tau b) * delta_sigma,tau , where a, b are AOs and J_sigma, I_tau are the donor
     810             : !>        spin-orbital. A RI is done: (aJ_sigma|I_tau b) = (aJ_sigma|P) * (P|Q)^-1 * (Q|I_tau b)
     811             : !> \param offdiag_ex_ker the off-diagonal, spin-conserving exchange kernel in dbcsr format
     812             : !> \param contr1_int the once-contracted RI integrals: (aJ_sigma|P)
     813             : !> \param dist the inherited dbcsr ditribution
     814             : !> \param blk_size the inherited block sizes
     815             : !> \param donor_state ...
     816             : !> \param xas_tdp_env ...
     817             : !> \param xas_tdp_control ...
     818             : !> \param qs_env ...
     819             : ! **************************************************************************************************
     820           6 :    SUBROUTINE offdiag_ex_sc(offdiag_ex_ker, contr1_int, dist, blk_size, donor_state, xas_tdp_env, &
     821             :                             xas_tdp_control, qs_env)
     822             : 
     823             :       TYPE(dbcsr_type), INTENT(INOUT)                    :: offdiag_ex_ker
     824             :       TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: contr1_int
     825             :       TYPE(dbcsr_distribution_type), POINTER             :: dist
     826             :       INTEGER, DIMENSION(:), POINTER                     :: blk_size
     827             :       TYPE(donor_state_type), POINTER                    :: donor_state
     828             :       TYPE(xas_tdp_env_type), POINTER                    :: xas_tdp_env
     829             :       TYPE(xas_tdp_control_type), POINTER                :: xas_tdp_control
     830             :       TYPE(qs_environment_type), POINTER                 :: qs_env
     831             : 
     832             :       INTEGER                                            :: ndo_mo
     833             :       LOGICAL                                            :: do_roks, do_uks, quadrants(3)
     834             :       REAL(dp), DIMENSION(:, :), POINTER                 :: PQ
     835           6 :       TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: lhs_int, rhs_int
     836             :       TYPE(dbcsr_type)                                   :: work_mat
     837             : 
     838           6 :       NULLIFY (PQ, lhs_int, rhs_int)
     839             : 
     840             :       !Initialization
     841           6 :       ndo_mo = donor_state%ndo_mo
     842           6 :       do_roks = xas_tdp_control%do_roks
     843           6 :       do_uks = xas_tdp_control%do_uks
     844           6 :       PQ => xas_tdp_env%ri_inv_ex
     845             : 
     846           6 :       rhs_int => contr1_int
     847          24 :       ALLOCATE (lhs_int(SIZE(contr1_int)))
     848           6 :       CALL copy_ri_contr_int(lhs_int, rhs_int)
     849           6 :       CALL ri_all_blocks_mm(lhs_int, PQ)
     850             : 
     851             :       !Given the lhs_int and rhs_int, all we need to do is multiply elements from the former by
     852             :       !the transpose of the later, and put the result in the correct spin quadrants
     853             : 
     854             :       !Create a normal type work matrix
     855             :       CALL dbcsr_create(work_mat, name="WORK", matrix_type=dbcsr_type_no_symmetry, dist=dist, &
     856           6 :                         row_blk_size=blk_size, col_blk_size=blk_size)
     857             : 
     858             :       !Case study on closed-shell, ROKS or UKS
     859           6 :       IF (do_roks) THEN
     860             :          !In ROKS, the donor MOs for each spin are the same => copy the product in both the
     861             :          !alpha-alpha and the beta-beta quadrants
     862           0 :          quadrants = [.TRUE., .FALSE., .TRUE.]
     863             :          CALL ri_int_product(work_mat, lhs_int, rhs_int, quadrants, qs_env, &
     864           0 :                              eps_filter=xas_tdp_control%eps_filter, mo_transpose=.TRUE.)
     865             : 
     866           6 :       ELSE IF (do_uks) THEN
     867             :          !In UKS, the donor MOs are possibly different for each spin => start with the
     868             :          !alpha-alpha product and the perform the beta-beta product separately
     869           0 :          quadrants = [.TRUE., .FALSE., .FALSE.]
     870             :          CALL ri_int_product(work_mat, lhs_int(1:ndo_mo), rhs_int(1:ndo_mo), quadrants, &
     871           0 :                              qs_env, eps_filter=xas_tdp_control%eps_filter, mo_transpose=.TRUE.)
     872             : 
     873           0 :          quadrants = [.FALSE., .FALSE., .TRUE.]
     874             :          CALL ri_int_product(work_mat, lhs_int(ndo_mo + 1:2*ndo_mo), rhs_int(ndo_mo + 1:2*ndo_mo), &
     875           0 :                              quadrants, qs_env, eps_filter=xas_tdp_control%eps_filter, mo_transpose=.TRUE.)
     876             :       ELSE
     877             :          !In the restricted closed-shell case, only have one spin and a single qudarant
     878           6 :          quadrants = [.TRUE., .FALSE., .FALSE.]
     879             :          CALL ri_int_product(work_mat, lhs_int, rhs_int, quadrants, qs_env, &
     880           6 :                              eps_filter=xas_tdp_control%eps_filter, mo_transpose=.TRUE.)
     881             :       END IF
     882           6 :       CALL dbcsr_finalize(work_mat)
     883             : 
     884             :       !Create the symmetric kernel matrix and redistribute work_mat into it
     885             :       CALL dbcsr_create(offdiag_ex_ker, name="OFFDIAG EX KERNEL", matrix_type=dbcsr_type_symmetric, &
     886           6 :                         dist=dist, row_blk_size=blk_size, col_blk_size=blk_size)
     887           6 :       CALL dbcsr_complete_redistribute(work_mat, offdiag_ex_ker)
     888             : 
     889             :       !clean-up
     890           6 :       CALL dbcsr_release(work_mat)
     891           6 :       CALL dbcsr_deallocate_matrix_set(lhs_int)
     892             : 
     893           6 :    END SUBROUTINE offdiag_ex_sc
     894             : 
     895             : ! **************************************************************************************************
     896             : !> \brief Reserves the blocks in of a dbcsr matrix as needed for RI 3-center contraction (aI|P)
     897             : !> \param matrices the matrices for which blocks are reserved
     898             : !> \param ri_atom the index of the atom on which RI is done (= all coeffs of I are there, and P too)
     899             : !> \param qs_env ...
     900             : !> \note the end product are normal type matrices that are possibly slightly spraser as matrix_s
     901             : ! **************************************************************************************************
     902         396 :    SUBROUTINE reserve_contraction_blocks(matrices, ri_atom, qs_env)
     903             : 
     904             :       TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: matrices
     905             :       INTEGER, INTENT(IN)                                :: ri_atom
     906             :       TYPE(qs_environment_type), POINTER                 :: qs_env
     907             : 
     908             :       INTEGER                                            :: blk, i, iblk, jblk, n
     909             :       LOGICAL                                            :: found
     910         132 :       REAL(dp), DIMENSION(:, :), POINTER                 :: pblock_m, pblock_s
     911             :       TYPE(dbcsr_distribution_type)                      :: dist
     912             :       TYPE(dbcsr_iterator_type)                          :: iter
     913         132 :       TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: matrix_s
     914             :       TYPE(dbcsr_type)                                   :: template, work
     915             : 
     916         132 :       NULLIFY (matrix_s, pblock_s, pblock_m)
     917             : 
     918             : !  Initialization
     919         132 :       CALL get_qs_env(qs_env, matrix_s=matrix_s)
     920         132 :       n = SIZE(matrices)
     921         132 :       CALL dbcsr_get_info(matrices(1)%matrix, distribution=dist)
     922             : 
     923             : !  Need to redistribute matrix_s in the distribution of matrices
     924         132 :       CALL dbcsr_create(work, template=matrix_s(1)%matrix, dist=dist)
     925         132 :       CALL dbcsr_complete_redistribute(matrix_s(1)%matrix, work)
     926             : 
     927             : !  Need to desymmetrize matrix as as a template
     928         132 :       CALL dbcsr_desymmetrize(work, template)
     929             : 
     930             : !  Loop over matrix_s as need a,b to overlap
     931         132 :       CALL dbcsr_iterator_start(iter, template)
     932       19338 :       DO WHILE (dbcsr_iterator_blocks_left(iter))
     933             : 
     934       19206 :          CALL dbcsr_iterator_next_block(iter, row=iblk, column=jblk, blk=blk)
     935             :          !only have a,b pair if one of them is the ri_atom
     936       19206 :          IF (iblk .NE. ri_atom .AND. jblk .NE. ri_atom) CYCLE
     937             : 
     938         800 :          CALL dbcsr_get_block_p(template, iblk, jblk, pblock_s, found)
     939             : 
     940         932 :          IF (found) THEN
     941        2400 :             DO i = 1, n
     942        1600 :                NULLIFY (pblock_m)
     943        1600 :                CALL dbcsr_reserve_block2d(matrices(i)%matrix, iblk, jblk, pblock_m)
     944      854317 :                pblock_m = 0.0_dp
     945             :             END DO
     946             :          END IF
     947             : 
     948             :       END DO !dbcsr iter
     949         132 :       CALL dbcsr_iterator_stop(iter)
     950         396 :       DO i = 1, n
     951         396 :          CALL dbcsr_finalize(matrices(i)%matrix)
     952             :       END DO
     953             : 
     954             : !  Clean-up
     955         132 :       CALL dbcsr_release(template)
     956         132 :       CALL dbcsr_release(work)
     957             : 
     958         132 :    END SUBROUTINE reserve_contraction_blocks
     959             : 
     960             : ! **************************************************************************************************
     961             : !> \brief Contract the ri 3-center integrals stored in a tensor with repect to the donor MOs coeffs,
     962             : !>        for a given excited atom k => (aI|k) = sum_b c_Ib (ab|k)
     963             : !> \param contr_int the contracted integrals as array of dbcsr matrices
     964             : !> \param op_type for which operator type we contract (COULOMB or EXCHANGE)
     965             : !> \param donor_state ...
     966             : !> \param xas_tdp_env ...
     967             : !> \param xas_tdp_control ...
     968             : !> \param qs_env ...
     969             : !> \note  In the output matrices, (aI_b|k) is stored at block a,b where I_b is the partial
     970             : !>        contraction that only includes coeffs from atom b. Note that the contracted matrix is
     971             : !>        not symmetric. To get the fully contracted matrix over b, one need to add the block
     972             : !>        columns of (aI_b|k) (get an array of size nao*nsgfp). This step is unnessary in our case
     973             : !>        because we assume locality of donor state, and only one column od (aI_b|k) is pouplated
     974             : ! **************************************************************************************************
     975         132 :    SUBROUTINE contract2_AO_to_doMO(contr_int, op_type, donor_state, xas_tdp_env, xas_tdp_control, qs_env)
     976             : 
     977             :       TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: contr_int
     978             :       CHARACTER(len=*), INTENT(IN)                       :: op_type
     979             :       TYPE(donor_state_type), POINTER                    :: donor_state
     980             :       TYPE(xas_tdp_env_type), POINTER                    :: xas_tdp_env
     981             :       TYPE(xas_tdp_control_type), POINTER                :: xas_tdp_control
     982             :       TYPE(qs_environment_type), POINTER                 :: qs_env
     983             : 
     984             :       CHARACTER(len=*), PARAMETER :: routineN = 'contract2_AO_to_doMO'
     985             : 
     986             :       INTEGER                                            :: handle, i, imo, ispin, katom, kkind, &
     987             :                                                             natom, ndo_mo, ndo_so, nkind, nspins
     988         132 :       INTEGER, DIMENSION(:), POINTER                     :: ri_blk_size, std_blk_size
     989             :       LOGICAL                                            :: do_uks
     990         132 :       REAL(dp), DIMENSION(:, :), POINTER                 :: coeffs
     991             :       TYPE(dbcsr_distribution_type)                      :: opt_dbcsr_dist
     992         132 :       TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: matrices, matrix_s
     993             :       TYPE(dbcsr_type), POINTER                          :: aI_P, P_Ib, work
     994             :       TYPE(dbt_type), POINTER                            :: pq_X
     995             :       TYPE(distribution_2d_type), POINTER                :: opt_dist2d
     996         132 :       TYPE(gto_basis_set_p_type), DIMENSION(:), POINTER  :: ri_basis
     997             :       TYPE(mp_para_env_type), POINTER                    :: para_env
     998         132 :       TYPE(particle_type), DIMENSION(:), POINTER         :: particle_set
     999         132 :       TYPE(qs_kind_type), DIMENSION(:), POINTER          :: qs_kind_set
    1000             : 
    1001         132 :       NULLIFY (matrix_s, std_blk_size, ri_blk_size, qs_kind_set, ri_basis, pq_X)
    1002         132 :       NULLIFY (aI_P, P_Ib, work, matrices, coeffs, opt_dist2d, particle_set)
    1003             : 
    1004         132 :       CALL timeset(routineN, handle)
    1005             : 
    1006             : !  Initialization
    1007         132 :       CALL get_qs_env(qs_env, natom=natom, matrix_s=matrix_s, qs_kind_set=qs_kind_set, para_env=para_env)
    1008         132 :       ndo_mo = donor_state%ndo_mo
    1009         132 :       kkind = donor_state%kind_index
    1010         132 :       katom = donor_state%at_index
    1011             :       !by default contract for Coulomb
    1012         132 :       pq_X => xas_tdp_env%ri_3c_coul
    1013         132 :       opt_dist2d => xas_tdp_env%opt_dist2d_coul
    1014         132 :       IF (op_type == "EXCHANGE") THEN
    1015          76 :          CPASSERT(ASSOCIATED(xas_tdp_env%ri_3c_ex))
    1016          76 :          pq_X => xas_tdp_env%ri_3c_ex
    1017          76 :          opt_dist2d => xas_tdp_env%opt_dist2d_ex
    1018             :       END IF
    1019         132 :       do_uks = xas_tdp_control%do_uks
    1020         132 :       nspins = 1; IF (do_uks) nspins = 2
    1021         132 :       ndo_so = nspins*ndo_mo
    1022             : 
    1023             : !  contracted integrals block sizes
    1024         132 :       CALL dbcsr_get_info(matrix_s(1)%matrix, col_blk_size=std_blk_size)
    1025             :       ! getting the block dimensions for the RI basis
    1026         132 :       CALL get_qs_env(qs_env, particle_set=particle_set, nkind=nkind)
    1027         876 :       ALLOCATE (ri_basis(nkind), ri_blk_size(natom))
    1028         132 :       CALL basis_set_list_setup(ri_basis, "RI_XAS", qs_kind_set)
    1029         132 :       CALL get_particle_set(particle_set, qs_kind_set, nsgf=ri_blk_size, basis=ri_basis)
    1030             : 
    1031             : !  Create  work matrices. Everything that goes into a 3c routine must be compatible with the optimal dist_2d
    1032         132 :       CALL cp_dbcsr_dist2d_to_dist(opt_dist2d, opt_dbcsr_dist)
    1033             : 
    1034         396 :       ALLOCATE (aI_P, P_Ib, work, matrices(2))
    1035             :       CALL dbcsr_create(aI_P, dist=opt_dbcsr_dist, matrix_type=dbcsr_type_no_symmetry, name="(aI|P)", &
    1036         132 :                         row_blk_size=std_blk_size, col_blk_size=ri_blk_size)
    1037             : 
    1038             :       CALL dbcsr_create(P_Ib, dist=opt_dbcsr_dist, matrix_type=dbcsr_type_no_symmetry, name="(P|Ib)", &
    1039         132 :                         row_blk_size=ri_blk_size, col_blk_size=std_blk_size)
    1040             : 
    1041             :       !reserve the blocks (needed for 3c contraction routines)
    1042         132 :       matrices(1)%matrix => aI_P; matrices(2)%matrix => P_Ib
    1043         132 :       CALL reserve_contraction_blocks(matrices, katom, qs_env)
    1044         132 :       DEALLOCATE (matrices)
    1045             : 
    1046             :       ! Create the contracted integral matrices
    1047         582 :       ALLOCATE (contr_int(ndo_so))
    1048         318 :       DO i = 1, ndo_so
    1049         186 :          ALLOCATE (contr_int(i)%matrix)
    1050             :          CALL dbcsr_create(matrix=contr_int(i)%matrix, template=matrix_s(1)%matrix, &
    1051             :                            matrix_type=dbcsr_type_no_symmetry, row_blk_size=std_blk_size, &
    1052         318 :                            col_blk_size=ri_blk_size)
    1053             :       END DO
    1054             : 
    1055             :       ! Only take the coeffs for atom on which MOs I,J are localized
    1056         132 :       coeffs => donor_state%contract_coeffs
    1057             : 
    1058         286 :       DO ispin = 1, nspins
    1059             : 
    1060             : !     Loop over the donor MOs and contract
    1061         472 :          DO imo = 1, ndo_mo
    1062             : 
    1063             :             ! do the contraction
    1064         186 :             CALL dbcsr_set(aI_P, 0.0_dp); CALL dbcsr_set(P_Ib, 0.0_dp)
    1065         186 :             CALL contract2_AO_to_doMO_low(pq_X, coeffs(:, (ispin - 1)*ndo_mo + imo), aI_P, P_Ib, katom)
    1066             : 
    1067             :             ! Get the full (aI|P) contracted integrals
    1068         186 :             CALL dbcsr_transposed(work, P_Ib)
    1069         186 :             CALL dbcsr_add(work, aI_P, 1.0_dp, 1.0_dp)
    1070         186 :             CALL dbcsr_complete_redistribute(work, contr_int((ispin - 1)*ndo_mo + imo)%matrix)
    1071         186 :             CALL dbcsr_filter(contr_int((ispin - 1)*ndo_mo + imo)%matrix, 1.0E-16_dp)
    1072             : 
    1073         340 :             CALL dbcsr_release(work)
    1074             :          END DO !imo
    1075             :       END DO !ispin
    1076             : 
    1077             : !  Clean-up
    1078         132 :       CALL dbcsr_release(aI_P)
    1079         132 :       CALL dbcsr_release(P_Ib)
    1080         132 :       CALL dbcsr_distribution_release(opt_dbcsr_dist)
    1081         132 :       DEALLOCATE (ri_blk_size, aI_P, P_Ib, work, ri_basis)
    1082             : 
    1083         132 :       CALL timestop(handle)
    1084             : 
    1085         264 :    END SUBROUTINE contract2_AO_to_doMO
    1086             : 
    1087             : ! **************************************************************************************************
    1088             : !> \brief Contraction of the 3-center integrals (ab|Q) over the RI basis elements Q to get donor MOS
    1089             : !>        => (ab|IJ) = sum_X (ab|Q) coeffs_Q
    1090             : !> \param ab_Q the tensor holding the integrals
    1091             : !> \param vec the contraction coefficients
    1092             : !> \param mat_abIJ the matrix holding the (ab|IJ) integrals (blocks must be reserved)
    1093             : !> \param atom_k the atom for which we contract, i.e. we only take RI basis Q centered on atom_k
    1094             : !> \note By construction, distribution of tensor and matrix match, also for OMP threads
    1095             : ! **************************************************************************************************
    1096          78 :    SUBROUTINE contract3_RI_to_doMOs(ab_Q, vec, mat_abIJ, atom_k)
    1097             : 
    1098             :       TYPE(dbt_type)                                     :: ab_Q
    1099             :       REAL(dp), DIMENSION(:), INTENT(IN)                 :: vec
    1100             :       TYPE(dbcsr_type)                                   :: mat_abIJ
    1101             :       INTEGER, INTENT(IN)                                :: atom_k
    1102             : 
    1103             :       CHARACTER(len=*), PARAMETER :: routineN = 'contract3_RI_to_doMOs'
    1104             : 
    1105             :       INTEGER                                            :: handle, i, iatom, ind(3), j, jatom, katom
    1106             :       LOGICAL                                            :: found, t_found
    1107             :       REAL(dp)                                           :: prefac
    1108          78 :       REAL(dp), ALLOCATABLE, DIMENSION(:, :, :)          :: iabc
    1109          78 :       REAL(KIND=dp), DIMENSION(:, :), POINTER            :: pblock
    1110             :       TYPE(dbcsr_type)                                   :: work
    1111             :       TYPE(dbt_iterator_type)                            :: iter
    1112             : 
    1113          78 :       NULLIFY (pblock)
    1114             : 
    1115          78 :       CALL timeset(routineN, handle)
    1116             : 
    1117             : !$OMP PARALLEL DEFAULT(NONE) &
    1118             : !$OMP SHARED(ab_Q,vec,mat_abIJ,atom_k) &
    1119          78 : !$OMP PRIVATE(iter,ind,iatom,jatom,katom,prefac,iabc,t_found,found,pblock,i,j)
    1120             :       CALL dbt_iterator_start(iter, ab_Q)
    1121             :       DO WHILE (dbt_iterator_blocks_left(iter))
    1122             :          CALL dbt_iterator_next_block(iter, ind)
    1123             : 
    1124             :          iatom = ind(1)
    1125             :          jatom = ind(2)
    1126             :          katom = ind(3)
    1127             : 
    1128             :          IF (.NOT. atom_k == katom) CYCLE
    1129             : 
    1130             :          prefac = 1.0_dp
    1131             :          IF (iatom == jatom) prefac = 0.5_dp
    1132             : 
    1133             :          CALL dbt_get_block(ab_Q, ind, iabc, t_found)
    1134             : 
    1135             :          CALL dbcsr_get_block_p(mat_abIJ, iatom, jatom, pblock, found)
    1136             :          IF ((.NOT. found) .OR. (.NOT. t_found)) CYCLE
    1137             : 
    1138             :          DO i = 1, SIZE(pblock, 1)
    1139             :             DO j = 1, SIZE(pblock, 2)
    1140             : !$OMP ATOMIC
    1141             :                pblock(i, j) = pblock(i, j) + prefac*DOT_PRODUCT(vec(:), iabc(i, j, :))
    1142             :             END DO
    1143             :          END DO
    1144             : 
    1145             :          DEALLOCATE (iabc)
    1146             :       END DO !iter
    1147             :       CALL dbt_iterator_stop(iter)
    1148             : !$OMP END PARALLEL
    1149             : 
    1150             :       !matrix only half filled => need to add its transpose
    1151          78 :       CALL dbcsr_create(work, template=mat_abIJ)
    1152          78 :       CALL dbcsr_transposed(work, mat_abIJ)
    1153          78 :       CALL dbcsr_add(mat_abIJ, work, 1.0_dp, 1.0_dp)
    1154          78 :       CALL dbcsr_release(work)
    1155             : 
    1156          78 :       CALL timestop(handle)
    1157             : 
    1158         156 :    END SUBROUTINE contract3_RI_to_doMOs
    1159             : 
    1160             : ! **************************************************************************************************
    1161             : !> \brief Contraction of the 3-center integrals over index 1 and 2, for a given atom_k. The results
    1162             : !>        are stored in two matrices, such that (a,b are block indices):
    1163             : !>        mat_aIb(ab) = mat_aIb(ab) + sum j_b (i_aj_b|k)*v(j_b) and
    1164             : !>        mat_bIa(ba) = mat_bIa(ba) + sum i_a (i_aj_b|k)*v(i_a)
    1165             : !>        The block size of the columns of mat_aIb and the rows of mat_bIa are the size of k (RI)
    1166             : !> \param ab_Q the tensor containing the 3-center integrals
    1167             : !> \param vec the contraction coefficients
    1168             : !> \param mat_aIb normal type dbcsr matrix
    1169             : !> \param mat_bIa normal type dbcsr matrix
    1170             : !> \param atom_k the atom for which we contract
    1171             : !>       It is assumed that the contraction coefficients for MO I are all on atom_k
    1172             : !>       We do the classic thing when we fill half the matrix and add its transposed to get the full
    1173             : !>       one, but here, the matrix is not symmetric, hence we explicitely have 2 input matrices
    1174             : !>       The distribution of the integrals and the normal dbcsr matrix are compatible out of the box
    1175             : ! **************************************************************************************************
    1176         186 :    SUBROUTINE contract2_AO_to_doMO_low(ab_Q, vec, mat_aIb, mat_bIa, atom_k)
    1177             : 
    1178             :       TYPE(dbt_type)                                     :: ab_Q
    1179             :       REAL(dp), DIMENSION(:), INTENT(IN)                 :: vec
    1180             :       TYPE(dbcsr_type), INTENT(INOUT)                    :: mat_aIb, mat_bIa
    1181             :       INTEGER, INTENT(IN)                                :: atom_k
    1182             : 
    1183             :       CHARACTER(LEN=*), PARAMETER :: routineN = 'contract2_AO_to_doMO_low'
    1184             : 
    1185             :       INTEGER                                            :: handle, i, iatom, ind(3), j, jatom, &
    1186             :                                                             katom, s1, s2
    1187         186 :       INTEGER, DIMENSION(:), POINTER                     :: atom_blk_size
    1188             :       LOGICAL                                            :: found, t_found
    1189         186 :       REAL(dp), ALLOCATABLE, DIMENSION(:, :, :)          :: iabc
    1190         186 :       REAL(dp), DIMENSION(:, :), POINTER                 :: pblock
    1191             :       TYPE(dbt_iterator_type)                            :: iter
    1192             : 
    1193         186 :       NULLIFY (atom_blk_size, pblock)
    1194             : 
    1195         186 :       CALL timeset(routineN, handle)
    1196             : 
    1197         186 :       CALL dbcsr_get_info(mat_aIb, row_blk_size=atom_blk_size)
    1198             : 
    1199             : !$OMP PARALLEL DEFAULT(NONE) &
    1200             : !$OMP SHARED(ab_Q,vec,mat_aIb,mat_bIa,atom_k,atom_blk_size) &
    1201         186 : !$OMP PRIVATE(iter,ind,iatom,jatom,katom,iabc,t_found,found,s1,s2,j,i,pblock)
    1202             :       CALL dbt_iterator_start(iter, ab_Q)
    1203             :       DO WHILE (dbt_iterator_blocks_left(iter))
    1204             :          CALL dbt_iterator_next_block(iter, ind)
    1205             : 
    1206             :          iatom = ind(1)
    1207             :          jatom = ind(2)
    1208             :          katom = ind(3)
    1209             : 
    1210             :          IF (atom_k .NE. katom) CYCLE
    1211             : 
    1212             :          CALL dbt_get_block(ab_Q, ind, iabc, t_found)
    1213             :          IF (.NOT. t_found) CYCLE
    1214             : 
    1215             :          ! Deal with mat_aIb
    1216             :          IF (jatom == atom_k) THEN
    1217             :             s1 = atom_blk_size(iatom)
    1218             :             s2 = SIZE(iabc, 3)
    1219             : 
    1220             :             CALL dbcsr_get_block_p(matrix=mat_aIb, row=iatom, col=jatom, BLOCK=pblock, found=found)
    1221             : 
    1222             :             IF (found) THEN
    1223             :                DO i = 1, s1
    1224             :                   DO j = 1, s2
    1225             : !$OMP ATOMIC
    1226             :                      pblock(i, j) = pblock(i, j) + DOT_PRODUCT(vec, iabc(i, :, j))
    1227             :                   END DO
    1228             :                END DO
    1229             :             END IF
    1230             :          END IF ! jatom == atom_k
    1231             : 
    1232             :          ! Deal with mat_bIa, keep block diagonal empty
    1233             :          IF (iatom == jatom) CYCLE
    1234             :          IF (iatom == atom_k) THEN
    1235             :             s1 = SIZE(iabc, 3)
    1236             :             s2 = atom_blk_size(jatom)
    1237             : 
    1238             :             CALL dbcsr_get_block_p(matrix=mat_bIa, row=iatom, col=jatom, BLOCK=pblock, found=found)
    1239             : 
    1240             :             IF (found) THEN
    1241             :                DO i = 1, s1
    1242             :                   DO j = 1, s2
    1243             : !$OMP ATOMIC
    1244             :                      pblock(i, j) = pblock(i, j) + DOT_PRODUCT(vec, iabc(:, j, i))
    1245             :                   END DO
    1246             :                END DO
    1247             :             END IF
    1248             :          END IF !iatom== atom_k
    1249             : 
    1250             :          DEALLOCATE (iabc)
    1251             :       END DO !iter
    1252             :       CALL dbt_iterator_stop(iter)
    1253             : !$OMP END PARALLEL
    1254             : 
    1255         186 :       CALL timestop(handle)
    1256             : 
    1257         372 :    END SUBROUTINE contract2_AO_to_doMO_low
    1258             : 
    1259             : ! **************************************************************************************************
    1260             : !> \brief Multiply all the blocks of a contracted RI integral (aI|P) by a matrix of type (P|...|Q)
    1261             : !> \param contr_int the integral array
    1262             : !> \param PQ the smaller matrix to multiply all blocks
    1263             : !> \note  It is assumed that all non-zero blocks have the same number of columns. Can pass partial
    1264             : !>        arrays, e.g. contr_int(1:3)
    1265             : ! **************************************************************************************************
    1266         196 :    SUBROUTINE ri_all_blocks_mm(contr_int, PQ)
    1267             : 
    1268             :       TYPE(dbcsr_p_type), DIMENSION(:)                   :: contr_int
    1269             :       REAL(dp), DIMENSION(:, :), INTENT(IN)              :: PQ
    1270             : 
    1271             :       INTEGER                                            :: blk, iblk, imo, jblk, ndo_mo, s1, s2
    1272             :       LOGICAL                                            :: found
    1273         196 :       REAL(dp), ALLOCATABLE, DIMENSION(:, :)             :: work
    1274         196 :       REAL(dp), DIMENSION(:, :), POINTER                 :: pblock
    1275             :       TYPE(dbcsr_iterator_type)                          :: iter
    1276             : 
    1277         196 :       NULLIFY (pblock)
    1278             : 
    1279         196 :       ndo_mo = SIZE(contr_int)
    1280             : 
    1281         430 :       DO imo = 1, ndo_mo
    1282         234 :          CALL dbcsr_iterator_start(iter, contr_int(imo)%matrix)
    1283        1152 :          DO WHILE (dbcsr_iterator_blocks_left(iter))
    1284             : 
    1285         918 :             CALL dbcsr_iterator_next_block(iter, row=iblk, column=jblk, blk=blk)
    1286         918 :             CALL dbcsr_get_block_p(contr_int(imo)%matrix, iblk, jblk, pblock, found)
    1287             : 
    1288        1152 :             IF (found) THEN
    1289         918 :                s1 = SIZE(pblock, 1)
    1290         918 :                s2 = SIZE(pblock, 2)
    1291        3672 :                ALLOCATE (work(s1, s2))
    1292         918 :                CALL dgemm('N', 'N', s1, s2, s2, 1.0_dp, pblock, s1, PQ, s2, 0.0_dp, work, s1)
    1293         918 :                CALL dcopy(s1*s2, work, 1, pblock, 1)
    1294         918 :                DEALLOCATE (work)
    1295             :             END IF
    1296             : 
    1297             :          END DO ! dbcsr iterator
    1298         664 :          CALL dbcsr_iterator_stop(iter)
    1299             :       END DO !imo
    1300             : 
    1301         392 :    END SUBROUTINE ri_all_blocks_mm
    1302             : 
    1303             : ! **************************************************************************************************
    1304             : !> \brief Copies an (partial) array of contracted RI integrals into anoter one
    1305             : !> \param new_int where the copy is stored
    1306             : !> \param ref_int what is copied
    1307             : !> \note Allocate the matrices of new_int if not done already
    1308             : ! **************************************************************************************************
    1309         114 :    SUBROUTINE copy_ri_contr_int(new_int, ref_int)
    1310             : 
    1311             :       TYPE(dbcsr_p_type), DIMENSION(:), INTENT(INOUT)    :: new_int
    1312             :       TYPE(dbcsr_p_type), DIMENSION(:), INTENT(IN)       :: ref_int
    1313             : 
    1314             :       INTEGER                                            :: iso, ndo_so
    1315             : 
    1316         114 :       CPASSERT(SIZE(new_int) == SIZE(ref_int))
    1317         114 :       ndo_so = SIZE(ref_int)
    1318             : 
    1319         248 :       DO iso = 1, ndo_so
    1320         134 :          IF (.NOT. ASSOCIATED(new_int(iso)%matrix)) ALLOCATE (new_int(iso)%matrix)
    1321         248 :          CALL dbcsr_copy(new_int(iso)%matrix, ref_int(iso)%matrix)
    1322             :       END DO
    1323             : 
    1324         114 :    END SUBROUTINE copy_ri_contr_int
    1325             : 
    1326             : ! **************************************************************************************************
    1327             : !> \brief Takes the product of contracted integrals and put them in a kernel matrix
    1328             : !> \param kernel the matrix where the products are stored
    1329             : !> \param lhs_int the left-hand side contracted integrals
    1330             : !> \param rhs_int the right-hand side contracted integrals
    1331             : !> \param quadrants on which quadrant(s) on the kernel matrix the product is stored
    1332             : !> \param qs_env ...
    1333             : !> \param eps_filter filter for dbcsr matrix multiplication
    1334             : !> \param mo_transpose whether the MO blocks should be transpose, i.e. (aI|Jb) => (aJ|Ib)
    1335             : !> \note It is assumed that the kerenl matrix is NOT symmetric
    1336             : !>       There are three quadrants, corresponding to 1: the upper-left (diagonal), 2: the
    1337             : !>       upper-right (off-diagonal) and 3: the lower-right (diagonal).
    1338             : !>       Need to finalize the kernel matrix after calling this routine (possibly multiple times)
    1339             : ! **************************************************************************************************
    1340         114 :    SUBROUTINE ri_int_product(kernel, lhs_int, rhs_int, quadrants, qs_env, eps_filter, mo_transpose)
    1341             : 
    1342             :       TYPE(dbcsr_type), INTENT(INOUT)                    :: kernel
    1343             :       TYPE(dbcsr_p_type), DIMENSION(:), INTENT(IN)       :: lhs_int, rhs_int
    1344             :       LOGICAL, DIMENSION(3), INTENT(IN)                  :: quadrants
    1345             :       TYPE(qs_environment_type), POINTER                 :: qs_env
    1346             :       REAL(dp), INTENT(IN), OPTIONAL                     :: eps_filter
    1347             :       LOGICAL, INTENT(IN), OPTIONAL                      :: mo_transpose
    1348             : 
    1349             :       INTEGER                                            :: blk, i, iblk, iso, j, jblk, jso, nblk, &
    1350             :                                                             ndo_so
    1351             :       LOGICAL                                            :: found, my_mt
    1352         114 :       REAL(dp), DIMENSION(:, :), POINTER                 :: pblock
    1353             :       TYPE(dbcsr_iterator_type)                          :: iter
    1354         114 :       TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: matrix_s
    1355             :       TYPE(dbcsr_type)                                   :: prod
    1356             : 
    1357         114 :       NULLIFY (matrix_s, pblock)
    1358             : 
    1359             : !  Initialization
    1360           0 :       CPASSERT(SIZE(lhs_int) == SIZE(rhs_int))
    1361         120 :       CPASSERT(ANY(quadrants))
    1362         114 :       ndo_so = SIZE(lhs_int)
    1363         114 :       CALL get_qs_env(qs_env, matrix_s=matrix_s, natom=nblk)
    1364         114 :       CALL dbcsr_create(prod, template=matrix_s(1)%matrix, matrix_type=dbcsr_type_no_symmetry)
    1365         114 :       my_mt = .FALSE.
    1366         114 :       IF (PRESENT(mo_transpose)) my_mt = mo_transpose
    1367             : 
    1368             :       ! The kernel matrix is symmetric (even if normal type) => only fill upper half on diagonal
    1369             :       ! quadrants, but the whole thing on upper-right quadrant
    1370         248 :       DO iso = 1, ndo_so
    1371         466 :          DO jso = 1, ndo_so
    1372             : 
    1373             :             ! If on-diagonal quadrants only, can skip jso < iso
    1374         218 :             IF (.NOT. quadrants(2) .AND. jso < iso) CYCLE
    1375             : 
    1376         176 :             i = iso; j = jso; 
    1377         176 :             IF (my_mt) THEN
    1378           6 :                i = jso; j = iso; 
    1379             :             END IF
    1380             : 
    1381             :             ! Take the product lhs*rhs^T
    1382             :             CALL dbcsr_multiply('N', 'T', 1.0_dp, lhs_int(i)%matrix, rhs_int(j)%matrix, &
    1383         176 :                                 0.0_dp, prod, filter_eps=eps_filter)
    1384             : 
    1385             :             ! Loop over blocks of prod and fill kernel matrix => ok cuz same (but replicated) dist
    1386         176 :             CALL dbcsr_iterator_start(iter, prod)
    1387        7777 :             DO WHILE (dbcsr_iterator_blocks_left(iter))
    1388             : 
    1389        7601 :                CALL dbcsr_iterator_next_block(iter, row=iblk, column=jblk, blk=blk)
    1390        7601 :                IF ((iso == jso .AND. jblk < iblk) .AND. .NOT. quadrants(2)) CYCLE
    1391             : 
    1392        3961 :                CALL dbcsr_get_block_p(prod, iblk, jblk, pblock, found)
    1393             : 
    1394        4137 :                IF (found) THEN
    1395             : 
    1396             :                   ! Case study on quadrant
    1397             :                   !upper-left
    1398        3961 :                   IF (quadrants(1)) THEN
    1399        3935 :                      CALL dbcsr_put_block(kernel, (iso - 1)*nblk + iblk, (jso - 1)*nblk + jblk, pblock)
    1400             :                   END IF
    1401             : 
    1402             :                   !upper-right
    1403        3961 :                   IF (quadrants(2)) THEN
    1404          16 :                      CALL dbcsr_put_block(kernel, (iso - 1)*nblk + iblk, (ndo_so + jso - 1)*nblk + jblk, pblock)
    1405             :                   END IF
    1406             : 
    1407             :                   !lower-right
    1408        3961 :                   IF (quadrants(3)) THEN
    1409          10 :                      CALL dbcsr_put_block(kernel, (ndo_so + iso - 1)*nblk + iblk, (ndo_so + jso - 1)*nblk + jblk, pblock)
    1410             :                   END IF
    1411             : 
    1412             :                END IF
    1413             : 
    1414             :             END DO ! dbcsr iterator
    1415         528 :             CALL dbcsr_iterator_stop(iter)
    1416             : 
    1417             :          END DO !jso
    1418             :       END DO !iso
    1419             : 
    1420             : !  Clean-up
    1421         114 :       CALL dbcsr_release(prod)
    1422             : 
    1423         114 :    END SUBROUTINE ri_int_product
    1424             : 
    1425             : END MODULE xas_tdp_kernel

Generated by: LCOV version 1.15