LCOV - code coverage report
Current view: top level - src - xas_tdp_integrals.F (source / functions) Hit Total Coverage
Test: CP2K Regtests (git:4dc10b3) Lines: 522 535 97.6 %
Date: 2024-11-21 06:45:46 Functions: 10 10 100.0 %

          Line data    Source code
       1             : !--------------------------------------------------------------------------------------------------!
       2             : !   CP2K: A general program to perform molecular dynamics simulations                              !
       3             : !   Copyright 2000-2024 CP2K developers group <https://cp2k.org>                                   !
       4             : !                                                                                                  !
       5             : !   SPDX-License-Identifier: GPL-2.0-or-later                                                      !
       6             : !--------------------------------------------------------------------------------------------------!
       7             : 
       8             : ! **************************************************************************************************
       9             : !> \brief 3-center integrals machinery for the XAS_TDP method
      10             : !> \author A. Bussy (03.2020)
      11             : ! **************************************************************************************************
      12             : 
      13             : MODULE xas_tdp_integrals
      14             :    USE OMP_LIB,                         ONLY: omp_get_max_threads,&
      15             :                                               omp_get_num_threads,&
      16             :                                               omp_get_thread_num
      17             :    USE ai_contraction_sphi,             ONLY: ab_contract,&
      18             :                                               abc_contract_xsmm
      19             :    USE atomic_kind_types,               ONLY: atomic_kind_type
      20             :    USE basis_set_types,                 ONLY: get_gto_basis_set,&
      21             :                                               gto_basis_set_p_type,&
      22             :                                               gto_basis_set_type
      23             :    USE cell_types,                      ONLY: cell_type
      24             :    USE constants_operator,              ONLY: operator_coulomb
      25             :    USE cp_array_utils,                  ONLY: cp_1d_i_p_type,&
      26             :                                               cp_2d_r_p_type
      27             :    USE cp_blacs_env,                    ONLY: cp_blacs_env_type
      28             :    USE cp_dbcsr_api,                    ONLY: dbcsr_distribution_get,&
      29             :                                               dbcsr_distribution_release,&
      30             :                                               dbcsr_distribution_type
      31             :    USE cp_dbcsr_operations,             ONLY: cp_dbcsr_dist2d_to_dist
      32             :    USE cp_eri_mme_interface,            ONLY: cp_eri_mme_param,&
      33             :                                               cp_eri_mme_set_params
      34             :    USE cp_files,                        ONLY: close_file,&
      35             :                                               open_file
      36             :    USE dbt_api,                         ONLY: &
      37             :         dbt_create, dbt_distribution_destroy, dbt_distribution_new, dbt_distribution_type, &
      38             :         dbt_finalize, dbt_pgrid_create, dbt_pgrid_destroy, dbt_pgrid_type, dbt_put_block, &
      39             :         dbt_reserve_blocks, dbt_type
      40             :    USE distribution_1d_types,           ONLY: distribution_1d_type
      41             :    USE distribution_2d_types,           ONLY: distribution_2d_create,&
      42             :                                               distribution_2d_type
      43             :    USE eri_mme_integrate,               ONLY: eri_mme_2c_integrate
      44             :    USE eri_mme_types,                   ONLY: eri_mme_init,&
      45             :                                               eri_mme_release
      46             :    USE gamma,                           ONLY: init_md_ftable
      47             :    USE generic_os_integrals,            ONLY: int_operators_r12_ab_os
      48             :    USE input_constants,                 ONLY: do_potential_coulomb,&
      49             :                                               do_potential_id,&
      50             :                                               do_potential_short,&
      51             :                                               do_potential_truncated
      52             :    USE input_section_types,             ONLY: section_vals_val_get
      53             :    USE kinds,                           ONLY: dp
      54             :    USE libint_2c_3c,                    ONLY: cutoff_screen_factor,&
      55             :                                               eri_2center,&
      56             :                                               eri_3center,&
      57             :                                               libint_potential_type
      58             :    USE libint_wrapper,                  ONLY: cp_libint_cleanup_2eri,&
      59             :                                               cp_libint_cleanup_3eri,&
      60             :                                               cp_libint_init_2eri,&
      61             :                                               cp_libint_init_3eri,&
      62             :                                               cp_libint_set_contrdepth,&
      63             :                                               cp_libint_t
      64             :    USE mathlib,                         ONLY: invmat_symm
      65             :    USE message_passing,                 ONLY: mp_comm_type,&
      66             :                                               mp_para_env_type
      67             :    USE molecule_types,                  ONLY: molecule_type
      68             :    USE orbital_pointers,                ONLY: ncoset
      69             :    USE particle_methods,                ONLY: get_particle_set
      70             :    USE particle_types,                  ONLY: particle_type
      71             :    USE qs_environment_types,            ONLY: get_qs_env,&
      72             :                                               qs_environment_type
      73             :    USE qs_integral_utils,               ONLY: basis_set_list_setup
      74             :    USE qs_kind_types,                   ONLY: get_qs_kind,&
      75             :                                               qs_kind_type
      76             :    USE qs_neighbor_list_types,          ONLY: &
      77             :         get_iterator_info, get_neighbor_list_set_p, neighbor_list_iterate, &
      78             :         neighbor_list_iterator_create, neighbor_list_iterator_p_type, &
      79             :         neighbor_list_iterator_release, neighbor_list_set_p_type, nl_set_sub_iterator, &
      80             :         nl_sub_iterate, release_neighbor_list_sets
      81             :    USE qs_neighbor_lists,               ONLY: atom2d_build,&
      82             :                                               atom2d_cleanup,&
      83             :                                               build_neighbor_lists,&
      84             :                                               local_atoms_type,&
      85             :                                               pair_radius_setup
      86             :    USE qs_o3c_types,                    ONLY: get_o3c_iterator_info,&
      87             :                                               init_o3c_container,&
      88             :                                               o3c_container_type,&
      89             :                                               o3c_iterate,&
      90             :                                               o3c_iterator_create,&
      91             :                                               o3c_iterator_release,&
      92             :                                               o3c_iterator_type,&
      93             :                                               release_o3c_container
      94             :    USE t_c_g0,                          ONLY: get_lmax_init,&
      95             :                                               init
      96             :    USE xas_tdp_types,                   ONLY: xas_tdp_control_type,&
      97             :                                               xas_tdp_env_type
      98             : #include "./base/base_uses.f90"
      99             : 
     100             :    IMPLICIT NONE
     101             :    PRIVATE
     102             : 
     103             :    CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'xas_tdp_integrals'
     104             : 
     105             :    PUBLIC :: create_pqX_tensor, fill_pqX_tensor, compute_ri_3c_coulomb, compute_ri_3c_exchange, &
     106             :              build_xas_tdp_3c_nl, build_xas_tdp_ovlp_nl, get_opt_3c_dist2d, &
     107             :              compute_ri_coulomb2_int, compute_ri_exchange2_int
     108             : 
     109             : CONTAINS
     110             : 
     111             : ! **************************************************************************************************
     112             : !> \brief Prepares a tensor to hold 3-center integrals (pq|X), where p,q are distributed according
     113             : !>        to the given 2d dbcsr distribution of the given . The third dimension of the tensor is
     114             : !>        iteslf not distributed (i.e. the t_pgrid's third dimension has size 1). The blocks are
     115             : !>        reserved according to the neighbor lists
     116             : !> \param pq_X the tensor to store the integrals
     117             : !> \param ab_nl the 1st and 2nd center neighbor list
     118             : !> \param ac_nl the 1st and 3rd center neighbor list
     119             : !> \param matrix_dist ...
     120             : !> \param blk_size_1 the block size in the first dimension
     121             : !> \param blk_size_2 the block size in the second dimension
     122             : !> \param blk_size_3 the block size in the third dimension
     123             : !> \param only_bc_same_center only keep block if, for the corresponding integral (ab|c), b and c
     124             : !>        share the same center, i.e. r_bc = 0.0
     125             : ! **************************************************************************************************
     126        1452 :    SUBROUTINE create_pqX_tensor(pq_X, ab_nl, ac_nl, matrix_dist, blk_size_1, blk_size_2, &
     127         132 :                                 blk_size_3, only_bc_same_center)
     128             : 
     129             :       TYPE(dbt_type), INTENT(OUT)                        :: pq_X
     130             :       TYPE(neighbor_list_set_p_type), DIMENSION(:), &
     131             :          POINTER                                         :: ab_nl, ac_nl
     132             :       TYPE(dbcsr_distribution_type), INTENT(IN)          :: matrix_dist
     133             :       INTEGER, DIMENSION(:), INTENT(IN)                  :: blk_size_1, blk_size_2, blk_size_3
     134             :       LOGICAL, INTENT(IN), OPTIONAL                      :: only_bc_same_center
     135             : 
     136             :       CHARACTER(len=*), PARAMETER                        :: routineN = 'create_pqX_tensor'
     137             : 
     138             :       INTEGER                                            :: A, b, group_handle, handle, i, iatom, &
     139             :                                                             ikind, jatom, katom, kkind, nblk, &
     140             :                                                             nblk_3, nblk_per_thread, nkind
     141         132 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: idx1, idx2, idx3
     142             :       INTEGER, DIMENSION(3)                              :: pdims
     143         132 :       INTEGER, DIMENSION(:), POINTER                     :: col_dist, row_dist
     144         132 :       INTEGER, DIMENSION(:, :), POINTER                  :: mat_pgrid
     145             :       LOGICAL                                            :: my_sort_bc, symmetric
     146             :       REAL(dp), DIMENSION(3)                             :: rab, rac, rbc
     147        1188 :       TYPE(dbt_distribution_type)                        :: t_dist
     148         396 :       TYPE(dbt_pgrid_type)                               :: t_pgrid
     149             :       TYPE(mp_comm_type)                                 :: group
     150             :       TYPE(neighbor_list_iterator_p_type), &
     151         132 :          DIMENSION(:), POINTER                           :: ab_iter, ac_iter
     152             : 
     153         132 :       NULLIFY (ab_iter, ac_iter, col_dist, row_dist, mat_pgrid)
     154             : 
     155         132 :       CALL timeset(routineN, handle)
     156             : 
     157         132 :       my_sort_bc = .FALSE.
     158         132 :       IF (PRESENT(only_bc_same_center)) my_sort_bc = only_bc_same_center
     159             : 
     160         132 :       CALL get_neighbor_list_set_p(ab_nl, symmetric=symmetric)
     161         132 :       CPASSERT(symmetric)
     162             : 
     163             :       !get 2D distribution info from matrix_dist
     164             :       CALL dbcsr_distribution_get(matrix_dist, pgrid=mat_pgrid, group=group_handle, &
     165         132 :                                   row_dist=row_dist, col_dist=col_dist)
     166         132 :       CALL group%set_handle(group_handle)
     167             : 
     168             :       !create the corresponding tensor proc grid and dist
     169         132 :       pdims(1) = SIZE(mat_pgrid, 1); pdims(2) = SIZE(mat_pgrid, 2); pdims(3) = 1
     170         132 :       CALL dbt_pgrid_create(group, pdims, t_pgrid)
     171             : 
     172         132 :       nblk_3 = SIZE(blk_size_3)
     173             :       CALL dbt_distribution_new(t_dist, t_pgrid, nd_dist_1=row_dist, nd_dist_2=col_dist, &
     174        2572 :                                 nd_dist_3=[(0, i=1, nblk_3)])
     175             : 
     176             :       !create the tensor itself.
     177             :       CALL dbt_create(pq_X, name="(pq|X)", dist=t_dist, map1_2d=[1, 2], map2_2d=[3], &
     178         132 :                       blk_size_1=blk_size_1, blk_size_2=blk_size_2, blk_size_3=blk_size_3)
     179             : 
     180             :       !count the blocks to reserve !note: dbcsr takes care of only keeping unique indices
     181         132 :       CALL neighbor_list_iterator_create(ab_iter, ab_nl)
     182         132 :       CALL neighbor_list_iterator_create(ac_iter, ac_nl, search=.TRUE.)
     183         132 :       nblk = 0
     184       17360 :       DO WHILE (neighbor_list_iterate(ab_iter) == 0)
     185       17228 :          CALL get_iterator_info(ab_iter, ikind=ikind, iatom=iatom, nkind=nkind, r=rab)
     186             : 
     187       50578 :          DO kkind = 1, nkind
     188       33218 :             CALL nl_set_sub_iterator(ac_iter, ikind, kkind, iatom)
     189             : 
     190       71754 :             DO WHILE (nl_sub_iterate(ac_iter) == 0)
     191             : 
     192       21308 :                IF (my_sort_bc) THEN
     193             :                   !we check for rbc or rac because of symmetry in ab_nl
     194         690 :                   CALL get_iterator_info(ac_iter, r=rac)
     195        2760 :                   rbc(:) = rac(:) - rab(:)
     196        2979 :                   IF (.NOT. (ALL(ABS(rbc) .LE. 1.0E-8_dp) .OR. ALL(ABS(rac) .LE. 1.0E-8_dp))) CYCLE
     197             : 
     198             :                END IF
     199             : 
     200       21308 :                nblk = nblk + 1
     201             :             END DO !ac_iter
     202             :          END DO !kkind
     203             :       END DO !ab_iter
     204         132 :       CALL neighbor_list_iterator_release(ab_iter)
     205         132 :       CALL neighbor_list_iterator_release(ac_iter)
     206             : 
     207         824 :       ALLOCATE (idx1(nblk), idx2(nblk), idx3(nblk))
     208             : 
     209             :       !actually reserve the blocks
     210         132 :       CALL neighbor_list_iterator_create(ab_iter, ab_nl)
     211         132 :       CALL neighbor_list_iterator_create(ac_iter, ac_nl, search=.TRUE.)
     212         132 :       nblk = 0
     213       17360 :       DO WHILE (neighbor_list_iterate(ab_iter) == 0)
     214       17228 :          CALL get_iterator_info(ab_iter, ikind=ikind, iatom=iatom, jatom=jatom, nkind=nkind, r=rab)
     215             : 
     216       50578 :          DO kkind = 1, nkind
     217       33218 :             CALL nl_set_sub_iterator(ac_iter, ikind, kkind, iatom)
     218             : 
     219       71754 :             DO WHILE (nl_sub_iterate(ac_iter) == 0)
     220       21308 :                CALL get_iterator_info(ac_iter, jatom=katom, r=rac)
     221             : 
     222       21308 :                IF (my_sort_bc) THEN
     223             :                   !we check for rbc or rac because of symmetry in ab_nl
     224         690 :                   CALL get_iterator_info(ac_iter, r=rac)
     225        2760 :                   rbc(:) = rac(:) - rab(:)
     226        2979 :                   IF (.NOT. (ALL(ABS(rbc) .LE. 1.0E-8_dp) .OR. ALL(ABS(rac) .LE. 1.0E-8_dp))) CYCLE
     227             : 
     228             :                END IF
     229             : 
     230       21018 :                nblk = nblk + 1
     231             : 
     232       21018 :                idx1(nblk) = iatom
     233       21018 :                idx2(nblk) = jatom
     234       21308 :                idx3(nblk) = katom
     235             : 
     236             :             END DO !ac_iter
     237             :          END DO !kkind
     238             :       END DO !ab_iter
     239         132 :       CALL neighbor_list_iterator_release(ab_iter)
     240         132 :       CALL neighbor_list_iterator_release(ac_iter)
     241             : 
     242             : !TODO: Parallelize creation of block list.
     243         132 : !$OMP PARALLEL DEFAULT(NONE) SHARED(pq_X, nblk, idx1, idx2, idx3) PRIVATE(nblk_per_thread,A,b)
     244             :       nblk_per_thread = nblk/omp_get_num_threads() + 1
     245             :       a = omp_get_thread_num()*nblk_per_thread + 1
     246             :       b = MIN(a + nblk_per_thread, nblk)
     247             :       CALL dbt_reserve_blocks(pq_X, idx1(a:b), idx2(a:b), idx3(a:b))
     248             : !$OMP END PARALLEL
     249         132 :       CALL dbt_finalize(pq_X)
     250             : 
     251             :       !clean-up
     252         132 :       CALL dbt_distribution_destroy(t_dist)
     253         132 :       CALL dbt_pgrid_destroy(t_pgrid)
     254             : 
     255         132 :       CALL timestop(handle)
     256             : 
     257         660 :    END SUBROUTINE create_pqX_tensor
     258             : 
     259             : ! **************************************************************************************************
     260             : !> \brief Fills the given 3 dimensional (pq|X) tensor with integrals
     261             : !> \param pq_X the tensor to fill
     262             : !> \param ab_nl the neighbor list for the first 2 centers
     263             : !> \param ac_nl the neighbor list for the first and third centers
     264             : !> \param basis_set_list_a basis sets for first center
     265             : !> \param basis_set_list_b basis sets for second center
     266             : !> \param basis_set_list_c basis sets for third center
     267             : !> \param potential_parameter the operator for the integrals
     268             : !> \param qs_env ...
     269             : !> \param only_bc_same_center same as in create_pqX_tensor
     270             : !> \param eps_screen threshold for possible screening
     271             : !> \note The following indices are happily mixed within this routine: First center i,a,p
     272             : !>       Second center: j,b,q       Third center: k,c,X
     273             : ! **************************************************************************************************
     274         132 :    SUBROUTINE fill_pqX_tensor(pq_X, ab_nl, ac_nl, basis_set_list_a, basis_set_list_b, &
     275             :                               basis_set_list_c, potential_parameter, qs_env, &
     276             :                               only_bc_same_center, eps_screen)
     277             : 
     278             :       TYPE(dbt_type)                                     :: pq_X
     279             :       TYPE(neighbor_list_set_p_type), DIMENSION(:), &
     280             :          POINTER                                         :: ab_nl, ac_nl
     281             :       TYPE(gto_basis_set_p_type), DIMENSION(:), POINTER  :: basis_set_list_a, basis_set_list_b, &
     282             :                                                             basis_set_list_c
     283             :       TYPE(libint_potential_type)                        :: potential_parameter
     284             :       TYPE(qs_environment_type), POINTER                 :: qs_env
     285             :       LOGICAL, INTENT(IN), OPTIONAL                      :: only_bc_same_center
     286             :       REAL(dp), INTENT(IN), OPTIONAL                     :: eps_screen
     287             : 
     288             :       CHARACTER(len=*), PARAMETER                        :: routineN = 'fill_pqX_tensor'
     289             : 
     290             :       INTEGER :: egfa, egfb, egfc, handle, i, iatom, ibasis, ikind, ilist, imax, iset, jatom, &
     291             :          jkind, jset, katom, kkind, kset, m_max, max_nset, maxli, maxlj, maxlk, mepos, nbasis, &
     292             :          ncoa, ncob, ncoc, ni, nj, nk, nseta, nsetb, nsetc, nthread, sgfa, sgfb, sgfc, unit_id
     293         132 :       INTEGER, DIMENSION(:), POINTER                     :: la_max, la_min, lb_max, lb_min, lc_max, &
     294         132 :                                                             lc_min, npgfa, npgfb, npgfc, nsgfa, &
     295         132 :                                                             nsgfb, nsgfc
     296         132 :       INTEGER, DIMENSION(:, :), POINTER                  :: first_sgfa, first_sgfb, first_sgfc
     297             :       LOGICAL                                            :: do_screen, my_sort_bc
     298             :       REAL(dp)                                           :: dij, dik, djk, my_eps_screen, ri(3), &
     299             :                                                             rij(3), rik(3), rj(3), rjk(3), rk(3), &
     300             :                                                             sabc_ext, screen_radius
     301         132 :       REAL(dp), ALLOCATABLE, DIMENSION(:)                :: ccp_buffer, cpp_buffer
     302         132 :       REAL(dp), ALLOCATABLE, DIMENSION(:, :)             :: max_contr, max_contra, max_contrb, &
     303         132 :                                                             max_contrc
     304         132 :       REAL(dp), ALLOCATABLE, DIMENSION(:, :, :)          :: iabc, sabc, work
     305         132 :       REAL(dp), DIMENSION(:), POINTER                    :: set_radius_a, set_radius_b, set_radius_c
     306         132 :       REAL(dp), DIMENSION(:, :), POINTER                 :: rpgf_a, rpgf_b, rpgf_c, zeta, zetb, zetc
     307         132 :       TYPE(cp_2d_r_p_type), DIMENSION(:, :), POINTER     :: spb, spc, tspa
     308             :       TYPE(cp_libint_t)                                  :: lib
     309         132 :       TYPE(gto_basis_set_p_type), DIMENSION(:), POINTER  :: basis_set_list
     310             :       TYPE(gto_basis_set_type), POINTER                  :: basis_set, basis_set_a, basis_set_b, &
     311             :                                                             basis_set_c
     312             :       TYPE(mp_para_env_type), POINTER                    :: para_env
     313             :       TYPE(o3c_container_type), POINTER                  :: o3c
     314             :       TYPE(o3c_iterator_type)                            :: o3c_iterator
     315             : 
     316         132 :       NULLIFY (basis_set, basis_set_list, para_env, la_max, la_min)
     317         132 :       NULLIFY (lb_max, lb_min, lc_max, lc_min, npgfa, npgfb, npgfc, nsgfa, nsgfb, nsgfc)
     318         132 :       NULLIFY (first_sgfa, first_sgfb, first_sgfc, set_radius_a, set_radius_b, set_radius_c)
     319         132 :       NULLIFY (rpgf_a, rpgf_b, rpgf_c, zeta, zetb, zetc)
     320         132 :       NULLIFY (basis_set_a, basis_set_b, basis_set_c, tspa, spb, spc)
     321             : 
     322         132 :       CALL timeset(routineN, handle)
     323             : 
     324             :       !Need the max l for each basis for libint (and overall max #of sets for screening)
     325         132 :       nbasis = SIZE(basis_set_list_a)
     326         132 :       max_nset = 0
     327         132 :       maxli = 0
     328         344 :       DO ibasis = 1, nbasis
     329             :          CALL get_gto_basis_set(gto_basis_set=basis_set_list_a(ibasis)%gto_basis_set, &
     330         212 :                                 maxl=imax, nset=iset, nsgf_set=nsgfa)
     331         212 :          maxli = MAX(maxli, imax)
     332         344 :          max_nset = MAX(max_nset, iset)
     333             :       END DO
     334             :       maxlj = 0
     335         344 :       DO ibasis = 1, nbasis
     336             :          CALL get_gto_basis_set(gto_basis_set=basis_set_list_b(ibasis)%gto_basis_set, &
     337         212 :                                 maxl=imax, nset=iset, nsgf_set=nsgfb, npgf=npgfb)
     338         212 :          maxlj = MAX(maxlj, imax)
     339         344 :          max_nset = MAX(max_nset, iset)
     340             :       END DO
     341             :       maxlk = 0
     342         344 :       DO ibasis = 1, nbasis
     343             :          CALL get_gto_basis_set(gto_basis_set=basis_set_list_c(ibasis)%gto_basis_set, &
     344         212 :                                 maxl=imax, nset=iset, npgf=npgfc)
     345         212 :          maxlk = MAX(maxlk, imax)
     346         344 :          max_nset = MAX(max_nset, iset)
     347             :       END DO
     348         132 :       m_max = maxli + maxlj + maxlk
     349             : 
     350             :       !Screening
     351         132 :       do_screen = .FALSE.
     352         132 :       IF (PRESENT(eps_screen)) THEN
     353          42 :          do_screen = .TRUE.
     354          42 :          my_eps_screen = eps_screen
     355             :       END IF
     356         132 :       screen_radius = 0.0_dp
     357         132 :       IF (potential_parameter%potential_type == do_potential_truncated .OR. &
     358             :           potential_parameter%potential_type == do_potential_short) THEN
     359             : 
     360          12 :          screen_radius = potential_parameter%cutoff_radius*cutoff_screen_factor
     361         120 :       ELSE IF (potential_parameter%potential_type == do_potential_coulomb) THEN
     362             : 
     363          72 :          screen_radius = 1000000.0_dp
     364             :       END IF
     365             : 
     366             :       !get maximum contraction values for abc_contract screening
     367         132 :       IF (do_screen) THEN
     368             : 
     369             :          !Allocate max_contraction arrays such that we have a specific value for each set/kind
     370           0 :          ALLOCATE (max_contr(max_nset, nbasis), max_contra(max_nset, nbasis), &
     371         420 :                    max_contrb(max_nset, nbasis), max_contrc(max_nset, nbasis))
     372             : 
     373             :          !Not the most elegent, but better than copying 3 times the same
     374         168 :          DO ilist = 1, 3
     375             : 
     376         126 :             IF (ilist == 1) basis_set_list => basis_set_list_a
     377         126 :             IF (ilist == 2) basis_set_list => basis_set_list_b
     378         126 :             IF (ilist == 3) basis_set_list => basis_set_list_c
     379             : 
     380        1392 :             max_contr = 0.0_dp
     381             : 
     382         330 :             DO ibasis = 1, nbasis
     383         204 :                basis_set => basis_set_list(ibasis)%gto_basis_set
     384             : 
     385        1018 :                DO iset = 1, basis_set%nset
     386             : 
     387         688 :                   ncoa = basis_set%npgf(iset)*ncoset(basis_set%lmax(iset))
     388         688 :                   sgfa = basis_set%first_sgf(1, iset)
     389         688 :                   egfa = sgfa + basis_set%nsgf_set(iset) - 1
     390             : 
     391             :                   max_contr(iset, ibasis) = &
     392      465504 :                      MAXVAL((/(SUM(ABS(basis_set%sphi(1:ncoa, i))), i=sgfa, egfa)/))
     393             : 
     394             :                END DO !iset
     395             :             END DO !ibasis
     396             : 
     397         548 :             IF (ilist == 1) max_contra(:, :) = max_contr(:, :)
     398         548 :             IF (ilist == 2) max_contrb(:, :) = max_contr(:, :)
     399         590 :             IF (ilist == 3) max_contrc(:, :) = max_contr(:, :)
     400             :          END DO !ilist
     401          42 :          DEALLOCATE (max_contr)
     402             :       END IF !do_screen
     403             : 
     404             :       !To minimize memory ops in contraction, we need to pre-allocate buffers, pre-tranpose sphi_a
     405             :       !and also trim sphi in general to have contiguous arrays
     406        4776 :       ALLOCATE (tspa(max_nset, nbasis), spb(max_nset, nbasis), spc(max_nset, nbasis))
     407         344 :       DO ibasis = 1, nbasis
     408        1372 :          DO iset = 1, max_nset
     409        1028 :             NULLIFY (tspa(iset, ibasis)%array)
     410        1028 :             NULLIFY (spb(iset, ibasis)%array)
     411        1240 :             NULLIFY (spc(iset, ibasis)%array)
     412             :          END DO
     413             :       END DO
     414             : 
     415         528 :       DO ilist = 1, 3
     416             : 
     417        1164 :          DO ibasis = 1, nbasis
     418         636 :             IF (ilist == 1) basis_set => basis_set_list_a(ibasis)%gto_basis_set
     419         636 :             IF (ilist == 2) basis_set => basis_set_list_b(ibasis)%gto_basis_set
     420         636 :             IF (ilist == 3) basis_set => basis_set_list_c(ibasis)%gto_basis_set
     421             : 
     422        3128 :             DO iset = 1, basis_set%nset
     423             : 
     424        2096 :                ncoa = basis_set%npgf(iset)*ncoset(basis_set%lmax(iset))
     425        2096 :                sgfa = basis_set%first_sgf(1, iset)
     426        2096 :                egfa = sgfa + basis_set%nsgf_set(iset) - 1
     427             : 
     428        2732 :                IF (ilist == 1) THEN
     429        3192 :                   ALLOCATE (tspa(iset, ibasis)%array(basis_set%nsgf_set(iset), ncoa))
     430       41750 :                   tspa(iset, ibasis)%array(:, :) = TRANSPOSE(basis_set%sphi(1:ncoa, sgfa:egfa))
     431        1298 :                ELSE IF (ilist == 2) THEN
     432        3192 :                   ALLOCATE (spb(iset, ibasis)%array(ncoa, basis_set%nsgf_set(iset)))
     433       36942 :                   spb(iset, ibasis)%array(:, :) = basis_set%sphi(1:ncoa, sgfa:egfa)
     434             :                ELSE
     435        2000 :                   ALLOCATE (spc(iset, ibasis)%array(ncoa, basis_set%nsgf_set(iset)))
     436     2179948 :                   spc(iset, ibasis)%array(:, :) = basis_set%sphi(1:ncoa, sgfa:egfa)
     437             :                END IF
     438             : 
     439             :             END DO !iset
     440             :          END DO !ibasis
     441             :       END DO !ilist
     442             : 
     443         132 :       my_sort_bc = .FALSE.
     444         132 :       IF (PRESENT(only_bc_same_center)) my_sort_bc = only_bc_same_center
     445             : 
     446             :       !Init the truncated Coulomb operator
     447         132 :       CALL get_qs_env(qs_env, para_env=para_env)
     448         132 :       IF (potential_parameter%potential_type == do_potential_truncated) THEN
     449             : 
     450             :          !open the file only if necessary
     451           6 :          IF (m_max > get_lmax_init()) THEN
     452           0 :             IF (para_env%mepos == 0) THEN
     453           0 :                CALL open_file(unit_number=unit_id, file_name=potential_parameter%filename)
     454             :             END IF
     455           0 :             CALL init(m_max, unit_id, para_env%mepos, para_env)
     456           0 :             IF (para_env%mepos == 0) THEN
     457           0 :                CALL close_file(unit_id)
     458             :             END IF
     459             :          END IF
     460             :       END IF
     461             : 
     462             :       !Inint the initial gamma function before the OMP region as it is not thread safe
     463         132 :       CALL init_md_ftable(nmax=m_max)
     464             : 
     465             :       !Strategy: we use the o3c iterator because it is OMP parallelized and also takes the
     466             :       !          only_bc_same_center argument. Only the dbcsr_put_block is critical
     467             : 
     468             :       nthread = 1
     469         132 : !$    nthread = omp_get_max_threads()
     470             : 
     471         132 :       ALLOCATE (o3c)
     472             :       CALL init_o3c_container(o3c, 1, basis_set_list_a, basis_set_list_b, basis_set_list_c, &
     473         132 :                               ab_nl, ac_nl, only_bc_same_center=my_sort_bc)
     474         132 :       CALL o3c_iterator_create(o3c, o3c_iterator, nthread=nthread)
     475             : 
     476             : !$OMP PARALLEL DEFAULT(NONE) &
     477             : !$OMP SHARED (pq_X,do_screen,max_nset,basis_set_list_a,max_contra,max_contrb,max_contrc,&
     478             : !$OMP         basis_set_list_b, basis_set_list_c,ncoset,screen_radius,potential_parameter,&
     479             : !$OMP         my_eps_screen,maxli,maxlj,maxlk,my_sort_bc,nthread,o3c,o3c_iterator,tspa,spb,spc) &
     480             : !$OMP PRIVATE (lib,i,mepos,work,iset,ncoa,sgfa,egfa,nseta,iatom,ikind,jatom,jkind,katom,kkind,&
     481             : !$OMP          rij,rik,rjk,basis_set_a,nsetb,la_max,la_min,lb_max,lb_min,lc_max,lc_min,npgfa,&
     482             : !$OMP          npgfb,npgfc,nsgfa,nsgfb,nsgfc,ri,rk,first_sgfa,first_sgfb,first_sgfc,nsetc,&
     483             : !$OMP          set_radius_a,set_radius_b,set_radius_c,rj,rpgf_a,rpgf_b,rpgf_c,zeta,zetb,&
     484             : !$OMP          zetc,basis_set_b,basis_set_c,dij,dik,djk,ni,nj,nk,iabc,sabc,jset,kset,&
     485         132 : !$OMP          ncob,ncoc,sgfb,sgfc,egfb,egfc,sabc_ext,cpp_buffer,ccp_buffer)
     486             : 
     487             :       mepos = 0
     488             : !$    mepos = omp_get_thread_num()
     489             : 
     490             :       !each thread need its own libint object (internals may change at different rates)
     491             :       CALL cp_libint_init_3eri(lib, MAX(maxli, maxlj, maxlk))
     492             :       CALL cp_libint_set_contrdepth(lib, 1)
     493             : 
     494             :       DO WHILE (o3c_iterate(o3c_iterator, mepos=mepos) == 0)
     495             :          CALL get_o3c_iterator_info(o3c_iterator, mepos=mepos, ikind=ikind, jkind=jkind, kkind=kkind, &
     496             :                                     iatom=iatom, jatom=jatom, katom=katom, rij=rij, rik=rik)
     497             : 
     498             :          !get first center basis info
     499             :          basis_set_a => basis_set_list_a(ikind)%gto_basis_set
     500             :          first_sgfa => basis_set_a%first_sgf
     501             :          la_max => basis_set_a%lmax
     502             :          la_min => basis_set_a%lmin
     503             :          npgfa => basis_set_a%npgf
     504             :          nseta = basis_set_a%nset
     505             :          nsgfa => basis_set_a%nsgf_set
     506             :          zeta => basis_set_a%zet
     507             :          rpgf_a => basis_set_a%pgf_radius
     508             :          set_radius_a => basis_set_a%set_radius
     509             :          ni = SUM(nsgfa)
     510             :          !second center basis info
     511             :          basis_set_b => basis_set_list_b(jkind)%gto_basis_set
     512             :          first_sgfb => basis_set_b%first_sgf
     513             :          lb_max => basis_set_b%lmax
     514             :          lb_min => basis_set_b%lmin
     515             :          npgfb => basis_set_b%npgf
     516             :          nsetb = basis_set_b%nset
     517             :          nsgfb => basis_set_b%nsgf_set
     518             :          zetb => basis_set_b%zet
     519             :          rpgf_b => basis_set_b%pgf_radius
     520             :          set_radius_b => basis_set_b%set_radius
     521             :          nj = SUM(nsgfb)
     522             :          !third center basis info
     523             :          basis_set_c => basis_set_list_c(kkind)%gto_basis_set
     524             :          first_sgfc => basis_set_c%first_sgf
     525             :          lc_max => basis_set_c%lmax
     526             :          lc_min => basis_set_c%lmin
     527             :          npgfc => basis_set_c%npgf
     528             :          nsetc = basis_set_c%nset
     529             :          nsgfc => basis_set_c%nsgf_set
     530             :          zetc => basis_set_c%zet
     531             :          rpgf_c => basis_set_c%pgf_radius
     532             :          set_radius_c => basis_set_c%set_radius
     533             :          nk = SUM(nsgfc)
     534             : 
     535             :          !position and distances, only relative pos matter for libint
     536             :          rjk = rik - rij
     537             :          ri = 0.0_dp
     538             :          rj = rij ! ri + rij
     539             :          rk = rik ! ri + rik
     540             : 
     541             :          djk = NORM2(rjk)
     542             :          dij = NORM2(rij)
     543             :          dik = NORM2(rik)
     544             : 
     545             :          !sgf integrals
     546             :          ALLOCATE (iabc(ni, nj, nk))
     547             :          iabc(:, :, :) = 0.0_dp
     548             : 
     549             :          DO iset = 1, nseta
     550             :             ncoa = npgfa(iset)*ncoset(la_max(iset))
     551             :             sgfa = first_sgfa(1, iset)
     552             :             egfa = sgfa + nsgfa(iset) - 1
     553             : 
     554             :             DO jset = 1, nsetb
     555             :                ncob = npgfb(jset)*ncoset(lb_max(jset))
     556             :                sgfb = first_sgfb(1, jset)
     557             :                egfb = sgfb + nsgfb(jset) - 1
     558             : 
     559             :                !screening (overlap)
     560             :                IF (set_radius_a(iset) + set_radius_b(jset) < dij) CYCLE
     561             : 
     562             :                DO kset = 1, nsetc
     563             :                   ncoc = npgfc(kset)*ncoset(lc_max(kset))
     564             :                   sgfc = first_sgfc(1, kset)
     565             :                   egfc = sgfc + nsgfc(kset) - 1
     566             : 
     567             :                   !screening (potential)
     568             :                   IF (set_radius_a(iset) + set_radius_c(kset) + screen_radius < dik) CYCLE
     569             :                   IF (set_radius_b(jset) + set_radius_c(kset) + screen_radius < djk) CYCLE
     570             : 
     571             :                   !pgf integrals
     572             :                   ALLOCATE (sabc(ncoa, ncob, ncoc))
     573             :                   sabc(:, :, :) = 0.0_dp
     574             : 
     575             :                   IF (do_screen) THEN
     576             :                      CALL eri_3center(sabc, la_min(iset), la_max(iset), npgfa(iset), zeta(:, iset), &
     577             :                                       rpgf_a(:, iset), ri, lb_min(jset), lb_max(jset), npgfb(jset), &
     578             :                                       zetb(:, jset), rpgf_b(:, jset), rj, lc_min(kset), lc_max(kset), &
     579             :                                       npgfc(kset), zetc(:, kset), rpgf_c(:, kset), rk, dij, dik, &
     580             :                                       djk, lib, potential_parameter, int_abc_ext=sabc_ext)
     581             :                      IF (my_eps_screen > sabc_ext*(max_contra(iset, ikind)* &
     582             :                                                    max_contrb(jset, jkind)* &
     583             :                                                    max_contrc(kset, kkind))) THEN
     584             :                         DEALLOCATE (sabc)
     585             :                         CYCLE
     586             :                      END IF
     587             :                   ELSE
     588             :                      CALL eri_3center(sabc, la_min(iset), la_max(iset), npgfa(iset), zeta(:, iset), &
     589             :                                       rpgf_a(:, iset), ri, lb_min(jset), lb_max(jset), npgfb(jset), &
     590             :                                       zetb(:, jset), rpgf_b(:, jset), rj, lc_min(kset), lc_max(kset), &
     591             :                                       npgfc(kset), zetc(:, kset), rpgf_c(:, kset), rk, dij, dik, &
     592             :                                       djk, lib, potential_parameter)
     593             :                   END IF
     594             : 
     595             :                   ALLOCATE (work(nsgfa(iset), nsgfb(jset), nsgfc(kset)))
     596             : 
     597             :                   CALL abc_contract_xsmm(work, sabc, tspa(iset, ikind)%array, spb(jset, jkind)%array, &
     598             :                                          spc(kset, kkind)%array, ncoa, ncob, ncoc, nsgfa(iset), &
     599             :                                          nsgfb(jset), nsgfc(kset), cpp_buffer, ccp_buffer)
     600             : 
     601             :                   iabc(sgfa:egfa, sgfb:egfb, sgfc:egfc) = work(:, :, :)
     602             :                   DEALLOCATE (sabc, work)
     603             : 
     604             :                END DO !kset
     605             :             END DO !jset
     606             :          END DO !iset
     607             : 
     608             :          !Add the integral to the proper tensor block
     609             : !$OMP CRITICAL
     610             :          CALL dbt_put_block(pq_X, [iatom, jatom, katom], SHAPE(iabc), iabc, summation=.TRUE.)
     611             : !$OMP END CRITICAL
     612             : 
     613             :          DEALLOCATE (iabc)
     614             :       END DO !o3c_iterator
     615             : 
     616             :       IF (ALLOCATED(ccp_buffer)) DEALLOCATE (ccp_buffer)
     617             :       IF (ALLOCATED(cpp_buffer)) DEALLOCATE (cpp_buffer)
     618             : 
     619             :       CALL cp_libint_cleanup_3eri(lib)
     620             : 
     621             : !$OMP END PARALLEL
     622         132 :       CALL o3c_iterator_release(o3c_iterator)
     623         132 :       CALL release_o3c_container(o3c)
     624         132 :       DEALLOCATE (o3c)
     625             : 
     626         780 :       DO iset = 1, max_nset
     627        1808 :          DO ibasis = 1, nbasis
     628        1028 :             IF (ASSOCIATED(tspa(iset, ibasis)%array)) DEALLOCATE (tspa(iset, ibasis)%array)
     629        1028 :             IF (ASSOCIATED(spb(iset, ibasis)%array)) DEALLOCATE (spb(iset, ibasis)%array)
     630        1676 :             IF (ASSOCIATED(spc(iset, ibasis)%array)) DEALLOCATE (spc(iset, ibasis)%array)
     631             :          END DO
     632             :       END DO
     633         132 :       DEALLOCATE (tspa, spb, spc)
     634             : 
     635         132 :       CALL timestop(handle)
     636             : 
     637         264 :    END SUBROUTINE fill_pqX_tensor
     638             : 
     639             : ! **************************************************************************************************
     640             : !> \brief Builds a neighbor lists set for overlaping 2-center S_ab, where b is restricted on a
     641             : !>        a given list of atoms. Used for Coulomb RI where (aI|P) = sum_b C_bI (ab|P), where
     642             : !>        contraction coeff C_bI is assumed to be non-zero only on excited atoms
     643             : !> \param ab_list the neighbor list
     644             : !> \param basis_a basis set list for atom a
     645             : !> \param basis_b basis set list for atom b
     646             : !> \param qs_env ...
     647             : !> \param excited_atoms the indices of the excited atoms on which b is centered
     648             : !> \param ext_dist2d use an external distribution2d
     649             : ! **************************************************************************************************
     650         256 :    SUBROUTINE build_xas_tdp_ovlp_nl(ab_list, basis_a, basis_b, qs_env, excited_atoms, ext_dist2d)
     651             : 
     652             :       TYPE(neighbor_list_set_p_type), DIMENSION(:), &
     653             :          POINTER                                         :: ab_list
     654             :       TYPE(gto_basis_set_p_type), DIMENSION(:), POINTER  :: basis_a, basis_b
     655             :       TYPE(qs_environment_type), POINTER                 :: qs_env
     656             :       INTEGER, DIMENSION(:), INTENT(IN), OPTIONAL        :: excited_atoms
     657             :       TYPE(distribution_2d_type), OPTIONAL, POINTER      :: ext_dist2d
     658             : 
     659             :       INTEGER                                            :: ikind, nkind
     660             :       LOGICAL                                            :: my_restrictb
     661             :       LOGICAL, ALLOCATABLE, DIMENSION(:)                 :: a_present, b_present
     662             :       REAL(dp)                                           :: subcells
     663             :       REAL(dp), ALLOCATABLE, DIMENSION(:)                :: a_radius, b_radius
     664         256 :       REAL(dp), ALLOCATABLE, DIMENSION(:, :)             :: pair_radius
     665         256 :       TYPE(atomic_kind_type), DIMENSION(:), POINTER      :: atomic_kind_set
     666             :       TYPE(cell_type), POINTER                           :: cell
     667             :       TYPE(distribution_1d_type), POINTER                :: distribution_1d
     668             :       TYPE(distribution_2d_type), POINTER                :: distribution_2d
     669         256 :       TYPE(local_atoms_type), ALLOCATABLE, DIMENSION(:)  :: atom2d
     670         256 :       TYPE(molecule_type), DIMENSION(:), POINTER         :: molecule_set
     671         256 :       TYPE(particle_type), DIMENSION(:), POINTER         :: particle_set
     672             : 
     673         256 :       NULLIFY (atomic_kind_set, distribution_1d, distribution_2d, molecule_set, particle_set, cell)
     674             : 
     675             : !  Initialization
     676         256 :       CALL get_qs_env(qs_env, nkind=nkind)
     677         256 :       CALL section_vals_val_get(qs_env%input, "DFT%SUBCELLS", r_val=subcells)
     678             : 
     679         256 :       my_restrictb = .FALSE.
     680         256 :       IF (PRESENT(excited_atoms)) THEN
     681          88 :          my_restrictb = .TRUE.
     682             :       END IF
     683             : 
     684        1024 :       ALLOCATE (a_present(nkind), b_present(nkind))
     685         662 :       a_present = .FALSE.
     686         662 :       b_present = .FALSE.
     687        1024 :       ALLOCATE (a_radius(nkind), b_radius(nkind))
     688         662 :       a_radius = 0.0_dp
     689         662 :       b_radius = 0.0_dp
     690             : 
     691             : !  Set up the radii
     692         662 :       DO ikind = 1, nkind
     693         406 :          IF (ASSOCIATED(basis_a(ikind)%gto_basis_set)) THEN
     694         406 :             a_present(ikind) = .TRUE.
     695         406 :             CALL get_gto_basis_set(basis_a(ikind)%gto_basis_set, kind_radius=a_radius(ikind))
     696             :          END IF
     697             : 
     698         662 :          IF (ASSOCIATED(basis_b(ikind)%gto_basis_set)) THEN
     699         406 :             b_present(ikind) = .TRUE.
     700         406 :             CALL get_gto_basis_set(basis_b(ikind)%gto_basis_set, kind_radius=b_radius(ikind))
     701             :          END IF
     702             :       END DO !ikind
     703             : 
     704        1024 :       ALLOCATE (pair_radius(nkind, nkind))
     705        1404 :       pair_radius = 0.0_dp
     706         256 :       CALL pair_radius_setup(a_present, b_present, a_radius, b_radius, pair_radius)
     707             : 
     708             : !  Set up the nl
     709             :       CALL get_qs_env(qs_env, atomic_kind_set=atomic_kind_set, cell=cell, &
     710             :                       distribution_2d=distribution_2d, local_particles=distribution_1d, &
     711         256 :                       particle_set=particle_set, molecule_set=molecule_set)
     712             : 
     713             :       !use an external distribution_2d if required
     714         256 :       IF (PRESENT(ext_dist2d)) distribution_2d => ext_dist2d
     715             : 
     716        1174 :       ALLOCATE (atom2d(nkind))
     717             :       CALL atom2d_build(atom2d, distribution_1d, distribution_2d, atomic_kind_set, &
     718         256 :                         molecule_set, .FALSE., particle_set)
     719             : 
     720         256 :       IF (my_restrictb) THEN
     721             : 
     722             :          CALL build_neighbor_lists(ab_list, particle_set, atom2d, cell, pair_radius, subcells, &
     723          88 :                                    atomb_to_keep=excited_atoms, nlname="XAS_TDP_ovlp_nl")
     724             : 
     725             :       ELSE
     726             : 
     727             :          CALL build_neighbor_lists(ab_list, particle_set, atom2d, cell, pair_radius, subcells, &
     728         168 :                                    nlname="XAS_TDP_ovlp_nl")
     729             : 
     730             :       END IF
     731             : !  Clean-up
     732         256 :       CALL atom2d_cleanup(atom2d)
     733             : 
     734         512 :    END SUBROUTINE build_xas_tdp_ovlp_nl
     735             : 
     736             : ! **************************************************************************************************
     737             : !> \brief Builds a neighbor lists set taylored for 3-center integral within XAS TDP, such that only
     738             : !>        excited atoms are taken into account for the list_c
     739             : !> \param ac_list the neighbor list ready for 3-center integrals
     740             : !> \param basis_a basis set list for atom a
     741             : !> \param basis_c basis set list for atom c
     742             : !> \param op_type to indicate whther the list should be built with overlap, Coulomb or else in mind
     743             : !> \param qs_env ...
     744             : !> \param excited_atoms the indices of the excited atoms to consider (if not given, all atoms are taken)
     745             : !> \param x_range in case some truncated/screened operator is used, gives its range
     746             : !> \param ext_dist2d external distribution_2d to be used
     747             : !> \note Based on setup_neighbor_list with added features
     748             : ! **************************************************************************************************
     749         218 :    SUBROUTINE build_xas_tdp_3c_nl(ac_list, basis_a, basis_c, op_type, qs_env, excited_atoms, &
     750             :                                   x_range, ext_dist2d)
     751             : 
     752             :       TYPE(neighbor_list_set_p_type), DIMENSION(:), &
     753             :          POINTER                                         :: ac_list
     754             :       TYPE(gto_basis_set_p_type), DIMENSION(:), POINTER  :: basis_a, basis_c
     755             :       INTEGER, INTENT(IN)                                :: op_type
     756             :       TYPE(qs_environment_type), POINTER                 :: qs_env
     757             :       INTEGER, DIMENSION(:), INTENT(IN), OPTIONAL        :: excited_atoms
     758             :       REAL(dp), INTENT(IN), OPTIONAL                     :: x_range
     759             :       TYPE(distribution_2d_type), OPTIONAL, POINTER      :: ext_dist2d
     760             : 
     761             :       INTEGER                                            :: ikind, nkind
     762             :       LOGICAL                                            :: sort_atoms
     763             :       LOGICAL, ALLOCATABLE, DIMENSION(:)                 :: a_present, c_present
     764             :       REAL(dp)                                           :: subcells
     765             :       REAL(dp), ALLOCATABLE, DIMENSION(:)                :: a_radius, c_radius
     766         218 :       REAL(dp), ALLOCATABLE, DIMENSION(:, :)             :: pair_radius
     767         218 :       TYPE(atomic_kind_type), DIMENSION(:), POINTER      :: atomic_kind_set
     768             :       TYPE(cell_type), POINTER                           :: cell
     769             :       TYPE(distribution_1d_type), POINTER                :: distribution_1d
     770             :       TYPE(distribution_2d_type), POINTER                :: distribution_2d
     771         218 :       TYPE(local_atoms_type), ALLOCATABLE, DIMENSION(:)  :: atom2d
     772         218 :       TYPE(molecule_type), DIMENSION(:), POINTER         :: molecule_set
     773         218 :       TYPE(particle_type), DIMENSION(:), POINTER         :: particle_set
     774             : 
     775         218 :       NULLIFY (atomic_kind_set, distribution_1d, distribution_2d, molecule_set, particle_set, cell)
     776             : 
     777             : !  Initialization
     778         218 :       CALL get_qs_env(qs_env, nkind=nkind)
     779         218 :       CALL section_vals_val_get(qs_env%input, "DFT%SUBCELLS", r_val=subcells)
     780         218 :       sort_atoms = .FALSE.
     781         218 :       IF ((PRESENT(excited_atoms))) sort_atoms = .TRUE.
     782             : 
     783         872 :       ALLOCATE (a_present(nkind), c_present(nkind))
     784         564 :       a_present = .FALSE.
     785         564 :       c_present = .FALSE.
     786         872 :       ALLOCATE (a_radius(nkind), c_radius(nkind))
     787         564 :       a_radius = 0.0_dp
     788         564 :       c_radius = 0.0_dp
     789             : 
     790             : !  Set up the radii, depending on the operator type
     791         218 :       IF (op_type == do_potential_id) THEN
     792             : 
     793             :          !overlap => use the kind radius for both a and c
     794         352 :          DO ikind = 1, nkind
     795             :             !orbital basis set
     796         214 :             IF (ASSOCIATED(basis_a(ikind)%gto_basis_set)) THEN
     797         214 :                a_present(ikind) = .TRUE.
     798         214 :                CALL get_gto_basis_set(basis_a(ikind)%gto_basis_set, kind_radius=a_radius(ikind))
     799             :             END IF
     800             :             !RI_XAS basis set
     801         352 :             IF (ASSOCIATED(basis_c(ikind)%gto_basis_set)) THEN
     802         214 :                c_present(ikind) = .TRUE.
     803         214 :                CALL get_gto_basis_set(basis_c(ikind)%gto_basis_set, kind_radius=c_radius(ikind))
     804             :             END IF
     805             :          END DO !ikind
     806             : 
     807          80 :       ELSE IF (op_type == do_potential_coulomb) THEN
     808             : 
     809             :          !Coulomb operator, virtually infinite range => set c_radius to arbitrarily large number
     810         164 :          DO ikind = 1, nkind
     811         108 :             IF (ASSOCIATED(basis_c(ikind)%gto_basis_set)) THEN
     812         108 :                c_present(ikind) = .TRUE.
     813         108 :                c_radius(ikind) = 1000000.0_dp
     814             :             END IF
     815         164 :             IF (ASSOCIATED(basis_a(ikind)%gto_basis_set)) a_present(ikind) = .TRUE.
     816             :          END DO !ikind
     817             : 
     818          24 :       ELSE IF (op_type == do_potential_truncated .OR. op_type == do_potential_short) THEN
     819             : 
     820             :          !Truncated coulomb/short range: set c_radius to x_range + the kind_radii
     821          48 :          DO ikind = 1, nkind
     822          24 :             IF (ASSOCIATED(basis_a(ikind)%gto_basis_set)) THEN
     823          24 :                a_present(ikind) = .TRUE.
     824          24 :                CALL get_gto_basis_set(basis_a(ikind)%gto_basis_set, kind_radius=a_radius(ikind))
     825             :             END IF
     826          48 :             IF (ASSOCIATED(basis_c(ikind)%gto_basis_set)) THEN
     827          24 :                c_present(ikind) = .TRUE.
     828          24 :                CALL get_gto_basis_set(basis_c(ikind)%gto_basis_set, kind_radius=c_radius(ikind))
     829          24 :                c_radius(ikind) = c_radius(ikind) + x_range
     830             :             END IF
     831             :          END DO !ikind
     832             : 
     833             :       ELSE
     834           0 :          CPABORT("Operator not known")
     835             :       END IF
     836             : 
     837         872 :       ALLOCATE (pair_radius(nkind, nkind))
     838        1198 :       pair_radius = 0.0_dp
     839         218 :       CALL pair_radius_setup(a_present, c_present, a_radius, c_radius, pair_radius)
     840             : 
     841             : !  Actually setup the list
     842             :       CALL get_qs_env(qs_env, atomic_kind_set=atomic_kind_set, cell=cell, &
     843             :                       distribution_2d=distribution_2d, local_particles=distribution_1d, &
     844         218 :                       particle_set=particle_set, molecule_set=molecule_set)
     845             : 
     846             :       !use an external distribution_2d if required
     847         218 :       IF (PRESENT(ext_dist2d)) distribution_2d => ext_dist2d
     848             : 
     849        1000 :       ALLOCATE (atom2d(nkind))
     850             :       CALL atom2d_build(atom2d, distribution_1d, distribution_2d, atomic_kind_set, &
     851         218 :                         molecule_set, .FALSE., particle_set)
     852             : 
     853         218 :       IF (sort_atoms) THEN
     854             :          CALL build_neighbor_lists(ac_list, particle_set, atom2d, cell, pair_radius, subcells, &
     855             :                                    operator_type="ABC", atomb_to_keep=excited_atoms, &
     856         218 :                                    nlname="XAS_TDP_3c_nl")
     857             :       ELSE
     858             :          CALL build_neighbor_lists(ac_list, particle_set, atom2d, cell, pair_radius, subcells, &
     859           0 :                                    operator_type="ABC", nlname="XAS_TDP_3c_nl")
     860             :       END IF
     861             : 
     862             : !  Clean-up
     863         218 :       CALL atom2d_cleanup(atom2d)
     864             : 
     865         436 :    END SUBROUTINE build_xas_tdp_3c_nl
     866             : 
     867             : ! **************************************************************************************************
     868             : !> \brief Returns an optimized distribution_2d for the given neighbor lists based on an evaluation
     869             : !>        of the cost of the corresponding 3-center integrals
     870             : !> \param opt_3c_dist2d the optimized distribution_2d
     871             : !> \param ab_list ...
     872             : !> \param ac_list ...
     873             : !> \param basis_set_a ...
     874             : !> \param basis_set_b ...
     875             : !> \param basis_set_c ...
     876             : !> \param qs_env ...
     877             : !> \param only_bc_same_center ...
     878             : ! **************************************************************************************************
     879          86 :    SUBROUTINE get_opt_3c_dist2d(opt_3c_dist2d, ab_list, ac_list, basis_set_a, basis_set_b, &
     880             :                                 basis_set_c, qs_env, only_bc_same_center)
     881             : 
     882             :       TYPE(distribution_2d_type), POINTER                :: opt_3c_dist2d
     883             :       TYPE(neighbor_list_set_p_type), DIMENSION(:), &
     884             :          POINTER                                         :: ab_list, ac_list
     885             :       TYPE(gto_basis_set_p_type), DIMENSION(:), POINTER  :: basis_set_a, basis_set_b, basis_set_c
     886             :       TYPE(qs_environment_type), POINTER                 :: qs_env
     887             :       LOGICAL, INTENT(IN), OPTIONAL                      :: only_bc_same_center
     888             : 
     889             :       CHARACTER(len=*), PARAMETER                        :: routineN = 'get_opt_3c_dist2d'
     890             : 
     891             :       INTEGER                                            :: handle, i, iatom, ikind, ip, jatom, &
     892             :                                                             jkind, kkind, mypcol, myprow, n, &
     893             :                                                             natom, nkind, npcol, nprow, nsgfa, &
     894             :                                                             nsgfb, nsgfc
     895          86 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: nparticle_local_col, nparticle_local_row
     896          86 :       INTEGER, DIMENSION(:, :), POINTER                  :: col_dist, row_dist
     897             :       LOGICAL                                            :: my_sort_bc
     898             :       REAL(dp)                                           :: cost, rab(3), rac(3), rbc(3)
     899          86 :       REAL(dp), ALLOCATABLE, DIMENSION(:)                :: col_cost, col_proc_cost, row_cost, &
     900          86 :                                                             row_proc_cost
     901          86 :       TYPE(cp_1d_i_p_type), DIMENSION(:), POINTER        :: local_particle_col, local_particle_row
     902             :       TYPE(cp_blacs_env_type), POINTER                   :: blacs_env
     903             :       TYPE(mp_para_env_type), POINTER                    :: para_env
     904             :       TYPE(neighbor_list_iterator_p_type), &
     905          86 :          DIMENSION(:), POINTER                           :: ab_iter, ac_iter
     906          86 :       TYPE(particle_type), DIMENSION(:), POINTER         :: particle_set
     907          86 :       TYPE(qs_kind_type), DIMENSION(:), POINTER          :: qs_kind_set
     908             : 
     909          86 :       NULLIFY (para_env, col_dist, row_dist, blacs_env, qs_kind_set, particle_set)
     910          86 :       NULLIFY (local_particle_col, local_particle_row, ab_iter, ac_iter)
     911             : 
     912          86 :       CALL timeset(routineN, handle)
     913             : 
     914             :       !Idea: create a,b and a,c nl_iterators in the original dist, then loop over them and compute the
     915             :       !      cost of each ab pairs and project on the col/row. Then distribute the atom col/row to
     916             :       !      the proc col/row in order to spread out the cost as much as possible
     917             : 
     918          86 :       my_sort_bc = .FALSE.
     919          86 :       IF (PRESENT(only_bc_same_center)) my_sort_bc = only_bc_same_center
     920             : 
     921             :       CALL get_qs_env(qs_env, natom=natom, para_env=para_env, blacs_env=blacs_env, &
     922          86 :                       qs_kind_set=qs_kind_set, particle_set=particle_set, nkind=nkind)
     923             : 
     924          86 :       myprow = blacs_env%mepos(1) + 1
     925          86 :       mypcol = blacs_env%mepos(2) + 1
     926          86 :       nprow = blacs_env%num_pe(1)
     927          86 :       npcol = blacs_env%num_pe(2)
     928             : 
     929         344 :       ALLOCATE (col_cost(natom), row_cost(natom))
     930        1320 :       col_cost = 0.0_dp; row_cost = 0.0_dp
     931             : 
     932         344 :       ALLOCATE (row_dist(natom, 2), col_dist(natom, 2))
     933        2726 :       row_dist = 1; col_dist = 1
     934             : 
     935          86 :       CALL neighbor_list_iterator_create(ab_iter, ab_list)
     936          86 :       CALL neighbor_list_iterator_create(ac_iter, ac_list, search=.TRUE.)
     937        1165 :       DO WHILE (neighbor_list_iterate(ab_iter) == 0)
     938        1079 :          CALL get_iterator_info(ab_iter, ikind=ikind, jkind=jkind, iatom=iatom, jatom=jatom, r=rab)
     939             : 
     940        2635 :          DO kkind = 1, nkind
     941        1470 :             CALL nl_set_sub_iterator(ac_iter, ikind, kkind, iatom)
     942             : 
     943        6887 :             DO WHILE (nl_sub_iterate(ac_iter) == 0)
     944             : 
     945        4338 :                IF (my_sort_bc) THEN
     946             :                   !only take a,b,c if b,c (or a,c because of symmetry) share the same center
     947         690 :                   CALL get_iterator_info(ac_iter, r=rac)
     948        2760 :                   rbc(:) = rac(:) - rab(:)
     949        2979 :                   IF (.NOT. (ALL(ABS(rbc) .LE. 1.0E-8_dp) .OR. ALL(ABS(rac) .LE. 1.0E-8_dp))) CYCLE
     950             : 
     951             :                END IF
     952             : 
     953             :                !Use the size of integral as measure as contraciton cost seems to dominate
     954        4048 :                nsgfa = basis_set_a(ikind)%gto_basis_set%nsgf
     955        4048 :                nsgfb = basis_set_b(jkind)%gto_basis_set%nsgf
     956        4048 :                nsgfc = basis_set_c(kkind)%gto_basis_set%nsgf
     957             : 
     958        4048 :                cost = REAL(nsgfa*nsgfb*nsgfc, dp)
     959             : 
     960        4048 :                row_cost(iatom) = row_cost(iatom) + cost
     961        4338 :                col_cost(jatom) = col_cost(jatom) + cost
     962             : 
     963             :             END DO !ac_iter
     964             :          END DO !kkind
     965             :       END DO !ab_iter
     966          86 :       CALL neighbor_list_iterator_release(ab_iter)
     967          86 :       CALL neighbor_list_iterator_release(ac_iter)
     968             : 
     969          86 :       CALL para_env%sum(row_cost)
     970          86 :       CALL para_env%sum(col_cost)
     971             : 
     972             :       !Distribute the cost as evenly as possible
     973         430 :       ALLOCATE (col_proc_cost(npcol), row_proc_cost(nprow))
     974         430 :       col_proc_cost = 0.0_dp; row_proc_cost = 0.0_dp
     975         660 :       DO i = 1, natom
     976       21340 :          iatom = MAXLOC(row_cost, 1)
     977        2296 :          ip = MINLOC(row_proc_cost, 1)
     978         574 :          row_proc_cost(ip) = row_proc_cost(ip) + row_cost(iatom)
     979         574 :          row_dist(iatom, 1) = ip
     980         574 :          row_cost(iatom) = 0.0_dp
     981             : 
     982       21340 :          iatom = MAXLOC(col_cost, 1)
     983        1722 :          ip = MINLOC(col_proc_cost, 1)
     984         574 :          col_proc_cost(ip) = col_proc_cost(ip) + col_cost(iatom)
     985         574 :          col_dist(iatom, 1) = ip
     986         660 :          col_cost(iatom) = 0.0_dp
     987             :       END DO
     988             : 
     989             :       !the usual stuff
     990         612 :       ALLOCATE (local_particle_col(nkind), local_particle_row(nkind))
     991         344 :       ALLOCATE (nparticle_local_row(nkind), nparticle_local_col(nkind))
     992         440 :       nparticle_local_row = 0; nparticle_local_col = 0
     993             : 
     994         660 :       DO iatom = 1, natom
     995         574 :          ikind = particle_set(iatom)%atomic_kind%kind_number
     996             : 
     997         574 :          IF (row_dist(iatom, 1) == myprow) nparticle_local_row(ikind) = nparticle_local_row(ikind) + 1
     998         660 :          IF (col_dist(iatom, 1) == mypcol) nparticle_local_col(ikind) = nparticle_local_col(ikind) + 1
     999             :       END DO
    1000             : 
    1001         220 :       DO ikind = 1, nkind
    1002         134 :          n = nparticle_local_row(ikind)
    1003         360 :          ALLOCATE (local_particle_row(ikind)%array(n))
    1004             : 
    1005         134 :          n = nparticle_local_col(ikind)
    1006         488 :          ALLOCATE (local_particle_col(ikind)%array(n))
    1007             :       END DO
    1008             : 
    1009         440 :       nparticle_local_row = 0; nparticle_local_col = 0
    1010         660 :       DO iatom = 1, natom
    1011         574 :          ikind = particle_set(iatom)%atomic_kind%kind_number
    1012             : 
    1013         574 :          IF (row_dist(iatom, 1) == myprow) THEN
    1014         287 :             nparticle_local_row(ikind) = nparticle_local_row(ikind) + 1
    1015         287 :             local_particle_row(ikind)%array(nparticle_local_row(ikind)) = iatom
    1016             :          END IF
    1017         660 :          IF (col_dist(iatom, 1) == mypcol) THEN
    1018         574 :             nparticle_local_col(ikind) = nparticle_local_col(ikind) + 1
    1019         574 :             local_particle_col(ikind)%array(nparticle_local_col(ikind)) = iatom
    1020             :          END IF
    1021             :       END DO
    1022             : 
    1023             :       !Finally create the dist_2d
    1024         660 :       row_dist(:, 1) = row_dist(:, 1) - 1
    1025         660 :       col_dist(:, 1) = col_dist(:, 1) - 1
    1026             :       CALL distribution_2d_create(opt_3c_dist2d, row_distribution_ptr=row_dist, &
    1027             :                                   col_distribution_ptr=col_dist, local_rows_ptr=local_particle_row, &
    1028          86 :                                   local_cols_ptr=local_particle_col, blacs_env=blacs_env)
    1029             : 
    1030          86 :       CALL timestop(handle)
    1031             : 
    1032         172 :    END SUBROUTINE get_opt_3c_dist2d
    1033             : 
    1034             : ! **************************************************************************************************
    1035             : !> \brief Computes the RI exchange 3-center integrals (ab|c), where c is from the RI_XAS basis and
    1036             : !>        centered on excited atoms and kind. The operator used is that of the RI metric
    1037             : !> \param ex_atoms excited atoms on which the third center is located
    1038             : !> \param xas_tdp_env ...
    1039             : !> \param xas_tdp_control ...
    1040             : !> \param qs_env ...
    1041             : !> \note  This routine is called once for each excited atom. Because there are many different a,b
    1042             : !>        pairs involved, load balance is ok. This allows memory saving
    1043             : ! **************************************************************************************************
    1044          42 :    SUBROUTINE compute_ri_3c_exchange(ex_atoms, xas_tdp_env, xas_tdp_control, qs_env)
    1045             : 
    1046             :       INTEGER, DIMENSION(:), INTENT(IN)                  :: ex_atoms
    1047             :       TYPE(xas_tdp_env_type), POINTER                    :: xas_tdp_env
    1048             :       TYPE(xas_tdp_control_type), POINTER                :: xas_tdp_control
    1049             :       TYPE(qs_environment_type), POINTER                 :: qs_env
    1050             : 
    1051             :       CHARACTER(len=*), PARAMETER :: routineN = 'compute_ri_3c_exchange'
    1052             : 
    1053             :       INTEGER                                            :: handle, natom, nkind
    1054             :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: blk_size_orb, blk_size_ri
    1055             :       TYPE(dbcsr_distribution_type)                      :: opt_dbcsr_dist
    1056          42 :       TYPE(gto_basis_set_p_type), DIMENSION(:), POINTER  :: basis_set_orb, basis_set_ri
    1057             :       TYPE(mp_para_env_type), POINTER                    :: para_env
    1058             :       TYPE(neighbor_list_set_p_type), DIMENSION(:), &
    1059          42 :          POINTER                                         :: ab_list, ac_list
    1060          42 :       TYPE(particle_type), DIMENSION(:), POINTER         :: particle_set
    1061          42 :       TYPE(qs_kind_type), DIMENSION(:), POINTER          :: qs_kind_set
    1062             : 
    1063          42 :       NULLIFY (basis_set_ri, basis_set_orb, ac_list, ab_list, qs_kind_set, para_env, particle_set)
    1064             : 
    1065          42 :       CALL timeset(routineN, handle)
    1066             : 
    1067             : !  Take what we need from the qs_env
    1068             :       CALL get_qs_env(qs_env, nkind=nkind, qs_kind_set=qs_kind_set, para_env=para_env, &
    1069          42 :                       natom=natom, particle_set=particle_set)
    1070             : 
    1071             : !  Build the basis set lists
    1072         194 :       ALLOCATE (basis_set_ri(nkind))
    1073         194 :       ALLOCATE (basis_set_orb(nkind))
    1074          42 :       CALL basis_set_list_setup(basis_set_ri, "RI_XAS", qs_kind_set)
    1075          42 :       CALL basis_set_list_setup(basis_set_orb, "ORB", qs_kind_set)
    1076             : 
    1077             : !  Get the optimized distribution 2d for theses integrals (and store it in xas_tdp_env)
    1078          42 :       CALL build_xas_tdp_ovlp_nl(ab_list, basis_set_orb, basis_set_orb, qs_env)
    1079             :       CALL build_xas_tdp_3c_nl(ac_list, basis_set_orb, basis_set_ri, &
    1080             :                                xas_tdp_control%ri_m_potential%potential_type, qs_env, &
    1081          42 :                                excited_atoms=ex_atoms, x_range=xas_tdp_control%ri_m_potential%cutoff_radius)
    1082             : 
    1083             :       CALL get_opt_3c_dist2d(xas_tdp_env%opt_dist2d_ex, ab_list, ac_list, basis_set_orb, &
    1084          42 :                              basis_set_orb, basis_set_ri, qs_env)
    1085          42 :       CALL release_neighbor_list_sets(ab_list)
    1086          42 :       CALL release_neighbor_list_sets(ac_list)
    1087             : 
    1088             : !  Build the ab and ac centers neighbor lists based on the optimized distribution
    1089             :       CALL build_xas_tdp_ovlp_nl(ab_list, basis_set_orb, basis_set_orb, qs_env, &
    1090          42 :                                  ext_dist2d=xas_tdp_env%opt_dist2d_ex)
    1091             :       CALL build_xas_tdp_3c_nl(ac_list, basis_set_orb, basis_set_ri, &
    1092             :                                xas_tdp_control%ri_m_potential%potential_type, qs_env, &
    1093             :                                excited_atoms=ex_atoms, x_range=xas_tdp_control%ri_m_potential%cutoff_radius, &
    1094          42 :                                ext_dist2d=xas_tdp_env%opt_dist2d_ex)
    1095             : 
    1096             : !  Allocate, init and compute the integrals.
    1097         210 :       ALLOCATE (blk_size_orb(natom), blk_size_ri(natom))
    1098          42 :       CALL cp_dbcsr_dist2d_to_dist(xas_tdp_env%opt_dist2d_ex, opt_dbcsr_dist)
    1099          42 :       CALL get_particle_set(particle_set, qs_kind_set, nsgf=blk_size_orb, basis=basis_set_orb)
    1100          42 :       CALL get_particle_set(particle_set, qs_kind_set, nsgf=blk_size_ri, basis=basis_set_ri)
    1101             : 
    1102         420 :       ALLOCATE (xas_tdp_env%ri_3c_ex)
    1103             :       CALL create_pqX_tensor(xas_tdp_env%ri_3c_ex, ab_list, ac_list, opt_dbcsr_dist, blk_size_orb, &
    1104          42 :                              blk_size_orb, blk_size_ri)
    1105             :       CALL fill_pqX_tensor(xas_tdp_env%ri_3c_ex, ab_list, ac_list, basis_set_orb, basis_set_orb, &
    1106             :                            basis_set_ri, xas_tdp_control%ri_m_potential, qs_env, &
    1107          42 :                            eps_screen=xas_tdp_control%eps_screen)
    1108             : 
    1109             : ! Clean-up
    1110          42 :       CALL release_neighbor_list_sets(ab_list)
    1111          42 :       CALL release_neighbor_list_sets(ac_list)
    1112          42 :       CALL dbcsr_distribution_release(opt_dbcsr_dist)
    1113          42 :       DEALLOCATE (basis_set_ri, basis_set_orb)
    1114             : 
    1115             :       !not strictly necessary but avoid having any load unbalance here being reported in the
    1116             :       !timings for other routines
    1117          42 :       CALL para_env%sync()
    1118             : 
    1119          42 :       CALL timestop(handle)
    1120             : 
    1121         126 :    END SUBROUTINE compute_ri_3c_exchange
    1122             : 
    1123             : ! **************************************************************************************************
    1124             : !> \brief Computes the RI Coulomb 3-center integrals (ab|c), where c is from the RI_XAS basis and
    1125             : !>        centered on the excited atoms of xas_tdp_env
    1126             : !> \param xas_tdp_env ...
    1127             : !> \param qs_env ...
    1128             : !> \note  The ri_3c_coul tensor of xas_tdp_env is defined and allocated here. Only computed once
    1129             : !>        for the whole system (for optimized load balance). Ok because not too much memory needed
    1130             : ! **************************************************************************************************
    1131          44 :    SUBROUTINE compute_ri_3c_coulomb(xas_tdp_env, qs_env)
    1132             : 
    1133             :       TYPE(xas_tdp_env_type), POINTER                    :: xas_tdp_env
    1134             :       TYPE(qs_environment_type), POINTER                 :: qs_env
    1135             : 
    1136             :       CHARACTER(len=*), PARAMETER :: routineN = 'compute_ri_3c_coulomb'
    1137             : 
    1138             :       INTEGER                                            :: handle, natom, nkind
    1139             :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: blk_size_orb, blk_size_ri
    1140             :       TYPE(dbcsr_distribution_type)                      :: opt_dbcsr_dist
    1141          44 :       TYPE(gto_basis_set_p_type), DIMENSION(:), POINTER  :: basis_set_orb, basis_set_ri
    1142             :       TYPE(libint_potential_type)                        :: pot
    1143             :       TYPE(mp_para_env_type), POINTER                    :: para_env
    1144             :       TYPE(neighbor_list_set_p_type), DIMENSION(:), &
    1145          44 :          POINTER                                         :: ab_list, ac_list
    1146          44 :       TYPE(particle_type), DIMENSION(:), POINTER         :: particle_set
    1147          44 :       TYPE(qs_kind_type), DIMENSION(:), POINTER          :: qs_kind_set
    1148             : 
    1149          44 :       NULLIFY (basis_set_ri, basis_set_orb, ac_list, ab_list, qs_kind_set, para_env, particle_set)
    1150             : 
    1151          44 :       CALL timeset(routineN, handle)
    1152             : 
    1153             : !  Take what we need from the qs_env
    1154             :       CALL get_qs_env(qs_env, nkind=nkind, qs_kind_set=qs_kind_set, para_env=para_env, &
    1155          44 :                       natom=natom, particle_set=particle_set)
    1156             : 
    1157             : !  Build the basis set lists
    1158         198 :       ALLOCATE (basis_set_ri(nkind))
    1159         198 :       ALLOCATE (basis_set_orb(nkind))
    1160          44 :       CALL basis_set_list_setup(basis_set_ri, "RI_XAS", qs_kind_set)
    1161          44 :       CALL basis_set_list_setup(basis_set_orb, "ORB", qs_kind_set)
    1162             : 
    1163             : !  Get the optimized distribution 2d for these integrals (and store it in xas_tdp_env)
    1164             :       CALL build_xas_tdp_ovlp_nl(ab_list, basis_set_orb, basis_set_orb, qs_env, &
    1165          44 :                                  excited_atoms=xas_tdp_env%ex_atom_indices)
    1166             :       CALL build_xas_tdp_3c_nl(ac_list, basis_set_orb, basis_set_ri, do_potential_id, &
    1167          44 :                                qs_env, excited_atoms=xas_tdp_env%ex_atom_indices)
    1168             :       CALL get_opt_3c_dist2d(xas_tdp_env%opt_dist2d_coul, ab_list, ac_list, basis_set_orb, &
    1169          44 :                              basis_set_orb, basis_set_ri, qs_env, only_bc_same_center=.TRUE.)
    1170          44 :       CALL release_neighbor_list_sets(ab_list)
    1171          44 :       CALL release_neighbor_list_sets(ac_list)
    1172             : 
    1173             : !  Build a neighbor list for the ab centers. Assume (aI|c) = sum_b c_bI (ab|c), with c_bI only
    1174             : !  non-zero for b centered on the same atom as c => build overlap nl, but only keeping b if centered
    1175             : !  on an excited atom
    1176             :       CALL build_xas_tdp_ovlp_nl(ab_list, basis_set_orb, basis_set_orb, qs_env, &
    1177             :                                  excited_atoms=xas_tdp_env%ex_atom_indices, &
    1178          44 :                                  ext_dist2d=xas_tdp_env%opt_dist2d_coul)
    1179             : 
    1180             : !  Build a neighbor list for the ac centers. Since we later contract as (aI|c) and we assume I is
    1181             : !  very localized on the same atom as c, we take a,c as neighbors if they overlap
    1182             :       CALL build_xas_tdp_3c_nl(ac_list, basis_set_orb, basis_set_ri, do_potential_id, &
    1183             :                                qs_env, excited_atoms=xas_tdp_env%ex_atom_indices, &
    1184          44 :                                ext_dist2d=xas_tdp_env%opt_dist2d_coul)
    1185             : 
    1186             : !  Allocate, init and compute the integrals
    1187         220 :       ALLOCATE (blk_size_orb(natom), blk_size_ri(natom))
    1188          44 :       CALL cp_dbcsr_dist2d_to_dist(xas_tdp_env%opt_dist2d_coul, opt_dbcsr_dist)
    1189          44 :       CALL get_particle_set(particle_set, qs_kind_set, nsgf=blk_size_orb, basis=basis_set_orb)
    1190          44 :       CALL get_particle_set(particle_set, qs_kind_set, nsgf=blk_size_ri, basis=basis_set_ri)
    1191             :       pot%potential_type = do_potential_coulomb
    1192             : 
    1193         440 :       ALLOCATE (xas_tdp_env%ri_3c_coul)
    1194             :       CALL create_pqX_tensor(xas_tdp_env%ri_3c_coul, ab_list, ac_list, opt_dbcsr_dist, blk_size_orb, &
    1195          44 :                              blk_size_orb, blk_size_ri, only_bc_same_center=.TRUE.)
    1196             :       CALL fill_pqX_tensor(xas_tdp_env%ri_3c_coul, ab_list, ac_list, basis_set_orb, basis_set_orb, &
    1197          44 :                            basis_set_ri, pot, qs_env, only_bc_same_center=.TRUE.)
    1198             : 
    1199             : ! Clean-up
    1200          44 :       CALL release_neighbor_list_sets(ab_list)
    1201          44 :       CALL release_neighbor_list_sets(ac_list)
    1202          44 :       CALL dbcsr_distribution_release(opt_dbcsr_dist)
    1203          44 :       DEALLOCATE (basis_set_ri, basis_set_orb)
    1204             : 
    1205             :       !not strictly necessary but avoid having any load unbalance here being reported in the
    1206             :       !timings for other routines
    1207          44 :       CALL para_env%sync()
    1208             : 
    1209          44 :       CALL timestop(handle)
    1210             : 
    1211         132 :    END SUBROUTINE compute_ri_3c_coulomb
    1212             : 
    1213             : ! **************************************************************************************************
    1214             : !> \brief Computes the two-center Exchange integral needed for the RI in kernel calculation. Stores
    1215             : !>        the integrals in the xas_tdp_env as global (small) arrays. Does that for a given excited
    1216             : !>        kind. The quantity stored is M^-1 (P|Q) M^-1, where M is the RI metric. If the metric is
    1217             : !>        the same as the exchange potential, then we end up with the V-approximation (P|Q)^-1
    1218             : !>        By default (if no metric), the ri_m_potential is a copy of the x_potential
    1219             : !> \param ex_kind ...
    1220             : !> \param xas_tdp_env ...
    1221             : !> \param xas_tdp_control ...
    1222             : !> \param qs_env ...
    1223             : !> \note Computes all these integrals in non-PBCs as we assume that the range is short enough that
    1224             : !>       atoms do not exchange with their periodic images
    1225             : ! **************************************************************************************************
    1226          42 :    SUBROUTINE compute_ri_exchange2_int(ex_kind, xas_tdp_env, xas_tdp_control, qs_env)
    1227             : 
    1228             :       INTEGER, INTENT(IN)                                :: ex_kind
    1229             :       TYPE(xas_tdp_env_type), POINTER                    :: xas_tdp_env
    1230             :       TYPE(xas_tdp_control_type), POINTER                :: xas_tdp_control
    1231             :       TYPE(qs_environment_type), POINTER                 :: qs_env
    1232             : 
    1233             :       INTEGER                                            :: egfp, egfq, maxl, ncop, ncoq, nset, &
    1234             :                                                             nsgf, pset, qset, sgfp, sgfq, unit_id
    1235          42 :       INTEGER, DIMENSION(:), POINTER                     :: lmax, lmin, npgf_set, nsgf_set
    1236          42 :       INTEGER, DIMENSION(:, :), POINTER                  :: first_sgf
    1237             :       REAL(dp)                                           :: r(3)
    1238          42 :       REAL(dp), ALLOCATABLE, DIMENSION(:, :)             :: metric, pq, work
    1239          42 :       REAL(dp), DIMENSION(:, :), POINTER                 :: rpgf, sphi, zet
    1240             :       TYPE(cp_libint_t)                                  :: lib
    1241             :       TYPE(gto_basis_set_type), POINTER                  :: ri_basis
    1242             :       TYPE(mp_para_env_type), POINTER                    :: para_env
    1243          42 :       TYPE(qs_kind_type), DIMENSION(:), POINTER          :: qs_kind_set
    1244             : 
    1245          42 :       NULLIFY (ri_basis, qs_kind_set, para_env, lmin, lmax, npgf_set, zet, rpgf, first_sgf)
    1246          42 :       NULLIFY (sphi, nsgf_set)
    1247             : 
    1248             : !  Initialization
    1249          42 :       CALL get_qs_env(qs_env, qs_kind_set=qs_kind_set, para_env=para_env)
    1250          42 :       IF (ASSOCIATED(xas_tdp_env%ri_inv_ex)) THEN
    1251           4 :          DEALLOCATE (xas_tdp_env%ri_inv_ex)
    1252             :       END IF
    1253             : 
    1254             : !  Get the RI basis of interest and its quantum numbers
    1255          42 :       CALL get_qs_kind(qs_kind_set(ex_kind), basis_set=ri_basis, basis_type="RI_XAS")
    1256             :       CALL get_gto_basis_set(ri_basis, nsgf=nsgf, maxl=maxl, npgf=npgf_set, lmin=lmin, &
    1257             :                              lmax=lmax, zet=zet, pgf_radius=rpgf, first_sgf=first_sgf, &
    1258          42 :                              nsgf_set=nsgf_set, sphi=sphi, nset=nset)
    1259         168 :       ALLOCATE (metric(nsgf, nsgf))
    1260      397238 :       metric = 0.0_dp
    1261             : 
    1262          42 :       r = 0.0_dp
    1263             : 
    1264             :       !init the libint 2-center object
    1265          42 :       CALL cp_libint_init_2eri(lib, maxl)
    1266          42 :       CALL cp_libint_set_contrdepth(lib, 1)
    1267             : 
    1268             :       !make sure the truncted coulomb is initialized
    1269          42 :       IF (xas_tdp_control%ri_m_potential%potential_type == do_potential_truncated) THEN
    1270             : 
    1271           6 :          IF (2*maxl + 1 > get_lmax_init()) THEN
    1272           6 :             IF (para_env%mepos == 0) THEN
    1273           3 :                CALL open_file(unit_number=unit_id, file_name=xas_tdp_control%ri_m_potential%filename)
    1274             :             END IF
    1275           6 :             CALL init(2*maxl + 1, unit_id, para_env%mepos, para_env)
    1276           6 :             IF (para_env%mepos == 0) THEN
    1277           3 :                CALL close_file(unit_id)
    1278             :             END IF
    1279             :          END IF
    1280             :       END IF
    1281             : 
    1282             : !  Compute the RI metric
    1283         126 :       DO pset = 1, nset
    1284          84 :          ncop = npgf_set(pset)*ncoset(lmax(pset))
    1285          84 :          sgfp = first_sgf(1, pset)
    1286          84 :          egfp = sgfp + nsgf_set(pset) - 1
    1287             : 
    1288         294 :          DO qset = 1, nset
    1289         168 :             ncoq = npgf_set(qset)*ncoset(lmax(qset))
    1290         168 :             sgfq = first_sgf(1, qset)
    1291         168 :             egfq = sgfq + nsgf_set(qset) - 1
    1292             : 
    1293         672 :             ALLOCATE (work(ncop, ncoq))
    1294     1317508 :             work = 0.0_dp
    1295             : 
    1296             :             CALL eri_2center(work, lmin(pset), lmax(pset), npgf_set(pset), zet(:, pset), rpgf(:, pset), &
    1297             :                              r, lmin(qset), lmax(qset), npgf_set(qset), zet(:, qset), rpgf(:, qset), &
    1298         168 :                              r, 0.0_dp, lib, xas_tdp_control%ri_m_potential)
    1299             : 
    1300             :             CALL ab_contract(metric(sgfp:egfp, sgfq:egfq), work, sphi(:, sgfp:), sphi(:, sgfq:), &
    1301         168 :                              ncop, ncoq, nsgf_set(pset), nsgf_set(qset))
    1302             : 
    1303         252 :             DEALLOCATE (work)
    1304             :          END DO !qset
    1305             :       END DO !pset
    1306             : 
    1307             : !  Inverting (to M^-1)
    1308          42 :       CALL invmat_symm(metric)
    1309             : 
    1310          42 :       IF (.NOT. xas_tdp_control%do_ri_metric) THEN
    1311             : 
    1312             :          !If no metric, then x_pot = ri_m_pot and (P|Q)^-1 = M^-1 (V-approximation)
    1313         108 :          ALLOCATE (xas_tdp_env%ri_inv_ex(nsgf, nsgf))
    1314      374540 :          xas_tdp_env%ri_inv_ex(:, :) = metric(:, :)
    1315          36 :          CALL cp_libint_cleanup_2eri(lib)
    1316          36 :          RETURN
    1317             : 
    1318             :       END IF
    1319             : 
    1320             :       !make sure the truncted coulomb is initialized
    1321           6 :       IF (xas_tdp_control%x_potential%potential_type == do_potential_truncated) THEN
    1322             : 
    1323           2 :          IF (2*maxl + 1 > get_lmax_init()) THEN
    1324           0 :             IF (para_env%mepos == 0) THEN
    1325           0 :                CALL open_file(unit_number=unit_id, file_name=xas_tdp_control%x_potential%filename)
    1326             :             END IF
    1327           0 :             CALL init(2*maxl + 1, unit_id, para_env%mepos, para_env)
    1328           0 :             IF (para_env%mepos == 0) THEN
    1329           0 :                CALL close_file(unit_id)
    1330             :             END IF
    1331             :          END IF
    1332             :       END IF
    1333             : 
    1334             : !  Compute the proper exchange 2-center
    1335          18 :       ALLOCATE (pq(nsgf, nsgf))
    1336       22698 :       pq = 0.0_dp
    1337             : 
    1338          18 :       DO pset = 1, nset
    1339          12 :          ncop = npgf_set(pset)*ncoset(lmax(pset))
    1340          12 :          sgfp = first_sgf(1, pset)
    1341          12 :          egfp = sgfp + nsgf_set(pset) - 1
    1342             : 
    1343          42 :          DO qset = 1, nset
    1344          24 :             ncoq = npgf_set(qset)*ncoset(lmax(qset))
    1345          24 :             sgfq = first_sgf(1, qset)
    1346          24 :             egfq = sgfq + nsgf_set(qset) - 1
    1347             : 
    1348          96 :             ALLOCATE (work(ncop, ncoq))
    1349       58824 :             work = 0.0_dp
    1350             : 
    1351             :             CALL eri_2center(work, lmin(pset), lmax(pset), npgf_set(pset), zet(:, pset), rpgf(:, pset), &
    1352             :                              r, lmin(qset), lmax(qset), npgf_set(qset), zet(:, qset), rpgf(:, qset), &
    1353          24 :                              r, 0.0_dp, lib, xas_tdp_control%x_potential)
    1354             : 
    1355             :             CALL ab_contract(pq(sgfp:egfp, sgfq:egfq), work, sphi(:, sgfp:), sphi(:, sgfq:), &
    1356          24 :                              ncop, ncoq, nsgf_set(pset), nsgf_set(qset))
    1357             : 
    1358          36 :             DEALLOCATE (work)
    1359             :          END DO !qset
    1360             :       END DO !pset
    1361             : 
    1362             : !  Compute and store M^-1 (P|Q) M^-1
    1363          18 :       ALLOCATE (xas_tdp_env%ri_inv_ex(nsgf, nsgf))
    1364       22698 :       xas_tdp_env%ri_inv_ex = 0.0_dp
    1365             : 
    1366             :       CALL dgemm('N', 'N', nsgf, nsgf, nsgf, 1.0_dp, metric, nsgf, pq, nsgf, &
    1367           6 :                  0.0_dp, xas_tdp_env%ri_inv_ex, nsgf)
    1368             :       CALL dgemm('N', 'N', nsgf, nsgf, nsgf, 1.0_dp, xas_tdp_env%ri_inv_ex, nsgf, metric, nsgf, &
    1369           6 :                  0.0_dp, pq, nsgf)
    1370       22698 :       xas_tdp_env%ri_inv_ex(:, :) = pq(:, :)
    1371             : 
    1372           6 :       CALL cp_libint_cleanup_2eri(lib)
    1373             : 
    1374         126 :    END SUBROUTINE compute_ri_exchange2_int
    1375             : 
    1376             : ! **************************************************************************************************
    1377             : !> \brief Computes the two-center Coulomb integral needed for the RI in kernel calculation. Stores
    1378             : !>        the integrals (P|Q)^-1 in the xas_tdp_env as global (small) arrays. Does that for a given
    1379             : !>        excited kind
    1380             : !> \param ex_kind ...
    1381             : !> \param xas_tdp_env ...
    1382             : !> \param xas_tdp_control ...
    1383             : !> \param qs_env ...
    1384             : ! **************************************************************************************************
    1385          52 :    SUBROUTINE compute_ri_coulomb2_int(ex_kind, xas_tdp_env, xas_tdp_control, qs_env)
    1386             : 
    1387             :       INTEGER, INTENT(IN)                                :: ex_kind
    1388             :       TYPE(xas_tdp_env_type), POINTER                    :: xas_tdp_env
    1389             :       TYPE(xas_tdp_control_type), POINTER                :: xas_tdp_control
    1390             :       TYPE(qs_environment_type), POINTER                 :: qs_env
    1391             : 
    1392             :       INTEGER                                            :: nsgf
    1393             :       TYPE(gto_basis_set_type), POINTER                  :: ri_basis
    1394          52 :       TYPE(qs_kind_type), DIMENSION(:), POINTER          :: qs_kind_set
    1395             : 
    1396          52 :       NULLIFY (ri_basis, qs_kind_set)
    1397             : 
    1398             : !  Initialization
    1399          52 :       CALL get_qs_env(qs_env, qs_kind_set=qs_kind_set)
    1400          52 :       IF (ASSOCIATED(xas_tdp_env%ri_inv_coul)) THEN
    1401           4 :          DEALLOCATE (xas_tdp_env%ri_inv_coul)
    1402             :       END IF
    1403             : 
    1404             : !  Get the RI basis of interest and its quantum numbers
    1405          52 :       CALL get_qs_kind(qs_kind_set(ex_kind), basis_set=ri_basis, basis_type="RI_XAS")
    1406          52 :       CALL get_gto_basis_set(ri_basis, nsgf=nsgf)
    1407         208 :       ALLOCATE (xas_tdp_env%ri_inv_coul(nsgf, nsgf))
    1408      482860 :       xas_tdp_env%ri_inv_coul = 0.0_dp
    1409             : 
    1410          52 :       IF (.NOT. xas_tdp_control%is_periodic) THEN
    1411             :          CALL int_operators_r12_ab_os(r12_operator=operator_coulomb, vab=xas_tdp_env%ri_inv_coul, &
    1412             :                                       rab=(/0.0_dp, 0.0_dp, 0.0_dp/), fba=ri_basis, fbb=ri_basis, &
    1413          36 :                                       calculate_forces=.FALSE.)
    1414          36 :          CPASSERT(ASSOCIATED(xas_tdp_control))
    1415             :       ELSE
    1416          16 :          CALL periodic_ri_coulomb2(xas_tdp_env%ri_inv_coul, ri_basis, qs_env)
    1417             :       END IF
    1418             : 
    1419             : !  Inverting
    1420          52 :       CALL invmat_symm(xas_tdp_env%ri_inv_coul)
    1421             : 
    1422         104 :    END SUBROUTINE compute_ri_coulomb2_int
    1423             : 
    1424             : ! **************************************************************************************************
    1425             : !> \brief Computes the two-center inverse coulomb integral in the case of PBCs
    1426             : !> \param ri_coul2 ...
    1427             : !> \param ri_basis ...
    1428             : !> \param qs_env ...
    1429             : ! **************************************************************************************************
    1430          16 :    SUBROUTINE periodic_ri_coulomb2(ri_coul2, ri_basis, qs_env)
    1431             : 
    1432             :       REAL(dp), DIMENSION(:, :), INTENT(INOUT)           :: ri_coul2
    1433             :       TYPE(gto_basis_set_type), POINTER                  :: ri_basis
    1434             :       TYPE(qs_environment_type), POINTER                 :: qs_env
    1435             : 
    1436             :       INTEGER                                            :: maxco, ncop, ncoq, nset, op, oq, ppgf, &
    1437             :                                                             pset, qpgf, qset, sgfp, sgfq
    1438          32 :       INTEGER, DIMENSION(:), POINTER                     :: lmax, lmin, npgf, nsgf
    1439          16 :       INTEGER, DIMENSION(:, :), POINTER                  :: first_sgf
    1440             :       REAL(dp)                                           :: r(3)
    1441          16 :       REAL(dp), ALLOCATABLE, DIMENSION(:, :)             :: hpq
    1442          16 :       REAL(dp), DIMENSION(:, :), POINTER                 :: sphi, zet
    1443             :       TYPE(cell_type), POINTER                           :: cell
    1444         416 :       TYPE(cp_eri_mme_param)                             :: mme_param
    1445             :       TYPE(mp_para_env_type), POINTER                    :: para_env
    1446          16 :       TYPE(qs_kind_type), DIMENSION(:), POINTER          :: qs_kind_set
    1447             : 
    1448          16 :       NULLIFY (cell, qs_kind_set, lmin, lmax, nsgf, npgf, zet, sphi, first_sgf)
    1449             : 
    1450             :       ! Use eri_mme for this. Don't want to annoy the user with a full input section just for this
    1451             :       ! tiny bit => initialize our own eri_mme section with the defaults
    1452             : 
    1453          16 :       CALL get_qs_env(qs_env, cell=cell, qs_kind_set=qs_kind_set, para_env=para_env)
    1454             : 
    1455             :       CALL eri_mme_init(mme_param%par, n_minimax=20, cutoff=300.0_dp, do_calib_cutoff=.TRUE., &
    1456             :                         cutoff_min=10.0_dp, cutoff_max=10000.0_dp, cutoff_eps=0.01_dp, &
    1457             :                         cutoff_delta=0.9_dp, sum_precision=1.0E-12_dp, debug=.FALSE., &
    1458             :                         debug_delta=1.0E-6_dp, debug_nsum=1000000, unit_nr=0, print_calib=.FALSE., &
    1459          16 :                         do_error_est=.FALSE.)
    1460          16 :       mme_param%do_calib = .TRUE.
    1461             : 
    1462          16 :       CALL cp_eri_mme_set_params(mme_param, cell, qs_kind_set, basis_type_1="RI_XAS", para_env=para_env)
    1463             : 
    1464             :       CALL get_gto_basis_set(ri_basis, lmax=lmax, npgf=npgf, zet=zet, lmin=lmin, nset=nset, &
    1465          16 :                              nsgf_set=nsgf, sphi=sphi, first_sgf=first_sgf, maxco=maxco)
    1466             : 
    1467          16 :       r = 0.0_dp
    1468          64 :       ALLOCATE (hpq(nset*maxco, nset*maxco))
    1469      161616 :       hpq = 0.0_dp
    1470             : 
    1471          48 :       DO pset = 1, nset
    1472          32 :          ncop = npgf(pset)*ncoset(lmax(pset))
    1473          32 :          sgfp = first_sgf(1, pset)
    1474             : 
    1475         112 :          DO qset = 1, nset
    1476          64 :             ncoq = npgf(qset)*ncoset(lmax(qset))
    1477          64 :             sgfq = first_sgf(1, qset)
    1478             : 
    1479         608 :             DO ppgf = 1, npgf(pset)
    1480         544 :                op = (pset - 1)*maxco + (ppgf - 1)*ncoset(lmax(pset))
    1481        5232 :                DO qpgf = 1, npgf(qset)
    1482        4624 :                   oq = (qset - 1)*maxco + (qpgf - 1)*ncoset(lmax(qset))
    1483             : 
    1484             :                   CALL eri_mme_2c_integrate(mme_param%par, lmin(pset), lmax(pset), lmin(qset), &
    1485             :                                             lmax(qset), zet(ppgf, pset), zet(qpgf, qset), r, hpq, &
    1486        5168 :                                             op, oq)
    1487             : 
    1488             :                END DO !qpgf
    1489             :             END DO ! ppgf
    1490             : 
    1491             :             !contraction into sgfs
    1492          64 :             op = (pset - 1)*maxco + 1
    1493          64 :             oq = (qset - 1)*maxco + 1
    1494             : 
    1495             :             CALL ab_contract(ri_coul2(sgfp:sgfp + nsgf(pset) - 1, sgfq:sgfq + nsgf(qset) - 1), &
    1496             :                              hpq(op:op + ncop - 1, oq:oq + ncoq - 1), sphi(:, sgfp:), sphi(:, sgfq:), &
    1497          96 :                              ncop, ncoq, nsgf(pset), nsgf(qset))
    1498             : 
    1499             :          END DO !qset
    1500             :       END DO !pset
    1501             : 
    1502             :       !celan-up
    1503          16 :       CALL eri_mme_release(mme_param%par)
    1504             : 
    1505          64 :    END SUBROUTINE periodic_ri_coulomb2
    1506             : 
    1507             : END MODULE xas_tdp_integrals

Generated by: LCOV version 1.15