LCOV - code coverage report
Current view: top level - src - qs_tensors.F (source / functions) Hit Total Coverage
Test: CP2K Regtests (git:262480d) Lines: 1093 1152 94.9 %
Date: 2024-11-22 07:00:40 Functions: 19 21 90.5 %

          Line data    Source code
       1             : !--------------------------------------------------------------------------------------------------!
       2             : !   CP2K: A general program to perform molecular dynamics simulations                              !
       3             : !   Copyright 2000-2024 CP2K developers group <https://cp2k.org>                                   !
       4             : !                                                                                                  !
       5             : !   SPDX-License-Identifier: GPL-2.0-or-later                                                      !
       6             : !--------------------------------------------------------------------------------------------------!
       7             : 
       8             : ! **************************************************************************************************
       9             : !> \brief Utility methods to build 3-center integral tensors of various types.
      10             : ! **************************************************************************************************
      11             : MODULE qs_tensors
      12             :    USE OMP_LIB,                         ONLY: omp_get_num_threads,&
      13             :                                               omp_get_thread_num
      14             :    USE ai_contraction,                  ONLY: block_add
      15             :    USE ai_contraction_sphi,             ONLY: ab_contract,&
      16             :                                               abc_contract_xsmm
      17             :    USE atomic_kind_types,               ONLY: atomic_kind_type
      18             :    USE basis_set_types,                 ONLY: get_gto_basis_set,&
      19             :                                               gto_basis_set_p_type,&
      20             :                                               gto_basis_set_type
      21             :    USE block_p_types,                   ONLY: block_p_type
      22             :    USE cell_types,                      ONLY: cell_type,&
      23             :                                               real_to_scaled
      24             :    USE cp_array_utils,                  ONLY: cp_2d_r_p_type
      25             :    USE cp_control_types,                ONLY: dft_control_type
      26             :    USE cp_dbcsr_api,                    ONLY: dbcsr_filter,&
      27             :                                               dbcsr_finalize,&
      28             :                                               dbcsr_get_block_p,&
      29             :                                               dbcsr_get_matrix_type,&
      30             :                                               dbcsr_has_symmetry,&
      31             :                                               dbcsr_type,&
      32             :                                               dbcsr_type_antisymmetric,&
      33             :                                               dbcsr_type_no_symmetry
      34             :    USE cp_dbcsr_cp2k_link,              ONLY: cp_dbcsr_alloc_block_from_nbl
      35             :    USE cp_files,                        ONLY: close_file,&
      36             :                                               open_file
      37             :    USE dbt_api,                         ONLY: &
      38             :         dbt_blk_sizes, dbt_clear, dbt_copy, dbt_create, dbt_destroy, dbt_filter, dbt_get_block, &
      39             :         dbt_get_info, dbt_get_num_blocks, dbt_get_nze_total, dbt_iterator_next_block, &
      40             :         dbt_iterator_num_blocks, dbt_iterator_start, dbt_iterator_stop, dbt_iterator_type, &
      41             :         dbt_ndims, dbt_put_block, dbt_reserve_blocks, dbt_type
      42             :    USE distribution_1d_types,           ONLY: distribution_1d_type
      43             :    USE distribution_2d_types,           ONLY: distribution_2d_type
      44             :    USE gamma,                           ONLY: init_md_ftable
      45             :    USE hfx_compression_methods,         ONLY: hfx_add_mult_cache_elements,&
      46             :                                               hfx_add_single_cache_element,&
      47             :                                               hfx_decompress_first_cache,&
      48             :                                               hfx_flush_last_cache,&
      49             :                                               hfx_get_mult_cache_elements,&
      50             :                                               hfx_get_single_cache_element,&
      51             :                                               hfx_reset_cache_and_container
      52             :    USE hfx_types,                       ONLY: alloc_containers,&
      53             :                                               dealloc_containers,&
      54             :                                               hfx_cache_type,&
      55             :                                               hfx_compression_type,&
      56             :                                               hfx_container_type,&
      57             :                                               hfx_init_container
      58             :    USE input_constants,                 ONLY: do_potential_coulomb,&
      59             :                                               do_potential_id,&
      60             :                                               do_potential_short,&
      61             :                                               do_potential_truncated
      62             :    USE input_section_types,             ONLY: section_vals_val_get
      63             :    USE kinds,                           ONLY: dp,&
      64             :                                               int_8
      65             :    USE kpoint_types,                    ONLY: get_kpoint_info,&
      66             :                                               kpoint_type
      67             :    USE libint_2c_3c,                    ONLY: cutoff_screen_factor,&
      68             :                                               eri_2center,&
      69             :                                               eri_2center_derivs,&
      70             :                                               eri_3center,&
      71             :                                               eri_3center_derivs,&
      72             :                                               libint_potential_type
      73             :    USE libint_wrapper,                  ONLY: &
      74             :         cp_libint_cleanup_2eri, cp_libint_cleanup_2eri1, cp_libint_cleanup_3eri, &
      75             :         cp_libint_cleanup_3eri1, cp_libint_init_2eri, cp_libint_init_2eri1, cp_libint_init_3eri, &
      76             :         cp_libint_init_3eri1, cp_libint_set_contrdepth, cp_libint_t
      77             :    USE message_passing,                 ONLY: mp_para_env_type
      78             :    USE molecule_types,                  ONLY: molecule_type
      79             :    USE orbital_pointers,                ONLY: ncoset
      80             :    USE particle_types,                  ONLY: particle_type
      81             :    USE qs_environment_types,            ONLY: get_qs_env,&
      82             :                                               qs_environment_type
      83             :    USE qs_kind_types,                   ONLY: qs_kind_type
      84             :    USE qs_neighbor_list_types,          ONLY: &
      85             :         get_iterator_info, get_neighbor_list_set_p, neighbor_list_iterate, &
      86             :         neighbor_list_iterator_create, neighbor_list_iterator_p_type, &
      87             :         neighbor_list_iterator_release, neighbor_list_set_p_type, nl_sub_iterate, &
      88             :         release_neighbor_list_sets
      89             :    USE qs_neighbor_lists,               ONLY: atom2d_build,&
      90             :                                               atom2d_cleanup,&
      91             :                                               build_neighbor_lists,&
      92             :                                               local_atoms_type,&
      93             :                                               pair_radius_setup
      94             :    USE qs_tensors_types,                ONLY: &
      95             :         distribution_3d_destroy, distribution_3d_type, neighbor_list_3c_iterator_type, &
      96             :         neighbor_list_3c_type, symmetric_ij, symmetric_ijk, symmetric_jk, symmetric_none, &
      97             :         symmetrik_ik
      98             :    USE t_c_g0,                          ONLY: get_lmax_init,&
      99             :                                               init
     100             :    USE util,                            ONLY: get_limit
     101             : 
     102             : !$ USE OMP_LIB, ONLY: omp_get_max_threads, omp_get_thread_num
     103             : #include "./base/base_uses.f90"
     104             : 
     105             :    IMPLICIT NONE
     106             : 
     107             :    PRIVATE
     108             : 
     109             :    CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'qs_tensors'
     110             : 
     111             :    PUBLIC :: build_3c_neighbor_lists, &
     112             :              neighbor_list_3c_destroy, neighbor_list_3c_iterate, neighbor_list_3c_iterator_create, &
     113             :              neighbor_list_3c_iterator_destroy, get_3c_iterator_info, build_3c_integrals, &
     114             :              build_2c_neighbor_lists, build_2c_integrals, cutoff_screen_factor, &
     115             :              get_tensor_occupancy, compress_tensor, decompress_tensor, &
     116             :              build_3c_derivatives, build_2c_derivatives, calc_2c_virial, calc_3c_virial
     117             : 
     118             :    TYPE one_dim_int_array
     119             :       INTEGER, DIMENSION(:), ALLOCATABLE    :: array
     120             :    END TYPE
     121             : 
     122             :    ! cache size for integral compression
     123             :    INTEGER, PARAMETER, PRIVATE :: cache_size = 1024
     124             : 
     125             : CONTAINS
     126             : 
     127             : ! **************************************************************************************************
     128             : !> \brief Build 2-center neighborlists adapted to different operators
     129             : !>        This mainly wraps build_neighbor_lists for consistency with build_3c_neighbor_lists
     130             : !> \param ij_list 2c neighbor list for atom pairs i, j
     131             : !> \param basis_i basis object for atoms i
     132             : !> \param basis_j basis object for atoms j
     133             : !> \param potential_parameter ...
     134             : !> \param name name of 2c neighbor list
     135             : !> \param qs_env ...
     136             : !> \param sym_ij Symmetry in i, j (default .TRUE.)
     137             : !> \param molecular ...
     138             : !> \param dist_2d optionally a custom 2d distribution
     139             : !> \param pot_to_rad which radius (1 for i, 2 for j) should be adapted to incorporate potential
     140             : ! **************************************************************************************************
     141        9978 :    SUBROUTINE build_2c_neighbor_lists(ij_list, basis_i, basis_j, potential_parameter, name, qs_env, &
     142             :                                       sym_ij, molecular, dist_2d, pot_to_rad)
     143             :       TYPE(neighbor_list_set_p_type), DIMENSION(:), &
     144             :          POINTER                                         :: ij_list
     145             :       TYPE(gto_basis_set_p_type), DIMENSION(:)           :: basis_i, basis_j
     146             :       TYPE(libint_potential_type), INTENT(IN)            :: potential_parameter
     147             :       CHARACTER(LEN=*), INTENT(IN)                       :: name
     148             :       TYPE(qs_environment_type), POINTER                 :: qs_env
     149             :       LOGICAL, INTENT(IN), OPTIONAL                      :: sym_ij, molecular
     150             :       TYPE(distribution_2d_type), OPTIONAL, POINTER      :: dist_2d
     151             :       INTEGER, INTENT(IN), OPTIONAL                      :: pot_to_rad
     152             : 
     153             :       INTEGER                                            :: ikind, nkind, pot_to_rad_prv
     154             :       LOGICAL, ALLOCATABLE, DIMENSION(:)                 :: i_present, j_present
     155        9978 :       REAL(dp), ALLOCATABLE, DIMENSION(:, :)             :: pair_radius
     156             :       REAL(kind=dp)                                      :: subcells
     157             :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: i_radius, j_radius
     158        9978 :       TYPE(atomic_kind_type), DIMENSION(:), POINTER      :: atomic_kind_set
     159             :       TYPE(cell_type), POINTER                           :: cell
     160             :       TYPE(distribution_1d_type), POINTER                :: local_particles
     161             :       TYPE(distribution_2d_type), POINTER                :: dist_2d_prv
     162        9978 :       TYPE(local_atoms_type), ALLOCATABLE, DIMENSION(:)  :: atom2d
     163        9978 :       TYPE(molecule_type), DIMENSION(:), POINTER         :: molecule_set
     164        9978 :       TYPE(particle_type), DIMENSION(:), POINTER         :: particle_set
     165             : 
     166        9978 :       NULLIFY (atomic_kind_set, cell, local_particles, molecule_set, &
     167        9978 :                particle_set, dist_2d_prv)
     168             : 
     169        9978 :       IF (PRESENT(pot_to_rad)) THEN
     170        1348 :          pot_to_rad_prv = pot_to_rad
     171             :       ELSE
     172             :          pot_to_rad_prv = 1
     173             :       END IF
     174             : 
     175             :       CALL get_qs_env(qs_env, &
     176             :                       nkind=nkind, &
     177             :                       cell=cell, &
     178             :                       particle_set=particle_set, &
     179             :                       atomic_kind_set=atomic_kind_set, &
     180             :                       local_particles=local_particles, &
     181             :                       distribution_2d=dist_2d_prv, &
     182        9978 :                       molecule_set=molecule_set)
     183             : 
     184        9978 :       CALL section_vals_val_get(qs_env%input, "DFT%SUBCELLS", r_val=subcells)
     185             : 
     186       39912 :       ALLOCATE (i_present(nkind), j_present(nkind))
     187       39912 :       ALLOCATE (i_radius(nkind), j_radius(nkind))
     188             : 
     189       25899 :       i_present = .FALSE.
     190       25899 :       j_present = .FALSE.
     191       25899 :       i_radius = 0.0_dp
     192       25899 :       j_radius = 0.0_dp
     193             : 
     194        9978 :       IF (PRESENT(dist_2d)) dist_2d_prv => dist_2d
     195             : 
     196             :       !  Set up the radii, depending on the operator type
     197        9978 :       IF (potential_parameter%potential_type == do_potential_id) THEN
     198             : 
     199             :          !overlap => use the kind radius for both i and j
     200       21297 :          DO ikind = 1, nkind
     201       13101 :             IF (ASSOCIATED(basis_i(ikind)%gto_basis_set)) THEN
     202       13101 :                i_present(ikind) = .TRUE.
     203       13101 :                CALL get_gto_basis_set(basis_i(ikind)%gto_basis_set, kind_radius=i_radius(ikind))
     204             :             END IF
     205       21297 :             IF (ASSOCIATED(basis_j(ikind)%gto_basis_set)) THEN
     206       13101 :                j_present(ikind) = .TRUE.
     207       13101 :                CALL get_gto_basis_set(basis_j(ikind)%gto_basis_set, kind_radius=j_radius(ikind))
     208             :             END IF
     209             :          END DO
     210             : 
     211        1782 :       ELSE IF (potential_parameter%potential_type == do_potential_coulomb) THEN
     212             : 
     213             :          !Coulomb operator, virtually infinite range => set j_radius to arbitrarily large number
     214        1110 :          DO ikind = 1, nkind
     215         736 :             IF (ASSOCIATED(basis_i(ikind)%gto_basis_set)) THEN
     216         736 :                i_present(ikind) = .TRUE.
     217         736 :                IF (pot_to_rad_prv == 1) i_radius(ikind) = 1000000.0_dp
     218             :             END IF
     219        1110 :             IF (ASSOCIATED(basis_j(ikind)%gto_basis_set)) THEN
     220         736 :                j_present(ikind) = .TRUE.
     221         736 :                IF (pot_to_rad_prv == 2) j_radius(ikind) = 1000000.0_dp
     222             :             END IF
     223             :          END DO !ikind
     224             : 
     225        1408 :       ELSE IF (potential_parameter%potential_type == do_potential_truncated .OR. &
     226             :                potential_parameter%potential_type == do_potential_short) THEN
     227             : 
     228             :          !Truncated coulomb/short range: set j_radius to r_cutoff + the kind_radii
     229        3492 :          DO ikind = 1, nkind
     230        2084 :             IF (ASSOCIATED(basis_i(ikind)%gto_basis_set)) THEN
     231        2084 :                i_present(ikind) = .TRUE.
     232        2084 :                CALL get_gto_basis_set(basis_i(ikind)%gto_basis_set, kind_radius=i_radius(ikind))
     233        2084 :                IF (pot_to_rad_prv == 1) i_radius(ikind) = i_radius(ikind) + cutoff_screen_factor*potential_parameter%cutoff_radius
     234             :             END IF
     235        3492 :             IF (ASSOCIATED(basis_j(ikind)%gto_basis_set)) THEN
     236        2084 :                j_present(ikind) = .TRUE.
     237        2084 :                CALL get_gto_basis_set(basis_j(ikind)%gto_basis_set, kind_radius=j_radius(ikind))
     238        2084 :                IF (pot_to_rad_prv == 2) j_radius(ikind) = j_radius(ikind) + cutoff_screen_factor*potential_parameter%cutoff_radius
     239             :             END IF
     240             :          END DO
     241             : 
     242             :       ELSE
     243           0 :          CPABORT("Operator not implemented.")
     244             :       END IF
     245             : 
     246       39912 :       ALLOCATE (pair_radius(nkind, nkind))
     247       53742 :       pair_radius = 0.0_dp
     248        9978 :       CALL pair_radius_setup(i_present, j_present, i_radius, j_radius, pair_radius)
     249             : 
     250       45855 :       ALLOCATE (atom2d(nkind))
     251             : 
     252             :       CALL atom2d_build(atom2d, local_particles, dist_2d_prv, atomic_kind_set, &
     253        9978 :                         molecule_set, molecule_only=.FALSE., particle_set=particle_set)
     254             :       CALL build_neighbor_lists(ij_list, particle_set, atom2d, cell, pair_radius, subcells, &
     255        9978 :                                 symmetric=sym_ij, molecular=molecular, nlname=TRIM(name))
     256             : 
     257        9978 :       CALL atom2d_cleanup(atom2d)
     258             : 
     259       29934 :    END SUBROUTINE
     260             : 
     261             : ! **************************************************************************************************
     262             : !> \brief Build a 3-center neighbor list
     263             : !> \param ijk_list 3c neighbor list for atom triples i, j, k
     264             : !> \param basis_i basis object for atoms i
     265             : !> \param basis_j basis object for atoms j
     266             : !> \param basis_k basis object for atoms k
     267             : !> \param dist_3d 3d distribution object
     268             : !> \param potential_parameter ...
     269             : !> \param name name of 3c neighbor list
     270             : !> \param qs_env ...
     271             : !> \param sym_ij Symmetry in i, j (default .FALSE.)
     272             : !> \param sym_jk Symmetry in j, k (default .FALSE.)
     273             : !> \param sym_ik Symmetry in i, k (default .FALSE.)
     274             : !> \param molecular ??? not tested
     275             : !> \param op_pos ...
     276             : !> \param own_dist ...
     277             : ! **************************************************************************************************
     278         674 :    SUBROUTINE build_3c_neighbor_lists(ijk_list, basis_i, basis_j, basis_k, &
     279             :                                       dist_3d, potential_parameter, name, qs_env, &
     280             :                                       sym_ij, sym_jk, sym_ik, molecular, op_pos, &
     281             :                                       own_dist)
     282             :       TYPE(neighbor_list_3c_type), INTENT(OUT)           :: ijk_list
     283             :       TYPE(gto_basis_set_p_type), DIMENSION(:)           :: basis_i, basis_j, basis_k
     284             :       TYPE(distribution_3d_type), INTENT(IN)             :: dist_3d
     285             :       TYPE(libint_potential_type), INTENT(IN)            :: potential_parameter
     286             :       CHARACTER(LEN=*), INTENT(IN)                       :: name
     287             :       TYPE(qs_environment_type), POINTER                 :: qs_env
     288             :       LOGICAL, INTENT(IN), OPTIONAL                      :: sym_ij, sym_jk, sym_ik, molecular
     289             :       INTEGER, INTENT(IN), OPTIONAL                      :: op_pos
     290             :       LOGICAL, INTENT(IN), OPTIONAL                      :: own_dist
     291             : 
     292             :       CHARACTER(len=*), PARAMETER :: routineN = 'build_3c_neighbor_lists'
     293             : 
     294             :       INTEGER                                            :: handle, op_pos_prv, sym_level
     295             :       TYPE(libint_potential_type)                        :: pot_par_1, pot_par_2
     296             : 
     297         674 :       CALL timeset(routineN, handle)
     298             : 
     299         674 :       IF (PRESENT(op_pos)) THEN
     300         478 :          op_pos_prv = op_pos
     301             :       ELSE
     302             :          op_pos_prv = 1
     303             :       END IF
     304             : 
     305         492 :       SELECT CASE (op_pos_prv)
     306             :       CASE (1)
     307         492 :          pot_par_1 = potential_parameter
     308         492 :          pot_par_2%potential_type = do_potential_id
     309             :       CASE (2)
     310         182 :          pot_par_2 = potential_parameter
     311         478 :          pot_par_1%potential_type = do_potential_id
     312             :       END SELECT
     313             : 
     314             :       CALL build_2c_neighbor_lists(ijk_list%ij_list, basis_i, basis_j, pot_par_1, TRIM(name)//"_sub_1", &
     315             :                                    qs_env, sym_ij=.FALSE., molecular=molecular, &
     316         674 :                                    dist_2d=dist_3d%dist_2d_1, pot_to_rad=1)
     317             : 
     318             :       CALL build_2c_neighbor_lists(ijk_list%jk_list, basis_j, basis_k, pot_par_2, TRIM(name)//"_sub_2", &
     319             :                                    qs_env, sym_ij=.FALSE., molecular=molecular, &
     320         674 :                                    dist_2d=dist_3d%dist_2d_2, pot_to_rad=2)
     321             : 
     322         674 :       ijk_list%sym = symmetric_none
     323             : 
     324         674 :       sym_level = 0
     325         674 :       IF (PRESENT(sym_ij)) THEN
     326         140 :          IF (sym_ij) THEN
     327           0 :             ijk_list%sym = symmetric_ij
     328           0 :             sym_level = sym_level + 1
     329             :          END IF
     330             :       END IF
     331             : 
     332         674 :       IF (PRESENT(sym_jk)) THEN
     333         472 :          IF (sym_jk) THEN
     334         418 :             ijk_list%sym = symmetric_jk
     335         418 :             sym_level = sym_level + 1
     336             :          END IF
     337             :       END IF
     338             : 
     339         674 :       IF (PRESENT(sym_ik)) THEN
     340           0 :          IF (sym_ik) THEN
     341           0 :             ijk_list%sym = symmetrik_ik
     342           0 :             sym_level = sym_level + 1
     343             :          END IF
     344             :       END IF
     345             : 
     346         674 :       IF (sym_level >= 2) THEN
     347           0 :          ijk_list%sym = symmetric_ijk
     348             :       END IF
     349             : 
     350         674 :       ijk_list%dist_3d = dist_3d
     351         674 :       IF (PRESENT(own_dist)) THEN
     352         674 :          ijk_list%owns_dist = own_dist
     353             :       ELSE
     354           0 :          ijk_list%owns_dist = .FALSE.
     355             :       END IF
     356             : 
     357         674 :       CALL timestop(handle)
     358         674 :    END SUBROUTINE
     359             : 
     360             : ! **************************************************************************************************
     361             : !> \brief Symmetry criterion
     362             : !> \param a ...
     363             : !> \param b ...
     364             : !> \return ...
     365             : ! **************************************************************************************************
     366     3048714 :    PURE FUNCTION include_symmetric(a, b)
     367             :       INTEGER, INTENT(IN)                                :: a, b
     368             :       LOGICAL                                            :: include_symmetric
     369             : 
     370     3048714 :       IF (a > b) THEN
     371     1171848 :          include_symmetric = (MODULO(a + b, 2) /= 0)
     372             :       ELSE
     373     1876866 :          include_symmetric = (MODULO(a + b, 2) == 0)
     374             :       END IF
     375             : 
     376     3048714 :    END FUNCTION
     377             : 
     378             : ! **************************************************************************************************
     379             : !> \brief Destroy 3c neighborlist
     380             : !> \param ijk_list ...
     381             : ! **************************************************************************************************
     382         674 :    SUBROUTINE neighbor_list_3c_destroy(ijk_list)
     383             :       TYPE(neighbor_list_3c_type), INTENT(INOUT)         :: ijk_list
     384             : 
     385         674 :       CALL release_neighbor_list_sets(ijk_list%ij_list)
     386         674 :       CALL release_neighbor_list_sets(ijk_list%jk_list)
     387             : 
     388         674 :       IF (ijk_list%owns_dist) THEN
     389         674 :          CALL distribution_3d_destroy(ijk_list%dist_3d)
     390             :       END IF
     391             : 
     392         674 :    END SUBROUTINE
     393             : 
     394             : ! **************************************************************************************************
     395             : !> \brief Create a 3-center neighborlist iterator
     396             : !> \param iterator ...
     397             : !> \param ijk_nl ...
     398             : ! **************************************************************************************************
     399       66114 :    SUBROUTINE neighbor_list_3c_iterator_create(iterator, ijk_nl)
     400             :       TYPE(neighbor_list_3c_iterator_type), INTENT(OUT)  :: iterator
     401             :       TYPE(neighbor_list_3c_type), INTENT(IN)            :: ijk_nl
     402             : 
     403             :       CHARACTER(len=*), PARAMETER :: routineN = 'neighbor_list_3c_iterator_create'
     404             : 
     405             :       INTEGER                                            :: handle
     406             : 
     407        7346 :       CALL timeset(routineN, handle)
     408             : 
     409        7346 :       CALL neighbor_list_iterator_create(iterator%iter_ij, ijk_nl%ij_list)
     410        7346 :       CALL neighbor_list_iterator_create(iterator%iter_jk, ijk_nl%jk_list, search=.TRUE.)
     411             : 
     412        7346 :       iterator%iter_level = 0
     413        7346 :       iterator%ijk_nl = ijk_nl
     414             : 
     415       22038 :       iterator%bounds_i = 0
     416       22038 :       iterator%bounds_j = 0
     417       22038 :       iterator%bounds_k = 0
     418             : 
     419        7346 :       CALL timestop(handle)
     420        7346 :    END SUBROUTINE
     421             : 
     422             : ! **************************************************************************************************
     423             : !> \brief impose atomic upper and lower bounds
     424             : !> \param iterator ...
     425             : !> \param bounds_i ...
     426             : !> \param bounds_j ...
     427             : !> \param bounds_k ...
     428             : ! **************************************************************************************************
     429        7276 :    SUBROUTINE nl_3c_iter_set_bounds(iterator, bounds_i, bounds_j, bounds_k)
     430             :       TYPE(neighbor_list_3c_iterator_type), &
     431             :          INTENT(INOUT)                                   :: iterator
     432             :       INTEGER, DIMENSION(2), INTENT(IN), OPTIONAL        :: bounds_i, bounds_j, bounds_k
     433             : 
     434       23200 :       IF (PRESENT(bounds_i)) iterator%bounds_i = bounds_i
     435       13324 :       IF (PRESENT(bounds_j)) iterator%bounds_j = bounds_j
     436       16942 :       IF (PRESENT(bounds_k)) iterator%bounds_k = bounds_k
     437             : 
     438        7276 :    END SUBROUTINE
     439             : 
     440             : ! **************************************************************************************************
     441             : !> \brief Destroy 3c-nl iterator
     442             : !> \param iterator ...
     443             : ! **************************************************************************************************
     444        7346 :    SUBROUTINE neighbor_list_3c_iterator_destroy(iterator)
     445             :       TYPE(neighbor_list_3c_iterator_type), &
     446             :          INTENT(INOUT)                                   :: iterator
     447             : 
     448             :       CHARACTER(len=*), PARAMETER :: routineN = 'neighbor_list_3c_iterator_destroy'
     449             : 
     450             :       INTEGER                                            :: handle
     451             : 
     452        7346 :       CALL timeset(routineN, handle)
     453        7346 :       CALL neighbor_list_iterator_release(iterator%iter_ij)
     454        7346 :       CALL neighbor_list_iterator_release(iterator%iter_jk)
     455        7346 :       NULLIFY (iterator%iter_ij)
     456        7346 :       NULLIFY (iterator%iter_jk)
     457             : 
     458        7346 :       CALL timestop(handle)
     459        7346 :    END SUBROUTINE
     460             : 
     461             : ! **************************************************************************************************
     462             : !> \brief Iterate 3c-nl iterator
     463             : !> \param iterator ...
     464             : !> \return 0 if successful; 1 if end was reached
     465             : ! **************************************************************************************************
     466     4538971 :    RECURSIVE FUNCTION neighbor_list_3c_iterate(iterator) RESULT(iter_stat)
     467             :       TYPE(neighbor_list_3c_iterator_type), &
     468             :          INTENT(INOUT)                                   :: iterator
     469             :       INTEGER                                            :: iter_stat
     470             : 
     471             :       INTEGER                                            :: iatom, iter_level, jatom, jatom_1, &
     472             :                                                             jatom_2, katom
     473             :       LOGICAL                                            :: skip_this
     474             : 
     475     4538971 :       iter_level = iterator%iter_level
     476             : 
     477     4538971 :       IF (iter_level == 0) THEN
     478      174231 :          iter_stat = neighbor_list_iterate(iterator%iter_ij)
     479             : 
     480      174231 :          IF (iter_stat /= 0) THEN
     481             :             RETURN
     482             :          END IF
     483             : 
     484      166885 :          CALL get_iterator_info(iterator%iter_ij, iatom=iatom, jatom=jatom)
     485      166885 :          skip_this = .FALSE.
     486             :          IF ((iterator%bounds_i(1) > 0 .AND. iatom < iterator%bounds_i(1)) &
     487      166885 :              .OR. (iterator%bounds_i(2) > 0 .AND. iatom > iterator%bounds_i(2))) skip_this = .TRUE.
     488             :          IF ((iterator%bounds_j(1) > 0 .AND. jatom < iterator%bounds_j(1)) &
     489      166885 :              .OR. (iterator%bounds_j(2) > 0 .AND. jatom > iterator%bounds_j(2))) skip_this = .TRUE.
     490             : 
     491      166885 :          IF (skip_this) THEN
     492       70662 :             iter_stat = neighbor_list_3c_iterate(iterator)
     493       70662 :             RETURN
     494             :          END IF
     495             : 
     496             :       END IF
     497     4460963 :       iter_stat = nl_sub_iterate(iterator%iter_jk, iterator%iter_ij)
     498     4460963 :       IF (iter_stat /= 0) THEN
     499       96223 :          iterator%iter_level = 0
     500       96223 :          iter_stat = neighbor_list_3c_iterate(iterator)
     501       96223 :          RETURN
     502             :       ELSE
     503     4364740 :          iterator%iter_level = 1
     504             :       END IF
     505             : 
     506             :       CPASSERT(iter_stat == 0)
     507             :       CPASSERT(iterator%iter_level == 1)
     508     4364740 :       CALL get_iterator_info(iterator%iter_ij, iatom=iatom, jatom=jatom_1)
     509     4364740 :       CALL get_iterator_info(iterator%iter_jk, iatom=jatom_2, jatom=katom)
     510             : 
     511     4364740 :       CPASSERT(jatom_1 == jatom_2)
     512     4364740 :       jatom = jatom_1
     513             : 
     514     4364740 :       skip_this = .FALSE.
     515             :       IF ((iterator%bounds_k(1) > 0 .AND. katom < iterator%bounds_k(1)) &
     516     4364740 :           .OR. (iterator%bounds_k(2) > 0 .AND. katom > iterator%bounds_k(2))) skip_this = .TRUE.
     517             : 
     518             :       IF (skip_this) THEN
     519      240840 :          iter_stat = neighbor_list_3c_iterate(iterator)
     520      240840 :          RETURN
     521             :       END IF
     522             : 
     523     4123900 :       SELECT CASE (iterator%ijk_nl%sym)
     524             :       CASE (symmetric_none)
     525           0 :          skip_this = .FALSE.
     526             :       CASE (symmetric_ij)
     527           0 :          skip_this = .NOT. include_symmetric(iatom, jatom)
     528             :       CASE (symmetric_jk)
     529     3048714 :          skip_this = .NOT. include_symmetric(jatom, katom)
     530             :       CASE (symmetrik_ik)
     531           0 :          skip_this = .NOT. include_symmetric(iatom, katom)
     532             :       CASE (symmetric_ijk)
     533           0 :          skip_this = .NOT. include_symmetric(iatom, jatom) .OR. .NOT. include_symmetric(jatom, katom)
     534             :       CASE DEFAULT
     535     4123900 :          CPABORT("should not happen")
     536             :       END SELECT
     537             : 
     538     3048714 :       IF (skip_this) THEN
     539     1173987 :          iter_stat = neighbor_list_3c_iterate(iterator)
     540     1173987 :          RETURN
     541             :       END IF
     542             : 
     543             :    END FUNCTION
     544             : 
     545             : ! **************************************************************************************************
     546             : !> \brief Get info of current iteration
     547             : !> \param iterator ...
     548             : !> \param ikind ...
     549             : !> \param jkind ...
     550             : !> \param kkind ...
     551             : !> \param nkind ...
     552             : !> \param iatom ...
     553             : !> \param jatom ...
     554             : !> \param katom ...
     555             : !> \param rij ...
     556             : !> \param rjk ...
     557             : !> \param rik ...
     558             : !> \param cell_j ...
     559             : !> \param cell_k ...
     560             : !> \return ...
     561             : ! **************************************************************************************************
     562     2949913 :    SUBROUTINE get_3c_iterator_info(iterator, ikind, jkind, kkind, nkind, iatom, jatom, katom, &
     563             :                                    rij, rjk, rik, cell_j, cell_k)
     564             :       TYPE(neighbor_list_3c_iterator_type), &
     565             :          INTENT(INOUT)                                   :: iterator
     566             :       INTEGER, INTENT(OUT), OPTIONAL                     :: ikind, jkind, kkind, nkind, iatom, &
     567             :                                                             jatom, katom
     568             :       REAL(KIND=dp), DIMENSION(3), INTENT(OUT), OPTIONAL :: rij, rjk, rik
     569             :       INTEGER, DIMENSION(3), INTENT(OUT), OPTIONAL       :: cell_j, cell_k
     570             : 
     571             :       INTEGER, DIMENSION(2)                              :: atoms_1, atoms_2, kinds_1, kinds_2
     572             :       INTEGER, DIMENSION(3)                              :: cell_1, cell_2
     573             :       REAL(KIND=dp), DIMENSION(3)                        :: r_1, r_2
     574             : 
     575     2949913 :       CPASSERT(iterator%iter_level == 1)
     576             : 
     577             :       CALL get_iterator_info(iterator%iter_ij, &
     578             :                              ikind=kinds_1(1), jkind=kinds_1(2), nkind=nkind, &
     579             :                              iatom=atoms_1(1), jatom=atoms_1(2), r=r_1, &
     580     2949913 :                              cell=cell_1)
     581             : 
     582             :       CALL get_iterator_info(iterator%iter_jk, &
     583             :                              ikind=kinds_2(1), jkind=kinds_2(2), &
     584             :                              iatom=atoms_2(1), jatom=atoms_2(2), r=r_2, &
     585     2949913 :                              cell=cell_2)
     586             : 
     587     2949913 :       IF (PRESENT(ikind)) ikind = kinds_1(1)
     588     2949913 :       IF (PRESENT(jkind)) jkind = kinds_1(2)
     589     2949913 :       IF (PRESENT(kkind)) kkind = kinds_2(2)
     590     2949913 :       IF (PRESENT(iatom)) iatom = atoms_1(1)
     591     2949913 :       IF (PRESENT(jatom)) jatom = atoms_1(2)
     592     2949913 :       IF (PRESENT(katom)) katom = atoms_2(2)
     593             : 
     594     2949913 :       IF (PRESENT(rij)) rij = r_1
     595     2949913 :       IF (PRESENT(rjk)) rjk = r_2
     596    14749565 :       IF (PRESENT(rik)) rik = r_1 + r_2
     597             : 
     598     2949913 :       IF (PRESENT(cell_j)) cell_j = cell_1
     599    14070365 :       IF (PRESENT(cell_k)) cell_k = cell_1 + cell_2
     600             : 
     601     2949913 :    END SUBROUTINE
     602             : 
     603             : ! **************************************************************************************************
     604             : !> \brief Allocate blocks of a 3-center tensor based on neighborlist
     605             : !> \param t3c empty DBCSR tensor
     606             : !>            Should be of shape (1,1) if no kpoints are used and of shape (nimages, nimages)
     607             : !>            if k-points are used
     608             : !> \param nl_3c 3-center neighborlist
     609             : !> \param basis_i ...
     610             : !> \param basis_j ...
     611             : !> \param basis_k ...
     612             : !> \param qs_env ...
     613             : !> \param potential_parameter ...
     614             : !> \param op_pos ...
     615             : !> \param do_kpoints ...
     616             : !> \param do_hfx_kpoints ...
     617             : !> \param bounds_i ...
     618             : !> \param bounds_j ...
     619             : !> \param bounds_k ...
     620             : !> \param RI_range ...
     621             : !> \param img_to_RI_cell ...
     622             : !> \param cell_to_index ...
     623             : !> \param cell_sym ...
     624             : ! **************************************************************************************************
     625        2420 :    SUBROUTINE alloc_block_3c(t3c, nl_3c, basis_i, basis_j, basis_k, qs_env, potential_parameter, op_pos, &
     626             :                              do_kpoints, do_hfx_kpoints, bounds_i, bounds_j, bounds_k, RI_range, &
     627        2420 :                              img_to_RI_cell, cell_to_index, cell_sym)
     628             :       TYPE(dbt_type), DIMENSION(:, :), INTENT(INOUT)     :: t3c
     629             :       TYPE(neighbor_list_3c_type), INTENT(INOUT)         :: nl_3c
     630             :       TYPE(gto_basis_set_p_type), DIMENSION(:)           :: basis_i, basis_j, basis_k
     631             :       TYPE(qs_environment_type), POINTER                 :: qs_env
     632             :       TYPE(libint_potential_type), INTENT(IN)            :: potential_parameter
     633             :       INTEGER, INTENT(IN), OPTIONAL                      :: op_pos
     634             :       LOGICAL, INTENT(IN), OPTIONAL                      :: do_kpoints, do_hfx_kpoints
     635             :       INTEGER, DIMENSION(2), INTENT(IN), OPTIONAL        :: bounds_i, bounds_j, bounds_k
     636             :       REAL(dp), INTENT(IN), OPTIONAL                     :: RI_range
     637             :       INTEGER, DIMENSION(:), INTENT(IN), OPTIONAL        :: img_to_RI_cell
     638             :       INTEGER, DIMENSION(:, :, :), OPTIONAL, POINTER     :: cell_to_index
     639             :       LOGICAL, INTENT(IN), OPTIONAL                      :: cell_sym
     640             : 
     641             :       CHARACTER(LEN=*), PARAMETER                        :: routineN = 'alloc_block_3c'
     642             : 
     643             :       INTEGER                                            :: handle, iatom, ikind, j_img, jatom, &
     644             :                                                             jcell, jkind, k_img, katom, kcell, &
     645             :                                                             kkind, natom, ncell_RI, nimg, op_ij, &
     646             :                                                             op_jk, op_pos_prv
     647             :       INTEGER(int_8)                                     :: a, b, nblk_per_thread
     648        2420 :       INTEGER(int_8), ALLOCATABLE, DIMENSION(:, :)       :: nblk
     649        2420 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: img_to_RI_cell_prv
     650             :       INTEGER, DIMENSION(3)                              :: blk_idx, cell_j, cell_k, &
     651             :                                                             kp_index_lbounds, kp_index_ubounds
     652             :       LOGICAL                                            :: cell_sym_prv, do_hfx_kpoints_prv, &
     653             :                                                             do_kpoints_prv
     654             :       REAL(KIND=dp)                                      :: dij, dik, djk, dr_ij, dr_ik, dr_jk, &
     655             :                                                             kind_radius_i, kind_radius_j, &
     656             :                                                             kind_radius_k
     657             :       REAL(KIND=dp), DIMENSION(3)                        :: rij, rik, rjk
     658        2420 :       TYPE(atomic_kind_type), DIMENSION(:), POINTER      :: atomic_kind_set
     659             :       TYPE(cell_type), POINTER                           :: cell
     660             :       TYPE(dft_control_type), POINTER                    :: dft_control
     661             :       TYPE(mp_para_env_type), POINTER                    :: para_env
     662             :       TYPE(neighbor_list_3c_iterator_type)               :: nl_3c_iter
     663             :       TYPE(one_dim_int_array), ALLOCATABLE, &
     664        2420 :          DIMENSION(:, :)                                 :: alloc_i, alloc_j, alloc_k
     665        2420 :       TYPE(qs_kind_type), DIMENSION(:), POINTER          :: qs_kind_set
     666             : 
     667        2420 :       CALL timeset(routineN, handle)
     668        2420 :       NULLIFY (qs_kind_set, atomic_kind_set, cell)
     669             : 
     670        2420 :       IF (PRESENT(do_kpoints)) THEN
     671         442 :          do_kpoints_prv = do_kpoints
     672             :       ELSE
     673             :          do_kpoints_prv = .FALSE.
     674             :       END IF
     675        2420 :       IF (PRESENT(do_hfx_kpoints)) THEN
     676         196 :          do_hfx_kpoints_prv = do_hfx_kpoints
     677             :       ELSE
     678             :          do_hfx_kpoints_prv = .FALSE.
     679             :       END IF
     680         196 :       IF (do_hfx_kpoints_prv) THEN
     681         196 :          CPASSERT(do_kpoints_prv)
     682         196 :          CPASSERT(PRESENT(RI_range))
     683         196 :          CPASSERT(PRESENT(img_to_RI_cell))
     684             :       END IF
     685             : 
     686        2224 :       IF (PRESENT(img_to_RI_cell)) THEN
     687         588 :          ALLOCATE (img_to_RI_cell_prv(SIZE(img_to_RI_cell)))
     688        9564 :          img_to_RI_cell_prv(:) = img_to_RI_cell
     689             :       END IF
     690             : 
     691        2420 :       IF (PRESENT(cell_sym)) THEN
     692        1858 :          cell_sym_prv = cell_sym
     693             :       ELSE
     694             :          cell_sym_prv = .FALSE.
     695             :       END IF
     696             : 
     697        2420 :       dr_ij = 0.0_dp; dr_jk = 0.0_dp; dr_ik = 0.0_dp
     698             : 
     699        2420 :       op_ij = do_potential_id; op_jk = do_potential_id
     700             : 
     701        2420 :       IF (PRESENT(op_pos)) THEN
     702        2420 :          op_pos_prv = op_pos
     703             :       ELSE
     704             :          op_pos_prv = 1
     705             :       END IF
     706             : 
     707        2224 :       SELECT CASE (op_pos_prv)
     708             :       CASE (1)
     709        2224 :          op_ij = potential_parameter%potential_type
     710             :       CASE (2)
     711        2420 :          op_jk = potential_parameter%potential_type
     712             :       END SELECT
     713             : 
     714        2420 :       IF (op_ij == do_potential_truncated .OR. op_ij == do_potential_short) THEN
     715        1398 :          dr_ij = potential_parameter%cutoff_radius*cutoff_screen_factor
     716        1398 :          dr_ik = potential_parameter%cutoff_radius*cutoff_screen_factor
     717        1022 :       ELSEIF (op_ij == do_potential_coulomb) THEN
     718         436 :          dr_ij = 1000000.0_dp
     719         436 :          dr_ik = 1000000.0_dp
     720             :       END IF
     721             : 
     722        2420 :       IF (op_jk == do_potential_truncated .OR. op_jk == do_potential_short) THEN
     723          20 :          dr_jk = potential_parameter%cutoff_radius*cutoff_screen_factor
     724          20 :          dr_ik = potential_parameter%cutoff_radius*cutoff_screen_factor
     725        2400 :       ELSEIF (op_jk == do_potential_coulomb) THEN
     726           0 :          dr_jk = 1000000.0_dp
     727           0 :          dr_ik = 1000000.0_dp
     728             :       END IF
     729             : 
     730             :       CALL get_qs_env(qs_env, atomic_kind_set=atomic_kind_set, qs_kind_set=qs_kind_set, natom=natom, &
     731        2420 :                       dft_control=dft_control, para_env=para_env, cell=cell)
     732             : 
     733        2420 :       IF (do_kpoints_prv) THEN
     734         208 :          CPASSERT(PRESENT(cell_to_index))
     735         208 :          CPASSERT(ASSOCIATED(cell_to_index))
     736             : !         nimg = dft_control%nimages
     737       30768 :          nimg = MAXVAL(cell_to_index)
     738         208 :          ncell_RI = nimg
     739         208 :          IF (do_hfx_kpoints_prv) THEN
     740         196 :             nimg = SIZE(t3c, 1)
     741         196 :             ncell_RI = SIZE(t3c, 2)
     742             :          END IF
     743             :       ELSE
     744             :          nimg = 1
     745             :          ncell_RI = 1
     746             :       END IF
     747             : 
     748        2420 :       IF (do_kpoints_prv) THEN
     749         832 :          kp_index_lbounds = LBOUND(cell_to_index)
     750         832 :          kp_index_ubounds = UBOUND(cell_to_index)
     751             :       END IF
     752             : 
     753             :       !Do a first loop over the nl and count the blocks present
     754        9680 :       ALLOCATE (nblk(nimg, ncell_RI))
     755       12820 :       nblk(:, :) = 0
     756             : 
     757        2420 :       CALL neighbor_list_3c_iterator_create(nl_3c_iter, nl_3c)
     758             : 
     759        2420 :       CALL nl_3c_iter_set_bounds(nl_3c_iter, bounds_i, bounds_j, bounds_k)
     760             : 
     761      909645 :       DO WHILE (neighbor_list_3c_iterate(nl_3c_iter) == 0)
     762             :          CALL get_3c_iterator_info(nl_3c_iter, iatom=iatom, ikind=ikind, jkind=jkind, kkind=kkind, &
     763      907225 :                                    rij=rij, rjk=rjk, rik=rik, cell_j=cell_j, cell_k=cell_k)
     764             : 
     765     3628900 :          djk = NORM2(rjk)
     766     3628900 :          dij = NORM2(rij)
     767     3628900 :          dik = NORM2(rik)
     768             : 
     769      907225 :          IF (do_kpoints_prv) THEN
     770             : 
     771      619038 :             IF (ANY([cell_j(1), cell_j(2), cell_j(3)] < kp_index_lbounds) .OR. &
     772             :                 ANY([cell_j(1), cell_j(2), cell_j(3)] > kp_index_ubounds)) CYCLE
     773             : 
     774       88434 :             jcell = cell_to_index(cell_j(1), cell_j(2), cell_j(3))
     775       88434 :             IF (jcell > nimg .OR. jcell < 1) CYCLE
     776             : 
     777      529972 :             IF (ANY([cell_k(1), cell_k(2), cell_k(3)] < kp_index_lbounds) .OR. &
     778             :                 ANY([cell_k(1), cell_k(2), cell_k(3)] > kp_index_ubounds)) CYCLE
     779             : 
     780       71837 :             kcell = cell_to_index(cell_k(1), cell_k(2), cell_k(3))
     781       71837 :             IF (kcell > nimg .OR. kcell < 1) CYCLE
     782       51759 :             IF (do_hfx_kpoints_prv) THEN
     783       49497 :                IF (dik > RI_range) CYCLE
     784             :                kcell = 1
     785             :             END IF
     786             :          ELSE
     787             :             jcell = 1; kcell = 1
     788             :          END IF
     789             : 
     790      829279 :          CALL get_gto_basis_set(basis_i(ikind)%gto_basis_set, kind_radius=kind_radius_i)
     791      829279 :          CALL get_gto_basis_set(basis_j(jkind)%gto_basis_set, kind_radius=kind_radius_j)
     792      829279 :          CALL get_gto_basis_set(basis_k(kkind)%gto_basis_set, kind_radius=kind_radius_k)
     793             : 
     794      829279 :          IF (kind_radius_j + kind_radius_i + dr_ij < dij) CYCLE
     795      829279 :          IF (kind_radius_j + kind_radius_k + dr_jk < djk) CYCLE
     796      829279 :          IF (kind_radius_k + kind_radius_i + dr_ik < dik) CYCLE
     797             : 
     798      901529 :          nblk(jcell, kcell) = nblk(jcell, kcell) + 1
     799             :       END DO
     800        2420 :       CALL neighbor_list_3c_iterator_destroy(nl_3c_iter)
     801             : 
     802             :       !Do a second loop over the nl to give block indices
     803       20080 :       ALLOCATE (alloc_i(nimg, ncell_RI))
     804       17660 :       ALLOCATE (alloc_j(nimg, ncell_RI))
     805       17660 :       ALLOCATE (alloc_k(nimg, ncell_RI))
     806        4902 :       DO k_img = 1, ncell_RI
     807       12820 :          DO j_img = 1, nimg
     808       20864 :             ALLOCATE (alloc_i(j_img, k_img)%array(nblk(j_img, k_img)))
     809       20864 :             ALLOCATE (alloc_j(j_img, k_img)%array(nblk(j_img, k_img)))
     810       23346 :             ALLOCATE (alloc_k(j_img, k_img)%array(nblk(j_img, k_img)))
     811             :          END DO
     812             :       END DO
     813       12820 :       nblk(:, :) = 0
     814             : 
     815        2420 :       CALL neighbor_list_3c_iterator_create(nl_3c_iter, nl_3c)
     816             : 
     817        2420 :       CALL nl_3c_iter_set_bounds(nl_3c_iter, bounds_i, bounds_j, bounds_k)
     818             : 
     819      909645 :       DO WHILE (neighbor_list_3c_iterate(nl_3c_iter) == 0)
     820             :          CALL get_3c_iterator_info(nl_3c_iter, ikind=ikind, jkind=jkind, kkind=kkind, &
     821             :                                    iatom=iatom, jatom=jatom, katom=katom, &
     822      907225 :                                    rij=rij, rjk=rjk, rik=rik, cell_j=cell_j, cell_k=cell_k)
     823             : 
     824     3628900 :          djk = NORM2(rjk)
     825     3628900 :          dij = NORM2(rij)
     826     3628900 :          dik = NORM2(rik)
     827             : 
     828      907225 :          IF (do_kpoints_prv) THEN
     829             : 
     830      619038 :             IF (ANY([cell_j(1), cell_j(2), cell_j(3)] < kp_index_lbounds) .OR. &
     831             :                 ANY([cell_j(1), cell_j(2), cell_j(3)] > kp_index_ubounds)) CYCLE
     832             : 
     833       88434 :             jcell = cell_to_index(cell_j(1), cell_j(2), cell_j(3))
     834       88434 :             IF (jcell > nimg .OR. jcell < 1) CYCLE
     835             : 
     836      529972 :             IF (ANY([cell_k(1), cell_k(2), cell_k(3)] < kp_index_lbounds) .OR. &
     837             :                 ANY([cell_k(1), cell_k(2), cell_k(3)] > kp_index_ubounds)) CYCLE
     838             : 
     839       71837 :             kcell = cell_to_index(cell_k(1), cell_k(2), cell_k(3))
     840       71837 :             IF (kcell > nimg .OR. kcell < 1) CYCLE
     841       51759 :             IF (do_hfx_kpoints_prv) THEN
     842       49497 :                IF (dik > RI_range) CYCLE
     843        8226 :                kcell = img_to_RI_cell_prv(kcell)
     844             :             END IF
     845             :          ELSE
     846             :             jcell = 1; kcell = 1
     847             :          END IF
     848             : 
     849     3317116 :          blk_idx = [iatom, jatom, katom]
     850      829279 :          IF (do_hfx_kpoints_prv) THEN
     851        8226 :             blk_idx(3) = (kcell - 1)*natom + katom
     852        8226 :             kcell = 1
     853             :          END IF
     854             : 
     855      829279 :          CALL get_gto_basis_set(basis_i(ikind)%gto_basis_set, kind_radius=kind_radius_i)
     856      829279 :          CALL get_gto_basis_set(basis_j(jkind)%gto_basis_set, kind_radius=kind_radius_j)
     857      829279 :          CALL get_gto_basis_set(basis_k(kkind)%gto_basis_set, kind_radius=kind_radius_k)
     858             : 
     859      829279 :          IF (kind_radius_j + kind_radius_i + dr_ij < dij) CYCLE
     860      829279 :          IF (kind_radius_j + kind_radius_k + dr_jk < djk) CYCLE
     861      829279 :          IF (kind_radius_k + kind_radius_i + dr_ik < dik) CYCLE
     862             : 
     863      415244 :          nblk(jcell, kcell) = nblk(jcell, kcell) + 1
     864             : 
     865             :          !Note: there may be repeated indices due to periodic images => dbt_reserve_blocks takes care of it
     866      415244 :          alloc_i(jcell, kcell)%array(nblk(jcell, kcell)) = blk_idx(1)
     867      415244 :          alloc_j(jcell, kcell)%array(nblk(jcell, kcell)) = blk_idx(2)
     868      901529 :          alloc_k(jcell, kcell)%array(nblk(jcell, kcell)) = blk_idx(3)
     869             : 
     870             :       END DO
     871        2420 :       CALL neighbor_list_3c_iterator_destroy(nl_3c_iter)
     872             : 
     873             : !TODO: Parallelize creation of block list.
     874             : !$OMP PARALLEL DEFAULT(NONE) SHARED(t3c,nimg,nblk,alloc_i,alloc_j,alloc_k,ncell_RI,cell_sym_prv) &
     875        2420 : !$OMP PRIVATE(k_img,j_img,nblk_per_thread,A,b)
     876             :       DO k_img = 1, ncell_RI
     877             :          DO j_img = 1, nimg
     878             :             IF (cell_sym_prv .AND. j_img < k_img) CYCLE
     879             :             IF (ALLOCATED(alloc_i(j_img, k_img)%array)) THEN
     880             :                nblk_per_thread = nblk(j_img, k_img)/omp_get_num_threads() + 1
     881             :                a = omp_get_thread_num()*nblk_per_thread + 1
     882             :                b = MIN(a + nblk_per_thread, nblk(j_img, k_img))
     883             :                CALL dbt_reserve_blocks(t3c(j_img, k_img), &
     884             :                                        alloc_i(j_img, k_img)%array(a:b), &
     885             :                                        alloc_j(j_img, k_img)%array(a:b), &
     886             :                                        alloc_k(j_img, k_img)%array(a:b))
     887             :             END IF
     888             :          END DO
     889             :       END DO
     890             : !$OMP END PARALLEL
     891             : 
     892        2420 :       CALL timestop(handle)
     893             : 
     894       47954 :    END SUBROUTINE
     895             : 
     896             : ! **************************************************************************************************
     897             : !> \brief Build 3-center derivative tensors
     898             : !> \param t3c_der_i empty DBCSR tensor which will contain the 1st center derivatives
     899             : !> \param t3c_der_k empty DBCSR tensor which will contain the 3rd center derivatives
     900             : !> \param filter_eps Filter threshold for tensor blocks
     901             : !> \param qs_env ...
     902             : !> \param nl_3c 3-center neighborlist
     903             : !> \param basis_i ...
     904             : !> \param basis_j ...
     905             : !> \param basis_k ...
     906             : !> \param potential_parameter ...
     907             : !> \param der_eps neglect integrals smaller than der_eps
     908             : !> \param op_pos operator position.
     909             : !>        1: calculate (i|jk) integrals,
     910             : !>        2: calculate (ij|k) integrals
     911             : !> \param do_kpoints ...
     912             : !> this routine requires that libint has been static initialised somewhere else
     913             : !> \param do_hfx_kpoints ...
     914             : !> \param bounds_i ...
     915             : !> \param bounds_j ...
     916             : !> \param bounds_k ...
     917             : !> \param RI_range ...
     918             : !> \param img_to_RI_cell ...
     919             : ! **************************************************************************************************
     920         562 :    SUBROUTINE build_3c_derivatives(t3c_der_i, t3c_der_k, filter_eps, qs_env, &
     921         562 :                                    nl_3c, basis_i, basis_j, basis_k, &
     922             :                                    potential_parameter, der_eps, &
     923             :                                    op_pos, do_kpoints, do_hfx_kpoints, &
     924             :                                    bounds_i, bounds_j, bounds_k, &
     925         562 :                                    RI_range, img_to_RI_cell)
     926             : 
     927             :       TYPE(dbt_type), DIMENSION(:, :, :), INTENT(INOUT)  :: t3c_der_i, t3c_der_k
     928             :       REAL(KIND=dp), INTENT(IN)                          :: filter_eps
     929             :       TYPE(qs_environment_type), POINTER                 :: qs_env
     930             :       TYPE(neighbor_list_3c_type), INTENT(INOUT)         :: nl_3c
     931             :       TYPE(gto_basis_set_p_type), DIMENSION(:)           :: basis_i, basis_j, basis_k
     932             :       TYPE(libint_potential_type), INTENT(IN)            :: potential_parameter
     933             :       REAL(KIND=dp), INTENT(IN), OPTIONAL                :: der_eps
     934             :       INTEGER, INTENT(IN), OPTIONAL                      :: op_pos
     935             :       LOGICAL, INTENT(IN), OPTIONAL                      :: do_kpoints, do_hfx_kpoints
     936             :       INTEGER, DIMENSION(2), INTENT(IN), OPTIONAL        :: bounds_i, bounds_j, bounds_k
     937             :       REAL(dp), INTENT(IN), OPTIONAL                     :: RI_range
     938             :       INTEGER, DIMENSION(:), INTENT(IN), OPTIONAL        :: img_to_RI_cell
     939             : 
     940             :       CHARACTER(LEN=*), PARAMETER :: routineN = 'build_3c_derivatives'
     941             : 
     942             :       INTEGER :: block_end_i, block_end_j, block_end_k, block_start_i, block_start_j, &
     943             :          block_start_k, egfi, handle, handle2, i, i_img, i_xyz, iatom, ibasis, ikind, ilist, imax, &
     944             :          iset, j_img, jatom, jcell, jkind, jset, katom, kcell, kkind, kset, m_max, max_ncoj, &
     945             :          max_nset, max_nsgfi, maxli, maxlj, maxlk, mepos, natom, nbasis, ncell_RI, ncoi, ncoj, &
     946             :          ncok, nimg, nseti, nsetj, nsetk, nthread, op_ij, op_jk, op_pos_prv, sgfi, sgfj, sgfk, &
     947             :          unit_id
     948         562 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: img_to_RI_cell_prv
     949             :       INTEGER, DIMENSION(2)                              :: bo
     950             :       INTEGER, DIMENSION(3)                              :: blk_idx, blk_size, cell_j, cell_k, &
     951             :                                                             kp_index_lbounds, kp_index_ubounds, sp
     952         562 :       INTEGER, DIMENSION(:), POINTER                     :: lmax_i, lmax_j, lmax_k, lmin_i, lmin_j, &
     953        1124 :                                                             lmin_k, npgfi, npgfj, npgfk, nsgfi, &
     954         562 :                                                             nsgfj, nsgfk
     955         562 :       INTEGER, DIMENSION(:, :), POINTER                  :: first_sgf_i, first_sgf_j, first_sgf_k
     956         562 :       INTEGER, DIMENSION(:, :, :), POINTER               :: cell_to_index
     957             :       LOGICAL                                            :: do_hfx_kpoints_prv, do_kpoints_prv, &
     958             :                                                             found, skip
     959             :       LOGICAL, DIMENSION(3)                              :: block_j_not_zero, block_k_not_zero, &
     960             :                                                             der_j_zero, der_k_zero
     961             :       REAL(dp), DIMENSION(3)                             :: der_ext_i, der_ext_j, der_ext_k
     962             :       REAL(KIND=dp)                                      :: dij, dik, djk, dr_ij, dr_ik, dr_jk, &
     963             :                                                             kind_radius_i, kind_radius_j, &
     964             :                                                             kind_radius_k, prefac
     965         562 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: ccp_buffer, cpp_buffer, &
     966         562 :                                                             max_contraction_i, max_contraction_j, &
     967         562 :                                                             max_contraction_k
     968         562 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :)     :: dijk_contr, dummy_block_t
     969         562 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :, :)  :: block_t_i, block_t_j, block_t_k, dijk_j, &
     970        1124 :                                                             dijk_k, tmp_ijk_i, tmp_ijk_j
     971             :       REAL(KIND=dp), DIMENSION(3)                        :: ri, rij, rik, rj, rjk, rk
     972         562 :       REAL(KIND=dp), DIMENSION(:), POINTER               :: set_radius_i, set_radius_j, set_radius_k
     973         562 :       REAL(KIND=dp), DIMENSION(:, :), POINTER            :: rpgf_i, rpgf_j, rpgf_k, sphi_i, sphi_j, &
     974        1124 :                                                             sphi_k, zeti, zetj, zetk
     975         562 :       TYPE(atomic_kind_type), DIMENSION(:), POINTER      :: atomic_kind_set
     976        1124 :       TYPE(cp_2d_r_p_type), DIMENSION(:, :), POINTER     :: spi, spk, tspj
     977             :       TYPE(cp_libint_t)                                  :: lib
     978        3934 :       TYPE(dbt_type)                                     :: t3c_tmp
     979         562 :       TYPE(dbt_type), ALLOCATABLE, DIMENSION(:, :)       :: t3c_template
     980         562 :       TYPE(dbt_type), ALLOCATABLE, DIMENSION(:, :, :)    :: t3c_der_j
     981             :       TYPE(dft_control_type), POINTER                    :: dft_control
     982             :       TYPE(gto_basis_set_type), POINTER                  :: basis_set
     983             :       TYPE(kpoint_type), POINTER                         :: kpoints
     984             :       TYPE(mp_para_env_type), POINTER                    :: para_env
     985             :       TYPE(neighbor_list_3c_iterator_type)               :: nl_3c_iter
     986         562 :       TYPE(qs_kind_type), DIMENSION(:), POINTER          :: qs_kind_set
     987             : 
     988         562 :       CALL timeset(routineN, handle)
     989             : 
     990         562 :       IF (PRESENT(do_kpoints)) THEN
     991          74 :          do_kpoints_prv = do_kpoints
     992             :       ELSE
     993             :          do_kpoints_prv = .FALSE.
     994             :       END IF
     995             : 
     996         562 :       IF (PRESENT(do_hfx_kpoints)) THEN
     997          74 :          do_hfx_kpoints_prv = do_hfx_kpoints
     998             :       ELSE
     999             :          do_hfx_kpoints_prv = .FALSE.
    1000             :       END IF
    1001          74 :       IF (do_hfx_kpoints_prv) THEN
    1002          74 :          CPASSERT(do_kpoints_prv)
    1003          74 :          CPASSERT(PRESENT(RI_range))
    1004          74 :          CPASSERT(PRESENT(img_to_RI_cell))
    1005             :       END IF
    1006             : 
    1007         488 :       IF (PRESENT(img_to_RI_cell)) THEN
    1008         222 :          ALLOCATE (img_to_RI_cell_prv(SIZE(img_to_RI_cell)))
    1009        3586 :          img_to_RI_cell_prv(:) = img_to_RI_cell
    1010             :       END IF
    1011             : 
    1012         562 :       op_ij = do_potential_id; op_jk = do_potential_id
    1013             : 
    1014         562 :       IF (PRESENT(op_pos)) THEN
    1015         562 :          op_pos_prv = op_pos
    1016             :       ELSE
    1017           0 :          op_pos_prv = 1
    1018             :       END IF
    1019             : 
    1020         488 :       SELECT CASE (op_pos_prv)
    1021             :       CASE (1)
    1022         488 :          op_ij = potential_parameter%potential_type
    1023             :       CASE (2)
    1024         562 :          op_jk = potential_parameter%potential_type
    1025             :       END SELECT
    1026             : 
    1027         562 :       dr_ij = 0.0_dp; dr_jk = 0.0_dp; dr_ik = 0.0_dp
    1028             : 
    1029         562 :       IF (op_ij == do_potential_truncated .OR. op_ij == do_potential_short) THEN
    1030          50 :          dr_ij = potential_parameter%cutoff_radius*cutoff_screen_factor
    1031          50 :          dr_ik = potential_parameter%cutoff_radius*cutoff_screen_factor
    1032         512 :       ELSEIF (op_ij == do_potential_coulomb) THEN
    1033         244 :          dr_ij = 1000000.0_dp
    1034         244 :          dr_ik = 1000000.0_dp
    1035             :       END IF
    1036             : 
    1037         562 :       IF (op_jk == do_potential_truncated .OR. op_jk == do_potential_short) THEN
    1038           8 :          dr_jk = potential_parameter%cutoff_radius*cutoff_screen_factor
    1039           8 :          dr_ik = potential_parameter%cutoff_radius*cutoff_screen_factor
    1040         554 :       ELSEIF (op_jk == do_potential_coulomb) THEN
    1041           0 :          dr_jk = 1000000.0_dp
    1042           0 :          dr_ik = 1000000.0_dp
    1043             :       END IF
    1044             : 
    1045         562 :       NULLIFY (qs_kind_set, atomic_kind_set)
    1046             : 
    1047             :       ! get stuff
    1048             :       CALL get_qs_env(qs_env, atomic_kind_set=atomic_kind_set, qs_kind_set=qs_kind_set, &
    1049         562 :                       natom=natom, kpoints=kpoints, dft_control=dft_control, para_env=para_env)
    1050             : 
    1051         562 :       IF (do_kpoints_prv) THEN
    1052          74 :          nimg = dft_control%nimages
    1053          74 :          ncell_RI = nimg
    1054          74 :          IF (do_hfx_kpoints_prv) THEN
    1055          74 :             nimg = SIZE(t3c_der_k, 1)
    1056          74 :             ncell_RI = SIZE(t3c_der_k, 2)
    1057             :          END IF
    1058          74 :          CALL get_kpoint_info(kpoints, cell_to_index=cell_to_index)
    1059             :       ELSE
    1060             :          nimg = 1
    1061             :          ncell_RI = 1
    1062             :       END IF
    1063             : 
    1064         562 :       IF (do_hfx_kpoints_prv) THEN
    1065          74 :          CPASSERT(op_pos_prv == 2)
    1066             :       ELSE
    1067        1952 :          CPASSERT(ALL(SHAPE(t3c_der_k) == [nimg, ncell_RI, 3]))
    1068             :       END IF
    1069             : 
    1070        8590 :       ALLOCATE (t3c_template(nimg, ncell_RI))
    1071        1124 :       DO j_img = 1, ncell_RI
    1072        3532 :          DO i_img = 1, nimg
    1073        2970 :             CALL dbt_create(t3c_der_i(i_img, j_img, 1), t3c_template(i_img, j_img))
    1074             :          END DO
    1075             :       END DO
    1076             : 
    1077             :       CALL alloc_block_3c(t3c_template, nl_3c, basis_i, basis_j, basis_k, qs_env, potential_parameter, &
    1078             :                           op_pos=op_pos_prv, do_kpoints=do_kpoints, do_hfx_kpoints=do_hfx_kpoints, &
    1079             :                           bounds_i=bounds_i, bounds_j=bounds_j, bounds_k=bounds_k, &
    1080        1050 :                           RI_range=RI_range, img_to_RI_cell=img_to_RI_cell, cell_to_index=cell_to_index)
    1081        2248 :       DO i_xyz = 1, 3
    1082        3934 :          DO j_img = 1, ncell_RI
    1083       10596 :             DO i_img = 1, nimg
    1084        7224 :                CALL dbt_copy(t3c_template(i_img, j_img), t3c_der_i(i_img, j_img, i_xyz))
    1085        8910 :                CALL dbt_copy(t3c_template(i_img, j_img), t3c_der_k(i_img, j_img, i_xyz))
    1086             :             END DO
    1087             :          END DO
    1088             :       END DO
    1089             : 
    1090        1124 :       DO j_img = 1, ncell_RI
    1091        3532 :          DO i_img = 1, nimg
    1092        2970 :             CALL dbt_destroy(t3c_template(i_img, j_img))
    1093             :          END DO
    1094             :       END DO
    1095        2970 :       DEALLOCATE (t3c_template)
    1096             : 
    1097         562 :       IF (nl_3c%sym == symmetric_jk) THEN
    1098        9760 :          ALLOCATE (t3c_der_j(nimg, ncell_RI, 3))
    1099        1952 :          DO i_xyz = 1, 3
    1100        3416 :             DO j_img = 1, ncell_RI
    1101        4392 :                DO i_img = 1, nimg
    1102        1464 :                   CALL dbt_create(t3c_der_k(i_img, j_img, i_xyz), t3c_der_j(i_img, j_img, i_xyz))
    1103        2928 :                   CALL dbt_copy(t3c_der_k(i_img, j_img, i_xyz), t3c_der_j(i_img, j_img, i_xyz))
    1104             :                END DO
    1105             :             END DO
    1106             :          END DO
    1107             :       END IF
    1108             : 
    1109             :       !Need the max l for each basis for libint and max nset, nco and nsgf for LIBXSMM contraction
    1110         562 :       nbasis = SIZE(basis_i)
    1111         562 :       max_nsgfi = 0
    1112         562 :       max_nset = 0
    1113         562 :       maxli = 0
    1114        1614 :       DO ibasis = 1, nbasis
    1115             :          CALL get_gto_basis_set(gto_basis_set=basis_i(ibasis)%gto_basis_set, maxl=imax, &
    1116        1052 :                                 nset=iset, nsgf_set=nsgfi, npgf=npgfi)
    1117        1052 :          maxli = MAX(maxli, imax)
    1118        1052 :          max_nset = MAX(max_nset, iset)
    1119        5740 :          max_nsgfi = MAX(max_nsgfi, MAXVAL(nsgfi))
    1120             :       END DO
    1121             :       max_ncoj = 0
    1122             :       maxlj = 0
    1123        1614 :       DO ibasis = 1, nbasis
    1124             :          CALL get_gto_basis_set(gto_basis_set=basis_j(ibasis)%gto_basis_set, maxl=imax, &
    1125        1052 :                                 nset=jset, nsgf_set=nsgfj, npgf=npgfj)
    1126        1052 :          maxlj = MAX(maxlj, imax)
    1127        1052 :          max_nset = MAX(max_nset, jset)
    1128        4432 :          max_ncoj = MAX(max_ncoj, MAXVAL(npgfj)*ncoset(maxlj))
    1129             :       END DO
    1130             :       maxlk = 0
    1131        1614 :       DO ibasis = 1, nbasis
    1132             :          CALL get_gto_basis_set(gto_basis_set=basis_k(ibasis)%gto_basis_set, maxl=imax, &
    1133        1052 :                                 nset=kset, nsgf_set=nsgfk, npgf=npgfk)
    1134        1052 :          maxlk = MAX(maxlk, imax)
    1135        1614 :          max_nset = MAX(max_nset, kset)
    1136             :       END DO
    1137         562 :       m_max = maxli + maxlj + maxlk + 1
    1138             : 
    1139             :       !To minimize expensive memory ops and generally optimize contraction, pre-allocate
    1140             :       !contiguous sphi arrays (and transposed in the case of sphi_i)
    1141             : 
    1142         562 :       NULLIFY (tspj, spi, spk)
    1143       26396 :       ALLOCATE (spi(max_nset, nbasis), tspj(max_nset, nbasis), spk(max_nset, nbasis))
    1144             : 
    1145        1614 :       DO ibasis = 1, nbasis
    1146        7862 :          DO iset = 1, max_nset
    1147        6248 :             NULLIFY (spi(iset, ibasis)%array)
    1148        6248 :             NULLIFY (tspj(iset, ibasis)%array)
    1149        7300 :             NULLIFY (spk(iset, ibasis)%array)
    1150             :          END DO
    1151             :       END DO
    1152             : 
    1153        2248 :       DO ilist = 1, 3
    1154        5404 :          DO ibasis = 1, nbasis
    1155        3156 :             IF (ilist == 1) basis_set => basis_i(ibasis)%gto_basis_set
    1156        3156 :             IF (ilist == 2) basis_set => basis_j(ibasis)%gto_basis_set
    1157        3156 :             IF (ilist == 3) basis_set => basis_k(ibasis)%gto_basis_set
    1158             : 
    1159       14404 :             DO iset = 1, basis_set%nset
    1160             : 
    1161        9562 :                ncoi = basis_set%npgf(iset)*ncoset(basis_set%lmax(iset))
    1162        9562 :                sgfi = basis_set%first_sgf(1, iset)
    1163        9562 :                egfi = sgfi + basis_set%nsgf_set(iset) - 1
    1164             : 
    1165       12718 :                IF (ilist == 1) THEN
    1166       16504 :                   ALLOCATE (spi(iset, ibasis)%array(ncoi, basis_set%nsgf_set(iset)))
    1167     3073846 :                   spi(iset, ibasis)%array(:, :) = basis_set%sphi(1:ncoi, sgfi:egfi)
    1168             : 
    1169        5436 :                ELSE IF (ilist == 2) THEN
    1170       11272 :                   ALLOCATE (tspj(iset, ibasis)%array(basis_set%nsgf_set(iset), ncoi))
    1171      176530 :                   tspj(iset, ibasis)%array(:, :) = TRANSPOSE(basis_set%sphi(1:ncoi, sgfi:egfi))
    1172             : 
    1173             :                ELSE
    1174       10472 :                   ALLOCATE (spk(iset, ibasis)%array(ncoi, basis_set%nsgf_set(iset)))
    1175      833426 :                   spk(iset, ibasis)%array(:, :) = basis_set%sphi(1:ncoi, sgfi:egfi)
    1176             :                END IF
    1177             : 
    1178             :             END DO !iset
    1179             :          END DO !ibasis
    1180             :       END DO !ilist
    1181             : 
    1182             :       !Init the truncated Coulomb operator
    1183         562 :       IF (op_ij == do_potential_truncated .OR. op_jk == do_potential_truncated) THEN
    1184             : 
    1185          58 :          IF (m_max > get_lmax_init()) THEN
    1186           8 :             IF (para_env%mepos == 0) THEN
    1187           4 :                CALL open_file(unit_number=unit_id, file_name=potential_parameter%filename)
    1188             :             END IF
    1189           8 :             CALL init(m_max, unit_id, para_env%mepos, para_env)
    1190           8 :             IF (para_env%mepos == 0) THEN
    1191           4 :                CALL close_file(unit_id)
    1192             :             END IF
    1193             :          END IF
    1194             :       END IF
    1195             : 
    1196         562 :       CALL init_md_ftable(nmax=m_max)
    1197             : 
    1198         562 :       IF (do_kpoints_prv) THEN
    1199         296 :          kp_index_lbounds = LBOUND(cell_to_index)
    1200         296 :          kp_index_ubounds = UBOUND(cell_to_index)
    1201             :       END IF
    1202             : 
    1203         562 :       nthread = 1
    1204         562 : !$    nthread = omp_get_max_threads()
    1205             : 
    1206             : !$OMP PARALLEL DEFAULT(NONE) &
    1207             : !$OMP SHARED (nthread,do_kpoints_prv,kp_index_lbounds,kp_index_ubounds,maxli,maxlk,maxlj,bounds_i,&
    1208             : !$OMP         bounds_j,bounds_k,nimg,basis_i,basis_j,basis_k,dr_ij,dr_jk,dr_ik,ncoset,op_pos_prv,&
    1209             : !$OMP         potential_parameter,der_eps,tspj,spi,spk,cell_to_index,RI_range,natom,nl_3c,&
    1210             : !$OMP         t3c_der_i,t3c_der_k,t3c_der_j,ncell_RI,img_to_RI_cell_prv,do_hfx_kpoints_prv) &
    1211             : !$OMP PRIVATE (lib,nl_3c_iter,ikind,jkind,kkind,iatom,jatom,katom,rij,rjk,rik,cell_j,cell_k,&
    1212             : !$OMP          prefac,jcell,kcell,first_sgf_i,lmax_i,lmin_i,npgfi,nseti,nsgfi,rpgf_i,set_radius_i,&
    1213             : !$OMP          sphi_i,zeti,kind_radius_i,first_sgf_j,lmax_j,lmin_j,npgfj,nsetj,nsgfj,rpgf_j,&
    1214             : !$OMP          set_radius_j,sphi_j,zetj,kind_radius_j,first_sgf_k,lmax_k,lmin_k,npgfk,nsetk,nsgfk,&
    1215             : !$OMP          rpgf_k,set_radius_k,sphi_k,zetk,kind_radius_k,djk,dij,dik,ncoi,ncoj,ncok,sgfi,sgfj,&
    1216             : !$OMP          sgfk,dijk_j,dijk_k,ri,rj,rk,max_contraction_i,max_contraction_j,blk_idx,&
    1217             : !$OMP          max_contraction_k,iset,jset,kset,block_t_i,blk_size,dijk_contr,cpp_buffer,ccp_buffer,&
    1218             : !$OMP          block_start_j,block_end_j,block_start_k,block_end_k,block_start_i,block_end_i,found,&
    1219             : !$OMP          dummy_block_t,sp,handle2,mepos,bo,block_t_k,der_ext_i,der_ext_j,der_ext_k,tmp_ijk_i,&
    1220         562 : !$OMP          block_k_not_zero,der_k_zero,skip,der_j_zero,block_t_j,block_j_not_zero,tmp_ijk_j,i)
    1221             : 
    1222             :       mepos = 0
    1223             : !$    mepos = omp_get_thread_num()
    1224             : 
    1225             :       CALL cp_libint_init_3eri1(lib, MAX(maxli, maxlj, maxlk))
    1226             :       CALL cp_libint_set_contrdepth(lib, 1)
    1227             :       CALL neighbor_list_3c_iterator_create(nl_3c_iter, nl_3c)
    1228             : 
    1229             :       !We split the provided bounds among the threads such that each threads works on a different set of atoms
    1230             :       IF (PRESENT(bounds_i)) THEN
    1231             :          bo = get_limit(bounds_i(2) - bounds_i(1) + 1, nthread, mepos)
    1232             :          bo(:) = bo(:) + bounds_i(1) - 1
    1233             :          CALL nl_3c_iter_set_bounds(nl_3c_iter, bo, bounds_j, bounds_k)
    1234             :       ELSE IF (PRESENT(bounds_j)) THEN
    1235             :          bo = get_limit(bounds_j(2) - bounds_j(1) + 1, nthread, mepos)
    1236             :          bo(:) = bo(:) + bounds_j(1) - 1
    1237             :          CALL nl_3c_iter_set_bounds(nl_3c_iter, bounds_i, bo, bounds_k)
    1238             :       ELSE IF (PRESENT(bounds_k)) THEN
    1239             :          bo = get_limit(bounds_k(2) - bounds_k(1) + 1, nthread, mepos)
    1240             :          bo(:) = bo(:) + bounds_k(1) - 1
    1241             :          CALL nl_3c_iter_set_bounds(nl_3c_iter, bounds_i, bounds_j, bo)
    1242             :       ELSE
    1243             :          bo = get_limit(natom, nthread, mepos)
    1244             :          CALL nl_3c_iter_set_bounds(nl_3c_iter, bo, bounds_j, bounds_k)
    1245             :       END IF
    1246             : 
    1247             :       skip = .FALSE.
    1248             :       IF (bo(1) > bo(2)) skip = .TRUE.
    1249             : 
    1250             :       DO WHILE (neighbor_list_3c_iterate(nl_3c_iter) == 0)
    1251             :          CALL get_3c_iterator_info(nl_3c_iter, ikind=ikind, jkind=jkind, kkind=kkind, &
    1252             :                                    iatom=iatom, jatom=jatom, katom=katom, &
    1253             :                                    rij=rij, rjk=rjk, rik=rik, cell_j=cell_j, cell_k=cell_k)
    1254             :          IF (skip) EXIT
    1255             : 
    1256             :          djk = NORM2(rjk)
    1257             :          dij = NORM2(rij)
    1258             :          dik = NORM2(rik)
    1259             : 
    1260             :          IF (do_kpoints_prv) THEN
    1261             :             prefac = 0.5_dp
    1262             :          ELSEIF (nl_3c%sym == symmetric_jk) THEN
    1263             :             IF (jatom == katom) THEN
    1264             :                prefac = 0.5_dp
    1265             :             ELSE
    1266             :                prefac = 1.0_dp
    1267             :             END IF
    1268             :          ELSE
    1269             :             prefac = 1.0_dp
    1270             :          END IF
    1271             :          IF (do_hfx_kpoints_prv) prefac = 1.0_dp
    1272             : 
    1273             :          IF (do_kpoints_prv) THEN
    1274             : 
    1275             :             IF (ANY([cell_j(1), cell_j(2), cell_j(3)] < kp_index_lbounds) .OR. &
    1276             :                 ANY([cell_j(1), cell_j(2), cell_j(3)] > kp_index_ubounds)) CYCLE
    1277             : 
    1278             :             jcell = cell_to_index(cell_j(1), cell_j(2), cell_j(3))
    1279             :             IF (jcell > nimg .OR. jcell < 1) CYCLE
    1280             : 
    1281             :             IF (ANY([cell_k(1), cell_k(2), cell_k(3)] < kp_index_lbounds) .OR. &
    1282             :                 ANY([cell_k(1), cell_k(2), cell_k(3)] > kp_index_ubounds)) CYCLE
    1283             : 
    1284             :             kcell = cell_to_index(cell_k(1), cell_k(2), cell_k(3))
    1285             :             IF (kcell > nimg .OR. kcell < 1) CYCLE
    1286             :             !In case of HFX k-points, we only consider P^k if d_ik <= RI_range
    1287             :             IF (do_hfx_kpoints_prv) THEN
    1288             :                IF (dik > RI_range) CYCLE
    1289             :                kcell = img_to_RI_cell_prv(kcell)
    1290             :             END IF
    1291             :          ELSE
    1292             :             jcell = 1; kcell = 1
    1293             :          END IF
    1294             : 
    1295             :          blk_idx = [iatom, jatom, katom]
    1296             :          IF (do_hfx_kpoints_prv) THEN
    1297             :             blk_idx(3) = (kcell - 1)*natom + katom
    1298             :             kcell = 1
    1299             :          END IF
    1300             : 
    1301             :          CALL get_gto_basis_set(basis_i(ikind)%gto_basis_set, first_sgf=first_sgf_i, lmax=lmax_i, lmin=lmin_i, &
    1302             :                                 npgf=npgfi, nset=nseti, nsgf_set=nsgfi, pgf_radius=rpgf_i, set_radius=set_radius_i, &
    1303             :                                 sphi=sphi_i, zet=zeti, kind_radius=kind_radius_i)
    1304             : 
    1305             :          CALL get_gto_basis_set(basis_j(jkind)%gto_basis_set, first_sgf=first_sgf_j, lmax=lmax_j, lmin=lmin_j, &
    1306             :                                 npgf=npgfj, nset=nsetj, nsgf_set=nsgfj, pgf_radius=rpgf_j, set_radius=set_radius_j, &
    1307             :                                 sphi=sphi_j, zet=zetj, kind_radius=kind_radius_j)
    1308             : 
    1309             :          CALL get_gto_basis_set(basis_k(kkind)%gto_basis_set, first_sgf=first_sgf_k, lmax=lmax_k, lmin=lmin_k, &
    1310             :                                 npgf=npgfk, nset=nsetk, nsgf_set=nsgfk, pgf_radius=rpgf_k, set_radius=set_radius_k, &
    1311             :                                 sphi=sphi_k, zet=zetk, kind_radius=kind_radius_k)
    1312             : 
    1313             :          IF (kind_radius_j + kind_radius_i + dr_ij < dij) CYCLE
    1314             :          IF (kind_radius_j + kind_radius_k + dr_jk < djk) CYCLE
    1315             :          IF (kind_radius_k + kind_radius_i + dr_ik < dik) CYCLE
    1316             : 
    1317             :          ALLOCATE (max_contraction_i(nseti))
    1318             :          max_contraction_i = 0.0_dp
    1319             :          DO iset = 1, nseti
    1320             :             sgfi = first_sgf_i(1, iset)
    1321             :             max_contraction_i(iset) = MAXVAL((/(SUM(ABS(sphi_i(:, i))), i=sgfi, sgfi + nsgfi(iset) - 1)/))
    1322             :          END DO
    1323             : 
    1324             :          ALLOCATE (max_contraction_j(nsetj))
    1325             :          max_contraction_j = 0.0_dp
    1326             :          DO jset = 1, nsetj
    1327             :             sgfj = first_sgf_j(1, jset)
    1328             :             max_contraction_j(jset) = MAXVAL((/(SUM(ABS(sphi_j(:, i))), i=sgfj, sgfj + nsgfj(jset) - 1)/))
    1329             :          END DO
    1330             : 
    1331             :          ALLOCATE (max_contraction_k(nsetk))
    1332             :          max_contraction_k = 0.0_dp
    1333             :          DO kset = 1, nsetk
    1334             :             sgfk = first_sgf_k(1, kset)
    1335             :             max_contraction_k(kset) = MAXVAL((/(SUM(ABS(sphi_k(:, i))), i=sgfk, sgfk + nsgfk(kset) - 1)/))
    1336             :          END DO
    1337             : 
    1338             :          CALL dbt_blk_sizes(t3c_der_i(jcell, kcell, 1), blk_idx, blk_size)
    1339             : 
    1340             :          ALLOCATE (block_t_i(blk_size(2), blk_size(3), blk_size(1), 3))
    1341             :          ALLOCATE (block_t_j(blk_size(2), blk_size(3), blk_size(1), 3))
    1342             :          ALLOCATE (block_t_k(blk_size(2), blk_size(3), blk_size(1), 3))
    1343             : 
    1344             :          block_t_i = 0.0_dp
    1345             :          block_t_j = 0.0_dp
    1346             :          block_t_k = 0.0_dp
    1347             :          block_j_not_zero = .FALSE.
    1348             :          block_k_not_zero = .FALSE.
    1349             : 
    1350             :          DO iset = 1, nseti
    1351             : 
    1352             :             DO jset = 1, nsetj
    1353             : 
    1354             :                IF (set_radius_j(jset) + set_radius_i(iset) + dr_ij < dij) CYCLE
    1355             : 
    1356             :                DO kset = 1, nsetk
    1357             : 
    1358             :                   IF (set_radius_j(jset) + set_radius_k(kset) + dr_jk < djk) CYCLE
    1359             :                   IF (set_radius_k(kset) + set_radius_i(iset) + dr_ik < dik) CYCLE
    1360             : 
    1361             :                   ncoi = npgfi(iset)*ncoset(lmax_i(iset))
    1362             :                   ncoj = npgfj(jset)*ncoset(lmax_j(jset))
    1363             :                   ncok = npgfk(kset)*ncoset(lmax_k(kset))
    1364             : 
    1365             :                   sgfi = first_sgf_i(1, iset)
    1366             :                   sgfj = first_sgf_j(1, jset)
    1367             :                   sgfk = first_sgf_k(1, kset)
    1368             : 
    1369             :                   IF (ncoj*ncok*ncoi > 0) THEN
    1370             :                      ALLOCATE (dijk_j(ncoj, ncok, ncoi, 3))
    1371             :                      ALLOCATE (dijk_k(ncoj, ncok, ncoi, 3))
    1372             :                      dijk_j(:, :, :, :) = 0.0_dp
    1373             :                      dijk_k(:, :, :, :) = 0.0_dp
    1374             : 
    1375             :                      der_j_zero = .FALSE.
    1376             :                      der_k_zero = .FALSE.
    1377             : 
    1378             :                      !need positions for libint. Only relative positions are needed => set ri to 0.0
    1379             :                      ri = 0.0_dp
    1380             :                      rj = rij ! ri + rij
    1381             :                      rk = rik ! ri + rik
    1382             : 
    1383             :                      IF (op_pos_prv == 1) THEN
    1384             :                         CALL eri_3center_derivs(dijk_j, dijk_k, &
    1385             :                                                 lmin_j(jset), lmax_j(jset), npgfj(jset), zetj(:, jset), rpgf_j(:, jset), rj, &
    1386             :                                                 lmin_k(kset), lmax_k(kset), npgfk(kset), zetk(:, kset), rpgf_k(:, kset), rk, &
    1387             :                                                 lmin_i(iset), lmax_i(iset), npgfi(iset), zeti(:, iset), rpgf_i(:, iset), ri, &
    1388             :                                                 djk, dij, dik, lib, potential_parameter, &
    1389             :                                                 der_abc_1_ext=der_ext_j, der_abc_2_ext=der_ext_k)
    1390             :                      ELSE
    1391             :                         ALLOCATE (tmp_ijk_i(ncoi, ncoj, ncok, 3))
    1392             :                         ALLOCATE (tmp_ijk_j(ncoi, ncoj, ncok, 3))
    1393             :                         tmp_ijk_i(:, :, :, :) = 0.0_dp
    1394             :                         tmp_ijk_j(:, :, :, :) = 0.0_dp
    1395             : 
    1396             :                         CALL eri_3center_derivs(tmp_ijk_i, tmp_ijk_j, &
    1397             :                                                 lmin_i(iset), lmax_i(iset), npgfi(iset), zeti(:, iset), rpgf_i(:, iset), ri, &
    1398             :                                                 lmin_j(jset), lmax_j(jset), npgfj(jset), zetj(:, jset), rpgf_j(:, jset), rj, &
    1399             :                                                 lmin_k(kset), lmax_k(kset), npgfk(kset), zetk(:, kset), rpgf_k(:, kset), rk, &
    1400             :                                                 dij, dik, djk, lib, potential_parameter, &
    1401             :                                                 der_abc_1_ext=der_ext_i, der_abc_2_ext=der_ext_j)
    1402             : 
    1403             :                         !TODO: is that inefficient?
    1404             :                         der_ext_k = 0
    1405             :                         DO i_xyz = 1, 3
    1406             :                            DO i = 1, ncoi
    1407             :                               dijk_j(:, :, i, i_xyz) = tmp_ijk_j(i, :, :, i_xyz)
    1408             :                               dijk_k(:, :, i, i_xyz) = -(dijk_j(:, :, i, i_xyz) + tmp_ijk_i(i, :, :, i_xyz))
    1409             :                               der_ext_k(i_xyz) = MAX(der_ext_k(i_xyz), MAXVAL(ABS(dijk_k(:, :, i, i_xyz))))
    1410             :                            END DO
    1411             :                         END DO
    1412             :                         DEALLOCATE (tmp_ijk_i, tmp_ijk_j)
    1413             : 
    1414             :                      END IF
    1415             : 
    1416             :                      IF (PRESENT(der_eps)) THEN
    1417             :                         DO i_xyz = 1, 3
    1418             :                            IF (der_eps > der_ext_j(i_xyz)*(max_contraction_i(iset)* &
    1419             :                                                            max_contraction_j(jset)* &
    1420             :                                                            max_contraction_k(kset))) THEN
    1421             :                               der_j_zero(i_xyz) = .TRUE.
    1422             :                            END IF
    1423             :                         END DO
    1424             : 
    1425             :                         DO i_xyz = 1, 3
    1426             :                            IF (der_eps > der_ext_k(i_xyz)*(max_contraction_i(iset)* &
    1427             :                                                            max_contraction_j(jset)* &
    1428             :                                                            max_contraction_k(kset))) THEN
    1429             :                               der_k_zero(i_xyz) = .TRUE.
    1430             :                            END IF
    1431             :                         END DO
    1432             :                         IF (ALL(der_j_zero) .AND. ALL(der_k_zero)) THEN
    1433             :                            DEALLOCATE (dijk_j, dijk_k)
    1434             :                            CYCLE
    1435             :                         END IF
    1436             :                      END IF
    1437             : 
    1438             :                      ALLOCATE (dijk_contr(nsgfj(jset), nsgfk(kset), nsgfi(iset)))
    1439             : 
    1440             :                      block_start_j = sgfj
    1441             :                      block_end_j = sgfj + nsgfj(jset) - 1
    1442             :                      block_start_k = sgfk
    1443             :                      block_end_k = sgfk + nsgfk(kset) - 1
    1444             :                      block_start_i = sgfi
    1445             :                      block_end_i = sgfi + nsgfi(iset) - 1
    1446             : 
    1447             :                      DO i_xyz = 1, 3
    1448             :                         IF (der_j_zero(i_xyz)) CYCLE
    1449             : 
    1450             :                         block_j_not_zero(i_xyz) = .TRUE.
    1451             :                         CALL abc_contract_xsmm(dijk_contr, dijk_j(:, :, :, i_xyz), tspj(jset, jkind)%array, &
    1452             :                                                spk(kset, kkind)%array, spi(iset, ikind)%array, &
    1453             :                                                ncoj, ncok, ncoi, nsgfj(jset), nsgfk(kset), &
    1454             :                                                nsgfi(iset), cpp_buffer, ccp_buffer, prefac)
    1455             : 
    1456             :                         block_t_j(block_start_j:block_end_j, &
    1457             :                                   block_start_k:block_end_k, &
    1458             :                                   block_start_i:block_end_i, i_xyz) = &
    1459             :                            block_t_j(block_start_j:block_end_j, &
    1460             :                                      block_start_k:block_end_k, &
    1461             :                                      block_start_i:block_end_i, i_xyz) + &
    1462             :                            dijk_contr(:, :, :)
    1463             : 
    1464             :                      END DO
    1465             : 
    1466             :                      DO i_xyz = 1, 3
    1467             :                         IF (der_k_zero(i_xyz)) CYCLE
    1468             : 
    1469             :                         block_k_not_zero(i_xyz) = .TRUE.
    1470             :                         CALL abc_contract_xsmm(dijk_contr, dijk_k(:, :, :, i_xyz), tspj(jset, jkind)%array, &
    1471             :                                                spk(kset, kkind)%array, spi(iset, ikind)%array, &
    1472             :                                                ncoj, ncok, ncoi, nsgfj(jset), nsgfk(kset), &
    1473             :                                                nsgfi(iset), cpp_buffer, ccp_buffer, prefac)
    1474             : 
    1475             :                         block_t_k(block_start_j:block_end_j, &
    1476             :                                   block_start_k:block_end_k, &
    1477             :                                   block_start_i:block_end_i, i_xyz) = &
    1478             :                            block_t_k(block_start_j:block_end_j, &
    1479             :                                      block_start_k:block_end_k, &
    1480             :                                      block_start_i:block_end_i, i_xyz) + &
    1481             :                            dijk_contr(:, :, :)
    1482             : 
    1483             :                      END DO
    1484             : 
    1485             :                      DEALLOCATE (dijk_j, dijk_k, dijk_contr)
    1486             :                   END IF ! number of triples > 0
    1487             :                END DO
    1488             :             END DO
    1489             :          END DO
    1490             : 
    1491             :          CALL timeset(routineN//"_put_dbcsr", handle2)
    1492             : !$OMP CRITICAL
    1493             :          sp = SHAPE(block_t_i(:, :, :, 1))
    1494             :          sp([2, 3, 1]) = sp
    1495             : 
    1496             :          DO i_xyz = 1, 3
    1497             :             !Derivatives wrt to center i can be obtained by translational invariance
    1498             :             IF ((.NOT. block_j_not_zero(i_xyz)) .AND. (.NOT. block_k_not_zero(i_xyz))) CYCLE
    1499             :             block_t_i(:, :, :, i_xyz) = -(block_t_j(:, :, :, i_xyz) + block_t_k(:, :, :, i_xyz))
    1500             : 
    1501             :             CALL dbt_put_block(t3c_der_i(jcell, kcell, i_xyz), blk_idx, sp, &
    1502             :                                RESHAPE(block_t_i(:, :, :, i_xyz), SHAPE=sp, ORDER=[2, 3, 1]), &
    1503             :                                summation=.TRUE.)
    1504             :          END DO
    1505             : 
    1506             :          sp = SHAPE(block_t_k(:, :, :, 1))
    1507             :          sp([2, 3, 1]) = sp
    1508             : 
    1509             :          DO i_xyz = 1, 3
    1510             :             IF (.NOT. block_k_not_zero(i_xyz)) CYCLE
    1511             :             CALL dbt_put_block(t3c_der_k(jcell, kcell, i_xyz), blk_idx, sp, &
    1512             :                                RESHAPE(block_t_k(:, :, :, i_xyz), SHAPE=sp, ORDER=[2, 3, 1]), &
    1513             :                                summation=.TRUE.)
    1514             :          END DO
    1515             : !$OMP END CRITICAL
    1516             : 
    1517             :          IF (nl_3c%sym == symmetric_jk) THEN
    1518             :             sp = SHAPE(block_t_j(:, :, :, 1))
    1519             :             sp([2, 3, 1]) = sp
    1520             : !$OMP CRITICAL
    1521             :             DO i_xyz = 1, 3
    1522             :                IF (.NOT. block_j_not_zero(i_xyz)) CYCLE
    1523             :                CALL dbt_put_block(t3c_der_j(jcell, kcell, i_xyz), blk_idx, sp, &
    1524             :                                   RESHAPE(block_t_j(:, :, :, i_xyz), SHAPE=sp, ORDER=[2, 3, 1]), &
    1525             :                                   summation=.TRUE.)
    1526             :             END DO
    1527             : !$OMP END CRITICAL
    1528             :          END IF
    1529             : 
    1530             :          CALL timestop(handle2)
    1531             : 
    1532             :          DEALLOCATE (block_t_i, block_t_j, block_t_k)
    1533             :          DEALLOCATE (max_contraction_i, max_contraction_j, max_contraction_k)
    1534             :       END DO
    1535             : 
    1536             :       IF (ALLOCATED(ccp_buffer)) DEALLOCATE (ccp_buffer)
    1537             :       IF (ALLOCATED(cpp_buffer)) DEALLOCATE (cpp_buffer)
    1538             : 
    1539             :       CALL cp_libint_cleanup_3eri1(lib)
    1540             :       CALL neighbor_list_3c_iterator_destroy(nl_3c_iter)
    1541             : !$OMP END PARALLEL
    1542             : 
    1543         562 :       IF (do_kpoints_prv .AND. .NOT. do_hfx_kpoints_prv) THEN
    1544           0 :          DO i_xyz = 1, 3
    1545           0 :             DO kcell = 1, nimg
    1546           0 :                DO jcell = 1, nimg
    1547             :                   ! need half of filter eps because afterwards we add transposed tensor
    1548           0 :                   CALL dbt_filter(t3c_der_i(jcell, kcell, i_xyz), filter_eps/2)
    1549           0 :                   CALL dbt_filter(t3c_der_k(jcell, kcell, i_xyz), filter_eps/2)
    1550             :                END DO
    1551             :             END DO
    1552             :          END DO
    1553             : 
    1554         562 :       ELSEIF (nl_3c%sym == symmetric_jk) THEN
    1555             :          !Add the transpose of t3c_der_j to t3c_der_k to get the fully populated tensor
    1556         488 :          CALL dbt_create(t3c_der_k(1, 1, 1), t3c_tmp)
    1557        1952 :          DO i_xyz = 1, 3
    1558        3416 :             DO kcell = 1, nimg
    1559        4392 :                DO jcell = 1, nimg
    1560             :                   CALL dbt_copy(t3c_der_j(jcell, kcell, i_xyz), t3c_der_k(jcell, kcell, i_xyz), &
    1561        1464 :                                 order=[1, 3, 2], move_data=.TRUE., summation=.TRUE.)
    1562        1464 :                   CALL dbt_filter(t3c_der_k(jcell, kcell, i_xyz), filter_eps)
    1563             : 
    1564        1464 :                   CALL dbt_copy(t3c_der_i(jcell, kcell, i_xyz), t3c_tmp)
    1565             :                   CALL dbt_copy(t3c_tmp, t3c_der_i(jcell, kcell, i_xyz), &
    1566        1464 :                                 order=[1, 3, 2], move_data=.TRUE., summation=.TRUE.)
    1567        2928 :                   CALL dbt_filter(t3c_der_i(jcell, kcell, i_xyz), filter_eps)
    1568             :                END DO
    1569             :             END DO
    1570             :          END DO
    1571         488 :          CALL dbt_destroy(t3c_tmp)
    1572             : 
    1573          74 :       ELSEIF (nl_3c%sym == symmetric_none) THEN
    1574         296 :          DO i_xyz = 1, 3
    1575         518 :             DO kcell = 1, ncell_RI
    1576        6204 :                DO jcell = 1, nimg
    1577        5760 :                   CALL dbt_filter(t3c_der_i(jcell, kcell, i_xyz), filter_eps)
    1578        5982 :                   CALL dbt_filter(t3c_der_k(jcell, kcell, i_xyz), filter_eps)
    1579             :                END DO
    1580             :             END DO
    1581             :          END DO
    1582             :       ELSE
    1583           0 :          CPABORT("requested symmetric case not implemented")
    1584             :       END IF
    1585             : 
    1586         562 :       IF (nl_3c%sym == symmetric_jk) THEN
    1587        1952 :          DO i_xyz = 1, 3
    1588        3416 :             DO j_img = 1, nimg
    1589        4392 :                DO i_img = 1, nimg
    1590        2928 :                   CALL dbt_destroy(t3c_der_j(i_img, j_img, i_xyz))
    1591             :                END DO
    1592             :             END DO
    1593             :          END DO
    1594             :       END IF
    1595             : 
    1596        3846 :       DO iset = 1, max_nset
    1597       10094 :          DO ibasis = 1, nbasis
    1598        6248 :             IF (ASSOCIATED(spi(iset, ibasis)%array)) DEALLOCATE (spi(iset, ibasis)%array)
    1599        6248 :             IF (ASSOCIATED(tspj(iset, ibasis)%array)) DEALLOCATE (tspj(iset, ibasis)%array)
    1600        9532 :             IF (ASSOCIATED(spk(iset, ibasis)%array)) DEALLOCATE (spk(iset, ibasis)%array)
    1601             :          END DO
    1602             :       END DO
    1603             : 
    1604         562 :       DEALLOCATE (spi, tspj, spk)
    1605             : 
    1606         562 :       CALL timestop(handle)
    1607        6522 :    END SUBROUTINE build_3c_derivatives
    1608             : 
    1609             : ! **************************************************************************************************
    1610             : !> \brief Calculates the 3c virial contributions on the fly
    1611             : !> \param work_virial ...
    1612             : !> \param t3c_trace the tensor with which the trace should be taken
    1613             : !> \param pref ...
    1614             : !> \param qs_env ...
    1615             : !> \param nl_3c 3-center neighborlist, with a distribution matching that of t3c_trace
    1616             : !> \param basis_i ...
    1617             : !> \param basis_j ...
    1618             : !> \param basis_k ...
    1619             : !> \param potential_parameter ...
    1620             : !> \param der_eps neglect integrals smaller than der_eps
    1621             : !> \param op_pos operator position.
    1622             : !>        1: calculate (i|jk) integrals,
    1623             : !>        2: calculate (ij|k) integrals
    1624             : !> this routine requires that libint has been static initialised somewhere else
    1625             : ! **************************************************************************************************
    1626          16 :    SUBROUTINE calc_3c_virial(work_virial, t3c_trace, pref, qs_env, &
    1627          16 :                              nl_3c, basis_i, basis_j, basis_k, &
    1628             :                              potential_parameter, der_eps, op_pos)
    1629             : 
    1630             :       REAL(dp), DIMENSION(3, 3), INTENT(INOUT)           :: work_virial
    1631             :       TYPE(dbt_type), INTENT(INOUT)                      :: t3c_trace
    1632             :       REAL(KIND=dp), INTENT(IN)                          :: pref
    1633             :       TYPE(qs_environment_type), POINTER                 :: qs_env
    1634             :       TYPE(neighbor_list_3c_type), INTENT(INOUT)         :: nl_3c
    1635             :       TYPE(gto_basis_set_p_type), DIMENSION(:)           :: basis_i, basis_j, basis_k
    1636             :       TYPE(libint_potential_type), INTENT(IN)            :: potential_parameter
    1637             :       REAL(KIND=dp), INTENT(IN), OPTIONAL                :: der_eps
    1638             :       INTEGER, INTENT(IN), OPTIONAL                      :: op_pos
    1639             : 
    1640             :       CHARACTER(LEN=*), PARAMETER                        :: routineN = 'calc_3c_virial'
    1641             : 
    1642             :       INTEGER :: block_end_i, block_end_j, block_end_k, block_start_i, block_start_j, &
    1643             :          block_start_k, egfi, handle, i, i_xyz, iatom, ibasis, ikind, ilist, imax, iset, j_xyz, &
    1644             :          jatom, jkind, jset, katom, kkind, kset, m_max, max_ncoj, max_nset, max_nsgfi, maxli, &
    1645             :          maxlj, maxlk, mepos, natom, nbasis, ncoi, ncoj, ncok, nseti, nsetj, nsetk, nthread, &
    1646             :          op_ij, op_jk, op_pos_prv, sgfi, sgfj, sgfk, unit_id
    1647             :       INTEGER, DIMENSION(2)                              :: bo
    1648             :       INTEGER, DIMENSION(3)                              :: blk_size, sp
    1649          16 :       INTEGER, DIMENSION(:), POINTER                     :: lmax_i, lmax_j, lmax_k, lmin_i, lmin_j, &
    1650          32 :                                                             lmin_k, npgfi, npgfj, npgfk, nsgfi, &
    1651          16 :                                                             nsgfj, nsgfk
    1652          16 :       INTEGER, DIMENSION(:, :), POINTER                  :: first_sgf_i, first_sgf_j, first_sgf_k
    1653             :       LOGICAL                                            :: found, skip
    1654             :       LOGICAL, DIMENSION(3)                              :: block_j_not_zero, block_k_not_zero, &
    1655             :                                                             der_j_zero, der_k_zero
    1656             :       REAL(dp)                                           :: force
    1657             :       REAL(dp), DIMENSION(3)                             :: der_ext_i, der_ext_j, der_ext_k
    1658             :       REAL(KIND=dp)                                      :: dij, dik, djk, dr_ij, dr_ik, dr_jk, &
    1659             :                                                             kind_radius_i, kind_radius_j, &
    1660             :                                                             kind_radius_k
    1661          16 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: ccp_buffer, cpp_buffer, &
    1662          16 :                                                             max_contraction_i, max_contraction_j, &
    1663          16 :                                                             max_contraction_k
    1664          32 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :)     :: ablock, dijk_contr, tmp_block
    1665          16 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :, :)  :: block_t_i, block_t_j, block_t_k, dijk_j, &
    1666          16 :                                                             dijk_k
    1667             :       REAL(KIND=dp), DIMENSION(3)                        :: ri, rij, rik, rj, rjk, rk, scoord
    1668          16 :       REAL(KIND=dp), DIMENSION(:), POINTER               :: set_radius_i, set_radius_j, set_radius_k
    1669          16 :       REAL(KIND=dp), DIMENSION(:, :), POINTER            :: rpgf_i, rpgf_j, rpgf_k, sphi_i, sphi_j, &
    1670          16 :                                                             sphi_k, zeti, zetj, zetk
    1671          16 :       TYPE(atomic_kind_type), DIMENSION(:), POINTER      :: atomic_kind_set
    1672             :       TYPE(cell_type), POINTER                           :: cell
    1673          16 :       TYPE(cp_2d_r_p_type), DIMENSION(:, :), POINTER     :: spi, spk, tspj
    1674             :       TYPE(cp_libint_t)                                  :: lib
    1675             :       TYPE(dft_control_type), POINTER                    :: dft_control
    1676             :       TYPE(gto_basis_set_type), POINTER                  :: basis_set
    1677             :       TYPE(mp_para_env_type), POINTER                    :: para_env
    1678             :       TYPE(neighbor_list_3c_iterator_type)               :: nl_3c_iter
    1679          16 :       TYPE(particle_type), DIMENSION(:), POINTER         :: particle_set
    1680          16 :       TYPE(qs_kind_type), DIMENSION(:), POINTER          :: qs_kind_set
    1681             : 
    1682          16 :       CALL timeset(routineN, handle)
    1683             : 
    1684          16 :       op_ij = do_potential_id; op_jk = do_potential_id
    1685             : 
    1686          16 :       IF (PRESENT(op_pos)) THEN
    1687          16 :          op_pos_prv = op_pos
    1688             :       ELSE
    1689             :          op_pos_prv = 1
    1690             :       END IF
    1691          16 :       CPASSERT(op_pos == 1)
    1692          16 :       CPASSERT(.NOT. nl_3c%sym == symmetric_jk)
    1693             : 
    1694          16 :       SELECT CASE (op_pos_prv)
    1695             :       CASE (1)
    1696          16 :          op_ij = potential_parameter%potential_type
    1697             :       CASE (2)
    1698          16 :          op_jk = potential_parameter%potential_type
    1699             :       END SELECT
    1700             : 
    1701          16 :       dr_ij = 0.0_dp; dr_jk = 0.0_dp; dr_ik = 0.0_dp
    1702             : 
    1703          16 :       IF (op_ij == do_potential_truncated .OR. op_ij == do_potential_short) THEN
    1704           6 :          dr_ij = potential_parameter%cutoff_radius*cutoff_screen_factor
    1705           6 :          dr_ik = potential_parameter%cutoff_radius*cutoff_screen_factor
    1706          10 :       ELSEIF (op_ij == do_potential_coulomb) THEN
    1707           0 :          dr_ij = 1000000.0_dp
    1708           0 :          dr_ik = 1000000.0_dp
    1709             :       END IF
    1710             : 
    1711          16 :       IF (op_jk == do_potential_truncated .OR. op_jk == do_potential_short) THEN
    1712           0 :          dr_jk = potential_parameter%cutoff_radius*cutoff_screen_factor
    1713           0 :          dr_ik = potential_parameter%cutoff_radius*cutoff_screen_factor
    1714          16 :       ELSEIF (op_jk == do_potential_coulomb) THEN
    1715           0 :          dr_jk = 1000000.0_dp
    1716           0 :          dr_ik = 1000000.0_dp
    1717             :       END IF
    1718             : 
    1719          16 :       NULLIFY (qs_kind_set, atomic_kind_set)
    1720             : 
    1721             :       ! get stuff
    1722             :       CALL get_qs_env(qs_env, atomic_kind_set=atomic_kind_set, qs_kind_set=qs_kind_set, &
    1723             :                       natom=natom, dft_control=dft_control, para_env=para_env, &
    1724          16 :                       particle_set=particle_set, cell=cell)
    1725             : 
    1726             :       !Need the max l for each basis for libint and max nset, nco and nsgf for LIBXSMM contraction
    1727          16 :       nbasis = SIZE(basis_i)
    1728          16 :       max_nsgfi = 0
    1729          16 :       max_nset = 0
    1730          16 :       maxli = 0
    1731          48 :       DO ibasis = 1, nbasis
    1732             :          CALL get_gto_basis_set(gto_basis_set=basis_i(ibasis)%gto_basis_set, maxl=imax, &
    1733          32 :                                 nset=iset, nsgf_set=nsgfi, npgf=npgfi)
    1734          32 :          maxli = MAX(maxli, imax)
    1735          32 :          max_nset = MAX(max_nset, iset)
    1736         182 :          max_nsgfi = MAX(max_nsgfi, MAXVAL(nsgfi))
    1737             :       END DO
    1738             :       max_ncoj = 0
    1739             :       maxlj = 0
    1740          48 :       DO ibasis = 1, nbasis
    1741             :          CALL get_gto_basis_set(gto_basis_set=basis_j(ibasis)%gto_basis_set, maxl=imax, &
    1742          32 :                                 nset=jset, nsgf_set=nsgfj, npgf=npgfj)
    1743          32 :          maxlj = MAX(maxlj, imax)
    1744          32 :          max_nset = MAX(max_nset, jset)
    1745         138 :          max_ncoj = MAX(max_ncoj, MAXVAL(npgfj)*ncoset(maxlj))
    1746             :       END DO
    1747             :       maxlk = 0
    1748          48 :       DO ibasis = 1, nbasis
    1749             :          CALL get_gto_basis_set(gto_basis_set=basis_k(ibasis)%gto_basis_set, maxl=imax, &
    1750          32 :                                 nset=kset, nsgf_set=nsgfk, npgf=npgfk)
    1751          32 :          maxlk = MAX(maxlk, imax)
    1752          48 :          max_nset = MAX(max_nset, kset)
    1753             :       END DO
    1754          16 :       m_max = maxli + maxlj + maxlk + 1
    1755             : 
    1756             :       !To minimize expensive memory opsand generally optimize contraction, pre-allocate
    1757             :       !contiguous sphi arrays (and transposed in the cas of sphi_i)
    1758             : 
    1759          16 :       NULLIFY (tspj, spi, spk)
    1760         920 :       ALLOCATE (spi(max_nset, nbasis), tspj(max_nset, nbasis), spk(max_nset, nbasis))
    1761             : 
    1762          48 :       DO ibasis = 1, nbasis
    1763         280 :          DO iset = 1, max_nset
    1764         232 :             NULLIFY (spi(iset, ibasis)%array)
    1765         232 :             NULLIFY (tspj(iset, ibasis)%array)
    1766             : 
    1767         264 :             NULLIFY (spk(iset, ibasis)%array)
    1768             :          END DO
    1769             :       END DO
    1770             : 
    1771          64 :       DO ilist = 1, 3
    1772         160 :          DO ibasis = 1, nbasis
    1773          96 :             IF (ilist == 1) basis_set => basis_i(ibasis)%gto_basis_set
    1774          96 :             IF (ilist == 2) basis_set => basis_j(ibasis)%gto_basis_set
    1775          96 :             IF (ilist == 3) basis_set => basis_k(ibasis)%gto_basis_set
    1776             : 
    1777         458 :             DO iset = 1, basis_set%nset
    1778             : 
    1779         314 :                ncoi = basis_set%npgf(iset)*ncoset(basis_set%lmax(iset))
    1780         314 :                sgfi = basis_set%first_sgf(1, iset)
    1781         314 :                egfi = sgfi + basis_set%nsgf_set(iset) - 1
    1782             : 
    1783         410 :                IF (ilist == 1) THEN
    1784         536 :                   ALLOCATE (spi(iset, ibasis)%array(ncoi, basis_set%nsgf_set(iset)))
    1785      177990 :                   spi(iset, ibasis)%array(:, :) = basis_set%sphi(1:ncoi, sgfi:egfi)
    1786             : 
    1787         180 :                ELSE IF (ilist == 2) THEN
    1788         360 :                   ALLOCATE (tspj(iset, ibasis)%array(basis_set%nsgf_set(iset), ncoi))
    1789        5194 :                   tspj(iset, ibasis)%array(:, :) = TRANSPOSE(basis_set%sphi(1:ncoi, sgfi:egfi))
    1790             : 
    1791             :                ELSE
    1792         360 :                   ALLOCATE (spk(iset, ibasis)%array(ncoi, basis_set%nsgf_set(iset)))
    1793        4442 :                   spk(iset, ibasis)%array(:, :) = basis_set%sphi(1:ncoi, sgfi:egfi)
    1794             :                END IF
    1795             : 
    1796             :             END DO !iset
    1797             :          END DO !ibasis
    1798             :       END DO !ilist
    1799             : 
    1800             :       !Init the truncated Coulomb operator
    1801          16 :       IF (op_ij == do_potential_truncated .OR. op_jk == do_potential_truncated) THEN
    1802             : 
    1803           6 :          IF (m_max > get_lmax_init()) THEN
    1804           0 :             IF (para_env%mepos == 0) THEN
    1805           0 :                CALL open_file(unit_number=unit_id, file_name=potential_parameter%filename)
    1806             :             END IF
    1807           0 :             CALL init(m_max, unit_id, para_env%mepos, para_env)
    1808           0 :             IF (para_env%mepos == 0) THEN
    1809           0 :                CALL close_file(unit_id)
    1810             :             END IF
    1811             :          END IF
    1812             :       END IF
    1813             : 
    1814          16 :       CALL init_md_ftable(nmax=m_max)
    1815             : 
    1816          16 :       nthread = 1
    1817          16 : !$    nthread = omp_get_max_threads()
    1818             : 
    1819             : !$OMP PARALLEL DEFAULT(NONE) &
    1820             : !$OMP SHARED (nthread,maxli,maxlk,maxlj,i,work_virial,pref,basis_i,basis_j,basis_k,dr_ij,dr_jk,dr_ik,&
    1821             : !$OMP         ncoset,potential_parameter,der_eps,tspj,spi,spk,natom,nl_3c,t3c_trace,cell,particle_set) &
    1822             : !$OMP PRIVATE (lib,nl_3c_iter,ikind,jkind,kkind,iatom,jatom,katom,rij,rjk,rik,i_xyz,j_xyz,first_sgf_i,&
    1823             : !$OMP          lmax_i,lmin_i,npgfi,nseti,nsgfi,rpgf_i,set_radius_i,sphi_i,zeti,kind_radius_i,first_sgf_j,&
    1824             : !$OMP          lmax_j,lmin_j,npgfj,nsetj,nsgfj,rpgf_j,set_radius_j,sphi_j,zetj,kind_radius_j,first_sgf_k,&
    1825             : !$OMP          lmax_k,lmin_k,npgfk,nsetk,nsgfk,rpgf_k,set_radius_k,sphi_k,zetk,kind_radius_k,djk,dij,dik,&
    1826             : !$OMP          ncoi,ncoj,ncok,sgfi,sgfj,sgfk,dijk_j,dijk_k,ri,rj,rk,max_contraction_i,max_contraction_j,&
    1827             : !$OMP          tmp_block,max_contraction_k,iset,jset,kset,block_t_i,blk_size,dijk_contr,found,sp,mepos,&
    1828             : !$OMP          block_start_j,block_end_j,block_start_k,block_end_k,block_start_i,block_end_i,block_t_k,&
    1829             : !$OMP          bo,der_ext_i,der_ext_j,der_ext_k,ablock,force,scoord,skip,cpp_buffer,ccp_buffer,&
    1830          16 : !$OMP          block_k_not_zero,der_k_zero,der_j_zero,block_t_j,block_j_not_zero)
    1831             : 
    1832             :       mepos = 0
    1833             : !$    mepos = omp_get_thread_num()
    1834             : 
    1835             :       CALL cp_libint_init_3eri1(lib, MAX(maxli, maxlj, maxlk))
    1836             :       CALL cp_libint_set_contrdepth(lib, 1)
    1837             :       CALL neighbor_list_3c_iterator_create(nl_3c_iter, nl_3c)
    1838             : 
    1839             :       !We split the provided bounds among the threads such that each threads works on a different set of atoms
    1840             : 
    1841             :       bo = get_limit(natom, nthread, mepos)
    1842             :       CALL nl_3c_iter_set_bounds(nl_3c_iter, bounds_i=bo)
    1843             : 
    1844             :       skip = .FALSE.
    1845             :       IF (bo(1) > bo(2)) skip = .TRUE.
    1846             : 
    1847             :       DO WHILE (neighbor_list_3c_iterate(nl_3c_iter) == 0)
    1848             :          CALL get_3c_iterator_info(nl_3c_iter, ikind=ikind, jkind=jkind, kkind=kkind, &
    1849             :                                    iatom=iatom, jatom=jatom, katom=katom, &
    1850             :                                    rij=rij, rjk=rjk, rik=rik)
    1851             :          IF (skip) EXIT
    1852             : 
    1853             :          CALL dbt_get_block(t3c_trace, [iatom, jatom, katom], tmp_block, found)
    1854             :          IF (.NOT. found) CYCLE
    1855             : 
    1856             :          CALL get_gto_basis_set(basis_i(ikind)%gto_basis_set, first_sgf=first_sgf_i, lmax=lmax_i, lmin=lmin_i, &
    1857             :                                 npgf=npgfi, nset=nseti, nsgf_set=nsgfi, pgf_radius=rpgf_i, set_radius=set_radius_i, &
    1858             :                                 sphi=sphi_i, zet=zeti, kind_radius=kind_radius_i)
    1859             : 
    1860             :          CALL get_gto_basis_set(basis_j(jkind)%gto_basis_set, first_sgf=first_sgf_j, lmax=lmax_j, lmin=lmin_j, &
    1861             :                                 npgf=npgfj, nset=nsetj, nsgf_set=nsgfj, pgf_radius=rpgf_j, set_radius=set_radius_j, &
    1862             :                                 sphi=sphi_j, zet=zetj, kind_radius=kind_radius_j)
    1863             : 
    1864             :          CALL get_gto_basis_set(basis_k(kkind)%gto_basis_set, first_sgf=first_sgf_k, lmax=lmax_k, lmin=lmin_k, &
    1865             :                                 npgf=npgfk, nset=nsetk, nsgf_set=nsgfk, pgf_radius=rpgf_k, set_radius=set_radius_k, &
    1866             :                                 sphi=sphi_k, zet=zetk, kind_radius=kind_radius_k)
    1867             : 
    1868             :          djk = NORM2(rjk)
    1869             :          dij = NORM2(rij)
    1870             :          dik = NORM2(rik)
    1871             : 
    1872             :          IF (kind_radius_j + kind_radius_i + dr_ij < dij) CYCLE
    1873             :          IF (kind_radius_j + kind_radius_k + dr_jk < djk) CYCLE
    1874             :          IF (kind_radius_k + kind_radius_i + dr_ik < dik) CYCLE
    1875             : 
    1876             :          ALLOCATE (max_contraction_i(nseti))
    1877             :          max_contraction_i = 0.0_dp
    1878             :          DO iset = 1, nseti
    1879             :             sgfi = first_sgf_i(1, iset)
    1880             :             max_contraction_i(iset) = MAXVAL((/(SUM(ABS(sphi_i(:, i))), i=sgfi, sgfi + nsgfi(iset) - 1)/))
    1881             :          END DO
    1882             : 
    1883             :          ALLOCATE (max_contraction_j(nsetj))
    1884             :          max_contraction_j = 0.0_dp
    1885             :          DO jset = 1, nsetj
    1886             :             sgfj = first_sgf_j(1, jset)
    1887             :             max_contraction_j(jset) = MAXVAL((/(SUM(ABS(sphi_j(:, i))), i=sgfj, sgfj + nsgfj(jset) - 1)/))
    1888             :          END DO
    1889             : 
    1890             :          ALLOCATE (max_contraction_k(nsetk))
    1891             :          max_contraction_k = 0.0_dp
    1892             :          DO kset = 1, nsetk
    1893             :             sgfk = first_sgf_k(1, kset)
    1894             :             max_contraction_k(kset) = MAXVAL((/(SUM(ABS(sphi_k(:, i))), i=sgfk, sgfk + nsgfk(kset) - 1)/))
    1895             :          END DO
    1896             : 
    1897             :          CALL dbt_blk_sizes(t3c_trace, [iatom, jatom, katom], blk_size)
    1898             : 
    1899             :          ALLOCATE (block_t_i(blk_size(2), blk_size(3), blk_size(1), 3))
    1900             :          ALLOCATE (block_t_j(blk_size(2), blk_size(3), blk_size(1), 3))
    1901             :          ALLOCATE (block_t_k(blk_size(2), blk_size(3), blk_size(1), 3))
    1902             : 
    1903             :          ALLOCATE (ablock(blk_size(2), blk_size(3), blk_size(1)))
    1904             :          DO i = 1, blk_size(1)
    1905             :             ablock(:, :, i) = tmp_block(i, :, :)
    1906             :          END DO
    1907             :          DEALLOCATE (tmp_block)
    1908             : 
    1909             :          block_t_i = 0.0_dp
    1910             :          block_t_j = 0.0_dp
    1911             :          block_t_k = 0.0_dp
    1912             :          block_j_not_zero = .FALSE.
    1913             :          block_k_not_zero = .FALSE.
    1914             : 
    1915             :          DO iset = 1, nseti
    1916             : 
    1917             :             DO jset = 1, nsetj
    1918             : 
    1919             :                IF (set_radius_j(jset) + set_radius_i(iset) + dr_ij < dij) CYCLE
    1920             : 
    1921             :                DO kset = 1, nsetk
    1922             : 
    1923             :                   IF (set_radius_j(jset) + set_radius_k(kset) + dr_jk < djk) CYCLE
    1924             :                   IF (set_radius_k(kset) + set_radius_i(iset) + dr_ik < dik) CYCLE
    1925             : 
    1926             :                   ncoi = npgfi(iset)*ncoset(lmax_i(iset))
    1927             :                   ncoj = npgfj(jset)*ncoset(lmax_j(jset))
    1928             :                   ncok = npgfk(kset)*ncoset(lmax_k(kset))
    1929             : 
    1930             :                   sgfi = first_sgf_i(1, iset)
    1931             :                   sgfj = first_sgf_j(1, jset)
    1932             :                   sgfk = first_sgf_k(1, kset)
    1933             : 
    1934             :                   IF (ncoj*ncok*ncoi > 0) THEN
    1935             :                      ALLOCATE (dijk_j(ncoj, ncok, ncoi, 3))
    1936             :                      ALLOCATE (dijk_k(ncoj, ncok, ncoi, 3))
    1937             :                      dijk_j(:, :, :, :) = 0.0_dp
    1938             :                      dijk_k(:, :, :, :) = 0.0_dp
    1939             : 
    1940             :                      der_j_zero = .FALSE.
    1941             :                      der_k_zero = .FALSE.
    1942             : 
    1943             :                      !need positions for libint. Only relative positions are needed => set ri to 0.0
    1944             :                      ri = 0.0_dp
    1945             :                      rj = rij ! ri + rij
    1946             :                      rk = rik ! ri + rik
    1947             : 
    1948             :                      CALL eri_3center_derivs(dijk_j, dijk_k, &
    1949             :                                              lmin_j(jset), lmax_j(jset), npgfj(jset), zetj(:, jset), rpgf_j(:, jset), rj, &
    1950             :                                              lmin_k(kset), lmax_k(kset), npgfk(kset), zetk(:, kset), rpgf_k(:, kset), rk, &
    1951             :                                              lmin_i(iset), lmax_i(iset), npgfi(iset), zeti(:, iset), rpgf_i(:, iset), ri, &
    1952             :                                              djk, dij, dik, lib, potential_parameter, &
    1953             :                                              der_abc_1_ext=der_ext_j, der_abc_2_ext=der_ext_k)
    1954             : 
    1955             :                      IF (PRESENT(der_eps)) THEN
    1956             :                         DO i_xyz = 1, 3
    1957             :                            IF (der_eps > der_ext_j(i_xyz)*(max_contraction_i(iset)* &
    1958             :                                                            max_contraction_j(jset)* &
    1959             :                                                            max_contraction_k(kset))) THEN
    1960             :                               der_j_zero(i_xyz) = .TRUE.
    1961             :                            END IF
    1962             :                         END DO
    1963             : 
    1964             :                         DO i_xyz = 1, 3
    1965             :                            IF (der_eps > der_ext_k(i_xyz)*(max_contraction_i(iset)* &
    1966             :                                                            max_contraction_j(jset)* &
    1967             :                                                            max_contraction_k(kset))) THEN
    1968             :                               der_k_zero(i_xyz) = .TRUE.
    1969             :                            END IF
    1970             :                         END DO
    1971             :                         IF (ALL(der_j_zero) .AND. ALL(der_k_zero)) THEN
    1972             :                            DEALLOCATE (dijk_j, dijk_k)
    1973             :                            CYCLE
    1974             :                         END IF
    1975             :                      END IF
    1976             : 
    1977             :                      ALLOCATE (dijk_contr(nsgfj(jset), nsgfk(kset), nsgfi(iset)))
    1978             : 
    1979             :                      block_start_j = sgfj
    1980             :                      block_end_j = sgfj + nsgfj(jset) - 1
    1981             :                      block_start_k = sgfk
    1982             :                      block_end_k = sgfk + nsgfk(kset) - 1
    1983             :                      block_start_i = sgfi
    1984             :                      block_end_i = sgfi + nsgfi(iset) - 1
    1985             : 
    1986             :                      DO i_xyz = 1, 3
    1987             :                         IF (der_j_zero(i_xyz)) CYCLE
    1988             : 
    1989             :                         block_j_not_zero(i_xyz) = .TRUE.
    1990             :                         CALL abc_contract_xsmm(dijk_contr, dijk_j(:, :, :, i_xyz), tspj(jset, jkind)%array, &
    1991             :                                                spk(kset, kkind)%array, spi(iset, ikind)%array, &
    1992             :                                                ncoj, ncok, ncoi, nsgfj(jset), nsgfk(kset), &
    1993             :                                                nsgfi(iset), cpp_buffer, ccp_buffer)
    1994             : 
    1995             :                         block_t_j(block_start_j:block_end_j, &
    1996             :                                   block_start_k:block_end_k, &
    1997             :                                   block_start_i:block_end_i, i_xyz) = &
    1998             :                            block_t_j(block_start_j:block_end_j, &
    1999             :                                      block_start_k:block_end_k, &
    2000             :                                      block_start_i:block_end_i, i_xyz) + &
    2001             :                            dijk_contr(:, :, :)
    2002             : 
    2003             :                      END DO
    2004             : 
    2005             :                      DO i_xyz = 1, 3
    2006             :                         IF (der_k_zero(i_xyz)) CYCLE
    2007             : 
    2008             :                         block_k_not_zero(i_xyz) = .TRUE.
    2009             :                         CALL abc_contract_xsmm(dijk_contr, dijk_k(:, :, :, i_xyz), tspj(jset, jkind)%array, &
    2010             :                                                spk(kset, kkind)%array, spi(iset, ikind)%array, &
    2011             :                                                ncoj, ncok, ncoi, nsgfj(jset), nsgfk(kset), &
    2012             :                                                nsgfi(iset), cpp_buffer, ccp_buffer)
    2013             : 
    2014             :                         block_t_k(block_start_j:block_end_j, &
    2015             :                                   block_start_k:block_end_k, &
    2016             :                                   block_start_i:block_end_i, i_xyz) = &
    2017             :                            block_t_k(block_start_j:block_end_j, &
    2018             :                                      block_start_k:block_end_k, &
    2019             :                                      block_start_i:block_end_i, i_xyz) + &
    2020             :                            dijk_contr(:, :, :)
    2021             : 
    2022             :                      END DO
    2023             : 
    2024             :                      DEALLOCATE (dijk_j, dijk_k, dijk_contr)
    2025             :                   END IF ! number of triples > 0
    2026             :                END DO
    2027             :             END DO
    2028             :          END DO
    2029             : 
    2030             :          !We obtain the derivative wrt to first center using translational invariance
    2031             :          DO i_xyz = 1, 3
    2032             :             block_t_i(:, :, :, i_xyz) = -block_t_j(:, :, :, i_xyz) - block_t_k(:, :, :, i_xyz)
    2033             :          END DO
    2034             : 
    2035             :          !virial contribution coming from deriv wrt to first center
    2036             :          DO i_xyz = 1, 3
    2037             :             force = pref*SUM(ablock(:, :, :)*block_t_i(:, :, :, i_xyz))
    2038             :             CALL real_to_scaled(scoord, particle_set(iatom)%r, cell)
    2039             :             DO j_xyz = 1, 3
    2040             : !$OMP ATOMIC
    2041             :                work_virial(i_xyz, j_xyz) = work_virial(i_xyz, j_xyz) + force*scoord(j_xyz)
    2042             :             END DO
    2043             :          END DO
    2044             : 
    2045             :          !second center
    2046             :          DO i_xyz = 1, 3
    2047             :             force = pref*SUM(ablock(:, :, :)*block_t_j(:, :, :, i_xyz))
    2048             :             CALL real_to_scaled(scoord, particle_set(iatom)%r + rij, cell)
    2049             :             DO j_xyz = 1, 3
    2050             : !$OMP ATOMIC
    2051             :                work_virial(i_xyz, j_xyz) = work_virial(i_xyz, j_xyz) + force*scoord(j_xyz)
    2052             :             END DO
    2053             :          END DO
    2054             : 
    2055             :          !third center
    2056             :          DO i_xyz = 1, 3
    2057             :             force = pref*SUM(ablock(:, :, :)*block_t_k(:, :, :, i_xyz))
    2058             :             CALL real_to_scaled(scoord, particle_set(iatom)%r + rik, cell)
    2059             :             DO j_xyz = 1, 3
    2060             : !$OMP ATOMIC
    2061             :                work_virial(i_xyz, j_xyz) = work_virial(i_xyz, j_xyz) + force*scoord(j_xyz)
    2062             :             END DO
    2063             :          END DO
    2064             : 
    2065             :          DEALLOCATE (block_t_i, block_t_j, block_t_k)
    2066             :          DEALLOCATE (max_contraction_i, max_contraction_j, max_contraction_k, ablock)
    2067             :       END DO
    2068             : 
    2069             :       IF (ALLOCATED(ccp_buffer)) DEALLOCATE (ccp_buffer)
    2070             :       IF (ALLOCATED(cpp_buffer)) DEALLOCATE (cpp_buffer)
    2071             : 
    2072             :       CALL cp_libint_cleanup_3eri1(lib)
    2073             :       CALL neighbor_list_3c_iterator_destroy(nl_3c_iter)
    2074             : !$OMP END PARALLEL
    2075             : 
    2076         132 :       DO iset = 1, max_nset
    2077         364 :          DO ibasis = 1, nbasis
    2078         232 :             IF (ASSOCIATED(spi(iset, ibasis)%array)) DEALLOCATE (spi(iset, ibasis)%array)
    2079         232 :             IF (ASSOCIATED(tspj(iset, ibasis)%array)) DEALLOCATE (tspj(iset, ibasis)%array)
    2080         348 :             IF (ASSOCIATED(spk(iset, ibasis)%array)) DEALLOCATE (spk(iset, ibasis)%array)
    2081             :          END DO
    2082             :       END DO
    2083             : 
    2084          16 :       DEALLOCATE (spi, tspj, spk)
    2085             : 
    2086          16 :       CALL timestop(handle)
    2087         128 :    END SUBROUTINE calc_3c_virial
    2088             : 
    2089             : ! **************************************************************************************************
    2090             : !> \brief Build 3-center integral tensor
    2091             : !> \param t3c empty DBCSR tensor
    2092             : !>            Should be of shape (1,1) if no kpoints are used and of shape (nimages, nimages)
    2093             : !>            if k-points are used
    2094             : !> \param filter_eps Filter threshold for tensor blocks
    2095             : !> \param qs_env ...
    2096             : !> \param nl_3c 3-center neighborlist
    2097             : !> \param basis_i ...
    2098             : !> \param basis_j ...
    2099             : !> \param basis_k ...
    2100             : !> \param potential_parameter ...
    2101             : !> \param int_eps neglect integrals smaller than int_eps
    2102             : !> \param op_pos operator position.
    2103             : !>        1: calculate (i|jk) integrals,
    2104             : !>        2: calculate (ij|k) integrals
    2105             : !> \param do_kpoints ...
    2106             : !> this routine requires that libint has been static initialised somewhere else
    2107             : !> \param do_hfx_kpoints ...
    2108             : !> \param desymmetrize ...
    2109             : !> \param cell_sym ...
    2110             : !> \param bounds_i ...
    2111             : !> \param bounds_j ...
    2112             : !> \param bounds_k ...
    2113             : !> \param RI_range ...
    2114             : !> \param img_to_RI_cell ...
    2115             : !> \param cell_to_index_ext ...
    2116             : ! **************************************************************************************************
    2117        1858 :    SUBROUTINE build_3c_integrals(t3c, filter_eps, qs_env, &
    2118        1858 :                                  nl_3c, basis_i, basis_j, basis_k, &
    2119             :                                  potential_parameter, int_eps, &
    2120             :                                  op_pos, do_kpoints, do_hfx_kpoints, desymmetrize, cell_sym, &
    2121             :                                  bounds_i, bounds_j, bounds_k, &
    2122        1858 :                                  RI_range, img_to_RI_cell, cell_to_index_ext)
    2123             : 
    2124             :       TYPE(dbt_type), DIMENSION(:, :), INTENT(INOUT)     :: t3c
    2125             :       REAL(KIND=dp), INTENT(IN)                          :: filter_eps
    2126             :       TYPE(qs_environment_type), POINTER                 :: qs_env
    2127             :       TYPE(neighbor_list_3c_type), INTENT(INOUT)         :: nl_3c
    2128             :       TYPE(gto_basis_set_p_type), DIMENSION(:)           :: basis_i, basis_j, basis_k
    2129             :       TYPE(libint_potential_type), INTENT(IN)            :: potential_parameter
    2130             :       REAL(KIND=dp), INTENT(IN), OPTIONAL                :: int_eps
    2131             :       INTEGER, INTENT(IN), OPTIONAL                      :: op_pos
    2132             :       LOGICAL, INTENT(IN), OPTIONAL                      :: do_kpoints, do_hfx_kpoints, &
    2133             :                                                             desymmetrize, cell_sym
    2134             :       INTEGER, DIMENSION(2), INTENT(IN), OPTIONAL        :: bounds_i, bounds_j, bounds_k
    2135             :       REAL(dp), INTENT(IN), OPTIONAL                     :: RI_range
    2136             :       INTEGER, DIMENSION(:), INTENT(IN), OPTIONAL        :: img_to_RI_cell
    2137             :       INTEGER, DIMENSION(:, :, :), OPTIONAL, POINTER     :: cell_to_index_ext
    2138             : 
    2139             :       CHARACTER(LEN=*), PARAMETER :: routineN = 'build_3c_integrals'
    2140             : 
    2141             :       INTEGER :: block_end_i, block_end_j, block_end_k, block_start_i, block_start_j, &
    2142             :          block_start_k, egfi, handle, handle2, i, iatom, ibasis, ikind, ilist, imax, iset, jatom, &
    2143             :          jcell, jkind, jset, katom, kcell, kkind, kset, m_max, max_ncoj, max_nset, max_nsgfi, &
    2144             :          maxli, maxlj, maxlk, mepos, natom, nbasis, ncell_RI, ncoi, ncoj, ncok, nimg, nseti, &
    2145             :          nsetj, nsetk, nthread, op_ij, op_jk, op_pos_prv, sgfi, sgfj, sgfk, unit_id
    2146        1858 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: img_to_RI_cell_prv
    2147             :       INTEGER, DIMENSION(2)                              :: bo
    2148             :       INTEGER, DIMENSION(3)                              :: blk_idx, blk_size, cell_j, cell_k, &
    2149             :                                                             kp_index_lbounds, kp_index_ubounds, sp
    2150        1858 :       INTEGER, DIMENSION(:), POINTER                     :: lmax_i, lmax_j, lmax_k, lmin_i, lmin_j, &
    2151        3716 :                                                             lmin_k, npgfi, npgfj, npgfk, nsgfi, &
    2152        1858 :                                                             nsgfj, nsgfk
    2153        1858 :       INTEGER, DIMENSION(:, :), POINTER                  :: first_sgf_i, first_sgf_j, first_sgf_k
    2154        1858 :       INTEGER, DIMENSION(:, :, :), POINTER               :: cell_to_index
    2155             :       LOGICAL                                            :: block_not_zero, cell_sym_prv, debug, &
    2156             :                                                             desymmetrize_prv, do_hfx_kpoints_prv, &
    2157             :                                                             do_kpoints_prv, found, skip
    2158             :       REAL(KIND=dp)                                      :: dij, dik, djk, dr_ij, dr_ik, dr_jk, &
    2159             :                                                             kind_radius_i, kind_radius_j, &
    2160             :                                                             kind_radius_k, max_contraction_i, &
    2161             :                                                             prefac, sijk_ext
    2162        1858 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: ccp_buffer, cpp_buffer, &
    2163        1858 :                                                             max_contraction_j, max_contraction_k
    2164        3716 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :)     :: block_t, dummy_block_t, sijk, &
    2165        3716 :                                                             sijk_contr, tmp_ijk
    2166             :       REAL(KIND=dp), DIMENSION(1, 1, 1)                  :: counter
    2167             :       REAL(KIND=dp), DIMENSION(3)                        :: ri, rij, rik, rj, rjk, rk
    2168        1858 :       REAL(KIND=dp), DIMENSION(:), POINTER               :: set_radius_i, set_radius_j, set_radius_k
    2169        1858 :       REAL(KIND=dp), DIMENSION(:, :), POINTER            :: rpgf_i, rpgf_j, rpgf_k, sphi_i, sphi_j, &
    2170        3716 :                                                             sphi_k, zeti, zetj, zetk
    2171        1858 :       TYPE(atomic_kind_type), DIMENSION(:), POINTER      :: atomic_kind_set
    2172             :       TYPE(cell_type), POINTER                           :: cell
    2173        3716 :       TYPE(cp_2d_r_p_type), DIMENSION(:, :), POINTER     :: spi, spk, tspj
    2174             :       TYPE(cp_libint_t)                                  :: lib
    2175       13006 :       TYPE(dbt_type)                                     :: t_3c_tmp
    2176             :       TYPE(dft_control_type), POINTER                    :: dft_control
    2177             :       TYPE(gto_basis_set_type), POINTER                  :: basis_set
    2178             :       TYPE(kpoint_type), POINTER                         :: kpoints
    2179             :       TYPE(mp_para_env_type), POINTER                    :: para_env
    2180             :       TYPE(neighbor_list_3c_iterator_type)               :: nl_3c_iter
    2181        1858 :       TYPE(qs_kind_type), DIMENSION(:), POINTER          :: qs_kind_set
    2182             : 
    2183        1858 :       CALL timeset(routineN, handle)
    2184             : 
    2185        1858 :       debug = .FALSE.
    2186             : 
    2187        1858 :       IF (PRESENT(do_kpoints)) THEN
    2188         368 :          do_kpoints_prv = do_kpoints
    2189             :       ELSE
    2190             :          do_kpoints_prv = .FALSE.
    2191             :       END IF
    2192             : 
    2193        1858 :       IF (PRESENT(do_hfx_kpoints)) THEN
    2194         122 :          do_hfx_kpoints_prv = do_hfx_kpoints
    2195             :       ELSE
    2196             :          do_hfx_kpoints_prv = .FALSE.
    2197             :       END IF
    2198         122 :       IF (do_hfx_kpoints_prv) THEN
    2199         122 :          CPASSERT(do_kpoints_prv)
    2200         122 :          CPASSERT(PRESENT(RI_range))
    2201         122 :          CPASSERT(PRESENT(img_to_RI_cell))
    2202             :       END IF
    2203             : 
    2204        1736 :       IF (PRESENT(img_to_RI_cell)) THEN
    2205         366 :          ALLOCATE (img_to_RI_cell_prv(SIZE(img_to_RI_cell)))
    2206        5978 :          img_to_RI_cell_prv(:) = img_to_RI_cell
    2207             :       END IF
    2208             : 
    2209        1858 :       IF (PRESENT(desymmetrize)) THEN
    2210        1858 :          desymmetrize_prv = desymmetrize
    2211             :       ELSE
    2212             :          desymmetrize_prv = .TRUE.
    2213             :       END IF
    2214             : 
    2215        1858 :       IF (PRESENT(cell_sym)) THEN
    2216           6 :          cell_sym_prv = cell_sym
    2217             :       ELSE
    2218        1852 :          cell_sym_prv = .FALSE.
    2219             :       END IF
    2220             : 
    2221        1858 :       op_ij = do_potential_id; op_jk = do_potential_id
    2222             : 
    2223        1858 :       IF (PRESENT(op_pos)) THEN
    2224         438 :          op_pos_prv = op_pos
    2225             :       ELSE
    2226        1420 :          op_pos_prv = 1
    2227             :       END IF
    2228             : 
    2229        1736 :       SELECT CASE (op_pos_prv)
    2230             :       CASE (1)
    2231        1736 :          op_ij = potential_parameter%potential_type
    2232             :       CASE (2)
    2233        1858 :          op_jk = potential_parameter%potential_type
    2234             :       END SELECT
    2235             : 
    2236        1858 :       dr_ij = 0.0_dp; dr_jk = 0.0_dp; dr_ik = 0.0_dp
    2237             : 
    2238        1858 :       IF (op_ij == do_potential_truncated .OR. op_ij == do_potential_short) THEN
    2239        1348 :          dr_ij = potential_parameter%cutoff_radius*cutoff_screen_factor
    2240        1348 :          dr_ik = potential_parameter%cutoff_radius*cutoff_screen_factor
    2241         510 :       ELSEIF (op_ij == do_potential_coulomb) THEN
    2242         192 :          dr_ij = 1000000.0_dp
    2243         192 :          dr_ik = 1000000.0_dp
    2244             :       END IF
    2245             : 
    2246        1858 :       IF (op_jk == do_potential_truncated .OR. op_jk == do_potential_short) THEN
    2247          12 :          dr_jk = potential_parameter%cutoff_radius*cutoff_screen_factor
    2248          12 :          dr_ik = potential_parameter%cutoff_radius*cutoff_screen_factor
    2249        1846 :       ELSEIF (op_jk == do_potential_coulomb) THEN
    2250           0 :          dr_jk = 1000000.0_dp
    2251           0 :          dr_ik = 1000000.0_dp
    2252             :       END IF
    2253             : 
    2254        1858 :       NULLIFY (qs_kind_set, atomic_kind_set)
    2255             : 
    2256             :       ! get stuff
    2257             :       CALL get_qs_env(qs_env, atomic_kind_set=atomic_kind_set, qs_kind_set=qs_kind_set, cell=cell, &
    2258        1858 :                       natom=natom, dft_control=dft_control, para_env=para_env)
    2259             : 
    2260        1858 :       IF (do_kpoints_prv) THEN
    2261         134 :          IF (PRESENT(cell_to_index_ext)) THEN
    2262           6 :             cell_to_index => cell_to_index_ext
    2263         240 :             nimg = MAXVAL(cell_to_index)
    2264             :          ELSE
    2265         128 :             CALL get_qs_env(qs_env, kpoints=kpoints)
    2266         128 :             CALL get_kpoint_info(kpoints, cell_to_index=cell_to_index)
    2267         128 :             nimg = dft_control%nimages
    2268             :          END IF
    2269         134 :          ncell_RI = nimg
    2270         134 :          IF (do_hfx_kpoints_prv) THEN
    2271         122 :             nimg = SIZE(t3c, 1)
    2272         122 :             ncell_RI = SIZE(t3c, 2)
    2273             :          END IF
    2274             :       ELSE
    2275             :          nimg = 1
    2276             :          ncell_RI = 1
    2277             :       END IF
    2278             : 
    2279             :       CALL alloc_block_3c(t3c, nl_3c, basis_i, basis_j, basis_k, qs_env, potential_parameter, &
    2280             :                           op_pos=op_pos_prv, do_kpoints=do_kpoints, do_hfx_kpoints=do_hfx_kpoints, &
    2281             :                           bounds_i=bounds_i, bounds_j=bounds_j, bounds_k=bounds_k, &
    2282             :                           RI_range=RI_range, img_to_RI_cell=img_to_RI_cell, cell_sym=cell_sym_prv, &
    2283        3594 :                           cell_to_index=cell_to_index)
    2284             : 
    2285        1858 :       IF (do_hfx_kpoints_prv) THEN
    2286         122 :          CPASSERT(op_pos_prv == 2)
    2287         122 :          CPASSERT(.NOT. desymmetrize_prv)
    2288        1736 :       ELSE IF (do_kpoints_prv) THEN
    2289          36 :          CPASSERT(ALL(SHAPE(t3c) == [nimg, ncell_RI]))
    2290             :       END IF
    2291             : 
    2292             :       !Need the max l for each basis for libint and max nset, nco and nsgf for LIBXSMM contraction
    2293        1858 :       nbasis = SIZE(basis_i)
    2294        1858 :       max_nsgfi = 0
    2295        1858 :       max_nset = 0
    2296        1858 :       maxli = 0
    2297        4798 :       DO ibasis = 1, nbasis
    2298             :          CALL get_gto_basis_set(gto_basis_set=basis_i(ibasis)%gto_basis_set, maxl=imax, &
    2299        2940 :                                 nset=iset, nsgf_set=nsgfi, npgf=npgfi)
    2300        2940 :          maxli = MAX(maxli, imax)
    2301        2940 :          max_nset = MAX(max_nset, iset)
    2302       16592 :          max_nsgfi = MAX(max_nsgfi, MAXVAL(nsgfi))
    2303             :       END DO
    2304             :       max_ncoj = 0
    2305             :       maxlj = 0
    2306        4798 :       DO ibasis = 1, nbasis
    2307             :          CALL get_gto_basis_set(gto_basis_set=basis_j(ibasis)%gto_basis_set, maxl=imax, &
    2308        2940 :                                 nset=jset, nsgf_set=nsgfj, npgf=npgfj)
    2309        2940 :          maxlj = MAX(maxlj, imax)
    2310        2940 :          max_nset = MAX(max_nset, jset)
    2311       11338 :          max_ncoj = MAX(max_ncoj, MAXVAL(npgfj)*ncoset(maxlj))
    2312             :       END DO
    2313             :       maxlk = 0
    2314        4798 :       DO ibasis = 1, nbasis
    2315             :          CALL get_gto_basis_set(gto_basis_set=basis_k(ibasis)%gto_basis_set, maxl=imax, &
    2316        2940 :                                 nset=kset, nsgf_set=nsgfk, npgf=npgfk)
    2317        2940 :          maxlk = MAX(maxlk, imax)
    2318        4798 :          max_nset = MAX(max_nset, kset)
    2319             :       END DO
    2320        1858 :       m_max = maxli + maxlj + maxlk
    2321             : 
    2322             :       !To minimize expensive memory ops and generally optimize contraction, pre-allocate
    2323             :       !contiguous sphi arrays (and transposed in the case of sphi_i)
    2324             : 
    2325        1858 :       NULLIFY (tspj, spi, spk)
    2326       73304 :       ALLOCATE (spi(max_nset, nbasis), tspj(max_nset, nbasis), spk(max_nset, nbasis))
    2327             : 
    2328        4798 :       DO ibasis = 1, nbasis
    2329       21338 :          DO iset = 1, max_nset
    2330       16540 :             NULLIFY (spi(iset, ibasis)%array)
    2331       16540 :             NULLIFY (tspj(iset, ibasis)%array)
    2332             : 
    2333       19480 :             NULLIFY (spk(iset, ibasis)%array)
    2334             :          END DO
    2335             :       END DO
    2336             : 
    2337        7432 :       DO ilist = 1, 3
    2338       16252 :          DO ibasis = 1, nbasis
    2339        8820 :             IF (ilist == 1) basis_set => basis_i(ibasis)%gto_basis_set
    2340        8820 :             IF (ilist == 2) basis_set => basis_j(ibasis)%gto_basis_set
    2341        8820 :             IF (ilist == 3) basis_set => basis_k(ibasis)%gto_basis_set
    2342             : 
    2343       38968 :             DO iset = 1, basis_set%nset
    2344             : 
    2345       24574 :                ncoi = basis_set%npgf(iset)*ncoset(basis_set%lmax(iset))
    2346       24574 :                sgfi = basis_set%first_sgf(1, iset)
    2347       24574 :                egfi = sgfi + basis_set%nsgf_set(iset) - 1
    2348             : 
    2349       33394 :                IF (ilist == 1) THEN
    2350       47176 :                   ALLOCATE (spi(iset, ibasis)%array(ncoi, basis_set%nsgf_set(iset)))
    2351     3277042 :                   spi(iset, ibasis)%array(:, :) = basis_set%sphi(1:ncoi, sgfi:egfi)
    2352             : 
    2353       12780 :                ELSE IF (ilist == 2) THEN
    2354       26160 :                   ALLOCATE (tspj(iset, ibasis)%array(basis_set%nsgf_set(iset), ncoi))
    2355      376292 :                   tspj(iset, ibasis)%array(:, :) = TRANSPOSE(basis_set%sphi(1:ncoi, sgfi:egfi))
    2356             : 
    2357             :                ELSE
    2358       24960 :                   ALLOCATE (spk(iset, ibasis)%array(ncoi, basis_set%nsgf_set(iset)))
    2359     1443220 :                   spk(iset, ibasis)%array(:, :) = basis_set%sphi(1:ncoi, sgfi:egfi)
    2360             :                END IF
    2361             : 
    2362             :             END DO !iset
    2363             :          END DO !ibasis
    2364             :       END DO !ilist
    2365             : 
    2366             :       !Init the truncated Coulomb operator
    2367        1858 :       IF (op_ij == do_potential_truncated .OR. op_jk == do_potential_truncated) THEN
    2368             : 
    2369        1354 :          IF (m_max > get_lmax_init()) THEN
    2370          86 :             IF (para_env%mepos == 0) THEN
    2371          43 :                CALL open_file(unit_number=unit_id, file_name=potential_parameter%filename)
    2372             :             END IF
    2373          86 :             CALL init(m_max, unit_id, para_env%mepos, para_env)
    2374          86 :             IF (para_env%mepos == 0) THEN
    2375          43 :                CALL close_file(unit_id)
    2376             :             END IF
    2377             :          END IF
    2378             :       END IF
    2379             : 
    2380        1858 :       CALL init_md_ftable(nmax=m_max)
    2381             : 
    2382        1858 :       IF (do_kpoints_prv) THEN
    2383         536 :          kp_index_lbounds = LBOUND(cell_to_index)
    2384         536 :          kp_index_ubounds = UBOUND(cell_to_index)
    2385             :       END IF
    2386             : 
    2387        7432 :       counter = 1.0_dp
    2388             : 
    2389        1858 :       nthread = 1
    2390        1858 : !$    nthread = omp_get_max_threads()
    2391             : 
    2392             : !$OMP PARALLEL DEFAULT(NONE) &
    2393             : !$OMP SHARED (nthread,do_kpoints_prv,kp_index_lbounds,kp_index_ubounds,maxli,maxlk,maxlj,bounds_i,&
    2394             : !$OMP         bounds_j,bounds_k,nimg,basis_i,basis_j,basis_k,dr_ij,dr_jk,dr_ik,ncoset,&
    2395             : !$OMP         potential_parameter,int_eps,t3c,tspj,spi,spk,debug,cell_to_index,&
    2396             : !$OMP         natom,nl_3c,cell,op_pos_prv,do_hfx_kpoints_prv,RI_range,ncell_RI, &
    2397             : !$OMP         img_to_RI_cell_prv, cell_sym_prv) &
    2398             : !$OMP PRIVATE (lib,nl_3c_iter,ikind,jkind,kkind,iatom,jatom,katom,rij,rjk,rik,cell_j,cell_k,&
    2399             : !$OMP          prefac,jcell,kcell,first_sgf_i,lmax_i,lmin_i,npgfi,nseti,nsgfi,rpgf_i,set_radius_i,&
    2400             : !$OMP          sphi_i,zeti,kind_radius_i,first_sgf_j,lmax_j,lmin_j,npgfj,nsetj,nsgfj,rpgf_j,&
    2401             : !$OMP          set_radius_j,sphi_j,zetj,kind_radius_j,first_sgf_k,lmax_k,lmin_k,npgfk,nsetk,nsgfk,&
    2402             : !$OMP          rpgf_k,set_radius_k,sphi_k,zetk,kind_radius_k,djk,dij,dik,ncoi,ncoj,ncok,sgfi,sgfj,&
    2403             : !$OMP          sgfk,sijk,ri,rj,rk,sijk_ext,block_not_zero,max_contraction_i,max_contraction_j,&
    2404             : !$OMP          max_contraction_k,iset,jset,kset,block_t,blk_size,sijk_contr,cpp_buffer,ccp_buffer,&
    2405             : !$OMP          block_start_j,block_end_j,block_start_k,block_end_k,block_start_i,block_end_i,found,&
    2406        1858 : !$OMP          dummy_block_t,sp,handle2,mepos,bo,skip,tmp_ijk,i,blk_idx)
    2407             : 
    2408             :       mepos = 0
    2409             : !$    mepos = omp_get_thread_num()
    2410             : 
    2411             :       CALL cp_libint_init_3eri(lib, MAX(maxli, maxlj, maxlk))
    2412             :       CALL cp_libint_set_contrdepth(lib, 1)
    2413             :       CALL neighbor_list_3c_iterator_create(nl_3c_iter, nl_3c)
    2414             : 
    2415             :       !We split the provided bounds among the threads such that each threads works on a different set of atoms
    2416             :       IF (PRESENT(bounds_i)) THEN
    2417             :          bo = get_limit(bounds_i(2) - bounds_i(1) + 1, nthread, mepos)
    2418             :          bo(:) = bo(:) + bounds_i(1) - 1
    2419             :          CALL nl_3c_iter_set_bounds(nl_3c_iter, bo, bounds_j, bounds_k)
    2420             :       ELSE IF (PRESENT(bounds_j)) THEN
    2421             : 
    2422             :          bo = get_limit(bounds_j(2) - bounds_j(1) + 1, nthread, mepos)
    2423             :          bo(:) = bo(:) + bounds_j(1) - 1
    2424             :          CALL nl_3c_iter_set_bounds(nl_3c_iter, bounds_i, bo, bounds_k)
    2425             :       ELSE IF (PRESENT(bounds_k)) THEN
    2426             :          bo = get_limit(bounds_k(2) - bounds_k(1) + 1, nthread, mepos)
    2427             :          bo(:) = bo(:) + bounds_k(1) - 1
    2428             :          CALL nl_3c_iter_set_bounds(nl_3c_iter, bounds_i, bounds_j, bo)
    2429             :       ELSE
    2430             :          bo = get_limit(natom, nthread, mepos)
    2431             :          CALL nl_3c_iter_set_bounds(nl_3c_iter, bo, bounds_j, bounds_k)
    2432             :       END IF
    2433             : 
    2434             :       skip = .FALSE.
    2435             :       IF (bo(1) > bo(2)) skip = .TRUE.
    2436             : 
    2437             :       DO WHILE (neighbor_list_3c_iterate(nl_3c_iter) == 0)
    2438             :          CALL get_3c_iterator_info(nl_3c_iter, ikind=ikind, jkind=jkind, kkind=kkind, &
    2439             :                                    iatom=iatom, jatom=jatom, katom=katom, &
    2440             :                                    rij=rij, rjk=rjk, rik=rik, cell_j=cell_j, cell_k=cell_k)
    2441             :          IF (skip) EXIT
    2442             : 
    2443             :          djk = NORM2(rjk)
    2444             :          dij = NORM2(rij)
    2445             :          dik = NORM2(rik)
    2446             : 
    2447             :          IF (nl_3c%sym == symmetric_jk) THEN
    2448             :             IF (jatom == katom) THEN
    2449             :                ! factor 0.5 due to double-counting of diagonal blocks
    2450             :                ! (we desymmetrize by adding transpose)
    2451             :                prefac = 0.5_dp
    2452             :             ELSE
    2453             :                prefac = 1.0_dp
    2454             :             END IF
    2455             :          ELSEIF (nl_3c%sym == symmetric_ij) THEN
    2456             :             IF (iatom == jatom) THEN
    2457             :                ! factor 0.5 due to double-counting of diagonal blocks
    2458             :                ! (we desymmetrize by adding transpose)
    2459             :                prefac = 0.5_dp
    2460             :             ELSE
    2461             :                prefac = 1.0_dp
    2462             :             END IF
    2463             :          ELSE
    2464             :             prefac = 1.0_dp
    2465             :          END IF
    2466             :          IF (do_kpoints_prv) prefac = 1.0_dp
    2467             : 
    2468             :          IF (do_kpoints_prv) THEN
    2469             : 
    2470             :             IF (ANY([cell_j(1), cell_j(2), cell_j(3)] < kp_index_lbounds) .OR. &
    2471             :                 ANY([cell_j(1), cell_j(2), cell_j(3)] > kp_index_ubounds)) CYCLE
    2472             : 
    2473             :             jcell = cell_to_index(cell_j(1), cell_j(2), cell_j(3))
    2474             :             IF (jcell > nimg .OR. jcell < 1) CYCLE
    2475             : 
    2476             :             IF (ANY([cell_k(1), cell_k(2), cell_k(3)] < kp_index_lbounds) .OR. &
    2477             :                 ANY([cell_k(1), cell_k(2), cell_k(3)] > kp_index_ubounds)) CYCLE
    2478             : 
    2479             :             kcell = cell_to_index(cell_k(1), cell_k(2), cell_k(3))
    2480             :             IF (kcell > nimg .OR. kcell < 1) CYCLE
    2481             :             !In case of HFX k-points, we only consider P^k if d_ik <= RI_range
    2482             :             IF (do_hfx_kpoints_prv) THEN
    2483             :                IF (dik > RI_range) CYCLE
    2484             :                kcell = img_to_RI_cell_prv(kcell)
    2485             :             END IF
    2486             :          ELSE
    2487             :             jcell = 1; kcell = 1
    2488             :          END IF
    2489             : 
    2490             :          IF (cell_sym_prv .AND. jcell < kcell) CYCLE
    2491             : 
    2492             :          blk_idx = [iatom, jatom, katom]
    2493             :          IF (do_hfx_kpoints_prv) THEN
    2494             :             blk_idx(3) = (kcell - 1)*natom + katom
    2495             :             kcell = 1
    2496             :          END IF
    2497             : 
    2498             :          CALL get_gto_basis_set(basis_i(ikind)%gto_basis_set, first_sgf=first_sgf_i, lmax=lmax_i, lmin=lmin_i, &
    2499             :                                 npgf=npgfi, nset=nseti, nsgf_set=nsgfi, pgf_radius=rpgf_i, set_radius=set_radius_i, &
    2500             :                                 sphi=sphi_i, zet=zeti, kind_radius=kind_radius_i)
    2501             : 
    2502             :          CALL get_gto_basis_set(basis_j(jkind)%gto_basis_set, first_sgf=first_sgf_j, lmax=lmax_j, lmin=lmin_j, &
    2503             :                                 npgf=npgfj, nset=nsetj, nsgf_set=nsgfj, pgf_radius=rpgf_j, set_radius=set_radius_j, &
    2504             :                                 sphi=sphi_j, zet=zetj, kind_radius=kind_radius_j)
    2505             : 
    2506             :          CALL get_gto_basis_set(basis_k(kkind)%gto_basis_set, first_sgf=first_sgf_k, lmax=lmax_k, lmin=lmin_k, &
    2507             :                                 npgf=npgfk, nset=nsetk, nsgf_set=nsgfk, pgf_radius=rpgf_k, set_radius=set_radius_k, &
    2508             :                                 sphi=sphi_k, zet=zetk, kind_radius=kind_radius_k)
    2509             : 
    2510             :          IF (kind_radius_j + kind_radius_i + dr_ij < dij) CYCLE
    2511             :          IF (kind_radius_j + kind_radius_k + dr_jk < djk) CYCLE
    2512             :          IF (kind_radius_k + kind_radius_i + dr_ik < dik) CYCLE
    2513             : 
    2514             :          ALLOCATE (max_contraction_j(nsetj))
    2515             :          DO jset = 1, nsetj
    2516             :             sgfj = first_sgf_j(1, jset)
    2517             :             max_contraction_j(jset) = MAXVAL((/(SUM(ABS(sphi_j(:, i))), i=sgfj, sgfj + nsgfj(jset) - 1)/))
    2518             :          END DO
    2519             : 
    2520             :          ALLOCATE (max_contraction_k(nsetk))
    2521             :          DO kset = 1, nsetk
    2522             :             sgfk = first_sgf_k(1, kset)
    2523             :             max_contraction_k(kset) = MAXVAL((/(SUM(ABS(sphi_k(:, i))), i=sgfk, sgfk + nsgfk(kset) - 1)/))
    2524             :          END DO
    2525             : 
    2526             :          CALL dbt_blk_sizes(t3c(jcell, kcell), blk_idx, blk_size)
    2527             : 
    2528             :          ALLOCATE (block_t(blk_size(2), blk_size(3), blk_size(1)))
    2529             : 
    2530             :          block_t = 0.0_dp
    2531             :          block_not_zero = .FALSE.
    2532             :          DO iset = 1, nseti
    2533             : 
    2534             :             sgfi = first_sgf_i(1, iset)
    2535             :             max_contraction_i = MAXVAL((/(SUM(ABS(sphi_i(:, i))), i=sgfi, sgfi + nsgfi(iset) - 1)/))
    2536             : 
    2537             :             DO jset = 1, nsetj
    2538             : 
    2539             :                IF (set_radius_j(jset) + set_radius_i(iset) + dr_ij < dij) CYCLE
    2540             : 
    2541             :                DO kset = 1, nsetk
    2542             : 
    2543             :                   IF (set_radius_j(jset) + set_radius_k(kset) + dr_jk < djk) CYCLE
    2544             :                   IF (set_radius_k(kset) + set_radius_i(iset) + dr_ik < dik) CYCLE
    2545             : 
    2546             :                   ncoi = npgfi(iset)*ncoset(lmax_i(iset))
    2547             :                   ncoj = npgfj(jset)*ncoset(lmax_j(jset))
    2548             :                   ncok = npgfk(kset)*ncoset(lmax_k(kset))
    2549             : 
    2550             :                   !ensure non-zero number of triples below
    2551             :                   IF (ncoj*ncok*ncoi == 0) CYCLE
    2552             : 
    2553             :                   !need positions for libint. Only relative positions are needed => set ri to 0.0
    2554             :                   ri = 0.0_dp
    2555             :                   rj = rij ! ri + rij
    2556             :                   rk = rik ! ri + rik
    2557             : 
    2558             :                   ALLOCATE (sijk(ncoj, ncok, ncoi))
    2559             :                   IF (op_pos_prv == 1) THEN
    2560             :                      sijk(:, :, :) = 0.0_dp
    2561             :                      CALL eri_3center(sijk, &
    2562             :                                       lmin_j(jset), lmax_j(jset), npgfj(jset), zetj(:, jset), rpgf_j(:, jset), rj, &
    2563             :                                       lmin_k(kset), lmax_k(kset), npgfk(kset), zetk(:, kset), rpgf_k(:, kset), rk, &
    2564             :                                       lmin_i(iset), lmax_i(iset), npgfi(iset), zeti(:, iset), rpgf_i(:, iset), ri, &
    2565             :                                       djk, dij, dik, lib, potential_parameter, int_abc_ext=sijk_ext)
    2566             :                   ELSE
    2567             :                      ALLOCATE (tmp_ijk(ncoi, ncoj, ncok))
    2568             :                      tmp_ijk(:, :, :) = 0.0_dp
    2569             :                      CALL eri_3center(tmp_ijk, &
    2570             :                                       lmin_i(iset), lmax_i(iset), npgfi(iset), zeti(:, iset), rpgf_i(:, iset), ri, &
    2571             :                                       lmin_j(jset), lmax_j(jset), npgfj(jset), zetj(:, jset), rpgf_j(:, jset), rj, &
    2572             :                                       lmin_k(kset), lmax_k(kset), npgfk(kset), zetk(:, kset), rpgf_k(:, kset), rk, &
    2573             :                                       dij, dik, djk, lib, potential_parameter, int_abc_ext=sijk_ext)
    2574             : 
    2575             :                      !F08: sijk = RESHAPE(tmp_ijk, [ncoj, ncok, ncoi], ORDER=[2, 3, 1]) with sijk not allocated
    2576             :                      DO i = 1, ncoi !TODO: revise/check for efficiency
    2577             :                         sijk(:, :, i) = tmp_ijk(i, :, :)
    2578             :                      END DO
    2579             :                      DEALLOCATE (tmp_ijk)
    2580             :                   END IF
    2581             : 
    2582             :                   IF (PRESENT(int_eps)) THEN
    2583             :                      IF (int_eps > sijk_ext*(max_contraction_i* &
    2584             :                                              max_contraction_j(jset)* &
    2585             :                                              max_contraction_k(kset))) THEN
    2586             :                         DEALLOCATE (sijk)
    2587             :                         CYCLE
    2588             :                      END IF
    2589             :                   END IF
    2590             : 
    2591             :                   block_not_zero = .TRUE.
    2592             :                   ALLOCATE (sijk_contr(nsgfj(jset), nsgfk(kset), nsgfi(iset)))
    2593             :                   CALL abc_contract_xsmm(sijk_contr, sijk, tspj(jset, jkind)%array, &
    2594             :                                          spk(kset, kkind)%array, spi(iset, ikind)%array, &
    2595             :                                          ncoj, ncok, ncoi, nsgfj(jset), nsgfk(kset), &
    2596             :                                          nsgfi(iset), cpp_buffer, ccp_buffer, prefac)
    2597             :                   DEALLOCATE (sijk)
    2598             : 
    2599             :                   sgfj = first_sgf_j(1, jset)
    2600             :                   sgfk = first_sgf_k(1, kset)
    2601             : 
    2602             :                   block_start_j = sgfj
    2603             :                   block_end_j = sgfj + nsgfj(jset) - 1
    2604             :                   block_start_k = sgfk
    2605             :                   block_end_k = sgfk + nsgfk(kset) - 1
    2606             :                   block_start_i = sgfi
    2607             :                   block_end_i = sgfi + nsgfi(iset) - 1
    2608             : 
    2609             :                   block_t(block_start_j:block_end_j, &
    2610             :                           block_start_k:block_end_k, &
    2611             :                           block_start_i:block_end_i) = &
    2612             :                      block_t(block_start_j:block_end_j, &
    2613             :                              block_start_k:block_end_k, &
    2614             :                              block_start_i:block_end_i) + &
    2615             :                      sijk_contr(:, :, :)
    2616             :                   DEALLOCATE (sijk_contr)
    2617             : 
    2618             :                END DO
    2619             : 
    2620             :             END DO
    2621             : 
    2622             :          END DO
    2623             : 
    2624             :          IF (block_not_zero) THEN
    2625             : !$OMP CRITICAL
    2626             :             CALL timeset(routineN//"_put_dbcsr", handle2)
    2627             :             IF (debug) THEN
    2628             :                CALL dbt_get_block(t3c(jcell, kcell), blk_idx, dummy_block_t, found=found)
    2629             :                CPASSERT(found)
    2630             :             END IF
    2631             : 
    2632             :             sp = SHAPE(block_t)
    2633             :             sp([2, 3, 1]) = sp ! sp = sp([2, 3, 1]) performs worse
    2634             : 
    2635             :             CALL dbt_put_block(t3c(jcell, kcell), blk_idx, sp, &
    2636             :                                RESHAPE(block_t, SHAPE=sp, ORDER=[2, 3, 1]), summation=.TRUE.)
    2637             : 
    2638             :             CALL timestop(handle2)
    2639             : !$OMP END CRITICAL
    2640             :          END IF
    2641             : 
    2642             :          DEALLOCATE (block_t)
    2643             :          DEALLOCATE (max_contraction_j, max_contraction_k)
    2644             :       END DO
    2645             : 
    2646             :       IF (ALLOCATED(ccp_buffer)) DEALLOCATE (ccp_buffer)
    2647             :       IF (ALLOCATED(cpp_buffer)) DEALLOCATE (cpp_buffer)
    2648             : 
    2649             :       CALL cp_libint_cleanup_3eri(lib)
    2650             :       CALL neighbor_list_3c_iterator_destroy(nl_3c_iter)
    2651             : !$OMP END PARALLEL
    2652             : 
    2653             :       !TODO: deal with hfx_kpoints, because should not filter by 1/2
    2654        1858 :       IF (nl_3c%sym == symmetric_jk .OR. do_kpoints_prv) THEN
    2655             : 
    2656         684 :          IF (.NOT. do_hfx_kpoints_prv) THEN
    2657        1186 :             DO kcell = 1, nimg
    2658        2274 :                DO jcell = 1, nimg
    2659             :                   ! need half of filter eps because afterwards we add transposed tensor
    2660        1712 :                   CALL dbt_filter(t3c(jcell, kcell), filter_eps/2)
    2661             :                END DO
    2662             :             END DO
    2663             :          ELSE
    2664         244 :             DO kcell = 1, ncell_RI
    2665        3492 :                DO jcell = 1, nimg
    2666        3370 :                   CALL dbt_filter(t3c(jcell, kcell), filter_eps)
    2667             :                END DO
    2668             :             END DO
    2669             :          END IF
    2670             : 
    2671         684 :          IF (desymmetrize_prv) THEN
    2672             :             ! add transposed of overlap integrals
    2673           0 :             CALL dbt_create(t3c(1, 1), t_3c_tmp)
    2674           0 :             DO kcell = 1, jcell
    2675           0 :                DO jcell = 1, nimg
    2676           0 :                   CALL dbt_copy(t3c(jcell, kcell), t_3c_tmp)
    2677           0 :                   CALL dbt_copy(t_3c_tmp, t3c(kcell, jcell), order=[1, 3, 2], summation=.TRUE., move_data=.TRUE.)
    2678           0 :                   CALL dbt_filter(t3c(kcell, jcell), filter_eps)
    2679             :                END DO
    2680             :             END DO
    2681           0 :             DO kcell = jcell + 1, nimg
    2682           0 :                DO jcell = 1, nimg
    2683           0 :                   CALL dbt_copy(t3c(jcell, kcell), t_3c_tmp)
    2684           0 :                   CALL dbt_copy(t_3c_tmp, t3c(kcell, jcell), order=[1, 3, 2], summation=.FALSE., move_data=.TRUE.)
    2685           0 :                   CALL dbt_filter(t3c(kcell, jcell), filter_eps)
    2686             :                END DO
    2687             :             END DO
    2688           0 :             CALL dbt_destroy(t_3c_tmp)
    2689             :          END IF
    2690        1174 :       ELSEIF (nl_3c%sym == symmetric_ij) THEN
    2691           0 :          DO kcell = 1, nimg
    2692           0 :             DO jcell = 1, nimg
    2693           0 :                CALL dbt_filter(t3c(jcell, kcell), filter_eps/2)
    2694             :             END DO
    2695             :          END DO
    2696        1174 :       ELSEIF (nl_3c%sym == symmetric_none) THEN
    2697        2348 :          DO kcell = 1, nimg
    2698        3522 :             DO jcell = 1, nimg
    2699        2348 :                CALL dbt_filter(t3c(jcell, kcell), filter_eps)
    2700             :             END DO
    2701             :          END DO
    2702             :       ELSE
    2703           0 :          CPABORT("requested symmetric case not implemented")
    2704             :       END IF
    2705             : 
    2706       12036 :       DO iset = 1, max_nset
    2707       28576 :          DO ibasis = 1, nbasis
    2708       16540 :             IF (ASSOCIATED(spi(iset, ibasis)%array)) DEALLOCATE (spi(iset, ibasis)%array)
    2709       16540 :             IF (ASSOCIATED(tspj(iset, ibasis)%array)) DEALLOCATE (tspj(iset, ibasis)%array)
    2710       26718 :             IF (ASSOCIATED(spk(iset, ibasis)%array)) DEALLOCATE (spk(iset, ibasis)%array)
    2711             :          END DO
    2712             :       END DO
    2713        1858 :       DEALLOCATE (spi, tspj, spk)
    2714             : 
    2715        1858 :       CALL timestop(handle)
    2716       16722 :    END SUBROUTINE build_3c_integrals
    2717             : 
    2718             : ! **************************************************************************************************
    2719             : !> \brief Calculates the derivatives of 2-center integrals, wrt to the first center
    2720             : !> \param t2c_der ...
    2721             : !> this routine requires that libint has been static initialised somewhere else
    2722             : !> \param filter_eps Filter threshold for matrix blocks
    2723             : !> \param qs_env ...
    2724             : !> \param nl_2c 2-center neighborlist
    2725             : !> \param basis_i ...
    2726             : !> \param basis_j ...
    2727             : !> \param potential_parameter ...
    2728             : !> \param do_kpoints ...
    2729             : ! **************************************************************************************************
    2730         372 :    SUBROUTINE build_2c_derivatives(t2c_der, filter_eps, qs_env, &
    2731         372 :                                    nl_2c, basis_i, basis_j, &
    2732             :                                    potential_parameter, do_kpoints)
    2733             : 
    2734             :       TYPE(dbcsr_type), DIMENSION(:, :), INTENT(INOUT)   :: t2c_der
    2735             :       REAL(KIND=dp), INTENT(IN)                          :: filter_eps
    2736             :       TYPE(qs_environment_type), POINTER                 :: qs_env
    2737             :       TYPE(neighbor_list_set_p_type), DIMENSION(:), &
    2738             :          POINTER                                         :: nl_2c
    2739             :       TYPE(gto_basis_set_p_type), DIMENSION(:)           :: basis_i, basis_j
    2740             :       TYPE(libint_potential_type), INTENT(IN)            :: potential_parameter
    2741             :       LOGICAL, INTENT(IN), OPTIONAL                      :: do_kpoints
    2742             : 
    2743             :       CHARACTER(len=*), PARAMETER :: routineN = 'build_2c_derivatives'
    2744             : 
    2745             :       INTEGER :: handle, i_xyz, iatom, ibasis, icol, ikind, imax, img, irow, iset, jatom, jkind, &
    2746             :          jset, m_max, maxli, maxlj, natom, ncoi, ncoj, nimg, nseti, nsetj, op_prv, sgfi, sgfj, &
    2747             :          unit_id
    2748             :       INTEGER, DIMENSION(3)                              :: cell_j, kp_index_lbounds, &
    2749             :                                                             kp_index_ubounds
    2750         372 :       INTEGER, DIMENSION(:), POINTER                     :: lmax_i, lmax_j, lmin_i, lmin_j, npgfi, &
    2751         372 :                                                             npgfj, nsgfi, nsgfj
    2752         372 :       INTEGER, DIMENSION(:, :), POINTER                  :: first_sgf_i, first_sgf_j
    2753         372 :       INTEGER, DIMENSION(:, :, :), POINTER               :: cell_to_index
    2754             :       LOGICAL                                            :: do_kpoints_prv, do_symmetric, found, &
    2755             :                                                             trans
    2756             :       REAL(KIND=dp)                                      :: dab
    2757         372 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :)        :: dij_contr, dij_rs
    2758         372 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :)     :: dij
    2759             :       REAL(KIND=dp), DIMENSION(3)                        :: ri, rij, rj
    2760         372 :       REAL(KIND=dp), DIMENSION(:), POINTER               :: set_radius_i, set_radius_j
    2761         372 :       REAL(KIND=dp), DIMENSION(:, :), POINTER            :: rpgf_i, rpgf_j, sphi_i, sphi_j, zeti, &
    2762         372 :                                                             zetj
    2763         372 :       TYPE(atomic_kind_type), DIMENSION(:), POINTER      :: atomic_kind_set
    2764        1488 :       TYPE(block_p_type), DIMENSION(3)                   :: block_t
    2765             :       TYPE(cp_libint_t)                                  :: lib
    2766             :       TYPE(dft_control_type), POINTER                    :: dft_control
    2767             :       TYPE(kpoint_type), POINTER                         :: kpoints
    2768             :       TYPE(mp_para_env_type), POINTER                    :: para_env
    2769             :       TYPE(neighbor_list_iterator_p_type), &
    2770         372 :          DIMENSION(:), POINTER                           :: nl_iterator
    2771         372 :       TYPE(qs_kind_type), DIMENSION(:), POINTER          :: qs_kind_set
    2772             : 
    2773         372 :       CALL timeset(routineN, handle)
    2774             : 
    2775         372 :       IF (PRESENT(do_kpoints)) THEN
    2776          84 :          do_kpoints_prv = do_kpoints
    2777             :       ELSE
    2778             :          do_kpoints_prv = .FALSE.
    2779             :       END IF
    2780             : 
    2781         372 :       op_prv = potential_parameter%potential_type
    2782             : 
    2783         372 :       NULLIFY (qs_kind_set, atomic_kind_set, block_t(1)%block, block_t(2)%block, block_t(3)%block, cell_to_index)
    2784             : 
    2785             :       ! get stuff
    2786             :       CALL get_qs_env(qs_env, atomic_kind_set=atomic_kind_set, qs_kind_set=qs_kind_set, &
    2787         372 :                       natom=natom, kpoints=kpoints, dft_control=dft_control, para_env=para_env)
    2788             : 
    2789         372 :       IF (do_kpoints_prv) THEN
    2790          84 :          nimg = SIZE(t2c_der, 1)
    2791          84 :          CALL get_kpoint_info(kpoints, cell_to_index=cell_to_index)
    2792         336 :          kp_index_lbounds = LBOUND(cell_to_index)
    2793         336 :          kp_index_ubounds = UBOUND(cell_to_index)
    2794             :       ELSE
    2795             :          nimg = 1
    2796             :       END IF
    2797             : 
    2798             :       ! check for symmetry
    2799         372 :       CPASSERT(SIZE(nl_2c) > 0)
    2800         372 :       CALL get_neighbor_list_set_p(neighbor_list_sets=nl_2c, symmetric=do_symmetric)
    2801             : 
    2802         372 :       IF (do_symmetric) THEN
    2803         576 :          DO img = 1, nimg
    2804             :             !Derivtive matrix is assymetric
    2805        1440 :             DO i_xyz = 1, 3
    2806        1152 :                CPASSERT(dbcsr_get_matrix_type(t2c_der(img, i_xyz)) == dbcsr_type_antisymmetric)
    2807             :             END DO
    2808             :          END DO
    2809             :       ELSE
    2810        2104 :          DO img = 1, nimg
    2811        8164 :             DO i_xyz = 1, 3
    2812        8080 :                CPASSERT(dbcsr_get_matrix_type(t2c_der(img, i_xyz)) == dbcsr_type_no_symmetry)
    2813             :             END DO
    2814             :          END DO
    2815             :       END IF
    2816             : 
    2817        2680 :       DO img = 1, nimg
    2818        9604 :          DO i_xyz = 1, 3
    2819        9232 :             CALL cp_dbcsr_alloc_block_from_nbl(t2c_der(img, i_xyz), nl_2c)
    2820             :          END DO
    2821             :       END DO
    2822             : 
    2823         372 :       maxli = 0
    2824        1062 :       DO ibasis = 1, SIZE(basis_i)
    2825         690 :          CALL get_gto_basis_set(gto_basis_set=basis_i(ibasis)%gto_basis_set, maxl=imax)
    2826        1062 :          maxli = MAX(maxli, imax)
    2827             :       END DO
    2828         372 :       maxlj = 0
    2829        1062 :       DO ibasis = 1, SIZE(basis_j)
    2830         690 :          CALL get_gto_basis_set(gto_basis_set=basis_j(ibasis)%gto_basis_set, maxl=imax)
    2831        1062 :          maxlj = MAX(maxlj, imax)
    2832             :       END DO
    2833             : 
    2834         372 :       m_max = maxli + maxlj + 1
    2835             : 
    2836             :       !Init the truncated Coulomb operator
    2837         372 :       IF (op_prv == do_potential_truncated) THEN
    2838             : 
    2839          54 :          IF (m_max > get_lmax_init()) THEN
    2840          18 :             IF (para_env%mepos == 0) THEN
    2841           9 :                CALL open_file(unit_number=unit_id, file_name=potential_parameter%filename)
    2842             :             END IF
    2843          18 :             CALL init(m_max, unit_id, para_env%mepos, para_env)
    2844          18 :             IF (para_env%mepos == 0) THEN
    2845           9 :                CALL close_file(unit_id)
    2846             :             END IF
    2847             :          END IF
    2848             :       END IF
    2849             : 
    2850         372 :       CALL init_md_ftable(nmax=m_max)
    2851             : 
    2852         372 :       CALL cp_libint_init_2eri1(lib, MAX(maxli, maxlj))
    2853         372 :       CALL cp_libint_set_contrdepth(lib, 1)
    2854             : 
    2855         372 :       CALL neighbor_list_iterator_create(nl_iterator, nl_2c)
    2856       14603 :       DO WHILE (neighbor_list_iterate(nl_iterator) == 0)
    2857             : 
    2858             :          CALL get_iterator_info(nl_iterator, ikind=ikind, jkind=jkind, &
    2859       14231 :                                 iatom=iatom, jatom=jatom, r=rij, cell=cell_j)
    2860       14231 :          IF (do_kpoints_prv) THEN
    2861       10010 :             IF (ANY([cell_j(1), cell_j(2), cell_j(3)] < kp_index_lbounds) .OR. &
    2862             :                 ANY([cell_j(1), cell_j(2), cell_j(3)] > kp_index_ubounds)) CYCLE
    2863        1430 :             img = cell_to_index(cell_j(1), cell_j(2), cell_j(3))
    2864        1430 :             IF (img > nimg .OR. img < 1) CYCLE
    2865             :          ELSE
    2866             :             img = 1
    2867             :          END IF
    2868             : 
    2869             :          CALL get_gto_basis_set(basis_i(ikind)%gto_basis_set, first_sgf=first_sgf_i, lmax=lmax_i, lmin=lmin_i, &
    2870             :                                 npgf=npgfi, nset=nseti, nsgf_set=nsgfi, pgf_radius=rpgf_i, set_radius=set_radius_i, &
    2871       14203 :                                 sphi=sphi_i, zet=zeti)
    2872             : 
    2873             :          CALL get_gto_basis_set(basis_j(jkind)%gto_basis_set, first_sgf=first_sgf_j, lmax=lmax_j, lmin=lmin_j, &
    2874             :                                 npgf=npgfj, nset=nsetj, nsgf_set=nsgfj, pgf_radius=rpgf_j, set_radius=set_radius_j, &
    2875       14203 :                                 sphi=sphi_j, zet=zetj)
    2876             : 
    2877       14203 :          IF (do_symmetric) THEN
    2878       12801 :             IF (iatom <= jatom) THEN
    2879        7824 :                irow = iatom
    2880        7824 :                icol = jatom
    2881             :             ELSE
    2882        4977 :                irow = jatom
    2883        4977 :                icol = iatom
    2884             :             END IF
    2885             :          ELSE
    2886        1402 :             irow = iatom
    2887        1402 :             icol = jatom
    2888             :          END IF
    2889             : 
    2890       56812 :          dab = NORM2(rij)
    2891       14203 :          trans = do_symmetric .AND. (iatom > jatom)
    2892             : 
    2893       56812 :          DO i_xyz = 1, 3
    2894             :             CALL dbcsr_get_block_p(matrix=t2c_der(img, i_xyz), &
    2895       42609 :                                    row=irow, col=icol, BLOCK=block_t(i_xyz)%block, found=found)
    2896       56812 :             CPASSERT(found)
    2897             :          END DO
    2898             : 
    2899       48405 :          DO iset = 1, nseti
    2900             : 
    2901       33830 :             ncoi = npgfi(iset)*ncoset(lmax_i(iset))
    2902       33830 :             sgfi = first_sgf_i(1, iset)
    2903             : 
    2904      165921 :             DO jset = 1, nsetj
    2905             : 
    2906      117860 :                ncoj = npgfj(jset)*ncoset(lmax_j(jset))
    2907      117860 :                sgfj = first_sgf_j(1, jset)
    2908             : 
    2909      151690 :                IF (ncoi*ncoj > 0) THEN
    2910      471440 :                   ALLOCATE (dij_contr(nsgfi(iset), nsgfj(jset)))
    2911      589300 :                   ALLOCATE (dij(ncoi, ncoj, 3))
    2912   312725222 :                   dij(:, :, :) = 0.0_dp
    2913             : 
    2914      117860 :                   ri = 0.0_dp
    2915      117860 :                   rj = rij
    2916             : 
    2917             :                   CALL eri_2center_derivs(dij, lmin_i(iset), lmax_i(iset), npgfi(iset), zeti(:, iset), &
    2918             :                                           rpgf_i(:, iset), ri, lmin_j(jset), lmax_j(jset), npgfj(jset), zetj(:, jset), &
    2919      117860 :                                           rpgf_j(:, jset), rj, dab, lib, potential_parameter)
    2920             : 
    2921      471440 :                   DO i_xyz = 1, 3
    2922             : 
    2923   123133524 :                      dij_contr(:, :) = 0.0_dp
    2924             :                      CALL ab_contract(dij_contr, dij(:, :, i_xyz), &
    2925             :                                       sphi_i(:, sgfi:), sphi_j(:, sgfj:), &
    2926      353580 :                                       ncoi, ncoj, nsgfi(iset), nsgfj(jset))
    2927             : 
    2928      353580 :                      IF (trans) THEN
    2929             :                         !if transpose, then -1 factor for antisymmetry
    2930      448116 :                         ALLOCATE (dij_rs(nsgfj(jset), nsgfi(iset)))
    2931    44937846 :                         dij_rs(:, :) = -1.0_dp*TRANSPOSE(dij_contr)
    2932             :                      ELSE
    2933      966204 :                         ALLOCATE (dij_rs(nsgfi(iset), nsgfj(jset)))
    2934    78086691 :                         dij_rs(:, :) = dij_contr
    2935             :                      END IF
    2936             : 
    2937             :                      CALL block_add("IN", dij_rs, &
    2938             :                                     nsgfi(iset), nsgfj(jset), block_t(i_xyz)%block, &
    2939      353580 :                                     sgfi, sgfj, trans=trans)
    2940      471440 :                      DEALLOCATE (dij_rs)
    2941             :                   END DO
    2942             : 
    2943      117860 :                   DEALLOCATE (dij, dij_contr)
    2944             :                END IF
    2945             :             END DO
    2946             :          END DO
    2947             :       END DO
    2948             : 
    2949         372 :       CALL cp_libint_cleanup_2eri1(lib)
    2950         372 :       CALL neighbor_list_iterator_release(nl_iterator)
    2951             : 
    2952        2680 :       DO img = 1, nimg
    2953        9604 :          DO i_xyz = 1, 3
    2954        6924 :             CALL dbcsr_finalize(t2c_der(img, i_xyz))
    2955        9232 :             CALL dbcsr_filter(t2c_der(img, i_xyz), filter_eps)
    2956             :          END DO
    2957             :       END DO
    2958             : 
    2959         372 :       CALL timestop(handle)
    2960             : 
    2961        1116 :    END SUBROUTINE build_2c_derivatives
    2962             : 
    2963             : ! **************************************************************************************************
    2964             : !> \brief Calculates the virial coming from 2c derivatives on the fly
    2965             : !> \param work_virial ...
    2966             : !> \param t2c_trace the 2c tensor that we should trace with the derivatives
    2967             : !> \param pref ...
    2968             : !> \param qs_env ...
    2969             : !> \param nl_2c 2-center neighborlist. Assumed to have compatible distribution with t2c_trace,
    2970             : !>              and to be non-symmetric
    2971             : !> \param basis_i ...
    2972             : !> \param basis_j ...
    2973             : !> \param potential_parameter ...
    2974             : ! **************************************************************************************************
    2975          12 :    SUBROUTINE calc_2c_virial(work_virial, t2c_trace, pref, qs_env, nl_2c, basis_i, basis_j, potential_parameter)
    2976             :       REAL(dp), DIMENSION(3, 3), INTENT(INOUT)           :: work_virial
    2977             :       TYPE(dbcsr_type), INTENT(INOUT)                    :: t2c_trace
    2978             :       REAL(KIND=dp), INTENT(IN)                          :: pref
    2979             :       TYPE(qs_environment_type), POINTER                 :: qs_env
    2980             :       TYPE(neighbor_list_set_p_type), DIMENSION(:), &
    2981             :          POINTER                                         :: nl_2c
    2982             :       TYPE(gto_basis_set_p_type), DIMENSION(:)           :: basis_i, basis_j
    2983             :       TYPE(libint_potential_type), INTENT(IN)            :: potential_parameter
    2984             : 
    2985             :       CHARACTER(len=*), PARAMETER                        :: routineN = 'calc_2c_virial'
    2986             : 
    2987             :       INTEGER :: handle, i_xyz, iatom, ibasis, ikind, imax, iset, j_xyz, jatom, jkind, jset, &
    2988             :          m_max, maxli, maxlj, natom, ncoi, ncoj, nseti, nsetj, op_prv, sgfi, sgfj, unit_id
    2989          12 :       INTEGER, DIMENSION(:), POINTER                     :: lmax_i, lmax_j, lmin_i, lmin_j, npgfi, &
    2990          12 :                                                             npgfj, nsgfi, nsgfj
    2991          12 :       INTEGER, DIMENSION(:, :), POINTER                  :: first_sgf_i, first_sgf_j
    2992             :       LOGICAL                                            :: do_symmetric, found
    2993             :       REAL(dp)                                           :: force
    2994          12 :       REAL(dp), DIMENSION(:, :), POINTER                 :: pblock
    2995             :       REAL(KIND=dp)                                      :: dab
    2996          12 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :)        :: dij_contr
    2997          12 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :)     :: dij
    2998             :       REAL(KIND=dp), DIMENSION(3)                        :: ri, rij, rj, scoord
    2999          12 :       REAL(KIND=dp), DIMENSION(:), POINTER               :: set_radius_i, set_radius_j
    3000          12 :       REAL(KIND=dp), DIMENSION(:, :), POINTER            :: rpgf_i, rpgf_j, sphi_i, sphi_j, zeti, &
    3001          12 :                                                             zetj
    3002          12 :       TYPE(atomic_kind_type), DIMENSION(:), POINTER      :: atomic_kind_set
    3003             :       TYPE(cell_type), POINTER                           :: cell
    3004             :       TYPE(cp_libint_t)                                  :: lib
    3005             :       TYPE(dft_control_type), POINTER                    :: dft_control
    3006             :       TYPE(mp_para_env_type), POINTER                    :: para_env
    3007             :       TYPE(neighbor_list_iterator_p_type), &
    3008          12 :          DIMENSION(:), POINTER                           :: nl_iterator
    3009          12 :       TYPE(particle_type), DIMENSION(:), POINTER         :: particle_set
    3010          12 :       TYPE(qs_kind_type), DIMENSION(:), POINTER          :: qs_kind_set
    3011             : 
    3012          12 :       CALL timeset(routineN, handle)
    3013             : 
    3014          12 :       op_prv = potential_parameter%potential_type
    3015             : 
    3016          12 :       NULLIFY (qs_kind_set, atomic_kind_set, pblock, particle_set, cell)
    3017             : 
    3018             :       ! get stuff
    3019             :       CALL get_qs_env(qs_env, atomic_kind_set=atomic_kind_set, qs_kind_set=qs_kind_set, &
    3020             :                       natom=natom, dft_control=dft_control, para_env=para_env, &
    3021          12 :                       particle_set=particle_set, cell=cell)
    3022             : 
    3023             :       ! check for symmetry
    3024          12 :       CPASSERT(SIZE(nl_2c) > 0)
    3025          12 :       CALL get_neighbor_list_set_p(neighbor_list_sets=nl_2c, symmetric=do_symmetric)
    3026          12 :       CPASSERT(.NOT. do_symmetric)
    3027             : 
    3028          12 :       maxli = 0
    3029          36 :       DO ibasis = 1, SIZE(basis_i)
    3030          24 :          CALL get_gto_basis_set(gto_basis_set=basis_i(ibasis)%gto_basis_set, maxl=imax)
    3031          36 :          maxli = MAX(maxli, imax)
    3032             :       END DO
    3033          12 :       maxlj = 0
    3034          36 :       DO ibasis = 1, SIZE(basis_j)
    3035          24 :          CALL get_gto_basis_set(gto_basis_set=basis_j(ibasis)%gto_basis_set, maxl=imax)
    3036          36 :          maxlj = MAX(maxlj, imax)
    3037             :       END DO
    3038             : 
    3039          12 :       m_max = maxli + maxlj + 1
    3040             : 
    3041             :       !Init the truncated Coulomb operator
    3042          12 :       IF (op_prv == do_potential_truncated) THEN
    3043             : 
    3044           2 :          IF (m_max > get_lmax_init()) THEN
    3045           0 :             IF (para_env%mepos == 0) THEN
    3046           0 :                CALL open_file(unit_number=unit_id, file_name=potential_parameter%filename)
    3047             :             END IF
    3048           0 :             CALL init(m_max, unit_id, para_env%mepos, para_env)
    3049           0 :             IF (para_env%mepos == 0) THEN
    3050           0 :                CALL close_file(unit_id)
    3051             :             END IF
    3052             :          END IF
    3053             :       END IF
    3054             : 
    3055          12 :       CALL init_md_ftable(nmax=m_max)
    3056             : 
    3057          12 :       CALL cp_libint_init_2eri1(lib, MAX(maxli, maxlj))
    3058          12 :       CALL cp_libint_set_contrdepth(lib, 1)
    3059             : 
    3060          12 :       CALL neighbor_list_iterator_create(nl_iterator, nl_2c)
    3061        1644 :       DO WHILE (neighbor_list_iterate(nl_iterator) == 0)
    3062             : 
    3063             :          CALL get_iterator_info(nl_iterator, ikind=ikind, jkind=jkind, &
    3064        1632 :                                 iatom=iatom, jatom=jatom, r=rij)
    3065             : 
    3066             :          CALL get_gto_basis_set(basis_i(ikind)%gto_basis_set, first_sgf=first_sgf_i, lmax=lmax_i, lmin=lmin_i, &
    3067             :                                 npgf=npgfi, nset=nseti, nsgf_set=nsgfi, pgf_radius=rpgf_i, set_radius=set_radius_i, &
    3068        1632 :                                 sphi=sphi_i, zet=zeti)
    3069             : 
    3070             :          CALL get_gto_basis_set(basis_j(jkind)%gto_basis_set, first_sgf=first_sgf_j, lmax=lmax_j, lmin=lmin_j, &
    3071             :                                 npgf=npgfj, nset=nsetj, nsgf_set=nsgfj, pgf_radius=rpgf_j, set_radius=set_radius_j, &
    3072        1632 :                                 sphi=sphi_j, zet=zetj)
    3073             : 
    3074        6528 :          dab = NORM2(rij)
    3075             : 
    3076        1632 :          CALL dbcsr_get_block_p(t2c_trace, iatom, jatom, pblock, found)
    3077        1632 :          IF (.NOT. found) CYCLE
    3078             : 
    3079        6113 :          DO iset = 1, nseti
    3080             : 
    3081        4469 :             ncoi = npgfi(iset)*ncoset(lmax_i(iset))
    3082        4469 :             sgfi = first_sgf_i(1, iset)
    3083             : 
    3084       25808 :             DO jset = 1, nsetj
    3085             : 
    3086       19707 :                ncoj = npgfj(jset)*ncoset(lmax_j(jset))
    3087       19707 :                sgfj = first_sgf_j(1, jset)
    3088             : 
    3089       24176 :                IF (ncoi*ncoj > 0) THEN
    3090       78828 :                   ALLOCATE (dij_contr(nsgfi(iset), nsgfj(jset)))
    3091       98535 :                   ALLOCATE (dij(ncoi, ncoj, 3))
    3092     5649186 :                   dij(:, :, :) = 0.0_dp
    3093             : 
    3094       19707 :                   ri = 0.0_dp
    3095       19707 :                   rj = rij
    3096             : 
    3097             :                   CALL eri_2center_derivs(dij, lmin_i(iset), lmax_i(iset), npgfi(iset), zeti(:, iset), &
    3098             :                                           rpgf_i(:, iset), ri, lmin_j(jset), lmax_j(jset), npgfj(jset), zetj(:, jset), &
    3099       19707 :                                           rpgf_j(:, jset), rj, dab, lib, potential_parameter)
    3100             : 
    3101       78828 :                   DO i_xyz = 1, 3
    3102             : 
    3103     2522610 :                      dij_contr(:, :) = 0.0_dp
    3104             :                      CALL ab_contract(dij_contr, dij(:, :, i_xyz), &
    3105             :                                       sphi_i(:, sgfi:), sphi_j(:, sgfj:), &
    3106       59121 :                                       ncoi, ncoj, nsgfi(iset), nsgfj(jset))
    3107             : 
    3108     2522610 :                      force = SUM(pblock(sgfi:sgfi + nsgfi(iset) - 1, sgfj:sgfj + nsgfj(jset) - 1)*dij_contr(:, :))
    3109       59121 :                      force = pref*force
    3110             : 
    3111             :                      !iatom virial
    3112       59121 :                      CALL real_to_scaled(scoord, particle_set(iatom)%r, cell)
    3113      236484 :                      DO j_xyz = 1, 3
    3114      236484 :                         work_virial(i_xyz, j_xyz) = work_virial(i_xyz, j_xyz) + force*scoord(j_xyz)
    3115             :                      END DO
    3116             : 
    3117             :                      !jatom virial
    3118      236484 :                      CALL real_to_scaled(scoord, particle_set(iatom)%r + rij, cell)
    3119      256191 :                      DO j_xyz = 1, 3
    3120      236484 :                         work_virial(i_xyz, j_xyz) = work_virial(i_xyz, j_xyz) - force*scoord(j_xyz)
    3121             :                      END DO
    3122             :                   END DO
    3123             : 
    3124       19707 :                   DEALLOCATE (dij, dij_contr)
    3125             :                END IF
    3126             :             END DO
    3127             :          END DO
    3128             :       END DO
    3129             : 
    3130          12 :       CALL neighbor_list_iterator_release(nl_iterator)
    3131          12 :       CALL cp_libint_cleanup_2eri1(lib)
    3132             : 
    3133          12 :       CALL timestop(handle)
    3134             : 
    3135          24 :    END SUBROUTINE calc_2c_virial
    3136             : 
    3137             : ! **************************************************************************************************
    3138             : !> \brief ...
    3139             : !> \param t2c empty DBCSR matrix
    3140             : !> \param filter_eps Filter threshold for matrix blocks
    3141             : !> \param qs_env ...
    3142             : !> \param nl_2c 2-center neighborlist
    3143             : !> \param basis_i ...
    3144             : !> \param basis_j ...
    3145             : !> \param potential_parameter ...
    3146             : !> \param do_kpoints ...
    3147             : !> \param do_hfx_kpoints ...
    3148             : !> this routine requires that libint has been static initialised somewhere else
    3149             : !> \param ext_kpoints ...
    3150             : !> \param regularization_RI ...
    3151             : ! **************************************************************************************************
    3152         908 :    SUBROUTINE build_2c_integrals(t2c, filter_eps, qs_env, &
    3153         908 :                                  nl_2c, basis_i, basis_j, &
    3154             :                                  potential_parameter, do_kpoints, &
    3155             :                                  do_hfx_kpoints, ext_kpoints, regularization_RI)
    3156             : 
    3157             :       TYPE(dbcsr_type), DIMENSION(:), INTENT(INOUT)      :: t2c
    3158             :       REAL(KIND=dp), INTENT(IN)                          :: filter_eps
    3159             :       TYPE(qs_environment_type), POINTER                 :: qs_env
    3160             :       TYPE(neighbor_list_set_p_type), DIMENSION(:), &
    3161             :          POINTER                                         :: nl_2c
    3162             :       TYPE(gto_basis_set_p_type), DIMENSION(:)           :: basis_i, basis_j
    3163             :       TYPE(libint_potential_type), INTENT(IN)            :: potential_parameter
    3164             :       LOGICAL, INTENT(IN), OPTIONAL                      :: do_kpoints, do_hfx_kpoints
    3165             :       TYPE(kpoint_type), OPTIONAL, POINTER               :: ext_kpoints
    3166             :       REAL(KIND=dp), OPTIONAL                            :: regularization_RI
    3167             : 
    3168             :       CHARACTER(len=*), PARAMETER :: routineN = 'build_2c_integrals'
    3169             : 
    3170             :       INTEGER :: handle, i_diag, iatom, ibasis, icol, ikind, imax, img, irow, iset, jatom, jkind, &
    3171             :          jset, m_max, maxli, maxlj, natom, ncoi, ncoj, nimg, nseti, nsetj, op_prv, sgfi, sgfj, &
    3172             :          unit_id
    3173             :       INTEGER, DIMENSION(3)                              :: cell_j, kp_index_lbounds, &
    3174             :                                                             kp_index_ubounds
    3175         908 :       INTEGER, DIMENSION(:), POINTER                     :: lmax_i, lmax_j, lmin_i, lmin_j, npgfi, &
    3176         908 :                                                             npgfj, nsgfi, nsgfj
    3177         908 :       INTEGER, DIMENSION(:, :), POINTER                  :: first_sgf_i, first_sgf_j
    3178         908 :       INTEGER, DIMENSION(:, :, :), POINTER               :: cell_to_index
    3179             :       LOGICAL                                            :: do_hfx_kpoints_prv, do_kpoints_prv, &
    3180             :                                                             do_symmetric, found, trans
    3181             :       REAL(KIND=dp)                                      :: dab, min_zet
    3182         908 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :)        :: sij, sij_contr, sij_rs
    3183             :       REAL(KIND=dp), DIMENSION(3)                        :: ri, rij, rj
    3184         908 :       REAL(KIND=dp), DIMENSION(:), POINTER               :: set_radius_i, set_radius_j
    3185         908 :       REAL(KIND=dp), DIMENSION(:, :), POINTER            :: rpgf_i, rpgf_j, sphi_i, sphi_j, zeti, &
    3186         908 :                                                             zetj
    3187         908 :       TYPE(atomic_kind_type), DIMENSION(:), POINTER      :: atomic_kind_set
    3188             :       TYPE(block_p_type)                                 :: block_t
    3189             :       TYPE(cell_type), POINTER                           :: cell
    3190             :       TYPE(cp_libint_t)                                  :: lib
    3191             :       TYPE(dft_control_type), POINTER                    :: dft_control
    3192             :       TYPE(kpoint_type), POINTER                         :: kpoints
    3193             :       TYPE(mp_para_env_type), POINTER                    :: para_env
    3194             :       TYPE(neighbor_list_iterator_p_type), &
    3195         908 :          DIMENSION(:), POINTER                           :: nl_iterator
    3196         908 :       TYPE(qs_kind_type), DIMENSION(:), POINTER          :: qs_kind_set
    3197             : 
    3198         908 :       CALL timeset(routineN, handle)
    3199             : 
    3200         908 :       IF (PRESENT(do_kpoints)) THEN
    3201         858 :          do_kpoints_prv = do_kpoints
    3202             :       ELSE
    3203             :          do_kpoints_prv = .FALSE.
    3204             :       END IF
    3205             : 
    3206         908 :       IF (PRESENT(do_hfx_kpoints)) THEN
    3207         266 :          do_hfx_kpoints_prv = do_hfx_kpoints
    3208             :       ELSE
    3209             :          do_hfx_kpoints_prv = .FALSE.
    3210             :       END IF
    3211         266 :       IF (do_hfx_kpoints_prv) THEN
    3212         140 :          CPASSERT(do_kpoints_prv)
    3213             :       END IF
    3214             : 
    3215         908 :       op_prv = potential_parameter%potential_type
    3216             : 
    3217         908 :       NULLIFY (qs_kind_set, atomic_kind_set, block_t%block, cell_to_index)
    3218             : 
    3219             :       ! get stuff
    3220             :       CALL get_qs_env(qs_env, atomic_kind_set=atomic_kind_set, qs_kind_set=qs_kind_set, cell=cell, &
    3221         908 :                       natom=natom, dft_control=dft_control, para_env=para_env, kpoints=kpoints)
    3222             : 
    3223         908 :       IF (PRESENT(ext_kpoints)) kpoints => ext_kpoints
    3224             : 
    3225         908 :       IF (do_kpoints_prv) THEN
    3226         504 :          nimg = SIZE(t2c)
    3227         504 :          CALL get_kpoint_info(kpoints, cell_to_index=cell_to_index)
    3228        2016 :          kp_index_lbounds = LBOUND(cell_to_index)
    3229        2016 :          kp_index_ubounds = UBOUND(cell_to_index)
    3230             :       ELSE
    3231             :          nimg = 1
    3232             :       END IF
    3233             : 
    3234             :       ! check for symmetry
    3235         908 :       CPASSERT(SIZE(nl_2c) > 0)
    3236         908 :       CALL get_neighbor_list_set_p(neighbor_list_sets=nl_2c, symmetric=do_symmetric)
    3237             : 
    3238         908 :       IF (do_symmetric) THEN
    3239        2454 :          DO img = 1, nimg
    3240        2454 :             CPASSERT(dbcsr_has_symmetry(t2c(img)))
    3241             :          END DO
    3242             :       ELSE
    3243        4380 :          DO img = 1, nimg
    3244        4380 :             CPASSERT(.NOT. dbcsr_has_symmetry(t2c(img)))
    3245             :          END DO
    3246             :       END IF
    3247             : 
    3248        6834 :       DO img = 1, nimg
    3249        6834 :          CALL cp_dbcsr_alloc_block_from_nbl(t2c(img), nl_2c)
    3250             :       END DO
    3251             : 
    3252         908 :       maxli = 0
    3253        2578 :       DO ibasis = 1, SIZE(basis_i)
    3254        1670 :          CALL get_gto_basis_set(gto_basis_set=basis_i(ibasis)%gto_basis_set, maxl=imax)
    3255        2578 :          maxli = MAX(maxli, imax)
    3256             :       END DO
    3257         908 :       maxlj = 0
    3258        2578 :       DO ibasis = 1, SIZE(basis_j)
    3259        1670 :          CALL get_gto_basis_set(gto_basis_set=basis_j(ibasis)%gto_basis_set, maxl=imax)
    3260        2578 :          maxlj = MAX(maxlj, imax)
    3261             :       END DO
    3262             : 
    3263         908 :       m_max = maxli + maxlj
    3264             : 
    3265             :       !Init the truncated Coulomb operator
    3266         908 :       IF (op_prv == do_potential_truncated) THEN
    3267             : 
    3268         586 :          IF (m_max > get_lmax_init()) THEN
    3269         100 :             IF (para_env%mepos == 0) THEN
    3270          50 :                CALL open_file(unit_number=unit_id, file_name=potential_parameter%filename)
    3271             :             END IF
    3272         100 :             CALL init(m_max, unit_id, para_env%mepos, para_env)
    3273         100 :             IF (para_env%mepos == 0) THEN
    3274          50 :                CALL close_file(unit_id)
    3275             :             END IF
    3276             :          END IF
    3277             :       END IF
    3278             : 
    3279         908 :       CALL init_md_ftable(nmax=m_max)
    3280             : 
    3281         908 :       CALL cp_libint_init_2eri(lib, MAX(maxli, maxlj))
    3282         908 :       CALL cp_libint_set_contrdepth(lib, 1)
    3283             : 
    3284         908 :       CALL neighbor_list_iterator_create(nl_iterator, nl_2c)
    3285       26850 :       DO WHILE (neighbor_list_iterate(nl_iterator) == 0)
    3286             : 
    3287             :          CALL get_iterator_info(nl_iterator, ikind=ikind, jkind=jkind, &
    3288       25942 :                                 iatom=iatom, jatom=jatom, r=rij, cell=cell_j)
    3289       25942 :          IF (do_kpoints_prv) THEN
    3290       34674 :             IF (ANY([cell_j(1), cell_j(2), cell_j(3)] < kp_index_lbounds) .OR. &
    3291             :                 ANY([cell_j(1), cell_j(2), cell_j(3)] > kp_index_ubounds)) CYCLE
    3292        4930 :             img = cell_to_index(cell_j(1), cell_j(2), cell_j(3))
    3293        4930 :             IF (img > nimg .OR. img < 1) CYCLE
    3294             :          ELSE
    3295             :             img = 1
    3296             :          END IF
    3297             : 
    3298             :          CALL get_gto_basis_set(basis_i(ikind)%gto_basis_set, first_sgf=first_sgf_i, lmax=lmax_i, lmin=lmin_i, &
    3299             :                                 npgf=npgfi, nset=nseti, nsgf_set=nsgfi, pgf_radius=rpgf_i, set_radius=set_radius_i, &
    3300       25860 :                                 sphi=sphi_i, zet=zeti)
    3301             : 
    3302             :          CALL get_gto_basis_set(basis_j(jkind)%gto_basis_set, first_sgf=first_sgf_j, lmax=lmax_j, lmin=lmin_j, &
    3303             :                                 npgf=npgfj, nset=nsetj, nsgf_set=nsgfj, pgf_radius=rpgf_j, set_radius=set_radius_j, &
    3304       25860 :                                 sphi=sphi_j, zet=zetj)
    3305             : 
    3306       25860 :          IF (do_symmetric) THEN
    3307       22706 :             IF (iatom <= jatom) THEN
    3308       13668 :                irow = iatom
    3309       13668 :                icol = jatom
    3310             :             ELSE
    3311        9038 :                irow = jatom
    3312        9038 :                icol = iatom
    3313             :             END IF
    3314             :          ELSE
    3315        3154 :             irow = iatom
    3316        3154 :             icol = jatom
    3317             :          END IF
    3318             : 
    3319      103440 :          dab = NORM2(rij)
    3320             : 
    3321             :          CALL dbcsr_get_block_p(matrix=t2c(img), &
    3322       25860 :                                 row=irow, col=icol, BLOCK=block_t%block, found=found)
    3323       25860 :          CPASSERT(found)
    3324       25860 :          trans = do_symmetric .AND. (iatom > jatom)
    3325             : 
    3326      100744 :          DO iset = 1, nseti
    3327             : 
    3328       73976 :             ncoi = npgfi(iset)*ncoset(lmax_i(iset))
    3329       73976 :             sgfi = first_sgf_i(1, iset)
    3330             : 
    3331      484451 :             DO jset = 1, nsetj
    3332             : 
    3333      384565 :                ncoj = npgfj(jset)*ncoset(lmax_j(jset))
    3334      384565 :                sgfj = first_sgf_j(1, jset)
    3335             : 
    3336      458541 :                IF (ncoi*ncoj > 0) THEN
    3337     1538260 :                   ALLOCATE (sij_contr(nsgfi(iset), nsgfj(jset)))
    3338    76490739 :                   sij_contr(:, :) = 0.0_dp
    3339             : 
    3340     1538260 :                   ALLOCATE (sij(ncoi, ncoj))
    3341   199917867 :                   sij(:, :) = 0.0_dp
    3342             : 
    3343      384565 :                   ri = 0.0_dp
    3344      384565 :                   rj = rij
    3345             : 
    3346             :                   CALL eri_2center(sij, lmin_i(iset), lmax_i(iset), npgfi(iset), zeti(:, iset), &
    3347             :                                    rpgf_i(:, iset), ri, lmin_j(jset), lmax_j(jset), npgfj(jset), zetj(:, jset), &
    3348      384565 :                                    rpgf_j(:, jset), rj, dab, lib, potential_parameter)
    3349             : 
    3350             :                   CALL ab_contract(sij_contr, sij, &
    3351             :                                    sphi_i(:, sgfi:), sphi_j(:, sgfj:), &
    3352      384565 :                                    ncoi, ncoj, nsgfi(iset), nsgfj(jset))
    3353             : 
    3354      384565 :                   DEALLOCATE (sij)
    3355      384565 :                   IF (trans) THEN
    3356      479264 :                      ALLOCATE (sij_rs(nsgfj(jset), nsgfi(iset)))
    3357    29082749 :                      sij_rs(:, :) = TRANSPOSE(sij_contr)
    3358             :                   ELSE
    3359     1058996 :                      ALLOCATE (sij_rs(nsgfi(iset), nsgfj(jset)))
    3360    47335804 :                      sij_rs(:, :) = sij_contr
    3361             :                   END IF
    3362             : 
    3363      384565 :                   DEALLOCATE (sij_contr)
    3364             : 
    3365             :                   ! RI regularization
    3366             :                   IF (.NOT. do_hfx_kpoints_prv .AND. PRESENT(regularization_RI) .AND. &
    3367             :                       iatom == jatom .AND. iset == jset .AND. &
    3368      384565 :                       cell_j(1) == 0 .AND. cell_j(2) == 0 .AND. cell_j(3) == 0) THEN
    3369       12174 :                      DO i_diag = 1, nsgfi(iset)
    3370       26346 :                         min_zet = MINVAL(zeti(:, iset))
    3371        8782 :                         CPASSERT(min_zet > 1.0E-10_dp)
    3372             :                         sij_rs(i_diag, i_diag) = sij_rs(i_diag, i_diag) + &
    3373       12174 :                                                  regularization_RI*MAX(1.0_dp, 1.0_dp/min_zet)
    3374             :                      END DO
    3375             :                   END IF
    3376             : 
    3377             :                   CALL block_add("IN", sij_rs, &
    3378             :                                  nsgfi(iset), nsgfj(jset), block_t%block, &
    3379      384565 :                                  sgfi, sgfj, trans=trans)
    3380      384565 :                   DEALLOCATE (sij_rs)
    3381             : 
    3382             :                END IF
    3383             :             END DO
    3384             :          END DO
    3385             :       END DO
    3386             : 
    3387         908 :       CALL cp_libint_cleanup_2eri(lib)
    3388             : 
    3389         908 :       CALL neighbor_list_iterator_release(nl_iterator)
    3390        6834 :       DO img = 1, nimg
    3391        5926 :          CALL dbcsr_finalize(t2c(img))
    3392        6834 :          CALL dbcsr_filter(t2c(img), filter_eps)
    3393             :       END DO
    3394             : 
    3395         908 :       CALL timestop(handle)
    3396             : 
    3397        1816 :    END SUBROUTINE build_2c_integrals
    3398             : 
    3399             : ! **************************************************************************************************
    3400             : !> \brief ...
    3401             : !> \param tensor tensor with data. Data is cleared after compression.
    3402             : !> \param blk_indices ...
    3403             : !> \param compressed compressed tensor data
    3404             : !> \param eps all entries < eps are discarded
    3405             : !> \param memory ...
    3406             : ! **************************************************************************************************
    3407       20870 :    SUBROUTINE compress_tensor(tensor, blk_indices, compressed, eps, memory)
    3408             :       TYPE(dbt_type), INTENT(INOUT)                      :: tensor
    3409             :       INTEGER, ALLOCATABLE, DIMENSION(:, :), &
    3410             :          INTENT(INOUT)                                   :: blk_indices
    3411             :       TYPE(hfx_compression_type), INTENT(INOUT)          :: compressed
    3412             :       REAL(dp), INTENT(IN)                               :: eps
    3413             :       REAL(dp), INTENT(INOUT)                            :: memory
    3414             : 
    3415             :       INTEGER                                            :: buffer_left, buffer_size, buffer_start, &
    3416             :                                                             i, iblk, memory_usage, nbits, nblk, &
    3417             :                                                             nints, offset, shared_offset
    3418             :       INTEGER(int_8)                                     :: estimate_to_store_int, &
    3419             :                                                             storage_counter_integrals
    3420             :       INTEGER, DIMENSION(3)                              :: ind
    3421             :       LOGICAL                                            :: found
    3422             :       REAL(dp)                                           :: spherical_estimate
    3423       20870 :       REAL(dp), ALLOCATABLE, DIMENSION(:, :, :), TARGET  :: blk_data
    3424       20870 :       REAL(dp), DIMENSION(:), POINTER                    :: blk_data_1d
    3425             :       TYPE(dbt_iterator_type)                            :: iter
    3426       20870 :       TYPE(hfx_cache_type), DIMENSION(:), POINTER        :: integral_caches
    3427             :       TYPE(hfx_cache_type), POINTER                      :: maxval_cache
    3428       20870 :       TYPE(hfx_container_type), DIMENSION(:), POINTER    :: integral_containers
    3429             :       TYPE(hfx_container_type), POINTER                  :: maxval_container
    3430             : 
    3431       20870 :       CALL dealloc_containers(compressed, memory_usage)
    3432       20870 :       CALL alloc_containers(compressed, 1)
    3433             : 
    3434       20870 :       maxval_container => compressed%maxval_container(1)
    3435       20870 :       integral_containers => compressed%integral_containers(:, 1)
    3436             : 
    3437       20870 :       CALL hfx_init_container(maxval_container, memory_usage, .FALSE.)
    3438     1356550 :       DO i = 1, 64
    3439     1356550 :          CALL hfx_init_container(integral_containers(i), memory_usage, .FALSE.)
    3440             :       END DO
    3441             : 
    3442       20870 :       maxval_cache => compressed%maxval_cache(1)
    3443       20870 :       integral_caches => compressed%integral_caches(:, 1)
    3444             : 
    3445       20870 :       IF (ALLOCATED(blk_indices)) DEALLOCATE (blk_indices)
    3446       55485 :       ALLOCATE (blk_indices(dbt_get_num_blocks(tensor), 3))
    3447       20870 :       shared_offset = 0
    3448             : !$OMP PARALLEL DEFAULT(NONE) SHARED(tensor,blk_indices,shared_offset) &
    3449       20870 : !$OMP PRIVATE(iter,ind,offset,nblk,iblk)
    3450             :       CALL dbt_iterator_start(iter, tensor)
    3451             :       nblk = dbt_iterator_num_blocks(iter)
    3452             : !$OMP CRITICAL
    3453             :       offset = shared_offset
    3454             :       shared_offset = shared_offset + nblk
    3455             : !$OMP END CRITICAL
    3456             :       DO iblk = 1, nblk
    3457             :          CALL dbt_iterator_next_block(iter, ind)
    3458             :          blk_indices(offset + iblk, :) = ind(:)
    3459             :       END DO
    3460             :       CALL dbt_iterator_stop(iter)
    3461             : !$OMP END PARALLEL
    3462             : 
    3463             :       ! Can not use the tensor iterator here because the order of the blocks is not guaranteed.
    3464      315126 :       DO i = 1, SIZE(blk_indices, 1)
    3465     1177024 :          ind = blk_indices(i, :)
    3466      294256 :          CALL dbt_get_block(tensor, ind, blk_data, found)
    3467      294256 :          CPASSERT(found)
    3468     1177024 :          nints = SIZE(blk_data)
    3469      294256 :          blk_data_1d(1:nints) => blk_data
    3470   143160217 :          spherical_estimate = MAXVAL(ABS(blk_data_1d))
    3471      294256 :          IF (spherical_estimate == 0.0_dp) spherical_estimate = TINY(spherical_estimate)
    3472      294256 :          estimate_to_store_int = EXPONENT(spherical_estimate)
    3473      294256 :          estimate_to_store_int = MAX(estimate_to_store_int, -15_int_8)
    3474             : 
    3475             :          CALL hfx_add_single_cache_element(estimate_to_store_int, 6, &
    3476             :                                            maxval_cache, maxval_container, memory_usage, &
    3477      294256 :                                            .FALSE.)
    3478             : 
    3479      294256 :          spherical_estimate = SET_EXPONENT(1.0_dp, estimate_to_store_int + 1)
    3480             : 
    3481      294256 :          nbits = EXPONENT(ANINT(spherical_estimate/eps)) + 1
    3482      294256 :          IF (nbits > 64) THEN
    3483             :             CALL cp_abort(__LOCATION__, &
    3484           0 :                           "Overflow during tensor compression. Please use a larger EPS_FILTER or EPS_STORAGE_SCALING")
    3485             :          END IF
    3486             : 
    3487             :          buffer_left = nints
    3488             :          buffer_start = 1
    3489             : 
    3490      645931 :          DO WHILE (buffer_left > 0)
    3491      351675 :             buffer_size = MIN(buffer_left, cache_size)
    3492             :             CALL hfx_add_mult_cache_elements(blk_data_1d(buffer_start:), &
    3493             :                                              buffer_size, nbits, &
    3494             :                                              integral_caches(nbits), &
    3495             :                                              integral_containers(nbits), &
    3496             :                                              eps, 1.0_dp, &
    3497             :                                              memory_usage, &
    3498      351675 :                                              .FALSE.)
    3499      351675 :             buffer_left = buffer_left - buffer_size
    3500      351675 :             buffer_start = buffer_start + buffer_size
    3501             :          END DO
    3502             : 
    3503      609382 :          NULLIFY (blk_data_1d); DEALLOCATE (blk_data)
    3504             :       END DO
    3505             : 
    3506       20870 :       CALL dbt_clear(tensor)
    3507             : 
    3508       20870 :       storage_counter_integrals = memory_usage*cache_size
    3509       20870 :       memory = memory + REAL(storage_counter_integrals, dp)/1024/128
    3510             :       !WRITE (UNIT=iw, FMT="((T3,A,T60,I21))") &
    3511             :       !   "HFX_MEM_INFO| Total memory consumption ERI's RAM [MiB]:            ", memory
    3512             : 
    3513             :       CALL hfx_flush_last_cache(6, maxval_cache, maxval_container, memory_usage, &
    3514       20870 :                                 .FALSE.)
    3515     1356550 :       DO i = 1, 64
    3516             :          CALL hfx_flush_last_cache(i, integral_caches(i), integral_containers(i), &
    3517     1356550 :                                    memory_usage, .FALSE.)
    3518             :       END DO
    3519             : 
    3520       20870 :       CALL hfx_reset_cache_and_container(maxval_cache, maxval_container, memory_usage, .FALSE.)
    3521     1356550 :       DO i = 1, 64
    3522             :          CALL hfx_reset_cache_and_container(integral_caches(i), integral_containers(i), &
    3523     1356550 :                                             memory_usage, .FALSE.)
    3524             :       END DO
    3525             : 
    3526       41740 :    END SUBROUTINE
    3527             : 
    3528             : ! **************************************************************************************************
    3529             : !> \brief ...
    3530             : !> \param tensor empty tensor which is filled by decompressed data
    3531             : !> \param blk_indices indices of blocks to be reserved
    3532             : !> \param compressed compressed data
    3533             : !> \param eps all entries < eps are discarded
    3534             : ! **************************************************************************************************
    3535       61068 :    SUBROUTINE decompress_tensor(tensor, blk_indices, compressed, eps)
    3536             : 
    3537             :       TYPE(dbt_type), INTENT(INOUT)                      :: tensor
    3538             :       INTEGER, DIMENSION(:, :)                           :: blk_indices
    3539             :       TYPE(hfx_compression_type), INTENT(INOUT)          :: compressed
    3540             :       REAL(dp), INTENT(IN)                               :: eps
    3541             : 
    3542             :       INTEGER                                            :: A, b, buffer_left, buffer_size, &
    3543             :                                                             buffer_start, i, memory_usage, nbits, &
    3544             :                                                             nblk_per_thread, nints
    3545             :       INTEGER(int_8)                                     :: estimate_to_store_int
    3546             :       INTEGER, DIMENSION(3)                              :: blk_size, ind
    3547             :       REAL(dp)                                           :: spherical_estimate
    3548       61068 :       REAL(dp), ALLOCATABLE, DIMENSION(:), TARGET        :: blk_data
    3549       61068 :       REAL(dp), DIMENSION(:, :, :), POINTER              :: blk_data_3d
    3550       61068 :       TYPE(hfx_cache_type), DIMENSION(:), POINTER        :: integral_caches
    3551             :       TYPE(hfx_cache_type), POINTER                      :: maxval_cache
    3552       61068 :       TYPE(hfx_container_type), DIMENSION(:), POINTER    :: integral_containers
    3553             :       TYPE(hfx_container_type), POINTER                  :: maxval_container
    3554             : 
    3555       61068 :       maxval_cache => compressed%maxval_cache(1)
    3556       61068 :       maxval_container => compressed%maxval_container(1)
    3557       61068 :       integral_caches => compressed%integral_caches(:, 1)
    3558       61068 :       integral_containers => compressed%integral_containers(:, 1)
    3559             : 
    3560       61068 :       memory_usage = 0
    3561             : 
    3562       61068 :       CALL hfx_decompress_first_cache(6, maxval_cache, maxval_container, memory_usage, .FALSE.)
    3563             : 
    3564     3969420 :       DO i = 1, 64
    3565             :          CALL hfx_decompress_first_cache(i, integral_caches(i), integral_containers(i), &
    3566     3969420 :                                          memory_usage, .FALSE.)
    3567             :       END DO
    3568             : 
    3569             : !TODO: Parallelize creation of block list.
    3570       61068 : !$OMP PARALLEL DEFAULT(NONE) SHARED(tensor,blk_indices) PRIVATE(nblk_per_thread,A,b)
    3571             :       nblk_per_thread = SIZE(blk_indices, 1)/omp_get_num_threads() + 1
    3572             :       a = omp_get_thread_num()*nblk_per_thread + 1
    3573             :       b = MIN(a + nblk_per_thread, SIZE(blk_indices, 1))
    3574             :       CALL dbt_reserve_blocks(tensor, blk_indices(a:b, :))
    3575             : !$OMP END PARALLEL
    3576             : 
    3577             :       ! Can not use the tensor iterator here because the order of the blocks is not guaranteed.
    3578     1227609 :       DO i = 1, SIZE(blk_indices, 1)
    3579     4666164 :          ind = blk_indices(i, :)
    3580     1166541 :          CALL dbt_blk_sizes(tensor, ind, blk_size)
    3581     4666164 :          nints = PRODUCT(blk_size)
    3582             :          CALL hfx_get_single_cache_element( &
    3583             :             estimate_to_store_int, 6, &
    3584             :             maxval_cache, maxval_container, memory_usage, &
    3585     1166541 :             .FALSE.)
    3586             : 
    3587     1166541 :          spherical_estimate = SET_EXPONENT(1.0_dp, estimate_to_store_int + 1)
    3588             : 
    3589     1166541 :          nbits = EXPONENT(ANINT(spherical_estimate/eps)) + 1
    3590             : 
    3591     1166541 :          buffer_left = nints
    3592     1166541 :          buffer_start = 1
    3593             : 
    3594     3499623 :          ALLOCATE (blk_data(nints))
    3595     2448223 :          DO WHILE (buffer_left > 0)
    3596     1281682 :             buffer_size = MIN(buffer_left, cache_size)
    3597             :             CALL hfx_get_mult_cache_elements(blk_data(buffer_start), &
    3598             :                                              buffer_size, nbits, &
    3599             :                                              integral_caches(nbits), &
    3600             :                                              integral_containers(nbits), &
    3601             :                                              eps, 1.0_dp, &
    3602             :                                              memory_usage, &
    3603     1281682 :                                              .FALSE.)
    3604     1281682 :             buffer_left = buffer_left - buffer_size
    3605     1281682 :             buffer_start = buffer_start + buffer_size
    3606             :          END DO
    3607             : 
    3608     1166541 :          blk_data_3d(1:blk_size(1), 1:blk_size(2), 1:blk_size(3)) => blk_data
    3609     1166541 :          CALL dbt_put_block(tensor, ind, blk_size, blk_data_3d)
    3610     1227609 :          NULLIFY (blk_data_3d); DEALLOCATE (blk_data)
    3611             :       END DO
    3612             : 
    3613       61068 :       CALL hfx_reset_cache_and_container(maxval_cache, maxval_container, memory_usage, .FALSE.)
    3614     3969420 :       DO i = 1, 64
    3615             :          CALL hfx_reset_cache_and_container(integral_caches(i), integral_containers(i), &
    3616     3969420 :                                             memory_usage, .FALSE.)
    3617             :       END DO
    3618      122136 :    END SUBROUTINE
    3619             : 
    3620             : ! **************************************************************************************************
    3621             : !> \brief ...
    3622             : !> \param tensor ...
    3623             : !> \param nze ...
    3624             : !> \param occ ...
    3625             : ! **************************************************************************************************
    3626      113501 :    SUBROUTINE get_tensor_occupancy(tensor, nze, occ)
    3627             :       TYPE(dbt_type), INTENT(IN)                         :: tensor
    3628             :       INTEGER(int_8), INTENT(OUT)                        :: nze
    3629             :       REAL(dp), INTENT(OUT)                              :: occ
    3630             : 
    3631      227002 :       INTEGER, DIMENSION(dbt_ndims(tensor))              :: dims
    3632             : 
    3633      113501 :       nze = dbt_get_nze_total(tensor)
    3634      113501 :       CALL dbt_get_info(tensor, nfull_total=dims)
    3635      437238 :       occ = REAL(nze, dp)/PRODUCT(REAL(dims, dp))
    3636             : 
    3637      113501 :    END SUBROUTINE
    3638             : 
    3639           0 : END MODULE

Generated by: LCOV version 1.15