LCOV - code coverage report
Current view: top level - src - hfx_derivatives.F (source / functions) Hit Total Coverage
Test: CP2K Regtests (git:4dc10b3) Lines: 277 285 97.2 %
Date: 2024-11-21 06:45:46 Functions: 6 6 100.0 %

          Line data    Source code
       1             : !--------------------------------------------------------------------------------------------------!
       2             : !   CP2K: A general program to perform molecular dynamics simulations                              !
       3             : !   Copyright 2000-2024 CP2K developers group <https://cp2k.org>                                   !
       4             : !                                                                                                  !
       5             : !   SPDX-License-Identifier: GPL-2.0-or-later                                                      !
       6             : !--------------------------------------------------------------------------------------------------!
       7             : 
       8             : ! **************************************************************************************************
       9             : !> \brief Routines to calculate derivatives with respect to basis function origin
      10             : !> \par History
      11             : !>      04.2008 created [Manuel Guidon]
      12             : !> \author Manuel Guidon
      13             : ! **************************************************************************************************
      14             : MODULE hfx_derivatives
      15             :    USE admm_types, ONLY: admm_type
      16             :    USE atomic_kind_types, ONLY: atomic_kind_type, &
      17             :                                 get_atomic_kind_set
      18             :    USE cell_types, ONLY: cell_type, &
      19             :                          pbc
      20             :    USE cp_control_types, ONLY: dft_control_type
      21             :    USE cp_files, ONLY: close_file, &
      22             :                        open_file
      23             :    USE cp_log_handling, ONLY: cp_get_default_logger, &
      24             :                               cp_logger_type
      25             :    USE cp_output_handling, ONLY: cp_p_file, &
      26             :                                  cp_print_key_finished_output, &
      27             :                                  cp_print_key_should_output, &
      28             :                                  cp_print_key_unit_nr
      29             :    USE message_passing, ONLY: mp_para_env_type
      30             :    USE cp_dbcsr_api, ONLY: dbcsr_get_matrix_type, &
      31             :                            dbcsr_p_type, &
      32             :                            dbcsr_type_antisymmetric
      33             :    USE gamma, ONLY: init_md_ftable
      34             :    USE hfx_communication, ONLY: get_full_density
      35             :    USE hfx_compression_methods, ONLY: hfx_add_mult_cache_elements, &
      36             :                                       hfx_add_single_cache_element, &
      37             :                                       hfx_decompress_first_cache, &
      38             :                                       hfx_flush_last_cache, &
      39             :                                       hfx_get_mult_cache_elements, &
      40             :                                       hfx_get_single_cache_element, &
      41             :                                       hfx_reset_cache_and_container
      42             :    USE hfx_libint_interface, ONLY: evaluate_deriv_eri
      43             :    USE hfx_load_balance_methods, ONLY: collect_load_balance_info, &
      44             :                                        hfx_load_balance, &
      45             :                                        hfx_update_load_balance
      46             :    USE hfx_pair_list_methods, ONLY: build_atomic_pair_list, &
      47             :                                     build_pair_list, &
      48             :                                     build_pair_list_pgf, &
      49             :                                     build_pgf_product_list, &
      50             :                                     pgf_product_list_size
      51             :    USE hfx_screening_methods, ONLY: update_pmax_mat
      52             :    USE hfx_types, ONLY: &
      53             :       alloc_containers, dealloc_containers, hfx_basis_info_type, hfx_basis_type, hfx_cache_type, &
      54             :       hfx_cell_type, hfx_container_type, hfx_distribution, hfx_general_type, hfx_init_container, &
      55             :       hfx_load_balance_type, hfx_memory_type, hfx_p_kind, hfx_pgf_list, hfx_pgf_product_list, &
      56             :       hfx_potential_type, hfx_screen_coeff_type, hfx_screening_type, hfx_task_list_type, &
      57             :       hfx_type, init_t_c_g0_lmax, log_zero, pair_list_type, pair_set_list_type
      58             :    USE input_constants, ONLY: do_potential_mix_cl_trunc, &
      59             :                               do_potential_truncated, &
      60             :                               hfx_do_eval_forces
      61             :    USE input_section_types, ONLY: section_vals_type
      62             :    USE kinds, ONLY: dp, &
      63             :                     int_8
      64             :    USE libint_wrapper, ONLY: cp_libint_t
      65             :    USE machine, ONLY: m_walltime
      66             :    USE mathconstants, ONLY: fac
      67             :    USE orbital_pointers, ONLY: nco, &
      68             :                                ncoset, &
      69             :                                nso
      70             :    USE particle_types, ONLY: particle_type
      71             :    USE qs_environment_types, ONLY: get_qs_env, &
      72             :                                    qs_environment_type
      73             :    USE qs_force_types, ONLY: qs_force_type
      74             :    USE t_c_g0, ONLY: init
      75             :    USE util, ONLY: sort
      76             :    USE virial_types, ONLY: virial_type
      77             : 
      78             : !$ USE OMP_LIB, ONLY: omp_get_max_threads, omp_get_thread_num, omp_get_num_threads
      79             : 
      80             : #include "./base/base_uses.f90"
      81             : 
      82             :    IMPLICIT NONE
      83             :    PRIVATE
      84             : 
      85             :    PUBLIC :: derivatives_four_center
      86             : 
      87             :    CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'hfx_derivatives'
      88             : 
      89             : !***
      90             : 
      91             : CONTAINS
      92             : 
      93             : ! **************************************************************************************************
      94             : !> \brief computes four center derivatives for a full basis set and updates the
      95             : !>      forces%fock_4c arrays. Uses all 8 eri symmetries
      96             : !> \param qs_env ...
      97             : !> \param rho_ao density matrix
      98             : !> \param rho_ao_resp relaxed density matrix from response
      99             : !> \param hfx_section HFX input section
     100             : !> \param para_env para_env
     101             : !> \param irep ID of HFX replica
     102             : !> \param use_virial ...
     103             : !> \param adiabatic_rescale_factor parameter used for MCY3 hybrid
     104             : !> \param resp_only Calculate only forces from response density
     105             : !> \par History
     106             : !>      06.2007 created [Manuel Guidon]
     107             : !>      08.2007 optimized load balance [Manuel Guidon]
     108             : !>      02.2009 completely rewritten screening part [Manuel Guidon]
     109             : !>      12.2017 major bug fix. removed wrong cycle that was causing segfault.
     110             : !>              see https://groups.google.com/forum/#!topic/cp2k/pc6B14XOALY
     111             : !>              [Tobias Binninger + Valery Weber]
     112             : !> \author Manuel Guidon
     113             : ! **************************************************************************************************
     114        1788 :    SUBROUTINE derivatives_four_center(qs_env, rho_ao, rho_ao_resp, hfx_section, para_env, &
     115             :                                       irep, use_virial, adiabatic_rescale_factor, resp_only, &
     116             :                                       external_x_data)
     117             : 
     118             :       TYPE(qs_environment_type), POINTER                 :: qs_env
     119             :       TYPE(dbcsr_p_type), DIMENSION(:, :), POINTER       :: rho_ao
     120             :       TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: rho_ao_resp
     121             :       TYPE(section_vals_type), POINTER                   :: hfx_section
     122             :       TYPE(mp_para_env_type), POINTER                    :: para_env
     123             :       INTEGER, INTENT(IN)                                :: irep
     124             :       LOGICAL, INTENT(IN)                                :: use_virial
     125             :       REAL(dp), INTENT(IN), OPTIONAL                     :: adiabatic_rescale_factor
     126             :       LOGICAL, INTENT(IN), OPTIONAL                      :: resp_only
     127             :       TYPE(hfx_type), DIMENSION(:, :), POINTER, OPTIONAL  :: external_x_data
     128             : 
     129             :       CHARACTER(LEN=*), PARAMETER :: routineN = 'derivatives_four_center'
     130             : 
     131             :       INTEGER :: atomic_offset_ac, atomic_offset_ad, atomic_offset_bc, atomic_offset_bd, bin, &
     132             :                  bits_max_val, buffer_left, buffer_size, buffer_start, cache_size, coord, current_counter, &
     133             :                  forces_map(4, 2), handle, handle_bin, handle_getP, handle_load, handle_main, i, i_atom, &
     134             :                  i_list_ij, i_list_kl, i_set_list_ij, i_set_list_ij_start, i_set_list_ij_stop, &
     135             :                  i_set_list_kl, i_set_list_kl_start, i_set_list_kl_stop, i_thread, iatom, iatom_block, &
     136             :                  iatom_end, iatom_start, ikind, iset, iw, j, j_atom, jatom, jatom_block, jatom_end, &
     137             :                  jatom_start, jkind, jset, k, k_atom, katom, katom_block, katom_end, katom_start
     138             :       INTEGER :: kind_kind_idx, kkind, kset, l_atom, l_max, latom, latom_block, latom_end, &
     139             :                  latom_start, lkind, lset, max_am, max_pgf, max_set, my_bin_id, my_bin_size, my_thread_id, &
     140             :                  n_threads, natom, nbits, ncob, ncos_max, nints, nkimages, nkind, nneighbors, nseta, &
     141             :                  nsetb, nsgf_max, nspins, sgfb, shm_task_counter, shm_total_bins, sphi_a_u1, sphi_a_u2, &
     142             :                  sphi_a_u3, sphi_b_u1, sphi_b_u2, sphi_b_u3, sphi_c_u1, sphi_c_u2, sphi_c_u3, sphi_d_u1, &
     143             :                  sphi_d_u2, sphi_d_u3, swap_id, tmp_i4, unit_id
     144             :       INTEGER(int_8) :: atom_block, counter, estimate_to_store_int, max_val_memory, &
     145             :                         mem_compression_counter, mem_eris, mem_max_val, my_current_counter, my_istart, &
     146             :                         n_processes, nblocks, ncpu, neris_incore, neris_onthefly, neris_tmp, neris_total, &
     147             :                         nprim_ints, shm_neris_incore, shm_neris_onthefly, shm_neris_total, shm_nprim_ints, &
     148             :                         shm_stor_count_max_val, shm_storage_counter_integrals, stor_count_max_val, &
     149             :                         storage_counter_integrals, tmp_block, tmp_i8(6)
     150        1788 :       INTEGER(int_8), ALLOCATABLE, DIMENSION(:)          :: tmp_task_list_cost
     151        1788 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: atom_of_kind, kind_of, last_sgf_global, &
     152        1788 :                                                             nimages, tmp_index
     153        1788 :       INTEGER, DIMENSION(:), POINTER :: la_max, la_min, lb_max, lb_min, lc_max, lc_min, ld_max, &
     154        3576 :                                         ld_min, npgfa, npgfb, npgfc, npgfd, nsgfa, nsgfb, nsgfc, nsgfd, shm_block_offset
     155        3576 :       INTEGER, DIMENSION(:, :), POINTER :: first_sgfb, nsgfl_a, nsgfl_b, nsgfl_c, nsgfl_d, &
     156        1788 :                                            offset_ac_set, offset_ad_set, offset_bc_set, offset_bd_set, shm_atomic_block_offset
     157             :       INTEGER, DIMENSION(:, :), POINTER, SAVE            :: shm_is_assoc_atomic_block
     158        1788 :       INTEGER, DIMENSION(:, :, :, :), POINTER            :: shm_set_offset
     159             :       INTEGER, SAVE                                      :: shm_number_of_p_entries
     160             :       LOGICAL :: bins_left, buffer_overflow, do_dynamic_load_balancing, do_it, do_periodic, &
     161             :                  do_print_load_balance_info, is_anti_symmetric, my_resp_only, screen_pmat_forces, &
     162             :                  treat_forces_in_core, use_disk_storage, with_resp_density
     163        1788 :       LOGICAL, DIMENSION(:, :), POINTER                  :: shm_atomic_pair_list
     164             :       REAL(dp) :: bintime_start, bintime_stop, cartesian_estimate, cartesian_estimate_virial, &
     165             :                   compression_factor, eps_schwarz, eps_storage, fac, hf_fraction, ln_10, log10_eps_schwarz, &
     166             :                   log10_pmax, log_2, max_contraction_val, max_val1, max_val2, max_val2_set, &
     167             :                   my_adiabatic_rescale_factor, pmax_atom, pmax_blocks, pmax_entry, pmax_tmp, ra(3), rab2, &
     168             :                   rb(3), rc(3), rcd2, rd(3), spherical_estimate, spherical_estimate_virial, symm_fac, &
     169             :                   tmp_virial(3, 3)
     170        1788 :       REAL(dp), ALLOCATABLE, DIMENSION(:) :: ede_buffer1, ede_buffer2, ede_primitives_tmp, &
     171        1788 :                                              ede_primitives_tmp_virial, ede_work, ede_work2, ede_work2_virial, ede_work_forces, &
     172        3576 :                                              ede_work_virial, pac_buf, pac_buf_beta, pac_buf_resp, pac_buf_resp_beta, pad_buf, &
     173        1788 :                                              pad_buf_beta, pad_buf_resp, pad_buf_resp_beta, pbc_buf, pbc_buf_beta, pbc_buf_resp, &
     174        1788 :                                              pbc_buf_resp_beta, pbd_buf, pbd_buf_beta, pbd_buf_resp, pbd_buf_resp_beta
     175        1788 :       REAL(dp), ALLOCATABLE, DIMENSION(:), TARGET        :: primitive_forces, primitive_forces_virial
     176        1788 :       REAL(dp), DIMENSION(:), POINTER                    :: full_density_resp, &
     177        3576 :                                                             full_density_resp_beta, T2
     178        1788 :       REAL(dp), DIMENSION(:, :), POINTER :: full_density_alpha, full_density_beta, &
     179        1788 :                                             max_contraction, ptr_p_1, ptr_p_2, ptr_p_3, ptr_p_4, shm_pmax_atom, shm_pmax_block, &
     180        1788 :                                             sphi_b, zeta, zetb, zetc, zetd
     181        1788 :       REAL(dp), DIMENSION(:, :, :), POINTER              :: sphi_a_ext_set, sphi_b_ext_set, &
     182        1788 :                                                             sphi_c_ext_set, sphi_d_ext_set
     183        1788 :       REAL(dp), DIMENSION(:, :, :, :), POINTER           :: sphi_a_ext, sphi_b_ext, sphi_c_ext, &
     184        1788 :                                                             sphi_d_ext
     185             :       REAL(KIND=dp)                                      :: coeffs_kind_max0
     186             :       TYPE(admm_type), POINTER                           :: admm_env
     187        1788 :       TYPE(atomic_kind_type), DIMENSION(:), POINTER      :: atomic_kind_set
     188             :       TYPE(cell_type), POINTER                           :: cell
     189             :       TYPE(cp_libint_t)                                  :: private_deriv
     190             :       TYPE(cp_logger_type), POINTER                      :: logger
     191             :       TYPE(dft_control_type), POINTER                    :: dft_control
     192             :       TYPE(hfx_basis_info_type), POINTER                 :: basis_info
     193        1788 :       TYPE(hfx_basis_type), DIMENSION(:), POINTER        :: basis_parameter
     194        1788 :       TYPE(hfx_cache_type), DIMENSION(:), POINTER        :: integral_caches
     195             :       TYPE(hfx_cache_type), POINTER                      :: maxval_cache
     196        1788 :       TYPE(hfx_container_type), DIMENSION(:), POINTER    :: integral_containers
     197             :       TYPE(hfx_container_type), POINTER                  :: maxval_container
     198             :       TYPE(hfx_distribution), POINTER                    :: distribution_forces
     199             :       TYPE(hfx_general_type)                             :: general_parameter
     200             :       TYPE(hfx_load_balance_type), POINTER               :: load_balance_parameter
     201             :       TYPE(hfx_memory_type), POINTER                     :: memory_parameter
     202        1788 :       TYPE(hfx_p_kind), DIMENSION(:), POINTER            :: shm_initial_p
     203        1788 :       TYPE(hfx_pgf_list), ALLOCATABLE, DIMENSION(:)      :: pgf_list_ij, pgf_list_kl
     204             :       TYPE(hfx_pgf_product_list), ALLOCATABLE, &
     205        1788 :          DIMENSION(:)                                    :: pgf_product_list
     206             :       TYPE(hfx_potential_type)                           :: potential_parameter
     207             :       TYPE(hfx_screen_coeff_type), DIMENSION(:, :), &
     208        1788 :          POINTER                                         :: screen_coeffs_kind, tmp_R_1, tmp_R_2, &
     209        1788 :                                                             tmp_screen_pgf1, tmp_screen_pgf2
     210             :       TYPE(hfx_screen_coeff_type), &
     211        1788 :          DIMENSION(:, :, :, :), POINTER                  :: screen_coeffs_set
     212             :       TYPE(hfx_screen_coeff_type), &
     213        1788 :          DIMENSION(:, :, :, :, :, :), POINTER            :: radii_pgf, screen_coeffs_pgf
     214             :       TYPE(hfx_screening_type)                           :: screening_parameter
     215        1788 :       TYPE(hfx_task_list_type), DIMENSION(:), POINTER    :: shm_task_list, tmp_task_list
     216             :       TYPE(hfx_type), POINTER                            :: actual_x_data
     217        1788 :       TYPE(hfx_type), DIMENSION(:, :), POINTER            :: my_x_data
     218             :       TYPE(pair_list_type)                               :: list_ij, list_kl
     219             :       TYPE(pair_set_list_type), ALLOCATABLE, &
     220        1788 :          DIMENSION(:)                                    :: set_list_ij, set_list_kl
     221        1788 :       TYPE(particle_type), DIMENSION(:), POINTER         :: particle_set
     222        1788 :       TYPE(qs_force_type), DIMENSION(:), POINTER         :: force
     223             :       TYPE(virial_type), POINTER                         :: virial
     224             : 
     225        1788 :       NULLIFY (dft_control, admm_env)
     226             : 
     227        1788 :       with_resp_density = .FALSE.
     228         622 :       IF (ASSOCIATED(rho_ao_resp)) with_resp_density = .TRUE.
     229             : 
     230        1788 :       my_resp_only = .FALSE.
     231        1788 :       IF (PRESENT(resp_only)) my_resp_only = resp_only
     232             : 
     233        1788 :       is_anti_symmetric = dbcsr_get_matrix_type(rho_ao(1, 1)%matrix) .EQ. dbcsr_type_antisymmetric
     234             : 
     235             :       CALL get_qs_env(qs_env, &
     236             :                       admm_env=admm_env, &
     237             :                       particle_set=particle_set, &
     238             :                       atomic_kind_set=atomic_kind_set, &
     239             :                       cell=cell, &
     240        1788 :                       dft_control=dft_control)
     241             : 
     242        1788 :       nspins = dft_control%nspins
     243        1788 :       nkimages = dft_control%nimages
     244        1788 :       CPASSERT(nkimages == 1)
     245             : 
     246        1788 :       my_x_data => qs_env%x_data
     247        1788 :       IF (PRESENT(external_x_data)) my_x_data => external_x_data
     248             : 
     249             :       !! One atom systems have no contribution to forces
     250        1788 :       IF (SIZE(particle_set, 1) == 1) THEN
     251           4 :          IF (.NOT. use_virial) RETURN
     252             :       END IF
     253             : 
     254        1784 :       CALL timeset(routineN, handle)
     255             :       !! Calculate l_max used in fgamma , because init_md_ftable is definitely not thread safe
     256        1784 :       nkind = SIZE(atomic_kind_set, 1)
     257        1784 :       l_max = 0
     258        5106 :       DO ikind = 1, nkind
     259       14080 :          l_max = MAX(l_max, MAXVAL(my_x_data(1, 1)%basis_parameter(ikind)%lmax))
     260             :       END DO
     261        1784 :       l_max = 4*l_max + 1
     262        1784 :       CALL init_md_ftable(l_max)
     263             : 
     264        1784 :       IF (my_x_data(1, 1)%potential_parameter%potential_type == do_potential_truncated .OR. &
     265             :           my_x_data(1, 1)%potential_parameter%potential_type == do_potential_mix_cl_trunc) THEN
     266         720 :          IF (l_max > init_t_c_g0_lmax) THEN
     267         194 :             CALL open_file(unit_number=unit_id, file_name=my_x_data(1, 1)%potential_parameter%filename)
     268         194 :             CALL init(l_max, unit_id, para_env%mepos, para_env)
     269         194 :             CALL close_file(unit_id)
     270         194 :             init_t_c_g0_lmax = l_max
     271             :          END IF
     272             :       END IF
     273             : 
     274        1784 :       n_threads = 1
     275        1784 : !$    n_threads = omp_get_max_threads()
     276             : 
     277        1784 :       shm_neris_total = 0
     278        1784 :       shm_nprim_ints = 0
     279        1784 :       shm_neris_onthefly = 0
     280        1784 :       shm_storage_counter_integrals = 0
     281        1784 :       shm_neris_incore = 0
     282        1784 :       shm_stor_count_max_val = 0
     283             : 
     284             :       !! get force array
     285        1784 :       CALL get_qs_env(qs_env=qs_env, force=force, virial=virial)
     286             : 
     287        1784 :       my_adiabatic_rescale_factor = 1.0_dp
     288        1784 :       IF (PRESENT(adiabatic_rescale_factor)) THEN
     289         208 :          my_adiabatic_rescale_factor = adiabatic_rescale_factor
     290             :       END IF
     291             : 
     292             :       !$OMP PARALLEL DEFAULT(OMP_DEFAULT_NONE_WITH_OOP) &
     293             : !$OMP SHARED(qs_env,&
     294             : !$OMP                                  rho_ao,&
     295             : !$OMP                                  rho_ao_resp,&
     296             : !$OMP                                  with_resp_density,&
     297             : !$OMP                                  hfx_section,&
     298             : !$OMP                                  para_env,&
     299             : !$OMP                                  irep,&
     300             : !$OMP                                  my_resp_only,&
     301             : !$OMP                                  ncoset,&
     302             : !$OMP                                  nco,&
     303             : !$OMP                                  nso,&
     304             : !$OMP                                  n_threads,&
     305             : !$OMP                                  full_density_alpha,&
     306             : !$OMP                                  full_density_resp,&
     307             : !$OMP                                  full_density_resp_beta,&
     308             : !$OMP                                  full_density_beta,&
     309             : !$OMP                                  shm_initial_p,&
     310             : !$OMP                                  shm_is_assoc_atomic_block,&
     311             : !$OMP                                  shm_number_of_p_entries,&
     312             : !$OMP                                  force,&
     313             : !$OMP                                  virial,&
     314             : !$OMP                                  my_adiabatic_rescale_factor,&
     315             : !$OMP                                  shm_neris_total,&
     316             : !$OMP                                  shm_neris_onthefly,&
     317             : !$OMP                                  shm_storage_counter_integrals,&
     318             : !$OMP                                  shm_neris_incore,&
     319             : !$OMP                                  shm_nprim_ints,&
     320             : !$OMP                                  shm_stor_count_max_val,&
     321             : !$OMP                                  cell,&
     322             : !$OMP                                  screen_coeffs_set,&
     323             : !$OMP                                  screen_coeffs_kind,&
     324             : !$OMP                                  screen_coeffs_pgf,&
     325             : !$OMP                                  pgf_product_list_size,&
     326             : !$OMP                                  nkind,&
     327             : !$OMP                                  is_anti_symmetric,&
     328             : !$OMP                                  radii_pgf,&
     329             : !$OMP                                  shm_atomic_block_offset,&
     330             : !$OMP                                  shm_set_offset,&
     331             : !$OMP                                  shm_block_offset,&
     332             : !$OMP                                  shm_task_counter,&
     333             : !$OMP                                  shm_task_list,&
     334             : !$OMP                                  shm_total_bins,&
     335             : !$OMP                                  shm_pmax_block,&
     336             : !$OMP                                  shm_atomic_pair_list,&
     337             : !$OMP                                  shm_pmax_atom,&
     338             : !$OMP                                  use_virial,&
     339             : !$OMP                                  do_print_load_balance_info,&
     340             : !$OMP                                  nkimages,nspins,my_x_data)&
     341             : !$OMP  PRIVATE(actual_x_data,atomic_kind_set,atomic_offset_ac,atomic_offset_ad,atomic_offset_bc,&
     342             : !$OMP  atomic_offset_bd,atom_of_kind,basis_info,&
     343             : !$OMP  basis_parameter,bin,bins_left,bintime_start,bintime_stop,bits_max_val,buffer_left,buffer_overflow,buffer_size,&
     344             : !$OMP  buffer_start,cache_size,cartesian_estimate,cartesian_estimate_virial,&
     345             : !$OMP  coeffs_kind_max0,compression_factor,counter,current_counter,&
     346             : !$OMP  distribution_forces,do_dynamic_load_balancing,do_it,do_periodic,ede_buffer1,ede_buffer2,&
     347             : !$OMP  ede_primitives_tmp,ede_primitives_tmp_virial,&
     348             : !$OMP  ede_work,ede_work2,ede_work2_virial,ede_work_forces,ede_work_virial,eps_schwarz,eps_storage,estimate_to_store_int,&
     349             : !$OMP  fac,first_sgfb,forces_map,general_parameter,handle_bin,handle_getp,handle_load,&
     350             : !$OMP  handle_main,hf_fraction,i_atom,iatom_end,iatom_start,ikind,integral_caches,integral_containers,&
     351             : !$OMP  i_set_list_ij_start,i_set_list_ij_stop,i_set_list_kl_start,i_set_list_kl_stop,i_thread,iw,j_atom,jatom_end,&
     352             : !$OMP  jatom_start,jkind,katom,k_atom,katom_block,katom_end,katom_start,kind_kind_idx,&
     353             : !$OMP  kind_of,kkind,kset,la_max,la_min,last_sgf_global,latom,l_atom,&
     354             : !$OMP  latom_block,latom_end,latom_start,lb_max,lb_min,lc_max,lc_min,ld_max,&
     355             : !$OMP  ld_min,list_ij,list_kl,lkind,ln_10,load_balance_parameter,log10_eps_schwarz,log10_pmax,&
     356             : !$OMP  log_2,logger,lset,max_am,max_contraction,max_contraction_val,max_pgf,max_set,&
     357             : !$OMP  max_val1,max_val2,max_val2_set,maxval_cache,maxval_container,max_val_memory,mem_compression_counter,mem_eris,&
     358             : !$OMP  mem_max_val,memory_parameter,my_bin_id,my_bin_size,my_current_counter,my_istart,my_thread_id,natom,&
     359             : !$OMP  nbits,nblocks,ncob,ncos_max,ncpu,neris_incore,neris_onthefly,neris_tmp,&
     360             : !$OMP  neris_total,nimages,nints,nneighbors,npgfa,npgfb,npgfc,npgfd,&
     361             : !$OMP  nprim_ints,n_processes,nseta,nsetb,nsgfa,nsgfb,nsgfc,nsgfd,&
     362             : !$OMP  nsgfl_a,nsgfl_b,nsgfl_c,nsgfl_d,nsgf_max,offset_ac_set,offset_ad_set,&
     363             : !$OMP  offset_bc_set,offset_bd_set,pac_buf,pac_buf_beta,pac_buf_resp,pac_buf_resp_beta,pad_buf,pad_buf_beta,pad_buf_resp,&
     364             : !$OMP  pad_buf_resp_beta,particle_set,pbc_buf,pbc_buf_beta,pbc_buf_resp,pbc_buf_resp_beta,pbd_buf,pbd_buf_beta, &
     365             : !$OMP  pbd_buf_resp, pbd_buf_resp_beta, pgf_list_ij,&
     366             : !$OMP  pgf_list_kl,pgf_product_list,pmax_atom,pmax_blocks,pmax_entry,pmax_tmp,potential_parameter,primitive_forces,&
     367             : !$OMP  primitive_forces_virial,private_deriv,ptr_p_1,ptr_p_2,ptr_p_3,ptr_p_4,ra,rab2,&
     368             : !$OMP  rb,rc,rcd2,rd,screening_parameter,screen_pmat_forces,set_list_ij,set_list_kl,&
     369             : !$OMP  sgfb,spherical_estimate,spherical_estimate_virial,sphi_a_ext,sphi_a_ext_set,sphi_a_u1,sphi_a_u2,sphi_a_u3,&
     370             : !$OMP  sphi_b,sphi_b_ext,sphi_b_ext_set,sphi_b_u1,sphi_b_u2,sphi_b_u3,sphi_c_ext,sphi_c_ext_set,&
     371             : !$OMP  sphi_c_u1,sphi_c_u2,sphi_c_u3,sphi_d_ext,sphi_d_ext_set,sphi_d_u1,sphi_d_u2,sphi_d_u3,&
     372             : !$OMP  storage_counter_integrals,stor_count_max_val,swap_id,symm_fac,t2,tmp_block,tmp_i8, tmp_i4,&
     373             : !$OMP  tmp_index,tmp_r_1,tmp_r_2,tmp_screen_pgf1,tmp_screen_pgf2,tmp_task_list,tmp_task_list_cost,tmp_virial,&
     374        1784 : !$OMP  treat_forces_in_core,use_disk_storage,zeta,zetb,zetc,zetd)
     375             : 
     376             :       i_thread = 0
     377             : !$    i_thread = omp_get_thread_num()
     378             : 
     379             :       ln_10 = LOG(10.0_dp)
     380             :       log_2 = LOG10(2.0_dp)
     381             :       actual_x_data => my_x_data(irep, i_thread + 1)
     382             :       do_periodic = actual_x_data%periodic_parameter%do_periodic
     383             : 
     384             :       screening_parameter = actual_x_data%screening_parameter
     385             :       general_parameter = actual_x_data%general_parameter
     386             :       potential_parameter = actual_x_data%potential_parameter
     387             :       basis_info => actual_x_data%basis_info
     388             : 
     389             :       load_balance_parameter => actual_x_data%load_balance_parameter
     390             :       basis_parameter => actual_x_data%basis_parameter
     391             : 
     392             :       ! IF(with_resp_density) THEN
     393             :       !   ! here we also do a copy of the original load_balance_parameter
     394             :       !   ! since if MP2 forces are required after the calculation of the HFX
     395             :       !   ! derivatives we need to calculate again the SCF energy.
     396             :       !   ALLOCATE(load_balance_parameter_energy)
     397             :       !   load_balance_parameter_energy = actual_x_data%load_balance_parameter
     398             :       ! END IF
     399             : 
     400             :       memory_parameter => actual_x_data%memory_parameter
     401             : 
     402             :       cache_size = memory_parameter%cache_size
     403             :       bits_max_val = memory_parameter%bits_max_val
     404             : 
     405             :       use_disk_storage = .FALSE.
     406             : 
     407             :       CALL get_qs_env(qs_env=qs_env, &
     408             :                       atomic_kind_set=atomic_kind_set, &
     409             :                       particle_set=particle_set)
     410             : 
     411             :       max_set = basis_info%max_set
     412             :       max_am = basis_info%max_am
     413             :       natom = SIZE(particle_set, 1)
     414             : 
     415             :       treat_forces_in_core = memory_parameter%treat_forces_in_core
     416             : 
     417             :       hf_fraction = general_parameter%fraction
     418             :       hf_fraction = hf_fraction*my_adiabatic_rescale_factor
     419             :       eps_schwarz = screening_parameter%eps_schwarz_forces
     420             :       IF (eps_schwarz < 0.0_dp) THEN
     421             :          log10_eps_schwarz = log_zero
     422             :       ELSE
     423             :          !! ** make eps_schwarz tighter in case of a stress calculation
     424             :          IF (use_virial) eps_schwarz = eps_schwarz/actual_x_data%periodic_parameter%R_max_stress
     425             :          log10_eps_schwarz = LOG10(eps_schwarz)
     426             :       END IF
     427             :       screen_pmat_forces = screening_parameter%do_p_screening_forces
     428             :       ! don't do density screening in case of response forces
     429             :       IF (with_resp_density) screen_pmat_forces = .FALSE.
     430             : 
     431             :       eps_storage = eps_schwarz*memory_parameter%eps_storage_scaling
     432             : 
     433             :       counter = 0_int_8
     434             : 
     435             :       !! Copy the libint_guy
     436             :       private_deriv = actual_x_data%lib_deriv
     437             : 
     438             :       !! Get screening parameter
     439             : 
     440             :       CALL get_atomic_kind_set(atomic_kind_set=atomic_kind_set, &
     441             :                                atom_of_kind=atom_of_kind, &
     442             :                                kind_of=kind_of)
     443             : 
     444             :       !! Create helper arrray for mapping local basis functions to global ones
     445             :       ALLOCATE (last_sgf_global(0:natom))
     446             :       last_sgf_global(0) = 0
     447             :       DO iatom = 1, natom
     448             :          ikind = kind_of(iatom)
     449             :          last_sgf_global(iatom) = last_sgf_global(iatom - 1) + basis_parameter(ikind)%nsgf_total
     450             :       END DO
     451             : 
     452             :       ALLOCATE (max_contraction(max_set, natom))
     453             : 
     454             :       max_contraction = 0.0_dp
     455             :       max_pgf = 0
     456             :       DO jatom = 1, natom
     457             :          jkind = kind_of(jatom)
     458             :          lb_max => basis_parameter(jkind)%lmax
     459             :          npgfb => basis_parameter(jkind)%npgf
     460             :          nsetb = basis_parameter(jkind)%nset
     461             :          first_sgfb => basis_parameter(jkind)%first_sgf
     462             :          sphi_b => basis_parameter(jkind)%sphi
     463             :          nsgfb => basis_parameter(jkind)%nsgf
     464             :          DO jset = 1, nsetb
     465             :             ! takes the primitive to contracted transformation into account
     466             :             ncob = npgfb(jset)*ncoset(lb_max(jset))
     467             :             sgfb = first_sgfb(1, jset)
     468             :             ! if the primitives are assumed to be all of max_val2, max_val2*p2s_b becomes
     469             :             ! the maximum value after multiplication with sphi_b
     470             :             max_contraction(jset, jatom) = MAXVAL((/(SUM(ABS(sphi_b(1:ncob, i))), i=sgfb, sgfb + nsgfb(jset) - 1)/))
     471             :             max_pgf = MAX(max_pgf, npgfb(jset))
     472             :          END DO
     473             :       END DO
     474             : 
     475             :       ncpu = para_env%num_pe
     476             :       n_processes = ncpu*n_threads
     477             : 
     478             :       !! initialize some counters
     479             :       neris_total = 0_int_8
     480             :       neris_incore = 0_int_8
     481             :       neris_onthefly = 0_int_8
     482             :       mem_eris = 0_int_8
     483             :       mem_max_val = 0_int_8
     484             :       compression_factor = 0.0_dp
     485             :       nprim_ints = 0_int_8
     486             :       neris_tmp = 0_int_8
     487             :       max_val_memory = 1_int_8
     488             : 
     489             : !$OMP MASTER
     490             :       !! Set pointer for is_assoc helper
     491             :       shm_is_assoc_atomic_block => actual_x_data%is_assoc_atomic_block
     492             :       shm_number_of_p_entries = actual_x_data%number_of_p_entries
     493             :       shm_atomic_block_offset => actual_x_data%atomic_block_offset
     494             :       shm_set_offset => actual_x_data%set_offset
     495             :       shm_block_offset => actual_x_data%block_offset
     496             : 
     497             :       NULLIFY (full_density_alpha)
     498             :       NULLIFY (full_density_beta)
     499             :       ALLOCATE (full_density_alpha(shm_block_offset(ncpu + 1), nkimages))
     500             : 
     501             :       !! Get the full density from all the processors
     502             :       CALL timeset(routineN//"_getP", handle_getP)
     503             :       CALL get_full_density(para_env, full_density_alpha(:, 1), rho_ao(1, 1)%matrix, shm_number_of_p_entries, &
     504             :                             shm_block_offset, &
     505             :                             kind_of, basis_parameter, get_max_vals_spin=.FALSE., &
     506             :                             antisymmetric=is_anti_symmetric)
     507             :       ! for now only closed shell case
     508             :       IF (with_resp_density) THEN
     509             :          NULLIFY (full_density_resp)
     510             :          ALLOCATE (full_density_resp(shm_block_offset(ncpu + 1)))
     511             :          CALL get_full_density(para_env, full_density_resp, rho_ao_resp(1)%matrix, shm_number_of_p_entries, &
     512             :                                shm_block_offset, &
     513             :                                kind_of, basis_parameter, get_max_vals_spin=.FALSE., &
     514             :                                antisymmetric=is_anti_symmetric)
     515             :       END IF
     516             : 
     517             :       IF (nspins == 2) THEN
     518             :          ALLOCATE (full_density_beta(shm_block_offset(ncpu + 1), 1))
     519             :          CALL get_full_density(para_env, full_density_beta(:, 1), rho_ao(2, 1)%matrix, shm_number_of_p_entries, &
     520             :                                shm_block_offset, &
     521             :                                kind_of, basis_parameter, get_max_vals_spin=.FALSE., &
     522             :                                antisymmetric=is_anti_symmetric)
     523             :          ! With resp density
     524             :          IF (with_resp_density) THEN
     525             :             NULLIFY (full_density_resp_beta)
     526             :             ALLOCATE (full_density_resp_beta(shm_block_offset(ncpu + 1)))
     527             :             CALL get_full_density(para_env, full_density_resp_beta, rho_ao_resp(2)%matrix, shm_number_of_p_entries, &
     528             :                                   shm_block_offset, &
     529             :                                   kind_of, basis_parameter, get_max_vals_spin=.FALSE., &
     530             :                                   antisymmetric=is_anti_symmetric)
     531             :          END IF
     532             : 
     533             :       END IF
     534             :       CALL timestop(handle_getP)
     535             : 
     536             :       !! Calculate max entries for screening on actual density. If screen_p_mat_forces = FALSE, the
     537             :       !! matrix is initialized to 1.0
     538             :       IF (screen_pmat_forces) THEN
     539             :          NULLIFY (shm_initial_p)
     540             :          shm_initial_p => actual_x_data%initial_p_forces
     541             :          shm_pmax_atom => actual_x_data%pmax_atom_forces
     542             :          IF (memory_parameter%recalc_forces) THEN
     543             :             CALL update_pmax_mat(shm_initial_p, &
     544             :                                  actual_x_data%map_atom_to_kind_atom, &
     545             :                                  actual_x_data%set_offset, &
     546             :                                  actual_x_data%atomic_block_offset, &
     547             :                                  shm_pmax_atom, &
     548             :                                  full_density_alpha, full_density_beta, &
     549             :                                  natom, kind_of, basis_parameter, &
     550             :                                  nkind, actual_x_data%is_assoc_atomic_block)
     551             :          END IF
     552             :       END IF
     553             : 
     554             :       ! restore as full density the HF density
     555             :       ! maybe in the future
     556             :       IF (with_resp_density .AND. .NOT. my_resp_only) THEN
     557             :          full_density_alpha(:, 1) = full_density_alpha(:, 1) - full_density_resp
     558             :          IF (nspins == 2) THEN
     559             :             full_density_beta(:, 1) = &
     560             :                full_density_beta(:, 1) - full_density_resp_beta
     561             :          END IF
     562             :          ! full_density_resp=full_density+full_density_resp
     563             :       END IF
     564             : 
     565             :       screen_coeffs_set => actual_x_data%screen_funct_coeffs_set
     566             :       screen_coeffs_kind => actual_x_data%screen_funct_coeffs_kind
     567             :       screen_coeffs_pgf => actual_x_data%screen_funct_coeffs_pgf
     568             :       radii_pgf => actual_x_data%pair_dist_radii_pgf
     569             :       shm_pmax_block => actual_x_data%pmax_block
     570             : 
     571             : !$OMP END MASTER
     572             : !$OMP BARRIER
     573             : 
     574             : !$OMP MASTER
     575             :       CALL timeset(routineN//"_load", handle_load)
     576             : !$OMP END MASTER
     577             :       !! Load balance the work
     578             :       IF (actual_x_data%memory_parameter%recalc_forces) THEN
     579             :          IF (actual_x_data%b_first_load_balance_forces) THEN
     580             :             CALL hfx_load_balance(actual_x_data, eps_schwarz, particle_set, max_set, para_env, &
     581             :                                   screen_coeffs_set, screen_coeffs_kind, &
     582             :                                   shm_is_assoc_atomic_block, do_periodic, &
     583             :                                   load_balance_parameter, kind_of, basis_parameter, shm_initial_p, &
     584             :                                   shm_pmax_atom, i_thread, n_threads, &
     585             :                                   cell, screen_pmat_forces, actual_x_data%map_atom_to_kind_atom, &
     586             :                                   nkind, hfx_do_eval_forces, shm_pmax_block, use_virial)
     587             :             actual_x_data%b_first_load_balance_forces = .FALSE.
     588             :          ELSE
     589             :             CALL hfx_update_load_balance(actual_x_data, para_env, &
     590             :                                          load_balance_parameter, &
     591             :                                          i_thread, n_threads, hfx_do_eval_forces)
     592             :          END IF
     593             :       END IF
     594             : !$OMP MASTER
     595             :       CALL timestop(handle_load)
     596             : !$OMP END MASTER
     597             : 
     598             :       !! precompute maximum nco and allocate scratch
     599             :       ncos_max = 0
     600             :       nsgf_max = 0
     601             :       DO iatom = 1, natom
     602             :          ikind = kind_of(iatom)
     603             :          nseta = basis_parameter(ikind)%nset
     604             :          npgfa => basis_parameter(ikind)%npgf
     605             :          la_max => basis_parameter(ikind)%lmax
     606             :          nsgfa => basis_parameter(ikind)%nsgf
     607             :          DO iset = 1, nseta
     608             :             ncos_max = MAX(ncos_max, ncoset(la_max(iset)))
     609             :             nsgf_max = MAX(nsgf_max, nsgfa(iset))
     610             :          END DO
     611             :       END DO
     612             : 
     613             :       !! Allocate work arrays
     614             :       ALLOCATE (primitive_forces(12*nsgf_max**4))
     615             :       primitive_forces = 0.0_dp
     616             : 
     617             :       ! ** Allocate buffers for pgf_lists
     618             :       nneighbors = SIZE(actual_x_data%neighbor_cells)
     619             :       ALLOCATE (pgf_list_ij(max_pgf**2))
     620             :       ALLOCATE (pgf_list_kl(max_pgf**2))
     621             : !$OMP     ATOMIC READ
     622             :       tmp_i4 = pgf_product_list_size
     623             :       ALLOCATE (pgf_product_list(tmp_i4))
     624             :       ALLOCATE (nimages(max_pgf**2))
     625             : 
     626             :       DO i = 1, max_pgf**2
     627             :          ALLOCATE (pgf_list_ij(i)%image_list(nneighbors))
     628             :          ALLOCATE (pgf_list_kl(i)%image_list(nneighbors))
     629             :       END DO
     630             : 
     631             :       ALLOCATE (pbd_buf(nsgf_max**2))
     632             :       ALLOCATE (pbc_buf(nsgf_max**2))
     633             :       ALLOCATE (pad_buf(nsgf_max**2))
     634             :       ALLOCATE (pac_buf(nsgf_max**2))
     635             :       IF (with_resp_density) THEN
     636             :          ALLOCATE (pbd_buf_resp(nsgf_max**2))
     637             :          ALLOCATE (pbc_buf_resp(nsgf_max**2))
     638             :          ALLOCATE (pad_buf_resp(nsgf_max**2))
     639             :          ALLOCATE (pac_buf_resp(nsgf_max**2))
     640             :       END IF
     641             :       IF (nspins == 2) THEN
     642             :          ALLOCATE (pbd_buf_beta(nsgf_max**2))
     643             :          ALLOCATE (pbc_buf_beta(nsgf_max**2))
     644             :          ALLOCATE (pad_buf_beta(nsgf_max**2))
     645             :          ALLOCATE (pac_buf_beta(nsgf_max**2))
     646             :          IF (with_resp_density) THEN
     647             :             ALLOCATE (pbd_buf_resp_beta(nsgf_max**2))
     648             :             ALLOCATE (pbc_buf_resp_beta(nsgf_max**2))
     649             :             ALLOCATE (pad_buf_resp_beta(nsgf_max**2))
     650             :             ALLOCATE (pac_buf_resp_beta(nsgf_max**2))
     651             :          END IF
     652             :       END IF
     653             : 
     654             :       ALLOCATE (ede_work(ncos_max**4*12))
     655             :       ALLOCATE (ede_work2(ncos_max**4*12))
     656             :       ALLOCATE (ede_work_forces(ncos_max**4*12))
     657             :       ALLOCATE (ede_buffer1(ncos_max**4))
     658             :       ALLOCATE (ede_buffer2(ncos_max**4))
     659             :       ALLOCATE (ede_primitives_tmp(nsgf_max**4))
     660             : 
     661             :       IF (use_virial) THEN
     662             :          ALLOCATE (primitive_forces_virial(12*nsgf_max**4*3))
     663             :          primitive_forces_virial = 0.0_dp
     664             :          ALLOCATE (ede_work_virial(ncos_max**4*12*3))
     665             :          ALLOCATE (ede_work2_virial(ncos_max**4*12*3))
     666             :          ALLOCATE (ede_primitives_tmp_virial(nsgf_max**4))
     667             :          tmp_virial = 0.0_dp
     668             :       ELSE
     669             :          ! dummy allocation
     670             :          ALLOCATE (primitive_forces_virial(1))
     671             :          primitive_forces_virial = 0.0_dp
     672             :          ALLOCATE (ede_work_virial(1))
     673             :          ALLOCATE (ede_work2_virial(1))
     674             :          ALLOCATE (ede_primitives_tmp_virial(1))
     675             :       END IF
     676             : 
     677             :       !! Start caluclating integrals of the form (ab|cd) or (ij|kl)
     678             :       !! In order to do so, there is a main four-loop structure that takes into account the two symmetries
     679             :       !!
     680             :       !!   (ab|cd) = (ba|cd) = (ab|dc) = (ba|dc)
     681             :       !!
     682             :       !! by iterating in the following way
     683             :       !!
     684             :       !! DO iatom=1,natom               and       DO katom=1,natom
     685             :       !!   DO jatom=iatom,natom                     DO latom=katom,natom
     686             :       !!
     687             :       !! The third symmetry
     688             :       !!
     689             :       !!  (ab|cd) = (cd|ab)
     690             :       !!
     691             :       !! is taken into account by the following criterion:
     692             :       !!
     693             :       !! IF(katom+latom<=iatom+jatom)  THEN
     694             :       !!   IF( ((iatom+jatom).EQ.(katom+latom) ) .AND.(katom<iatom)) CYCLE
     695             :       !!
     696             :       !! Furthermore, if iatom==jatom==katom==latom we cycle, because the derivatives are zero anyway.
     697             :       !!
     698             :       !! Depending on the degeneracy of an integral the exchange contribution is multiplied by a corresponding
     699             :       !! factor ( symm_fac ).
     700             :       !!
     701             :       !! If one quartet does not pass the screening we CYCLE on the outer most possible loop. Thats why we use
     702             :       !! different hierarchies of short range screening matrices.
     703             :       !!
     704             :       !! If we do a parallel run, each process owns a unique array of workloads. Here, a workload is
     705             :       !! defined as :
     706             :       !!
     707             :       !! istart, jstart, kstart, lstart, number_of_atom_quartets, initial_cpu_id
     708             :       !!
     709             :       !! This tells the process where to start the main loops and how many bunches of integrals it has to
     710             :       !! calculate. The original parallelization is a simple modulo distribution that is binned and
     711             :       !! optimized in the load_balance routines. Since the Monte Carlo routines can swap processors,
     712             :       !! we need to know which was the initial cpu_id.
     713             :       !! Furthermore, the indices iatom, jatom, katom, latom have to be set to istart, jstart, kstart and
     714             :       !! lstart only the first time the loop is executed. All subsequent loops have to start with one or
     715             :       !! iatom and katom respectively. Therefore, we use flags like first_j_loop etc.
     716             : 
     717             : !$OMP BARRIER
     718             : !$OMP MASTER
     719             :       CALL timeset(routineN//"_main", handle_main)
     720             : !$OMP END MASTER
     721             : !$OMP BARRIER
     722             : 
     723             :       coeffs_kind_max0 = MAXVAL(screen_coeffs_kind(:, :)%x(2))
     724             :       ALLOCATE (set_list_ij((max_set*load_balance_parameter%block_size)**2))
     725             :       ALLOCATE (set_list_kl((max_set*load_balance_parameter%block_size)**2))
     726             : 
     727             : !$OMP BARRIER
     728             : 
     729             :       IF (actual_x_data%memory_parameter%recalc_forces) THEN
     730             :          actual_x_data%distribution_forces%ram_counter = HUGE(distribution_forces%ram_counter)
     731             :          memory_parameter%ram_counter_forces = HUGE(memory_parameter%ram_counter_forces)
     732             :       END IF
     733             : 
     734             :       IF (treat_forces_in_core) THEN
     735             :          buffer_overflow = .FALSE.
     736             :       ELSE
     737             :          buffer_overflow = .TRUE.
     738             :       END IF
     739             : 
     740             :       do_dynamic_load_balancing = .TRUE.
     741             :       IF (n_threads == 1) do_dynamic_load_balancing = .FALSE.
     742             : 
     743             :       IF (do_dynamic_load_balancing) THEN
     744             :          my_bin_size = SIZE(actual_x_data%distribution_forces)
     745             :       ELSE
     746             :          my_bin_size = 1
     747             :       END IF
     748             :       !! We do not need the containers if MAX_MEM = 0
     749             :       IF (treat_forces_in_core) THEN
     750             :          !! IF new md step -> reinitialize containers
     751             :          IF (actual_x_data%memory_parameter%recalc_forces) THEN
     752             :             CALL dealloc_containers(actual_x_data%store_forces, memory_parameter%actual_memory_usage)
     753             :             CALL alloc_containers(actual_x_data%store_forces, my_bin_size)
     754             : 
     755             :             DO bin = 1, my_bin_size
     756             :                maxval_container => actual_x_data%store_forces%maxval_container(bin)
     757             :                integral_containers => actual_x_data%store_forces%integral_containers(:, bin)
     758             :                CALL hfx_init_container(maxval_container, memory_parameter%actual_memory_usage, .FALSE.)
     759             :                DO i = 1, 64
     760             :                   CALL hfx_init_container(integral_containers(i), memory_parameter%actual_memory_usage, .FALSE.)
     761             :                END DO
     762             :             END DO
     763             :          END IF
     764             : 
     765             :          !! Decompress the first cache for maxvals and integrals
     766             :          IF (.NOT. actual_x_data%memory_parameter%recalc_forces) THEN
     767             :             DO bin = 1, my_bin_size
     768             :                maxval_cache => actual_x_data%store_forces%maxval_cache(bin)
     769             :                maxval_container => actual_x_data%store_forces%maxval_container(bin)
     770             :                integral_caches => actual_x_data%store_forces%integral_caches(:, bin)
     771             :                integral_containers => actual_x_data%store_forces%integral_containers(:, bin)
     772             :                CALL hfx_decompress_first_cache(bits_max_val, maxval_cache, maxval_container, &
     773             :                                                memory_parameter%actual_memory_usage, .FALSE.)
     774             :                DO i = 1, 64
     775             :                   CALL hfx_decompress_first_cache(i, integral_caches(i), integral_containers(i), &
     776             :                                                   memory_parameter%actual_memory_usage, .FALSE.)
     777             :                END DO
     778             :             END DO
     779             :          END IF
     780             :       END IF
     781             : 
     782             : !$OMP BARRIER
     783             : !$OMP MASTER
     784             :       IF (do_dynamic_load_balancing) THEN
     785             :          ! ** Lets construct the task list
     786             :          shm_total_bins = 0
     787             :          DO i = 1, n_threads
     788             :             shm_total_bins = shm_total_bins + SIZE(my_x_data(irep, i)%distribution_forces)
     789             :          END DO
     790             :          ALLOCATE (tmp_task_list(shm_total_bins))
     791             :          shm_task_counter = 0
     792             :          DO i = 1, n_threads
     793             :             DO bin = 1, SIZE(my_x_data(irep, i)%distribution_forces)
     794             :                shm_task_counter = shm_task_counter + 1
     795             :                tmp_task_list(shm_task_counter)%thread_id = i
     796             :                tmp_task_list(shm_task_counter)%bin_id = bin
     797             :                tmp_task_list(shm_task_counter)%cost = my_x_data(irep, i)%distribution_forces(bin)%cost
     798             :             END DO
     799             :          END DO
     800             : 
     801             :          ! ** Now sort the task list
     802             :          ALLOCATE (tmp_task_list_cost(shm_total_bins))
     803             :          ALLOCATE (tmp_index(shm_total_bins))
     804             : 
     805             :          DO i = 1, shm_total_bins
     806             :             tmp_task_list_cost(i) = tmp_task_list(i)%cost
     807             :          END DO
     808             : 
     809             :          CALL sort(tmp_task_list_cost, shm_total_bins, tmp_index)
     810             : 
     811             :          ALLOCATE (actual_x_data%task_list(shm_total_bins))
     812             : 
     813             :          DO i = 1, shm_total_bins
     814             :             actual_x_data%task_list(i) = tmp_task_list(tmp_index(shm_total_bins - i + 1))
     815             :          END DO
     816             : 
     817             :          shm_task_list => actual_x_data%task_list
     818             :          shm_task_counter = 0
     819             : 
     820             :          DEALLOCATE (tmp_task_list_cost, tmp_index, tmp_task_list)
     821             :       END IF
     822             : 
     823             :       !! precalculate maximum density matrix elements in blocks
     824             :       shm_pmax_block => actual_x_data%pmax_block
     825             :       actual_x_data%pmax_block = 0.0_dp
     826             :       IF (screen_pmat_forces) THEN
     827             :          DO iatom_block = 1, SIZE(actual_x_data%blocks)
     828             :             iatom_start = actual_x_data%blocks(iatom_block)%istart
     829             :             iatom_end = actual_x_data%blocks(iatom_block)%iend
     830             :             DO jatom_block = 1, SIZE(actual_x_data%blocks)
     831             :                jatom_start = actual_x_data%blocks(jatom_block)%istart
     832             :                jatom_end = actual_x_data%blocks(jatom_block)%iend
     833             :                shm_pmax_block(iatom_block, jatom_block) = MAXVAL(shm_pmax_atom(iatom_start:iatom_end, jatom_start:jatom_end))
     834             :             END DO
     835             :          END DO
     836             :       END IF
     837             :       shm_atomic_pair_list => actual_x_data%atomic_pair_list_forces
     838             :       IF (actual_x_data%memory_parameter%recalc_forces) THEN
     839             :          CALL build_atomic_pair_list(natom, shm_atomic_pair_list, kind_of, basis_parameter, particle_set, &
     840             :                                      do_periodic, screen_coeffs_kind, coeffs_kind_max0, log10_eps_schwarz, cell, &
     841             :                                      actual_x_data%blocks)
     842             :       END IF
     843             : 
     844             :       my_bin_size = SIZE(actual_x_data%distribution_forces)
     845             :       DO bin = 1, my_bin_size
     846             :          actual_x_data%distribution_forces(bin)%time_forces = 0.0_dp
     847             :       END DO
     848             : !$OMP END MASTER
     849             : !$OMP BARRIER
     850             : 
     851             :       IF (treat_forces_in_core) THEN
     852             :          IF (my_bin_size > 0) THEN
     853             :             maxval_container => actual_x_data%store_forces%maxval_container(1)
     854             :             maxval_cache => actual_x_data%store_forces%maxval_cache(1)
     855             : 
     856             :             integral_containers => actual_x_data%store_forces%integral_containers(:, 1)
     857             :             integral_caches => actual_x_data%store_forces%integral_caches(:, 1)
     858             :          END IF
     859             :       END IF
     860             : 
     861             :       nblocks = load_balance_parameter%nblocks
     862             : 
     863             :       bins_left = .TRUE.
     864             :       do_it = .TRUE.
     865             :       bin = 0
     866             :       DO WHILE (bins_left)
     867             :          IF (.NOT. do_dynamic_load_balancing) THEN
     868             :             bin = bin + 1
     869             :             IF (bin > my_bin_size) THEN
     870             :                do_it = .FALSE.
     871             :                bins_left = .FALSE.
     872             :             ELSE
     873             :                do_it = .TRUE.
     874             :                bins_left = .TRUE.
     875             :                distribution_forces => actual_x_data%distribution_forces(bin)
     876             :             END IF
     877             :          ELSE
     878             : !$OMP CRITICAL(hfxderivatives_critical)
     879             :             shm_task_counter = shm_task_counter + 1
     880             :             IF (shm_task_counter <= shm_total_bins) THEN
     881             :                my_thread_id = shm_task_list(shm_task_counter)%thread_id
     882             :                my_bin_id = shm_task_list(shm_task_counter)%bin_id
     883             :                IF (treat_forces_in_core) THEN
     884             :                   maxval_cache => my_x_data(irep, my_thread_id)%store_forces%maxval_cache(my_bin_id)
     885             :                   maxval_container => my_x_data(irep, my_thread_id)%store_forces%maxval_container(my_bin_id)
     886             :                   integral_caches => my_x_data(irep, my_thread_id)%store_forces%integral_caches(:, my_bin_id)
     887             :                   integral_containers => my_x_data(irep, my_thread_id)%store_forces%integral_containers(:, my_bin_id)
     888             :                END IF
     889             :                distribution_forces => my_x_data(irep, my_thread_id)%distribution_forces(my_bin_id)
     890             :                do_it = .TRUE.
     891             :                bins_left = .TRUE.
     892             :                IF (actual_x_data%memory_parameter%recalc_forces) THEN
     893             :                   distribution_forces%ram_counter = HUGE(distribution_forces%ram_counter)
     894             :                END IF
     895             :                counter = 0_Int_8
     896             :             ELSE
     897             :                do_it = .FALSE.
     898             :                bins_left = .FALSE.
     899             :             END IF
     900             : !$OMP END CRITICAL(hfxderivatives_critical)
     901             :          END IF
     902             : 
     903             :          IF (.NOT. do_it) CYCLE
     904             : !$OMP MASTER
     905             :          CALL timeset(routineN//"_bin", handle_bin)
     906             : !$OMP END MASTER
     907             :          bintime_start = m_walltime()
     908             :          my_istart = distribution_forces%istart
     909             :          my_current_counter = 0
     910             :          IF (distribution_forces%number_of_atom_quartets == 0 .OR. &
     911             :              my_istart == -1_int_8) my_istart = nblocks**4
     912             :          atomic_blocks: DO atom_block = my_istart, nblocks**4 - 1, n_processes
     913             :             latom_block = INT(MODULO(atom_block, nblocks)) + 1
     914             :             tmp_block = atom_block/nblocks
     915             :             katom_block = INT(MODULO(tmp_block, nblocks)) + 1
     916             :             IF (latom_block < katom_block) CYCLE
     917             :             tmp_block = tmp_block/nblocks
     918             :             jatom_block = INT(MODULO(tmp_block, nblocks)) + 1
     919             :             tmp_block = tmp_block/nblocks
     920             :             iatom_block = INT(MODULO(tmp_block, nblocks)) + 1
     921             :             IF (jatom_block < iatom_block) CYCLE
     922             :             my_current_counter = my_current_counter + 1
     923             :             IF (my_current_counter > distribution_forces%number_of_atom_quartets) EXIT atomic_blocks
     924             : 
     925             :             iatom_start = actual_x_data%blocks(iatom_block)%istart
     926             :             iatom_end = actual_x_data%blocks(iatom_block)%iend
     927             :             jatom_start = actual_x_data%blocks(jatom_block)%istart
     928             :             jatom_end = actual_x_data%blocks(jatom_block)%iend
     929             :             katom_start = actual_x_data%blocks(katom_block)%istart
     930             :             katom_end = actual_x_data%blocks(katom_block)%iend
     931             :             latom_start = actual_x_data%blocks(latom_block)%istart
     932             :             latom_end = actual_x_data%blocks(latom_block)%iend
     933             : 
     934             :             pmax_blocks = MAX(shm_pmax_block(katom_block, iatom_block) + &
     935             :                               shm_pmax_block(latom_block, jatom_block), &
     936             :                               shm_pmax_block(latom_block, iatom_block) + &
     937             :                               shm_pmax_block(katom_block, jatom_block))
     938             : 
     939             :             IF (2.0_dp*coeffs_kind_max0 + pmax_blocks < log10_eps_schwarz) CYCLE
     940             : 
     941             :             CALL build_pair_list(natom, list_ij, set_list_ij, iatom_start, iatom_end, &
     942             :                                  jatom_start, jatom_end, &
     943             :                                  kind_of, basis_parameter, particle_set, &
     944             :                                  do_periodic, screen_coeffs_set, screen_coeffs_kind, coeffs_kind_max0, &
     945             :                                  log10_eps_schwarz, cell, pmax_blocks, shm_atomic_pair_list)
     946             : 
     947             :             CALL build_pair_list(natom, list_kl, set_list_kl, katom_start, katom_end, &
     948             :                                  latom_start, latom_end, &
     949             :                                  kind_of, basis_parameter, particle_set, &
     950             :                                  do_periodic, screen_coeffs_set, screen_coeffs_kind, coeffs_kind_max0, &
     951             :                                  log10_eps_schwarz, cell, pmax_blocks, shm_atomic_pair_list)
     952             : 
     953             :             DO i_list_ij = 1, list_ij%n_element
     954             :                iatom = list_ij%elements(i_list_ij)%pair(1)
     955             :                jatom = list_ij%elements(i_list_ij)%pair(2)
     956             :                i_set_list_ij_start = list_ij%elements(i_list_ij)%set_bounds(1)
     957             :                i_set_list_ij_stop = list_ij%elements(i_list_ij)%set_bounds(2)
     958             :                ikind = list_ij%elements(i_list_ij)%kind_pair(1)
     959             :                jkind = list_ij%elements(i_list_ij)%kind_pair(2)
     960             :                ra = list_ij%elements(i_list_ij)%r1
     961             :                rb = list_ij%elements(i_list_ij)%r2
     962             :                rab2 = list_ij%elements(i_list_ij)%dist2
     963             : 
     964             :                la_max => basis_parameter(ikind)%lmax
     965             :                la_min => basis_parameter(ikind)%lmin
     966             :                npgfa => basis_parameter(ikind)%npgf
     967             :                nseta = basis_parameter(ikind)%nset
     968             :                zeta => basis_parameter(ikind)%zet
     969             :                nsgfa => basis_parameter(ikind)%nsgf
     970             :                sphi_a_ext => basis_parameter(ikind)%sphi_ext(:, :, :, :)
     971             :                nsgfl_a => basis_parameter(ikind)%nsgfl
     972             :                sphi_a_u1 = UBOUND(sphi_a_ext, 1)
     973             :                sphi_a_u2 = UBOUND(sphi_a_ext, 2)
     974             :                sphi_a_u3 = UBOUND(sphi_a_ext, 3)
     975             : 
     976             :                lb_max => basis_parameter(jkind)%lmax
     977             :                lb_min => basis_parameter(jkind)%lmin
     978             :                npgfb => basis_parameter(jkind)%npgf
     979             :                nsetb = basis_parameter(jkind)%nset
     980             :                zetb => basis_parameter(jkind)%zet
     981             :                nsgfb => basis_parameter(jkind)%nsgf
     982             :                sphi_b_ext => basis_parameter(jkind)%sphi_ext(:, :, :, :)
     983             :                nsgfl_b => basis_parameter(jkind)%nsgfl
     984             :                sphi_b_u1 = UBOUND(sphi_b_ext, 1)
     985             :                sphi_b_u2 = UBOUND(sphi_b_ext, 2)
     986             :                sphi_b_u3 = UBOUND(sphi_b_ext, 3)
     987             : 
     988             :                i_atom = atom_of_kind(iatom)
     989             :                forces_map(1, 1) = ikind
     990             :                forces_map(1, 2) = i_atom
     991             :                j_atom = atom_of_kind(jatom)
     992             :                forces_map(2, 1) = jkind
     993             :                forces_map(2, 2) = j_atom
     994             : 
     995             :                DO i_list_kl = 1, list_kl%n_element
     996             :                   katom = list_kl%elements(i_list_kl)%pair(1)
     997             :                   latom = list_kl%elements(i_list_kl)%pair(2)
     998             : 
     999             :                   !All four centers equivalent => zero-contribution
    1000             : !VIIRAL            IF((iatom==jatom .AND. iatom==katom .AND. iatom==latom)) CYCLE
    1001             :                   IF (.NOT. (katom + latom <= iatom + jatom)) CYCLE
    1002             :                   IF (((iatom + jatom) .EQ. (katom + latom)) .AND. (katom < iatom)) CYCLE
    1003             : 
    1004             :                   i_set_list_kl_start = list_kl%elements(i_list_kl)%set_bounds(1)
    1005             :                   i_set_list_kl_stop = list_kl%elements(i_list_kl)%set_bounds(2)
    1006             :                   kkind = list_kl%elements(i_list_kl)%kind_pair(1)
    1007             :                   lkind = list_kl%elements(i_list_kl)%kind_pair(2)
    1008             :                   rc = list_kl%elements(i_list_kl)%r1
    1009             :                   rd = list_kl%elements(i_list_kl)%r2
    1010             :                   rcd2 = list_kl%elements(i_list_kl)%dist2
    1011             : 
    1012             :                   IF (screen_pmat_forces) THEN
    1013             :                      pmax_atom = MAX(shm_pmax_atom(katom, iatom) + &
    1014             :                                      shm_pmax_atom(latom, jatom), &
    1015             :                                      shm_pmax_atom(latom, iatom) + &
    1016             :                                      shm_pmax_atom(katom, jatom))
    1017             :                   ELSE
    1018             :                      pmax_atom = 0.0_dp
    1019             :                   END IF
    1020             : 
    1021             :                   IF ((screen_coeffs_kind(jkind, ikind)%x(1)*rab2 + &
    1022             :                        screen_coeffs_kind(jkind, ikind)%x(2)) + &
    1023             :                       (screen_coeffs_kind(lkind, kkind)%x(1)*rcd2 + &
    1024             :                        screen_coeffs_kind(lkind, kkind)%x(2)) + pmax_atom < log10_eps_schwarz) CYCLE
    1025             : 
    1026             :                   IF (.NOT. (shm_is_assoc_atomic_block(latom, iatom) >= 1 .AND. &
    1027             :                              shm_is_assoc_atomic_block(katom, iatom) >= 1 .AND. &
    1028             :                              shm_is_assoc_atomic_block(katom, jatom) >= 1 .AND. &
    1029             :                              shm_is_assoc_atomic_block(latom, jatom) >= 1)) CYCLE
    1030             :                   k_atom = atom_of_kind(katom)
    1031             :                   forces_map(3, 1) = kkind
    1032             :                   forces_map(3, 2) = k_atom
    1033             : 
    1034             :                   l_atom = atom_of_kind(latom)
    1035             :                   forces_map(4, 1) = lkind
    1036             :                   forces_map(4, 2) = l_atom
    1037             : 
    1038             :                   IF (nspins == 1) THEN
    1039             :                      fac = 0.25_dp*hf_fraction
    1040             :                   ELSE
    1041             :                      fac = 0.5_dp*hf_fraction
    1042             :                   END IF
    1043             :                   !calculate symmetry_factor
    1044             :                   symm_fac = 0.25_dp
    1045             :                   IF (iatom == jatom) symm_fac = symm_fac*2.0_dp
    1046             :                   IF (katom == latom) symm_fac = symm_fac*2.0_dp
    1047             :                   IF (iatom == katom .AND. jatom == latom .AND. &
    1048             :                       iatom /= jatom .AND. katom /= latom) symm_fac = symm_fac*2.0_dp
    1049             :                   IF (iatom == katom .AND. iatom == jatom .AND. &
    1050             :                       katom == latom) symm_fac = symm_fac*2.0_dp
    1051             : 
    1052             :                   symm_fac = 1.0_dp/symm_fac
    1053             :                   fac = fac*symm_fac
    1054             : 
    1055             :                   lc_max => basis_parameter(kkind)%lmax
    1056             :                   lc_min => basis_parameter(kkind)%lmin
    1057             :                   npgfc => basis_parameter(kkind)%npgf
    1058             :                   zetc => basis_parameter(kkind)%zet
    1059             :                   nsgfc => basis_parameter(kkind)%nsgf
    1060             :                   sphi_c_ext => basis_parameter(kkind)%sphi_ext(:, :, :, :)
    1061             :                   nsgfl_c => basis_parameter(kkind)%nsgfl
    1062             :                   sphi_c_u1 = UBOUND(sphi_c_ext, 1)
    1063             :                   sphi_c_u2 = UBOUND(sphi_c_ext, 2)
    1064             :                   sphi_c_u3 = UBOUND(sphi_c_ext, 3)
    1065             : 
    1066             :                   ld_max => basis_parameter(lkind)%lmax
    1067             :                   ld_min => basis_parameter(lkind)%lmin
    1068             :                   npgfd => basis_parameter(lkind)%npgf
    1069             :                   zetd => basis_parameter(lkind)%zet
    1070             :                   nsgfd => basis_parameter(lkind)%nsgf
    1071             :                   sphi_d_ext => basis_parameter(lkind)%sphi_ext(:, :, :, :)
    1072             :                   nsgfl_d => basis_parameter(lkind)%nsgfl
    1073             :                   sphi_d_u1 = UBOUND(sphi_d_ext, 1)
    1074             :                   sphi_d_u2 = UBOUND(sphi_d_ext, 2)
    1075             :                   sphi_d_u3 = UBOUND(sphi_d_ext, 3)
    1076             : 
    1077             :                   atomic_offset_bd = shm_atomic_block_offset(jatom, latom)
    1078             :                   atomic_offset_bc = shm_atomic_block_offset(jatom, katom)
    1079             :                   atomic_offset_ad = shm_atomic_block_offset(iatom, latom)
    1080             :                   atomic_offset_ac = shm_atomic_block_offset(iatom, katom)
    1081             : 
    1082             :                   IF (jatom < latom) THEN
    1083             :                      offset_bd_set => shm_set_offset(:, :, lkind, jkind)
    1084             :                   ELSE
    1085             :                      offset_bd_set => shm_set_offset(:, :, jkind, lkind)
    1086             :                   END IF
    1087             :                   IF (jatom < katom) THEN
    1088             :                      offset_bc_set => shm_set_offset(:, :, kkind, jkind)
    1089             :                   ELSE
    1090             :                      offset_bc_set => shm_set_offset(:, :, jkind, kkind)
    1091             :                   END IF
    1092             :                   IF (iatom < latom) THEN
    1093             :                      offset_ad_set => shm_set_offset(:, :, lkind, ikind)
    1094             :                   ELSE
    1095             :                      offset_ad_set => shm_set_offset(:, :, ikind, lkind)
    1096             :                   END IF
    1097             :                   IF (iatom < katom) THEN
    1098             :                      offset_ac_set => shm_set_offset(:, :, kkind, ikind)
    1099             :                   ELSE
    1100             :                      offset_ac_set => shm_set_offset(:, :, ikind, kkind)
    1101             :                   END IF
    1102             : 
    1103             :                   IF (screen_pmat_forces) THEN
    1104             :                      swap_id = 16
    1105             :                      kind_kind_idx = INT(get_1D_idx(kkind, ikind, INT(nkind, int_8)))
    1106             :                      IF (ikind >= kkind) THEN
    1107             :                         ptr_p_1 => shm_initial_p(kind_kind_idx)%p_kind(:, :, &
    1108             :                                                                        actual_x_data%map_atom_to_kind_atom(katom), &
    1109             :                                                                        actual_x_data%map_atom_to_kind_atom(iatom))
    1110             :                      ELSE
    1111             :                         ptr_p_1 => shm_initial_p(kind_kind_idx)%p_kind(:, :, &
    1112             :                                                                        actual_x_data%map_atom_to_kind_atom(iatom), &
    1113             :                                                                        actual_x_data%map_atom_to_kind_atom(katom))
    1114             :                         swap_id = swap_id + 1
    1115             :                      END IF
    1116             :                      kind_kind_idx = INT(get_1D_idx(lkind, jkind, INT(nkind, int_8)))
    1117             :                      IF (jkind >= lkind) THEN
    1118             :                         ptr_p_2 => shm_initial_p(kind_kind_idx)%p_kind(:, :, &
    1119             :                                                                        actual_x_data%map_atom_to_kind_atom(latom), &
    1120             :                                                                        actual_x_data%map_atom_to_kind_atom(jatom))
    1121             :                      ELSE
    1122             :                         ptr_p_2 => shm_initial_p(kind_kind_idx)%p_kind(:, :, &
    1123             :                                                                        actual_x_data%map_atom_to_kind_atom(jatom), &
    1124             :                                                                        actual_x_data%map_atom_to_kind_atom(latom))
    1125             :                         swap_id = swap_id + 2
    1126             :                      END IF
    1127             :                      kind_kind_idx = INT(get_1D_idx(lkind, ikind, INT(nkind, int_8)))
    1128             :                      IF (ikind >= lkind) THEN
    1129             :                         ptr_p_3 => shm_initial_p(kind_kind_idx)%p_kind(:, :, &
    1130             :                                                                        actual_x_data%map_atom_to_kind_atom(latom), &
    1131             :                                                                        actual_x_data%map_atom_to_kind_atom(iatom))
    1132             :                      ELSE
    1133             :                         ptr_p_3 => shm_initial_p(kind_kind_idx)%p_kind(:, :, &
    1134             :                                                                        actual_x_data%map_atom_to_kind_atom(iatom), &
    1135             :                                                                        actual_x_data%map_atom_to_kind_atom(latom))
    1136             :                         swap_id = swap_id + 4
    1137             :                      END IF
    1138             :                      kind_kind_idx = INT(get_1D_idx(kkind, jkind, INT(nkind, int_8)))
    1139             :                      IF (jkind >= kkind) THEN
    1140             :                         ptr_p_4 => shm_initial_p(kind_kind_idx)%p_kind(:, :, &
    1141             :                                                                        actual_x_data%map_atom_to_kind_atom(katom), &
    1142             :                                                                        actual_x_data%map_atom_to_kind_atom(jatom))
    1143             :                      ELSE
    1144             :                         ptr_p_4 => shm_initial_p(kind_kind_idx)%p_kind(:, :, &
    1145             :                                                                        actual_x_data%map_atom_to_kind_atom(jatom), &
    1146             :                                                                        actual_x_data%map_atom_to_kind_atom(katom))
    1147             :                         swap_id = swap_id + 8
    1148             :                      END IF
    1149             :                   END IF
    1150             : 
    1151             :                   !! At this stage, check for memory used in compression
    1152             : 
    1153             :                   IF (actual_x_data%memory_parameter%recalc_forces) THEN
    1154             :                      IF (treat_forces_in_core) THEN
    1155             :                         ! ** We know the maximum amount of integrals that we can store per MPI-process
    1156             :                         ! ** Now we need to sum the current memory usage among all openMP threads
    1157             :                         ! ** We can just read what is currently stored on the corresponding x_data type
    1158             :                         ! ** If this is thread i and it tries to read the data from thread j, that is
    1159             :                         ! ** currently writing that data, we just dont care, because the possible error
    1160             :                         ! ** is of the order of CACHE_SIZE
    1161             :                         mem_compression_counter = 0
    1162             :                         DO i = 1, n_threads
    1163             : !$OMP                   ATOMIC READ
    1164             :                            tmp_i4 = my_x_data(irep, i)%memory_parameter%actual_memory_usage
    1165             :                            mem_compression_counter = mem_compression_counter + &
    1166             :                                                      tmp_i4*memory_parameter%cache_size
    1167             :                         END DO
    1168             :                         IF (mem_compression_counter + memory_parameter%final_comp_counter_energy &
    1169             :                             > memory_parameter%max_compression_counter) THEN
    1170             :                            buffer_overflow = .TRUE.
    1171             :                            IF (do_dynamic_load_balancing) THEN
    1172             :                               distribution_forces%ram_counter = counter
    1173             :                            ELSE
    1174             :                               memory_parameter%ram_counter_forces = counter
    1175             :                            END IF
    1176             :                         ELSE
    1177             :                            counter = counter + 1
    1178             :                            buffer_overflow = .FALSE.
    1179             :                         END IF
    1180             :                      END IF
    1181             :                   ELSE
    1182             :                      IF (treat_forces_in_core) THEN
    1183             :                         IF (do_dynamic_load_balancing) THEN
    1184             :                            IF (distribution_forces%ram_counter == counter) THEN
    1185             :                               buffer_overflow = .TRUE.
    1186             :                            ELSE
    1187             :                               counter = counter + 1
    1188             :                               buffer_overflow = .FALSE.
    1189             :                            END IF
    1190             :                         ELSE
    1191             :                            IF (memory_parameter%ram_counter_forces == counter) THEN
    1192             :                               buffer_overflow = .TRUE.
    1193             :                            ELSE
    1194             :                               counter = counter + 1
    1195             :                               buffer_overflow = .FALSE.
    1196             :                            END IF
    1197             :                         END IF
    1198             :                      END IF
    1199             :                   END IF
    1200             : 
    1201             :                   DO i_set_list_ij = i_set_list_ij_start, i_set_list_ij_stop
    1202             :                      iset = set_list_ij(i_set_list_ij)%pair(1)
    1203             :                      jset = set_list_ij(i_set_list_ij)%pair(2)
    1204             : 
    1205             :                      ncob = npgfb(jset)*ncoset(lb_max(jset))
    1206             :                      max_val1 = screen_coeffs_set(jset, iset, jkind, ikind)%x(1)*rab2 + &
    1207             :                                 screen_coeffs_set(jset, iset, jkind, ikind)%x(2)
    1208             :                      !! Near field screening
    1209             :                      IF (max_val1 + (screen_coeffs_kind(lkind, kkind)%x(1)*rcd2 + &
    1210             :                                      screen_coeffs_kind(lkind, kkind)%x(2)) + pmax_atom < log10_eps_schwarz) CYCLE
    1211             :                      sphi_a_ext_set => sphi_a_ext(:, :, :, iset)
    1212             :                      sphi_b_ext_set => sphi_b_ext(:, :, :, jset)
    1213             : 
    1214             :                      DO i_set_list_kl = i_set_list_kl_start, i_set_list_kl_stop
    1215             :                         kset = set_list_kl(i_set_list_kl)%pair(1)
    1216             :                         lset = set_list_kl(i_set_list_kl)%pair(2)
    1217             : 
    1218             :                         max_val2_set = (screen_coeffs_set(lset, kset, lkind, kkind)%x(1)*rcd2 + &
    1219             :                                         screen_coeffs_set(lset, kset, lkind, kkind)%x(2))
    1220             :                         max_val2 = max_val1 + max_val2_set
    1221             : 
    1222             :                         !! Near field screening
    1223             :                         IF (max_val2 + pmax_atom < log10_eps_schwarz) CYCLE
    1224             :                         sphi_c_ext_set => sphi_c_ext(:, :, :, kset)
    1225             :                         sphi_d_ext_set => sphi_d_ext(:, :, :, lset)
    1226             : 
    1227             :                         IF (screen_pmat_forces) THEN
    1228             :                            CALL get_pmax_val(ptr_p_1, ptr_p_2, ptr_p_3, ptr_p_4, &
    1229             :                                              iset, jset, kset, lset, &
    1230             :                                              pmax_tmp, swap_id)
    1231             : 
    1232             :                            log10_pmax = log_2 + pmax_tmp
    1233             :                         ELSE
    1234             :                            log10_pmax = 0.0_dp
    1235             :                         END IF
    1236             : 
    1237             :                         max_val2 = max_val2 + log10_pmax
    1238             :                         IF (max_val2 < log10_eps_schwarz) CYCLE
    1239             : 
    1240             :                         pmax_entry = EXP(log10_pmax*ln_10)
    1241             :                         !! store current number of integrals, update total number and number of integrals in buffer
    1242             :                         current_counter = nsgfa(iset)*nsgfb(jset)*nsgfc(kset)*nsgfd(lset)*12
    1243             :                         IF (buffer_overflow) THEN
    1244             :                            neris_onthefly = neris_onthefly + current_counter
    1245             :                         END IF
    1246             : 
    1247             :                         !! Get integrals from buffer and update Kohn-Sham matrix
    1248             :                         IF (.NOT. buffer_overflow .AND. .NOT. actual_x_data%memory_parameter%recalc_forces) THEN
    1249             :                            nints = current_counter
    1250             :                            CALL hfx_get_single_cache_element(estimate_to_store_int, 6, &
    1251             :                                                              maxval_cache, maxval_container, memory_parameter%actual_memory_usage, &
    1252             :                                                              use_disk_storage)
    1253             :                            spherical_estimate = SET_EXPONENT(1.0_dp, estimate_to_store_int + 1)
    1254             : !                           IF (spherical_estimate*pmax_entry < eps_schwarz) CYCLE
    1255             :                            IF (.NOT. use_virial) THEN
    1256             :                               IF (spherical_estimate*pmax_entry < eps_schwarz) CYCLE
    1257             :                            END IF
    1258             :                            nbits = EXPONENT(ANINT(spherical_estimate*pmax_entry/eps_storage)) + 1
    1259             :                            buffer_left = nints
    1260             :                            buffer_start = 1
    1261             :                            neris_incore = neris_incore + INT(nints, int_8)
    1262             :                            DO WHILE (buffer_left > 0)
    1263             :                               buffer_size = MIN(buffer_left, cache_size)
    1264             :                               CALL hfx_get_mult_cache_elements(primitive_forces(buffer_start), &
    1265             :                                                                buffer_size, nbits, &
    1266             :                                                                integral_caches(nbits), &
    1267             :                                                                integral_containers(nbits), &
    1268             :                                                                eps_storage, pmax_entry, &
    1269             :                                                                memory_parameter%actual_memory_usage, &
    1270             :                                                                use_disk_storage)
    1271             :                               buffer_left = buffer_left - buffer_size
    1272             :                               buffer_start = buffer_start + buffer_size
    1273             :                            END DO
    1274             :                         END IF
    1275             :                         !! Calculate integrals if we run out of buffer or the geometry did change
    1276             :                         IF (actual_x_data%memory_parameter%recalc_forces .OR. buffer_overflow) THEN
    1277             :                            max_contraction_val = max_contraction(iset, iatom)* &
    1278             :                                                  max_contraction(jset, jatom)* &
    1279             :                                                  max_contraction(kset, katom)* &
    1280             :                                                  max_contraction(lset, latom)* &
    1281             :                                                  pmax_entry
    1282             :                            tmp_R_1 => radii_pgf(:, :, jset, iset, jkind, ikind)
    1283             :                            tmp_R_2 => radii_pgf(:, :, lset, kset, lkind, kkind)
    1284             :                            tmp_screen_pgf1 => screen_coeffs_pgf(:, :, jset, iset, jkind, ikind)
    1285             :                            tmp_screen_pgf2 => screen_coeffs_pgf(:, :, lset, kset, lkind, kkind)
    1286             : 
    1287             :                            CALL forces4(private_deriv, ra, rb, rc, rd, npgfa(iset), npgfb(jset), &
    1288             :                                         npgfc(kset), npgfd(lset), &
    1289             :                                         la_min(iset), la_max(iset), lb_min(jset), lb_max(jset), &
    1290             :                                         lc_min(kset), lc_max(kset), ld_min(lset), ld_max(lset), &
    1291             :                                         nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
    1292             :                                         sphi_a_u1, sphi_a_u2, sphi_a_u3, &
    1293             :                                         sphi_b_u1, sphi_b_u2, sphi_b_u3, &
    1294             :                                         sphi_c_u1, sphi_c_u2, sphi_c_u3, &
    1295             :                                         sphi_d_u1, sphi_d_u2, sphi_d_u3, &
    1296             :                                         zeta(1:npgfa(iset), iset), zetb(1:npgfb(jset), jset), &
    1297             :                                         zetc(1:npgfc(kset), kset), zetd(1:npgfd(lset), lset), &
    1298             :                                         primitive_forces, &
    1299             :                                         potential_parameter, &
    1300             :                                         actual_x_data%neighbor_cells, &
    1301             :                                         screen_coeffs_set(jset, iset, jkind, ikind)%x, &
    1302             :                                         screen_coeffs_set(lset, kset, lkind, kkind)%x, eps_schwarz, &
    1303             :                                         max_contraction_val, cartesian_estimate, cell, neris_tmp, &
    1304             :                                         log10_pmax, log10_eps_schwarz, &
    1305             :                                         tmp_R_1, tmp_R_2, tmp_screen_pgf1, tmp_screen_pgf2, &
    1306             :                                         pgf_list_ij, pgf_list_kl, pgf_product_list, &
    1307             :                                         nsgfl_a(:, iset), nsgfl_b(:, jset), &
    1308             :                                         nsgfl_c(:, kset), nsgfl_d(:, lset), &
    1309             :                                         sphi_a_ext_set, sphi_b_ext_set, sphi_c_ext_set, sphi_d_ext_set, &
    1310             :                                         ede_work, ede_work2, ede_work_forces, &
    1311             :                                         ede_buffer1, ede_buffer2, ede_primitives_tmp, &
    1312             :                                         nimages, do_periodic, use_virial, ede_work_virial, ede_work2_virial, &
    1313             :                                         ede_primitives_tmp_virial, primitive_forces_virial, &
    1314             :                                         cartesian_estimate_virial)
    1315             : 
    1316             :                            nints = nsgfa(iset)*nsgfb(jset)*nsgfc(kset)*nsgfd(lset)*12
    1317             :                            neris_total = neris_total + nints
    1318             :                            nprim_ints = nprim_ints + neris_tmp
    1319             : !                           IF (cartesian_estimate == 0.0_dp) cartesian_estimate = TINY(cartesian_estimate)
    1320             : !                           estimate_to_store_int = EXPONENT(cartesian_estimate)
    1321             : !                           estimate_to_store_int = MAX(estimate_to_store_int, -15_int_8)
    1322             : !                           cartesian_estimate = SET_EXPONENT(1.0_dp, estimate_to_store_int+1)
    1323             : !                           IF (.NOT. buffer_overflow .AND. actual_x_data%memory_parameter%recalc_forces) THEN
    1324             : !                              IF (cartesian_estimate < eps_schwarz) THEN
    1325             : !                                 CALL hfx_add_single_cache_element( &
    1326             : !                                    estimate_to_store_int, 6, &
    1327             : !                                    maxval_cache, maxval_container, memory_parameter%actual_memory_usage, &
    1328             : !                                    use_disk_storage, max_val_memory)
    1329             : !                              END IF
    1330             : !                           END IF
    1331             : !
    1332             : !                           IF (.NOT. use_virial) THEN
    1333             : !                              IF (cartesian_estimate < eps_schwarz) CYCLE
    1334             : !                           END IF
    1335             : 
    1336             :                            !! Compress the array for storage
    1337             :                            spherical_estimate = 0.0_dp
    1338             :                            DO i = 1, nints
    1339             :                               spherical_estimate = MAX(spherical_estimate, ABS(primitive_forces(i)))
    1340             :                            END DO
    1341             : 
    1342             :                            IF (use_virial) THEN
    1343             :                               spherical_estimate_virial = 0.0_dp
    1344             :                               DO i = 1, 3*nints
    1345             :                                  spherical_estimate_virial = MAX(spherical_estimate_virial, ABS(primitive_forces_virial(i)))
    1346             :                               END DO
    1347             :                            END IF
    1348             : 
    1349             :                            IF (spherical_estimate == 0.0_dp) spherical_estimate = TINY(spherical_estimate)
    1350             :                            estimate_to_store_int = EXPONENT(spherical_estimate)
    1351             :                            estimate_to_store_int = MAX(estimate_to_store_int, -15_int_8)
    1352             :                            IF (.NOT. buffer_overflow .AND. actual_x_data%memory_parameter%recalc_forces) THEN
    1353             :                               CALL hfx_add_single_cache_element(estimate_to_store_int, 6, &
    1354             :                                                                 maxval_cache, maxval_container, &
    1355             :                                                                 memory_parameter%actual_memory_usage, &
    1356             :                                                                 use_disk_storage, max_val_memory)
    1357             :                            END IF
    1358             :                            spherical_estimate = SET_EXPONENT(1.0_dp, estimate_to_store_int + 1)
    1359             :                            IF (.NOT. use_virial) THEN
    1360             :                               IF (spherical_estimate*pmax_entry < eps_schwarz) CYCLE
    1361             :                            END IF
    1362             :                            IF (.NOT. buffer_overflow) THEN
    1363             :                               nbits = EXPONENT(ANINT(spherical_estimate*pmax_entry/eps_storage)) + 1
    1364             :                               buffer_left = nints
    1365             :                               buffer_start = 1
    1366             : !                              neris_incore = neris_incore+nints
    1367             :                               neris_incore = neris_incore + INT(nints, int_8)
    1368             : 
    1369             :                               DO WHILE (buffer_left > 0)
    1370             :                                  buffer_size = MIN(buffer_left, CACHE_SIZE)
    1371             :                                  CALL hfx_add_mult_cache_elements(primitive_forces(buffer_start), &
    1372             :                                                                   buffer_size, nbits, &
    1373             :                                                                   integral_caches(nbits), &
    1374             :                                                                   integral_containers(nbits), &
    1375             :                                                                   eps_storage, pmax_entry, &
    1376             :                                                                   memory_parameter%actual_memory_usage, &
    1377             :                                                                   use_disk_storage)
    1378             :                                  buffer_left = buffer_left - buffer_size
    1379             :                                  buffer_start = buffer_start + buffer_size
    1380             :                               END DO
    1381             :                            ELSE
    1382             :                               !! In order to be consistent with in-core part, round all the eris wrt. eps_schwarz
    1383             :                               !! but only if we treat forces in-core
    1384             :                               IF (treat_forces_in_core) THEN
    1385             :                                  DO i = 1, nints
    1386             :                                     primitive_forces(i) = primitive_forces(i)*pmax_entry
    1387             :                                     IF (ABS(primitive_forces(i)) > eps_storage) THEN
    1388             :                                        primitive_forces(i) = ANINT(primitive_forces(i)/eps_storage, dp)*eps_storage/pmax_entry
    1389             :                                     ELSE
    1390             :                                        primitive_forces(i) = 0.0_dp
    1391             :                                     END IF
    1392             :                                  END DO
    1393             :                               END IF
    1394             :                            END IF
    1395             :                         END IF
    1396             :                         IF (with_resp_density) THEN
    1397             :                            CALL prefetch_density_matrix(nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
    1398             :                                                         full_density_alpha(:, 1), pbd_buf, pbc_buf, pad_buf, pac_buf, &
    1399             :                                                         iatom, jatom, katom, latom, &
    1400             :                                                         iset, jset, kset, lset, offset_bd_set, offset_bc_set, &
    1401             :                                                         offset_ad_set, offset_ac_set, atomic_offset_bd, atomic_offset_bc, &
    1402             :                                                         atomic_offset_ad, atomic_offset_ac, is_anti_symmetric)
    1403             :                            CALL prefetch_density_matrix(nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
    1404             :                                                         full_density_resp, pbd_buf_resp, pbc_buf_resp, pad_buf_resp, pac_buf_resp, &
    1405             :                                                         iatom, jatom, katom, latom, &
    1406             :                                                         iset, jset, kset, lset, offset_bd_set, offset_bc_set, &
    1407             :                                                         offset_ad_set, offset_ac_set, atomic_offset_bd, atomic_offset_bc, &
    1408             :                                                         atomic_offset_ad, atomic_offset_ac, is_anti_symmetric)
    1409             :                         ELSE
    1410             :                            CALL prefetch_density_matrix(nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
    1411             :                                                         full_density_alpha(:, 1), pbd_buf, pbc_buf, pad_buf, pac_buf, &
    1412             :                                                         iatom, jatom, katom, latom, &
    1413             :                                                         iset, jset, kset, lset, offset_bd_set, offset_bc_set, &
    1414             :                                                         offset_ad_set, offset_ac_set, atomic_offset_bd, atomic_offset_bc, &
    1415             :                                                         atomic_offset_ad, atomic_offset_ac, is_anti_symmetric)
    1416             :                         END IF
    1417             :                         IF (nspins == 2) THEN
    1418             :                            IF (with_resp_density) THEN
    1419             :                               CALL prefetch_density_matrix(nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
    1420             :                                                            full_density_beta(:, 1), pbd_buf_beta, pbc_buf_beta, &
    1421             :                                                            pad_buf_beta, pac_buf_beta, iatom, jatom, katom, latom, &
    1422             :                                                            iset, jset, kset, lset, offset_bd_set, offset_bc_set, &
    1423             :                                                            offset_ad_set, offset_ac_set, atomic_offset_bd, atomic_offset_bc, &
    1424             :                                                            atomic_offset_ad, atomic_offset_ac, is_anti_symmetric)
    1425             :                               CALL prefetch_density_matrix(nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
    1426             :                                                            full_density_resp_beta, pbd_buf_resp_beta, pbc_buf_resp_beta, &
    1427             :                                                            pad_buf_resp_beta, pac_buf_resp_beta, iatom, jatom, katom, latom, &
    1428             :                                                            iset, jset, kset, lset, offset_bd_set, offset_bc_set, &
    1429             :                                                            offset_ad_set, offset_ac_set, atomic_offset_bd, atomic_offset_bc, &
    1430             :                                                            atomic_offset_ad, atomic_offset_ac, is_anti_symmetric)
    1431             :                            ELSE
    1432             :                               CALL prefetch_density_matrix(nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
    1433             :                                                            full_density_beta(:, 1), pbd_buf_beta, pbc_buf_beta, pad_buf_beta, &
    1434             :                                                            pac_buf_beta, iatom, jatom, katom, latom, &
    1435             :                                                            iset, jset, kset, lset, offset_bd_set, offset_bc_set, &
    1436             :                                                            offset_ad_set, offset_ac_set, atomic_offset_bd, atomic_offset_bc, &
    1437             :                                                            atomic_offset_ad, atomic_offset_ac, is_anti_symmetric)
    1438             :                            END IF
    1439             :                         END IF
    1440             :                         IF (spherical_estimate*pmax_entry >= eps_schwarz) THEN
    1441             :                            DO coord = 1, 12
    1442             :                               T2 => primitive_forces((coord - 1)*nsgfa(iset)*nsgfb(jset)*nsgfc(kset)*nsgfd(lset) + 1: &
    1443             :                                                      coord*nsgfa(iset)*nsgfb(jset)*nsgfc(kset)*nsgfd(lset))
    1444             : 
    1445             :                               IF (with_resp_density) THEN
    1446             :                                  CALL update_forces(nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
    1447             :                                                     pbd_buf, pbc_buf, pad_buf, pac_buf, fac, &
    1448             :                                                     T2, force, forces_map, coord, &
    1449             :                                                     pbd_buf_resp, pbc_buf_resp, pad_buf_resp, pac_buf_resp, &
    1450             :                                                     my_resp_only)
    1451             :                               ELSE
    1452             :                                  CALL update_forces(nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
    1453             :                                                     pbd_buf, pbc_buf, pad_buf, pac_buf, fac, &
    1454             :                                                     T2, force, forces_map, coord)
    1455             :                               END IF
    1456             :                               IF (nspins == 2) THEN
    1457             :                                  IF (with_resp_density) THEN
    1458             :                                     CALL update_forces(nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
    1459             :                                                        pbd_buf_beta, pbc_buf_beta, pad_buf_beta, pac_buf_beta, &
    1460             :                                                        fac, T2, force, forces_map, coord, &
    1461             :                                                        pbd_buf_resp_beta, pbc_buf_resp_beta, &
    1462             :                                                        pad_buf_resp_beta, pac_buf_resp_beta, &
    1463             :                                                        my_resp_only)
    1464             :                                  ELSE
    1465             :                                     CALL update_forces(nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
    1466             :                                                        pbd_buf_beta, pbc_buf_beta, pad_buf_beta, pac_buf_beta, fac, &
    1467             :                                                        T2, force, forces_map, coord)
    1468             :                                  END IF
    1469             :                               END IF
    1470             :                            END DO !coord
    1471             :                         END IF
    1472             :                         IF (use_virial .AND. spherical_estimate_virial*pmax_entry >= eps_schwarz) THEN
    1473             :                            DO coord = 1, 12
    1474             :                               DO i = 1, 3
    1475             :                                  T2 => primitive_forces_virial( &
    1476             :                                        ((i - 1)*12 + coord - 1)*nsgfa(iset)*nsgfb(jset)*nsgfc(kset)*nsgfd(lset) + 1: &
    1477             :                                        ((i - 1)*12 + coord)*nsgfa(iset)*nsgfb(jset)*nsgfc(kset)*nsgfd(lset))
    1478             :                                  IF (with_resp_density) THEN
    1479             :                                     CALL update_virial(nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
    1480             :                                                        pbd_buf, pbc_buf, pad_buf, pac_buf, fac, &
    1481             :                                                        T2, tmp_virial, coord, i, &
    1482             :                                                        pbd_buf_resp, pbc_buf_resp, pad_buf_resp, &
    1483             :                                                        pac_buf_resp, my_resp_only)
    1484             :                                  ELSE
    1485             :                                     CALL update_virial(nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
    1486             :                                                        pbd_buf, pbc_buf, pad_buf, pac_buf, fac, &
    1487             :                                                        T2, tmp_virial, coord, i)
    1488             :                                  END IF
    1489             :                                  IF (nspins == 2) THEN
    1490             :                                     IF (with_resp_density) THEN
    1491             :                                        CALL update_virial(nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
    1492             :                                                           pbd_buf_beta, pbc_buf_beta, pad_buf_beta, pac_buf_beta, &
    1493             :                                                           fac, T2, tmp_virial, coord, i, &
    1494             :                                                           pbd_buf_resp_beta, pbc_buf_resp_beta, &
    1495             :                                                           pad_buf_resp_beta, pac_buf_resp_beta, &
    1496             :                                                           my_resp_only)
    1497             :                                     ELSE
    1498             :                                        CALL update_virial(nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
    1499             :                                                           pbd_buf_beta, pbc_buf_beta, pad_buf_beta, pac_buf_beta, &
    1500             :                                                           fac, T2, tmp_virial, coord, i)
    1501             :                                     END IF
    1502             :                                  END IF
    1503             :                               END DO
    1504             :                            END DO !coord
    1505             :                         END IF
    1506             : 
    1507             :                      END DO !i_set_list_kl
    1508             :                   END DO !i_set_list_ij
    1509             :                END DO !i_list_kl
    1510             :             END DO !i_list_ij
    1511             :          END DO atomic_blocks
    1512             :          bintime_stop = m_walltime()
    1513             :          distribution_forces%time_forces = bintime_stop - bintime_start
    1514             : !$OMP MASTER
    1515             :          CALL timestop(handle_bin)
    1516             : !$OMP END MASTER
    1517             :       END DO !bin
    1518             : 
    1519             : !$OMP MASTER
    1520             :       logger => cp_get_default_logger()
    1521             :       do_print_load_balance_info = .FALSE.
    1522             :       do_print_load_balance_info = BTEST(cp_print_key_should_output(logger%iter_info, hfx_section, &
    1523             :                                                                     "LOAD_BALANCE%PRINT/LOAD_BALANCE_INFO"), cp_p_file)
    1524             : !$OMP END MASTER
    1525             : !$OMP BARRIER
    1526             :       IF (do_print_load_balance_info) THEN
    1527             :          iw = -1
    1528             : !$OMP MASTER
    1529             :          iw = cp_print_key_unit_nr(logger, hfx_section, "LOAD_BALANCE%PRINT/LOAD_BALANCE_INFO", &
    1530             :                                    extension=".scfLog")
    1531             : !$OMP END MASTER
    1532             : 
    1533             :          CALL collect_load_balance_info(para_env, actual_x_data, iw, n_threads, i_thread, &
    1534             :                                         hfx_do_eval_forces)
    1535             : 
    1536             : !$OMP MASTER
    1537             :          CALL cp_print_key_finished_output(iw, logger, hfx_section, &
    1538             :                                            "LOAD_BALANCE%PRINT/LOAD_BALANCE_INFO")
    1539             : !$OMP END MASTER
    1540             :       END IF
    1541             : 
    1542             :       IF (use_virial) THEN
    1543             :          DO i = 1, 3
    1544             :             DO j = 1, 3
    1545             :                DO k = 1, 3
    1546             : !$OMP                 ATOMIC
    1547             :                   virial%pv_fock_4c(i, j) = virial%pv_fock_4c(i, j) + tmp_virial(i, k)*cell%hmat(j, k)
    1548             :                END DO
    1549             :             END DO
    1550             :          END DO
    1551             :       END IF
    1552             : 
    1553             : !$OMP BARRIER
    1554             :       !! Get some number about ERIS
    1555             : !$OMP ATOMIC
    1556             :       shm_neris_total = shm_neris_total + neris_total
    1557             : !$OMP ATOMIC
    1558             :       shm_neris_onthefly = shm_neris_onthefly + neris_onthefly
    1559             : !$OMP ATOMIC
    1560             :       shm_nprim_ints = shm_nprim_ints + nprim_ints
    1561             : 
    1562             :       storage_counter_integrals = memory_parameter%actual_memory_usage* &
    1563             :                                   memory_parameter%cache_size
    1564             :       stor_count_max_val = max_val_memory*memory_parameter%cache_size
    1565             : !$OMP ATOMIC
    1566             :       shm_storage_counter_integrals = shm_storage_counter_integrals + storage_counter_integrals
    1567             : !$OMP ATOMIC
    1568             :       shm_neris_incore = shm_neris_incore + neris_incore
    1569             : !$OMP ATOMIC
    1570             :       shm_stor_count_max_val = shm_stor_count_max_val + stor_count_max_val
    1571             : !$OMP BARRIER
    1572             : 
    1573             :       ! IF(with_resp_density) THEN
    1574             :       !   ! restore original load_balance_parameter
    1575             :       !   actual_x_data%load_balance_parameter = load_balance_parameter_energy
    1576             :       !   DEALLOCATE(load_balance_parameter_energy)
    1577             :       ! END IF
    1578             : 
    1579             : !$OMP MASTER
    1580             :       !! Print some memeory information if this is the first step
    1581             :       IF (actual_x_data%memory_parameter%recalc_forces) THEN
    1582             :          tmp_i8(1:6) = (/shm_storage_counter_integrals, shm_neris_onthefly, shm_neris_incore, &
    1583             :                          shm_neris_total, shm_nprim_ints, shm_stor_count_max_val/)
    1584             :          CALL para_env%sum(tmp_i8)
    1585             :          shm_storage_counter_integrals = tmp_i8(1)
    1586             :          shm_neris_onthefly = tmp_i8(2)
    1587             :          shm_neris_incore = tmp_i8(3)
    1588             :          shm_neris_total = tmp_i8(4)
    1589             :          shm_nprim_ints = tmp_i8(5)
    1590             :          shm_stor_count_max_val = tmp_i8(6)
    1591             :          mem_eris = (shm_storage_counter_integrals + 128*1024 - 1)/1024/128
    1592             :          compression_factor = REAL(shm_neris_incore, dp)/REAL(shm_storage_counter_integrals, dp)
    1593             :          mem_max_val = (shm_stor_count_max_val + 128*1024 - 1)/1024/128
    1594             : 
    1595             :          IF (shm_neris_incore == 0) THEN
    1596             :             mem_eris = 0
    1597             :             compression_factor = 0.0_dp
    1598             :          END IF
    1599             : 
    1600             :          logger => cp_get_default_logger()
    1601             : 
    1602             :          iw = cp_print_key_unit_nr(logger, hfx_section, "HF_INFO", &
    1603             :                                    extension=".scfLog")
    1604             :          IF (iw > 0) THEN
    1605             :             WRITE (UNIT=iw, FMT="(/,(T3,A,T65,I16))") &
    1606             :                "HFX_MEM_INFO| Number of cart. primitive DERIV's calculated:      ", shm_nprim_ints
    1607             : 
    1608             :             WRITE (UNIT=iw, FMT="((T3,A,T60,I21))") &
    1609             :                "HFX_MEM_INFO| Number of sph. DERIV's calculated:           ", shm_neris_total
    1610             : 
    1611             :             WRITE (UNIT=iw, FMT="((T3,A,T60,I21))") &
    1612             :                "HFX_MEM_INFO| Number of sph. DERIV's stored in-core:        ", shm_neris_incore
    1613             : 
    1614             :             WRITE (UNIT=iw, FMT="((T3,A,T62,I19))") &
    1615             :                "HFX_MEM_INFO| Number of sph. DERIV's calculated on the fly: ", shm_neris_onthefly
    1616             : 
    1617             :             WRITE (UNIT=iw, FMT="((T3,A,T60,I21))") &
    1618             :                "HFX_MEM_INFO| Total memory consumption DERIV's RAM [MiB]:            ", mem_eris
    1619             : 
    1620             :             WRITE (UNIT=iw, FMT="((T3,A,T60,I21))") &
    1621             :                "HFX_MEM_INFO| Whereof max-vals [MiB]:            ", mem_max_val
    1622             : 
    1623             :             WRITE (UNIT=iw, FMT="((T3,A,T60,F21.2),/)") &
    1624             :                "HFX_MEM_INFO| Total compression factor DERIV's RAM:                  ", compression_factor
    1625             :          END IF
    1626             : 
    1627             :          CALL cp_print_key_finished_output(iw, logger, hfx_section, &
    1628             :                                            "HF_INFO")
    1629             :       END IF
    1630             : !$OMP END MASTER
    1631             : 
    1632             :       !! flush caches if the geometry changed
    1633             :       IF (do_dynamic_load_balancing) THEN
    1634             :          my_bin_size = SIZE(actual_x_data%distribution_forces)
    1635             :       ELSE
    1636             :          my_bin_size = 1
    1637             :       END IF
    1638             : 
    1639             :       IF (actual_x_data%memory_parameter%recalc_forces) THEN
    1640             :          IF (treat_forces_in_core) THEN
    1641             :             DO bin = 1, my_bin_size
    1642             :                maxval_cache => actual_x_data%store_forces%maxval_cache(bin)
    1643             :                maxval_container => actual_x_data%store_forces%maxval_container(bin)
    1644             :                integral_caches => actual_x_data%store_forces%integral_caches(:, bin)
    1645             :                integral_containers => actual_x_data%store_forces%integral_containers(:, bin)
    1646             :                CALL hfx_flush_last_cache(bits_max_val, maxval_cache, maxval_container, memory_parameter%actual_memory_usage, &
    1647             :                                          .FALSE.)
    1648             :                DO i = 1, 64
    1649             :                   CALL hfx_flush_last_cache(i, integral_caches(i), integral_containers(i), &
    1650             :                                             memory_parameter%actual_memory_usage, .FALSE.)
    1651             :                END DO
    1652             :             END DO
    1653             :          END IF
    1654             :       END IF
    1655             :       !! reset all caches except we calculate all on the fly
    1656             :       IF (treat_forces_in_core) THEN
    1657             :          DO bin = 1, my_bin_size
    1658             :             maxval_cache => actual_x_data%store_forces%maxval_cache(bin)
    1659             :             maxval_container => actual_x_data%store_forces%maxval_container(bin)
    1660             :             integral_caches => actual_x_data%store_forces%integral_caches(:, bin)
    1661             :             integral_containers => actual_x_data%store_forces%integral_containers(:, bin)
    1662             : 
    1663             :             CALL hfx_reset_cache_and_container(maxval_cache, maxval_container, memory_parameter%actual_memory_usage, .FALSE.)
    1664             :             DO i = 1, 64
    1665             :                CALL hfx_reset_cache_and_container(integral_caches(i), integral_containers(i), &
    1666             :                                                   memory_parameter%actual_memory_usage, &
    1667             :                                                   .FALSE.)
    1668             :             END DO
    1669             :          END DO
    1670             :       END IF
    1671             : 
    1672             :       IF (actual_x_data%memory_parameter%recalc_forces) THEN
    1673             :          actual_x_data%memory_parameter%recalc_forces = .FALSE.
    1674             :       END IF
    1675             : 
    1676             : !$OMP BARRIER
    1677             : !$OMP MASTER
    1678             :       CALL timestop(handle_main)
    1679             : !$OMP END MASTER
    1680             : !$OMP BARRIER
    1681             :       DEALLOCATE (last_sgf_global)
    1682             : !$OMP MASTER
    1683             :       DEALLOCATE (full_density_alpha)
    1684             :       IF (with_resp_density) THEN
    1685             :          DEALLOCATE (full_density_resp)
    1686             :       END IF
    1687             :       IF (nspins == 2) THEN
    1688             :          DEALLOCATE (full_density_beta)
    1689             :          IF (with_resp_density) THEN
    1690             :             DEALLOCATE (full_density_resp_beta)
    1691             :          END IF
    1692             :       END IF
    1693             :       IF (do_dynamic_load_balancing) THEN
    1694             :          DEALLOCATE (actual_x_data%task_list)
    1695             :       END IF
    1696             : !$OMP END MASTER
    1697             :       DEALLOCATE (primitive_forces)
    1698             :       DEALLOCATE (atom_of_kind, kind_of)
    1699             :       DEALLOCATE (max_contraction)
    1700             :       DEALLOCATE (pbd_buf)
    1701             :       DEALLOCATE (pbc_buf)
    1702             :       DEALLOCATE (pad_buf)
    1703             :       DEALLOCATE (pac_buf)
    1704             : 
    1705             :       IF (with_resp_density) THEN
    1706             :          DEALLOCATE (pbd_buf_resp)
    1707             :          DEALLOCATE (pbc_buf_resp)
    1708             :          DEALLOCATE (pad_buf_resp)
    1709             :          DEALLOCATE (pac_buf_resp)
    1710             :       END IF
    1711             : 
    1712             :       DO i = 1, max_pgf**2
    1713             :          DEALLOCATE (pgf_list_ij(i)%image_list)
    1714             :          DEALLOCATE (pgf_list_kl(i)%image_list)
    1715             :       END DO
    1716             : 
    1717             :       DEALLOCATE (pgf_list_ij)
    1718             :       DEALLOCATE (pgf_list_kl)
    1719             :       DEALLOCATE (pgf_product_list)
    1720             : 
    1721             :       DEALLOCATE (set_list_ij, set_list_kl)
    1722             : 
    1723             :       DEALLOCATE (primitive_forces_virial)
    1724             : 
    1725             :       DEALLOCATE (ede_work, ede_work2, ede_work_forces, ede_buffer1, ede_buffer2, ede_primitives_tmp)
    1726             : 
    1727             :       DEALLOCATE (ede_work_virial, ede_work2_virial, ede_primitives_tmp_virial)
    1728             : 
    1729             :       DEALLOCATE (nimages)
    1730             : 
    1731             :       IF (nspins == 2) THEN
    1732             :          DEALLOCATE (pbd_buf_beta, pbc_buf_beta, pad_buf_beta, pac_buf_beta)
    1733             :          IF (with_resp_density) THEN
    1734             :             DEALLOCATE (pbd_buf_resp_beta)
    1735             :             DEALLOCATE (pbc_buf_resp_beta)
    1736             :             DEALLOCATE (pad_buf_resp_beta)
    1737             :             DEALLOCATE (pac_buf_resp_beta)
    1738             :          END IF
    1739             :       END IF
    1740             : 
    1741             :       !
    1742             :       ! this wraps to free_libint, but currently causes a segfault
    1743             :       ! as a result, we don't call it, but some memory remains allocated
    1744             :       !
    1745             :       ! CALL terminate_libderiv(private_deriv)
    1746             : 
    1747             : !$OMP END PARALLEL
    1748             : 
    1749        1784 :       CALL timestop(handle)
    1750             : 
    1751     3708312 :    END SUBROUTINE derivatives_four_center
    1752             : 
    1753             : ! **************************************************************************************************
    1754             : !> \brief ...
    1755             : !> \param deriv ...
    1756             : !> \param ra ...
    1757             : !> \param rb ...
    1758             : !> \param rc ...
    1759             : !> \param rd ...
    1760             : !> \param npgfa ...
    1761             : !> \param npgfb ...
    1762             : !> \param npgfc ...
    1763             : !> \param npgfd ...
    1764             : !> \param la_min ...
    1765             : !> \param la_max ...
    1766             : !> \param lb_min ...
    1767             : !> \param lb_max ...
    1768             : !> \param lc_min ...
    1769             : !> \param lc_max ...
    1770             : !> \param ld_min ...
    1771             : !> \param ld_max ...
    1772             : !> \param nsgfa ...
    1773             : !> \param nsgfb ...
    1774             : !> \param nsgfc ...
    1775             : !> \param nsgfd ...
    1776             : !> \param sphi_a_u1 ...
    1777             : !> \param sphi_a_u2 ...
    1778             : !> \param sphi_a_u3 ...
    1779             : !> \param sphi_b_u1 ...
    1780             : !> \param sphi_b_u2 ...
    1781             : !> \param sphi_b_u3 ...
    1782             : !> \param sphi_c_u1 ...
    1783             : !> \param sphi_c_u2 ...
    1784             : !> \param sphi_c_u3 ...
    1785             : !> \param sphi_d_u1 ...
    1786             : !> \param sphi_d_u2 ...
    1787             : !> \param sphi_d_u3 ...
    1788             : !> \param zeta ...
    1789             : !> \param zetb ...
    1790             : !> \param zetc ...
    1791             : !> \param zetd ...
    1792             : !> \param primitive_integrals ...
    1793             : !> \param potential_parameter ...
    1794             : !> \param neighbor_cells ...
    1795             : !> \param screen1 ...
    1796             : !> \param screen2 ...
    1797             : !> \param eps_schwarz ...
    1798             : !> \param max_contraction_val ...
    1799             : !> \param cart_estimate ...
    1800             : !> \param cell ...
    1801             : !> \param neris_tmp ...
    1802             : !> \param log10_pmax ...
    1803             : !> \param log10_eps_schwarz ...
    1804             : !> \param R1_pgf ...
    1805             : !> \param R2_pgf ...
    1806             : !> \param pgf1 ...
    1807             : !> \param pgf2 ...
    1808             : !> \param pgf_list_ij ...
    1809             : !> \param pgf_list_kl ...
    1810             : !> \param pgf_product_list ...
    1811             : !> \param nsgfl_a ...
    1812             : !> \param nsgfl_b ...
    1813             : !> \param nsgfl_c ...
    1814             : !> \param nsgfl_d ...
    1815             : !> \param sphi_a_ext ...
    1816             : !> \param sphi_b_ext ...
    1817             : !> \param sphi_c_ext ...
    1818             : !> \param sphi_d_ext ...
    1819             : !> \param ede_work ...
    1820             : !> \param ede_work2 ...
    1821             : !> \param ede_work_forces ...
    1822             : !> \param ede_buffer1 ...
    1823             : !> \param ede_buffer2 ...
    1824             : !> \param ede_primitives_tmp ...
    1825             : !> \param nimages ...
    1826             : !> \param do_periodic ...
    1827             : !> \param use_virial ...
    1828             : !> \param ede_work_virial ...
    1829             : !> \param ede_work2_virial ...
    1830             : !> \param ede_primitives_tmp_virial ...
    1831             : !> \param primitive_integrals_virial ...
    1832             : !> \param cart_estimate_virial ...
    1833             : ! **************************************************************************************************
    1834     1610523 :    SUBROUTINE forces4(deriv, ra, rb, rc, rd, npgfa, npgfb, npgfc, npgfd, &
    1835             :                       la_min, la_max, lb_min, lb_max, &
    1836             :                       lc_min, lc_max, ld_min, ld_max, nsgfa, nsgfb, nsgfc, nsgfd, &
    1837             :                       sphi_a_u1, sphi_a_u2, sphi_a_u3, &
    1838             :                       sphi_b_u1, sphi_b_u2, sphi_b_u3, &
    1839             :                       sphi_c_u1, sphi_c_u2, sphi_c_u3, &
    1840             :                       sphi_d_u1, sphi_d_u2, sphi_d_u3, &
    1841     1610523 :                       zeta, zetb, zetc, zetd, &
    1842     1610523 :                       primitive_integrals, &
    1843             :                       potential_parameter, neighbor_cells, &
    1844             :                       screen1, screen2, eps_schwarz, max_contraction_val, &
    1845             :                       cart_estimate, cell, neris_tmp, log10_pmax, &
    1846             :                       log10_eps_schwarz, R1_pgf, R2_pgf, pgf1, pgf2, &
    1847             :                       pgf_list_ij, pgf_list_kl, &
    1848             :                       pgf_product_list, &
    1849     1610523 :                       nsgfl_a, nsgfl_b, nsgfl_c, &
    1850     1610523 :                       nsgfl_d, &
    1851     1610523 :                       sphi_a_ext, sphi_b_ext, sphi_c_ext, sphi_d_ext, &
    1852             :                       ede_work, ede_work2, ede_work_forces, &
    1853             :                       ede_buffer1, ede_buffer2, ede_primitives_tmp, &
    1854             :                       nimages, do_periodic, use_virial, ede_work_virial, &
    1855     1610523 :                       ede_work2_virial, ede_primitives_tmp_virial, primitive_integrals_virial, &
    1856             :                       cart_estimate_virial)
    1857             : 
    1858             :       TYPE(cp_libint_t)                                  :: deriv
    1859             :       REAL(dp), INTENT(IN)                               :: ra(3), rb(3), rc(3), rd(3)
    1860             :       INTEGER, INTENT(IN) :: npgfa, npgfb, npgfc, npgfd, la_min, la_max, lb_min, lb_max, lc_min, &
    1861             :                              lc_max, ld_min, ld_max, nsgfa, nsgfb, nsgfc, nsgfd, sphi_a_u1, sphi_a_u2, sphi_a_u3, &
    1862             :                              sphi_b_u1, sphi_b_u2, sphi_b_u3, sphi_c_u1, sphi_c_u2, sphi_c_u3, sphi_d_u1, sphi_d_u2, &
    1863             :                              sphi_d_u3
    1864             :       REAL(dp), DIMENSION(1:npgfa), INTENT(IN)           :: zeta
    1865             :       REAL(dp), DIMENSION(1:npgfb), INTENT(IN)           :: zetb
    1866             :       REAL(dp), DIMENSION(1:npgfc), INTENT(IN)           :: zetc
    1867             :       REAL(dp), DIMENSION(1:npgfd), INTENT(IN)           :: zetd
    1868             :       REAL(dp), &
    1869             :          DIMENSION(nsgfa, nsgfb, nsgfc, nsgfd, 12)       :: primitive_integrals
    1870             :       TYPE(hfx_potential_type)                           :: potential_parameter
    1871             :       TYPE(hfx_cell_type), DIMENSION(:), POINTER         :: neighbor_cells
    1872             :       REAL(dp), INTENT(IN)                               :: screen1(2), screen2(2), eps_schwarz, &
    1873             :                                                             max_contraction_val
    1874             :       REAL(dp)                                           :: cart_estimate
    1875             :       TYPE(cell_type), POINTER                           :: cell
    1876             :       INTEGER(int_8)                                     :: neris_tmp
    1877             :       REAL(dp), INTENT(IN)                               :: log10_pmax, log10_eps_schwarz
    1878             :       TYPE(hfx_screen_coeff_type), DIMENSION(:, :), &
    1879             :          POINTER                                         :: R1_pgf, R2_pgf, pgf1, pgf2
    1880             :       TYPE(hfx_pgf_list), DIMENSION(*)                   :: pgf_list_ij, pgf_list_kl
    1881             :       TYPE(hfx_pgf_product_list), ALLOCATABLE, &
    1882             :          DIMENSION(:), INTENT(INOUT)                     :: pgf_product_list
    1883             :       INTEGER, DIMENSION(0:), INTENT(IN)                 :: nsgfl_a, nsgfl_b, nsgfl_c, nsgfl_d
    1884             :       REAL(dp), INTENT(IN) :: sphi_a_ext(sphi_a_u1, sphi_a_u2, sphi_a_u3), &
    1885             :                               sphi_b_ext(sphi_b_u1, sphi_b_u2, sphi_b_u3), sphi_c_ext(sphi_c_u1, sphi_c_u2, sphi_c_u3), &
    1886             :                               sphi_d_ext(sphi_d_u1, sphi_d_u2, sphi_d_u3)
    1887             :       REAL(dp), DIMENSION(*)                             :: ede_work, ede_work2, ede_work_forces, &
    1888             :                                                             ede_buffer1, ede_buffer2, &
    1889             :                                                             ede_primitives_tmp
    1890             :       INTEGER, DIMENSION(*)                              :: nimages
    1891             :       LOGICAL, INTENT(IN)                                :: do_periodic, use_virial
    1892             :       REAL(dp), DIMENSION(*)                             :: ede_work_virial, ede_work2_virial, &
    1893             :                                                             ede_primitives_tmp_virial
    1894             :       REAL(dp), &
    1895             :          DIMENSION(nsgfa, nsgfb, nsgfc, nsgfd, 12, 3)    :: primitive_integrals_virial
    1896             :       REAL(dp)                                           :: cart_estimate_virial
    1897             : 
    1898             :       INTEGER :: ipgf, jpgf, kpgf, la, lb, lc, ld, list_ij, list_kl, lpgf, max_l, ncoa, ncob, &
    1899             :                  ncoc, ncod, nelements_ij, nelements_kl, nproducts, nsgfla, nsgflb, nsgflc, nsgfld, nsoa, &
    1900             :                  nsob, nsoc, nsod, s_offset_a, s_offset_b, s_offset_c, s_offset_d
    1901             :       REAL(dp)                                           :: EtaInv, tmp_max, tmp_max_virial, Zeta_A, &
    1902             :                                                             Zeta_B, Zeta_C, Zeta_D, ZetaInv
    1903             : 
    1904             :       CALL build_pair_list_pgf(npgfa, npgfb, pgf_list_ij, zeta, zetb, screen1, screen2, &
    1905             :                                pgf1, R1_pgf, log10_pmax, log10_eps_schwarz, ra, rb, &
    1906             :                                nelements_ij, &
    1907     1610523 :                                neighbor_cells, nimages, do_periodic)
    1908             :       CALL build_pair_list_pgf(npgfc, npgfd, pgf_list_kl, zetc, zetd, screen2, screen1, &
    1909             :                                pgf2, R2_pgf, log10_pmax, log10_eps_schwarz, rc, rd, &
    1910             :                                nelements_kl, &
    1911     1610523 :                                neighbor_cells, nimages, do_periodic)
    1912             : 
    1913     1610523 :       cart_estimate = 0.0_dp
    1914   802089051 :       primitive_integrals = 0.0_dp
    1915     1610523 :       IF (use_virial) THEN
    1916   285087896 :          primitive_integrals_virial = 0.0_dp
    1917      198587 :          cart_estimate_virial = 0.0_dp
    1918             :       END IF
    1919     1610523 :       neris_tmp = 0_int_8
    1920     1610523 :       max_l = la_max + lb_max + lc_max + ld_max + 1
    1921     4313491 :       DO list_ij = 1, nelements_ij
    1922     2702968 :          Zeta_A = pgf_list_ij(list_ij)%zeta
    1923     2702968 :          Zeta_B = pgf_list_ij(list_ij)%zetb
    1924     2702968 :          ZetaInv = pgf_list_ij(list_ij)%ZetaInv
    1925     2702968 :          ipgf = pgf_list_ij(list_ij)%ipgf
    1926     2702968 :          jpgf = pgf_list_ij(list_ij)%jpgf
    1927             : 
    1928    17526790 :          DO list_kl = 1, nelements_kl
    1929    13213299 :             Zeta_C = pgf_list_kl(list_kl)%zeta
    1930    13213299 :             Zeta_D = pgf_list_kl(list_kl)%zetb
    1931    13213299 :             EtaInv = pgf_list_kl(list_kl)%ZetaInv
    1932    13213299 :             kpgf = pgf_list_kl(list_kl)%ipgf
    1933    13213299 :             lpgf = pgf_list_kl(list_kl)%jpgf
    1934             : 
    1935             :             CALL build_pgf_product_list(pgf_list_ij(list_ij), pgf_list_kl(list_kl), pgf_product_list, &
    1936             :                                         nproducts, log10_pmax, log10_eps_schwarz, neighbor_cells, cell, &
    1937    13213299 :                                         potential_parameter, max_l, do_periodic)
    1938    13213299 :             s_offset_a = 0
    1939    31848436 :             DO la = la_min, la_max
    1940    15932169 :                s_offset_b = 0
    1941    15932169 :                ncoa = nco(la)
    1942    15932169 :                nsgfla = nsgfl_a(la)
    1943    15932169 :                nsoa = nso(la)
    1944             : 
    1945    32841129 :                DO lb = lb_min, lb_max
    1946    16908960 :                   s_offset_c = 0
    1947    16908960 :                   ncob = nco(lb)
    1948    16908960 :                   nsgflb = nsgfl_b(lb)
    1949    16908960 :                   nsob = nso(lb)
    1950             : 
    1951    41807806 :                   DO lc = lc_min, lc_max
    1952    24898846 :                      s_offset_d = 0
    1953    24898846 :                      ncoc = nco(lc)
    1954    24898846 :                      nsgflc = nsgfl_c(lc)
    1955    24898846 :                      nsoc = nso(lc)
    1956             : 
    1957    56680298 :                      DO ld = ld_min, ld_max
    1958    31781452 :                         ncod = nco(ld)
    1959    31781452 :                         nsgfld = nsgfl_d(ld)
    1960    31781452 :                         nsod = nso(ld)
    1961    31781452 :                         tmp_max = 0.0_dp
    1962    31781452 :                         tmp_max_virial = 0.0_dp
    1963             :                         CALL evaluate_deriv_eri(deriv, nproducts, pgf_product_list, &
    1964             :                                                 la, lb, lc, ld, &
    1965             :                                                 ncoa, ncob, ncoc, ncod, &
    1966             :                                                 nsgfa, nsgfb, nsgfc, nsgfd, &
    1967             :                                                 primitive_integrals, &
    1968             :                                                 max_contraction_val, tmp_max, eps_schwarz, neris_tmp, &
    1969             :                                                 Zeta_A, Zeta_B, Zeta_C, Zeta_D, ZetaInv, EtaInv, &
    1970             :                                                 s_offset_a, s_offset_b, s_offset_c, s_offset_d, &
    1971             :                                                 nsgfla, nsgflb, nsgflc, nsgfld, nsoa, nsob, nsoc, nsod, &
    1972             :                                                 sphi_a_ext(1, la + 1, ipgf), &
    1973             :                                                 sphi_b_ext(1, lb + 1, jpgf), &
    1974             :                                                 sphi_c_ext(1, lc + 1, kpgf), &
    1975             :                                                 sphi_d_ext(1, ld + 1, lpgf), &
    1976             :                                                 ede_work, ede_work2, ede_work_forces, &
    1977             :                                                 ede_buffer1, ede_buffer2, ede_primitives_tmp, use_virial, &
    1978             :                                                 ede_work_virial, ede_work2_virial, ede_primitives_tmp_virial, &
    1979    31781452 :                                                 primitive_integrals_virial, cell, tmp_max_virial)
    1980    31781452 :                         cart_estimate = MAX(tmp_max, cart_estimate)
    1981    31781452 :                         IF (use_virial) cart_estimate_virial = MAX(tmp_max_virial, cart_estimate_virial)
    1982    56680298 :                         s_offset_d = s_offset_d + nsod*nsgfld
    1983             :                      END DO !ld
    1984    41807806 :                      s_offset_c = s_offset_c + nsoc*nsgflc
    1985             :                   END DO !lc
    1986    32841129 :                   s_offset_b = s_offset_b + nsob*nsgflb
    1987             :                END DO !lb
    1988    29145468 :                s_offset_a = s_offset_a + nsoa*nsgfla
    1989             :             END DO !la
    1990             :          END DO
    1991             :       END DO
    1992             : 
    1993     1610523 :    END SUBROUTINE forces4
    1994             : 
    1995             : ! **************************************************************************************************
    1996             : !> \brief Given a 2d index pair, this function returns a 1d index pair for
    1997             : !>        a symmetric upper triangle NxN matrix
    1998             : !>        The compiler should inline this function, therefore it appears in
    1999             : !>        several modules
    2000             : !> \param i 2d index
    2001             : !> \param j 2d index
    2002             : !> \param N matrix size
    2003             : !> \return ...
    2004             : !> \par History
    2005             : !>      03.2009 created [Manuel Guidon]
    2006             : !> \author Manuel Guidon
    2007             : ! **************************************************************************************************
    2008       57408 :    PURE FUNCTION get_1D_idx(i, j, N)
    2009             :       INTEGER, INTENT(IN)                                :: i, j
    2010             :       INTEGER(int_8), INTENT(IN)                         :: N
    2011             :       INTEGER(int_8)                                     :: get_1D_idx
    2012             : 
    2013             :       INTEGER(int_8)                                     :: min_ij
    2014             : 
    2015       57408 :       min_ij = MIN(i, j)
    2016       57408 :       get_1D_idx = min_ij*N + MAX(i, j) - (min_ij - 1)*min_ij/2 - N
    2017             : 
    2018       57408 :    END FUNCTION get_1D_idx
    2019             : 
    2020             : ! **************************************************************************************************
    2021             : !> \brief This routine prefetches density matrix elements, i.e. reshuffles the
    2022             : !>        data in a way that they can be accessed later on in a cache friendly
    2023             : !>        way
    2024             : !> \param ma_max Size of matrix blocks
    2025             : !> \param mb_max Size of matrix blocks
    2026             : !> \param mc_max Size of matrix blocks
    2027             : !> \param md_max Size of matrix blocks
    2028             : !> \param density ...
    2029             : !> \param pbd buffer that will contain P(b,d)
    2030             : !> \param pbc buffer that will contain P(b,c)
    2031             : !> \param pad buffer that will contain P(a,d)
    2032             : !> \param pac buffer that will contain P(a,c)
    2033             : !> \param iatom ...
    2034             : !> \param jatom ...
    2035             : !> \param katom ...
    2036             : !> \param latom ...
    2037             : !> \param iset ...
    2038             : !> \param jset ...
    2039             : !> \param kset ...
    2040             : !> \param lset ...
    2041             : !> \param offset_bd_set ...
    2042             : !> \param offset_bc_set ...
    2043             : !> \param offset_ad_set ...
    2044             : !> \param offset_ac_set ...
    2045             : !> \param atomic_offset_bd ...
    2046             : !> \param atomic_offset_bc ...
    2047             : !> \param atomic_offset_ad ...
    2048             : !> \param atomic_offset_ac ...
    2049             : !> \param anti_symmetric ...
    2050             : !> \par History
    2051             : !>      03.2009 created [Manuel Guidon]
    2052             : !> \author Manuel Guidon
    2053             : ! **************************************************************************************************
    2054     2876282 :    SUBROUTINE prefetch_density_matrix(ma_max, mb_max, mc_max, md_max, &
    2055     2876282 :                                       density, pbd, pbc, pad, pac, &
    2056             :                                       iatom, jatom, katom, latom, &
    2057             :                                       iset, jset, kset, lset, offset_bd_set, offset_bc_set, &
    2058             :                                       offset_ad_set, offset_ac_set, atomic_offset_bd, atomic_offset_bc, &
    2059             :                                       atomic_offset_ad, atomic_offset_ac, anti_symmetric)
    2060             : 
    2061             :       INTEGER, INTENT(IN)                                :: ma_max, mb_max, mc_max, md_max
    2062             :       REAL(dp), DIMENSION(:), INTENT(IN)                 :: density
    2063             :       REAL(dp), DIMENSION(*), INTENT(INOUT)              :: pbd, pbc, pad, pac
    2064             :       INTEGER, INTENT(IN)                                :: iatom, jatom, katom, latom, iset, jset, &
    2065             :                                                             kset, lset
    2066             :       INTEGER, DIMENSION(:, :), POINTER                  :: offset_bd_set, offset_bc_set, &
    2067             :                                                             offset_ad_set, offset_ac_set
    2068             :       INTEGER, INTENT(IN)                                :: atomic_offset_bd, atomic_offset_bc, &
    2069             :                                                             atomic_offset_ad, atomic_offset_ac
    2070             :       LOGICAL                                            :: anti_symmetric
    2071             : 
    2072             :       INTEGER                                            :: i, j, ma, mb, mc, md, offset_ac, &
    2073             :                                                             offset_ad, offset_bc, offset_bd
    2074             :       REAL(dp)                                           :: fac
    2075             : 
    2076     2876282 :       fac = 1.0_dp
    2077     2876282 :       IF (anti_symmetric) fac = -1.0_dp
    2078     2876282 :       IF (jatom >= latom) THEN
    2079     2870724 :          i = 1
    2080     2870724 :          offset_bd = offset_bd_set(jset, lset) + atomic_offset_bd - 1
    2081     2870724 :          j = offset_bd
    2082     8389115 :          DO md = 1, md_max
    2083    17939961 :             DO mb = 1, mb_max
    2084     9550846 :                pbd(i) = density(j)*fac
    2085     9550846 :                i = i + 1
    2086    15069237 :                j = j + 1
    2087             :             END DO
    2088             :          END DO
    2089             :       ELSE
    2090        5558 :          i = 1
    2091        5558 :          offset_bd = offset_bd_set(lset, jset) + atomic_offset_bd - 1
    2092       13394 :          DO md = 1, md_max
    2093        7836 :             j = offset_bd + md - 1
    2094       25078 :             DO mb = 1, mb_max
    2095       11684 :                pbd(i) = density(j)
    2096       11684 :                i = i + 1
    2097       19520 :                j = j + md_max
    2098             :             END DO
    2099             :          END DO
    2100             :       END IF
    2101     2876282 :       IF (jatom >= katom) THEN
    2102     2876282 :          i = 1
    2103     2876282 :          offset_bc = offset_bc_set(jset, kset) + atomic_offset_bc - 1
    2104     2876282 :          j = offset_bc
    2105     9384950 :          DO mc = 1, mc_max
    2106    20324752 :             DO mb = 1, mb_max
    2107    10939802 :                pbc(i) = density(j)*fac
    2108    10939802 :                i = i + 1
    2109    17448470 :                j = j + 1
    2110             :             END DO
    2111             :          END DO
    2112             :       ELSE
    2113           0 :          i = 1
    2114           0 :          offset_bc = offset_bc_set(kset, jset) + atomic_offset_bc - 1
    2115           0 :          DO mc = 1, mc_max
    2116           0 :             j = offset_bc + mc - 1
    2117           0 :             DO mb = 1, mb_max
    2118           0 :                pbc(i) = density(j)
    2119           0 :                i = i + 1
    2120           0 :                j = j + mc_max
    2121             :             END DO
    2122             :          END DO
    2123             :       END IF
    2124     2876282 :       IF (iatom >= latom) THEN
    2125     2217722 :          i = 1
    2126     2217722 :          offset_ad = offset_ad_set(iset, lset) + atomic_offset_ad - 1
    2127     2217722 :          j = offset_ad
    2128     6810939 :          DO md = 1, md_max
    2129    16353419 :             DO ma = 1, ma_max
    2130     9542480 :                pad(i) = density(j)*fac
    2131     9542480 :                i = i + 1
    2132    14135697 :                j = j + 1
    2133             :             END DO
    2134             :          END DO
    2135             :       ELSE
    2136      658560 :          i = 1
    2137      658560 :          offset_ad = offset_ad_set(lset, iset) + atomic_offset_ad - 1
    2138     1591570 :          DO md = 1, md_max
    2139      933010 :             j = offset_ad + md - 1
    2140     3850832 :             DO ma = 1, ma_max
    2141     2259262 :                pad(i) = density(j)
    2142     2259262 :                i = i + 1
    2143     3192272 :                j = j + md_max
    2144             :             END DO
    2145             :          END DO
    2146             :       END IF
    2147     2876282 :       IF (iatom >= katom) THEN
    2148     2799034 :          i = 1
    2149     2799034 :          offset_ac = offset_ac_set(iset, kset) + atomic_offset_ac - 1
    2150     2799034 :          j = offset_ac
    2151     9199648 :          DO mc = 1, mc_max
    2152    22880997 :             DO ma = 1, ma_max
    2153    13681349 :                pac(i) = density(j)*fac
    2154    13681349 :                i = i + 1
    2155    20081963 :                j = j + 1
    2156             :             END DO
    2157             :          END DO
    2158             :       ELSE
    2159       77248 :          i = 1
    2160       77248 :          offset_ac = offset_ac_set(kset, iset) + atomic_offset_ac - 1
    2161      185302 :          DO mc = 1, mc_max
    2162      108054 :             j = offset_ac + mc - 1
    2163      511211 :             DO ma = 1, ma_max
    2164      325909 :                pac(i) = density(j)
    2165      325909 :                i = i + 1
    2166      433963 :                j = j + mc_max
    2167             :             END DO
    2168             :          END DO
    2169             :       END IF
    2170     2876282 :    END SUBROUTINE prefetch_density_matrix
    2171             : 
    2172             : ! **************************************************************************************************
    2173             : !> \brief This routine updates the forces using buffered density matrices
    2174             : !> \param ma_max Size of matrix blocks
    2175             : !> \param mb_max Size of matrix blocks
    2176             : !> \param mc_max Size of matrix blocks
    2177             : !> \param md_max Size of matrix blocks
    2178             : !> \param pbd buffer that will contain P(b,d)
    2179             : !> \param pbc buffer that will contain P(b,c)
    2180             : !> \param pad buffer that will contain P(a,d)
    2181             : !> \param pac buffer that will contain P(a,c)
    2182             : !> \param fac mulitplication factor (spin, symmetry)
    2183             : !> \param prim primitive forces
    2184             : !> \param force storage loacation for forces
    2185             : !> \param forces_map index table
    2186             : !> \param coord which of the 12 coords to be updated
    2187             : !> \param pbd_resp ...
    2188             : !> \param pbc_resp ...
    2189             : !> \param pad_resp ...
    2190             : !> \param pac_resp ...
    2191             : !> \param resp_only ...
    2192             : !> \par History
    2193             : !>      03.2009 created [Manuel Guidon]
    2194             : !> \author Manuel Guidon
    2195             : ! **************************************************************************************************
    2196    22815552 :    SUBROUTINE update_forces(ma_max, mb_max, mc_max, md_max, &
    2197             :                             pbd, pbc, pad, pac, fac, &
    2198    22815552 :                             prim, force, forces_map, coord, &
    2199             :                             pbd_resp, pbc_resp, pad_resp, pac_resp, resp_only)
    2200             : 
    2201             :       INTEGER, INTENT(IN)                                :: ma_max, mb_max, mc_max, md_max
    2202             :       REAL(dp), DIMENSION(*), INTENT(IN)                 :: pbd, pbc, pad, pac
    2203             :       REAL(dp), INTENT(IN)                               :: fac
    2204             :       REAL(dp), DIMENSION(ma_max*mb_max*mc_max*md_max), &
    2205             :          INTENT(IN)                                      :: prim
    2206             :       TYPE(qs_force_type), DIMENSION(:), POINTER         :: force
    2207             :       INTEGER, INTENT(IN)                                :: forces_map(4, 2), coord
    2208             :       REAL(dp), DIMENSION(*), INTENT(IN), OPTIONAL       :: pbd_resp, pbc_resp, pad_resp, pac_resp
    2209             :       LOGICAL, INTENT(IN), OPTIONAL                      :: resp_only
    2210             : 
    2211             :       INTEGER                                            :: ma, mb, mc, md, p_index
    2212             :       LOGICAL                                            :: full_force, with_resp_density
    2213             :       REAL(dp)                                           :: temp1, temp1_resp, temp3, temp3_resp, &
    2214             :                                                             temp4, teresp
    2215             : 
    2216    22815552 :       with_resp_density = .FALSE.
    2217             :       IF (PRESENT(pbd_resp) .AND. &
    2218             :           PRESENT(pbc_resp) .AND. &
    2219    22815552 :           PRESENT(pad_resp) .AND. &
    2220             :           PRESENT(pac_resp)) with_resp_density = .TRUE.
    2221             : 
    2222             :       IF (with_resp_density) THEN
    2223    11668620 :          full_force = .TRUE.
    2224    11668620 :          IF (PRESENT(resp_only)) full_force = .NOT. resp_only
    2225    11668620 :          p_index = 0
    2226    11668620 :          temp4 = 0.0_dp
    2227    33818064 :          DO md = 1, md_max
    2228    87186876 :             DO mc = 1, mc_max
    2229   176449044 :                DO mb = 1, mb_max
    2230   100930788 :                   temp1 = pbc((mc - 1)*mb_max + mb)*fac
    2231   100930788 :                   temp3 = pbd((md - 1)*mb_max + mb)*fac
    2232   100930788 :                   temp1_resp = pbc_resp((mc - 1)*mb_max + mb)*fac
    2233   100930788 :                   temp3_resp = pbd_resp((md - 1)*mb_max + mb)*fac
    2234   433302132 :                   DO ma = 1, ma_max
    2235   279002532 :                      p_index = p_index + 1
    2236             :                      ! HF-SCF
    2237   279002532 :                      IF (full_force) THEN
    2238             :                         teresp = temp1*pad((md - 1)*ma_max + ma) + &
    2239    98082384 :                                  temp3*pac((mc - 1)*ma_max + ma)
    2240             :                      ELSE
    2241             :                         teresp = 0.0_dp
    2242             :                      END IF
    2243             :                      ! RESP+HF
    2244             :                      teresp = teresp + &
    2245             :                               pac((mc - 1)*ma_max + ma)*temp3_resp + &
    2246             :                               pac_resp((mc - 1)*ma_max + ma)*temp3 + &
    2247             :                               pad((md - 1)*ma_max + ma)*temp1_resp + &
    2248   279002532 :                               pad_resp((md - 1)*ma_max + ma)*temp1
    2249   379933320 :                      temp4 = temp4 + teresp*prim(p_index)
    2250             :                   END DO !ma
    2251             :                END DO !mb
    2252             :             END DO !mc
    2253             :          END DO !md
    2254             :       ELSE
    2255             :          p_index = 0
    2256             :          temp4 = 0.0_dp
    2257    33124572 :          DO md = 1, md_max
    2258    92439276 :             DO mc = 1, mc_max
    2259   198196464 :                DO mb = 1, mb_max
    2260   116904120 :                   temp1 = pbc((mc - 1)*mb_max + mb)*fac
    2261   116904120 :                   temp3 = pbd((md - 1)*mb_max + mb)*fac
    2262   569013240 :                   DO ma = 1, ma_max
    2263   392794416 :                      p_index = p_index + 1
    2264             :                      teresp = temp1*pad((md - 1)*ma_max + ma) + &
    2265   392794416 :                               temp3*pac((mc - 1)*ma_max + ma)
    2266   509698536 :                      temp4 = temp4 + teresp*prim(p_index)
    2267             :                   END DO !ma
    2268             :                END DO !mb
    2269             :             END DO !mc
    2270             :          END DO !md
    2271             :       END IF
    2272             : 
    2273    22815552 : !$OMP ATOMIC
    2274             :       force(forces_map((coord - 1)/3 + 1, 1))%fock_4c(MOD(coord - 1, 3) + 1, &
    2275             :                                                       forces_map((coord - 1)/3 + 1, 2)) = &
    2276             :          force(forces_map((coord - 1)/3 + 1, 1))%fock_4c(MOD(coord - 1, 3) + 1, &
    2277             :                                                          forces_map((coord - 1)/3 + 1, 2)) - &
    2278             :          temp4
    2279             : 
    2280    22815552 :    END SUBROUTINE update_forces
    2281             : 
    2282             : ! **************************************************************************************************
    2283             : !> \brief This routine updates the virial using buffered density matrices
    2284             : !> \param ma_max Size of matrix blocks
    2285             : !> \param mb_max Size of matrix blocks
    2286             : !> \param mc_max Size of matrix blocks
    2287             : !> \param md_max Size of matrix blocks
    2288             : !> \param pbd buffer that will contain P(b,d)
    2289             : !> \param pbc buffer that will contain P(b,c)
    2290             : !> \param pad buffer that will contain P(a,d)
    2291             : !> \param pac buffer that will contain P(a,c)
    2292             : !> \param fac mulitplication factor (spin, symmetry)
    2293             : !> \param prim primitive forces
    2294             : !> \param tmp_virial ...
    2295             : !> \param coord which of the 12 coords to be updated
    2296             : !> \param l ...
    2297             : !> \param pbd_resp ...
    2298             : !> \param pbc_resp ...
    2299             : !> \param pad_resp ...
    2300             : !> \param pac_resp ...
    2301             : !> \par History
    2302             : !>      03.2009 created [Manuel Guidon]
    2303             : !> \author Manuel Guidon
    2304             : ! **************************************************************************************************
    2305     6323868 :    SUBROUTINE update_virial(ma_max, mb_max, mc_max, md_max, &
    2306             :                             pbd, pbc, pad, pac, fac, &
    2307     6323868 :                             prim, tmp_virial, coord, l, &
    2308             :                             pbd_resp, pbc_resp, pad_resp, pac_resp, resp_only)
    2309             : 
    2310             :       INTEGER, INTENT(IN)                                :: ma_max, mb_max, mc_max, md_max
    2311             :       REAL(dp), DIMENSION(*), INTENT(IN)                 :: pbd, pbc, pad, pac
    2312             :       REAL(dp), INTENT(IN)                               :: fac
    2313             :       REAL(dp), DIMENSION(ma_max*mb_max*mc_max*md_max), &
    2314             :          INTENT(IN)                                      :: prim
    2315             :       REAL(dp)                                           :: tmp_virial(3, 3)
    2316             :       INTEGER, INTENT(IN)                                :: coord, l
    2317             :       REAL(dp), DIMENSION(*), INTENT(IN), OPTIONAL       :: pbd_resp, pbc_resp, pad_resp, pac_resp
    2318             :       LOGICAL, INTENT(IN), OPTIONAL                      :: resp_only
    2319             : 
    2320             :       INTEGER                                            :: i, j, ma, mb, mc, md, p_index
    2321             :       LOGICAL                                            :: with_resp_density, full_force
    2322             :       REAL(dp)                                           :: temp1, temp1_resp, temp3, temp3_resp, &
    2323             :                                                             temp4, teresp
    2324             : 
    2325     6323868 :       with_resp_density = .FALSE.
    2326             :       IF (PRESENT(pbd_resp) .AND. &
    2327             :           PRESENT(pbc_resp) .AND. &
    2328     6323868 :           PRESENT(pad_resp) .AND. &
    2329             :           PRESENT(pac_resp)) with_resp_density = .TRUE.
    2330             : 
    2331             :       IF (with_resp_density) THEN
    2332     5587128 :          full_force = .TRUE.
    2333     5587128 :          IF (PRESENT(resp_only)) full_force = .NOT. resp_only
    2334     5587128 :          p_index = 0
    2335     5587128 :          temp4 = 0.0_dp
    2336    16806312 :          DO md = 1, md_max
    2337    41978952 :             DO mc = 1, mc_max
    2338    91230912 :                DO mb = 1, mb_max
    2339    54839088 :                   temp1 = pbc((mc - 1)*mb_max + mb)*fac
    2340    54839088 :                   temp3 = pbd((md - 1)*mb_max + mb)*fac
    2341    54839088 :                   temp1_resp = pbc_resp((mc - 1)*mb_max + mb)*fac
    2342    54839088 :                   temp3_resp = pbd_resp((md - 1)*mb_max + mb)*fac
    2343   236616804 :                   DO ma = 1, ma_max
    2344   156605076 :                      p_index = p_index + 1
    2345             :                      ! HF-SCF
    2346   156605076 :                      IF (full_force) THEN
    2347             :                         teresp = temp1*pad((md - 1)*ma_max + ma) + &
    2348    25169472 :                                  temp3*pac((mc - 1)*ma_max + ma)
    2349             :                      ELSE
    2350             :                         teresp = 0.0_dp
    2351             :                      END IF
    2352             :                      ! RESP+HF
    2353             :                      teresp = teresp + &
    2354             :                               pac((mc - 1)*ma_max + ma)*temp3_resp + &
    2355             :                               pac_resp((mc - 1)*ma_max + ma)*temp3 + &
    2356             :                               pad((md - 1)*ma_max + ma)*temp1_resp + &
    2357   156605076 :                               pad_resp((md - 1)*ma_max + ma)*temp1
    2358   211444164 :                      temp4 = temp4 + teresp*prim(p_index)
    2359             :                   END DO !ma
    2360             :                END DO !mb
    2361             :             END DO !mc
    2362             :          END DO !md
    2363             :       ELSE
    2364             :          p_index = 0
    2365             :          temp4 = 0.0_dp
    2366     1830528 :          DO md = 1, md_max
    2367     3917916 :             DO mc = 1, mc_max
    2368     5776452 :                DO mb = 1, mb_max
    2369     2595276 :                   temp1 = pbc((mc - 1)*mb_max + mb)*fac
    2370     2595276 :                   temp3 = pbd((md - 1)*mb_max + mb)*fac
    2371     9136800 :                   DO ma = 1, ma_max
    2372     4454136 :                      p_index = p_index + 1
    2373             :                      teresp = temp1*pad((md - 1)*ma_max + ma) + &
    2374     4454136 :                               temp3*pac((mc - 1)*ma_max + ma)
    2375     7049412 :                      temp4 = temp4 + teresp*prim(p_index)
    2376             :                   END DO !ma
    2377             :                END DO !mb
    2378             :             END DO !mc
    2379             :          END DO !md
    2380             :       END IF
    2381             : 
    2382     6323868 :       j = l
    2383     6323868 :       i = MOD(coord - 1, 3) + 1
    2384     6323868 :       tmp_virial(i, j) = tmp_virial(i, j) - temp4
    2385     6323868 :    END SUBROUTINE update_virial
    2386             : 
    2387             :    #:include 'hfx_get_pmax_val.fypp'
    2388             : 
    2389             : END MODULE hfx_derivatives

Generated by: LCOV version 1.15