Line data Source code
1 : !--------------------------------------------------------------------------------------------------!
2 : ! CP2K: A general program to perform molecular dynamics simulations !
3 : ! Copyright 2000-2025 CP2K developers group <https://cp2k.org> !
4 : ! !
5 : ! SPDX-License-Identifier: GPL-2.0-or-later !
6 : !--------------------------------------------------------------------------------------------------!
7 :
8 : ! **************************************************************************************************
9 : !> \brief RI-methods for HFX
10 : ! **************************************************************************************************
11 :
12 : MODULE hfx_ri
13 :
14 : USE OMP_LIB, ONLY: omp_get_num_threads,&
15 : omp_get_thread_num
16 : USE arnoldi_api, ONLY: arnoldi_extremal
17 : USE atomic_kind_types, ONLY: atomic_kind_type,&
18 : get_atomic_kind_set
19 : USE basis_set_types, ONLY: gto_basis_set_p_type,&
20 : gto_basis_set_type
21 : USE cell_types, ONLY: cell_type
22 : USE cp_blacs_env, ONLY: cp_blacs_env_type
23 : USE cp_control_types, ONLY: dft_control_type
24 : USE cp_dbcsr_api, ONLY: &
25 : dbcsr_add, dbcsr_complete_redistribute, dbcsr_copy, dbcsr_create, dbcsr_desymmetrize, &
26 : dbcsr_distribution_get, dbcsr_distribution_release, dbcsr_distribution_type, dbcsr_filter, &
27 : dbcsr_get_info, dbcsr_get_num_blocks, dbcsr_multiply, dbcsr_p_type, dbcsr_release, &
28 : dbcsr_scale, dbcsr_type, dbcsr_type_antisymmetric, dbcsr_type_no_symmetry, &
29 : dbcsr_type_symmetric
30 : USE cp_dbcsr_cholesky, ONLY: cp_dbcsr_cholesky_decompose,&
31 : cp_dbcsr_cholesky_invert
32 : USE cp_dbcsr_contrib, ONLY: dbcsr_add_on_diag,&
33 : dbcsr_dot,&
34 : dbcsr_frobenius_norm
35 : USE cp_dbcsr_diag, ONLY: cp_dbcsr_power
36 : USE cp_dbcsr_operations, ONLY: copy_dbcsr_to_fm,&
37 : copy_fm_to_dbcsr,&
38 : cp_dbcsr_dist2d_to_dist,&
39 : dbcsr_deallocate_matrix_set
40 : USE cp_fm_struct, ONLY: cp_fm_struct_create,&
41 : cp_fm_struct_release,&
42 : cp_fm_struct_type
43 : USE cp_fm_types, ONLY: cp_fm_create,&
44 : cp_fm_release,&
45 : cp_fm_type,&
46 : cp_fm_write_unformatted
47 : USE cp_log_handling, ONLY: cp_get_default_logger,&
48 : cp_logger_type
49 : USE cp_output_handling, ONLY: cp_p_file,&
50 : cp_print_key_finished_output,&
51 : cp_print_key_should_output,&
52 : cp_print_key_unit_nr
53 : USE dbt_api, ONLY: &
54 : dbt_batched_contract_finalize, dbt_batched_contract_init, dbt_clear, dbt_contract, &
55 : dbt_copy, dbt_copy_matrix_to_tensor, dbt_copy_tensor_to_matrix, dbt_create, &
56 : dbt_default_distvec, dbt_destroy, dbt_distribution_destroy, dbt_distribution_new, &
57 : dbt_distribution_type, dbt_filter, dbt_get_block, dbt_get_info, dbt_get_num_blocks_total, &
58 : dbt_iterator_blocks_left, dbt_iterator_next_block, dbt_iterator_start, dbt_iterator_stop, &
59 : dbt_iterator_type, dbt_mp_environ_pgrid, dbt_nd_mp_comm, dbt_pgrid_create, &
60 : dbt_pgrid_destroy, dbt_pgrid_type, dbt_put_block, dbt_reserve_blocks, dbt_scale, dbt_type
61 : USE distribution_2d_types, ONLY: distribution_2d_type
62 : USE hfx_types, ONLY: alloc_containers,&
63 : block_ind_type,&
64 : dealloc_containers,&
65 : hfx_compression_type,&
66 : hfx_ri_init,&
67 : hfx_ri_release,&
68 : hfx_ri_type
69 : USE input_constants, ONLY: hfx_ri_do_2c_cholesky,&
70 : hfx_ri_do_2c_diag,&
71 : hfx_ri_do_2c_iter
72 : USE input_cp2k_hfx, ONLY: ri_mo,&
73 : ri_pmat
74 : USE input_section_types, ONLY: section_vals_get_subs_vals,&
75 : section_vals_type,&
76 : section_vals_val_get
77 : USE iterate_matrix, ONLY: invert_hotelling,&
78 : matrix_sqrt_newton_schulz
79 : USE kinds, ONLY: default_string_length,&
80 : dp,&
81 : int_8
82 : USE machine, ONLY: m_walltime
83 : USE message_passing, ONLY: mp_cart_type,&
84 : mp_comm_type,&
85 : mp_para_env_type
86 : USE particle_methods, ONLY: get_particle_set
87 : USE particle_types, ONLY: particle_type
88 : USE qs_environment_types, ONLY: get_qs_env,&
89 : qs_environment_type
90 : USE qs_force_types, ONLY: qs_force_type
91 : USE qs_integral_utils, ONLY: basis_set_list_setup
92 : USE qs_interactions, ONLY: init_interaction_radii_orb_basis
93 : USE qs_kind_types, ONLY: qs_kind_type
94 : USE qs_ks_types, ONLY: qs_ks_env_type
95 : USE qs_mo_types, ONLY: get_mo_set,&
96 : mo_set_type
97 : USE qs_neighbor_list_types, ONLY: neighbor_list_set_p_type,&
98 : release_neighbor_list_sets
99 : USE qs_rho_types, ONLY: qs_rho_get,&
100 : qs_rho_type
101 : USE qs_tensors, ONLY: &
102 : build_2c_derivatives, build_2c_integrals, build_2c_neighbor_lists, build_3c_derivatives, &
103 : build_3c_integrals, build_3c_neighbor_lists, calc_2c_virial, calc_3c_virial, &
104 : compress_tensor, decompress_tensor, get_tensor_occupancy, neighbor_list_3c_destroy
105 : USE qs_tensors_types, ONLY: create_2c_tensor,&
106 : create_3c_tensor,&
107 : create_tensor_batches,&
108 : distribution_3d_create,&
109 : distribution_3d_type,&
110 : neighbor_list_3c_type,&
111 : split_block_sizes
112 : USE util, ONLY: sort
113 : USE virial_types, ONLY: virial_type
114 : #include "./base/base_uses.f90"
115 :
116 : !$ USE OMP_LIB, ONLY: omp_get_num_threads
117 :
118 : IMPLICIT NONE
119 : PRIVATE
120 :
121 : PUBLIC :: hfx_ri_update_ks, hfx_ri_update_forces, get_force_from_3c_trace, get_2c_der_force, &
122 : get_idx_to_atom, print_ri_hfx, hfx_ri_pre_scf_calc_tensors, check_inverse
123 :
124 : CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'hfx_ri'
125 : CONTAINS
126 :
127 : ! **************************************************************************************************
128 : !> \brief Switches the RI_FLAVOR from MO to RHO or vice-versa
129 : !> \param ri_data ...
130 : !> \param qs_env ...
131 : !> \note As a side product, the ri_data is mostly reinitialized and the integrals recomputed
132 : ! **************************************************************************************************
133 22 : SUBROUTINE switch_ri_flavor(ri_data, qs_env)
134 : TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
135 : TYPE(qs_environment_type), POINTER :: qs_env
136 :
137 : CHARACTER(LEN=*), PARAMETER :: routineN = 'switch_ri_flavor'
138 :
139 : INTEGER :: handle, n_mem, new_flavor
140 22 : TYPE(atomic_kind_type), DIMENSION(:), POINTER :: atomic_kind_set
141 : TYPE(dft_control_type), POINTER :: dft_control
142 : TYPE(mp_para_env_type), POINTER :: para_env
143 22 : TYPE(particle_type), DIMENSION(:), POINTER :: particle_set
144 22 : TYPE(qs_kind_type), DIMENSION(:), POINTER :: qs_kind_set
145 :
146 22 : NULLIFY (qs_kind_set, particle_set, atomic_kind_set, para_env, dft_control)
147 :
148 22 : CALL timeset(routineN, handle)
149 :
150 : CALL get_qs_env(qs_env, para_env=para_env, dft_control=dft_control, atomic_kind_set=atomic_kind_set, &
151 22 : particle_set=particle_set, qs_kind_set=qs_kind_set)
152 :
153 22 : CALL hfx_ri_release(ri_data, write_stats=.FALSE.)
154 :
155 22 : IF (ri_data%flavor == ri_pmat) THEN
156 : new_flavor = ri_mo
157 : ELSE
158 12 : new_flavor = ri_pmat
159 : END IF
160 22 : ri_data%flavor = new_flavor
161 :
162 22 : n_mem = ri_data%n_mem_input
163 22 : ri_data%n_mem_input = ri_data%n_mem_flavor_switch
164 22 : ri_data%n_mem_flavor_switch = n_mem
165 :
166 22 : CALL hfx_ri_init(ri_data, qs_kind_set, particle_set, atomic_kind_set, para_env)
167 :
168 : !Need to recalculate the integrals in the new flavor
169 : !TODO: should we backup the integrals and symmetrize/desymmetrize them instead of recomputing ?!?
170 : ! that only makes sense if actual integral calculation is not negligible
171 22 : IF (ri_data%flavor == ri_pmat) THEN
172 12 : CALL hfx_ri_pre_scf_Pmat(qs_env, ri_data)
173 : ELSE
174 10 : CALL hfx_ri_pre_scf_mo(qs_env, ri_data, dft_control%nspins)
175 : END IF
176 :
177 22 : IF (ri_data%unit_nr > 0) THEN
178 0 : IF (ri_data%flavor == ri_pmat) THEN
179 0 : WRITE (ri_data%unit_nr, '(T2,A)') "HFX_RI_INFO| temporarily switched to RI_FLAVOR RHO"
180 : ELSE
181 0 : WRITE (ri_data%unit_nr, '(T2,A)') "HFX_RI_INFO| temporarily switched to RI_FLAVOR MO"
182 : END IF
183 : END IF
184 :
185 22 : CALL timestop(handle)
186 :
187 22 : END SUBROUTINE switch_ri_flavor
188 :
189 : ! **************************************************************************************************
190 : !> \brief Pre-SCF steps in MO flavor of RI HFX
191 : !>
192 : !> Calculate 2-center & 3-center integrals (see hfx_ri_pre_scf_calc_tensors) and contract
193 : !> K(P, S) = sum_R K_2(P, R)^{-1} K_1(R, S)^{1/2}
194 : !> B(mu, lambda, R) = sum_P int_3c(mu, lambda, P) K(P, R)
195 : !> \param qs_env ...
196 : !> \param ri_data ...
197 : !> \param nspins ...
198 : ! **************************************************************************************************
199 26 : SUBROUTINE hfx_ri_pre_scf_mo(qs_env, ri_data, nspins)
200 : TYPE(qs_environment_type), POINTER :: qs_env
201 : TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
202 : INTEGER, INTENT(IN) :: nspins
203 :
204 : CHARACTER(LEN=*), PARAMETER :: routineN = 'hfx_ri_pre_scf_mo'
205 :
206 : INTEGER :: handle, handle2, ispin, n_dependent, &
207 : unit_nr, unit_nr_dbcsr
208 : REAL(KIND=dp) :: threshold
209 : TYPE(cp_blacs_env_type), POINTER :: blacs_env
210 156 : TYPE(dbcsr_type), DIMENSION(1) :: dbcsr_work_1, dbcsr_work_2, t_2c_int_mat, t_2c_op_pot, &
211 130 : t_2c_op_pot_sqrt, t_2c_op_pot_sqrt_inv, t_2c_op_RI, t_2c_op_RI_inv
212 26 : TYPE(dbt_type), ALLOCATABLE, DIMENSION(:) :: t_2c_int, t_2c_work
213 26 : TYPE(dbt_type), ALLOCATABLE, DIMENSION(:, :) :: t_3c_int
214 : TYPE(mp_para_env_type), POINTER :: para_env
215 :
216 26 : CALL timeset(routineN, handle)
217 :
218 26 : unit_nr_dbcsr = ri_data%unit_nr_dbcsr
219 26 : unit_nr = ri_data%unit_nr
220 :
221 26 : CALL get_qs_env(qs_env, para_env=para_env, blacs_env=blacs_env)
222 :
223 26 : CALL timeset(routineN//"_int", handle2)
224 :
225 806 : ALLOCATE (t_2c_int(1), t_2c_work(1), t_3c_int(1, 1))
226 26 : CALL hfx_ri_pre_scf_calc_tensors(qs_env, ri_data, t_2c_op_RI, t_2c_op_pot, t_3c_int)
227 :
228 26 : CALL timestop(handle2)
229 :
230 26 : CALL timeset(routineN//"_2c", handle2)
231 26 : IF (.NOT. ri_data%same_op) THEN
232 4 : SELECT CASE (ri_data%t2c_method)
233 : CASE (hfx_ri_do_2c_iter)
234 0 : CALL dbcsr_create(t_2c_op_RI_inv(1), template=t_2c_op_RI(1), matrix_type=dbcsr_type_no_symmetry)
235 0 : threshold = MAX(ri_data%filter_eps, 1.0e-12_dp)
236 0 : CALL invert_hotelling(t_2c_op_RI_inv(1), t_2c_op_RI(1), threshold=threshold, silent=.FALSE.)
237 : CASE (hfx_ri_do_2c_cholesky)
238 4 : CALL dbcsr_copy(t_2c_op_RI_inv(1), t_2c_op_RI(1))
239 4 : CALL cp_dbcsr_cholesky_decompose(t_2c_op_RI_inv(1), para_env=para_env, blacs_env=blacs_env)
240 4 : CALL cp_dbcsr_cholesky_invert(t_2c_op_RI_inv(1), para_env=para_env, blacs_env=blacs_env, uplo_to_full=.TRUE.)
241 : CASE (hfx_ri_do_2c_diag)
242 0 : CALL dbcsr_copy(t_2c_op_RI_inv(1), t_2c_op_RI(1))
243 : CALL cp_dbcsr_power(t_2c_op_RI_inv(1), -1.0_dp, ri_data%eps_eigval, n_dependent, &
244 4 : para_env, blacs_env, verbose=ri_data%unit_nr_dbcsr > 0)
245 : END SELECT
246 :
247 4 : IF (ri_data%check_2c_inv) THEN
248 0 : CALL check_inverse(t_2c_op_RI_inv(1), t_2c_op_RI(1), unit_nr=unit_nr)
249 : END IF
250 :
251 4 : CALL dbcsr_release(t_2c_op_RI(1))
252 :
253 4 : SELECT CASE (ri_data%t2c_method)
254 : CASE (hfx_ri_do_2c_iter)
255 0 : CALL dbcsr_create(t_2c_op_pot_sqrt(1), template=t_2c_op_pot(1), matrix_type=dbcsr_type_symmetric)
256 0 : CALL dbcsr_create(t_2c_op_pot_sqrt_inv(1), template=t_2c_op_pot(1), matrix_type=dbcsr_type_symmetric)
257 : CALL matrix_sqrt_newton_schulz(t_2c_op_pot_sqrt(1), t_2c_op_pot_sqrt_inv(1), t_2c_op_pot(1), &
258 : ri_data%filter_eps, ri_data%t2c_sqrt_order, ri_data%eps_lanczos, &
259 0 : ri_data%max_iter_lanczos)
260 :
261 0 : CALL dbcsr_release(t_2c_op_pot_sqrt_inv(1))
262 : CASE (hfx_ri_do_2c_diag, hfx_ri_do_2c_cholesky)
263 4 : CALL dbcsr_copy(t_2c_op_pot_sqrt(1), t_2c_op_pot(1))
264 : CALL cp_dbcsr_power(t_2c_op_pot_sqrt(1), 0.5_dp, ri_data%eps_eigval, n_dependent, &
265 8 : para_env, blacs_env, verbose=ri_data%unit_nr_dbcsr > 0)
266 : END SELECT
267 :
268 : !We need S^-1 and (P|Q) for the forces.
269 4 : CALL dbt_create(t_2c_op_RI_inv(1), t_2c_work(1))
270 4 : CALL dbt_copy_matrix_to_tensor(t_2c_op_RI_inv(1), t_2c_work(1))
271 4 : CALL dbt_copy(t_2c_work(1), ri_data%t_2c_inv(1, 1), move_data=.TRUE.)
272 4 : CALL dbt_destroy(t_2c_work(1))
273 4 : CALL dbt_filter(ri_data%t_2c_inv(1, 1), ri_data%filter_eps)
274 :
275 4 : CALL dbt_create(t_2c_op_pot(1), t_2c_work(1))
276 4 : CALL dbt_copy_matrix_to_tensor(t_2c_op_pot(1), t_2c_work(1))
277 4 : CALL dbt_copy(t_2c_work(1), ri_data%t_2c_pot(1, 1), move_data=.TRUE.)
278 4 : CALL dbt_destroy(t_2c_work(1))
279 4 : CALL dbt_filter(ri_data%t_2c_pot(1, 1), ri_data%filter_eps)
280 :
281 4 : IF (ri_data%check_2c_inv) THEN
282 0 : CALL check_sqrt(t_2c_op_pot(1), matrix_sqrt=t_2c_op_pot_sqrt(1), unit_nr=unit_nr)
283 : END IF
284 4 : CALL dbcsr_create(t_2c_int_mat(1), template=t_2c_op_pot(1), matrix_type=dbcsr_type_no_symmetry)
285 : CALL dbcsr_multiply("N", "N", 1.0_dp, t_2c_op_RI_inv(1), t_2c_op_pot_sqrt(1), &
286 4 : 0.0_dp, t_2c_int_mat(1), filter_eps=ri_data%filter_eps)
287 4 : CALL dbcsr_release(t_2c_op_RI_inv(1))
288 4 : CALL dbcsr_release(t_2c_op_pot_sqrt(1))
289 : ELSE
290 22 : SELECT CASE (ri_data%t2c_method)
291 : CASE (hfx_ri_do_2c_iter)
292 0 : CALL dbcsr_create(t_2c_int_mat(1), template=t_2c_op_pot(1), matrix_type=dbcsr_type_symmetric)
293 0 : CALL dbcsr_create(t_2c_op_pot_sqrt(1), template=t_2c_op_pot(1), matrix_type=dbcsr_type_symmetric)
294 : CALL matrix_sqrt_newton_schulz(t_2c_op_pot_sqrt(1), t_2c_int_mat(1), t_2c_op_pot(1), &
295 : ri_data%filter_eps, ri_data%t2c_sqrt_order, ri_data%eps_lanczos, &
296 0 : ri_data%max_iter_lanczos)
297 0 : CALL dbcsr_release(t_2c_op_pot_sqrt(1))
298 : CASE (hfx_ri_do_2c_diag, hfx_ri_do_2c_cholesky)
299 22 : CALL dbcsr_copy(t_2c_int_mat(1), t_2c_op_pot(1))
300 : CALL cp_dbcsr_power(t_2c_int_mat(1), -0.5_dp, ri_data%eps_eigval, n_dependent, &
301 44 : para_env, blacs_env, verbose=ri_data%unit_nr_dbcsr > 0)
302 : END SELECT
303 22 : IF (ri_data%check_2c_inv) THEN
304 0 : CALL check_sqrt(t_2c_op_pot(1), matrix_sqrt_inv=t_2c_int_mat(1), unit_nr=unit_nr)
305 : END IF
306 :
307 : !We need (P|Q)^-1 for the forces
308 22 : CALL dbcsr_copy(dbcsr_work_1(1), t_2c_int_mat(1))
309 22 : CALL dbcsr_create(dbcsr_work_2(1), template=t_2c_int_mat(1))
310 22 : CALL dbcsr_multiply("N", "N", 1.0_dp, dbcsr_work_1(1), t_2c_int_mat(1), 0.0_dp, dbcsr_work_2(1))
311 22 : CALL dbcsr_release(dbcsr_work_1(1))
312 22 : CALL dbt_create(dbcsr_work_2(1), t_2c_work(1))
313 22 : CALL dbt_copy_matrix_to_tensor(dbcsr_work_2(1), t_2c_work(1))
314 22 : CALL dbcsr_release(dbcsr_work_2(1))
315 22 : CALL dbt_copy(t_2c_work(1), ri_data%t_2c_inv(1, 1), move_data=.TRUE.)
316 22 : CALL dbt_destroy(t_2c_work(1))
317 22 : CALL dbt_filter(ri_data%t_2c_inv(1, 1), ri_data%filter_eps)
318 : END IF
319 :
320 26 : CALL dbcsr_release(t_2c_op_pot(1))
321 :
322 26 : CALL dbt_create(t_2c_int_mat(1), t_2c_int(1), name="(RI|RI)")
323 26 : CALL dbt_copy_matrix_to_tensor(t_2c_int_mat(1), t_2c_int(1))
324 26 : CALL dbcsr_release(t_2c_int_mat(1))
325 58 : DO ispin = 1, nspins
326 58 : CALL dbt_copy(t_2c_int(1), ri_data%t_2c_int(ispin, 1))
327 : END DO
328 26 : CALL dbt_destroy(t_2c_int(1))
329 26 : CALL timestop(handle2)
330 :
331 26 : CALL timeset(routineN//"_3c", handle2)
332 26 : CALL dbt_copy(t_3c_int(1, 1), ri_data%t_3c_int_ctr_1(1, 1), order=[2, 1, 3], move_data=.TRUE.)
333 26 : CALL dbt_filter(ri_data%t_3c_int_ctr_1(1, 1), ri_data%filter_eps)
334 26 : CALL dbt_copy(ri_data%t_3c_int_ctr_1(1, 1), ri_data%t_3c_int_ctr_2(1, 1))
335 26 : CALL dbt_destroy(t_3c_int(1, 1))
336 26 : CALL timestop(handle2)
337 :
338 26 : CALL timestop(handle)
339 :
340 260 : END SUBROUTINE
341 :
342 : ! **************************************************************************************************
343 : !> \brief ...
344 : !> \param matrix_1 ...
345 : !> \param matrix_2 ...
346 : !> \param name ...
347 : !> \param unit_nr ...
348 : ! **************************************************************************************************
349 0 : SUBROUTINE check_inverse(matrix_1, matrix_2, name, unit_nr)
350 : TYPE(dbcsr_type), INTENT(INOUT) :: matrix_1, matrix_2
351 : CHARACTER(len=*), INTENT(IN), OPTIONAL :: name
352 : INTEGER, INTENT(IN) :: unit_nr
353 :
354 : CHARACTER(len=default_string_length) :: name_prv
355 : REAL(KIND=dp) :: error, frob_matrix, frob_matrix_base
356 : TYPE(dbcsr_type) :: matrix_tmp
357 :
358 0 : IF (PRESENT(name)) THEN
359 0 : name_prv = name
360 : ELSE
361 0 : CALL dbcsr_get_info(matrix_1, name=name_prv)
362 : END IF
363 :
364 0 : CALL dbcsr_create(matrix_tmp, template=matrix_1)
365 : CALL dbcsr_multiply("N", "N", 1.0_dp, matrix_1, matrix_2, &
366 0 : 0.0_dp, matrix_tmp)
367 0 : frob_matrix_base = dbcsr_frobenius_norm(matrix_tmp)
368 0 : CALL dbcsr_add_on_diag(matrix_tmp, -1.0_dp)
369 0 : frob_matrix = dbcsr_frobenius_norm(matrix_tmp)
370 0 : error = frob_matrix/frob_matrix_base
371 0 : IF (unit_nr > 0) THEN
372 : WRITE (UNIT=unit_nr, FMT="(T3,A,A,A,T73,ES8.1)") &
373 0 : "HFX_RI_INFO| Error for INV(", TRIM(name_prv), "):", error
374 : END IF
375 :
376 0 : CALL dbcsr_release(matrix_tmp)
377 0 : END SUBROUTINE
378 :
379 : ! **************************************************************************************************
380 : !> \brief ...
381 : !> \param matrix ...
382 : !> \param matrix_sqrt ...
383 : !> \param matrix_sqrt_inv ...
384 : !> \param name ...
385 : !> \param unit_nr ...
386 : ! **************************************************************************************************
387 0 : SUBROUTINE check_sqrt(matrix, matrix_sqrt, matrix_sqrt_inv, name, unit_nr)
388 : TYPE(dbcsr_type), INTENT(INOUT) :: matrix
389 : TYPE(dbcsr_type), INTENT(IN), OPTIONAL :: matrix_sqrt, matrix_sqrt_inv
390 : CHARACTER(len=*), INTENT(IN), OPTIONAL :: name
391 : INTEGER, INTENT(IN) :: unit_nr
392 :
393 : CHARACTER(len=default_string_length) :: name_prv
394 : REAL(KIND=dp) :: frob_matrix
395 : TYPE(dbcsr_type) :: matrix_copy, matrix_tmp
396 :
397 0 : IF (PRESENT(name)) THEN
398 0 : name_prv = name
399 : ELSE
400 0 : CALL dbcsr_get_info(matrix, name=name_prv)
401 : END IF
402 0 : IF (PRESENT(matrix_sqrt)) THEN
403 0 : CALL dbcsr_create(matrix_tmp, template=matrix)
404 0 : CALL dbcsr_copy(matrix_copy, matrix_sqrt)
405 : CALL dbcsr_multiply("N", "N", 1.0_dp, matrix_sqrt, matrix_copy, &
406 0 : 0.0_dp, matrix_tmp)
407 0 : CALL dbcsr_add(matrix_tmp, matrix, 1.0_dp, -1.0_dp)
408 0 : frob_matrix = dbcsr_frobenius_norm(matrix_tmp)
409 0 : IF (unit_nr > 0) THEN
410 : WRITE (UNIT=unit_nr, FMT="(T3,A,A,A,T73,ES8.1)") &
411 0 : "HFX_RI_INFO| Error for SQRT(", TRIM(name_prv), "):", frob_matrix
412 : END IF
413 0 : CALL dbcsr_release(matrix_tmp)
414 0 : CALL dbcsr_release(matrix_copy)
415 : END IF
416 :
417 0 : IF (PRESENT(matrix_sqrt_inv)) THEN
418 0 : CALL dbcsr_create(matrix_tmp, template=matrix)
419 0 : CALL dbcsr_copy(matrix_copy, matrix_sqrt_inv)
420 : CALL dbcsr_multiply("N", "N", 1.0_dp, matrix_sqrt_inv, matrix_copy, &
421 0 : 0.0_dp, matrix_tmp)
422 0 : CALL check_inverse(matrix_tmp, matrix, name="SQRT("//TRIM(name_prv)//")", unit_nr=unit_nr)
423 0 : CALL dbcsr_release(matrix_tmp)
424 0 : CALL dbcsr_release(matrix_copy)
425 : END IF
426 :
427 0 : END SUBROUTINE
428 :
429 : ! **************************************************************************************************
430 : !> \brief Calculate 2-center and 3-center integrals
431 : !>
432 : !> 2c: K_1(P, R) = (P|v1|R) and K_2(P, R) = (P|v2|R)
433 : !> 3c: int_3c(mu, lambda, P) = (mu lambda |v2| P)
434 : !> v_1 is HF operator, v_2 is RI metric
435 : !> \param qs_env ...
436 : !> \param ri_data ...
437 : !> \param t_2c_int_RI K_2(P, R) note: even with k-point, only need on central cell
438 : !> \param t_2c_int_pot K_1(P, R)
439 : !> \param t_3c_int int_3c(mu, lambda, P)
440 : !> \param do_kpoints ...
441 : !> \notes the integral tensor arrays are already allocated on entry
442 : ! **************************************************************************************************
443 4004 : SUBROUTINE hfx_ri_pre_scf_calc_tensors(qs_env, ri_data, t_2c_int_RI, t_2c_int_pot, t_3c_int, do_kpoints)
444 : TYPE(qs_environment_type), POINTER :: qs_env
445 : TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
446 : TYPE(dbcsr_type), DIMENSION(:), INTENT(OUT) :: t_2c_int_RI, t_2c_int_pot
447 : TYPE(dbt_type), DIMENSION(:, :) :: t_3c_int
448 : LOGICAL, INTENT(IN), OPTIONAL :: do_kpoints
449 :
450 : CHARACTER(LEN=*), PARAMETER :: routineN = 'hfx_ri_pre_scf_calc_tensors'
451 :
452 : CHARACTER :: symm
453 : INTEGER :: handle, i_img, i_mem, ibasis, j_img, &
454 : n_mem, natom, nblks, nimg, nkind, &
455 : nthreads
456 : INTEGER(int_8) :: nze
457 178 : INTEGER, ALLOCATABLE, DIMENSION(:) :: dist_AO_1, dist_AO_2, dist_RI, dist_RI_ext, &
458 356 : ends_array_mc_block_int, ends_array_mc_int, sizes_AO, sizes_RI, sizes_RI_ext, &
459 178 : sizes_RI_ext_split, starts_array_mc_block_int, starts_array_mc_int
460 : INTEGER, DIMENSION(3) :: pcoord, pdims
461 356 : INTEGER, DIMENSION(:), POINTER :: col_bsize, row_bsize
462 : LOGICAL :: converged, do_kpoints_prv
463 : REAL(dp) :: max_ev, min_ev, occ, RI_range
464 178 : TYPE(atomic_kind_type), DIMENSION(:), POINTER :: atomic_kind_set
465 : TYPE(dbcsr_distribution_type) :: dbcsr_dist
466 1246 : TYPE(dbt_distribution_type) :: t_dist
467 534 : TYPE(dbt_pgrid_type) :: pgrid
468 1246 : TYPE(dbt_type) :: t_3c_tmp
469 178 : TYPE(dbt_type), ALLOCATABLE, DIMENSION(:, :) :: t_3c_int_batched
470 : TYPE(dft_control_type), POINTER :: dft_control
471 : TYPE(distribution_2d_type), POINTER :: dist_2d
472 : TYPE(distribution_3d_type) :: dist_3d
473 : TYPE(gto_basis_set_p_type), ALLOCATABLE, &
474 178 : DIMENSION(:), TARGET :: basis_set_AO, basis_set_RI
475 : TYPE(gto_basis_set_type), POINTER :: orb_basis, ri_basis
476 178 : TYPE(mp_cart_type) :: mp_comm_t3c
477 : TYPE(mp_para_env_type), POINTER :: para_env
478 : TYPE(neighbor_list_3c_type) :: nl_3c
479 : TYPE(neighbor_list_set_p_type), DIMENSION(:), &
480 178 : POINTER :: nl_2c_pot, nl_2c_RI
481 178 : TYPE(particle_type), DIMENSION(:), POINTER :: particle_set
482 178 : TYPE(qs_kind_type), DIMENSION(:), POINTER :: qs_kind_set
483 : TYPE(qs_ks_env_type), POINTER :: ks_env
484 :
485 178 : CALL timeset(routineN, handle)
486 178 : NULLIFY (col_bsize, row_bsize, dist_2d, nl_2c_pot, nl_2c_RI, &
487 178 : particle_set, qs_kind_set, ks_env, para_env)
488 :
489 : CALL get_qs_env(qs_env, natom=natom, nkind=nkind, qs_kind_set=qs_kind_set, particle_set=particle_set, &
490 178 : distribution_2d=dist_2d, ks_env=ks_env, dft_control=dft_control, para_env=para_env)
491 :
492 178 : RI_range = 0.0_dp
493 178 : do_kpoints_prv = .FALSE.
494 178 : IF (PRESENT(do_kpoints)) do_kpoints_prv = do_kpoints
495 178 : nimg = 1
496 178 : IF (do_kpoints_prv) THEN
497 70 : nimg = ri_data%nimg
498 70 : RI_range = ri_data%kp_RI_range
499 : END IF
500 :
501 712 : ALLOCATE (sizes_RI(natom), sizes_AO(natom))
502 1332 : ALLOCATE (basis_set_RI(nkind), basis_set_AO(nkind))
503 178 : CALL basis_set_list_setup(basis_set_RI, ri_data%ri_basis_type, qs_kind_set)
504 178 : CALL get_particle_set(particle_set, qs_kind_set, nsgf=sizes_RI, basis=basis_set_RI)
505 178 : CALL basis_set_list_setup(basis_set_AO, ri_data%orb_basis_type, qs_kind_set)
506 178 : CALL get_particle_set(particle_set, qs_kind_set, nsgf=sizes_AO, basis=basis_set_AO)
507 :
508 488 : DO ibasis = 1, SIZE(basis_set_AO)
509 310 : orb_basis => basis_set_AO(ibasis)%gto_basis_set
510 310 : ri_basis => basis_set_RI(ibasis)%gto_basis_set
511 : ! interaction radii should be based on eps_pgf_orb controlled in RI section
512 : ! (since hartree-fock needs very tight eps_pgf_orb for Kohn-Sham/Fock matrix but eps_pgf_orb
513 : ! can be much looser in RI HFX since no systematic error is introduced with tensor sparsity)
514 310 : CALL init_interaction_radii_orb_basis(orb_basis, ri_data%eps_pgf_orb)
515 488 : CALL init_interaction_radii_orb_basis(ri_basis, ri_data%eps_pgf_orb)
516 : END DO
517 :
518 178 : n_mem = ri_data%n_mem
519 : CALL create_tensor_batches(sizes_RI, n_mem, starts_array_mc_int, ends_array_mc_int, &
520 : starts_array_mc_block_int, ends_array_mc_block_int)
521 :
522 178 : DEALLOCATE (starts_array_mc_int, ends_array_mc_int)
523 :
524 : !We separate the K-points and standard 3c integrals, because handle quite differently
525 178 : IF (.NOT. do_kpoints_prv) THEN
526 :
527 108 : nthreads = 1
528 108 : !$ nthreads = omp_get_num_threads()
529 108 : pdims = 0
530 432 : CALL dbt_pgrid_create(para_env, pdims, pgrid, tensor_dims=[MAX(1, natom/(n_mem*nthreads)), natom, natom])
531 :
532 972 : ALLOCATE (t_3c_int_batched(1, 1))
533 : CALL create_3c_tensor(t_3c_int_batched(1, 1), dist_RI, dist_AO_1, dist_AO_2, pgrid, &
534 : sizes_RI, sizes_AO, sizes_AO, map1=[1], map2=[2, 3], &
535 108 : name="(RI | AO AO)")
536 :
537 108 : CALL get_qs_env(qs_env, nkind=nkind, particle_set=particle_set, atomic_kind_set=atomic_kind_set)
538 108 : CALL dbt_mp_environ_pgrid(pgrid, pdims, pcoord)
539 108 : CALL mp_comm_t3c%create(pgrid%mp_comm_2d, 3, pdims)
540 : CALL distribution_3d_create(dist_3d, dist_RI, dist_AO_1, dist_AO_2, &
541 108 : nkind, particle_set, mp_comm_t3c, own_comm=.TRUE.)
542 108 : DEALLOCATE (dist_RI, dist_AO_1, dist_AO_2)
543 :
544 : CALL create_3c_tensor(t_3c_int(1, 1), dist_RI, dist_AO_1, dist_AO_2, ri_data%pgrid, &
545 : ri_data%bsizes_RI_split, ri_data%bsizes_AO_split, ri_data%bsizes_AO_split, &
546 : map1=[1], map2=[2, 3], &
547 108 : name="O (RI AO | AO)")
548 :
549 : CALL build_3c_neighbor_lists(nl_3c, basis_set_RI, basis_set_AO, basis_set_AO, dist_3d, ri_data%ri_metric, &
550 108 : "HFX_3c_nl", qs_env, op_pos=1, sym_jk=.TRUE., own_dist=.TRUE.)
551 :
552 424 : DO i_mem = 1, n_mem
553 : CALL build_3c_integrals(t_3c_int_batched, ri_data%filter_eps/2, qs_env, nl_3c, &
554 : basis_set_RI, basis_set_AO, basis_set_AO, &
555 : ri_data%ri_metric, int_eps=ri_data%eps_schwarz, op_pos=1, &
556 : desymmetrize=.FALSE., &
557 948 : bounds_i=[starts_array_mc_block_int(i_mem), ends_array_mc_block_int(i_mem)])
558 316 : CALL dbt_copy(t_3c_int_batched(1, 1), t_3c_int(1, 1), summation=.TRUE., move_data=.TRUE.)
559 424 : CALL dbt_filter(t_3c_int(1, 1), ri_data%filter_eps/2)
560 : END DO
561 :
562 108 : CALL dbt_destroy(t_3c_int_batched(1, 1))
563 :
564 108 : CALL neighbor_list_3c_destroy(nl_3c)
565 :
566 108 : CALL dbt_create(t_3c_int(1, 1), t_3c_tmp)
567 :
568 108 : IF (ri_data%flavor == ri_pmat) THEN ! desymmetrize
569 : ! desymmetrize
570 82 : CALL dbt_copy(t_3c_int(1, 1), t_3c_tmp)
571 82 : CALL dbt_copy(t_3c_tmp, t_3c_int(1, 1), order=[1, 3, 2], summation=.TRUE., move_data=.TRUE.)
572 :
573 : ! For RI-RHO filter_eps_storage is reserved for screening tensor contracted with RI-metric
574 : ! with RI metric but not to bare integral tensor
575 82 : CALL dbt_filter(t_3c_int(1, 1), ri_data%filter_eps)
576 : ELSE
577 26 : CALL dbt_filter(t_3c_int(1, 1), ri_data%filter_eps_storage/2)
578 : END IF
579 :
580 108 : CALL dbt_destroy(t_3c_tmp)
581 :
582 : ELSE !do_kpoints
583 :
584 70 : nthreads = 1
585 70 : !$ nthreads = omp_get_num_threads()
586 70 : pdims = 0
587 280 : CALL dbt_pgrid_create(para_env, pdims, pgrid, tensor_dims=[natom, natom, MAX(1, natom/(n_mem*nthreads))])
588 :
589 : !In k-points HFX, we stack all images along the RI direction in the same tensors, in order
590 : !to avoid storing nimg x ncell_RI different tensors (very memory intensive)
591 70 : nblks = SIZE(ri_data%bsizes_RI_split)
592 350 : ALLOCATE (sizes_RI_ext(natom*ri_data%ncell_RI), sizes_RI_ext_split(nblks*ri_data%ncell_RI))
593 506 : DO i_img = 1, ri_data%ncell_RI
594 1308 : sizes_RI_ext((i_img - 1)*natom + 1:i_img*natom) = sizes_RI(:)
595 2344 : sizes_RI_ext_split((i_img - 1)*nblks + 1:i_img*nblks) = ri_data%bsizes_RI_split(:)
596 : END DO
597 :
598 : CALL create_3c_tensor(t_3c_tmp, dist_AO_1, dist_AO_2, dist_RI, &
599 : pgrid, sizes_AO, sizes_AO, sizes_RI, map1=[1, 2], map2=[3], &
600 70 : name="(AO AO | RI)")
601 70 : CALL dbt_destroy(t_3c_tmp)
602 :
603 : !For the integrals to work, the distribution along the RI direction must be repeated
604 210 : ALLOCATE (dist_RI_ext(natom*ri_data%ncell_RI))
605 506 : DO i_img = 1, ri_data%ncell_RI
606 1378 : dist_RI_ext((i_img - 1)*natom + 1:i_img*natom) = dist_RI(:)
607 : END DO
608 :
609 2416 : ALLOCATE (t_3c_int_batched(nimg, 1))
610 70 : CALL dbt_distribution_new(t_dist, pgrid, dist_AO_1, dist_AO_2, dist_RI_ext)
611 : CALL dbt_create(t_3c_int_batched(1, 1), "KP_3c_ints", t_dist, [1, 2], [3], &
612 70 : sizes_AO, sizes_AO, sizes_RI_ext)
613 1716 : DO i_img = 2, nimg
614 1716 : CALL dbt_create(t_3c_int_batched(1, 1), t_3c_int_batched(i_img, 1))
615 : END DO
616 70 : CALL dbt_distribution_destroy(t_dist)
617 :
618 70 : CALL get_qs_env(qs_env, nkind=nkind, particle_set=particle_set, atomic_kind_set=atomic_kind_set)
619 70 : CALL dbt_mp_environ_pgrid(pgrid, pdims, pcoord)
620 70 : CALL mp_comm_t3c%create(pgrid%mp_comm_2d, 3, pdims)
621 : CALL distribution_3d_create(dist_3d, dist_AO_1, dist_AO_2, dist_RI, &
622 70 : nkind, particle_set, mp_comm_t3c, own_comm=.TRUE.)
623 70 : DEALLOCATE (dist_RI, dist_AO_1, dist_AO_2)
624 :
625 : ! create 3c tensor for storage of ints
626 : CALL build_3c_neighbor_lists(nl_3c, basis_set_AO, basis_set_AO, basis_set_RI, dist_3d, ri_data%ri_metric, &
627 70 : "HFX_3c_nl", qs_env, op_pos=2, sym_ij=.FALSE., own_dist=.TRUE.)
628 :
629 : CALL create_3c_tensor(t_3c_int(1, 1), dist_RI, dist_AO_1, dist_AO_2, ri_data%pgrid, &
630 : sizes_RI_ext_split, ri_data%bsizes_AO_split, ri_data%bsizes_AO_split, &
631 : map1=[1], map2=[2, 3], &
632 70 : name="O (RI AO | AO)")
633 1716 : DO j_img = 2, nimg
634 1716 : CALL dbt_create(t_3c_int(1, 1), t_3c_int(1, j_img))
635 : END DO
636 :
637 70 : CALL dbt_create(t_3c_int(1, 1), t_3c_tmp)
638 192 : DO i_mem = 1, n_mem
639 : CALL build_3c_integrals(t_3c_int_batched, ri_data%filter_eps, qs_env, nl_3c, &
640 : basis_set_AO, basis_set_AO, basis_set_RI, &
641 : ri_data%ri_metric, int_eps=ri_data%eps_schwarz, op_pos=2, &
642 : desymmetrize=.FALSE., do_kpoints=.TRUE., do_hfx_kpoints=.TRUE., &
643 : bounds_k=[starts_array_mc_block_int(i_mem), ends_array_mc_block_int(i_mem)], &
644 366 : RI_range=RI_range, img_to_RI_cell=ri_data%img_to_RI_cell)
645 :
646 3440 : DO i_img = 1, nimg
647 : !we start with (mu^0 sigma^i | P^j) and finish with (P^i | mu^0 sigma^j)
648 3248 : CALL get_tensor_occupancy(t_3c_int_batched(i_img, 1), nze, occ)
649 3370 : IF (nze > 0) THEN
650 2154 : CALL dbt_copy(t_3c_int_batched(i_img, 1), t_3c_tmp, order=[3, 2, 1], move_data=.TRUE.)
651 2154 : CALL dbt_filter(t_3c_tmp, ri_data%filter_eps)
652 : CALL dbt_copy(t_3c_tmp, t_3c_int(1, i_img), order=[1, 3, 2], &
653 2154 : summation=.TRUE., move_data=.TRUE.)
654 : END IF
655 : END DO
656 : END DO
657 :
658 1786 : DO i_img = 1, nimg
659 1786 : CALL dbt_destroy(t_3c_int_batched(i_img, 1))
660 : END DO
661 1786 : DEALLOCATE (t_3c_int_batched)
662 70 : CALL neighbor_list_3c_destroy(nl_3c)
663 70 : CALL dbt_destroy(t_3c_tmp)
664 : END IF
665 178 : CALL dbt_pgrid_destroy(pgrid)
666 :
667 : CALL build_2c_neighbor_lists(nl_2c_pot, basis_set_RI, basis_set_RI, ri_data%hfx_pot, &
668 : "HFX_2c_nl_pot", qs_env, sym_ij=.NOT. do_kpoints_prv, &
669 178 : dist_2d=dist_2d)
670 :
671 178 : CALL cp_dbcsr_dist2d_to_dist(dist_2d, dbcsr_dist)
672 534 : ALLOCATE (row_bsize(SIZE(sizes_RI)))
673 356 : ALLOCATE (col_bsize(SIZE(sizes_RI)))
674 676 : row_bsize(:) = sizes_RI
675 676 : col_bsize(:) = sizes_RI
676 :
677 : !Use non-symmetric nl and matrices for k-points
678 178 : symm = dbcsr_type_symmetric
679 178 : IF (do_kpoints_prv) symm = dbcsr_type_no_symmetry
680 :
681 178 : CALL dbcsr_create(t_2c_int_pot(1), "(R|P) HFX", dbcsr_dist, symm, row_bsize, col_bsize)
682 1824 : DO i_img = 2, nimg
683 1824 : CALL dbcsr_create(t_2c_int_pot(i_img), template=t_2c_int_pot(1))
684 : END DO
685 :
686 178 : IF (.NOT. ri_data%same_op) THEN
687 88 : CALL dbcsr_create(t_2c_int_RI(1), "(R|P) HFX", dbcsr_dist, symm, row_bsize, col_bsize)
688 1734 : DO i_img = 2, nimg
689 1734 : CALL dbcsr_create(t_2c_int_RI(i_img), template=t_2c_int_RI(1))
690 : END DO
691 : END IF
692 178 : DEALLOCATE (col_bsize, row_bsize)
693 :
694 178 : CALL dbcsr_distribution_release(dbcsr_dist)
695 :
696 : CALL build_2c_integrals(t_2c_int_pot, ri_data%filter_eps_2c, qs_env, nl_2c_pot, basis_set_RI, basis_set_RI, &
697 178 : ri_data%hfx_pot, do_kpoints=do_kpoints_prv, do_hfx_kpoints=do_kpoints_prv)
698 178 : CALL release_neighbor_list_sets(nl_2c_pot)
699 :
700 178 : IF (.NOT. ri_data%same_op) THEN
701 : CALL build_2c_neighbor_lists(nl_2c_RI, basis_set_RI, basis_set_RI, ri_data%ri_metric, &
702 : "HFX_2c_nl_RI", qs_env, sym_ij=.NOT. do_kpoints_prv, &
703 88 : dist_2d=dist_2d)
704 :
705 : CALL build_2c_integrals(t_2c_int_RI, ri_data%filter_eps_2c, qs_env, nl_2c_RI, basis_set_RI, basis_set_RI, &
706 88 : ri_data%ri_metric, do_kpoints=do_kpoints_prv, do_hfx_kpoints=do_kpoints_prv)
707 :
708 88 : CALL release_neighbor_list_sets(nl_2c_RI)
709 : END IF
710 :
711 488 : DO ibasis = 1, SIZE(basis_set_AO)
712 310 : orb_basis => basis_set_AO(ibasis)%gto_basis_set
713 310 : ri_basis => basis_set_RI(ibasis)%gto_basis_set
714 : ! reset interaction radii of orb basis
715 310 : CALL init_interaction_radii_orb_basis(orb_basis, dft_control%qs_control%eps_pgf_orb)
716 488 : CALL init_interaction_radii_orb_basis(ri_basis, dft_control%qs_control%eps_pgf_orb)
717 : END DO
718 :
719 178 : IF (ri_data%calc_condnum) THEN
720 : CALL arnoldi_extremal(t_2c_int_pot(1), max_ev, min_ev, threshold=ri_data%eps_lanczos, &
721 4 : max_iter=ri_data%max_iter_lanczos, converged=converged)
722 :
723 4 : IF (.NOT. converged) THEN
724 0 : CPWARN("Not converged: unreliable condition number estimate of (P|Q) matrix (HFX potential).")
725 : END IF
726 :
727 4 : IF (ri_data%unit_nr > 0) THEN
728 2 : WRITE (ri_data%unit_nr, '(T2,A)') "2-Norm Condition Number of (P|Q) integrals (HFX potential)"
729 2 : IF (min_ev > 0) THEN
730 : WRITE (ri_data%unit_nr, '(T4,A,ES11.3E3,T32,A,ES11.3E3,A4,ES11.3E3,T63,A,F8.4)') &
731 2 : "CN : max/min ev: ", max_ev, " / ", min_ev, "=", max_ev/min_ev, "Log(2-CN):", LOG10(max_ev/min_ev)
732 : ELSE
733 : WRITE (ri_data%unit_nr, '(T4,A,ES11.3E3,T32,A,ES11.3E3,T63,A)') &
734 0 : "CN : max/min ev: ", max_ev, " / ", min_ev, "Log(CN): infinity"
735 : END IF
736 : END IF
737 :
738 4 : IF (.NOT. ri_data%same_op) THEN
739 : CALL arnoldi_extremal(t_2c_int_RI(1), max_ev, min_ev, threshold=ri_data%eps_lanczos, &
740 2 : max_iter=ri_data%max_iter_lanczos, converged=converged)
741 :
742 2 : IF (.NOT. converged) THEN
743 0 : CPWARN("Not converged: unreliable condition number estimate of (P|Q) matrix (RI metric).")
744 : END IF
745 :
746 2 : IF (ri_data%unit_nr > 0) THEN
747 1 : WRITE (ri_data%unit_nr, '(T2,A)') "2-Norm Condition Number of (P|Q) integrals (RI metric)"
748 1 : IF (min_ev > 0) THEN
749 : WRITE (ri_data%unit_nr, '(T4,A,ES11.3E3,T32,A,ES11.3E3,A4,ES11.3E3,T63,A,F8.4)') &
750 1 : "CN : max/min ev: ", max_ev, " / ", min_ev, "=", max_ev/min_ev, "Log(2-CN):", LOG10(max_ev/min_ev)
751 : ELSE
752 : WRITE (ri_data%unit_nr, '(T4,A,ES11.3E3,T32,A,ES11.3E3,T63,A)') &
753 0 : "CN : max/min ev: ", max_ev, " / ", min_ev, "Log(CN): infinity"
754 : END IF
755 : END IF
756 : END IF
757 : END IF
758 :
759 178 : CALL timestop(handle)
760 1176 : END SUBROUTINE
761 :
762 : ! **************************************************************************************************
763 : !> \brief Pre-SCF steps in rho flavor of RI HFX
764 : !>
765 : !> K(P, S) = sum_{R,Q} K_2(P, R)^{-1} K_1(R, Q) K_2(Q, S)^{-1}
766 : !> Calculate B(mu, lambda, R) = sum_P int_3c(mu, lambda, P) K(P, R)
767 : !> \param qs_env ...
768 : !> \param ri_data ...
769 : ! **************************************************************************************************
770 82 : SUBROUTINE hfx_ri_pre_scf_Pmat(qs_env, ri_data)
771 : TYPE(qs_environment_type), POINTER :: qs_env
772 : TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
773 :
774 : CHARACTER(LEN=*), PARAMETER :: routineN = 'hfx_ri_pre_scf_Pmat'
775 :
776 : INTEGER :: handle, handle2, i_mem, j_mem, &
777 : n_dependent, unit_nr, unit_nr_dbcsr
778 : INTEGER(int_8) :: nflop, nze, nze_O
779 82 : INTEGER, ALLOCATABLE, DIMENSION(:) :: batch_ranges_AO, batch_ranges_RI
780 : INTEGER, DIMENSION(2, 1) :: bounds_i
781 : INTEGER, DIMENSION(2, 2) :: bounds_j
782 : INTEGER, DIMENSION(3) :: dims_3c
783 : REAL(KIND=dp) :: compression_factor, memory_3c, occ, &
784 : threshold
785 : TYPE(cp_blacs_env_type), POINTER :: blacs_env
786 328 : TYPE(dbcsr_type), DIMENSION(1) :: t_2c_int_mat, t_2c_op_pot, t_2c_op_RI, &
787 246 : t_2c_tmp, t_2c_tmp_2
788 738 : TYPE(dbt_type) :: t_3c_2
789 164 : TYPE(dbt_type), ALLOCATABLE, DIMENSION(:) :: t_2c_int, t_2c_work
790 82 : TYPE(dbt_type), ALLOCATABLE, DIMENSION(:, :) :: t_3c_int_1
791 : TYPE(mp_para_env_type), POINTER :: para_env
792 :
793 82 : CALL timeset(routineN, handle)
794 :
795 82 : unit_nr_dbcsr = ri_data%unit_nr_dbcsr
796 82 : unit_nr = ri_data%unit_nr
797 :
798 82 : CALL get_qs_env(qs_env, para_env=para_env, blacs_env=blacs_env)
799 :
800 82 : CALL timeset(routineN//"_int", handle2)
801 :
802 2542 : ALLOCATE (t_2c_int(1), t_2c_work(1), t_3c_int_1(1, 1))
803 82 : CALL hfx_ri_pre_scf_calc_tensors(qs_env, ri_data, t_2c_op_RI, t_2c_op_pot, t_3c_int_1)
804 :
805 82 : CALL dbt_copy(t_3c_int_1(1, 1), ri_data%t_3c_int_ctr_3(1, 1), order=[1, 2, 3], move_data=.TRUE.)
806 :
807 82 : CALL dbt_destroy(t_3c_int_1(1, 1))
808 :
809 82 : CALL timestop(handle2)
810 :
811 82 : CALL timeset(routineN//"_2c", handle2)
812 :
813 82 : IF (ri_data%same_op) t_2c_op_RI(1) = t_2c_op_pot(1)
814 82 : CALL dbcsr_create(t_2c_int_mat(1), template=t_2c_op_RI(1), matrix_type=dbcsr_type_no_symmetry)
815 82 : threshold = MAX(ri_data%filter_eps, 1.0e-12_dp)
816 :
817 82 : SELECT CASE (ri_data%t2c_method)
818 : CASE (hfx_ri_do_2c_iter)
819 : CALL invert_hotelling(t_2c_int_mat(1), t_2c_op_RI(1), &
820 0 : threshold=threshold, silent=.FALSE.)
821 : CASE (hfx_ri_do_2c_cholesky)
822 82 : CALL dbcsr_copy(t_2c_int_mat(1), t_2c_op_RI(1))
823 82 : CALL cp_dbcsr_cholesky_decompose(t_2c_int_mat(1), para_env=para_env, blacs_env=blacs_env)
824 82 : CALL cp_dbcsr_cholesky_invert(t_2c_int_mat(1), para_env=para_env, blacs_env=blacs_env, uplo_to_full=.TRUE.)
825 : CASE (hfx_ri_do_2c_diag)
826 0 : CALL dbcsr_copy(t_2c_int_mat(1), t_2c_op_RI(1))
827 : CALL cp_dbcsr_power(t_2c_int_mat(1), -1.0_dp, ri_data%eps_eigval, n_dependent, &
828 82 : para_env, blacs_env, verbose=ri_data%unit_nr_dbcsr > 0)
829 : END SELECT
830 :
831 82 : IF (ri_data%check_2c_inv) THEN
832 0 : CALL check_inverse(t_2c_int_mat(1), t_2c_op_RI(1), unit_nr=unit_nr)
833 : END IF
834 :
835 : !Need to save the (P|Q)^-1 tensor for forces (inverse metric if not same_op)
836 82 : CALL dbt_create(t_2c_int_mat(1), t_2c_work(1))
837 82 : CALL dbt_copy_matrix_to_tensor(t_2c_int_mat(1), t_2c_work(1))
838 82 : CALL dbt_copy(t_2c_work(1), ri_data%t_2c_inv(1, 1), move_data=.TRUE.)
839 82 : CALL dbt_destroy(t_2c_work(1))
840 82 : CALL dbt_filter(ri_data%t_2c_inv(1, 1), ri_data%filter_eps)
841 82 : IF (.NOT. ri_data%same_op) THEN
842 : !Also save the RI (P|Q) integral
843 14 : CALL dbt_create(t_2c_op_pot(1), t_2c_work(1))
844 14 : CALL dbt_copy_matrix_to_tensor(t_2c_op_pot(1), t_2c_work(1))
845 14 : CALL dbt_copy(t_2c_work(1), ri_data%t_2c_pot(1, 1), move_data=.TRUE.)
846 14 : CALL dbt_destroy(t_2c_work(1))
847 14 : CALL dbt_filter(ri_data%t_2c_pot(1, 1), ri_data%filter_eps)
848 : END IF
849 :
850 82 : IF (ri_data%same_op) THEN
851 68 : CALL dbcsr_release(t_2c_op_pot(1))
852 : ELSE
853 14 : CALL dbcsr_create(t_2c_tmp(1), template=t_2c_op_RI(1), matrix_type=dbcsr_type_no_symmetry)
854 14 : CALL dbcsr_create(t_2c_tmp_2(1), template=t_2c_op_RI(1), matrix_type=dbcsr_type_no_symmetry)
855 14 : CALL dbcsr_release(t_2c_op_RI(1))
856 : CALL dbcsr_multiply('N', 'N', 1.0_dp, t_2c_int_mat(1), t_2c_op_pot(1), 0.0_dp, t_2c_tmp(1), &
857 14 : filter_eps=ri_data%filter_eps)
858 :
859 14 : CALL dbcsr_release(t_2c_op_pot(1))
860 : CALL dbcsr_multiply('N', 'N', 1.0_dp, t_2c_tmp(1), t_2c_int_mat(1), 0.0_dp, t_2c_tmp_2(1), &
861 14 : filter_eps=ri_data%filter_eps)
862 14 : CALL dbcsr_release(t_2c_tmp(1))
863 14 : CALL dbcsr_release(t_2c_int_mat(1))
864 14 : t_2c_int_mat(1) = t_2c_tmp_2(1)
865 : END IF
866 :
867 82 : CALL dbt_create(t_2c_int_mat(1), t_2c_int(1), name="(RI|RI)")
868 82 : CALL dbt_copy_matrix_to_tensor(t_2c_int_mat(1), t_2c_int(1))
869 82 : CALL dbcsr_release(t_2c_int_mat(1))
870 82 : CALL dbt_copy(t_2c_int(1), ri_data%t_2c_int(1, 1), move_data=.TRUE.)
871 82 : CALL dbt_destroy(t_2c_int(1))
872 82 : CALL dbt_filter(ri_data%t_2c_int(1, 1), ri_data%filter_eps)
873 :
874 82 : CALL timestop(handle2)
875 :
876 82 : CALL dbt_create(ri_data%t_3c_int_ctr_3(1, 1), t_3c_2)
877 :
878 82 : CALL dbt_get_info(ri_data%t_3c_int_ctr_3(1, 1), nfull_total=dims_3c)
879 :
880 82 : memory_3c = 0.0_dp
881 82 : nze_O = 0
882 :
883 246 : ALLOCATE (batch_ranges_RI(ri_data%n_mem_RI + 1))
884 246 : ALLOCATE (batch_ranges_AO(ri_data%n_mem + 1))
885 328 : batch_ranges_RI(:ri_data%n_mem_RI) = ri_data%starts_array_RI_mem_block(:)
886 82 : batch_ranges_RI(ri_data%n_mem_RI + 1) = ri_data%ends_array_RI_mem_block(ri_data%n_mem_RI) + 1
887 328 : batch_ranges_AO(:ri_data%n_mem) = ri_data%starts_array_mem_block(:)
888 82 : batch_ranges_AO(ri_data%n_mem + 1) = ri_data%ends_array_mem_block(ri_data%n_mem) + 1
889 :
890 : CALL dbt_batched_contract_init(ri_data%t_3c_int_ctr_3(1, 1), batch_range_1=batch_ranges_RI, &
891 82 : batch_range_2=batch_ranges_AO)
892 : CALL dbt_batched_contract_init(t_3c_2, batch_range_1=batch_ranges_RI, &
893 82 : batch_range_2=batch_ranges_AO)
894 :
895 328 : DO i_mem = 1, ri_data%n_mem_RI
896 738 : bounds_i(:, 1) = [ri_data%starts_array_RI_mem(i_mem), ri_data%ends_array_RI_mem(i_mem)]
897 :
898 246 : CALL dbt_batched_contract_init(ri_data%t_2c_int(1, 1))
899 984 : DO j_mem = 1, ri_data%n_mem
900 2214 : bounds_j(:, 1) = [ri_data%starts_array_mem(j_mem), ri_data%ends_array_mem(j_mem)]
901 2214 : bounds_j(:, 2) = [1, dims_3c(3)]
902 738 : CALL timeset(routineN//"_RIx3C", handle2)
903 : CALL dbt_contract(1.0_dp, ri_data%t_2c_int(1, 1), ri_data%t_3c_int_ctr_3(1, 1), &
904 : 0.0_dp, t_3c_2, &
905 : contract_1=[2], notcontract_1=[1], &
906 : contract_2=[1], notcontract_2=[2, 3], &
907 : map_1=[1], map_2=[2, 3], filter_eps=ri_data%filter_eps_storage, &
908 : bounds_2=bounds_i, &
909 : bounds_3=bounds_j, &
910 : unit_nr=unit_nr_dbcsr, &
911 738 : flop=nflop)
912 :
913 738 : ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
914 738 : CALL timestop(handle2)
915 :
916 738 : CALL timeset(routineN//"_copy_2", handle2)
917 738 : CALL dbt_copy(t_3c_2, ri_data%t_3c_int_ctr_1(1, 1), order=[2, 1, 3], move_data=.TRUE.)
918 :
919 738 : CALL get_tensor_occupancy(ri_data%t_3c_int_ctr_1(1, 1), nze, occ)
920 738 : nze_O = nze_O + nze
921 :
922 : CALL compress_tensor(ri_data%t_3c_int_ctr_1(1, 1), ri_data%blk_indices(j_mem, i_mem)%ind, &
923 738 : ri_data%store_3c(j_mem, i_mem), ri_data%filter_eps_storage, memory_3c)
924 :
925 3198 : CALL timestop(handle2)
926 : END DO
927 328 : CALL dbt_batched_contract_finalize(ri_data%t_2c_int(1, 1))
928 : END DO
929 82 : CALL dbt_batched_contract_finalize(t_3c_2)
930 82 : CALL dbt_batched_contract_finalize(ri_data%t_3c_int_ctr_3(1, 1))
931 :
932 82 : CALL para_env%sum(memory_3c)
933 82 : compression_factor = REAL(nze_O, dp)*1.0E-06*8.0_dp/memory_3c
934 :
935 82 : IF (unit_nr > 0) THEN
936 : WRITE (UNIT=unit_nr, FMT="((T3,A,T66,F11.2,A4))") &
937 20 : "MEMORY_INFO| Memory for 3-center HFX integrals (compressed):", memory_3c, ' MiB'
938 :
939 : WRITE (UNIT=unit_nr, FMT="((T3,A,T60,F21.2))") &
940 20 : "MEMORY_INFO| Compression factor: ", compression_factor
941 : END IF
942 :
943 82 : CALL dbt_clear(ri_data%t_2c_int(1, 1))
944 82 : CALL dbt_destroy(t_3c_2)
945 :
946 82 : CALL dbt_copy(ri_data%t_3c_int_ctr_3(1, 1), ri_data%t_3c_int_ctr_2(1, 1), order=[2, 1, 3], move_data=.TRUE.)
947 :
948 82 : CALL timestop(handle)
949 738 : END SUBROUTINE
950 :
951 : ! **************************************************************************************************
952 : !> \brief Sorts 2d indices w.r.t. rows and columns
953 : !> \param blk_ind ...
954 : ! **************************************************************************************************
955 0 : SUBROUTINE sort_unique_blkind_2d(blk_ind)
956 : INTEGER, ALLOCATABLE, DIMENSION(:, :), &
957 : INTENT(INOUT) :: blk_ind
958 :
959 : INTEGER :: end_ind, iblk, iblk_all, irow, nblk, &
960 : ncols, start_ind
961 0 : INTEGER, ALLOCATABLE, DIMENSION(:) :: ind_1, ind_2, sort_1, sort_2
962 0 : INTEGER, ALLOCATABLE, DIMENSION(:, :) :: blk_ind_tmp
963 :
964 0 : nblk = SIZE(blk_ind, 1)
965 :
966 0 : ALLOCATE (sort_1(nblk))
967 0 : ALLOCATE (ind_1(nblk))
968 :
969 0 : sort_1(:) = blk_ind(:, 1)
970 0 : CALL sort(sort_1, nblk, ind_1)
971 :
972 0 : blk_ind(:, :) = blk_ind(ind_1, :)
973 :
974 0 : start_ind = 1
975 :
976 0 : DO WHILE (start_ind <= nblk)
977 0 : irow = blk_ind(start_ind, 1)
978 0 : end_ind = start_ind
979 :
980 0 : IF (end_ind + 1 <= nblk) THEN
981 0 : DO WHILE (blk_ind(end_ind + 1, 1) == irow)
982 0 : end_ind = end_ind + 1
983 0 : IF (end_ind + 1 > nblk) EXIT
984 : END DO
985 : END IF
986 :
987 0 : ncols = end_ind - start_ind + 1
988 0 : ALLOCATE (sort_2(ncols))
989 0 : ALLOCATE (ind_2(ncols))
990 0 : sort_2(:) = blk_ind(start_ind:end_ind, 2)
991 0 : CALL sort(sort_2, ncols, ind_2)
992 0 : ind_2 = ind_2 + start_ind - 1
993 :
994 0 : blk_ind(start_ind:end_ind, :) = blk_ind(ind_2, :)
995 0 : start_ind = end_ind + 1
996 :
997 0 : DEALLOCATE (sort_2, ind_2)
998 : END DO
999 :
1000 0 : ALLOCATE (blk_ind_tmp(nblk, 2))
1001 0 : blk_ind_tmp = 0
1002 :
1003 : iblk = 0
1004 0 : DO iblk_all = 1, nblk
1005 0 : IF (iblk >= 1) THEN
1006 0 : IF (ALL(blk_ind_tmp(iblk, :) == blk_ind(iblk_all, :))) THEN
1007 : CYCLE
1008 : END IF
1009 : END IF
1010 0 : iblk = iblk + 1
1011 0 : blk_ind_tmp(iblk, :) = blk_ind(iblk_all, :)
1012 : END DO
1013 0 : nblk = iblk
1014 :
1015 0 : DEALLOCATE (blk_ind)
1016 0 : ALLOCATE (blk_ind(nblk, 2))
1017 :
1018 0 : blk_ind(:, :) = blk_ind_tmp(:nblk, :)
1019 :
1020 0 : END SUBROUTINE
1021 :
1022 : ! **************************************************************************************************
1023 : !> \brief ...
1024 : !> \param qs_env ...
1025 : !> \param ri_data ...
1026 : !> \param ks_matrix ...
1027 : !> \param ehfx ...
1028 : !> \param mos ...
1029 : !> \param rho_ao ...
1030 : !> \param geometry_did_change ...
1031 : !> \param nspins ...
1032 : !> \param hf_fraction ...
1033 : ! **************************************************************************************************
1034 1700 : SUBROUTINE hfx_ri_update_ks(qs_env, ri_data, ks_matrix, ehfx, mos, rho_ao, &
1035 : geometry_did_change, nspins, hf_fraction)
1036 : TYPE(qs_environment_type), POINTER :: qs_env
1037 : TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
1038 : TYPE(dbcsr_p_type), DIMENSION(:, :), POINTER :: ks_matrix
1039 : REAL(KIND=dp), INTENT(OUT) :: ehfx
1040 : TYPE(mo_set_type), DIMENSION(:), INTENT(IN), &
1041 : OPTIONAL :: mos
1042 : TYPE(dbcsr_p_type), DIMENSION(:, :), POINTER :: rho_ao
1043 : LOGICAL, INTENT(IN) :: geometry_did_change
1044 : INTEGER, INTENT(IN) :: nspins
1045 : REAL(KIND=dp), INTENT(IN) :: hf_fraction
1046 :
1047 : CHARACTER(LEN=*), PARAMETER :: routineN = 'hfx_ri_update_ks'
1048 :
1049 : CHARACTER(1) :: mtype
1050 : INTEGER :: handle, handle2, i, ispin, j
1051 : INTEGER(int_8) :: nblks
1052 : INTEGER, DIMENSION(2) :: homo
1053 : LOGICAL :: is_antisymmetric
1054 : REAL(dp) :: etmp, fac
1055 1700 : REAL(KIND=dp), DIMENSION(:), POINTER :: mo_eigenvalues
1056 : TYPE(cp_fm_type), POINTER :: mo_coeff
1057 1700 : TYPE(dbcsr_p_type), DIMENSION(:, :), POINTER :: my_ks_matrix, my_rho_ao
1058 5100 : TYPE(dbcsr_type), DIMENSION(2) :: mo_coeff_b
1059 : TYPE(dbcsr_type), POINTER :: mo_coeff_b_tmp
1060 : TYPE(mp_para_env_type), POINTER :: para_env
1061 :
1062 1700 : CALL timeset(routineN, handle)
1063 :
1064 1700 : IF (nspins == 1) THEN
1065 1336 : fac = 0.5_dp*hf_fraction
1066 : ELSE
1067 364 : fac = 1.0_dp*hf_fraction
1068 : END IF
1069 :
1070 : !If incoming assymetric matrices, need to convert to normal
1071 1700 : NULLIFY (my_ks_matrix, my_rho_ao)
1072 1700 : CALL dbcsr_get_info(ks_matrix(1, 1)%matrix, matrix_type=mtype)
1073 1700 : is_antisymmetric = mtype == dbcsr_type_antisymmetric
1074 1700 : IF (is_antisymmetric) THEN
1075 960 : ALLOCATE (my_ks_matrix(SIZE(ks_matrix, 1), SIZE(ks_matrix, 2)))
1076 960 : ALLOCATE (my_rho_ao(SIZE(rho_ao, 1), SIZE(rho_ao, 2)))
1077 :
1078 320 : DO i = 1, SIZE(ks_matrix, 1)
1079 480 : DO j = 1, SIZE(ks_matrix, 2)
1080 160 : ALLOCATE (my_ks_matrix(i, j)%matrix, my_rho_ao(i, j)%matrix)
1081 : CALL dbcsr_create(my_ks_matrix(i, j)%matrix, template=ks_matrix(i, j)%matrix, &
1082 160 : matrix_type=dbcsr_type_no_symmetry)
1083 160 : CALL dbcsr_desymmetrize(ks_matrix(i, j)%matrix, my_ks_matrix(i, j)%matrix)
1084 : CALL dbcsr_create(my_rho_ao(i, j)%matrix, template=rho_ao(i, j)%matrix, &
1085 160 : matrix_type=dbcsr_type_no_symmetry)
1086 320 : CALL dbcsr_desymmetrize(rho_ao(i, j)%matrix, my_rho_ao(i, j)%matrix)
1087 : END DO
1088 : END DO
1089 : ELSE
1090 1540 : my_ks_matrix => ks_matrix
1091 1540 : my_rho_ao => rho_ao
1092 : END IF
1093 :
1094 : !Case analysis on RI_FLAVOR: we switch if the input flavor is MO, there is no provided MO, and
1095 : ! the current flavor is not yet RHO. We switch back to MO if there are
1096 : ! MOs available and the current flavor is actually RHO
1097 1700 : IF (ri_data%input_flavor == ri_mo .AND. (.NOT. PRESENT(mos)) .AND. ri_data%flavor == ri_mo) THEN
1098 12 : CALL switch_ri_flavor(ri_data, qs_env)
1099 1688 : ELSE IF (ri_data%input_flavor == ri_mo .AND. PRESENT(mos) .AND. ri_data%flavor == ri_pmat) THEN
1100 10 : CALL switch_ri_flavor(ri_data, qs_env)
1101 : END IF
1102 :
1103 1928 : SELECT CASE (ri_data%flavor)
1104 : CASE (ri_mo)
1105 228 : CPASSERT(PRESENT(mos))
1106 228 : CALL timeset(routineN//"_MO", handle2)
1107 :
1108 550 : DO ispin = 1, nspins
1109 322 : NULLIFY (mo_coeff_b_tmp)
1110 322 : CPASSERT(mos(ispin)%uniform_occupation)
1111 322 : CALL get_mo_set(mo_set=mos(ispin), mo_coeff=mo_coeff, eigenvalues=mo_eigenvalues, mo_coeff_b=mo_coeff_b_tmp)
1112 :
1113 322 : IF (.NOT. mos(ispin)%use_mo_coeff_b) CALL copy_fm_to_dbcsr(mo_coeff, mo_coeff_b_tmp)
1114 550 : CALL dbcsr_copy(mo_coeff_b(ispin), mo_coeff_b_tmp)
1115 : END DO
1116 :
1117 550 : DO ispin = 1, nspins
1118 322 : CALL dbcsr_scale(mo_coeff_b(ispin), SQRT(mos(ispin)%maxocc))
1119 550 : homo(ispin) = mos(ispin)%homo
1120 : END DO
1121 :
1122 228 : CALL timestop(handle2)
1123 :
1124 : CALL hfx_ri_update_ks_mo(qs_env, ri_data, my_ks_matrix, mo_coeff_b, homo, &
1125 228 : geometry_did_change, nspins, fac)
1126 : CASE (ri_pmat)
1127 :
1128 1472 : NULLIFY (para_env)
1129 1472 : CALL get_qs_env(qs_env, para_env=para_env)
1130 3214 : DO ispin = 1, SIZE(my_rho_ao, 1)
1131 1742 : nblks = dbcsr_get_num_blocks(my_rho_ao(ispin, 1)%matrix)
1132 1742 : CALL para_env%sum(nblks)
1133 3214 : IF (nblks == 0) THEN
1134 0 : CPABORT("received empty density matrix")
1135 : END IF
1136 : END DO
1137 :
1138 : CALL hfx_ri_update_ks_pmat(qs_env, ri_data, my_ks_matrix, my_rho_ao, &
1139 3400 : geometry_did_change, nspins, fac)
1140 :
1141 : END SELECT
1142 :
1143 3764 : DO ispin = 1, nspins
1144 3764 : CALL dbcsr_release(mo_coeff_b(ispin))
1145 : END DO
1146 :
1147 3764 : DO ispin = 1, nspins
1148 3764 : CALL dbcsr_filter(my_ks_matrix(ispin, 1)%matrix, ri_data%filter_eps)
1149 : END DO
1150 :
1151 1700 : CALL timeset(routineN//"_energy", handle2)
1152 : ! Calculate the exchange energy
1153 1700 : ehfx = 0.0_dp
1154 3764 : DO ispin = 1, nspins
1155 : CALL dbcsr_dot(my_ks_matrix(ispin, 1)%matrix, my_rho_ao(ispin, 1)%matrix, &
1156 2064 : etmp)
1157 3764 : ehfx = ehfx + 0.5_dp*etmp
1158 :
1159 : END DO
1160 1700 : CALL timestop(handle2)
1161 :
1162 : !Anti-symmetric case
1163 1700 : IF (is_antisymmetric) THEN
1164 320 : DO i = 1, SIZE(ks_matrix, 1)
1165 480 : DO j = 1, SIZE(ks_matrix, 2)
1166 160 : CALL dbcsr_complete_redistribute(my_ks_matrix(i, j)%matrix, ks_matrix(i, j)%matrix)
1167 320 : CALL dbcsr_complete_redistribute(my_rho_ao(i, j)%matrix, rho_ao(i, j)%matrix)
1168 : END DO
1169 : END DO
1170 160 : CALL dbcsr_deallocate_matrix_set(my_ks_matrix)
1171 160 : CALL dbcsr_deallocate_matrix_set(my_rho_ao)
1172 : END IF
1173 :
1174 1700 : CALL timestop(handle)
1175 1700 : END SUBROUTINE
1176 :
1177 : ! **************************************************************************************************
1178 : !> \brief Calculate Fock (AKA Kohn-Sham) matrix in MO flavor
1179 : !>
1180 : !> C(mu, i) (MO coefficients)
1181 : !> M(mu, i, R) = sum_nu B(mu, nu, R) C(nu, i)
1182 : !> KS(mu, lambda) = sum_{i,R} M(mu, i, R) M(lambda, i, R)
1183 : !> \param qs_env ...
1184 : !> \param ri_data ...
1185 : !> \param ks_matrix ...
1186 : !> \param mo_coeff C(mu, i)
1187 : !> \param homo ...
1188 : !> \param geometry_did_change ...
1189 : !> \param nspins ...
1190 : !> \param fac ...
1191 : ! **************************************************************************************************
1192 228 : SUBROUTINE hfx_ri_update_ks_mo(qs_env, ri_data, ks_matrix, mo_coeff, &
1193 228 : homo, geometry_did_change, nspins, fac)
1194 : TYPE(qs_environment_type), POINTER :: qs_env
1195 : TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
1196 : TYPE(dbcsr_p_type), DIMENSION(:, :) :: ks_matrix
1197 : TYPE(dbcsr_type), DIMENSION(:), INTENT(IN) :: mo_coeff
1198 : INTEGER, DIMENSION(:) :: homo
1199 : LOGICAL, INTENT(IN) :: geometry_did_change
1200 : INTEGER, INTENT(IN) :: nspins
1201 : REAL(dp), INTENT(IN) :: fac
1202 :
1203 : CHARACTER(LEN=*), PARAMETER :: routineN = 'hfx_ri_update_ks_mo'
1204 :
1205 : INTEGER :: bsize, bsum, comm_2d_handle, handle, &
1206 : handle2, i_mem, iblock, iproc, ispin, &
1207 : n_mem, n_mos, nblock, unit_nr_dbcsr
1208 : INTEGER(int_8) :: nblks, nflop
1209 228 : INTEGER, ALLOCATABLE, DIMENSION(:) :: batch_ranges_1, batch_ranges_2, dist1, dist2, dist3, &
1210 228 : mem_end, mem_end_block_1, mem_end_block_2, mem_size, mem_start, mem_start_block_1, &
1211 228 : mem_start_block_2, mo_bsizes_1, mo_bsizes_2
1212 228 : INTEGER, ALLOCATABLE, DIMENSION(:, :) :: bounds
1213 : INTEGER, DIMENSION(2) :: pdims_2d
1214 : INTEGER, DIMENSION(3) :: pdims
1215 : LOGICAL :: do_initialize
1216 : REAL(dp) :: t1, t2
1217 : TYPE(dbcsr_distribution_type) :: ks_dist
1218 1140 : TYPE(dbt_pgrid_type) :: pgrid, pgrid_2d
1219 5700 : TYPE(dbt_type) :: ks_t, ks_t_mat, mo_coeff_t, &
1220 2052 : mo_coeff_t_split
1221 228 : TYPE(dbt_type), ALLOCATABLE, DIMENSION(:, :) :: t_3c_int_mo_1, t_3c_int_mo_2
1222 : TYPE(mp_comm_type) :: comm_2d
1223 : TYPE(mp_para_env_type), POINTER :: para_env
1224 :
1225 228 : CALL timeset(routineN, handle)
1226 :
1227 228 : CPASSERT(SIZE(ks_matrix, 2) == 1)
1228 :
1229 228 : unit_nr_dbcsr = ri_data%unit_nr_dbcsr
1230 :
1231 228 : IF (geometry_did_change) THEN
1232 16 : CALL hfx_ri_pre_scf_mo(qs_env, ri_data, nspins)
1233 : END IF
1234 :
1235 228 : nblks = dbt_get_num_blocks_total(ri_data%t_3c_int_ctr_1(1, 1))
1236 228 : IF (nblks == 0) THEN
1237 0 : CPABORT("3-center integrals are not available (first call requires geometry_did_change=.TRUE.)")
1238 : END IF
1239 :
1240 550 : DO ispin = 1, nspins
1241 322 : nblks = dbt_get_num_blocks_total(ri_data%t_2c_int(ispin, 1))
1242 550 : IF (nblks == 0) THEN
1243 0 : CPABORT("2-center integrals are not available (first call requires geometry_did_change=.TRUE.)")
1244 : END IF
1245 : END DO
1246 :
1247 228 : IF (.NOT. ALLOCATED(ri_data%t_3c_int_mo)) THEN
1248 18 : do_initialize = .TRUE.
1249 18 : CPASSERT(.NOT. ALLOCATED(ri_data%t_3c_ctr_RI))
1250 18 : CPASSERT(.NOT. ALLOCATED(ri_data%t_3c_ctr_KS))
1251 18 : CPASSERT(.NOT. ALLOCATED(ri_data%t_3c_ctr_KS_copy))
1252 256 : ALLOCATE (ri_data%t_3c_int_mo(nspins, 1, 1))
1253 238 : ALLOCATE (ri_data%t_3c_ctr_RI(nspins, 1, 1))
1254 238 : ALLOCATE (ri_data%t_3c_ctr_KS(nspins, 1, 1))
1255 238 : ALLOCATE (ri_data%t_3c_ctr_KS_copy(nspins, 1, 1))
1256 : ELSE
1257 : do_initialize = .FALSE.
1258 : END IF
1259 :
1260 228 : CALL get_qs_env(qs_env, para_env=para_env)
1261 :
1262 228 : ALLOCATE (bounds(2, 1))
1263 :
1264 228 : CALL dbcsr_get_info(ks_matrix(1, 1)%matrix, distribution=ks_dist)
1265 228 : CALL dbcsr_distribution_get(ks_dist, group=comm_2d_handle, nprows=pdims_2d(1), npcols=pdims_2d(2))
1266 :
1267 228 : CALL comm_2d%set_handle(comm_2d_handle)
1268 228 : pgrid_2d = dbt_nd_mp_comm(comm_2d, [1], [2], pdims_2d=pdims_2d)
1269 :
1270 : CALL create_2c_tensor(ks_t, dist1, dist2, pgrid_2d, ri_data%bsizes_AO_fit, ri_data%bsizes_AO_fit, &
1271 228 : name="(AO | AO)")
1272 :
1273 228 : DEALLOCATE (dist1, dist2)
1274 :
1275 228 : CALL para_env%sync()
1276 228 : t1 = m_walltime()
1277 :
1278 5244 : ALLOCATE (t_3c_int_mo_1(1, 1), t_3c_int_mo_2(1, 1))
1279 550 : DO ispin = 1, nspins
1280 :
1281 322 : CALL dbcsr_get_info(mo_coeff(ispin), nfullcols_total=n_mos)
1282 966 : ALLOCATE (mo_bsizes_2(n_mos))
1283 2020 : mo_bsizes_2 = 1
1284 :
1285 : CALL create_tensor_batches(mo_bsizes_2, ri_data%n_mem, mem_start, mem_end, &
1286 322 : mem_start_block_2, mem_end_block_2)
1287 322 : n_mem = ri_data%n_mem
1288 966 : ALLOCATE (mem_size(n_mem))
1289 :
1290 1542 : DO i_mem = 1, n_mem
1291 2918 : bsize = SUM(mo_bsizes_2(mem_start_block_2(i_mem):mem_end_block_2(i_mem)))
1292 1542 : mem_size(i_mem) = bsize
1293 : END DO
1294 :
1295 322 : CALL split_block_sizes(mem_size, mo_bsizes_1, ri_data%max_bsize_MO)
1296 644 : ALLOCATE (mem_start_block_1(n_mem))
1297 644 : ALLOCATE (mem_end_block_1(n_mem))
1298 322 : nblock = SIZE(mo_bsizes_1)
1299 322 : iblock = 0
1300 1542 : DO i_mem = 1, n_mem
1301 : bsum = 0
1302 322 : DO
1303 1220 : iblock = iblock + 1
1304 1220 : CPASSERT(iblock <= nblock)
1305 1220 : bsum = bsum + mo_bsizes_1(iblock)
1306 1220 : IF (bsum == mem_size(i_mem)) THEN
1307 1220 : IF (i_mem == 1) THEN
1308 322 : mem_start_block_1(i_mem) = 1
1309 : ELSE
1310 898 : mem_start_block_1(i_mem) = mem_end_block_1(i_mem - 1) + 1
1311 : END IF
1312 1220 : mem_end_block_1(i_mem) = iblock
1313 : EXIT
1314 : END IF
1315 : END DO
1316 : END DO
1317 :
1318 966 : ALLOCATE (batch_ranges_1(ri_data%n_mem + 1))
1319 1542 : batch_ranges_1(:ri_data%n_mem) = mem_start_block_1(:)
1320 322 : batch_ranges_1(ri_data%n_mem + 1) = mem_end_block_1(ri_data%n_mem) + 1
1321 :
1322 644 : ALLOCATE (batch_ranges_2(ri_data%n_mem + 1))
1323 1542 : batch_ranges_2(:ri_data%n_mem) = mem_start_block_2(:)
1324 322 : batch_ranges_2(ri_data%n_mem + 1) = mem_end_block_2(ri_data%n_mem) + 1
1325 :
1326 322 : iproc = para_env%mepos
1327 :
1328 : CALL create_3c_tensor(t_3c_int_mo_1(1, 1), dist1, dist2, dist3, ri_data%pgrid_1, &
1329 : ri_data%bsizes_AO_split, ri_data%bsizes_RI_split, mo_bsizes_1, &
1330 : [1, 2], [3], &
1331 322 : name="(AO RI | MO)")
1332 :
1333 322 : DEALLOCATE (dist1, dist2, dist3)
1334 :
1335 : CALL create_3c_tensor(t_3c_int_mo_2(1, 1), dist1, dist2, dist3, ri_data%pgrid_2, &
1336 : mo_bsizes_1, ri_data%bsizes_RI_split, ri_data%bsizes_AO_split, &
1337 : [1], [2, 3], &
1338 322 : name="(MO | RI AO)")
1339 :
1340 322 : DEALLOCATE (dist1, dist2, dist3)
1341 :
1342 : CALL create_2c_tensor(mo_coeff_t_split, dist1, dist2, pgrid_2d, ri_data%bsizes_AO_split, mo_bsizes_1, &
1343 : name="(AO | MO)")
1344 :
1345 322 : DEALLOCATE (dist1, dist2)
1346 :
1347 322 : CPASSERT(homo(ispin)/ri_data%n_mem > 0)
1348 :
1349 322 : IF (do_initialize) THEN
1350 22 : pdims(:) = 0
1351 :
1352 : CALL dbt_pgrid_create(para_env, pdims, pgrid, &
1353 : tensor_dims=[SIZE(ri_data%bsizes_RI_fit), &
1354 : (homo(ispin) - 1)/ri_data%n_mem + 1, &
1355 88 : SIZE(ri_data%bsizes_AO_fit)])
1356 : CALL create_3c_tensor(ri_data%t_3c_int_mo(ispin, 1, 1), dist1, dist2, dist3, pgrid, &
1357 : ri_data%bsizes_RI_fit, mo_bsizes_2, ri_data%bsizes_AO_fit, &
1358 : [1], [2, 3], &
1359 22 : name="(RI | MO AO)")
1360 :
1361 22 : DEALLOCATE (dist1, dist2, dist3)
1362 :
1363 : CALL create_3c_tensor(ri_data%t_3c_ctr_KS(ispin, 1, 1), dist1, dist2, dist3, pgrid, &
1364 : ri_data%bsizes_RI_fit, mo_bsizes_2, ri_data%bsizes_AO_fit, &
1365 : [1, 2], [3], &
1366 22 : name="(RI MO | AO)")
1367 22 : DEALLOCATE (dist1, dist2, dist3)
1368 22 : CALL dbt_pgrid_destroy(pgrid)
1369 :
1370 22 : CALL dbt_create(ri_data%t_3c_int_mo(ispin, 1, 1), ri_data%t_3c_ctr_RI(ispin, 1, 1), name="(RI | MO AO)")
1371 22 : CALL dbt_create(ri_data%t_3c_ctr_KS(ispin, 1, 1), ri_data%t_3c_ctr_KS_copy(ispin, 1, 1))
1372 : END IF
1373 :
1374 322 : CALL dbt_create(mo_coeff(ispin), mo_coeff_t, name="MO coeffs")
1375 322 : CALL dbt_copy_matrix_to_tensor(mo_coeff(ispin), mo_coeff_t)
1376 322 : CALL dbt_copy(mo_coeff_t, mo_coeff_t_split, move_data=.TRUE.)
1377 322 : CALL dbt_filter(mo_coeff_t_split, ri_data%filter_eps_mo)
1378 322 : CALL dbt_destroy(mo_coeff_t)
1379 :
1380 322 : CALL dbt_batched_contract_init(ks_t)
1381 322 : CALL dbt_batched_contract_init(ri_data%t_3c_ctr_KS(ispin, 1, 1), batch_range_2=batch_ranges_2)
1382 322 : CALL dbt_batched_contract_init(ri_data%t_3c_ctr_KS_copy(ispin, 1, 1), batch_range_2=batch_ranges_2)
1383 :
1384 322 : CALL dbt_batched_contract_init(ri_data%t_2c_int(ispin, 1))
1385 322 : CALL dbt_batched_contract_init(ri_data%t_3c_int_mo(ispin, 1, 1), batch_range_2=batch_ranges_2)
1386 322 : CALL dbt_batched_contract_init(ri_data%t_3c_ctr_RI(ispin, 1, 1), batch_range_2=batch_ranges_2)
1387 :
1388 1542 : DO i_mem = 1, n_mem
1389 :
1390 3660 : bounds(:, 1) = [mem_start(i_mem), mem_end(i_mem)]
1391 :
1392 1220 : CALL dbt_batched_contract_init(mo_coeff_t_split)
1393 1220 : CALL dbt_batched_contract_init(ri_data%t_3c_int_ctr_1(1, 1))
1394 : CALL dbt_batched_contract_init(t_3c_int_mo_1(1, 1), &
1395 1220 : batch_range_3=batch_ranges_1)
1396 1220 : CALL timeset(routineN//"_MOx3C_R", handle2)
1397 : CALL dbt_contract(1.0_dp, mo_coeff_t_split, ri_data%t_3c_int_ctr_1(1, 1), &
1398 : 0.0_dp, t_3c_int_mo_1(1, 1), &
1399 : contract_1=[1], notcontract_1=[2], &
1400 : contract_2=[3], notcontract_2=[1, 2], &
1401 : map_1=[3], map_2=[1, 2], &
1402 : bounds_2=bounds, &
1403 : filter_eps=ri_data%filter_eps_mo/2, &
1404 : unit_nr=unit_nr_dbcsr, &
1405 : move_data=.FALSE., &
1406 1220 : flop=nflop)
1407 :
1408 1220 : ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
1409 :
1410 1220 : CALL timestop(handle2)
1411 1220 : CALL dbt_batched_contract_finalize(mo_coeff_t_split)
1412 1220 : CALL dbt_batched_contract_finalize(ri_data%t_3c_int_ctr_1(1, 1))
1413 1220 : CALL dbt_batched_contract_finalize(t_3c_int_mo_1(1, 1))
1414 :
1415 1220 : CALL timeset(routineN//"_copy_1", handle2)
1416 1220 : CALL dbt_copy(t_3c_int_mo_1(1, 1), ri_data%t_3c_int_mo(ispin, 1, 1), order=[3, 1, 2], move_data=.TRUE.)
1417 1220 : CALL timestop(handle2)
1418 :
1419 1220 : CALL dbt_batched_contract_init(mo_coeff_t_split)
1420 1220 : CALL dbt_batched_contract_init(ri_data%t_3c_int_ctr_2(1, 1))
1421 : CALL dbt_batched_contract_init(t_3c_int_mo_2(1, 1), &
1422 1220 : batch_range_1=batch_ranges_1)
1423 :
1424 1220 : CALL timeset(routineN//"_MOx3C_L", handle2)
1425 : CALL dbt_contract(1.0_dp, mo_coeff_t_split, ri_data%t_3c_int_ctr_2(1, 1), &
1426 : 0.0_dp, t_3c_int_mo_2(1, 1), &
1427 : contract_1=[1], notcontract_1=[2], &
1428 : contract_2=[1], notcontract_2=[2, 3], &
1429 : map_1=[1], map_2=[2, 3], &
1430 : bounds_2=bounds, &
1431 : filter_eps=ri_data%filter_eps_mo/2, &
1432 : unit_nr=unit_nr_dbcsr, &
1433 : move_data=.FALSE., &
1434 1220 : flop=nflop)
1435 :
1436 1220 : ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
1437 :
1438 1220 : CALL timestop(handle2)
1439 :
1440 1220 : CALL dbt_batched_contract_finalize(mo_coeff_t_split)
1441 1220 : CALL dbt_batched_contract_finalize(ri_data%t_3c_int_ctr_2(1, 1))
1442 1220 : CALL dbt_batched_contract_finalize(t_3c_int_mo_2(1, 1))
1443 :
1444 1220 : CALL timeset(routineN//"_copy_1", handle2)
1445 : CALL dbt_copy(t_3c_int_mo_2(1, 1), ri_data%t_3c_int_mo(ispin, 1, 1), order=[2, 1, 3], &
1446 1220 : summation=.TRUE., move_data=.TRUE.)
1447 :
1448 1220 : CALL dbt_filter(ri_data%t_3c_int_mo(ispin, 1, 1), ri_data%filter_eps_mo)
1449 1220 : CALL timestop(handle2)
1450 :
1451 1220 : CALL timeset(routineN//"_RIx3C", handle2)
1452 :
1453 : CALL dbt_contract(1.0_dp, ri_data%t_2c_int(ispin, 1), ri_data%t_3c_int_mo(ispin, 1, 1), &
1454 : 0.0_dp, ri_data%t_3c_ctr_RI(ispin, 1, 1), &
1455 : contract_1=[1], notcontract_1=[2], &
1456 : contract_2=[1], notcontract_2=[2, 3], &
1457 : map_1=[1], map_2=[2, 3], filter_eps=ri_data%filter_eps, &
1458 : unit_nr=unit_nr_dbcsr, &
1459 1220 : flop=nflop)
1460 :
1461 1220 : ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
1462 :
1463 1220 : CALL timestop(handle2)
1464 :
1465 1220 : CALL timeset(routineN//"_copy_2", handle2)
1466 :
1467 : ! note: this copy should not involve communication (same block sizes, same 3d distribution on same process grid)
1468 1220 : CALL dbt_copy(ri_data%t_3c_ctr_RI(ispin, 1, 1), ri_data%t_3c_ctr_KS(ispin, 1, 1), move_data=.TRUE.)
1469 1220 : CALL dbt_copy(ri_data%t_3c_ctr_KS(ispin, 1, 1), ri_data%t_3c_ctr_KS_copy(ispin, 1, 1))
1470 1220 : CALL timestop(handle2)
1471 :
1472 1220 : CALL timeset(routineN//"_3Cx3C", handle2)
1473 : CALL dbt_contract(-fac, ri_data%t_3c_ctr_KS(ispin, 1, 1), ri_data%t_3c_ctr_KS_copy(ispin, 1, 1), &
1474 : 1.0_dp, ks_t, &
1475 : contract_1=[1, 2], notcontract_1=[3], &
1476 : contract_2=[1, 2], notcontract_2=[3], &
1477 : map_1=[1], map_2=[2], filter_eps=ri_data%filter_eps/n_mem, &
1478 : unit_nr=unit_nr_dbcsr, move_data=.TRUE., &
1479 1220 : flop=nflop)
1480 :
1481 1220 : ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
1482 :
1483 10082 : CALL timestop(handle2)
1484 : END DO
1485 :
1486 322 : CALL dbt_batched_contract_finalize(ks_t)
1487 322 : CALL dbt_batched_contract_finalize(ri_data%t_3c_ctr_KS(ispin, 1, 1))
1488 322 : CALL dbt_batched_contract_finalize(ri_data%t_3c_ctr_KS_copy(ispin, 1, 1))
1489 :
1490 322 : CALL dbt_batched_contract_finalize(ri_data%t_2c_int(ispin, 1))
1491 322 : CALL dbt_batched_contract_finalize(ri_data%t_3c_int_mo(ispin, 1, 1))
1492 322 : CALL dbt_batched_contract_finalize(ri_data%t_3c_ctr_RI(ispin, 1, 1))
1493 :
1494 322 : CALL dbt_destroy(t_3c_int_mo_1(1, 1))
1495 322 : CALL dbt_destroy(t_3c_int_mo_2(1, 1))
1496 322 : CALL dbt_clear(ri_data%t_3c_int_mo(ispin, 1, 1))
1497 :
1498 322 : CALL dbt_destroy(mo_coeff_t_split)
1499 :
1500 322 : CALL dbt_filter(ks_t, ri_data%filter_eps)
1501 :
1502 322 : CALL dbt_create(ks_matrix(ispin, 1)%matrix, ks_t_mat)
1503 322 : CALL dbt_copy(ks_t, ks_t_mat, move_data=.TRUE.)
1504 322 : CALL dbt_copy_tensor_to_matrix(ks_t_mat, ks_matrix(ispin, 1)%matrix, summation=.TRUE.)
1505 322 : CALL dbt_destroy(ks_t_mat)
1506 :
1507 0 : DEALLOCATE (mem_end, mem_start, mo_bsizes_2, mem_size, mem_start_block_1, mem_end_block_1, &
1508 1838 : mem_start_block_2, mem_end_block_2, batch_ranges_1, batch_ranges_2)
1509 :
1510 : END DO
1511 :
1512 228 : CALL dbt_pgrid_destroy(pgrid_2d)
1513 228 : CALL dbt_destroy(ks_t)
1514 :
1515 228 : CALL para_env%sync()
1516 228 : t2 = m_walltime()
1517 :
1518 228 : ri_data%dbcsr_time = ri_data%dbcsr_time + t2 - t1
1519 :
1520 228 : CALL timestop(handle)
1521 :
1522 2052 : END SUBROUTINE
1523 :
1524 : ! **************************************************************************************************
1525 : !> \brief Calculate Fock (AKA Kohn-Sham) matrix in rho flavor
1526 : !>
1527 : !> M(mu, lambda, R) = sum_{nu} int_3c(mu, nu, R) P(nu, lambda)
1528 : !> KS(mu, lambda) = sum_{nu,R} B(mu, nu, R) M(lambda, nu, R)
1529 : !> \param qs_env ...
1530 : !> \param ri_data ...
1531 : !> \param ks_matrix ...
1532 : !> \param rho_ao ...
1533 : !> \param geometry_did_change ...
1534 : !> \param nspins ...
1535 : !> \param fac ...
1536 : ! **************************************************************************************************
1537 1472 : SUBROUTINE hfx_ri_update_ks_Pmat(qs_env, ri_data, ks_matrix, rho_ao, &
1538 : geometry_did_change, nspins, fac)
1539 : TYPE(qs_environment_type), POINTER :: qs_env
1540 : TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
1541 : TYPE(dbcsr_p_type), DIMENSION(:, :) :: ks_matrix, rho_ao
1542 : LOGICAL, INTENT(IN) :: geometry_did_change
1543 : INTEGER, INTENT(IN) :: nspins
1544 : REAL(dp), INTENT(IN) :: fac
1545 :
1546 : CHARACTER(LEN=*), PARAMETER :: routineN = 'hfx_ri_update_ks_Pmat'
1547 :
1548 : INTEGER :: handle, handle2, i_mem, ispin, j_mem, &
1549 : n_mem, n_mem_RI, unit_nr, unit_nr_dbcsr
1550 : INTEGER(int_8) :: flops_ks_max, flops_p_max, nblks, nflop, &
1551 : nze, nze_3c, nze_3c_1, nze_3c_2, &
1552 : nze_ks, nze_rho
1553 1472 : INTEGER, ALLOCATABLE, DIMENSION(:) :: batch_ranges_AO, batch_ranges_RI, dist1, &
1554 1472 : dist2
1555 : INTEGER, DIMENSION(2, 1) :: bounds_i
1556 : INTEGER, DIMENSION(2, 2) :: bounds_ij, bounds_j
1557 : INTEGER, DIMENSION(3) :: dims_3c
1558 : REAL(dp) :: memory_3c, occ, occ_3c, occ_3c_1, &
1559 : occ_3c_2, occ_ks, occ_rho, t1, t2, &
1560 : unused
1561 36800 : TYPE(dbt_type) :: ks_t, ks_tmp, rho_ao_tmp, t_3c_1, &
1562 19136 : t_3c_3, tensor_old
1563 : TYPE(mp_para_env_type), POINTER :: para_env
1564 :
1565 1472 : IF (.NOT. fac > EPSILON(0.0_dp)) RETURN
1566 :
1567 1472 : CALL timeset(routineN, handle)
1568 :
1569 1472 : NULLIFY (para_env)
1570 :
1571 : ! get a useful output_unit
1572 1472 : unit_nr_dbcsr = ri_data%unit_nr_dbcsr
1573 1472 : unit_nr = ri_data%unit_nr
1574 :
1575 1472 : CALL get_qs_env(qs_env, para_env=para_env)
1576 :
1577 1472 : CPASSERT(SIZE(ks_matrix, 2) == 1)
1578 :
1579 1472 : IF (geometry_did_change) THEN
1580 70 : CALL hfx_ri_pre_scf_Pmat(qs_env, ri_data)
1581 160 : DO ispin = 1, nspins
1582 90 : CALL dbt_scale(ri_data%rho_ao_t(ispin, 1), 0.0_dp)
1583 160 : CALL dbt_scale(ri_data%ks_t(ispin, 1), 0.0_dp)
1584 : END DO
1585 : END IF
1586 :
1587 1472 : nblks = dbt_get_num_blocks_total(ri_data%t_3c_int_ctr_2(1, 1))
1588 1472 : IF (nblks == 0) THEN
1589 0 : CPABORT("3-center integrals are not available (first call requires geometry_did_change=.TRUE.)")
1590 : END IF
1591 :
1592 1472 : n_mem = ri_data%n_mem
1593 1472 : n_mem_RI = ri_data%n_mem_RI
1594 :
1595 1472 : CALL dbt_create(ks_matrix(1, 1)%matrix, ks_tmp)
1596 1472 : CALL dbt_create(rho_ao(1, 1)%matrix, rho_ao_tmp)
1597 :
1598 : CALL create_2c_tensor(ks_t, dist1, dist2, ri_data%pgrid_2d, &
1599 : ri_data%bsizes_AO_split, ri_data%bsizes_AO_split, &
1600 : name="(AO | AO)")
1601 1472 : DEALLOCATE (dist1, dist2)
1602 :
1603 1472 : CALL dbt_create(ri_data%t_3c_int_ctr_2(1, 1), t_3c_1)
1604 1472 : CALL dbt_create(ri_data%t_3c_int_ctr_1(1, 1), t_3c_3)
1605 :
1606 1472 : CALL para_env%sync()
1607 1472 : t1 = m_walltime()
1608 :
1609 1472 : flops_ks_max = 0; flops_p_max = 0
1610 :
1611 4416 : ALLOCATE (batch_ranges_RI(ri_data%n_mem_RI + 1))
1612 4416 : ALLOCATE (batch_ranges_AO(ri_data%n_mem + 1))
1613 5888 : batch_ranges_RI(:ri_data%n_mem_RI) = ri_data%starts_array_RI_mem_block(:)
1614 1472 : batch_ranges_RI(ri_data%n_mem_RI + 1) = ri_data%ends_array_RI_mem_block(ri_data%n_mem_RI) + 1
1615 5888 : batch_ranges_AO(:ri_data%n_mem) = ri_data%starts_array_mem_block(:)
1616 1472 : batch_ranges_AO(ri_data%n_mem + 1) = ri_data%ends_array_mem_block(ri_data%n_mem) + 1
1617 :
1618 1472 : memory_3c = 0.0_dp
1619 3214 : DO ispin = 1, nspins
1620 :
1621 1742 : CALL get_tensor_occupancy(ri_data%t_3c_int_ctr_2(1, 1), nze_3c, occ_3c)
1622 :
1623 : nze_rho = 0
1624 : occ_rho = 0.0_dp
1625 1742 : nze_3c_1 = 0
1626 1742 : occ_3c_1 = 0.0_dp
1627 1742 : nze_3c_2 = 0
1628 1742 : occ_3c_2 = 0.0_dp
1629 :
1630 1742 : CALL dbt_copy_matrix_to_tensor(rho_ao(ispin, 1)%matrix, rho_ao_tmp)
1631 :
1632 : !We work with Delta P: the diff between previous SCF step and this one, for increased sparsity
1633 1742 : CALL dbt_scale(ri_data%rho_ao_t(ispin, 1), -1.0_dp)
1634 1742 : CALL dbt_copy(rho_ao_tmp, ri_data%rho_ao_t(ispin, 1), summation=.TRUE., move_data=.TRUE.)
1635 :
1636 1742 : CALL get_tensor_occupancy(ri_data%rho_ao_t(ispin, 1), nze_rho, occ_rho)
1637 :
1638 : CALL dbt_batched_contract_init(ri_data%t_3c_int_ctr_1(1, 1), batch_range_1=batch_ranges_AO, &
1639 1742 : batch_range_2=batch_ranges_RI)
1640 1742 : CALL dbt_batched_contract_init(t_3c_3, batch_range_1=batch_ranges_AO, batch_range_2=batch_ranges_RI)
1641 :
1642 1742 : CALL dbt_create(ri_data%t_3c_int_ctr_1(1, 1), tensor_old)
1643 :
1644 6968 : DO i_mem = 1, n_mem
1645 :
1646 5226 : CALL dbt_batched_contract_init(ri_data%rho_ao_t(ispin, 1))
1647 : CALL dbt_batched_contract_init(ri_data%t_3c_int_ctr_2(1, 1), batch_range_2=batch_ranges_RI, &
1648 5226 : batch_range_3=batch_ranges_AO)
1649 5226 : CALL dbt_batched_contract_init(t_3c_1, batch_range_2=batch_ranges_RI, batch_range_3=batch_ranges_AO)
1650 20904 : DO j_mem = 1, n_mem_RI
1651 :
1652 15678 : CALL timeset(routineN//"_Px3C", handle2)
1653 :
1654 15678 : CALL dbt_get_info(t_3c_1, nfull_total=dims_3c)
1655 47034 : bounds_i(:, 1) = [ri_data%starts_array_mem(i_mem), ri_data%ends_array_mem(i_mem)]
1656 47034 : bounds_j(:, 1) = [1, dims_3c(1)]
1657 47034 : bounds_j(:, 2) = [ri_data%starts_array_RI_mem(j_mem), ri_data%ends_array_RI_mem(j_mem)]
1658 :
1659 : CALL dbt_contract(1.0_dp, ri_data%rho_ao_t(ispin, 1), ri_data%t_3c_int_ctr_2(1, 1), &
1660 : 0.0_dp, t_3c_1, &
1661 : contract_1=[2], notcontract_1=[1], &
1662 : contract_2=[3], notcontract_2=[1, 2], &
1663 : map_1=[3], map_2=[1, 2], filter_eps=ri_data%filter_eps, &
1664 : bounds_2=bounds_i, &
1665 : bounds_3=bounds_j, &
1666 : unit_nr=unit_nr_dbcsr, &
1667 15678 : flop=nflop)
1668 :
1669 15678 : CALL timestop(handle2)
1670 :
1671 15678 : ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
1672 :
1673 15678 : CALL get_tensor_occupancy(t_3c_1, nze, occ)
1674 15678 : nze_3c_1 = nze_3c_1 + nze
1675 15678 : occ_3c_1 = occ_3c_1 + occ
1676 :
1677 15678 : CALL timeset(routineN//"_copy_2", handle2)
1678 15678 : CALL dbt_copy(t_3c_1, t_3c_3, order=[3, 2, 1], move_data=.TRUE.)
1679 15678 : CALL timestop(handle2)
1680 :
1681 47034 : bounds_ij(:, 1) = [ri_data%starts_array_mem(i_mem), ri_data%ends_array_mem(i_mem)]
1682 47034 : bounds_ij(:, 2) = [ri_data%starts_array_RI_mem(j_mem), ri_data%ends_array_RI_mem(j_mem)]
1683 :
1684 : CALL decompress_tensor(tensor_old, ri_data%blk_indices(i_mem, j_mem)%ind, &
1685 15678 : ri_data%store_3c(i_mem, j_mem), ri_data%filter_eps_storage)
1686 :
1687 15678 : CALL dbt_copy(tensor_old, ri_data%t_3c_int_ctr_1(1, 1), move_data=.TRUE.)
1688 :
1689 15678 : CALL get_tensor_occupancy(ri_data%t_3c_int_ctr_1(1, 1), nze, occ)
1690 15678 : nze_3c_2 = nze_3c_2 + nze
1691 15678 : occ_3c_2 = occ_3c_2 + occ
1692 15678 : CALL timeset(routineN//"_KS", handle2)
1693 15678 : CALL dbt_batched_contract_init(ks_t)
1694 : CALL dbt_contract(-fac, ri_data%t_3c_int_ctr_1(1, 1), t_3c_3, &
1695 : 1.0_dp, ks_t, &
1696 : contract_1=[1, 2], notcontract_1=[3], &
1697 : contract_2=[1, 2], notcontract_2=[3], &
1698 : map_1=[1], map_2=[2], filter_eps=ri_data%filter_eps/n_mem, &
1699 : bounds_1=bounds_ij, &
1700 : unit_nr=unit_nr_dbcsr, &
1701 15678 : flop=nflop, move_data=.TRUE.)
1702 :
1703 15678 : CALL dbt_batched_contract_finalize(ks_t, unit_nr=unit_nr_dbcsr)
1704 15678 : CALL timestop(handle2)
1705 :
1706 67938 : ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
1707 :
1708 : END DO
1709 5226 : CALL dbt_batched_contract_finalize(ri_data%rho_ao_t(ispin, 1), unit_nr=unit_nr_dbcsr)
1710 5226 : CALL dbt_batched_contract_finalize(ri_data%t_3c_int_ctr_2(1, 1))
1711 6968 : CALL dbt_batched_contract_finalize(t_3c_1)
1712 : END DO
1713 1742 : CALL dbt_batched_contract_finalize(ri_data%t_3c_int_ctr_1(1, 1))
1714 1742 : CALL dbt_batched_contract_finalize(t_3c_3)
1715 :
1716 6968 : DO i_mem = 1, n_mem
1717 22646 : DO j_mem = 1, n_mem_RI
1718 5226 : ASSOCIATE (blk_indices => ri_data%blk_indices(i_mem, j_mem), t_3c => ri_data%t_3c_int_ctr_1(1, 1))
1719 : CALL decompress_tensor(tensor_old, blk_indices%ind, &
1720 15678 : ri_data%store_3c(i_mem, j_mem), ri_data%filter_eps_storage)
1721 15678 : CALL dbt_copy(tensor_old, t_3c, move_data=.TRUE.)
1722 :
1723 15678 : unused = 0
1724 : CALL compress_tensor(t_3c, blk_indices%ind, ri_data%store_3c(i_mem, j_mem), &
1725 31356 : ri_data%filter_eps_storage, unused)
1726 : END ASSOCIATE
1727 : END DO
1728 : END DO
1729 :
1730 1742 : CALL dbt_destroy(tensor_old)
1731 :
1732 1742 : CALL get_tensor_occupancy(ks_t, nze_ks, occ_ks)
1733 :
1734 : !rho_ao_t holds the density difference, and ks_t is built upon it => need the full picture
1735 1742 : CALL dbt_copy_matrix_to_tensor(rho_ao(ispin, 1)%matrix, rho_ao_tmp)
1736 1742 : CALL dbt_copy(rho_ao_tmp, ri_data%rho_ao_t(ispin, 1), move_data=.TRUE.)
1737 1742 : CALL dbt_copy(ks_t, ri_data%ks_t(ispin, 1), summation=.TRUE., move_data=.TRUE.)
1738 :
1739 1742 : CALL dbt_copy(ri_data%ks_t(ispin, 1), ks_tmp)
1740 1742 : CALL dbt_copy_tensor_to_matrix(ks_tmp, ks_matrix(ispin, 1)%matrix, summation=.TRUE.)
1741 1742 : CALL dbt_clear(ks_tmp)
1742 :
1743 8440 : IF (unit_nr > 0 .AND. geometry_did_change) THEN
1744 : WRITE (unit_nr, '(T6,A,T63,ES7.1,1X,A1,1X,F7.3,A1)') &
1745 28 : 'Occupancy of density matrix P:', REAL(nze_rho, dp), '/', occ_rho*100, '%'
1746 : WRITE (unit_nr, '(T6,A,T63,ES7.1,1X,A1,1X,F7.3,A1)') &
1747 28 : 'Occupancy of 3c ints:', REAL(nze_3c, dp), '/', occ_3c*100, '%'
1748 : WRITE (unit_nr, '(T6,A,T63,ES7.1,1X,A1,1X,F7.3,A1)') &
1749 28 : 'Occupancy after contraction with K:', REAL(nze_3c_2, dp), '/', occ_3c_2*100, '%'
1750 : WRITE (unit_nr, '(T6,A,T63,ES7.1,1X,A1,1X,F7.3,A1)') &
1751 28 : 'Occupancy after contraction with P:', REAL(nze_3c_1, dp), '/', occ_3c_1*100, '%'
1752 : WRITE (unit_nr, '(T6,A,T63,ES7.1,1X,A1,1X,F7.3,A1)') &
1753 28 : 'Occupancy of Kohn-Sham matrix:', REAL(nze_ks, dp), '/', occ_ks*100, '%'
1754 : END IF
1755 :
1756 : END DO
1757 :
1758 1472 : CALL para_env%sync()
1759 1472 : t2 = m_walltime()
1760 :
1761 1472 : ri_data%dbcsr_time = ri_data%dbcsr_time + t2 - t1
1762 :
1763 1472 : CALL dbt_destroy(t_3c_1)
1764 1472 : CALL dbt_destroy(t_3c_3)
1765 :
1766 1472 : CALL dbt_destroy(rho_ao_tmp)
1767 1472 : CALL dbt_destroy(ks_t)
1768 1472 : CALL dbt_destroy(ks_tmp)
1769 :
1770 1472 : CALL timestop(handle)
1771 :
1772 4416 : END SUBROUTINE
1773 :
1774 : ! **************************************************************************************************
1775 : !> \brief Implementation based on the MO flavor
1776 : !> \param qs_env ...
1777 : !> \param ri_data ...
1778 : !> \param nspins ...
1779 : !> \param hf_fraction ...
1780 : !> \param mo_coeff ...
1781 : !> \param use_virial ...
1782 : !> \note There is no response code for forces with the MO flavor
1783 : ! **************************************************************************************************
1784 14 : SUBROUTINE hfx_ri_forces_mo(qs_env, ri_data, nspins, hf_fraction, mo_coeff, use_virial)
1785 :
1786 : TYPE(qs_environment_type), POINTER :: qs_env
1787 : TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
1788 : INTEGER, INTENT(IN) :: nspins
1789 : REAL(dp), INTENT(IN) :: hf_fraction
1790 : TYPE(dbcsr_type), DIMENSION(:), INTENT(IN) :: mo_coeff
1791 : LOGICAL, INTENT(IN), OPTIONAL :: use_virial
1792 :
1793 : CHARACTER(LEN=*), PARAMETER :: routineN = 'hfx_ri_forces_mo'
1794 :
1795 : INTEGER :: dummy_int, handle, i_mem, i_xyz, ibasis, ispin, j_mem, k_mem, n_mem, n_mem_input, &
1796 : n_mem_input_RI, n_mem_RI, n_mem_RI_fit, n_mos, natom, nkind, unit_nr_dbcsr
1797 : INTEGER(int_8) :: nflop
1798 14 : INTEGER, ALLOCATABLE, DIMENSION(:) :: atom_of_kind, batch_blk_end, batch_blk_start, &
1799 14 : batch_end, batch_end_RI, batch_end_RI_fit, batch_ranges, batch_ranges_RI, &
1800 14 : batch_ranges_RI_fit, batch_start, batch_start_RI, batch_start_RI_fit, bsizes_MO, dist1, &
1801 14 : dist2, dist3, idx_to_at_AO, idx_to_at_RI, kind_of
1802 : INTEGER, DIMENSION(2, 1) :: bounds_ctr_1d
1803 : INTEGER, DIMENSION(2, 2) :: bounds_ctr_2d
1804 : INTEGER, DIMENSION(3) :: pdims
1805 : LOGICAL :: use_virial_prv
1806 : REAL(dp) :: pref, spin_fac, t1, t2
1807 14 : TYPE(atomic_kind_type), DIMENSION(:), POINTER :: atomic_kind_set
1808 14 : TYPE(block_ind_type), ALLOCATABLE, DIMENSION(:, :) :: t_3c_der_AO_ind, t_3c_der_RI_ind
1809 : TYPE(cell_type), POINTER :: cell
1810 14 : TYPE(dbcsr_p_type), DIMENSION(:), POINTER :: matrix_s
1811 70 : TYPE(dbt_pgrid_type) :: pgrid_1, pgrid_2
1812 602 : TYPE(dbt_type) :: t_2c_RI, t_2c_RI_inv, t_2c_RI_met, t_2c_RI_PQ, t_2c_tmp, t_3c_0, t_3c_1, &
1813 700 : t_3c_2, t_3c_3, t_3c_4, t_3c_5, t_3c_6, t_3c_ao_ri_ao, t_3c_ao_ri_mo, t_3c_desymm, &
1814 434 : t_3c_mo_ri_ao, t_3c_mo_ri_mo, t_3c_ri_ao_ao, t_3c_RI_ctr, t_3c_ri_mo_mo, &
1815 350 : t_3c_ri_mo_mo_fit, t_3c_work, t_mo_coeff, t_mo_cpy
1816 14 : TYPE(dbt_type), ALLOCATABLE, DIMENSION(:) :: t_2c_der_metric, t_2c_der_RI, t_2c_MO_AO, &
1817 28 : t_2c_MO_AO_ctr, t_3c_der_AO, t_3c_der_AO_ctr_1, t_3c_der_RI, t_3c_der_RI_ctr_1, &
1818 14 : t_3c_der_RI_ctr_2
1819 : TYPE(dft_control_type), POINTER :: dft_control
1820 : TYPE(gto_basis_set_p_type), ALLOCATABLE, &
1821 14 : DIMENSION(:), TARGET :: basis_set_AO, basis_set_RI
1822 : TYPE(gto_basis_set_type), POINTER :: orb_basis, ri_basis
1823 : TYPE(hfx_compression_type), ALLOCATABLE, &
1824 14 : DIMENSION(:, :) :: t_3c_der_AO_comp, t_3c_der_RI_comp
1825 : TYPE(mp_para_env_type), POINTER :: para_env
1826 14 : TYPE(particle_type), DIMENSION(:), POINTER :: particle_set
1827 14 : TYPE(qs_force_type), DIMENSION(:), POINTER :: force
1828 14 : TYPE(qs_kind_type), DIMENSION(:), POINTER :: qs_kind_set
1829 :
1830 : ! 1) Precompute the derivatives that are needed (3c, 3c RI and metric)
1831 : ! 2) Go over batched of occupied MOs so as to save memory and optimize contractions
1832 : ! 3) Contract all 3c integrals and derivatives with MO coeffs
1833 : ! 4) Contract relevant quantities with the inverse 2c RI (metric or pot)
1834 : ! 5) First force contribution with the 2c RI derivative d/dx (Q|R)
1835 : ! 6) If metric, do the additional contraction with S_pq^-1 (Q|R)
1836 : ! 7) Do the force contribution due to 3c integrals (a'b|P) and (ab|P')
1837 : ! 8) If metric, do the last force contribution due to d/dx S^-1 (First contract (ab|P), then S^-1)
1838 :
1839 14 : use_virial_prv = .FALSE.
1840 14 : IF (PRESENT(use_virial)) use_virial_prv = use_virial
1841 14 : IF (use_virial_prv) THEN
1842 0 : CPABORT("Stress tensor with RI-HFX MO flavor not implemented.")
1843 : END IF
1844 :
1845 14 : unit_nr_dbcsr = ri_data%unit_nr_dbcsr
1846 :
1847 : CALL get_qs_env(qs_env, natom=natom, particle_set=particle_set, nkind=nkind, &
1848 : atomic_kind_set=atomic_kind_set, cell=cell, force=force, &
1849 : matrix_s=matrix_s, para_env=para_env, dft_control=dft_control, &
1850 14 : qs_kind_set=qs_kind_set)
1851 :
1852 14 : pdims(:) = 0
1853 : CALL dbt_pgrid_create(para_env, pdims, pgrid_1, tensor_dims=[SIZE(ri_data%bsizes_AO_split), &
1854 : SIZE(ri_data%bsizes_RI_split), &
1855 56 : SIZE(ri_data%bsizes_AO_split)])
1856 14 : pdims(:) = 0
1857 : CALL dbt_pgrid_create(para_env, pdims, pgrid_2, tensor_dims=[SIZE(ri_data%bsizes_RI_split), &
1858 : SIZE(ri_data%bsizes_AO_split), &
1859 56 : SIZE(ri_data%bsizes_AO_split)])
1860 :
1861 : CALL create_3c_tensor(t_3c_ao_ri_ao, dist1, dist2, dist3, pgrid_1, &
1862 : ri_data%bsizes_AO_split, ri_data%bsizes_RI_split, ri_data%bsizes_AO_split, &
1863 14 : [1, 2], [3], name="(AO RI | AO)")
1864 14 : DEALLOCATE (dist1, dist2, dist3)
1865 : CALL create_3c_tensor(t_3c_ri_ao_ao, dist1, dist2, dist3, pgrid_2, &
1866 : ri_data%bsizes_RI_split, ri_data%bsizes_AO_split, ri_data%bsizes_AO_split, &
1867 14 : [1], [2, 3], name="(RI | AO AO)")
1868 14 : DEALLOCATE (dist1, dist2, dist3)
1869 :
1870 104 : ALLOCATE (basis_set_RI(nkind), basis_set_AO(nkind))
1871 14 : CALL basis_set_list_setup(basis_set_RI, ri_data%ri_basis_type, qs_kind_set)
1872 14 : CALL get_particle_set(particle_set, qs_kind_set, basis=basis_set_RI)
1873 14 : CALL basis_set_list_setup(basis_set_AO, ri_data%orb_basis_type, qs_kind_set)
1874 14 : CALL get_particle_set(particle_set, qs_kind_set, basis=basis_set_AO)
1875 :
1876 38 : DO ibasis = 1, SIZE(basis_set_AO)
1877 24 : orb_basis => basis_set_AO(ibasis)%gto_basis_set
1878 24 : CALL init_interaction_radii_orb_basis(orb_basis, ri_data%eps_pgf_orb)
1879 24 : ri_basis => basis_set_RI(ibasis)%gto_basis_set
1880 38 : CALL init_interaction_radii_orb_basis(ri_basis, ri_data%eps_pgf_orb)
1881 : END DO
1882 :
1883 : ALLOCATE (t_2c_der_metric(3), t_2c_der_RI(3), t_2c_MO_AO(3), t_2c_MO_AO_ctr(3), t_3c_der_AO(3), &
1884 1148 : t_3c_der_AO_ctr_1(3), t_3c_der_RI(3), t_3c_der_RI_ctr_1(3), t_3c_der_RI_ctr_2(3))
1885 :
1886 : ! 1) Precompute the derivatives
1887 : CALL precalc_derivatives(t_3c_der_RI_comp, t_3c_der_AO_comp, t_3c_der_RI_ind, t_3c_der_AO_ind, &
1888 : t_2c_der_RI, t_2c_der_metric, t_3c_ri_ao_ao, &
1889 14 : basis_set_AO, basis_set_RI, ri_data, qs_env)
1890 :
1891 38 : DO ibasis = 1, SIZE(basis_set_AO)
1892 24 : orb_basis => basis_set_AO(ibasis)%gto_basis_set
1893 24 : ri_basis => basis_set_RI(ibasis)%gto_basis_set
1894 24 : CALL init_interaction_radii_orb_basis(orb_basis, dft_control%qs_control%eps_pgf_orb)
1895 38 : CALL init_interaction_radii_orb_basis(ri_basis, dft_control%qs_control%eps_pgf_orb)
1896 : END DO
1897 :
1898 14 : n_mem = SIZE(t_3c_der_RI_comp, 1)
1899 56 : DO i_xyz = 1, 3
1900 42 : CALL dbt_create(t_3c_ao_ri_ao, t_3c_der_RI(i_xyz))
1901 42 : CALL dbt_create(t_3c_ao_ri_ao, t_3c_der_AO(i_xyz))
1902 :
1903 194 : DO i_mem = 1, n_mem
1904 : CALL decompress_tensor(t_3c_ri_ao_ao, t_3c_der_RI_ind(i_mem, i_xyz)%ind, &
1905 138 : t_3c_der_RI_comp(i_mem, i_xyz), ri_data%filter_eps_storage)
1906 138 : CALL dbt_copy(t_3c_ri_ao_ao, t_3c_der_RI(i_xyz), order=[2, 1, 3], move_data=.TRUE., summation=.TRUE.)
1907 :
1908 : CALL decompress_tensor(t_3c_ri_ao_ao, t_3c_der_AO_ind(i_mem, i_xyz)%ind, &
1909 138 : t_3c_der_AO_comp(i_mem, i_xyz), ri_data%filter_eps_storage)
1910 180 : CALL dbt_copy(t_3c_ri_ao_ao, t_3c_der_AO(i_xyz), order=[2, 1, 3], move_data=.TRUE., summation=.TRUE.)
1911 : END DO
1912 : END DO
1913 :
1914 56 : DO i_xyz = 1, 3
1915 194 : DO i_mem = 1, n_mem
1916 138 : CALL dealloc_containers(t_3c_der_AO_comp(i_mem, i_xyz), dummy_int)
1917 180 : CALL dealloc_containers(t_3c_der_RI_comp(i_mem, i_xyz), dummy_int)
1918 : END DO
1919 : END DO
1920 290 : DEALLOCATE (t_3c_der_AO_ind, t_3c_der_RI_ind)
1921 :
1922 : ! Get the 3c integrals (desymmetrized)
1923 14 : CALL dbt_create(t_3c_ao_ri_ao, t_3c_desymm)
1924 14 : CALL dbt_copy(ri_data%t_3c_int_ctr_1(1, 1), t_3c_desymm)
1925 : CALL dbt_copy(ri_data%t_3c_int_ctr_1(1, 1), t_3c_desymm, order=[3, 2, 1], &
1926 14 : summation=.TRUE., move_data=.TRUE.)
1927 :
1928 14 : CALL dbt_destroy(t_3c_ao_ri_ao)
1929 14 : CALL dbt_destroy(t_3c_ri_ao_ao)
1930 :
1931 : ! Some utilities
1932 14 : spin_fac = 0.5_dp
1933 14 : IF (nspins == 2) spin_fac = 1.0_dp
1934 :
1935 42 : ALLOCATE (idx_to_at_RI(SIZE(ri_data%bsizes_RI_split)))
1936 14 : CALL get_idx_to_atom(idx_to_at_RI, ri_data%bsizes_RI_split, ri_data%bsizes_RI)
1937 :
1938 42 : ALLOCATE (idx_to_at_AO(SIZE(ri_data%bsizes_AO_split)))
1939 14 : CALL get_idx_to_atom(idx_to_at_AO, ri_data%bsizes_AO_split, ri_data%bsizes_AO)
1940 :
1941 14 : CALL get_atomic_kind_set(atomic_kind_set, kind_of=kind_of, atom_of_kind=atom_of_kind)
1942 :
1943 : ! 2-center RI tensors
1944 : CALL create_2c_tensor(t_2c_RI, dist1, dist2, ri_data%pgrid_2d, &
1945 14 : ri_data%bsizes_RI_split, ri_data%bsizes_RI_split, name="(RI | RI)")
1946 14 : DEALLOCATE (dist1, dist2)
1947 :
1948 : CALL create_2c_tensor(t_2c_RI_PQ, dist1, dist2, ri_data%pgrid_2d, &
1949 : ri_data%bsizes_RI_fit, ri_data%bsizes_RI_fit, name="(RI | RI)")
1950 14 : DEALLOCATE (dist1, dist2)
1951 :
1952 14 : IF (.NOT. ri_data%same_op) THEN
1953 : !precompute the (P|Q)*S^-1 product
1954 4 : CALL dbt_create(t_2c_RI_PQ, t_2c_RI_inv)
1955 4 : CALL dbt_create(t_2c_RI_PQ, t_2c_RI_met)
1956 4 : CALL dbt_create(ri_data%t_2c_inv(1, 1), t_2c_tmp)
1957 :
1958 : CALL dbt_contract(1.0_dp, ri_data%t_2c_inv(1, 1), ri_data%t_2c_pot(1, 1), &
1959 : 0.0_dp, t_2c_tmp, &
1960 : contract_1=[2], notcontract_1=[1], &
1961 : contract_2=[1], notcontract_2=[2], &
1962 : map_1=[1], map_2=[2], filter_eps=ri_data%filter_eps, &
1963 4 : unit_nr=unit_nr_dbcsr, flop=nflop)
1964 4 : ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
1965 :
1966 4 : CALL dbt_copy(t_2c_tmp, t_2c_RI_inv, move_data=.TRUE.)
1967 4 : CALL dbt_destroy(t_2c_tmp)
1968 : END IF
1969 :
1970 : !3 loops in MO force evaluations. To be consistent with input MEMORY_CUT, need to take the cubic root
1971 : !No need to cut memory further because SCF tensors alrady dense
1972 14 : n_mem_input = FLOOR((ri_data%n_mem_input - 0.1_dp)**(1._dp/3._dp)) + 1
1973 14 : n_mem_input_RI = FLOOR((ri_data%n_mem_input - 0.1_dp)/n_mem_input**2) + 1
1974 :
1975 : !batches on RI_split and RI_fit blocks
1976 14 : n_mem_RI = n_mem_input_RI
1977 : CALL create_tensor_batches(ri_data%bsizes_RI_split, n_mem_RI, batch_start_RI, batch_end_RI, &
1978 14 : batch_blk_start, batch_blk_end)
1979 42 : ALLOCATE (batch_ranges_RI(n_mem_RI + 1))
1980 28 : batch_ranges_RI(1:n_mem_RI) = batch_blk_start(1:n_mem_RI)
1981 14 : batch_ranges_RI(n_mem_RI + 1) = batch_blk_end(n_mem_RI) + 1
1982 14 : DEALLOCATE (batch_blk_start, batch_blk_end)
1983 :
1984 14 : n_mem_RI_fit = n_mem_input_RI
1985 : CALL create_tensor_batches(ri_data%bsizes_RI_fit, n_mem_RI_fit, batch_start_RI_fit, batch_end_RI_fit, &
1986 14 : batch_blk_start, batch_blk_end)
1987 42 : ALLOCATE (batch_ranges_RI_fit(n_mem_RI_fit + 1))
1988 28 : batch_ranges_RI_fit(1:n_mem_RI_fit) = batch_blk_start(1:n_mem_RI_fit)
1989 14 : batch_ranges_RI_fit(n_mem_RI_fit + 1) = batch_blk_end(n_mem_RI_fit) + 1
1990 14 : DEALLOCATE (batch_blk_start, batch_blk_end)
1991 :
1992 32 : DO ispin = 1, nspins
1993 :
1994 : ! 2 )Prepare the batches for this spin
1995 18 : CALL dbcsr_get_info(mo_coeff(ispin), nfullcols_total=n_mos)
1996 : !note: optimized GPU block size for SCF is 64x1x64. Here we do 8x8x64
1997 36 : CALL split_block_sizes([n_mos], bsizes_MO, max_size=FLOOR(SQRT(ri_data%max_bsize_MO - 0.1)) + 1)
1998 :
1999 : !batching on MO blocks
2000 18 : n_mem = n_mem_input
2001 : CALL create_tensor_batches(bsizes_MO, n_mem, batch_start, batch_end, &
2002 18 : batch_blk_start, batch_blk_end)
2003 54 : ALLOCATE (batch_ranges(n_mem + 1))
2004 40 : batch_ranges(1:n_mem) = batch_blk_start(1:n_mem)
2005 18 : batch_ranges(n_mem + 1) = batch_blk_end(n_mem) + 1
2006 18 : DEALLOCATE (batch_blk_start, batch_blk_end)
2007 :
2008 : ! Initialize the different tensors needed (Note: keep MO coeffs as (MO | AO) for less transpose)
2009 : CALL create_2c_tensor(t_mo_coeff, dist1, dist2, ri_data%pgrid_2d, bsizes_MO, &
2010 18 : ri_data%bsizes_AO_split, name="MO coeffs")
2011 18 : DEALLOCATE (dist1, dist2)
2012 18 : CALL dbt_create(mo_coeff(ispin), t_2c_tmp, name="MO coeffs")
2013 18 : CALL dbt_copy_matrix_to_tensor(mo_coeff(ispin), t_2c_tmp)
2014 18 : CALL dbt_copy(t_2c_tmp, t_mo_coeff, order=[2, 1], move_data=.TRUE.)
2015 18 : CALL dbt_destroy(t_2c_tmp)
2016 :
2017 18 : CALL dbt_create(t_mo_coeff, t_mo_cpy)
2018 18 : CALL dbt_copy(t_mo_coeff, t_mo_cpy)
2019 72 : DO i_xyz = 1, 3
2020 54 : CALL dbt_create(t_mo_coeff, t_2c_MO_AO_ctr(i_xyz))
2021 72 : CALL dbt_create(t_mo_coeff, t_2c_MO_AO(i_xyz))
2022 : END DO
2023 :
2024 : CALL create_3c_tensor(t_3c_ao_ri_mo, dist1, dist2, dist3, pgrid_1, ri_data%bsizes_AO_split, &
2025 18 : ri_data%bsizes_RI_split, bsizes_MO, [1, 2], [3], name="(AO RI| MO)")
2026 18 : DEALLOCATE (dist1, dist2, dist3)
2027 :
2028 18 : CALL dbt_create(t_3c_ao_ri_mo, t_3c_0)
2029 18 : CALL dbt_destroy(t_3c_ao_ri_mo)
2030 :
2031 : CALL create_3c_tensor(t_3c_mo_ri_ao, dist1, dist2, dist3, pgrid_1, bsizes_MO, ri_data%bsizes_RI_split, &
2032 18 : ri_data%bsizes_AO_split, [1, 2], [3], name="(MO RI | AO)")
2033 18 : DEALLOCATE (dist1, dist2, dist3)
2034 18 : CALL dbt_create(t_3c_mo_ri_ao, t_3c_1)
2035 :
2036 72 : DO i_xyz = 1, 3
2037 54 : CALL dbt_create(t_3c_mo_ri_ao, t_3c_der_RI_ctr_1(i_xyz))
2038 72 : CALL dbt_create(t_3c_mo_ri_ao, t_3c_der_AO_ctr_1(i_xyz))
2039 : END DO
2040 :
2041 : CALL create_3c_tensor(t_3c_mo_ri_mo, dist1, dist2, dist3, pgrid_1, bsizes_MO, &
2042 18 : ri_data%bsizes_RI_split, bsizes_MO, [1, 2], [3], name="(MO RI | MO)")
2043 18 : DEALLOCATE (dist1, dist2, dist3)
2044 18 : CALL dbt_create(t_3c_mo_ri_mo, t_3c_work)
2045 :
2046 : CALL create_3c_tensor(t_3c_ri_mo_mo, dist1, dist2, dist3, pgrid_2, ri_data%bsizes_RI_split, &
2047 18 : bsizes_MO, bsizes_MO, [1], [2, 3], name="(RI| MO MO)")
2048 18 : DEALLOCATE (dist1, dist2, dist3)
2049 :
2050 18 : CALL dbt_create(t_3c_ri_mo_mo, t_3c_2)
2051 18 : CALL dbt_create(t_3c_ri_mo_mo, t_3c_3)
2052 18 : CALL dbt_create(t_3c_ri_mo_mo, t_3c_RI_ctr)
2053 72 : DO i_xyz = 1, 3
2054 72 : CALL dbt_create(t_3c_ri_mo_mo, t_3c_der_RI_ctr_2(i_xyz))
2055 : END DO
2056 :
2057 : !Very large RI_fit blocks => new pgrid to make sure distribution is ideal
2058 18 : pdims(:) = 0
2059 : CALL create_3c_tensor(t_3c_ri_mo_mo_fit, dist1, dist2, dist3, pgrid_2, ri_data%bsizes_RI_fit, &
2060 18 : bsizes_MO, bsizes_MO, [1], [2, 3], name="(RI| MO MO)")
2061 18 : DEALLOCATE (dist1, dist2, dist3)
2062 :
2063 18 : CALL dbt_create(t_3c_ri_mo_mo_fit, t_3c_4)
2064 18 : CALL dbt_create(t_3c_ri_mo_mo_fit, t_3c_5)
2065 18 : CALL dbt_create(t_3c_ri_mo_mo_fit, t_3c_6)
2066 :
2067 18 : CALL dbt_batched_contract_init(t_3c_desymm, batch_range_2=batch_ranges_RI)
2068 18 : CALL dbt_batched_contract_init(t_3c_0, batch_range_2=batch_ranges_RI, batch_range_3=batch_ranges)
2069 :
2070 72 : DO i_xyz = 1, 3
2071 54 : CALL dbt_batched_contract_init(t_3c_der_AO(i_xyz), batch_range_2=batch_ranges_RI)
2072 72 : CALL dbt_batched_contract_init(t_3c_der_RI(i_xyz), batch_range_2=batch_ranges_RI)
2073 : END DO
2074 :
2075 18 : CALL para_env%sync()
2076 18 : t1 = m_walltime()
2077 :
2078 : ! 2) Loop over batches
2079 40 : DO i_mem = 1, n_mem
2080 :
2081 22 : bounds_ctr_1d(1, 1) = batch_start(i_mem)
2082 22 : bounds_ctr_1d(2, 1) = batch_end(i_mem)
2083 :
2084 22 : bounds_ctr_2d(1, 1) = 1
2085 96 : bounds_ctr_2d(2, 1) = SUM(ri_data%bsizes_AO)
2086 :
2087 : ! 3) Do the first AO to MO contraction here
2088 22 : CALL timeset(routineN//"_AO2MO_1", handle)
2089 22 : CALL dbt_batched_contract_init(t_mo_coeff)
2090 44 : DO k_mem = 1, n_mem_RI
2091 22 : bounds_ctr_2d(1, 2) = batch_start_RI(k_mem)
2092 22 : bounds_ctr_2d(2, 2) = batch_end_RI(k_mem)
2093 :
2094 : CALL dbt_contract(1.0_dp, t_mo_coeff, t_3c_desymm, &
2095 : 1.0_dp, t_3c_0, &
2096 : contract_1=[2], notcontract_1=[1], &
2097 : contract_2=[3], notcontract_2=[1, 2], &
2098 : map_1=[3], map_2=[1, 2], filter_eps=ri_data%filter_eps, &
2099 : bounds_2=bounds_ctr_1d, &
2100 : bounds_3=bounds_ctr_2d, &
2101 22 : unit_nr=unit_nr_dbcsr, flop=nflop)
2102 44 : ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
2103 : END DO
2104 22 : CALL dbt_copy(t_3c_0, t_3c_1, order=[3, 2, 1], move_data=.TRUE.)
2105 :
2106 88 : DO i_xyz = 1, 3
2107 132 : DO k_mem = 1, n_mem_RI
2108 66 : bounds_ctr_2d(1, 2) = batch_start_RI(k_mem)
2109 66 : bounds_ctr_2d(2, 2) = batch_end_RI(k_mem)
2110 :
2111 : CALL dbt_contract(1.0_dp, t_mo_coeff, t_3c_der_AO(i_xyz), &
2112 : 1.0_dp, t_3c_0, &
2113 : contract_1=[2], notcontract_1=[1], &
2114 : contract_2=[3], notcontract_2=[1, 2], &
2115 : map_1=[3], map_2=[1, 2], filter_eps=ri_data%filter_eps, &
2116 : bounds_2=bounds_ctr_1d, &
2117 : bounds_3=bounds_ctr_2d, &
2118 66 : unit_nr=unit_nr_dbcsr, flop=nflop)
2119 132 : ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
2120 : END DO
2121 66 : CALL dbt_copy(t_3c_0, t_3c_der_AO_ctr_1(i_xyz), order=[3, 2, 1], move_data=.TRUE.)
2122 :
2123 132 : DO k_mem = 1, n_mem_RI
2124 66 : bounds_ctr_2d(1, 2) = batch_start_RI(k_mem)
2125 66 : bounds_ctr_2d(2, 2) = batch_end_RI(k_mem)
2126 :
2127 : CALL dbt_contract(1.0_dp, t_mo_coeff, t_3c_der_RI(i_xyz), &
2128 : 1.0_dp, t_3c_0, &
2129 : contract_1=[2], notcontract_1=[1], &
2130 : contract_2=[3], notcontract_2=[1, 2], &
2131 : map_1=[3], map_2=[1, 2], filter_eps=ri_data%filter_eps, &
2132 : bounds_2=bounds_ctr_1d, &
2133 : bounds_3=bounds_ctr_2d, &
2134 66 : unit_nr=unit_nr_dbcsr, flop=nflop)
2135 132 : ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
2136 : END DO
2137 88 : CALL dbt_copy(t_3c_0, t_3c_der_RI_ctr_1(i_xyz), order=[3, 2, 1], move_data=.TRUE.)
2138 : END DO
2139 22 : CALL dbt_batched_contract_finalize(t_mo_coeff)
2140 22 : CALL timestop(handle)
2141 :
2142 22 : CALL dbt_batched_contract_init(t_3c_1, batch_range_1=batch_ranges, batch_range_2=batch_ranges_RI)
2143 : CALL dbt_batched_contract_init(t_3c_work, batch_range_1=batch_ranges, batch_range_2=batch_ranges_RI, &
2144 22 : batch_range_3=batch_ranges)
2145 22 : CALL dbt_batched_contract_init(t_3c_2, batch_range_2=batch_ranges, batch_range_3=batch_ranges)
2146 : CALL dbt_batched_contract_init(t_3c_3, batch_range_1=batch_ranges_RI, &
2147 22 : batch_range_2=batch_ranges, batch_range_3=batch_ranges)
2148 :
2149 : CALL dbt_batched_contract_init(t_3c_4, batch_range_1=batch_ranges_RI_fit, &
2150 22 : batch_range_2=batch_ranges, batch_range_3=batch_ranges)
2151 22 : CALL dbt_batched_contract_init(t_3c_5, batch_range_2=batch_ranges, batch_range_3=batch_ranges)
2152 :
2153 88 : DO i_xyz = 1, 3
2154 : CALL dbt_batched_contract_init(t_3c_der_RI_ctr_1(i_xyz), batch_range_1=batch_ranges, &
2155 66 : batch_range_2=batch_ranges_RI)
2156 : CALL dbt_batched_contract_init(t_3c_der_AO_ctr_1(i_xyz), batch_range_1=batch_ranges, &
2157 88 : batch_range_2=batch_ranges_RI)
2158 :
2159 : END DO
2160 :
2161 22 : IF (.NOT. ri_data%same_op) THEN
2162 8 : CALL dbt_batched_contract_init(t_3c_6, batch_range_2=batch_ranges, batch_range_3=batch_ranges)
2163 : END IF
2164 :
2165 52 : DO j_mem = 1, n_mem
2166 :
2167 30 : bounds_ctr_1d(1, 1) = batch_start(j_mem)
2168 30 : bounds_ctr_1d(2, 1) = batch_end(j_mem)
2169 :
2170 30 : bounds_ctr_2d(1, 1) = batch_start(i_mem)
2171 30 : bounds_ctr_2d(2, 1) = batch_end(i_mem)
2172 :
2173 : ! 3) Do the second AO to MO contraction here, followed by the S^-1 contraction
2174 30 : CALL timeset(routineN//"_AO2MO_2", handle)
2175 30 : CALL dbt_batched_contract_init(t_mo_coeff)
2176 60 : DO k_mem = 1, n_mem_RI
2177 30 : bounds_ctr_2d(1, 2) = batch_start_RI(k_mem)
2178 30 : bounds_ctr_2d(2, 2) = batch_end_RI(k_mem)
2179 :
2180 : CALL dbt_contract(1.0_dp, t_mo_coeff, t_3c_1, &
2181 : 1.0_dp, t_3c_work, &
2182 : contract_1=[2], notcontract_1=[1], &
2183 : contract_2=[3], notcontract_2=[1, 2], &
2184 : map_1=[3], map_2=[1, 2], filter_eps=ri_data%filter_eps, &
2185 : bounds_2=bounds_ctr_1d, &
2186 : bounds_3=bounds_ctr_2d, &
2187 30 : unit_nr=unit_nr_dbcsr, flop=nflop)
2188 60 : ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
2189 : END DO
2190 30 : CALL dbt_batched_contract_finalize(t_mo_coeff)
2191 30 : CALL timestop(handle)
2192 :
2193 30 : bounds_ctr_2d(1, 1) = batch_start(i_mem)
2194 30 : bounds_ctr_2d(2, 1) = batch_end(i_mem)
2195 30 : bounds_ctr_2d(1, 2) = batch_start(j_mem)
2196 30 : bounds_ctr_2d(2, 2) = batch_end(j_mem)
2197 :
2198 : ! 4) Contract 3c MO integrals with S^-1 as well
2199 30 : CALL timeset(routineN//"_2c_inv", handle)
2200 30 : CALL dbt_copy(t_3c_work, t_3c_3, order=[2, 1, 3], move_data=.TRUE.)
2201 60 : DO k_mem = 1, n_mem_RI
2202 30 : bounds_ctr_1d(1, 1) = batch_start_RI(k_mem)
2203 30 : bounds_ctr_1d(2, 1) = batch_end_RI(k_mem)
2204 :
2205 30 : CALL dbt_batched_contract_init(ri_data%t_2c_inv(1, 1))
2206 : CALL dbt_contract(1.0_dp, ri_data%t_2c_inv(1, 1), t_3c_3, &
2207 : 1.0_dp, t_3c_2, &
2208 : contract_1=[2], notcontract_1=[1], &
2209 : contract_2=[1], notcontract_2=[2, 3], &
2210 : map_1=[1], map_2=[2, 3], filter_eps=ri_data%filter_eps, &
2211 : bounds_1=bounds_ctr_1d, &
2212 : bounds_3=bounds_ctr_2d, &
2213 30 : unit_nr=unit_nr_dbcsr, flop=nflop)
2214 30 : ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
2215 60 : CALL dbt_batched_contract_finalize(ri_data%t_2c_inv(1, 1))
2216 : END DO
2217 30 : CALL dbt_copy(t_3c_ri_mo_mo, t_3c_3)
2218 30 : CALL timestop(handle)
2219 :
2220 : !Only contract (ab|P') with MO coeffs since need AO rep for the force of (a'b|P)
2221 30 : bounds_ctr_1d(1, 1) = batch_start(j_mem)
2222 30 : bounds_ctr_1d(2, 1) = batch_end(j_mem)
2223 :
2224 30 : bounds_ctr_2d(1, 1) = batch_start(i_mem)
2225 30 : bounds_ctr_2d(2, 1) = batch_end(i_mem)
2226 :
2227 30 : CALL timeset(routineN//"_AO2MO_2", handle)
2228 30 : CALL dbt_batched_contract_init(t_mo_coeff)
2229 120 : DO i_xyz = 1, 3
2230 180 : DO k_mem = 1, n_mem_RI
2231 90 : bounds_ctr_2d(1, 2) = batch_start_RI(k_mem)
2232 90 : bounds_ctr_2d(2, 2) = batch_end_RI(k_mem)
2233 :
2234 : CALL dbt_contract(1.0_dp, t_mo_coeff, t_3c_der_RI_ctr_1(i_xyz), &
2235 : 1.0_dp, t_3c_work, &
2236 : contract_1=[2], notcontract_1=[1], &
2237 : contract_2=[3], notcontract_2=[1, 2], &
2238 : map_1=[3], map_2=[1, 2], filter_eps=ri_data%filter_eps, &
2239 : bounds_2=bounds_ctr_1d, &
2240 : bounds_3=bounds_ctr_2d, &
2241 90 : unit_nr=unit_nr_dbcsr, flop=nflop)
2242 180 : ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
2243 : END DO
2244 120 : CALL dbt_copy(t_3c_work, t_3c_der_RI_ctr_2(i_xyz), order=[2, 1, 3], move_data=.TRUE.)
2245 : END DO
2246 30 : CALL dbt_batched_contract_finalize(t_mo_coeff)
2247 30 : CALL timestop(handle)
2248 :
2249 30 : bounds_ctr_2d(1, 1) = batch_start(i_mem)
2250 30 : bounds_ctr_2d(2, 1) = batch_end(i_mem)
2251 30 : bounds_ctr_2d(1, 2) = batch_start(j_mem)
2252 30 : bounds_ctr_2d(2, 2) = batch_end(j_mem)
2253 :
2254 : ! 5) Force due to d/dx (P|Q)
2255 30 : CALL timeset(routineN//"_PQ_der", handle)
2256 30 : CALL dbt_copy(t_3c_2, t_3c_4, move_data=.TRUE.)
2257 30 : CALL dbt_copy(t_3c_4, t_3c_5)
2258 60 : DO k_mem = 1, n_mem_RI_fit
2259 30 : bounds_ctr_1d(1, 1) = batch_start_RI_fit(k_mem)
2260 30 : bounds_ctr_1d(2, 1) = batch_end_RI_fit(k_mem)
2261 :
2262 30 : CALL dbt_batched_contract_init(t_2c_RI_PQ)
2263 : CALL dbt_contract(1.0_dp, t_3c_4, t_3c_5, &
2264 : 1.0_dp, t_2c_RI_PQ, &
2265 : contract_1=[2, 3], notcontract_1=[1], &
2266 : contract_2=[2, 3], notcontract_2=[1], &
2267 : bounds_1=bounds_ctr_2d, &
2268 : bounds_2=bounds_ctr_1d, &
2269 : map_1=[1], map_2=[2], filter_eps=ri_data%filter_eps, &
2270 30 : unit_nr=unit_nr_dbcsr, flop=nflop)
2271 30 : ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
2272 60 : CALL dbt_batched_contract_finalize(t_2c_RI_PQ)
2273 : END DO
2274 30 : CALL timestop(handle)
2275 :
2276 : ! 6) If metric, do the additional contraction with S_pq^-1 (Q|R) (not on the derivatives)
2277 30 : IF (.NOT. ri_data%same_op) THEN
2278 16 : CALL timeset(routineN//"_metric", handle)
2279 32 : DO k_mem = 1, n_mem_RI_fit
2280 16 : bounds_ctr_1d(1, 1) = batch_start_RI_fit(k_mem)
2281 16 : bounds_ctr_1d(2, 1) = batch_end_RI_fit(k_mem)
2282 :
2283 16 : CALL dbt_batched_contract_init(t_2c_RI_inv)
2284 : CALL dbt_contract(1.0_dp, t_2c_RI_inv, t_3c_4, &
2285 : 1.0_dp, t_3c_6, &
2286 : contract_1=[2], notcontract_1=[1], &
2287 : contract_2=[1], notcontract_2=[2, 3], &
2288 : bounds_1=bounds_ctr_1d, &
2289 : bounds_3=bounds_ctr_2d, &
2290 : map_1=[1], map_2=[2, 3], filter_eps=ri_data%filter_eps, &
2291 16 : unit_nr=unit_nr_dbcsr, flop=nflop)
2292 16 : ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
2293 32 : CALL dbt_batched_contract_finalize(t_2c_RI_inv)
2294 : END DO
2295 16 : CALL dbt_copy(t_3c_6, t_3c_4, move_data=.TRUE.)
2296 :
2297 : ! 8) and get the force due to d/dx S^-1
2298 32 : DO k_mem = 1, n_mem_RI_fit
2299 16 : bounds_ctr_1d(1, 1) = batch_start_RI_fit(k_mem)
2300 16 : bounds_ctr_1d(2, 1) = batch_end_RI_fit(k_mem)
2301 :
2302 16 : CALL dbt_batched_contract_init(t_2c_RI_met)
2303 : CALL dbt_contract(1.0_dp, t_3c_4, t_3c_5, &
2304 : 1.0_dp, t_2c_RI_met, &
2305 : contract_1=[2, 3], notcontract_1=[1], &
2306 : contract_2=[2, 3], notcontract_2=[1], &
2307 : bounds_1=bounds_ctr_2d, &
2308 : bounds_2=bounds_ctr_1d, &
2309 : map_1=[1], map_2=[2], filter_eps=ri_data%filter_eps, &
2310 16 : unit_nr=unit_nr_dbcsr, flop=nflop)
2311 16 : ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
2312 32 : CALL dbt_batched_contract_finalize(t_2c_RI_met)
2313 : END DO
2314 16 : CALL timestop(handle)
2315 : END IF
2316 30 : CALL dbt_copy(t_3c_ri_mo_mo_fit, t_3c_5)
2317 :
2318 : ! 7) Do the force contribution due to 3c integrals (a'b|P) and (ab|P')
2319 :
2320 : ! (ab|P')
2321 30 : CALL timeset(routineN//"_3c_RI", handle)
2322 30 : pref = -0.5_dp*2.0_dp*hf_fraction*spin_fac
2323 30 : CALL dbt_copy(t_3c_4, t_3c_RI_ctr, move_data=.TRUE.)
2324 : CALL get_force_from_3c_trace(force, t_3c_RI_ctr, t_3c_der_RI_ctr_2, atom_of_kind, kind_of, &
2325 30 : idx_to_at_RI, pref)
2326 30 : CALL timestop(handle)
2327 :
2328 : ! (a'b|P) Note that derivative remains in AO rep until the actual force evaluation,
2329 : ! which also prevents doing a direct 3-center trace
2330 30 : bounds_ctr_2d(1, 1) = batch_start(i_mem)
2331 30 : bounds_ctr_2d(2, 1) = batch_end(i_mem)
2332 :
2333 30 : bounds_ctr_1d(1, 1) = batch_start(j_mem)
2334 30 : bounds_ctr_1d(2, 1) = batch_end(j_mem)
2335 :
2336 30 : CALL timeset(routineN//"_3c_AO", handle)
2337 30 : CALL dbt_copy(t_3c_RI_ctr, t_3c_work, order=[2, 1, 3], move_data=.TRUE.)
2338 120 : DO i_xyz = 1, 3
2339 :
2340 90 : CALL dbt_batched_contract_init(t_2c_MO_AO_ctr(i_xyz))
2341 180 : DO k_mem = 1, n_mem_RI
2342 90 : bounds_ctr_2d(1, 2) = batch_start_RI(k_mem)
2343 90 : bounds_ctr_2d(2, 2) = batch_end_RI(k_mem)
2344 :
2345 : CALL dbt_contract(1.0_dp, t_3c_work, t_3c_der_AO_ctr_1(i_xyz), &
2346 : 1.0_dp, t_2c_MO_AO_ctr(i_xyz), &
2347 : contract_1=[1, 2], notcontract_1=[3], &
2348 : contract_2=[1, 2], notcontract_2=[3], &
2349 : map_1=[1], map_2=[2], filter_eps=ri_data%filter_eps, &
2350 : bounds_1=bounds_ctr_2d, &
2351 : bounds_2=bounds_ctr_1d, &
2352 90 : unit_nr=unit_nr_dbcsr, flop=nflop)
2353 180 : ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
2354 : END DO
2355 120 : CALL dbt_batched_contract_finalize(t_2c_MO_AO_ctr(i_xyz))
2356 : END DO
2357 232 : CALL timestop(handle)
2358 :
2359 : END DO !j_mem
2360 22 : CALL dbt_batched_contract_finalize(t_3c_1)
2361 22 : CALL dbt_batched_contract_finalize(t_3c_work)
2362 22 : CALL dbt_batched_contract_finalize(t_3c_2)
2363 22 : CALL dbt_batched_contract_finalize(t_3c_3)
2364 22 : CALL dbt_batched_contract_finalize(t_3c_4)
2365 22 : CALL dbt_batched_contract_finalize(t_3c_5)
2366 :
2367 88 : DO i_xyz = 1, 3
2368 66 : CALL dbt_batched_contract_finalize(t_3c_der_RI_ctr_1(i_xyz))
2369 88 : CALL dbt_batched_contract_finalize(t_3c_der_AO_ctr_1(i_xyz))
2370 : END DO
2371 :
2372 62 : IF (.NOT. ri_data%same_op) THEN
2373 8 : CALL dbt_batched_contract_finalize(t_3c_6)
2374 : END IF
2375 :
2376 : END DO !i_mem
2377 18 : CALL dbt_batched_contract_finalize(t_3c_desymm)
2378 18 : CALL dbt_batched_contract_finalize(t_3c_0)
2379 :
2380 72 : DO i_xyz = 1, 3
2381 54 : CALL dbt_batched_contract_finalize(t_3c_der_AO(i_xyz))
2382 72 : CALL dbt_batched_contract_finalize(t_3c_der_RI(i_xyz))
2383 : END DO
2384 :
2385 : !Force contribution due to 3-center AO derivatives (a'b|P)
2386 18 : pref = -0.5_dp*4.0_dp*hf_fraction*spin_fac
2387 72 : DO i_xyz = 1, 3
2388 54 : CALL dbt_copy(t_2c_MO_AO_ctr(i_xyz), t_2c_MO_AO(i_xyz), move_data=.TRUE.) !ensures matching distributions
2389 54 : CALL get_mo_ao_force(force, t_mo_cpy, t_2c_MO_AO(i_xyz), atom_of_kind, kind_of, idx_to_at_AO, pref, i_xyz)
2390 72 : CALL dbt_clear(t_2c_MO_AO(i_xyz))
2391 : END DO
2392 :
2393 : !Force contribution of d/dx (P|Q)
2394 18 : pref = 0.5_dp*hf_fraction*spin_fac
2395 18 : IF (.NOT. ri_data%same_op) pref = -pref
2396 :
2397 : !Making sure dists of the t_2c_RI tensors match
2398 18 : CALL dbt_copy(t_2c_RI_PQ, t_2c_RI, move_data=.TRUE.)
2399 : CALL get_2c_der_force(force, t_2c_RI, t_2c_der_RI, atom_of_kind, &
2400 18 : kind_of, idx_to_at_RI, pref)
2401 18 : CALL dbt_clear(t_2c_RI)
2402 :
2403 : !Force contribution due to the inverse metric
2404 18 : IF (.NOT. ri_data%same_op) THEN
2405 4 : pref = 0.5_dp*2.0_dp*hf_fraction*spin_fac
2406 :
2407 4 : CALL dbt_copy(t_2c_RI_met, t_2c_RI, move_data=.TRUE.)
2408 : CALL get_2c_der_force(force, t_2c_RI, t_2c_der_metric, atom_of_kind, &
2409 4 : kind_of, idx_to_at_RI, pref)
2410 4 : CALL dbt_clear(t_2c_RI)
2411 : END IF
2412 :
2413 18 : CALL dbt_destroy(t_3c_0)
2414 18 : CALL dbt_destroy(t_3c_1)
2415 18 : CALL dbt_destroy(t_3c_2)
2416 18 : CALL dbt_destroy(t_3c_3)
2417 18 : CALL dbt_destroy(t_3c_4)
2418 18 : CALL dbt_destroy(t_3c_5)
2419 18 : CALL dbt_destroy(t_3c_6)
2420 18 : CALL dbt_destroy(t_3c_work)
2421 18 : CALL dbt_destroy(t_3c_RI_ctr)
2422 18 : CALL dbt_destroy(t_3c_mo_ri_ao)
2423 18 : CALL dbt_destroy(t_3c_mo_ri_mo)
2424 18 : CALL dbt_destroy(t_3c_ri_mo_mo)
2425 18 : CALL dbt_destroy(t_3c_ri_mo_mo_fit)
2426 18 : CALL dbt_destroy(t_mo_coeff)
2427 18 : CALL dbt_destroy(t_mo_cpy)
2428 72 : DO i_xyz = 1, 3
2429 54 : CALL dbt_destroy(t_2c_MO_AO(i_xyz))
2430 54 : CALL dbt_destroy(t_2c_MO_AO_ctr(i_xyz))
2431 54 : CALL dbt_destroy(t_3c_der_RI_ctr_1(i_xyz))
2432 54 : CALL dbt_destroy(t_3c_der_AO_ctr_1(i_xyz))
2433 72 : CALL dbt_destroy(t_3c_der_RI_ctr_2(i_xyz))
2434 : END DO
2435 86 : DEALLOCATE (batch_ranges, batch_start, batch_end)
2436 : END DO !ispin
2437 :
2438 : ! Clean-up
2439 14 : CALL dbt_pgrid_destroy(pgrid_1)
2440 14 : CALL dbt_pgrid_destroy(pgrid_2)
2441 14 : CALL dbt_destroy(t_3c_desymm)
2442 14 : CALL dbt_destroy(t_2c_RI)
2443 14 : CALL dbt_destroy(t_2c_RI_PQ)
2444 14 : IF (.NOT. ri_data%same_op) THEN
2445 4 : CALL dbt_destroy(t_2c_RI_met)
2446 4 : CALL dbt_destroy(t_2c_RI_inv)
2447 : END IF
2448 56 : DO i_xyz = 1, 3
2449 42 : CALL dbt_destroy(t_3c_der_AO(i_xyz))
2450 42 : CALL dbt_destroy(t_3c_der_RI(i_xyz))
2451 42 : CALL dbt_destroy(t_2c_der_RI(i_xyz))
2452 56 : IF (.NOT. ri_data%same_op) CALL dbt_destroy(t_2c_der_metric(i_xyz))
2453 : END DO
2454 14 : CALL dbt_copy(ri_data%t_3c_int_ctr_2(1, 1), ri_data%t_3c_int_ctr_1(1, 1))
2455 :
2456 14 : CALL para_env%sync()
2457 14 : t2 = m_walltime()
2458 :
2459 14 : ri_data%dbcsr_time = ri_data%dbcsr_time + t2 - t1
2460 :
2461 476 : END SUBROUTINE hfx_ri_forces_mo
2462 :
2463 : ! **************************************************************************************************
2464 : !> \brief New sparser implementation
2465 : !> \param qs_env ...
2466 : !> \param ri_data ...
2467 : !> \param nspins ...
2468 : !> \param hf_fraction ...
2469 : !> \param rho_ao ...
2470 : !> \param rho_ao_resp ...
2471 : !> \param use_virial ...
2472 : !> \param resp_only ...
2473 : !> \param rescale_factor ...
2474 : ! **************************************************************************************************
2475 116 : SUBROUTINE hfx_ri_forces_Pmat(qs_env, ri_data, nspins, hf_fraction, rho_ao, rho_ao_resp, &
2476 : use_virial, resp_only, rescale_factor)
2477 :
2478 : TYPE(qs_environment_type), POINTER :: qs_env
2479 : TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
2480 : INTEGER, INTENT(IN) :: nspins
2481 : REAL(dp), INTENT(IN) :: hf_fraction
2482 : TYPE(dbcsr_p_type), DIMENSION(:, :) :: rho_ao
2483 : TYPE(dbcsr_p_type), DIMENSION(:), OPTIONAL :: rho_ao_resp
2484 : LOGICAL, INTENT(IN), OPTIONAL :: use_virial, resp_only
2485 : REAL(dp), INTENT(IN), OPTIONAL :: rescale_factor
2486 :
2487 : CHARACTER(LEN=*), PARAMETER :: routineN = 'hfx_ri_forces_Pmat'
2488 :
2489 : INTEGER :: dummy_int, handle, i_mem, i_spin, i_xyz, &
2490 : ibasis, j_mem, j_xyz, k_mem, k_xyz, &
2491 : n_mem, n_mem_RI, natom, nkind, &
2492 : unit_nr_dbcsr
2493 : INTEGER(int_8) :: nflop
2494 116 : INTEGER, ALLOCATABLE, DIMENSION(:) :: atom_of_kind, batch_end, batch_end_RI, batch_ranges, &
2495 116 : batch_ranges_RI, batch_start, batch_start_RI, dist1, dist2, dist3, idx_to_at_AO, &
2496 116 : idx_to_at_RI, kind_of
2497 : INTEGER, DIMENSION(2, 1) :: ibounds, jbounds, kbounds
2498 : INTEGER, DIMENSION(2, 2) :: ijbounds
2499 : INTEGER, DIMENSION(2, 3) :: bounds_cpy
2500 232 : INTEGER, DIMENSION(:), POINTER :: col_bsize, row_bsize
2501 : LOGICAL :: do_resp, resp_only_prv, use_virial_prv
2502 : REAL(dp) :: pref, spin_fac, t1, t2
2503 : REAL(dp), DIMENSION(3, 3) :: work_virial
2504 116 : TYPE(atomic_kind_type), DIMENSION(:), POINTER :: atomic_kind_set
2505 116 : TYPE(block_ind_type), ALLOCATABLE, DIMENSION(:, :) :: t_3c_der_AO_ind, t_3c_der_RI_ind
2506 : TYPE(cell_type), POINTER :: cell
2507 : TYPE(dbcsr_distribution_type), POINTER :: dbcsr_dist
2508 : TYPE(dbcsr_type) :: dbcsr_tmp, virial_trace
2509 6728 : TYPE(dbt_type) :: rho_ao_1, rho_ao_2, t_2c_RI, t_2c_RI_tmp, t_2c_tmp, t_2c_virial, t_3c_1, &
2510 7656 : t_3c_2, t_3c_3, t_3c_4, t_3c_5, t_3c_ao_ri_ao, t_3c_help_1, t_3c_help_2, t_3c_int, &
2511 5684 : t_3c_int_2, t_3c_ri_ao_ao, t_3c_sparse, t_3c_virial, t_R, t_SVS
2512 116 : TYPE(dbt_type), ALLOCATABLE, DIMENSION(:) :: t_2c_der_metric, t_2c_der_RI, &
2513 116 : t_3c_der_AO, t_3c_der_RI
2514 : TYPE(dft_control_type), POINTER :: dft_control
2515 : TYPE(gto_basis_set_p_type), ALLOCATABLE, &
2516 116 : DIMENSION(:), TARGET :: basis_set_AO, basis_set_RI
2517 : TYPE(gto_basis_set_type), POINTER :: orb_basis, ri_basis
2518 : TYPE(hfx_compression_type), ALLOCATABLE, &
2519 116 : DIMENSION(:, :) :: t_3c_der_AO_comp, t_3c_der_RI_comp
2520 : TYPE(mp_para_env_type), POINTER :: para_env
2521 : TYPE(neighbor_list_3c_type) :: nl_3c
2522 : TYPE(neighbor_list_set_p_type), DIMENSION(:), &
2523 116 : POINTER :: nl_2c_met, nl_2c_pot
2524 116 : TYPE(particle_type), DIMENSION(:), POINTER :: particle_set
2525 116 : TYPE(qs_force_type), DIMENSION(:), POINTER :: force
2526 116 : TYPE(qs_kind_type), DIMENSION(:), POINTER :: qs_kind_set
2527 : TYPE(virial_type), POINTER :: virial
2528 :
2529 : !The idea is the following: we need to compute the gradients
2530 : ! d/dx [P_ab P_cd (acP) S^-1_PQ (Q|R) S^-1_RS (Sbd)]
2531 : ! Which we do in a few steps:
2532 : ! 1) Contract the density matrices with the 3c integrals: M_acS = P_ab P_cd (Sbd)
2533 : ! 2) Calculate the 3c contributions: d/dx (acP) [S^-1_PQ (Q|R) S^-1_RS M_acS]
2534 : ! For maximum perf, we first multiply all 2c matrices together, than contract with retain_sparsity
2535 : ! 3) Contract the 3c integrals and the M tensor together in order to only work with 2c quantities:
2536 : ! R_PS = (acP) M_acS
2537 : ! 4) From there, we can easily calculate the 2c contributions to the force:
2538 : ! Potential: [S^-1*R*S^-1]_QR d/dx (Q|R)
2539 : ! Metric: [S^-1*R*S^-1*(Q|R)*S^-1]_UV d/dx S_UV
2540 :
2541 116 : NULLIFY (particle_set, virial, cell, force, atomic_kind_set, nl_2c_pot, nl_2c_met)
2542 116 : NULLIFY (orb_basis, ri_basis, qs_kind_set, particle_set, dft_control, dbcsr_dist)
2543 :
2544 116 : use_virial_prv = .FALSE.
2545 116 : IF (PRESENT(use_virial)) use_virial_prv = use_virial
2546 :
2547 116 : do_resp = .FALSE.
2548 116 : IF (PRESENT(rho_ao_resp)) THEN
2549 30 : IF (ASSOCIATED(rho_ao_resp(1)%matrix)) do_resp = .TRUE.
2550 : END IF
2551 :
2552 116 : resp_only_prv = .FALSE.
2553 116 : IF (PRESENT(resp_only)) resp_only_prv = resp_only
2554 :
2555 116 : unit_nr_dbcsr = ri_data%unit_nr_dbcsr
2556 :
2557 : CALL get_qs_env(qs_env, natom=natom, particle_set=particle_set, nkind=nkind, &
2558 : atomic_kind_set=atomic_kind_set, virial=virial, &
2559 : cell=cell, force=force, para_env=para_env, dft_control=dft_control, &
2560 116 : qs_kind_set=qs_kind_set, dbcsr_dist=dbcsr_dist)
2561 :
2562 : CALL create_3c_tensor(t_3c_ao_ri_ao, dist1, dist2, dist3, ri_data%pgrid_1, &
2563 : ri_data%bsizes_AO_split, ri_data%bsizes_RI_split, ri_data%bsizes_AO_split, &
2564 116 : [1, 2], [3], name="(AO RI | AO)")
2565 116 : DEALLOCATE (dist1, dist2, dist3)
2566 :
2567 : CALL create_3c_tensor(t_3c_ri_ao_ao, dist1, dist2, dist3, ri_data%pgrid_2, &
2568 : ri_data%bsizes_RI_split, ri_data%bsizes_AO_split, ri_data%bsizes_AO_split, &
2569 116 : [1], [2, 3], name="(RI | AO AO)")
2570 116 : DEALLOCATE (dist1, dist2, dist3)
2571 :
2572 916 : ALLOCATE (basis_set_RI(nkind), basis_set_AO(nkind))
2573 116 : CALL basis_set_list_setup(basis_set_RI, ri_data%ri_basis_type, qs_kind_set)
2574 116 : CALL get_particle_set(particle_set, qs_kind_set, basis=basis_set_RI)
2575 116 : CALL basis_set_list_setup(basis_set_AO, ri_data%orb_basis_type, qs_kind_set)
2576 116 : CALL get_particle_set(particle_set, qs_kind_set, basis=basis_set_AO)
2577 :
2578 342 : DO ibasis = 1, SIZE(basis_set_AO)
2579 226 : orb_basis => basis_set_AO(ibasis)%gto_basis_set
2580 226 : CALL init_interaction_radii_orb_basis(orb_basis, ri_data%eps_pgf_orb)
2581 226 : ri_basis => basis_set_RI(ibasis)%gto_basis_set
2582 342 : CALL init_interaction_radii_orb_basis(ri_basis, ri_data%eps_pgf_orb)
2583 : END DO
2584 :
2585 : ! Precompute the derivatives
2586 5684 : ALLOCATE (t_2c_der_metric(3), t_2c_der_RI(3), t_3c_der_AO(3), t_3c_der_RI(3))
2587 116 : IF (use_virial) THEN
2588 : CALL precalc_derivatives(t_3c_der_RI_comp, t_3c_der_AO_comp, t_3c_der_RI_ind, t_3c_der_AO_ind, &
2589 : t_2c_der_RI, t_2c_der_metric, t_3c_ri_ao_ao, &
2590 : basis_set_AO, basis_set_RI, ri_data, qs_env, &
2591 : nl_2c_pot=nl_2c_pot, nl_2c_met=nl_2c_met, &
2592 4 : nl_3c_out=nl_3c, t_3c_virial=t_3c_virial)
2593 :
2594 16 : ALLOCATE (col_bsize(natom), row_bsize(natom))
2595 16 : col_bsize(:) = ri_data%bsizes_RI
2596 16 : row_bsize(:) = ri_data%bsizes_RI
2597 4 : CALL dbcsr_create(virial_trace, "virial_trace", dbcsr_dist, dbcsr_type_no_symmetry, row_bsize, col_bsize)
2598 4 : CALL dbt_create(virial_trace, t_2c_virial)
2599 4 : DEALLOCATE (col_bsize, row_bsize)
2600 : ELSE
2601 : CALL precalc_derivatives(t_3c_der_RI_comp, t_3c_der_AO_comp, t_3c_der_RI_ind, t_3c_der_AO_ind, &
2602 : t_2c_der_RI, t_2c_der_metric, t_3c_ri_ao_ao, &
2603 112 : basis_set_AO, basis_set_RI, ri_data, qs_env)
2604 : END IF
2605 :
2606 : ! Keep track of derivative sparsity to be able to use retain_sparsity in contraction
2607 116 : CALL dbt_create(t_3c_ri_ao_ao, t_3c_sparse)
2608 464 : DO i_xyz = 1, 3
2609 1490 : DO i_mem = 1, SIZE(t_3c_der_RI_comp, 1)
2610 : CALL decompress_tensor(t_3c_ri_ao_ao, t_3c_der_RI_ind(i_mem, i_xyz)%ind, &
2611 1026 : t_3c_der_RI_comp(i_mem, i_xyz), ri_data%filter_eps_storage)
2612 1026 : CALL dbt_copy(t_3c_ri_ao_ao, t_3c_sparse, summation=.TRUE., move_data=.TRUE.)
2613 :
2614 : CALL decompress_tensor(t_3c_ri_ao_ao, t_3c_der_AO_ind(i_mem, i_xyz)%ind, &
2615 1026 : t_3c_der_AO_comp(i_mem, i_xyz), ri_data%filter_eps_storage)
2616 1026 : CALL dbt_copy(t_3c_ri_ao_ao, t_3c_sparse, summation=.TRUE.)
2617 1374 : CALL dbt_copy(t_3c_ri_ao_ao, t_3c_sparse, order=[1, 3, 2], summation=.TRUE., move_data=.TRUE.)
2618 : END DO
2619 : END DO
2620 :
2621 464 : DO i_xyz = 1, 3
2622 348 : CALL dbt_create(t_3c_ri_ao_ao, t_3c_der_RI(i_xyz))
2623 464 : CALL dbt_create(t_3c_ri_ao_ao, t_3c_der_AO(i_xyz))
2624 : END DO
2625 :
2626 : ! Some utilities
2627 116 : spin_fac = 0.5_dp
2628 116 : IF (nspins == 2) spin_fac = 1.0_dp
2629 116 : IF (PRESENT(rescale_factor)) spin_fac = spin_fac*rescale_factor
2630 :
2631 348 : ALLOCATE (idx_to_at_RI(SIZE(ri_data%bsizes_RI_split)))
2632 116 : CALL get_idx_to_atom(idx_to_at_RI, ri_data%bsizes_RI_split, ri_data%bsizes_RI)
2633 :
2634 348 : ALLOCATE (idx_to_at_AO(SIZE(ri_data%bsizes_AO_split)))
2635 116 : CALL get_idx_to_atom(idx_to_at_AO, ri_data%bsizes_AO_split, ri_data%bsizes_AO)
2636 :
2637 116 : CALL get_atomic_kind_set(atomic_kind_set, kind_of=kind_of, atom_of_kind=atom_of_kind)
2638 :
2639 : ! Go over batches of the 2 AO indices to save memory
2640 116 : n_mem = ri_data%n_mem
2641 464 : ALLOCATE (batch_start(n_mem), batch_end(n_mem))
2642 464 : batch_start(:) = ri_data%starts_array_mem(:)
2643 464 : batch_end(:) = ri_data%ends_array_mem(:)
2644 :
2645 348 : ALLOCATE (batch_ranges(n_mem + 1))
2646 464 : batch_ranges(:n_mem) = ri_data%starts_array_mem_block(:)
2647 116 : batch_ranges(n_mem + 1) = ri_data%ends_array_mem_block(n_mem) + 1
2648 :
2649 116 : n_mem_RI = ri_data%n_mem_RI
2650 464 : ALLOCATE (batch_start_RI(n_mem_RI), batch_end_RI(n_mem_RI))
2651 464 : batch_start_RI(:) = ri_data%starts_array_RI_mem(:)
2652 464 : batch_end_RI(:) = ri_data%ends_array_RI_mem(:)
2653 :
2654 348 : ALLOCATE (batch_ranges_RI(n_mem_RI + 1))
2655 464 : batch_ranges_RI(:n_mem_RI) = ri_data%starts_array_RI_mem_block(:)
2656 116 : batch_ranges_RI(n_mem_RI + 1) = ri_data%ends_array_RI_mem_block(n_mem_RI) + 1
2657 :
2658 : ! Pre-create all the needed tensors
2659 : CALL create_2c_tensor(rho_ao_1, dist1, dist2, ri_data%pgrid_2d, &
2660 : ri_data%bsizes_AO_split, ri_data%bsizes_AO_split, &
2661 116 : name="(AO | AO)")
2662 116 : DEALLOCATE (dist1, dist2)
2663 116 : CALL dbt_create(rho_ao_1, rho_ao_2)
2664 :
2665 : CALL create_2c_tensor(t_2c_RI, dist1, dist2, ri_data%pgrid_2d, &
2666 : ri_data%bsizes_RI_split, ri_data%bsizes_RI_split, name="(RI | RI)")
2667 116 : DEALLOCATE (dist1, dist2)
2668 116 : CALL dbt_create(t_2c_RI, t_SVS)
2669 116 : CALL dbt_create(t_2c_RI, t_R)
2670 116 : CALL dbt_create(t_2c_RI, t_2c_RI_tmp)
2671 :
2672 116 : CALL dbt_create(t_3c_ao_ri_ao, t_3c_1)
2673 116 : CALL dbt_create(t_3c_ao_ri_ao, t_3c_2)
2674 116 : CALL dbt_create(t_3c_ri_ao_ao, t_3c_3)
2675 116 : CALL dbt_create(t_3c_ri_ao_ao, t_3c_4)
2676 116 : CALL dbt_create(t_3c_ri_ao_ao, t_3c_5)
2677 116 : CALL dbt_create(t_3c_ri_ao_ao, t_3c_help_1)
2678 116 : CALL dbt_create(t_3c_ri_ao_ao, t_3c_help_2)
2679 :
2680 116 : CALL dbt_create(t_3c_ao_ri_ao, t_3c_int)
2681 116 : CALL dbt_copy(ri_data%t_3c_int_ctr_2(1, 1), t_3c_int)
2682 :
2683 116 : CALL dbt_create(t_3c_ri_ao_ao, t_3c_int_2)
2684 :
2685 116 : CALL para_env%sync()
2686 116 : t1 = m_walltime()
2687 :
2688 : !Pre-calculate the necessary 2-center quantities
2689 116 : IF (.NOT. ri_data%same_op) THEN
2690 : !S^-1 * V * S^-1
2691 : CALL dbt_contract(1.0_dp, ri_data%t_2c_inv(1, 1), ri_data%t_2c_pot(1, 1), 0.0_dp, t_2c_RI, &
2692 : contract_1=[2], notcontract_1=[1], &
2693 : contract_2=[1], notcontract_2=[2], &
2694 : map_1=[1], map_2=[2], filter_eps=ri_data%filter_eps, &
2695 28 : unit_nr=unit_nr_dbcsr, flop=nflop)
2696 28 : ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
2697 :
2698 : CALL dbt_contract(1.0_dp, t_2c_RI, ri_data%t_2c_inv(1, 1), 0.0_dp, t_SVS, &
2699 : contract_1=[2], notcontract_1=[1], &
2700 : contract_2=[1], notcontract_2=[2], &
2701 : map_1=[1], map_2=[2], filter_eps=ri_data%filter_eps, &
2702 28 : unit_nr=unit_nr_dbcsr, flop=nflop)
2703 28 : ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
2704 : ELSE
2705 : ! Simply V^-1
2706 88 : CALL dbt_copy(ri_data%t_2c_inv(1, 1), t_SVS)
2707 : END IF
2708 :
2709 116 : CALL dbt_batched_contract_init(t_3c_int, batch_range_1=batch_ranges, batch_range_3=batch_ranges)
2710 : CALL dbt_batched_contract_init(t_3c_int_2, batch_range_1=batch_ranges_RI, &
2711 116 : batch_range_2=batch_ranges, batch_range_3=batch_ranges)
2712 116 : CALL dbt_batched_contract_init(t_3c_1, batch_range_1=batch_ranges, batch_range_3=batch_ranges)
2713 116 : CALL dbt_batched_contract_init(t_3c_2, batch_range_1=batch_ranges, batch_range_3=batch_ranges)
2714 : CALL dbt_batched_contract_init(t_3c_3, batch_range_1=batch_ranges_RI, &
2715 116 : batch_range_2=batch_ranges, batch_range_3=batch_ranges)
2716 : CALL dbt_batched_contract_init(t_3c_4, batch_range_1=batch_ranges_RI, &
2717 116 : batch_range_2=batch_ranges, batch_range_3=batch_ranges)
2718 : CALL dbt_batched_contract_init(t_3c_5, batch_range_1=batch_ranges_RI, &
2719 116 : batch_range_2=batch_ranges, batch_range_3=batch_ranges)
2720 : CALL dbt_batched_contract_init(t_3c_sparse, batch_range_1=batch_ranges_RI, &
2721 116 : batch_range_2=batch_ranges, batch_range_3=batch_ranges)
2722 :
2723 242 : DO i_spin = 1, nspins
2724 :
2725 : !Prepare Pmat in tensor format
2726 126 : CALL dbt_create(rho_ao(i_spin, 1)%matrix, t_2c_tmp)
2727 126 : CALL dbt_copy_matrix_to_tensor(rho_ao(i_spin, 1)%matrix, t_2c_tmp)
2728 126 : CALL dbt_copy(t_2c_tmp, rho_ao_1, move_data=.TRUE.)
2729 126 : CALL dbt_destroy(t_2c_tmp)
2730 :
2731 126 : IF (.NOT. do_resp) THEN
2732 94 : CALL dbt_copy(rho_ao_1, rho_ao_2)
2733 32 : ELSE IF (do_resp .AND. resp_only_prv) THEN
2734 :
2735 24 : CALL dbt_create(rho_ao_resp(i_spin)%matrix, t_2c_tmp)
2736 24 : CALL dbt_copy_matrix_to_tensor(rho_ao_resp(i_spin)%matrix, t_2c_tmp)
2737 24 : CALL dbt_copy(t_2c_tmp, rho_ao_2)
2738 : !symmetry allows to take 2*P_resp rasther than explicitely take all cross products
2739 24 : CALL dbt_copy(t_2c_tmp, rho_ao_2, summation=.TRUE., move_data=.TRUE.)
2740 24 : CALL dbt_destroy(t_2c_tmp)
2741 : ELSE
2742 :
2743 : !if not resp_only, need P-P_resp and P+P_resp
2744 8 : CALL dbt_copy(rho_ao_1, rho_ao_2)
2745 8 : CALL dbcsr_create(dbcsr_tmp, template=rho_ao_resp(i_spin)%matrix)
2746 8 : CALL dbcsr_add(dbcsr_tmp, rho_ao_resp(i_spin)%matrix, 0.0_dp, -1.0_dp)
2747 8 : CALL dbt_create(dbcsr_tmp, t_2c_tmp)
2748 8 : CALL dbt_copy_matrix_to_tensor(dbcsr_tmp, t_2c_tmp)
2749 8 : CALL dbt_copy(t_2c_tmp, rho_ao_1, summation=.TRUE., move_data=.TRUE.)
2750 8 : CALL dbcsr_release(dbcsr_tmp)
2751 :
2752 8 : CALL dbt_copy_matrix_to_tensor(rho_ao_resp(i_spin)%matrix, t_2c_tmp)
2753 8 : CALL dbt_copy(t_2c_tmp, rho_ao_2, summation=.TRUE., move_data=.TRUE.)
2754 8 : CALL dbt_destroy(t_2c_tmp)
2755 :
2756 : END IF
2757 126 : work_virial = 0.0_dp
2758 :
2759 126 : CALL timeset(routineN//"_3c", handle)
2760 : !Start looping of the batches
2761 504 : DO i_mem = 1, n_mem
2762 1134 : ibounds(:, 1) = [batch_start(i_mem), batch_end(i_mem)]
2763 :
2764 378 : CALL dbt_batched_contract_init(rho_ao_1)
2765 : CALL dbt_contract(1.0_dp, rho_ao_1, t_3c_int, 0.0_dp, t_3c_1, &
2766 : contract_1=[1], notcontract_1=[2], &
2767 : contract_2=[3], notcontract_2=[1, 2], &
2768 : map_1=[3], map_2=[1, 2], filter_eps=ri_data%filter_eps, &
2769 378 : bounds_2=ibounds, unit_nr=unit_nr_dbcsr, flop=nflop)
2770 378 : ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
2771 378 : CALL dbt_batched_contract_finalize(rho_ao_1)
2772 :
2773 378 : CALL dbt_copy(t_3c_1, t_3c_2, order=[3, 2, 1], move_data=.TRUE.)
2774 :
2775 1512 : DO j_mem = 1, n_mem
2776 3402 : jbounds(:, 1) = [batch_start(j_mem), batch_end(j_mem)]
2777 :
2778 1134 : CALL dbt_batched_contract_init(rho_ao_2)
2779 : CALL dbt_contract(1.0_dp, rho_ao_2, t_3c_2, 0.0_dp, t_3c_1, &
2780 : contract_1=[1], notcontract_1=[2], &
2781 : contract_2=[3], notcontract_2=[1, 2], &
2782 : map_1=[3], map_2=[1, 2], filter_eps=ri_data%filter_eps, &
2783 1134 : bounds_2=jbounds, unit_nr=unit_nr_dbcsr, flop=nflop)
2784 1134 : ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
2785 1134 : CALL dbt_batched_contract_finalize(rho_ao_2)
2786 :
2787 3402 : bounds_cpy(:, 1) = [batch_start(i_mem), batch_end(i_mem)]
2788 6840 : bounds_cpy(:, 2) = [1, SUM(ri_data%bsizes_RI)]
2789 3402 : bounds_cpy(:, 3) = [batch_start(j_mem), batch_end(j_mem)]
2790 1134 : CALL dbt_copy(t_3c_int, t_3c_int_2, order=[2, 1, 3], bounds=bounds_cpy)
2791 1134 : CALL dbt_copy(t_3c_1, t_3c_3, order=[2, 1, 3], move_data=.TRUE.)
2792 :
2793 4914 : DO k_mem = 1, n_mem_RI
2794 10206 : kbounds(:, 1) = [batch_start_RI(k_mem), batch_end_RI(k_mem)]
2795 :
2796 10206 : bounds_cpy(:, 1) = [batch_start_RI(k_mem), batch_end_RI(k_mem)]
2797 10206 : bounds_cpy(:, 2) = [batch_start(i_mem), batch_end(i_mem)]
2798 10206 : bounds_cpy(:, 3) = [batch_start(j_mem), batch_end(j_mem)]
2799 3402 : CALL dbt_copy(t_3c_sparse, t_3c_4, bounds=bounds_cpy)
2800 :
2801 : !Contract with the 2-center product S^-1 * V * S^-1 while keeping sparsity of derivatives
2802 3402 : CALL dbt_batched_contract_init(t_SVS)
2803 : CALL dbt_contract(1.0_dp, t_SVS, t_3c_3, 0.0_dp, t_3c_4, &
2804 : contract_1=[2], notcontract_1=[1], &
2805 : contract_2=[1], notcontract_2=[2, 3], &
2806 : map_1=[1], map_2=[2, 3], filter_eps=ri_data%filter_eps, &
2807 3402 : retain_sparsity=.TRUE., unit_nr=unit_nr_dbcsr, flop=nflop)
2808 3402 : ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
2809 3402 : CALL dbt_batched_contract_finalize(t_SVS)
2810 :
2811 3402 : CALL dbt_copy(t_3c_4, t_3c_5, summation=.TRUE., move_data=.TRUE.)
2812 :
2813 10206 : ijbounds(:, 1) = ibounds(:, 1)
2814 10206 : ijbounds(:, 2) = jbounds(:, 1)
2815 :
2816 : !Contract R_PS = (acP) M_acS
2817 3402 : CALL dbt_batched_contract_init(t_R)
2818 : CALL dbt_contract(1.0_dp, t_3c_int_2, t_3c_3, 1.0_dp, t_R, &
2819 : contract_1=[2, 3], notcontract_1=[1], &
2820 : contract_2=[2, 3], notcontract_2=[1], &
2821 : map_1=[1], map_2=[2], filter_eps=ri_data%filter_eps, &
2822 : bounds_1=ijbounds, bounds_3=kbounds, &
2823 3402 : unit_nr=unit_nr_dbcsr, flop=nflop)
2824 3402 : ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
2825 4536 : CALL dbt_batched_contract_finalize(t_R)
2826 :
2827 : END DO !k_mem
2828 : END DO !j_mem
2829 :
2830 378 : CALL dbt_copy(t_3c_5, t_3c_help_1, move_data=.TRUE.)
2831 :
2832 : !The force from the 3c derivatives
2833 378 : pref = -0.5_dp*2.0_dp*hf_fraction*spin_fac
2834 :
2835 1476 : DO k_mem = 1, SIZE(t_3c_der_RI_comp, 1)
2836 4392 : DO i_xyz = 1, 3
2837 3294 : CALL dbt_clear(t_3c_der_RI(i_xyz))
2838 : CALL decompress_tensor(t_3c_der_RI(i_xyz), t_3c_der_RI_ind(k_mem, i_xyz)%ind, &
2839 4392 : t_3c_der_RI_comp(k_mem, i_xyz), ri_data%filter_eps_storage)
2840 : END DO
2841 : CALL get_force_from_3c_trace(force, t_3c_help_1, t_3c_der_RI, atom_of_kind, kind_of, &
2842 1476 : idx_to_at_RI, pref)
2843 : END DO
2844 :
2845 378 : pref = -0.5_dp*4.0_dp*hf_fraction*spin_fac
2846 378 : IF (do_resp) THEN
2847 96 : pref = 0.5_dp*pref
2848 96 : CALL dbt_copy(t_3c_help_1, t_3c_help_2, order=[1, 3, 2])
2849 : END IF
2850 :
2851 1476 : DO k_mem = 1, SIZE(t_3c_der_AO_comp, 1)
2852 4392 : DO i_xyz = 1, 3
2853 3294 : CALL dbt_clear(t_3c_der_AO(i_xyz))
2854 : CALL decompress_tensor(t_3c_der_AO(i_xyz), t_3c_der_AO_ind(k_mem, i_xyz)%ind, &
2855 4392 : t_3c_der_AO_comp(k_mem, i_xyz), ri_data%filter_eps_storage)
2856 : END DO
2857 : CALL get_force_from_3c_trace(force, t_3c_help_1, t_3c_der_AO, atom_of_kind, kind_of, &
2858 1098 : idx_to_at_AO, pref, deriv_dim=2)
2859 :
2860 1476 : IF (do_resp) THEN
2861 : CALL get_force_from_3c_trace(force, t_3c_help_2, t_3c_der_AO, atom_of_kind, kind_of, &
2862 276 : idx_to_at_AO, pref, deriv_dim=2)
2863 : END IF
2864 : END DO
2865 :
2866 : !The 3c virial contribution. Note: only fraction of integrals correspondig to i_mem calculated
2867 378 : IF (use_virial) THEN
2868 12 : pref = -0.5_dp*2.0_dp*hf_fraction*spin_fac
2869 12 : CALL dbt_copy(t_3c_help_1, t_3c_virial, move_data=.TRUE.)
2870 : CALL calc_3c_virial(work_virial, t_3c_virial, pref, qs_env, nl_3c, basis_set_RI, &
2871 : basis_set_AO, basis_set_AO, ri_data%ri_metric, &
2872 12 : der_eps=ri_data%eps_schwarz_forces, op_pos=1)
2873 :
2874 12 : CALL dbt_clear(t_3c_virial)
2875 : END IF
2876 :
2877 378 : CALL dbt_clear(t_3c_help_1)
2878 504 : CALL dbt_clear(t_3c_help_2)
2879 : END DO !i_mem
2880 126 : CALL timestop(handle)
2881 :
2882 126 : CALL timeset(routineN//"_2c", handle)
2883 : !Now we deal with all the 2-center quantities
2884 : !Calculate S^-1 * R * S^-1
2885 : CALL dbt_contract(1.0_dp, ri_data%t_2c_inv(1, 1), t_R, 0.0_dp, t_2c_RI_tmp, &
2886 : contract_1=[2], notcontract_1=[1], &
2887 : contract_2=[1], notcontract_2=[2], &
2888 : map_1=[1], map_2=[2], filter_eps=ri_data%filter_eps, &
2889 126 : unit_nr=unit_nr_dbcsr, flop=nflop)
2890 126 : ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
2891 :
2892 : CALL dbt_contract(1.0_dp, t_2c_RI_tmp, ri_data%t_2c_inv(1, 1), 0.0_dp, t_2c_RI, &
2893 : contract_1=[2], notcontract_1=[1], &
2894 : contract_2=[1], notcontract_2=[2], &
2895 : map_1=[1], map_2=[2], filter_eps=ri_data%filter_eps, &
2896 126 : unit_nr=unit_nr_dbcsr, flop=nflop)
2897 126 : ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
2898 :
2899 : !Calculate the potential contribution to the force: [S^-1*R*S^-1]_QR d/dx (Q|R)
2900 126 : pref = 0.5_dp*hf_fraction*spin_fac
2901 126 : IF (.NOT. ri_data%same_op) pref = -pref
2902 126 : CALL get_2c_der_force(force, t_2c_RI, t_2c_der_RI, atom_of_kind, kind_of, idx_to_at_RI, pref)
2903 :
2904 : !Calculate the contribution to the virial on the fly
2905 126 : IF (use_virial_prv) THEN
2906 4 : CALL dbt_copy(t_2c_RI, t_2c_virial)
2907 4 : CALL dbt_copy_tensor_to_matrix(t_2c_virial, virial_trace)
2908 : CALL calc_2c_virial(work_virial, virial_trace, pref, qs_env, nl_2c_pot, &
2909 4 : basis_set_RI, basis_set_RI, ri_data%hfx_pot)
2910 : END IF
2911 :
2912 : !And that from the metric: [S^-1*R*S^-1*(Q|R)*S^-1]_UV d/dx S_UV
2913 126 : IF (.NOT. ri_data%same_op) THEN
2914 : CALL dbt_contract(1.0_dp, t_2c_RI, ri_data%t_2c_pot(1, 1), 0.0_dp, t_2c_RI_tmp, &
2915 : contract_1=[2], notcontract_1=[1], &
2916 : contract_2=[1], notcontract_2=[2], &
2917 : map_1=[1], map_2=[2], filter_eps=ri_data%filter_eps, &
2918 28 : unit_nr=unit_nr_dbcsr, flop=nflop)
2919 28 : ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
2920 :
2921 : CALL dbt_contract(1.0_dp, t_2c_RI_tmp, ri_data%t_2c_inv(1, 1), 0.0_dp, t_2c_RI, &
2922 : contract_1=[2], notcontract_1=[1], &
2923 : contract_2=[1], notcontract_2=[2], &
2924 : map_1=[1], map_2=[2], filter_eps=ri_data%filter_eps, &
2925 28 : unit_nr=unit_nr_dbcsr, flop=nflop)
2926 28 : ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
2927 :
2928 28 : pref = 0.5_dp*2.0_dp*hf_fraction*spin_fac
2929 28 : CALL get_2c_der_force(force, t_2c_RI, t_2c_der_metric, atom_of_kind, kind_of, idx_to_at_RI, pref)
2930 :
2931 28 : IF (use_virial_prv) THEN
2932 0 : CALL dbt_copy(t_2c_RI, t_2c_virial)
2933 0 : CALL dbt_copy_tensor_to_matrix(t_2c_virial, virial_trace)
2934 : CALL calc_2c_virial(work_virial, virial_trace, pref, qs_env, nl_2c_met, &
2935 0 : basis_set_RI, basis_set_RI, ri_data%ri_metric)
2936 : END IF
2937 : END IF
2938 126 : CALL dbt_clear(t_2c_RI)
2939 126 : CALL dbt_clear(t_2c_RI_tmp)
2940 126 : CALL dbt_clear(t_R)
2941 126 : CALL dbt_clear(t_3c_help_1)
2942 126 : CALL dbt_clear(t_3c_help_2)
2943 126 : CALL timestop(handle)
2944 :
2945 494 : IF (use_virial_prv) THEN
2946 16 : DO k_xyz = 1, 3
2947 52 : DO j_xyz = 1, 3
2948 156 : DO i_xyz = 1, 3
2949 : virial%pv_fock_4c(i_xyz, j_xyz) = virial%pv_fock_4c(i_xyz, j_xyz) &
2950 144 : + work_virial(i_xyz, k_xyz)*cell%hmat(j_xyz, k_xyz)
2951 : END DO
2952 : END DO
2953 : END DO
2954 : END IF
2955 : END DO !i_spin
2956 :
2957 116 : CALL dbt_batched_contract_finalize(t_3c_int)
2958 116 : CALL dbt_batched_contract_finalize(t_3c_int_2)
2959 116 : CALL dbt_batched_contract_finalize(t_3c_1)
2960 116 : CALL dbt_batched_contract_finalize(t_3c_2)
2961 116 : CALL dbt_batched_contract_finalize(t_3c_3)
2962 116 : CALL dbt_batched_contract_finalize(t_3c_4)
2963 116 : CALL dbt_batched_contract_finalize(t_3c_5)
2964 116 : CALL dbt_batched_contract_finalize(t_3c_sparse)
2965 :
2966 116 : CALL para_env%sync()
2967 116 : t2 = m_walltime()
2968 :
2969 116 : CALL dbt_copy(t_3c_int, ri_data%t_3c_int_ctr_2(1, 1), move_data=.TRUE.)
2970 :
2971 : !clean-up
2972 116 : CALL dbt_destroy(rho_ao_1)
2973 116 : CALL dbt_destroy(rho_ao_2)
2974 116 : CALL dbt_destroy(t_3c_ao_ri_ao)
2975 116 : CALL dbt_destroy(t_3c_ri_ao_ao)
2976 116 : CALL dbt_destroy(t_3c_int)
2977 116 : CALL dbt_destroy(t_3c_int_2)
2978 116 : CALL dbt_destroy(t_3c_1)
2979 116 : CALL dbt_destroy(t_3c_2)
2980 116 : CALL dbt_destroy(t_3c_3)
2981 116 : CALL dbt_destroy(t_3c_4)
2982 116 : CALL dbt_destroy(t_3c_5)
2983 116 : CALL dbt_destroy(t_3c_help_1)
2984 116 : CALL dbt_destroy(t_3c_help_2)
2985 116 : CALL dbt_destroy(t_3c_sparse)
2986 116 : CALL dbt_destroy(t_SVS)
2987 116 : CALL dbt_destroy(t_R)
2988 116 : CALL dbt_destroy(t_2c_RI)
2989 116 : CALL dbt_destroy(t_2c_RI_tmp)
2990 :
2991 464 : DO i_xyz = 1, 3
2992 348 : CALL dbt_destroy(t_3c_der_RI(i_xyz))
2993 348 : CALL dbt_destroy(t_3c_der_AO(i_xyz))
2994 348 : CALL dbt_destroy(t_2c_der_RI(i_xyz))
2995 464 : IF (.NOT. ri_data%same_op) CALL dbt_destroy(t_2c_der_metric(i_xyz))
2996 : END DO
2997 :
2998 464 : DO i_xyz = 1, 3
2999 1490 : DO i_mem = 1, SIZE(t_3c_der_AO_comp, 1)
3000 1026 : CALL dealloc_containers(t_3c_der_AO_comp(i_mem, i_xyz), dummy_int)
3001 1374 : CALL dealloc_containers(t_3c_der_RI_comp(i_mem, i_xyz), dummy_int)
3002 : END DO
3003 : END DO
3004 2168 : DEALLOCATE (t_3c_der_AO_ind, t_3c_der_RI_ind)
3005 :
3006 342 : DO ibasis = 1, SIZE(basis_set_AO)
3007 226 : orb_basis => basis_set_AO(ibasis)%gto_basis_set
3008 226 : ri_basis => basis_set_RI(ibasis)%gto_basis_set
3009 226 : CALL init_interaction_radii_orb_basis(orb_basis, dft_control%qs_control%eps_pgf_orb)
3010 342 : CALL init_interaction_radii_orb_basis(ri_basis, dft_control%qs_control%eps_pgf_orb)
3011 : END DO
3012 :
3013 116 : IF (use_virial) THEN
3014 4 : CALL release_neighbor_list_sets(nl_2c_met)
3015 4 : CALL release_neighbor_list_sets(nl_2c_pot)
3016 4 : CALL neighbor_list_3c_destroy(nl_3c)
3017 4 : CALL dbcsr_release(virial_trace)
3018 4 : CALL dbt_destroy(t_2c_virial)
3019 4 : CALL dbt_destroy(t_3c_virial)
3020 : END IF
3021 :
3022 2552 : END SUBROUTINE hfx_ri_forces_Pmat
3023 :
3024 : ! **************************************************************************************************
3025 : !> \brief the general routine that calls the relevant force code
3026 : !> \param qs_env ...
3027 : !> \param ri_data ...
3028 : !> \param nspins ...
3029 : !> \param hf_fraction ...
3030 : !> \param rho_ao ...
3031 : !> \param rho_ao_resp ...
3032 : !> \param mos ...
3033 : !> \param use_virial ...
3034 : !> \param resp_only ...
3035 : !> \param rescale_factor ...
3036 : ! **************************************************************************************************
3037 130 : SUBROUTINE hfx_ri_update_forces(qs_env, ri_data, nspins, hf_fraction, rho_ao, rho_ao_resp, &
3038 130 : mos, use_virial, resp_only, rescale_factor)
3039 :
3040 : TYPE(qs_environment_type), POINTER :: qs_env
3041 : TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
3042 : INTEGER, INTENT(IN) :: nspins
3043 : REAL(KIND=dp), INTENT(IN) :: hf_fraction
3044 : TYPE(dbcsr_p_type), DIMENSION(:, :), OPTIONAL :: rho_ao
3045 : TYPE(dbcsr_p_type), DIMENSION(:), OPTIONAL :: rho_ao_resp
3046 : TYPE(mo_set_type), DIMENSION(:), INTENT(IN), &
3047 : OPTIONAL :: mos
3048 : LOGICAL, INTENT(IN), OPTIONAL :: use_virial, resp_only
3049 : REAL(dp), INTENT(IN), OPTIONAL :: rescale_factor
3050 :
3051 : CHARACTER(LEN=*), PARAMETER :: routineN = 'hfx_ri_update_forces'
3052 :
3053 : INTEGER :: handle, ispin
3054 : INTEGER, DIMENSION(2) :: homo
3055 130 : REAL(KIND=dp), DIMENSION(:), POINTER :: mo_eigenvalues
3056 : TYPE(cp_fm_type), POINTER :: mo_coeff
3057 390 : TYPE(dbcsr_type), DIMENSION(2) :: mo_coeff_b
3058 : TYPE(dbcsr_type), POINTER :: mo_coeff_b_tmp
3059 :
3060 130 : CALL timeset(routineN, handle)
3061 :
3062 144 : SELECT CASE (ri_data%flavor)
3063 : CASE (ri_mo)
3064 :
3065 32 : DO ispin = 1, nspins
3066 18 : NULLIFY (mo_coeff_b_tmp)
3067 18 : CPASSERT(mos(ispin)%uniform_occupation)
3068 18 : CALL get_mo_set(mo_set=mos(ispin), mo_coeff=mo_coeff, eigenvalues=mo_eigenvalues, mo_coeff_b=mo_coeff_b_tmp)
3069 :
3070 18 : IF (.NOT. mos(ispin)%use_mo_coeff_b) CALL copy_fm_to_dbcsr(mo_coeff, mo_coeff_b_tmp)
3071 32 : CALL dbcsr_copy(mo_coeff_b(ispin), mo_coeff_b_tmp)
3072 : END DO
3073 :
3074 32 : DO ispin = 1, nspins
3075 18 : CALL dbcsr_scale(mo_coeff_b(ispin), SQRT(mos(ispin)%maxocc))
3076 14 : homo(ispin) = mos(ispin)%homo
3077 : END DO
3078 :
3079 14 : CALL hfx_ri_forces_mo(qs_env, ri_data, nspins, hf_fraction, mo_coeff_b, use_virial)
3080 :
3081 : CASE (ri_pmat)
3082 :
3083 : CALL hfx_ri_forces_Pmat(qs_env, ri_data, nspins, hf_fraction, rho_ao, rho_ao_resp, use_virial, &
3084 216 : resp_only, rescale_factor)
3085 : END SELECT
3086 :
3087 274 : DO ispin = 1, nspins
3088 274 : CALL dbcsr_release(mo_coeff_b(ispin))
3089 : END DO
3090 :
3091 130 : CALL timestop(handle)
3092 :
3093 130 : END SUBROUTINE hfx_ri_update_forces
3094 :
3095 : ! **************************************************************************************************
3096 : !> \brief Calculate the derivatives tensors for the force, in a format fit for contractions
3097 : !> \param t_3c_der_RI_comp compressed RI derivatives
3098 : !> \param t_3c_der_AO_comp compressed AO derivatives
3099 : !> \param t_3c_der_RI_ind ...
3100 : !> \param t_3c_der_AO_ind ...
3101 : !> \param t_2c_der_RI format based on standard atomic block sizes
3102 : !> \param t_2c_der_metric format based on standard atomic block sizes
3103 : !> \param ri_ao_ao_template ...
3104 : !> \param basis_set_AO ...
3105 : !> \param basis_set_RI ...
3106 : !> \param ri_data ...
3107 : !> \param qs_env ...
3108 : !> \param nl_2c_pot ...
3109 : !> \param nl_2c_met ...
3110 : !> \param nl_3c_out ...
3111 : !> \param t_3c_virial ...
3112 : ! **************************************************************************************************
3113 4160 : SUBROUTINE precalc_derivatives(t_3c_der_RI_comp, t_3c_der_AO_comp, t_3c_der_RI_ind, t_3c_der_AO_ind, &
3114 : t_2c_der_RI, t_2c_der_metric, ri_ao_ao_template, &
3115 : basis_set_AO, basis_set_RI, ri_data, qs_env, &
3116 : nl_2c_pot, nl_2c_met, nl_3c_out, t_3c_virial)
3117 :
3118 : TYPE(hfx_compression_type), ALLOCATABLE, &
3119 : DIMENSION(:, :), INTENT(INOUT) :: t_3c_der_RI_comp, t_3c_der_AO_comp
3120 : TYPE(block_ind_type), ALLOCATABLE, DIMENSION(:, :) :: t_3c_der_RI_ind, t_3c_der_AO_ind
3121 : TYPE(dbt_type), DIMENSION(3), INTENT(OUT) :: t_2c_der_RI, t_2c_der_metric
3122 : TYPE(dbt_type), INTENT(INOUT) :: ri_ao_ao_template
3123 : TYPE(gto_basis_set_p_type), ALLOCATABLE, &
3124 : DIMENSION(:), TARGET :: basis_set_AO, basis_set_RI
3125 : TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
3126 : TYPE(qs_environment_type), POINTER :: qs_env
3127 : TYPE(neighbor_list_set_p_type), DIMENSION(:), &
3128 : OPTIONAL, POINTER :: nl_2c_pot, nl_2c_met
3129 : TYPE(neighbor_list_3c_type), OPTIONAL :: nl_3c_out
3130 : TYPE(dbt_type), INTENT(INOUT), OPTIONAL :: t_3c_virial
3131 :
3132 : CHARACTER(LEN=*), PARAMETER :: routineN = 'precalc_derivatives'
3133 :
3134 : INTEGER :: handle, i_mem, i_xyz, n_mem, natom, &
3135 : nkind, nthreads
3136 : INTEGER(int_8) :: nze, nze_tot
3137 130 : INTEGER, ALLOCATABLE, DIMENSION(:) :: dist1, dist2, dist_AO_1, dist_AO_2, &
3138 130 : dist_RI, dummy_end, dummy_start, &
3139 260 : end_blocks, start_blocks
3140 : INTEGER, DIMENSION(3) :: pcoord, pdims
3141 260 : INTEGER, DIMENSION(:), POINTER :: col_bsize, row_bsize
3142 : REAL(dp) :: compression_factor, memory, occ
3143 : TYPE(dbcsr_distribution_type) :: dbcsr_dist
3144 1690 : TYPE(dbcsr_type), DIMENSION(1, 3) :: t_2c_der_metric_prv, t_2c_der_RI_prv
3145 390 : TYPE(dbt_pgrid_type) :: pgrid
3146 3380 : TYPE(dbt_type) :: t_2c_template, t_2c_tmp, t_3c_template
3147 130 : TYPE(dbt_type), ALLOCATABLE, DIMENSION(:, :, :) :: t_3c_der_AO_prv, t_3c_der_RI_prv
3148 : TYPE(dft_control_type), POINTER :: dft_control
3149 : TYPE(distribution_2d_type), POINTER :: dist_2d
3150 : TYPE(distribution_3d_type) :: dist_3d, dist_3d_out
3151 130 : TYPE(mp_cart_type) :: mp_comm_t3c, mp_comm_t3c_out
3152 : TYPE(mp_para_env_type), POINTER :: para_env
3153 : TYPE(neighbor_list_3c_type) :: nl_3c
3154 : TYPE(neighbor_list_set_p_type), DIMENSION(:), &
3155 130 : POINTER :: nl_2c
3156 130 : TYPE(particle_type), DIMENSION(:), POINTER :: particle_set
3157 130 : TYPE(qs_kind_type), DIMENSION(:), POINTER :: qs_kind_set
3158 :
3159 130 : NULLIFY (qs_kind_set, dist_2d, nl_2c, particle_set, dft_control, para_env)
3160 :
3161 130 : CALL timeset(routineN, handle)
3162 :
3163 : CALL get_qs_env(qs_env, nkind=nkind, qs_kind_set=qs_kind_set, distribution_2d=dist_2d, natom=natom, &
3164 130 : particle_set=particle_set, dft_control=dft_control, para_env=para_env)
3165 :
3166 : !TODO: is such a pgrid correct?
3167 : !Dealing with the 3c derivatives
3168 130 : nthreads = 1
3169 130 : !$ nthreads = omp_get_num_threads()
3170 130 : pdims = 0
3171 520 : CALL dbt_pgrid_create(para_env, pdims, pgrid, tensor_dims=[MAX(1, natom/(ri_data%n_mem*nthreads)), natom, natom])
3172 :
3173 : CALL create_3c_tensor(t_3c_template, dist_RI, dist_AO_1, dist_AO_2, pgrid, &
3174 : ri_data%bsizes_RI, ri_data%bsizes_AO, ri_data%bsizes_AO, &
3175 : map1=[1], map2=[2, 3], &
3176 130 : name="der (RI AO | AO)")
3177 :
3178 4550 : ALLOCATE (t_3c_der_AO_prv(1, 1, 3), t_3c_der_RI_prv(1, 1, 3))
3179 520 : DO i_xyz = 1, 3
3180 390 : CALL dbt_create(t_3c_template, t_3c_der_RI_prv(1, 1, i_xyz))
3181 520 : CALL dbt_create(t_3c_template, t_3c_der_AO_prv(1, 1, i_xyz))
3182 : END DO
3183 130 : IF (PRESENT(t_3c_virial)) THEN
3184 4 : CALL dbt_create(t_3c_template, t_3c_virial)
3185 : END IF
3186 130 : CALL dbt_destroy(t_3c_template)
3187 :
3188 130 : CALL dbt_mp_environ_pgrid(pgrid, pdims, pcoord)
3189 130 : CALL mp_comm_t3c%create(pgrid%mp_comm_2d, 3, pdims)
3190 : CALL distribution_3d_create(dist_3d, dist_RI, dist_AO_1, dist_AO_2, &
3191 130 : nkind, particle_set, mp_comm_t3c, own_comm=.TRUE.)
3192 :
3193 : CALL build_3c_neighbor_lists(nl_3c, basis_set_RI, basis_set_AO, basis_set_AO, dist_3d, ri_data%ri_metric, &
3194 130 : "HFX_3c_nl", qs_env, op_pos=1, sym_jk=.TRUE., own_dist=.TRUE.)
3195 :
3196 130 : IF (PRESENT(nl_3c_out)) THEN
3197 4 : CALL dbt_mp_environ_pgrid(pgrid, pdims, pcoord)
3198 4 : CALL mp_comm_t3c_out%create(pgrid%mp_comm_2d, 3, pdims)
3199 : CALL distribution_3d_create(dist_3d_out, dist_RI, dist_AO_1, dist_AO_2, &
3200 4 : nkind, particle_set, mp_comm_t3c_out, own_comm=.TRUE.)
3201 : CALL build_3c_neighbor_lists(nl_3c_out, basis_set_RI, basis_set_AO, basis_set_AO, dist_3d_out, &
3202 : ri_data%ri_metric, "HFX_3c_nl", qs_env, op_pos=1, sym_jk=.FALSE., &
3203 4 : own_dist=.TRUE.)
3204 : END IF
3205 130 : DEALLOCATE (dist_RI, dist_AO_1, dist_AO_2)
3206 130 : CALL dbt_pgrid_destroy(pgrid)
3207 :
3208 130 : n_mem = ri_data%n_mem
3209 : CALL create_tensor_batches(ri_data%bsizes_RI, n_mem, dummy_start, dummy_end, &
3210 130 : start_blocks, end_blocks)
3211 130 : DEALLOCATE (dummy_start, dummy_end)
3212 :
3213 553008 : ALLOCATE (t_3c_der_AO_comp(n_mem, 3), t_3c_der_RI_comp(n_mem, 3))
3214 3628 : ALLOCATE (t_3c_der_AO_ind(n_mem, 3), t_3c_der_RI_ind(n_mem, 3))
3215 :
3216 130 : memory = 0.0_dp
3217 130 : nze_tot = 0
3218 518 : DO i_mem = 1, n_mem
3219 : CALL build_3c_derivatives(t_3c_der_RI_prv, t_3c_der_AO_prv, ri_data%filter_eps, qs_env, &
3220 : nl_3c, basis_set_RI, basis_set_AO, basis_set_AO, &
3221 : ri_data%ri_metric, der_eps=ri_data%eps_schwarz_forces, op_pos=1, &
3222 1164 : bounds_i=[start_blocks(i_mem), end_blocks(i_mem)])
3223 :
3224 1682 : DO i_xyz = 1, 3
3225 1164 : CALL dbt_copy(t_3c_der_RI_prv(1, 1, i_xyz), ri_ao_ao_template, move_data=.TRUE.)
3226 1164 : CALL dbt_filter(ri_ao_ao_template, ri_data%filter_eps)
3227 1164 : CALL get_tensor_occupancy(ri_ao_ao_template, nze, occ)
3228 1164 : nze_tot = nze_tot + nze
3229 :
3230 1164 : CALL alloc_containers(t_3c_der_RI_comp(i_mem, i_xyz), 1)
3231 : CALL compress_tensor(ri_ao_ao_template, t_3c_der_RI_ind(i_mem, i_xyz)%ind, &
3232 1164 : t_3c_der_RI_comp(i_mem, i_xyz), ri_data%filter_eps_storage, memory)
3233 1164 : CALL dbt_clear(ri_ao_ao_template)
3234 :
3235 : !put AO derivative as middle index
3236 1164 : CALL dbt_copy(t_3c_der_AO_prv(1, 1, i_xyz), ri_ao_ao_template, order=[1, 3, 2], move_data=.TRUE.)
3237 1164 : CALL dbt_filter(ri_ao_ao_template, ri_data%filter_eps)
3238 1164 : CALL get_tensor_occupancy(ri_ao_ao_template, nze, occ)
3239 1164 : nze_tot = nze_tot + nze
3240 :
3241 1164 : CALL alloc_containers(t_3c_der_AO_comp(i_mem, i_xyz), 1)
3242 : CALL compress_tensor(ri_ao_ao_template, t_3c_der_AO_ind(i_mem, i_xyz)%ind, &
3243 1164 : t_3c_der_AO_comp(i_mem, i_xyz), ri_data%filter_eps_storage, memory)
3244 3880 : CALL dbt_clear(ri_ao_ao_template)
3245 : END DO
3246 : END DO
3247 :
3248 130 : CALL neighbor_list_3c_destroy(nl_3c)
3249 520 : DO i_xyz = 1, 3
3250 390 : CALL dbt_destroy(t_3c_der_RI_prv(1, 1, i_xyz))
3251 520 : CALL dbt_destroy(t_3c_der_AO_prv(1, 1, i_xyz))
3252 : END DO
3253 :
3254 130 : CALL para_env%sum(memory)
3255 130 : compression_factor = REAL(nze_tot, dp)*1.0E-06*8.0_dp/memory
3256 130 : IF (ri_data%unit_nr > 0) THEN
3257 : WRITE (UNIT=ri_data%unit_nr, FMT="((T3,A,T66,F11.2,A4))") &
3258 14 : "MEMORY_INFO| Memory for 3-center HFX derivatives (compressed):", memory, ' MiB'
3259 :
3260 : WRITE (UNIT=ri_data%unit_nr, FMT="((T3,A,T60,F21.2))") &
3261 14 : "MEMORY_INFO| Compression factor: ", compression_factor
3262 : END IF
3263 :
3264 : !Deal with the 2-center derivatives
3265 130 : CALL cp_dbcsr_dist2d_to_dist(dist_2d, dbcsr_dist)
3266 390 : ALLOCATE (row_bsize(SIZE(ri_data%bsizes_RI)))
3267 260 : ALLOCATE (col_bsize(SIZE(ri_data%bsizes_RI)))
3268 530 : row_bsize(:) = ri_data%bsizes_RI
3269 530 : col_bsize(:) = ri_data%bsizes_RI
3270 :
3271 : CALL build_2c_neighbor_lists(nl_2c, basis_set_RI, basis_set_RI, ri_data%hfx_pot, &
3272 130 : "HFX_2c_nl_pot", qs_env, sym_ij=.TRUE., dist_2d=dist_2d)
3273 :
3274 520 : DO i_xyz = 1, 3
3275 : CALL dbcsr_create(t_2c_der_RI_prv(1, i_xyz), "(R|P) HFX der", dbcsr_dist, &
3276 520 : dbcsr_type_antisymmetric, row_bsize, col_bsize)
3277 : END DO
3278 :
3279 : CALL build_2c_derivatives(t_2c_der_RI_prv, ri_data%filter_eps_2c, qs_env, nl_2c, basis_set_RI, &
3280 130 : basis_set_RI, ri_data%hfx_pot)
3281 130 : CALL release_neighbor_list_sets(nl_2c)
3282 :
3283 130 : IF (PRESENT(nl_2c_pot)) THEN
3284 4 : NULLIFY (nl_2c_pot)
3285 : CALL build_2c_neighbor_lists(nl_2c_pot, basis_set_RI, basis_set_RI, ri_data%hfx_pot, &
3286 4 : "HFX_2c_nl_pot", qs_env, sym_ij=.FALSE., dist_2d=dist_2d)
3287 : END IF
3288 :
3289 : !copy 2c derivative tensor into the standard format
3290 : CALL create_2c_tensor(t_2c_template, dist1, dist2, ri_data%pgrid_2d, ri_data%bsizes_RI_split, &
3291 130 : ri_data%bsizes_RI_split, name='(RI| RI)')
3292 130 : DEALLOCATE (dist1, dist2)
3293 :
3294 520 : DO i_xyz = 1, 3
3295 390 : CALL dbt_create(t_2c_der_RI_prv(1, i_xyz), t_2c_tmp)
3296 390 : CALL dbt_copy_matrix_to_tensor(t_2c_der_RI_prv(1, i_xyz), t_2c_tmp)
3297 :
3298 390 : CALL dbt_create(t_2c_template, t_2c_der_RI(i_xyz))
3299 390 : CALL dbt_copy(t_2c_tmp, t_2c_der_RI(i_xyz), move_data=.TRUE.)
3300 :
3301 390 : CALL dbt_destroy(t_2c_tmp)
3302 520 : CALL dbcsr_release(t_2c_der_RI_prv(1, i_xyz))
3303 : END DO
3304 :
3305 : !Repeat with the metric, if required
3306 130 : IF (.NOT. ri_data%same_op) THEN
3307 :
3308 : CALL build_2c_neighbor_lists(nl_2c, basis_set_RI, basis_set_RI, ri_data%ri_metric, &
3309 32 : "HFX_2c_nl_RI", qs_env, sym_ij=.TRUE., dist_2d=dist_2d)
3310 :
3311 128 : DO i_xyz = 1, 3
3312 : CALL dbcsr_create(t_2c_der_metric_prv(1, i_xyz), "(R|P) HFX der", dbcsr_dist, &
3313 128 : dbcsr_type_antisymmetric, row_bsize, col_bsize)
3314 : END DO
3315 :
3316 : CALL build_2c_derivatives(t_2c_der_metric_prv, ri_data%filter_eps_2c, qs_env, nl_2c, &
3317 32 : basis_set_RI, basis_set_RI, ri_data%ri_metric)
3318 32 : CALL release_neighbor_list_sets(nl_2c)
3319 :
3320 32 : IF (PRESENT(nl_2c_met)) THEN
3321 0 : NULLIFY (nl_2c_met)
3322 : CALL build_2c_neighbor_lists(nl_2c_met, basis_set_RI, basis_set_RI, ri_data%ri_metric, &
3323 0 : "HFX_2c_nl_RI", qs_env, sym_ij=.FALSE., dist_2d=dist_2d)
3324 : END IF
3325 :
3326 128 : DO i_xyz = 1, 3
3327 96 : CALL dbt_create(t_2c_der_metric_prv(1, i_xyz), t_2c_tmp)
3328 96 : CALL dbt_copy_matrix_to_tensor(t_2c_der_metric_prv(1, i_xyz), t_2c_tmp)
3329 :
3330 96 : CALL dbt_create(t_2c_template, t_2c_der_metric(i_xyz))
3331 96 : CALL dbt_copy(t_2c_tmp, t_2c_der_metric(i_xyz), move_data=.TRUE.)
3332 :
3333 96 : CALL dbt_destroy(t_2c_tmp)
3334 128 : CALL dbcsr_release(t_2c_der_metric_prv(1, i_xyz))
3335 : END DO
3336 :
3337 : END IF
3338 :
3339 130 : CALL dbt_destroy(t_2c_template)
3340 130 : CALL dbcsr_distribution_release(dbcsr_dist)
3341 130 : DEALLOCATE (row_bsize, col_bsize)
3342 :
3343 130 : CALL timestop(handle)
3344 :
3345 1690 : END SUBROUTINE precalc_derivatives
3346 :
3347 : ! **************************************************************************************************
3348 : !> \brief This routines calculates the force contribution from a trace over 3D tensors, i.e.
3349 : !> force = sum_ijk A_ijk B_ijk. An iteration over the blocks is made, which index determin
3350 : !> the atom on which the force acts
3351 : !> \param force ...
3352 : !> \param t_3c_contr ...
3353 : !> \param t_3c_der ...
3354 : !> \param atom_of_kind ...
3355 : !> \param kind_of ...
3356 : !> \param idx_to_at ...
3357 : !> \param pref ...
3358 : !> \param do_mp2 ...
3359 : !> \param deriv_dim the dimension of the tensor corresponding to the derivative (by default 1)
3360 : ! **************************************************************************************************
3361 3862 : SUBROUTINE get_force_from_3c_trace(force, t_3c_contr, t_3c_der, atom_of_kind, kind_of, idx_to_at, &
3362 : pref, do_mp2, deriv_dim)
3363 :
3364 : TYPE(qs_force_type), DIMENSION(:), POINTER :: force
3365 : TYPE(dbt_type), INTENT(INOUT) :: t_3c_contr
3366 : TYPE(dbt_type), DIMENSION(3), INTENT(INOUT) :: t_3c_der
3367 : INTEGER, DIMENSION(:), INTENT(IN) :: atom_of_kind, kind_of, idx_to_at
3368 : REAL(dp), INTENT(IN) :: pref
3369 : LOGICAL, INTENT(IN), OPTIONAL :: do_mp2
3370 : INTEGER, INTENT(IN), OPTIONAL :: deriv_dim
3371 :
3372 : CHARACTER(LEN=*), PARAMETER :: routineN = 'get_force_from_3c_trace'
3373 :
3374 : INTEGER :: deriv_dim_prv, handle, i_xyz, iat, &
3375 : iat_of_kind, ikind, j_xyz
3376 : INTEGER, DIMENSION(3) :: ind
3377 : LOGICAL :: do_mp2_prv, found
3378 : REAL(dp) :: new_force
3379 3862 : REAL(dp), ALLOCATABLE, DIMENSION(:, :, :), TARGET :: contr_blk, der_blk
3380 : REAL(dp), DIMENSION(3) :: scoord
3381 : TYPE(dbt_iterator_type) :: iter
3382 :
3383 3862 : CALL timeset(routineN, handle)
3384 :
3385 3862 : do_mp2_prv = .FALSE.
3386 3862 : IF (PRESENT(do_mp2)) do_mp2_prv = do_mp2
3387 :
3388 3862 : deriv_dim_prv = 1
3389 3862 : IF (PRESENT(deriv_dim)) deriv_dim_prv = deriv_dim
3390 :
3391 : !$OMP PARALLEL DEFAULT(NONE) &
3392 : !$OMP SHARED(t_3c_der,t_3c_contr,force,do_mp2_prv,deriv_dim_prv,pref,idx_to_at,atom_of_kind,kind_of) &
3393 3862 : !$OMP PRIVATE(i_xyz,j_xyz,iter,ind,der_blk,contr_blk,found,new_force,iat,iat_of_kind,ikind,scoord)
3394 : DO i_xyz = 1, 3
3395 : CALL dbt_iterator_start(iter, t_3c_der(i_xyz))
3396 : DO WHILE (dbt_iterator_blocks_left(iter))
3397 : CALL dbt_iterator_next_block(iter, ind)
3398 :
3399 : CALL dbt_get_block(t_3c_der(i_xyz), ind, der_blk, found)
3400 : CPASSERT(found)
3401 : CALL dbt_get_block(t_3c_contr, ind, contr_blk, found)
3402 :
3403 : IF (found) THEN
3404 :
3405 : !take the trace of the blocks
3406 : new_force = pref*SUM(der_blk(:, :, :)*contr_blk(:, :, :))
3407 :
3408 : !the first index of the derivative tensor defines the atom
3409 : iat = idx_to_at(ind(deriv_dim_prv))
3410 : iat_of_kind = atom_of_kind(iat)
3411 : ikind = kind_of(iat)
3412 :
3413 : IF (.NOT. do_mp2_prv) THEN
3414 : !$OMP ATOMIC
3415 : force(ikind)%fock_4c(i_xyz, iat_of_kind) = force(ikind)%fock_4c(i_xyz, iat_of_kind) &
3416 : + new_force
3417 : ELSE
3418 : !$OMP ATOMIC
3419 : force(ikind)%mp2_non_sep(i_xyz, iat_of_kind) = force(ikind)%mp2_non_sep(i_xyz, iat_of_kind) &
3420 : + new_force
3421 : END IF
3422 :
3423 : DEALLOCATE (contr_blk)
3424 : END IF
3425 : DEALLOCATE (der_blk)
3426 : END DO !iter
3427 : CALL dbt_iterator_stop(iter)
3428 : END DO
3429 : !$OMP END PARALLEL
3430 3862 : CALL timestop(handle)
3431 :
3432 7724 : END SUBROUTINE get_force_from_3c_trace
3433 :
3434 : ! **************************************************************************************************
3435 : !> \brief Update the forces due to the derivative of the a 2-center product d/dR (Q|R)
3436 : !> \param force ...
3437 : !> \param t_2c_contr A precontracted tensor containing sum_abcdPS (ab|P)(P|Q)^-1 (R|S)^-1 (S|cd) P_ac P_bd
3438 : !> \param t_2c_der the d/dR (Q|R) tensor, in all 3 cartesian directions
3439 : !> \param atom_of_kind ...
3440 : !> \param kind_of ...
3441 : !> \param idx_to_at ...
3442 : !> \param pref ...
3443 : !> \param do_mp2 ...
3444 : !> \param do_ovlp ...
3445 : !> \note IMPORTANT: t_tc_contr and t_2c_der need to have the same distribution
3446 : ! **************************************************************************************************
3447 610 : SUBROUTINE get_2c_der_force(force, t_2c_contr, t_2c_der, atom_of_kind, kind_of, idx_to_at, &
3448 : pref, do_mp2, do_ovlp)
3449 :
3450 : TYPE(qs_force_type), DIMENSION(:), POINTER :: force
3451 : TYPE(dbt_type), INTENT(INOUT) :: t_2c_contr
3452 : TYPE(dbt_type), DIMENSION(3), INTENT(INOUT) :: t_2c_der
3453 : INTEGER, DIMENSION(:), INTENT(IN) :: atom_of_kind, kind_of, idx_to_at
3454 : REAL(dp), INTENT(IN) :: pref
3455 : LOGICAL, INTENT(IN), OPTIONAL :: do_mp2, do_ovlp
3456 :
3457 : CHARACTER(LEN=*), PARAMETER :: routineN = 'get_2c_der_force'
3458 :
3459 : INTEGER :: handle, i_xyz, iat, iat_of_kind, ikind, &
3460 : j_xyz, jat, jat_of_kind, jkind
3461 : INTEGER, DIMENSION(2) :: ind
3462 : LOGICAL :: do_mp2_prv, do_ovlp_prv, found
3463 : REAL(dp) :: new_force
3464 610 : REAL(dp), ALLOCATABLE, DIMENSION(:, :), TARGET :: contr_blk, der_blk
3465 : REAL(dp), DIMENSION(3) :: scoord
3466 : TYPE(dbt_iterator_type) :: iter
3467 :
3468 : !Loop over the blocks of d/dR (Q|R), contract with the corresponding block of t_2c_contr and
3469 : !update the relevant force
3470 :
3471 610 : CALL timeset(routineN, handle)
3472 :
3473 610 : do_mp2_prv = .FALSE.
3474 610 : IF (PRESENT(do_mp2)) do_mp2_prv = do_mp2
3475 :
3476 610 : do_ovlp_prv = .FALSE.
3477 610 : IF (PRESENT(do_ovlp)) do_ovlp_prv = do_ovlp
3478 :
3479 : !$OMP PARALLEL DEFAULT(NONE) &
3480 : !$OMP SHARED(t_2c_der,t_2c_contr,force,do_mp2_prv,do_ovlp_prv,pref,idx_to_at,atom_of_kind,kind_of) &
3481 : !$OMP PRIVATE(i_xyz,j_xyz,iter,ind,der_blk,contr_blk,found,new_force) &
3482 610 : !$OMP PRIVATE(iat,jat,iat_of_kind,jat_of_kind,ikind,jkind,scoord)
3483 : DO i_xyz = 1, 3
3484 : CALL dbt_iterator_start(iter, t_2c_der(i_xyz))
3485 : DO WHILE (dbt_iterator_blocks_left(iter))
3486 : CALL dbt_iterator_next_block(iter, ind)
3487 :
3488 : IF (ind(1) == ind(2)) CYCLE
3489 :
3490 : CALL dbt_get_block(t_2c_der(i_xyz), ind, der_blk, found)
3491 : CPASSERT(found)
3492 : CALL dbt_get_block(t_2c_contr, ind, contr_blk, found)
3493 :
3494 : IF (found) THEN
3495 :
3496 : !an element of d/dR (Q|R) corresponds to 2 things because of translational invariance
3497 : !(Q'| R) = - (Q| R'), once wrt the center on Q, and once on R
3498 : new_force = pref*SUM(der_blk(:, :)*contr_blk(:, :))
3499 :
3500 : iat = idx_to_at(ind(1))
3501 : iat_of_kind = atom_of_kind(iat)
3502 : ikind = kind_of(iat)
3503 :
3504 : IF (do_mp2_prv) THEN
3505 : !$OMP ATOMIC
3506 : force(ikind)%mp2_non_sep(i_xyz, iat_of_kind) = force(ikind)%mp2_non_sep(i_xyz, iat_of_kind) &
3507 : + new_force
3508 : ELSE IF (do_ovlp_prv) THEN
3509 : !$OMP ATOMIC
3510 : force(ikind)%overlap(i_xyz, iat_of_kind) = force(ikind)%overlap(i_xyz, iat_of_kind) &
3511 : + new_force
3512 : ELSE
3513 : !$OMP ATOMIC
3514 : force(ikind)%fock_4c(i_xyz, iat_of_kind) = force(ikind)%fock_4c(i_xyz, iat_of_kind) &
3515 : + new_force
3516 : END IF
3517 :
3518 : jat = idx_to_at(ind(2))
3519 : jat_of_kind = atom_of_kind(jat)
3520 : jkind = kind_of(jat)
3521 :
3522 : IF (do_mp2_prv) THEN
3523 : !$OMP ATOMIC
3524 : force(jkind)%mp2_non_sep(i_xyz, jat_of_kind) = force(jkind)%mp2_non_sep(i_xyz, jat_of_kind) &
3525 : - new_force
3526 : ELSE IF (do_ovlp_prv) THEN
3527 : !$OMP ATOMIC
3528 : force(jkind)%overlap(i_xyz, jat_of_kind) = force(jkind)%overlap(i_xyz, jat_of_kind) &
3529 : - new_force
3530 : ELSE
3531 : !$OMP ATOMIC
3532 : force(jkind)%fock_4c(i_xyz, jat_of_kind) = force(jkind)%fock_4c(i_xyz, jat_of_kind) &
3533 : - new_force
3534 : END IF
3535 :
3536 : DEALLOCATE (contr_blk)
3537 : END IF
3538 :
3539 : DEALLOCATE (der_blk)
3540 : END DO !iter
3541 : CALL dbt_iterator_stop(iter)
3542 :
3543 : END DO !i_xyz
3544 : !$OMP END PARALLEL
3545 610 : CALL timestop(handle)
3546 :
3547 1220 : END SUBROUTINE get_2c_der_force
3548 :
3549 : ! **************************************************************************************************
3550 : !> \brief Get the force from a contraction of type SUM_a,beta (a|beta') C_a,beta, where beta is an AO
3551 : !> and a is a MO
3552 : !> \param force ...
3553 : !> \param t_mo_coeff ...
3554 : !> \param t_2c_MO_AO ...
3555 : !> \param atom_of_kind ...
3556 : !> \param kind_of ...
3557 : !> \param idx_to_at ...
3558 : !> \param pref ...
3559 : !> \param i_xyz ...
3560 : ! **************************************************************************************************
3561 54 : SUBROUTINE get_MO_AO_force(force, t_mo_coeff, t_2c_MO_AO, atom_of_kind, kind_of, idx_to_at, pref, i_xyz)
3562 :
3563 : TYPE(qs_force_type), DIMENSION(:), POINTER :: force
3564 : TYPE(dbt_type), INTENT(INOUT) :: t_mo_coeff, t_2c_MO_AO
3565 : INTEGER, DIMENSION(:), INTENT(IN) :: atom_of_kind, kind_of, idx_to_at
3566 : REAL(dp), INTENT(IN) :: pref
3567 : INTEGER, INTENT(IN) :: i_xyz
3568 :
3569 : CHARACTER(LEN=*), PARAMETER :: routineN = 'get_MO_AO_force'
3570 :
3571 : INTEGER :: handle, iat, iat_of_kind, ikind, j_xyz
3572 : INTEGER, DIMENSION(2) :: ind
3573 : LOGICAL :: found
3574 : REAL(dp) :: new_force
3575 54 : REAL(dp), ALLOCATABLE, DIMENSION(:, :), TARGET :: mo_ao_blk, mo_coeff_blk
3576 : REAL(dp), DIMENSION(3) :: scoord
3577 : TYPE(dbt_iterator_type) :: iter
3578 :
3579 54 : CALL timeset(routineN, handle)
3580 :
3581 : !$OMP PARALLEL DEFAULT(NONE) &
3582 : !$OMP SHARED(t_2c_MO_AO,t_mo_coeff,pref,force,idx_to_at,atom_of_kind,kind_of,i_xyz) &
3583 54 : !$OMP PRIVATE(iter,ind,mo_ao_blk,mo_coeff_blk,found,new_force,iat,iat_of_kind,ikind,scoord,j_xyz)
3584 : CALL dbt_iterator_start(iter, t_2c_MO_AO)
3585 : DO WHILE (dbt_iterator_blocks_left(iter))
3586 : CALL dbt_iterator_next_block(iter, ind)
3587 :
3588 : CALL dbt_get_block(t_2c_MO_AO, ind, mo_ao_blk, found)
3589 : CPASSERT(found)
3590 : CALL dbt_get_block(t_mo_coeff, ind, mo_coeff_blk, found)
3591 :
3592 : IF (found) THEN
3593 :
3594 : new_force = pref*SUM(mo_ao_blk(:, :)*mo_coeff_blk(:, :))
3595 :
3596 : iat = idx_to_at(ind(2)) !AO index is column index
3597 : iat_of_kind = atom_of_kind(iat)
3598 : ikind = kind_of(iat)
3599 :
3600 : !$OMP ATOMIC
3601 : force(ikind)%fock_4c(i_xyz, iat_of_kind) = force(ikind)%fock_4c(i_xyz, iat_of_kind) &
3602 : + new_force
3603 :
3604 : DEALLOCATE (mo_coeff_blk)
3605 : END IF
3606 :
3607 : DEALLOCATE (mo_ao_blk)
3608 : END DO !iter
3609 : CALL dbt_iterator_stop(iter)
3610 : !$OMP END PARALLEL
3611 :
3612 54 : CALL timestop(handle)
3613 :
3614 108 : END SUBROUTINE get_MO_AO_force
3615 :
3616 : ! **************************************************************************************************
3617 : !> \brief Print RI-HFX quantities, as required by the PRINT subsection
3618 : !> \param ri_data ...
3619 : !> \param qs_env ...
3620 : ! **************************************************************************************************
3621 148 : SUBROUTINE print_ri_hfx(ri_data, qs_env)
3622 :
3623 : TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
3624 : TYPE(qs_environment_type), POINTER :: qs_env
3625 :
3626 : INTEGER :: i_RI, ibasis, nkind, nspins, unit_nr
3627 296 : INTEGER, DIMENSION(:), POINTER :: col_bsize, row_bsize
3628 : LOGICAL :: mult_by_S, print_density, print_ri_metric
3629 148 : REAL(dp), ALLOCATABLE, DIMENSION(:) :: density_coeffs, density_coeffs_2
3630 : TYPE(cp_blacs_env_type), POINTER :: blacs_env
3631 : TYPE(cp_fm_struct_type), POINTER :: fm_struct
3632 : TYPE(cp_fm_type) :: matrix_s_fm
3633 : TYPE(cp_logger_type), POINTER :: logger
3634 : TYPE(dbcsr_distribution_type) :: dbcsr_dist
3635 148 : TYPE(dbcsr_p_type), DIMENSION(:, :), POINTER :: rho_ao
3636 296 : TYPE(dbcsr_type), DIMENSION(1) :: matrix_s
3637 : TYPE(dft_control_type), POINTER :: dft_control
3638 : TYPE(distribution_2d_type), POINTER :: dist_2d
3639 : TYPE(gto_basis_set_p_type), ALLOCATABLE, &
3640 148 : DIMENSION(:), TARGET :: basis_set_AO, basis_set_RI
3641 : TYPE(gto_basis_set_type), POINTER :: orb_basis, ri_basis
3642 : TYPE(mp_para_env_type), POINTER :: para_env
3643 : TYPE(neighbor_list_set_p_type), DIMENSION(:), &
3644 148 : POINTER :: nl_2c
3645 148 : TYPE(particle_type), DIMENSION(:), POINTER :: particle_set
3646 148 : TYPE(qs_kind_type), DIMENSION(:), POINTER :: qs_kind_set
3647 : TYPE(qs_rho_type), POINTER :: rho
3648 : TYPE(section_vals_type), POINTER :: input, print_section
3649 :
3650 148 : NULLIFY (rho_ao, input, print_section, logger, rho, particle_set, qs_kind_set, ri_basis, nl_2c, &
3651 148 : dist_2d, col_bsize, row_bsize, para_env, blacs_env, fm_struct, orb_basis, dft_control)
3652 :
3653 148 : CALL get_qs_env(qs_env, input=input, dft_control=dft_control)
3654 148 : logger => cp_get_default_logger()
3655 148 : print_density = .FALSE.
3656 148 : print_ri_metric = .FALSE.
3657 :
3658 : !Do we print the RI density coeffs and/or RI_metric 2c integrals?
3659 148 : print_section => section_vals_get_subs_vals(input, "DFT%XC%HF%RI%PRINT")
3660 148 : IF (BTEST(cp_print_key_should_output(logger%iter_info, print_section, "RI_DENSITY_COEFFS"), &
3661 0 : cp_p_file)) print_density = .TRUE.
3662 148 : IF (BTEST(cp_print_key_should_output(logger%iter_info, print_section, "RI_METRIC_2C_INTS"), &
3663 0 : cp_p_file)) print_ri_metric = .TRUE.
3664 :
3665 : !common stuff
3666 148 : IF (print_density .OR. print_ri_metric) THEN
3667 :
3668 : !Re-calculate the 2-center RI_metric integrals (because not stored and cheap)
3669 : !Recalculated the RI_metric 2c-integrals, as it is cheap, and not stored
3670 : CALL get_qs_env(qs_env, nkind=nkind, qs_kind_set=qs_kind_set, particle_set=particle_set, &
3671 0 : distribution_2d=dist_2d, para_env=para_env, blacs_env=blacs_env)
3672 0 : ALLOCATE (basis_set_RI(nkind), basis_set_AO(nkind))
3673 0 : CALL basis_set_list_setup(basis_set_RI, ri_data%ri_basis_type, qs_kind_set)
3674 0 : CALL get_particle_set(particle_set, qs_kind_set, basis=basis_set_RI)
3675 0 : CALL basis_set_list_setup(basis_set_AO, ri_data%orb_basis_type, qs_kind_set)
3676 0 : CALL get_particle_set(particle_set, qs_kind_set, basis=basis_set_AO)
3677 :
3678 0 : DO ibasis = 1, nkind
3679 0 : ri_basis => basis_set_RI(ibasis)%gto_basis_set
3680 0 : CALL init_interaction_radii_orb_basis(ri_basis, ri_data%eps_pgf_orb)
3681 0 : orb_basis => basis_set_AO(ibasis)%gto_basis_set
3682 0 : CALL init_interaction_radii_orb_basis(orb_basis, ri_data%eps_pgf_orb)
3683 : END DO
3684 :
3685 0 : CALL cp_dbcsr_dist2d_to_dist(dist_2d, dbcsr_dist)
3686 0 : ALLOCATE (row_bsize(SIZE(ri_data%bsizes_RI)))
3687 0 : ALLOCATE (col_bsize(SIZE(ri_data%bsizes_RI)))
3688 0 : row_bsize(:) = ri_data%bsizes_RI
3689 0 : col_bsize(:) = ri_data%bsizes_RI
3690 :
3691 0 : CALL dbcsr_create(matrix_s(1), "RI metric", dbcsr_dist, dbcsr_type_symmetric, row_bsize, col_bsize)
3692 :
3693 : CALL build_2c_neighbor_lists(nl_2c, basis_set_RI, basis_set_RI, ri_data%ri_metric, &
3694 0 : "HFX_2c_nl_pot", qs_env, sym_ij=.TRUE., dist_2d=dist_2d)
3695 : CALL build_2c_integrals(matrix_s, ri_data%filter_eps_2c, qs_env, nl_2c, basis_set_RI, &
3696 0 : basis_set_RI, ri_data%ri_metric)
3697 :
3698 0 : CALL release_neighbor_list_sets(nl_2c)
3699 0 : CALL dbcsr_distribution_release(dbcsr_dist)
3700 : END IF
3701 :
3702 0 : IF (print_density) THEN
3703 0 : CALL get_qs_env(qs_env, rho=rho)
3704 0 : CALL qs_rho_get(rho, rho_ao_kp=rho_ao)
3705 0 : nspins = SIZE(rho_ao, 1)
3706 :
3707 0 : CALL section_vals_val_get(print_section, "RI_DENSITY_COEFFS%MULTIPLY_BY_RI_2C_INTEGRALS", l_val=mult_by_s)
3708 :
3709 : CALL get_RI_density_coeffs(density_coeffs, matrix_s(1), rho_ao, 1, basis_set_AO, basis_set_RI, &
3710 0 : mult_by_s, ri_data, qs_env)
3711 0 : IF (nspins == 2) &
3712 : CALL get_RI_density_coeffs(density_coeffs_2, matrix_s(1), rho_ao, 2, basis_set_AO, &
3713 0 : basis_set_RI, mult_by_s, ri_data, qs_env)
3714 :
3715 : unit_nr = cp_print_key_unit_nr(logger, input, "DFT%XC%HF%RI%PRINT%RI_DENSITY_COEFFS", &
3716 : extension=".dat", file_status="REPLACE", &
3717 0 : file_action="WRITE", file_form="FORMATTED")
3718 :
3719 0 : IF (unit_nr > 0) THEN
3720 0 : IF (nspins == 1) THEN
3721 : WRITE (unit_nr, FMT="(A,A,A)") &
3722 0 : "# Coefficients of the electronic density projected on the RI_HFX basis for ", &
3723 0 : TRIM(logger%iter_info%project_name), " project"
3724 0 : DO i_RI = 1, SIZE(density_coeffs)
3725 0 : WRITE (unit_nr, FMT="(F20.12)") density_coeffs(i_RI)
3726 : END DO
3727 : ELSE
3728 : WRITE (unit_nr, FMT="(A,A,A)") &
3729 0 : "# Coefficients of the electronic density projected on the RI_HFX basis for ", &
3730 0 : TRIM(logger%iter_info%project_name), " project. Spin up, spin down"
3731 0 : DO i_RI = 1, SIZE(density_coeffs)
3732 0 : WRITE (unit_nr, FMT="(F20.12,F20.12)") density_coeffs(i_RI), density_coeffs_2(i_RI)
3733 : END DO
3734 : END IF
3735 : END IF
3736 :
3737 0 : CALL cp_print_key_finished_output(unit_nr, logger, input, "DFT%XC%HF%RI%PRINT%RI_DENSITY_COEFFS")
3738 : END IF
3739 :
3740 148 : IF (print_ri_metric) THEN
3741 :
3742 : !convert 2c integrals to fm for dumping
3743 : CALL cp_fm_struct_create(fm_struct, context=blacs_env, para_env=para_env, &
3744 0 : nrow_global=SUM(row_bsize), ncol_global=SUM(col_bsize))
3745 0 : CALL cp_fm_create(matrix_s_fm, fm_struct)
3746 :
3747 0 : CALL copy_dbcsr_to_fm(matrix_s(1), matrix_s_fm)
3748 :
3749 : unit_nr = cp_print_key_unit_nr(logger, input, "DFT%XC%HF%RI%PRINT%RI_METRIC_2C_INTS", &
3750 : extension=".fm", file_status="REPLACE", &
3751 0 : file_action="WRITE", file_form="UNFORMATTED")
3752 0 : CALL cp_fm_write_unformatted(matrix_s_fm, unit_nr)
3753 :
3754 0 : CALL cp_print_key_finished_output(unit_nr, logger, input, "DFT%XC%HF%RI%PRINT%RI_METRIC_2C_INTS")
3755 :
3756 0 : CALL cp_fm_struct_release(fm_struct)
3757 0 : CALL cp_fm_release(matrix_s_fm)
3758 : END IF
3759 :
3760 : !clean-up
3761 148 : IF (print_density .OR. print_ri_metric) THEN
3762 0 : DO ibasis = 1, nkind
3763 0 : ri_basis => basis_set_RI(ibasis)%gto_basis_set
3764 0 : CALL init_interaction_radii_orb_basis(ri_basis, dft_control%qs_control%eps_pgf_orb)
3765 0 : orb_basis => basis_set_AO(ibasis)%gto_basis_set
3766 0 : CALL init_interaction_radii_orb_basis(orb_basis, dft_control%qs_control%eps_pgf_orb)
3767 : END DO
3768 :
3769 0 : CALL dbcsr_release(matrix_s(1))
3770 0 : DEALLOCATE (row_bsize, col_bsize)
3771 : END IF
3772 :
3773 444 : END SUBROUTINE print_ri_hfx
3774 :
3775 : ! **************************************************************************************************
3776 : !> \brief Projects the density on the RI basis and return the array of the RI coefficients
3777 : !> \param density_coeffs ...
3778 : !> \param ri_2c_ints ...
3779 : !> \param rho_ao ...
3780 : !> \param ispin ...
3781 : !> \param basis_set_AO ...
3782 : !> \param basis_set_RI ...
3783 : !> \param multiply_by_S ...
3784 : !> \param ri_data ...
3785 : !> \param qs_env ...
3786 : ! **************************************************************************************************
3787 0 : SUBROUTINE get_RI_density_coeffs(density_coeffs, ri_2c_ints, rho_ao, ispin, basis_set_AO, &
3788 0 : basis_set_RI, multiply_by_S, ri_data, qs_env)
3789 :
3790 : REAL(dp), ALLOCATABLE, DIMENSION(:) :: density_coeffs
3791 : TYPE(dbcsr_type), INTENT(INOUT) :: ri_2c_ints
3792 : TYPE(dbcsr_p_type), DIMENSION(:, :) :: rho_ao
3793 : INTEGER, INTENT(IN) :: ispin
3794 : TYPE(gto_basis_set_p_type), DIMENSION(:) :: basis_set_AO, basis_set_RI
3795 : LOGICAL, INTENT(IN) :: multiply_by_S
3796 : TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
3797 : TYPE(qs_environment_type), POINTER :: qs_env
3798 :
3799 : CHARACTER(LEN=*), PARAMETER :: routineN = 'get_RI_density_coeffs'
3800 :
3801 : INTEGER :: a, b, handle, i_mem, idx, n_dependent, &
3802 : n_mem, n_mem_RI, natom, &
3803 : nblk_per_thread, nblks, nkind
3804 : INTEGER(int_8) :: nze
3805 0 : INTEGER, ALLOCATABLE, DIMENSION(:) :: batch_block_end, batch_block_start, &
3806 0 : dist1, dist2, dist3, dummy1, dummy2, &
3807 0 : idx1, idx2, idx3
3808 : INTEGER, DIMENSION(2) :: ind, pdims_2d
3809 : INTEGER, DIMENSION(2, 3) :: bounds_cpy
3810 : INTEGER, DIMENSION(3) :: dims_3c, pcoord_3d, pdims_3d
3811 : LOGICAL :: calc_ints, found
3812 : REAL(dp) :: occ, threshold
3813 0 : REAL(dp), ALLOCATABLE, DIMENSION(:, :) :: blk
3814 0 : REAL(dp), ALLOCATABLE, DIMENSION(:, :, :) :: blk_3d
3815 : TYPE(cp_blacs_env_type), POINTER :: blacs_env
3816 : TYPE(dbcsr_type) :: ri_2c_inv
3817 0 : TYPE(dbt_distribution_type) :: dist_2d, dist_3d
3818 : TYPE(dbt_iterator_type) :: iter
3819 0 : TYPE(dbt_pgrid_type) :: pgrid_2d, pgrid_3d
3820 0 : TYPE(dbt_type) :: density_coeffs_t, density_tmp, rho_ao_t, &
3821 0 : rho_ao_t_3d, rho_ao_tmp, t2c_ri_ints, &
3822 0 : t2c_ri_inv, t2c_ri_tmp
3823 0 : TYPE(dbt_type), ALLOCATABLE, DIMENSION(:, :) :: t_3c_int_batched
3824 : TYPE(dft_control_type), POINTER :: dft_control
3825 : TYPE(distribution_3d_type) :: dist_nl3c
3826 0 : TYPE(mp_cart_type) :: mp_comm_t3c
3827 : TYPE(mp_para_env_type), POINTER :: para_env
3828 : TYPE(neighbor_list_3c_type) :: nl_3c
3829 0 : TYPE(particle_type), DIMENSION(:), POINTER :: particle_set
3830 0 : TYPE(qs_kind_type), DIMENSION(:), POINTER :: qs_kind_set
3831 :
3832 0 : NULLIFY (dft_control, para_env, blacs_env, particle_set, qs_kind_set)
3833 :
3834 0 : CALL timeset(routineN, handle)
3835 :
3836 : ! Projection of the density on the RI basis: n(r) = sum_pq sum_munu P_pq (pq|mu) (mu|nu)^-1 nu(r)
3837 : ! = sum_nu d_nu nu(r)
3838 : ! the (pq|mu) (mu|nu)^-1 contraction is already stored in compressed format
3839 :
3840 0 : IF (.NOT. ri_data%flavor == ri_pmat) THEN
3841 0 : CPABORT("Can only calculate and print the RI density coefficients within the RHO flavor of RI-HFX")
3842 : END IF
3843 :
3844 : CALL get_qs_env(qs_env, dft_control=dft_control, para_env=para_env, blacs_env=blacs_env, nkind=nkind, &
3845 0 : particle_set=particle_set, qs_kind_set=qs_kind_set, natom=natom)
3846 0 : n_mem = ri_data%n_mem
3847 0 : n_mem_RI = ri_data%n_mem_RI
3848 :
3849 : ! The RI 2c int tensor and its inverse
3850 0 : CALL dbcsr_create(ri_2c_inv, template=ri_2c_ints, matrix_type=dbcsr_type_no_symmetry)
3851 :
3852 0 : SELECT CASE (ri_data%t2c_method)
3853 : CASE (hfx_ri_do_2c_iter)
3854 0 : threshold = MAX(ri_data%filter_eps, 1.0e-12_dp)
3855 0 : CALL invert_hotelling(ri_2c_inv, ri_2c_ints, threshold=threshold, silent=.FALSE.)
3856 : CASE (hfx_ri_do_2c_cholesky)
3857 0 : CALL dbcsr_copy(ri_2c_inv, ri_2c_ints)
3858 0 : CALL cp_dbcsr_cholesky_decompose(ri_2c_inv, para_env=para_env, blacs_env=blacs_env)
3859 0 : CALL cp_dbcsr_cholesky_invert(ri_2c_inv, para_env=para_env, blacs_env=blacs_env, uplo_to_full=.TRUE.)
3860 : CASE (hfx_ri_do_2c_diag)
3861 0 : CALL dbcsr_copy(ri_2c_inv, ri_2c_ints)
3862 : CALL cp_dbcsr_power(ri_2c_inv, -1.0_dp, ri_data%eps_eigval, n_dependent, &
3863 0 : para_env, blacs_env, verbose=ri_data%unit_nr_dbcsr > 0)
3864 : END SELECT
3865 :
3866 0 : CALL dbt_create(ri_2c_ints, t2c_ri_tmp)
3867 : CALL create_2c_tensor(t2c_ri_ints, dist1, dist2, ri_data%pgrid_2d, &
3868 : ri_data%bsizes_RI_split, ri_data%bsizes_RI_split, &
3869 : name="(RI | RI)")
3870 0 : CALL dbt_create(t2c_ri_ints, t2c_ri_inv)
3871 :
3872 0 : CALL dbt_copy_matrix_to_tensor(ri_2c_ints, t2c_ri_tmp)
3873 0 : CALL dbt_copy(t2c_ri_tmp, t2c_ri_ints, move_data=.TRUE.)
3874 0 : CALL dbt_filter(t2c_ri_ints, ri_data%filter_eps)
3875 :
3876 0 : CALL dbt_copy_matrix_to_tensor(ri_2c_inv, t2c_ri_tmp)
3877 0 : CALL dbt_copy(t2c_ri_tmp, t2c_ri_inv, move_data=.TRUE.)
3878 0 : CALL dbt_filter(t2c_ri_inv, ri_data%filter_eps)
3879 :
3880 0 : CALL dbcsr_release(ri_2c_inv)
3881 0 : CALL dbt_destroy(t2c_ri_tmp)
3882 0 : DEALLOCATE (dist1, dist2)
3883 :
3884 : ! The AO density tensor
3885 0 : CALL dbt_create(rho_ao(ispin, 1)%matrix, rho_ao_tmp)
3886 : CALL create_2c_tensor(rho_ao_t, dist1, dist2, ri_data%pgrid_2d, &
3887 : ri_data%bsizes_AO_split, ri_data%bsizes_AO_split, &
3888 : name="(AO | AO)")
3889 0 : DEALLOCATE (dist1, dist2)
3890 :
3891 0 : CALL dbt_copy_matrix_to_tensor(rho_ao(ispin, 1)%matrix, rho_ao_tmp)
3892 0 : CALL dbt_copy(rho_ao_tmp, rho_ao_t, move_data=.TRUE.)
3893 0 : CALL dbt_filter(rho_ao_t, ri_data%filter_eps)
3894 0 : CALL dbt_destroy(rho_ao_tmp)
3895 :
3896 : ! Put in in 3D
3897 0 : ALLOCATE (dist1(SIZE(ri_data%bsizes_AO_split)), dist2(SIZE(ri_data%bsizes_AO_split)), dist3(1))
3898 0 : dist3(1) = 0
3899 0 : CALL dbt_get_info(rho_ao_t, pdims=pdims_2d, proc_dist_1=dist1, proc_dist_2=dist2)
3900 0 : CALL dbt_default_distvec(1, 1, [1], dist3)
3901 :
3902 0 : pdims_3d(1) = pdims_2d(1)
3903 0 : pdims_3d(2) = pdims_2d(2)
3904 0 : pdims_3d(3) = 1
3905 :
3906 0 : CALL dbt_pgrid_create(para_env, pdims_3d, pgrid_3d)
3907 0 : CALL dbt_distribution_new(dist_3d, pgrid_3d, dist1, dist2, dist3)
3908 : CALL dbt_create(rho_ao_t_3d, "rho_ao_3d", dist_3d, [1, 2], [3], ri_data%bsizes_AO_split, &
3909 0 : ri_data%bsizes_AO_split, [1])
3910 0 : DEALLOCATE (dist1, dist2, dist3)
3911 0 : CALL dbt_pgrid_destroy(pgrid_3d)
3912 0 : CALL dbt_distribution_destroy(dist_3d)
3913 :
3914 : ! copy density
3915 0 : nblks = 0
3916 : !$OMP PARALLEL DEFAULT(NONE) &
3917 : !$OMP SHARED(rho_ao_t,nblks) &
3918 0 : !$OMP PRIVATE(iter,ind,blk,found)
3919 : CALL dbt_iterator_start(iter, rho_ao_t)
3920 : DO WHILE (dbt_iterator_blocks_left(iter))
3921 : CALL dbt_iterator_next_block(iter, ind)
3922 : CALL dbt_get_block(rho_ao_t, ind, blk, found)
3923 : IF (found) THEN
3924 : !$OMP ATOMIC
3925 : nblks = nblks + 1
3926 : DEALLOCATE (blk)
3927 : END IF
3928 : END DO
3929 : CALL dbt_iterator_stop(iter)
3930 : !$OMP END PARALLEL
3931 :
3932 0 : ALLOCATE (idx1(nblks), idx2(nblks), idx3(nblks))
3933 0 : idx3 = 1
3934 0 : nblks = 0
3935 : !$OMP PARALLEL DEFAULT(NONE) &
3936 : !$OMP SHARED(rho_ao_t,nblks,idx1,idx2) &
3937 0 : !$OMP PRIVATE(iter,ind,blk,found)
3938 : CALL dbt_iterator_start(iter, rho_ao_t)
3939 : DO WHILE (dbt_iterator_blocks_left(iter))
3940 : CALL dbt_iterator_next_block(iter, ind)
3941 : CALL dbt_get_block(rho_ao_t, ind, blk, found)
3942 : IF (found) THEN
3943 : !$OMP CRITICAL
3944 : nblks = nblks + 1
3945 : idx1(nblks) = ind(1)
3946 : idx2(nblks) = ind(2)
3947 : !$OMP END CRITICAL
3948 : DEALLOCATE (blk)
3949 : END IF
3950 : END DO
3951 : CALL dbt_iterator_stop(iter)
3952 : !$OMP END PARALLEL
3953 :
3954 0 : !$OMP PARALLEL DEFAULT(NONE) SHARED(rho_ao_t_3d,nblks,idx1,idx2,idx3) PRIVATE(nblk_per_thread,A,b)
3955 : nblk_per_thread = nblks/omp_get_num_threads() + 1
3956 : a = omp_get_thread_num()*nblk_per_thread + 1
3957 : b = MIN(a + nblk_per_thread, nblks)
3958 : CALL dbt_reserve_blocks(rho_ao_t_3d, idx1(a:b), idx2(a:b), idx3(a:b))
3959 : !$OMP END PARALLEL
3960 :
3961 : !$OMP PARALLEL DEFAULT(NONE) &
3962 : !$OMP SHARED(rho_ao_t,rho_ao_t_3d) &
3963 0 : !$OMP PRIVATE(iter,ind,blk,found,blk_3d)
3964 : CALL dbt_iterator_start(iter, rho_ao_t)
3965 : DO WHILE (dbt_iterator_blocks_left(iter))
3966 : CALL dbt_iterator_next_block(iter, ind)
3967 : CALL dbt_get_block(rho_ao_t, ind, blk, found)
3968 : IF (found) THEN
3969 : ALLOCATE (blk_3d(SIZE(blk, 1), SIZE(blk, 2), 1))
3970 : blk_3d(:, :, 1) = blk(:, :)
3971 : !$OMP CRITICAL
3972 : CALL dbt_put_block(rho_ao_t_3d, [ind(1), ind(2), 1], [SIZE(blk, 1), SIZE(blk, 2), 1], blk_3d)
3973 : !$OMP END CRITICAL
3974 : DEALLOCATE (blk, blk_3d)
3975 : END IF
3976 : END DO
3977 : CALL dbt_iterator_stop(iter)
3978 : !$OMP END PARALLEL
3979 :
3980 : ! The 1D tensor with the density coeffs
3981 0 : pdims_2d(1) = para_env%num_pe
3982 0 : pdims_2d(2) = 1
3983 :
3984 0 : ALLOCATE (dist1(SIZE(ri_data%bsizes_RI_split)), dist2(1))
3985 0 : CALL dbt_default_distvec(SIZE(ri_data%bsizes_RI_split), pdims_2d(1), ri_data%bsizes_RI_split, dist1)
3986 0 : CALL dbt_default_distvec(1, pdims_2d(2), [1], dist2)
3987 :
3988 0 : CALL dbt_pgrid_create(para_env, pdims_2d, pgrid_2d)
3989 0 : CALL dbt_distribution_new(dist_2d, pgrid_2d, dist1, dist2)
3990 0 : CALL dbt_create(density_coeffs_t, "density_coeffs", dist_2d, [1], [2], ri_data%bsizes_RI_split, [1])
3991 0 : CALL dbt_create(density_coeffs_t, density_tmp)
3992 0 : DEALLOCATE (dist1, dist2)
3993 0 : CALL dbt_pgrid_destroy(pgrid_2d)
3994 0 : CALL dbt_distribution_destroy(dist_2d)
3995 :
3996 0 : CALL dbt_get_info(ri_data%t_3c_int_ctr_3(1, 1), nfull_total=dims_3c)
3997 :
3998 : ! The 3c integrals tensor, in case we compute them here
3999 0 : pdims_3d = 0
4000 0 : CALL dbt_pgrid_create(para_env, pdims_3d, pgrid_3d, tensor_dims=[MAX(1, natom/n_mem), natom, natom])
4001 0 : ALLOCATE (t_3c_int_batched(1, 1))
4002 : CALL create_3c_tensor(t_3c_int_batched(1, 1), dist1, dist2, dist3, pgrid_3d, &
4003 : ri_data%bsizes_RI, ri_data%bsizes_AO, ri_data%bsizes_AO, map1=[1], map2=[2, 3], &
4004 0 : name="(RI | AO AO)")
4005 :
4006 0 : CALL dbt_mp_environ_pgrid(pgrid_3d, pdims_3d, pcoord_3d)
4007 0 : CALL mp_comm_t3c%create(pgrid_3d%mp_comm_2d, 3, pdims_3d)
4008 : CALL distribution_3d_create(dist_nl3c, dist1, dist2, dist3, nkind, particle_set, &
4009 0 : mp_comm_t3c, own_comm=.TRUE.)
4010 0 : DEALLOCATE (dist1, dist2, dist3)
4011 0 : CALL dbt_pgrid_destroy(pgrid_3d)
4012 :
4013 : CALL build_3c_neighbor_lists(nl_3c, basis_set_RI, basis_set_AO, basis_set_AO, dist_nl3c, ri_data%ri_metric, &
4014 0 : "HFX_3c_nl", qs_env, op_pos=1, sym_jk=.TRUE., own_dist=.TRUE.)
4015 :
4016 0 : n_mem = ri_data%n_mem
4017 0 : CALL create_tensor_batches(ri_data%bsizes_RI, n_mem, dummy1, dummy2, batch_block_start, batch_block_end)
4018 :
4019 0 : calc_ints = .FALSE.
4020 0 : CALL get_tensor_occupancy(ri_data%t_3c_int_ctr_2(1, 1), nze, occ)
4021 0 : IF (nze == 0) calc_ints = .TRUE.
4022 :
4023 0 : DO i_mem = 1, n_mem
4024 0 : IF (calc_ints) THEN
4025 : CALL build_3c_integrals(t_3c_int_batched, ri_data%filter_eps, qs_env, nl_3c, &
4026 : basis_set_RI, basis_set_AO, basis_set_AO, &
4027 : ri_data%ri_metric, int_eps=ri_data%eps_schwarz, op_pos=1, &
4028 : desymmetrize=.FALSE., &
4029 0 : bounds_i=[batch_block_start(i_mem), batch_block_end(i_mem)])
4030 0 : CALL dbt_copy(t_3c_int_batched(1, 1), ri_data%t_3c_int_ctr_3(1, 1), order=[1, 3, 2])
4031 0 : CALL dbt_copy(t_3c_int_batched(1, 1), ri_data%t_3c_int_ctr_3(1, 1), move_data=.TRUE., summation=.TRUE.)
4032 0 : CALL dbt_filter(ri_data%t_3c_int_ctr_3(1, 1), ri_data%filter_eps)
4033 : ELSE
4034 : bounds_cpy(:, 2) = [SUM(ri_data%bsizes_RI(1:batch_block_start(i_mem) - 1)) + 1, &
4035 0 : SUM(ri_data%bsizes_RI(1:batch_block_end(i_mem)))]
4036 0 : bounds_cpy(:, 1) = [1, SUM(ri_data%bsizes_AO)]
4037 0 : bounds_cpy(:, 3) = [1, SUM(ri_data%bsizes_AO)]
4038 : CALL dbt_copy(ri_data%t_3c_int_ctr_2(1, 1), ri_data%t_3c_int_ctr_3(1, 1), &
4039 0 : order=[2, 1, 3], bounds=bounds_cpy)
4040 : END IF
4041 :
4042 : !contract the integrals with the density P_pq (pq|R)
4043 : CALL dbt_contract(1.0_dp, ri_data%t_3c_int_ctr_3(1, 1), rho_ao_t_3d, 0.0_dp, density_tmp, &
4044 : contract_1=[2, 3], notcontract_1=[1], &
4045 : contract_2=[1, 2], notcontract_2=[3], &
4046 0 : map_1=[1], map_2=[2], filter_eps=ri_data%filter_eps)
4047 0 : CALL dbt_clear(ri_data%t_3c_int_ctr_3(1, 1))
4048 :
4049 : !contract the above vector with the inverse metric
4050 : CALL dbt_contract(1.0_dp, t2c_ri_inv, density_tmp, 1.0_dp, density_coeffs_t, &
4051 : contract_1=[2], notcontract_1=[1], &
4052 : contract_2=[1], notcontract_2=[2], &
4053 0 : map_1=[1], map_2=[2], filter_eps=ri_data%filter_eps)
4054 :
4055 : END DO
4056 0 : CALL neighbor_list_3c_destroy(nl_3c)
4057 :
4058 0 : IF (multiply_by_s) THEN
4059 : CALL dbt_contract(1.0_dp, t2c_ri_ints, density_coeffs_t, 0.0_dp, density_tmp, &
4060 : contract_1=[2], notcontract_1=[1], &
4061 : contract_2=[1], notcontract_2=[2], &
4062 0 : map_1=[1], map_2=[2], filter_eps=ri_data%filter_eps)
4063 0 : CALL dbt_copy(density_tmp, density_coeffs_t, move_data=.TRUE.)
4064 : END IF
4065 :
4066 0 : ALLOCATE (density_coeffs(SUM(ri_data%bsizes_RI)))
4067 0 : density_coeffs = 0.0
4068 :
4069 : !$OMP PARALLEL DEFAULT(NONE) &
4070 : !$OMP SHARED(density_coeffs_t,ri_data,density_coeffs) &
4071 0 : !$OMP PRIVATE(iter,ind,blk,found,idx)
4072 : CALL dbt_iterator_start(iter, density_coeffs_t)
4073 : DO WHILE (dbt_iterator_blocks_left(iter))
4074 : CALL dbt_iterator_next_block(iter, ind)
4075 : CALL dbt_get_block(density_coeffs_t, ind, blk, found)
4076 : IF (found) THEN
4077 :
4078 : idx = SUM(ri_data%bsizes_RI_split(1:ind(1) - 1))
4079 : !$OMP CRITICAL
4080 : density_coeffs(idx + 1:idx + ri_data%bsizes_RI_split(ind(1))) = blk(:, 1)
4081 : !$OMP END CRITICAL
4082 : DEALLOCATE (blk)
4083 : END IF
4084 : END DO
4085 : CALL dbt_iterator_stop(iter)
4086 : !$OMP END PARALLEL
4087 0 : CALL para_env%sum(density_coeffs)
4088 :
4089 0 : CALL dbt_destroy(t2c_ri_ints)
4090 0 : CALL dbt_destroy(t2c_ri_inv)
4091 0 : CALL dbt_destroy(density_tmp)
4092 0 : CALL dbt_destroy(rho_ao_t)
4093 0 : CALL dbt_destroy(rho_ao_t_3d)
4094 0 : CALL dbt_destroy(density_coeffs_t)
4095 0 : CALL dbt_destroy(t_3c_int_batched(1, 1))
4096 :
4097 0 : CALL timestop(handle)
4098 :
4099 0 : END SUBROUTINE get_RI_density_coeffs
4100 :
4101 : ! **************************************************************************************************
4102 : !> \brief a small utility function that returns the atom corresponding to a block of a split tensor
4103 : !> \param idx_to_at ...
4104 : !> \param bsizes_split ...
4105 : !> \param bsizes_orig ...
4106 : !> \return ...
4107 : ! **************************************************************************************************
4108 936 : SUBROUTINE get_idx_to_atom(idx_to_at, bsizes_split, bsizes_orig)
4109 : INTEGER, DIMENSION(:), INTENT(INOUT) :: idx_to_at
4110 : INTEGER, DIMENSION(:), INTENT(IN) :: bsizes_split, bsizes_orig
4111 :
4112 : INTEGER :: full_sum, iat, iblk, split_sum
4113 :
4114 936 : iat = 1
4115 936 : full_sum = bsizes_orig(iat)
4116 936 : split_sum = 0
4117 4662 : DO iblk = 1, SIZE(bsizes_split)
4118 3726 : split_sum = split_sum + bsizes_split(iblk)
4119 :
4120 3726 : IF (split_sum .GT. full_sum) THEN
4121 1340 : iat = iat + 1
4122 1340 : full_sum = full_sum + bsizes_orig(iat)
4123 : END IF
4124 :
4125 4662 : idx_to_at(iblk) = iat
4126 : END DO
4127 :
4128 936 : END SUBROUTINE get_idx_to_atom
4129 :
4130 : ! **************************************************************************************************
4131 : !> \brief Function for calculating sqrt of a matrix
4132 : !> \param values ...
4133 : !> \return ...
4134 : ! **************************************************************************************************
4135 0 : FUNCTION my_sqrt(values)
4136 : REAL(KIND=dp), DIMENSION(:), INTENT(IN) :: values
4137 : REAL(KIND=dp), DIMENSION(SIZE(values)) :: my_sqrt
4138 :
4139 0 : my_sqrt = SQRT(values)
4140 : END FUNCTION
4141 :
4142 : ! **************************************************************************************************
4143 : !> \brief Function for calculation inverse sqrt of a matrix
4144 : !> \param values ...
4145 : !> \return ...
4146 : ! **************************************************************************************************
4147 0 : FUNCTION my_invsqrt(values)
4148 : REAL(KIND=dp), DIMENSION(:), INTENT(IN) :: values
4149 : REAL(KIND=dp), DIMENSION(SIZE(values)) :: my_invsqrt
4150 :
4151 0 : my_invsqrt = SQRT(1.0_dp/values)
4152 : END FUNCTION
4153 228 : END MODULE
|