Line data Source code
1 : !--------------------------------------------------------------------------------------------------!
2 : ! CP2K: A general program to perform molecular dynamics simulations !
3 : ! Copyright 2000-2024 CP2K developers group <https://cp2k.org> !
4 : ! !
5 : ! SPDX-License-Identifier: GPL-2.0-or-later !
6 : !--------------------------------------------------------------------------------------------------!
7 :
8 : ! **************************************************************************************************
9 : !> \brief RI-methods for HFX
10 : ! **************************************************************************************************
11 :
12 : MODULE hfx_ri
13 :
14 : USE OMP_LIB, ONLY: omp_get_num_threads,&
15 : omp_get_thread_num
16 : USE arnoldi_api, ONLY: arnoldi_extremal
17 : USE atomic_kind_types, ONLY: atomic_kind_type,&
18 : get_atomic_kind_set
19 : USE basis_set_types, ONLY: gto_basis_set_p_type,&
20 : gto_basis_set_type
21 : USE cell_types, ONLY: cell_type
22 : USE cp_blacs_env, ONLY: cp_blacs_env_type
23 : USE cp_control_types, ONLY: dft_control_type
24 : USE cp_dbcsr_api, ONLY: &
25 : dbcsr_add, dbcsr_add_on_diag, dbcsr_complete_redistribute, dbcsr_copy, dbcsr_create, &
26 : dbcsr_desymmetrize, dbcsr_distribution_get, dbcsr_distribution_release, &
27 : dbcsr_distribution_type, dbcsr_dot, dbcsr_filter, dbcsr_frobenius_norm, dbcsr_get_info, &
28 : dbcsr_get_num_blocks, dbcsr_multiply, dbcsr_p_type, dbcsr_release, dbcsr_scale, &
29 : dbcsr_type, dbcsr_type_antisymmetric, dbcsr_type_no_symmetry, dbcsr_type_symmetric
30 : USE cp_dbcsr_cholesky, ONLY: cp_dbcsr_cholesky_decompose,&
31 : cp_dbcsr_cholesky_invert
32 : USE cp_dbcsr_diag, ONLY: cp_dbcsr_power
33 : USE cp_dbcsr_operations, ONLY: copy_dbcsr_to_fm,&
34 : copy_fm_to_dbcsr,&
35 : cp_dbcsr_dist2d_to_dist,&
36 : dbcsr_deallocate_matrix_set
37 : USE cp_fm_struct, ONLY: cp_fm_struct_create,&
38 : cp_fm_struct_release,&
39 : cp_fm_struct_type
40 : USE cp_fm_types, ONLY: cp_fm_create,&
41 : cp_fm_release,&
42 : cp_fm_type,&
43 : cp_fm_write_unformatted
44 : USE cp_log_handling, ONLY: cp_get_default_logger,&
45 : cp_logger_type
46 : USE cp_output_handling, ONLY: cp_p_file,&
47 : cp_print_key_finished_output,&
48 : cp_print_key_should_output,&
49 : cp_print_key_unit_nr
50 : USE dbt_api, ONLY: &
51 : dbt_batched_contract_finalize, dbt_batched_contract_init, dbt_clear, dbt_contract, &
52 : dbt_copy, dbt_copy_matrix_to_tensor, dbt_copy_tensor_to_matrix, dbt_create, &
53 : dbt_default_distvec, dbt_destroy, dbt_distribution_destroy, dbt_distribution_new, &
54 : dbt_distribution_type, dbt_filter, dbt_get_block, dbt_get_info, dbt_get_num_blocks_total, &
55 : dbt_iterator_blocks_left, dbt_iterator_next_block, dbt_iterator_start, dbt_iterator_stop, &
56 : dbt_iterator_type, dbt_mp_environ_pgrid, dbt_nd_mp_comm, dbt_pgrid_create, &
57 : dbt_pgrid_destroy, dbt_pgrid_type, dbt_put_block, dbt_reserve_blocks, dbt_scale, dbt_type
58 : USE distribution_2d_types, ONLY: distribution_2d_type
59 : USE hfx_types, ONLY: alloc_containers,&
60 : block_ind_type,&
61 : dealloc_containers,&
62 : hfx_compression_type,&
63 : hfx_ri_init,&
64 : hfx_ri_release,&
65 : hfx_ri_type
66 : USE input_constants, ONLY: hfx_ri_do_2c_cholesky,&
67 : hfx_ri_do_2c_diag,&
68 : hfx_ri_do_2c_iter
69 : USE input_cp2k_hfx, ONLY: ri_mo,&
70 : ri_pmat
71 : USE input_section_types, ONLY: section_vals_get_subs_vals,&
72 : section_vals_type,&
73 : section_vals_val_get
74 : USE iterate_matrix, ONLY: invert_hotelling,&
75 : matrix_sqrt_newton_schulz
76 : USE kinds, ONLY: default_string_length,&
77 : dp,&
78 : int_8
79 : USE machine, ONLY: m_walltime
80 : USE message_passing, ONLY: mp_cart_type,&
81 : mp_comm_type,&
82 : mp_para_env_type
83 : USE particle_methods, ONLY: get_particle_set
84 : USE particle_types, ONLY: particle_type
85 : USE qs_environment_types, ONLY: get_qs_env,&
86 : qs_environment_type
87 : USE qs_force_types, ONLY: qs_force_type
88 : USE qs_integral_utils, ONLY: basis_set_list_setup
89 : USE qs_interactions, ONLY: init_interaction_radii_orb_basis
90 : USE qs_kind_types, ONLY: qs_kind_type
91 : USE qs_ks_types, ONLY: qs_ks_env_type
92 : USE qs_mo_types, ONLY: get_mo_set,&
93 : mo_set_type
94 : USE qs_neighbor_list_types, ONLY: neighbor_list_set_p_type,&
95 : release_neighbor_list_sets
96 : USE qs_rho_types, ONLY: qs_rho_get,&
97 : qs_rho_type
98 : USE qs_tensors, ONLY: &
99 : build_2c_derivatives, build_2c_integrals, build_2c_neighbor_lists, build_3c_derivatives, &
100 : build_3c_integrals, build_3c_neighbor_lists, calc_2c_virial, calc_3c_virial, &
101 : compress_tensor, decompress_tensor, get_tensor_occupancy, neighbor_list_3c_destroy
102 : USE qs_tensors_types, ONLY: create_2c_tensor,&
103 : create_3c_tensor,&
104 : create_tensor_batches,&
105 : distribution_3d_create,&
106 : distribution_3d_type,&
107 : neighbor_list_3c_type,&
108 : split_block_sizes
109 : USE util, ONLY: sort
110 : USE virial_types, ONLY: virial_type
111 : #include "./base/base_uses.f90"
112 :
113 : !$ USE OMP_LIB, ONLY: omp_get_num_threads
114 :
115 : IMPLICIT NONE
116 : PRIVATE
117 :
118 : PUBLIC :: hfx_ri_update_ks, hfx_ri_update_forces, get_force_from_3c_trace, get_2c_der_force, &
119 : get_idx_to_atom, print_ri_hfx, hfx_ri_pre_scf_calc_tensors, check_inverse
120 :
121 : CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'hfx_ri'
122 : CONTAINS
123 :
124 : ! **************************************************************************************************
125 : !> \brief Switches the RI_FLAVOR from MO to RHO or vice-versa
126 : !> \param ri_data ...
127 : !> \param qs_env ...
128 : !> \note As a side product, the ri_data is mostly reinitialized and the integrals recomputed
129 : ! **************************************************************************************************
130 22 : SUBROUTINE switch_ri_flavor(ri_data, qs_env)
131 : TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
132 : TYPE(qs_environment_type), POINTER :: qs_env
133 :
134 : CHARACTER(LEN=*), PARAMETER :: routineN = 'switch_ri_flavor'
135 :
136 : INTEGER :: handle, n_mem, new_flavor
137 22 : TYPE(atomic_kind_type), DIMENSION(:), POINTER :: atomic_kind_set
138 : TYPE(dft_control_type), POINTER :: dft_control
139 : TYPE(mp_para_env_type), POINTER :: para_env
140 22 : TYPE(particle_type), DIMENSION(:), POINTER :: particle_set
141 22 : TYPE(qs_kind_type), DIMENSION(:), POINTER :: qs_kind_set
142 :
143 22 : NULLIFY (qs_kind_set, particle_set, atomic_kind_set, para_env, dft_control)
144 :
145 22 : CALL timeset(routineN, handle)
146 :
147 : CALL get_qs_env(qs_env, para_env=para_env, dft_control=dft_control, atomic_kind_set=atomic_kind_set, &
148 22 : particle_set=particle_set, qs_kind_set=qs_kind_set)
149 :
150 22 : CALL hfx_ri_release(ri_data, write_stats=.FALSE.)
151 :
152 22 : IF (ri_data%flavor == ri_pmat) THEN
153 : new_flavor = ri_mo
154 : ELSE
155 12 : new_flavor = ri_pmat
156 : END IF
157 22 : ri_data%flavor = new_flavor
158 :
159 22 : n_mem = ri_data%n_mem_input
160 22 : ri_data%n_mem_input = ri_data%n_mem_flavor_switch
161 22 : ri_data%n_mem_flavor_switch = n_mem
162 :
163 22 : CALL hfx_ri_init(ri_data, qs_kind_set, particle_set, atomic_kind_set, para_env)
164 :
165 : !Need to recalculate the integrals in the new flavor
166 : !TODO: should we backup the integrals and symmetrize/desymmetrize them instead of recomputing ?!?
167 : ! that only makes sense if actual integral calculation is not negligible
168 22 : IF (ri_data%flavor == ri_pmat) THEN
169 12 : CALL hfx_ri_pre_scf_Pmat(qs_env, ri_data)
170 : ELSE
171 10 : CALL hfx_ri_pre_scf_mo(qs_env, ri_data, dft_control%nspins)
172 : END IF
173 :
174 22 : IF (ri_data%unit_nr > 0) THEN
175 0 : IF (ri_data%flavor == ri_pmat) THEN
176 0 : WRITE (ri_data%unit_nr, '(T2,A)') "HFX_RI_INFO| temporarily switched to RI_FLAVOR RHO"
177 : ELSE
178 0 : WRITE (ri_data%unit_nr, '(T2,A)') "HFX_RI_INFO| temporarily switched to RI_FLAVOR MO"
179 : END IF
180 : END IF
181 :
182 22 : CALL timestop(handle)
183 :
184 22 : END SUBROUTINE switch_ri_flavor
185 :
186 : ! **************************************************************************************************
187 : !> \brief Pre-SCF steps in MO flavor of RI HFX
188 : !>
189 : !> Calculate 2-center & 3-center integrals (see hfx_ri_pre_scf_calc_tensors) and contract
190 : !> K(P, S) = sum_R K_2(P, R)^{-1} K_1(R, S)^{1/2}
191 : !> B(mu, lambda, R) = sum_P int_3c(mu, lambda, P) K(P, R)
192 : !> \param qs_env ...
193 : !> \param ri_data ...
194 : !> \param nspins ...
195 : ! **************************************************************************************************
196 26 : SUBROUTINE hfx_ri_pre_scf_mo(qs_env, ri_data, nspins)
197 : TYPE(qs_environment_type), POINTER :: qs_env
198 : TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
199 : INTEGER, INTENT(IN) :: nspins
200 :
201 : CHARACTER(LEN=*), PARAMETER :: routineN = 'hfx_ri_pre_scf_mo'
202 :
203 : INTEGER :: handle, handle2, ispin, n_dependent, &
204 : unit_nr, unit_nr_dbcsr
205 : REAL(KIND=dp) :: threshold
206 : TYPE(cp_blacs_env_type), POINTER :: blacs_env
207 156 : TYPE(dbcsr_type), DIMENSION(1) :: dbcsr_work_1, dbcsr_work_2, t_2c_int_mat, t_2c_op_pot, &
208 130 : t_2c_op_pot_sqrt, t_2c_op_pot_sqrt_inv, t_2c_op_RI, t_2c_op_RI_inv
209 26 : TYPE(dbt_type), ALLOCATABLE, DIMENSION(:) :: t_2c_int, t_2c_work
210 26 : TYPE(dbt_type), ALLOCATABLE, DIMENSION(:, :) :: t_3c_int
211 : TYPE(mp_para_env_type), POINTER :: para_env
212 :
213 26 : CALL timeset(routineN, handle)
214 :
215 26 : unit_nr_dbcsr = ri_data%unit_nr_dbcsr
216 26 : unit_nr = ri_data%unit_nr
217 :
218 26 : CALL get_qs_env(qs_env, para_env=para_env, blacs_env=blacs_env)
219 :
220 26 : CALL timeset(routineN//"_int", handle2)
221 :
222 806 : ALLOCATE (t_2c_int(1), t_2c_work(1), t_3c_int(1, 1))
223 26 : CALL hfx_ri_pre_scf_calc_tensors(qs_env, ri_data, t_2c_op_RI, t_2c_op_pot, t_3c_int)
224 :
225 26 : CALL timestop(handle2)
226 :
227 26 : CALL timeset(routineN//"_2c", handle2)
228 26 : IF (.NOT. ri_data%same_op) THEN
229 4 : SELECT CASE (ri_data%t2c_method)
230 : CASE (hfx_ri_do_2c_iter)
231 0 : CALL dbcsr_create(t_2c_op_RI_inv(1), template=t_2c_op_RI(1), matrix_type=dbcsr_type_no_symmetry)
232 0 : threshold = MAX(ri_data%filter_eps, 1.0e-12_dp)
233 0 : CALL invert_hotelling(t_2c_op_RI_inv(1), t_2c_op_RI(1), threshold=threshold, silent=.FALSE.)
234 : CASE (hfx_ri_do_2c_cholesky)
235 4 : CALL dbcsr_copy(t_2c_op_RI_inv(1), t_2c_op_RI(1))
236 4 : CALL cp_dbcsr_cholesky_decompose(t_2c_op_RI_inv(1), para_env=para_env, blacs_env=blacs_env)
237 4 : CALL cp_dbcsr_cholesky_invert(t_2c_op_RI_inv(1), para_env=para_env, blacs_env=blacs_env, upper_to_full=.TRUE.)
238 : CASE (hfx_ri_do_2c_diag)
239 0 : CALL dbcsr_copy(t_2c_op_RI_inv(1), t_2c_op_RI(1))
240 : CALL cp_dbcsr_power(t_2c_op_RI_inv(1), -1.0_dp, ri_data%eps_eigval, n_dependent, &
241 4 : para_env, blacs_env, verbose=ri_data%unit_nr_dbcsr > 0)
242 : END SELECT
243 :
244 4 : IF (ri_data%check_2c_inv) THEN
245 0 : CALL check_inverse(t_2c_op_RI_inv(1), t_2c_op_RI(1), unit_nr=unit_nr)
246 : END IF
247 :
248 4 : CALL dbcsr_release(t_2c_op_RI(1))
249 :
250 4 : SELECT CASE (ri_data%t2c_method)
251 : CASE (hfx_ri_do_2c_iter)
252 0 : CALL dbcsr_create(t_2c_op_pot_sqrt(1), template=t_2c_op_pot(1), matrix_type=dbcsr_type_symmetric)
253 0 : CALL dbcsr_create(t_2c_op_pot_sqrt_inv(1), template=t_2c_op_pot(1), matrix_type=dbcsr_type_symmetric)
254 : CALL matrix_sqrt_newton_schulz(t_2c_op_pot_sqrt(1), t_2c_op_pot_sqrt_inv(1), t_2c_op_pot(1), &
255 : ri_data%filter_eps, ri_data%t2c_sqrt_order, ri_data%eps_lanczos, &
256 0 : ri_data%max_iter_lanczos)
257 :
258 0 : CALL dbcsr_release(t_2c_op_pot_sqrt_inv(1))
259 : CASE (hfx_ri_do_2c_diag, hfx_ri_do_2c_cholesky)
260 4 : CALL dbcsr_copy(t_2c_op_pot_sqrt(1), t_2c_op_pot(1))
261 : CALL cp_dbcsr_power(t_2c_op_pot_sqrt(1), 0.5_dp, ri_data%eps_eigval, n_dependent, &
262 8 : para_env, blacs_env, verbose=ri_data%unit_nr_dbcsr > 0)
263 : END SELECT
264 :
265 : !We need S^-1 and (P|Q) for the forces.
266 4 : CALL dbt_create(t_2c_op_RI_inv(1), t_2c_work(1))
267 4 : CALL dbt_copy_matrix_to_tensor(t_2c_op_RI_inv(1), t_2c_work(1))
268 4 : CALL dbt_copy(t_2c_work(1), ri_data%t_2c_inv(1, 1), move_data=.TRUE.)
269 4 : CALL dbt_destroy(t_2c_work(1))
270 4 : CALL dbt_filter(ri_data%t_2c_inv(1, 1), ri_data%filter_eps)
271 :
272 4 : CALL dbt_create(t_2c_op_pot(1), t_2c_work(1))
273 4 : CALL dbt_copy_matrix_to_tensor(t_2c_op_pot(1), t_2c_work(1))
274 4 : CALL dbt_copy(t_2c_work(1), ri_data%t_2c_pot(1, 1), move_data=.TRUE.)
275 4 : CALL dbt_destroy(t_2c_work(1))
276 4 : CALL dbt_filter(ri_data%t_2c_pot(1, 1), ri_data%filter_eps)
277 :
278 4 : IF (ri_data%check_2c_inv) THEN
279 0 : CALL check_sqrt(t_2c_op_pot(1), matrix_sqrt=t_2c_op_pot_sqrt(1), unit_nr=unit_nr)
280 : END IF
281 4 : CALL dbcsr_create(t_2c_int_mat(1), template=t_2c_op_pot(1), matrix_type=dbcsr_type_no_symmetry)
282 : CALL dbcsr_multiply("N", "N", 1.0_dp, t_2c_op_RI_inv(1), t_2c_op_pot_sqrt(1), &
283 4 : 0.0_dp, t_2c_int_mat(1), filter_eps=ri_data%filter_eps)
284 4 : CALL dbcsr_release(t_2c_op_RI_inv(1))
285 4 : CALL dbcsr_release(t_2c_op_pot_sqrt(1))
286 : ELSE
287 22 : SELECT CASE (ri_data%t2c_method)
288 : CASE (hfx_ri_do_2c_iter)
289 0 : CALL dbcsr_create(t_2c_int_mat(1), template=t_2c_op_pot(1), matrix_type=dbcsr_type_symmetric)
290 0 : CALL dbcsr_create(t_2c_op_pot_sqrt(1), template=t_2c_op_pot(1), matrix_type=dbcsr_type_symmetric)
291 : CALL matrix_sqrt_newton_schulz(t_2c_op_pot_sqrt(1), t_2c_int_mat(1), t_2c_op_pot(1), &
292 : ri_data%filter_eps, ri_data%t2c_sqrt_order, ri_data%eps_lanczos, &
293 0 : ri_data%max_iter_lanczos)
294 0 : CALL dbcsr_release(t_2c_op_pot_sqrt(1))
295 : CASE (hfx_ri_do_2c_diag, hfx_ri_do_2c_cholesky)
296 22 : CALL dbcsr_copy(t_2c_int_mat(1), t_2c_op_pot(1))
297 : CALL cp_dbcsr_power(t_2c_int_mat(1), -0.5_dp, ri_data%eps_eigval, n_dependent, &
298 44 : para_env, blacs_env, verbose=ri_data%unit_nr_dbcsr > 0)
299 : END SELECT
300 22 : IF (ri_data%check_2c_inv) THEN
301 0 : CALL check_sqrt(t_2c_op_pot(1), matrix_sqrt_inv=t_2c_int_mat(1), unit_nr=unit_nr)
302 : END IF
303 :
304 : !We need (P|Q)^-1 for the forces
305 22 : CALL dbcsr_copy(dbcsr_work_1(1), t_2c_int_mat(1))
306 22 : CALL dbcsr_create(dbcsr_work_2(1), template=t_2c_int_mat(1))
307 22 : CALL dbcsr_multiply("N", "N", 1.0_dp, dbcsr_work_1(1), t_2c_int_mat(1), 0.0_dp, dbcsr_work_2(1))
308 22 : CALL dbcsr_release(dbcsr_work_1(1))
309 22 : CALL dbt_create(dbcsr_work_2(1), t_2c_work(1))
310 22 : CALL dbt_copy_matrix_to_tensor(dbcsr_work_2(1), t_2c_work(1))
311 22 : CALL dbcsr_release(dbcsr_work_2(1))
312 22 : CALL dbt_copy(t_2c_work(1), ri_data%t_2c_inv(1, 1), move_data=.TRUE.)
313 22 : CALL dbt_destroy(t_2c_work(1))
314 22 : CALL dbt_filter(ri_data%t_2c_inv(1, 1), ri_data%filter_eps)
315 : END IF
316 :
317 26 : CALL dbcsr_release(t_2c_op_pot(1))
318 :
319 26 : CALL dbt_create(t_2c_int_mat(1), t_2c_int(1), name="(RI|RI)")
320 26 : CALL dbt_copy_matrix_to_tensor(t_2c_int_mat(1), t_2c_int(1))
321 26 : CALL dbcsr_release(t_2c_int_mat(1))
322 58 : DO ispin = 1, nspins
323 58 : CALL dbt_copy(t_2c_int(1), ri_data%t_2c_int(ispin, 1))
324 : END DO
325 26 : CALL dbt_destroy(t_2c_int(1))
326 26 : CALL timestop(handle2)
327 :
328 26 : CALL timeset(routineN//"_3c", handle2)
329 26 : CALL dbt_copy(t_3c_int(1, 1), ri_data%t_3c_int_ctr_1(1, 1), order=[2, 1, 3], move_data=.TRUE.)
330 26 : CALL dbt_filter(ri_data%t_3c_int_ctr_1(1, 1), ri_data%filter_eps)
331 26 : CALL dbt_copy(ri_data%t_3c_int_ctr_1(1, 1), ri_data%t_3c_int_ctr_2(1, 1))
332 26 : CALL dbt_destroy(t_3c_int(1, 1))
333 26 : CALL timestop(handle2)
334 :
335 26 : CALL timestop(handle)
336 :
337 260 : END SUBROUTINE
338 :
339 : ! **************************************************************************************************
340 : !> \brief ...
341 : !> \param matrix_1 ...
342 : !> \param matrix_2 ...
343 : !> \param name ...
344 : !> \param unit_nr ...
345 : ! **************************************************************************************************
346 0 : SUBROUTINE check_inverse(matrix_1, matrix_2, name, unit_nr)
347 : TYPE(dbcsr_type), INTENT(INOUT) :: matrix_1, matrix_2
348 : CHARACTER(len=*), INTENT(IN), OPTIONAL :: name
349 : INTEGER, INTENT(IN) :: unit_nr
350 :
351 : CHARACTER(len=default_string_length) :: name_prv
352 : REAL(KIND=dp) :: error, frob_matrix, frob_matrix_base
353 : TYPE(dbcsr_type) :: matrix_tmp
354 :
355 0 : IF (PRESENT(name)) THEN
356 0 : name_prv = name
357 : ELSE
358 0 : CALL dbcsr_get_info(matrix_1, name=name_prv)
359 : END IF
360 :
361 0 : CALL dbcsr_create(matrix_tmp, template=matrix_1)
362 : CALL dbcsr_multiply("N", "N", 1.0_dp, matrix_1, matrix_2, &
363 0 : 0.0_dp, matrix_tmp)
364 0 : frob_matrix_base = dbcsr_frobenius_norm(matrix_tmp)
365 0 : CALL dbcsr_add_on_diag(matrix_tmp, -1.0_dp)
366 0 : frob_matrix = dbcsr_frobenius_norm(matrix_tmp)
367 0 : error = frob_matrix/frob_matrix_base
368 0 : IF (unit_nr > 0) THEN
369 : WRITE (UNIT=unit_nr, FMT="(T3,A,A,A,T73,ES8.1)") &
370 0 : "HFX_RI_INFO| Error for INV(", TRIM(name_prv), "):", error
371 : END IF
372 :
373 0 : CALL dbcsr_release(matrix_tmp)
374 0 : END SUBROUTINE
375 :
376 : ! **************************************************************************************************
377 : !> \brief ...
378 : !> \param matrix ...
379 : !> \param matrix_sqrt ...
380 : !> \param matrix_sqrt_inv ...
381 : !> \param name ...
382 : !> \param unit_nr ...
383 : ! **************************************************************************************************
384 0 : SUBROUTINE check_sqrt(matrix, matrix_sqrt, matrix_sqrt_inv, name, unit_nr)
385 : TYPE(dbcsr_type), INTENT(INOUT) :: matrix
386 : TYPE(dbcsr_type), INTENT(IN), OPTIONAL :: matrix_sqrt, matrix_sqrt_inv
387 : CHARACTER(len=*), INTENT(IN), OPTIONAL :: name
388 : INTEGER, INTENT(IN) :: unit_nr
389 :
390 : CHARACTER(len=default_string_length) :: name_prv
391 : REAL(KIND=dp) :: frob_matrix
392 : TYPE(dbcsr_type) :: matrix_copy, matrix_tmp
393 :
394 0 : IF (PRESENT(name)) THEN
395 0 : name_prv = name
396 : ELSE
397 0 : CALL dbcsr_get_info(matrix, name=name_prv)
398 : END IF
399 0 : IF (PRESENT(matrix_sqrt)) THEN
400 0 : CALL dbcsr_create(matrix_tmp, template=matrix)
401 0 : CALL dbcsr_copy(matrix_copy, matrix_sqrt)
402 : CALL dbcsr_multiply("N", "N", 1.0_dp, matrix_sqrt, matrix_copy, &
403 0 : 0.0_dp, matrix_tmp)
404 0 : CALL dbcsr_add(matrix_tmp, matrix, 1.0_dp, -1.0_dp)
405 0 : frob_matrix = dbcsr_frobenius_norm(matrix_tmp)
406 0 : IF (unit_nr > 0) THEN
407 : WRITE (UNIT=unit_nr, FMT="(T3,A,A,A,T73,ES8.1)") &
408 0 : "HFX_RI_INFO| Error for SQRT(", TRIM(name_prv), "):", frob_matrix
409 : END IF
410 0 : CALL dbcsr_release(matrix_tmp)
411 0 : CALL dbcsr_release(matrix_copy)
412 : END IF
413 :
414 0 : IF (PRESENT(matrix_sqrt_inv)) THEN
415 0 : CALL dbcsr_create(matrix_tmp, template=matrix)
416 0 : CALL dbcsr_copy(matrix_copy, matrix_sqrt_inv)
417 : CALL dbcsr_multiply("N", "N", 1.0_dp, matrix_sqrt_inv, matrix_copy, &
418 0 : 0.0_dp, matrix_tmp)
419 0 : CALL check_inverse(matrix_tmp, matrix, name="SQRT("//TRIM(name_prv)//")", unit_nr=unit_nr)
420 0 : CALL dbcsr_release(matrix_tmp)
421 0 : CALL dbcsr_release(matrix_copy)
422 : END IF
423 :
424 0 : END SUBROUTINE
425 :
426 : ! **************************************************************************************************
427 : !> \brief Calculate 2-center and 3-center integrals
428 : !>
429 : !> 2c: K_1(P, R) = (P|v1|R) and K_2(P, R) = (P|v2|R)
430 : !> 3c: int_3c(mu, lambda, P) = (mu lambda |v2| P)
431 : !> v_1 is HF operator, v_2 is RI metric
432 : !> \param qs_env ...
433 : !> \param ri_data ...
434 : !> \param t_2c_int_RI K_2(P, R) note: even with k-point, only need on central cell
435 : !> \param t_2c_int_pot K_1(P, R)
436 : !> \param t_3c_int int_3c(mu, lambda, P)
437 : !> \param do_kpoints ...
438 : !> \notes the integral tensor arrays are already allocated on entry
439 : ! **************************************************************************************************
440 4004 : SUBROUTINE hfx_ri_pre_scf_calc_tensors(qs_env, ri_data, t_2c_int_RI, t_2c_int_pot, t_3c_int, do_kpoints)
441 : TYPE(qs_environment_type), POINTER :: qs_env
442 : TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
443 : TYPE(dbcsr_type), DIMENSION(:), INTENT(OUT) :: t_2c_int_RI, t_2c_int_pot
444 : TYPE(dbt_type), DIMENSION(:, :) :: t_3c_int
445 : LOGICAL, INTENT(IN), OPTIONAL :: do_kpoints
446 :
447 : CHARACTER(LEN=*), PARAMETER :: routineN = 'hfx_ri_pre_scf_calc_tensors'
448 :
449 : CHARACTER :: symm
450 : INTEGER :: handle, i_img, i_mem, ibasis, j_img, &
451 : n_mem, natom, nblks, nimg, nkind, &
452 : nthreads
453 : INTEGER(int_8) :: nze
454 178 : INTEGER, ALLOCATABLE, DIMENSION(:) :: dist_AO_1, dist_AO_2, dist_RI, dist_RI_ext, &
455 356 : ends_array_mc_block_int, ends_array_mc_int, sizes_AO, sizes_RI, sizes_RI_ext, &
456 178 : sizes_RI_ext_split, starts_array_mc_block_int, starts_array_mc_int
457 : INTEGER, DIMENSION(3) :: pcoord, pdims
458 356 : INTEGER, DIMENSION(:), POINTER :: col_bsize, row_bsize
459 : LOGICAL :: converged, do_kpoints_prv
460 : REAL(dp) :: max_ev, min_ev, occ, RI_range
461 178 : TYPE(atomic_kind_type), DIMENSION(:), POINTER :: atomic_kind_set
462 : TYPE(dbcsr_distribution_type) :: dbcsr_dist
463 1246 : TYPE(dbt_distribution_type) :: t_dist
464 534 : TYPE(dbt_pgrid_type) :: pgrid
465 1246 : TYPE(dbt_type) :: t_3c_tmp
466 178 : TYPE(dbt_type), ALLOCATABLE, DIMENSION(:, :) :: t_3c_int_batched
467 : TYPE(dft_control_type), POINTER :: dft_control
468 : TYPE(distribution_2d_type), POINTER :: dist_2d
469 : TYPE(distribution_3d_type) :: dist_3d
470 : TYPE(gto_basis_set_p_type), ALLOCATABLE, &
471 178 : DIMENSION(:), TARGET :: basis_set_AO, basis_set_RI
472 : TYPE(gto_basis_set_type), POINTER :: orb_basis, ri_basis
473 178 : TYPE(mp_cart_type) :: mp_comm_t3c
474 : TYPE(mp_para_env_type), POINTER :: para_env
475 : TYPE(neighbor_list_3c_type) :: nl_3c
476 : TYPE(neighbor_list_set_p_type), DIMENSION(:), &
477 178 : POINTER :: nl_2c_pot, nl_2c_RI
478 178 : TYPE(particle_type), DIMENSION(:), POINTER :: particle_set
479 178 : TYPE(qs_kind_type), DIMENSION(:), POINTER :: qs_kind_set
480 : TYPE(qs_ks_env_type), POINTER :: ks_env
481 :
482 178 : CALL timeset(routineN, handle)
483 178 : NULLIFY (col_bsize, row_bsize, dist_2d, nl_2c_pot, nl_2c_RI, &
484 178 : particle_set, qs_kind_set, ks_env, para_env)
485 :
486 : CALL get_qs_env(qs_env, natom=natom, nkind=nkind, qs_kind_set=qs_kind_set, particle_set=particle_set, &
487 178 : distribution_2d=dist_2d, ks_env=ks_env, dft_control=dft_control, para_env=para_env)
488 :
489 178 : RI_range = 0.0_dp
490 178 : do_kpoints_prv = .FALSE.
491 178 : IF (PRESENT(do_kpoints)) do_kpoints_prv = do_kpoints
492 178 : nimg = 1
493 178 : IF (do_kpoints_prv) THEN
494 70 : nimg = ri_data%nimg
495 70 : RI_range = ri_data%kp_RI_range
496 : END IF
497 :
498 712 : ALLOCATE (sizes_RI(natom), sizes_AO(natom))
499 1332 : ALLOCATE (basis_set_RI(nkind), basis_set_AO(nkind))
500 178 : CALL basis_set_list_setup(basis_set_RI, ri_data%ri_basis_type, qs_kind_set)
501 178 : CALL get_particle_set(particle_set, qs_kind_set, nsgf=sizes_RI, basis=basis_set_RI)
502 178 : CALL basis_set_list_setup(basis_set_AO, ri_data%orb_basis_type, qs_kind_set)
503 178 : CALL get_particle_set(particle_set, qs_kind_set, nsgf=sizes_AO, basis=basis_set_AO)
504 :
505 488 : DO ibasis = 1, SIZE(basis_set_AO)
506 310 : orb_basis => basis_set_AO(ibasis)%gto_basis_set
507 310 : ri_basis => basis_set_RI(ibasis)%gto_basis_set
508 : ! interaction radii should be based on eps_pgf_orb controlled in RI section
509 : ! (since hartree-fock needs very tight eps_pgf_orb for Kohn-Sham/Fock matrix but eps_pgf_orb
510 : ! can be much looser in RI HFX since no systematic error is introduced with tensor sparsity)
511 310 : CALL init_interaction_radii_orb_basis(orb_basis, ri_data%eps_pgf_orb)
512 488 : CALL init_interaction_radii_orb_basis(ri_basis, ri_data%eps_pgf_orb)
513 : END DO
514 :
515 178 : n_mem = ri_data%n_mem
516 : CALL create_tensor_batches(sizes_RI, n_mem, starts_array_mc_int, ends_array_mc_int, &
517 : starts_array_mc_block_int, ends_array_mc_block_int)
518 :
519 178 : DEALLOCATE (starts_array_mc_int, ends_array_mc_int)
520 :
521 : !We separate the K-points and standard 3c integrals, because handle quite differently
522 178 : IF (.NOT. do_kpoints_prv) THEN
523 :
524 108 : nthreads = 1
525 108 : !$ nthreads = omp_get_num_threads()
526 108 : pdims = 0
527 432 : CALL dbt_pgrid_create(para_env, pdims, pgrid, tensor_dims=[MAX(1, natom/(n_mem*nthreads)), natom, natom])
528 :
529 972 : ALLOCATE (t_3c_int_batched(1, 1))
530 : CALL create_3c_tensor(t_3c_int_batched(1, 1), dist_RI, dist_AO_1, dist_AO_2, pgrid, &
531 : sizes_RI, sizes_AO, sizes_AO, map1=[1], map2=[2, 3], &
532 108 : name="(RI | AO AO)")
533 :
534 108 : CALL get_qs_env(qs_env, nkind=nkind, particle_set=particle_set, atomic_kind_set=atomic_kind_set)
535 108 : CALL dbt_mp_environ_pgrid(pgrid, pdims, pcoord)
536 108 : CALL mp_comm_t3c%create(pgrid%mp_comm_2d, 3, pdims)
537 : CALL distribution_3d_create(dist_3d, dist_RI, dist_AO_1, dist_AO_2, &
538 108 : nkind, particle_set, mp_comm_t3c, own_comm=.TRUE.)
539 108 : DEALLOCATE (dist_RI, dist_AO_1, dist_AO_2)
540 :
541 : CALL create_3c_tensor(t_3c_int(1, 1), dist_RI, dist_AO_1, dist_AO_2, ri_data%pgrid, &
542 : ri_data%bsizes_RI_split, ri_data%bsizes_AO_split, ri_data%bsizes_AO_split, &
543 : map1=[1], map2=[2, 3], &
544 108 : name="O (RI AO | AO)")
545 :
546 : CALL build_3c_neighbor_lists(nl_3c, basis_set_RI, basis_set_AO, basis_set_AO, dist_3d, ri_data%ri_metric, &
547 108 : "HFX_3c_nl", qs_env, op_pos=1, sym_jk=.TRUE., own_dist=.TRUE.)
548 :
549 424 : DO i_mem = 1, n_mem
550 : CALL build_3c_integrals(t_3c_int_batched, ri_data%filter_eps/2, qs_env, nl_3c, &
551 : basis_set_RI, basis_set_AO, basis_set_AO, &
552 : ri_data%ri_metric, int_eps=ri_data%eps_schwarz, op_pos=1, &
553 : desymmetrize=.FALSE., &
554 948 : bounds_i=[starts_array_mc_block_int(i_mem), ends_array_mc_block_int(i_mem)])
555 316 : CALL dbt_copy(t_3c_int_batched(1, 1), t_3c_int(1, 1), summation=.TRUE., move_data=.TRUE.)
556 424 : CALL dbt_filter(t_3c_int(1, 1), ri_data%filter_eps/2)
557 : END DO
558 :
559 108 : CALL dbt_destroy(t_3c_int_batched(1, 1))
560 :
561 108 : CALL neighbor_list_3c_destroy(nl_3c)
562 :
563 108 : CALL dbt_create(t_3c_int(1, 1), t_3c_tmp)
564 :
565 108 : IF (ri_data%flavor == ri_pmat) THEN ! desymmetrize
566 : ! desymmetrize
567 82 : CALL dbt_copy(t_3c_int(1, 1), t_3c_tmp)
568 82 : CALL dbt_copy(t_3c_tmp, t_3c_int(1, 1), order=[1, 3, 2], summation=.TRUE., move_data=.TRUE.)
569 :
570 : ! For RI-RHO filter_eps_storage is reserved for screening tensor contracted with RI-metric
571 : ! with RI metric but not to bare integral tensor
572 82 : CALL dbt_filter(t_3c_int(1, 1), ri_data%filter_eps)
573 : ELSE
574 26 : CALL dbt_filter(t_3c_int(1, 1), ri_data%filter_eps_storage/2)
575 : END IF
576 :
577 108 : CALL dbt_destroy(t_3c_tmp)
578 :
579 : ELSE !do_kpoints
580 :
581 70 : nthreads = 1
582 70 : !$ nthreads = omp_get_num_threads()
583 70 : pdims = 0
584 280 : CALL dbt_pgrid_create(para_env, pdims, pgrid, tensor_dims=[natom, natom, MAX(1, natom/(n_mem*nthreads))])
585 :
586 : !In k-points HFX, we stack all images along the RI direction in the same tensors, in order
587 : !to avoid storing nimg x ncell_RI different tensors (very memory intensive)
588 70 : nblks = SIZE(ri_data%bsizes_RI_split)
589 350 : ALLOCATE (sizes_RI_ext(natom*ri_data%ncell_RI), sizes_RI_ext_split(nblks*ri_data%ncell_RI))
590 506 : DO i_img = 1, ri_data%ncell_RI
591 1308 : sizes_RI_ext((i_img - 1)*natom + 1:i_img*natom) = sizes_RI(:)
592 2344 : sizes_RI_ext_split((i_img - 1)*nblks + 1:i_img*nblks) = ri_data%bsizes_RI_split(:)
593 : END DO
594 :
595 : CALL create_3c_tensor(t_3c_tmp, dist_AO_1, dist_AO_2, dist_RI, &
596 : pgrid, sizes_AO, sizes_AO, sizes_RI, map1=[1, 2], map2=[3], &
597 70 : name="(AO AO | RI)")
598 70 : CALL dbt_destroy(t_3c_tmp)
599 :
600 : !For the integrals to work, the distribution along the RI direction must be repeated
601 210 : ALLOCATE (dist_RI_ext(natom*ri_data%ncell_RI))
602 506 : DO i_img = 1, ri_data%ncell_RI
603 1378 : dist_RI_ext((i_img - 1)*natom + 1:i_img*natom) = dist_RI(:)
604 : END DO
605 :
606 2416 : ALLOCATE (t_3c_int_batched(nimg, 1))
607 70 : CALL dbt_distribution_new(t_dist, pgrid, dist_AO_1, dist_AO_2, dist_RI_ext)
608 : CALL dbt_create(t_3c_int_batched(1, 1), "KP_3c_ints", t_dist, [1, 2], [3], &
609 70 : sizes_AO, sizes_AO, sizes_RI_ext)
610 1716 : DO i_img = 2, nimg
611 1716 : CALL dbt_create(t_3c_int_batched(1, 1), t_3c_int_batched(i_img, 1))
612 : END DO
613 70 : CALL dbt_distribution_destroy(t_dist)
614 :
615 70 : CALL get_qs_env(qs_env, nkind=nkind, particle_set=particle_set, atomic_kind_set=atomic_kind_set)
616 70 : CALL dbt_mp_environ_pgrid(pgrid, pdims, pcoord)
617 70 : CALL mp_comm_t3c%create(pgrid%mp_comm_2d, 3, pdims)
618 : CALL distribution_3d_create(dist_3d, dist_AO_1, dist_AO_2, dist_RI, &
619 70 : nkind, particle_set, mp_comm_t3c, own_comm=.TRUE.)
620 70 : DEALLOCATE (dist_RI, dist_AO_1, dist_AO_2)
621 :
622 : ! create 3c tensor for storage of ints
623 : CALL build_3c_neighbor_lists(nl_3c, basis_set_AO, basis_set_AO, basis_set_RI, dist_3d, ri_data%ri_metric, &
624 70 : "HFX_3c_nl", qs_env, op_pos=2, sym_ij=.FALSE., own_dist=.TRUE.)
625 :
626 : CALL create_3c_tensor(t_3c_int(1, 1), dist_RI, dist_AO_1, dist_AO_2, ri_data%pgrid, &
627 : sizes_RI_ext_split, ri_data%bsizes_AO_split, ri_data%bsizes_AO_split, &
628 : map1=[1], map2=[2, 3], &
629 70 : name="O (RI AO | AO)")
630 1716 : DO j_img = 2, nimg
631 1716 : CALL dbt_create(t_3c_int(1, 1), t_3c_int(1, j_img))
632 : END DO
633 :
634 70 : CALL dbt_create(t_3c_int(1, 1), t_3c_tmp)
635 192 : DO i_mem = 1, n_mem
636 : CALL build_3c_integrals(t_3c_int_batched, ri_data%filter_eps, qs_env, nl_3c, &
637 : basis_set_AO, basis_set_AO, basis_set_RI, &
638 : ri_data%ri_metric, int_eps=ri_data%eps_schwarz, op_pos=2, &
639 : desymmetrize=.FALSE., do_kpoints=.TRUE., do_hfx_kpoints=.TRUE., &
640 : bounds_k=[starts_array_mc_block_int(i_mem), ends_array_mc_block_int(i_mem)], &
641 366 : RI_range=RI_range, img_to_RI_cell=ri_data%img_to_RI_cell)
642 :
643 3440 : DO i_img = 1, nimg
644 : !we start with (mu^0 sigma^i | P^j) and finish with (P^i | mu^0 sigma^j)
645 3248 : CALL get_tensor_occupancy(t_3c_int_batched(i_img, 1), nze, occ)
646 3370 : IF (nze > 0) THEN
647 2154 : CALL dbt_copy(t_3c_int_batched(i_img, 1), t_3c_tmp, order=[3, 2, 1], move_data=.TRUE.)
648 2154 : CALL dbt_filter(t_3c_tmp, ri_data%filter_eps)
649 : CALL dbt_copy(t_3c_tmp, t_3c_int(1, i_img), order=[1, 3, 2], &
650 2154 : summation=.TRUE., move_data=.TRUE.)
651 : END IF
652 : END DO
653 : END DO
654 :
655 1786 : DO i_img = 1, nimg
656 1786 : CALL dbt_destroy(t_3c_int_batched(i_img, 1))
657 : END DO
658 1786 : DEALLOCATE (t_3c_int_batched)
659 70 : CALL neighbor_list_3c_destroy(nl_3c)
660 70 : CALL dbt_destroy(t_3c_tmp)
661 : END IF
662 178 : CALL dbt_pgrid_destroy(pgrid)
663 :
664 : CALL build_2c_neighbor_lists(nl_2c_pot, basis_set_RI, basis_set_RI, ri_data%hfx_pot, &
665 : "HFX_2c_nl_pot", qs_env, sym_ij=.NOT. do_kpoints_prv, &
666 178 : dist_2d=dist_2d)
667 :
668 178 : CALL cp_dbcsr_dist2d_to_dist(dist_2d, dbcsr_dist)
669 534 : ALLOCATE (row_bsize(SIZE(sizes_RI)))
670 356 : ALLOCATE (col_bsize(SIZE(sizes_RI)))
671 676 : row_bsize(:) = sizes_RI
672 676 : col_bsize(:) = sizes_RI
673 :
674 : !Use non-symmetric nl and matrices for k-points
675 178 : symm = dbcsr_type_symmetric
676 178 : IF (do_kpoints_prv) symm = dbcsr_type_no_symmetry
677 :
678 178 : CALL dbcsr_create(t_2c_int_pot(1), "(R|P) HFX", dbcsr_dist, symm, row_bsize, col_bsize)
679 1824 : DO i_img = 2, nimg
680 1824 : CALL dbcsr_create(t_2c_int_pot(i_img), template=t_2c_int_pot(1))
681 : END DO
682 :
683 178 : IF (.NOT. ri_data%same_op) THEN
684 88 : CALL dbcsr_create(t_2c_int_RI(1), "(R|P) HFX", dbcsr_dist, symm, row_bsize, col_bsize)
685 1734 : DO i_img = 2, nimg
686 1734 : CALL dbcsr_create(t_2c_int_RI(i_img), template=t_2c_int_RI(1))
687 : END DO
688 : END IF
689 178 : DEALLOCATE (col_bsize, row_bsize)
690 :
691 178 : CALL dbcsr_distribution_release(dbcsr_dist)
692 :
693 : CALL build_2c_integrals(t_2c_int_pot, ri_data%filter_eps_2c, qs_env, nl_2c_pot, basis_set_RI, basis_set_RI, &
694 178 : ri_data%hfx_pot, do_kpoints=do_kpoints_prv, do_hfx_kpoints=do_kpoints_prv)
695 178 : CALL release_neighbor_list_sets(nl_2c_pot)
696 :
697 178 : IF (.NOT. ri_data%same_op) THEN
698 : CALL build_2c_neighbor_lists(nl_2c_RI, basis_set_RI, basis_set_RI, ri_data%ri_metric, &
699 : "HFX_2c_nl_RI", qs_env, sym_ij=.NOT. do_kpoints_prv, &
700 88 : dist_2d=dist_2d)
701 :
702 : CALL build_2c_integrals(t_2c_int_RI, ri_data%filter_eps_2c, qs_env, nl_2c_RI, basis_set_RI, basis_set_RI, &
703 88 : ri_data%ri_metric, do_kpoints=do_kpoints_prv, do_hfx_kpoints=do_kpoints_prv)
704 :
705 88 : CALL release_neighbor_list_sets(nl_2c_RI)
706 : END IF
707 :
708 488 : DO ibasis = 1, SIZE(basis_set_AO)
709 310 : orb_basis => basis_set_AO(ibasis)%gto_basis_set
710 310 : ri_basis => basis_set_RI(ibasis)%gto_basis_set
711 : ! reset interaction radii of orb basis
712 310 : CALL init_interaction_radii_orb_basis(orb_basis, dft_control%qs_control%eps_pgf_orb)
713 488 : CALL init_interaction_radii_orb_basis(ri_basis, dft_control%qs_control%eps_pgf_orb)
714 : END DO
715 :
716 178 : IF (ri_data%calc_condnum) THEN
717 : CALL arnoldi_extremal(t_2c_int_pot(1), max_ev, min_ev, threshold=ri_data%eps_lanczos, &
718 4 : max_iter=ri_data%max_iter_lanczos, converged=converged)
719 :
720 4 : IF (.NOT. converged) THEN
721 0 : CPWARN("Not converged: unreliable condition number estimate of (P|Q) matrix (HFX potential).")
722 : END IF
723 :
724 4 : IF (ri_data%unit_nr > 0) THEN
725 2 : WRITE (ri_data%unit_nr, '(T2,A)') "2-Norm Condition Number of (P|Q) integrals (HFX potential)"
726 2 : IF (min_ev > 0) THEN
727 : WRITE (ri_data%unit_nr, '(T4,A,ES11.3E3,T32,A,ES11.3E3,A4,ES11.3E3,T63,A,F8.4)') &
728 2 : "CN : max/min ev: ", max_ev, " / ", min_ev, "=", max_ev/min_ev, "Log(2-CN):", LOG10(max_ev/min_ev)
729 : ELSE
730 : WRITE (ri_data%unit_nr, '(T4,A,ES11.3E3,T32,A,ES11.3E3,T63,A)') &
731 0 : "CN : max/min ev: ", max_ev, " / ", min_ev, "Log(CN): infinity"
732 : END IF
733 : END IF
734 :
735 4 : IF (.NOT. ri_data%same_op) THEN
736 : CALL arnoldi_extremal(t_2c_int_RI(1), max_ev, min_ev, threshold=ri_data%eps_lanczos, &
737 2 : max_iter=ri_data%max_iter_lanczos, converged=converged)
738 :
739 2 : IF (.NOT. converged) THEN
740 0 : CPWARN("Not converged: unreliable condition number estimate of (P|Q) matrix (RI metric).")
741 : END IF
742 :
743 2 : IF (ri_data%unit_nr > 0) THEN
744 1 : WRITE (ri_data%unit_nr, '(T2,A)') "2-Norm Condition Number of (P|Q) integrals (RI metric)"
745 1 : IF (min_ev > 0) THEN
746 : WRITE (ri_data%unit_nr, '(T4,A,ES11.3E3,T32,A,ES11.3E3,A4,ES11.3E3,T63,A,F8.4)') &
747 1 : "CN : max/min ev: ", max_ev, " / ", min_ev, "=", max_ev/min_ev, "Log(2-CN):", LOG10(max_ev/min_ev)
748 : ELSE
749 : WRITE (ri_data%unit_nr, '(T4,A,ES11.3E3,T32,A,ES11.3E3,T63,A)') &
750 0 : "CN : max/min ev: ", max_ev, " / ", min_ev, "Log(CN): infinity"
751 : END IF
752 : END IF
753 : END IF
754 : END IF
755 :
756 178 : CALL timestop(handle)
757 1176 : END SUBROUTINE
758 :
759 : ! **************************************************************************************************
760 : !> \brief Pre-SCF steps in rho flavor of RI HFX
761 : !>
762 : !> K(P, S) = sum_{R,Q} K_2(P, R)^{-1} K_1(R, Q) K_2(Q, S)^{-1}
763 : !> Calculate B(mu, lambda, R) = sum_P int_3c(mu, lambda, P) K(P, R)
764 : !> \param qs_env ...
765 : !> \param ri_data ...
766 : ! **************************************************************************************************
767 82 : SUBROUTINE hfx_ri_pre_scf_Pmat(qs_env, ri_data)
768 : TYPE(qs_environment_type), POINTER :: qs_env
769 : TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
770 :
771 : CHARACTER(LEN=*), PARAMETER :: routineN = 'hfx_ri_pre_scf_Pmat'
772 :
773 : INTEGER :: handle, handle2, i_mem, j_mem, &
774 : n_dependent, unit_nr, unit_nr_dbcsr
775 : INTEGER(int_8) :: nflop, nze, nze_O
776 82 : INTEGER, ALLOCATABLE, DIMENSION(:) :: batch_ranges_AO, batch_ranges_RI
777 : INTEGER, DIMENSION(2, 1) :: bounds_i
778 : INTEGER, DIMENSION(2, 2) :: bounds_j
779 : INTEGER, DIMENSION(3) :: dims_3c
780 : REAL(KIND=dp) :: compression_factor, memory_3c, occ, &
781 : threshold
782 : TYPE(cp_blacs_env_type), POINTER :: blacs_env
783 328 : TYPE(dbcsr_type), DIMENSION(1) :: t_2c_int_mat, t_2c_op_pot, t_2c_op_RI, &
784 246 : t_2c_tmp, t_2c_tmp_2
785 738 : TYPE(dbt_type) :: t_3c_2
786 164 : TYPE(dbt_type), ALLOCATABLE, DIMENSION(:) :: t_2c_int, t_2c_work
787 82 : TYPE(dbt_type), ALLOCATABLE, DIMENSION(:, :) :: t_3c_int_1
788 : TYPE(mp_para_env_type), POINTER :: para_env
789 :
790 82 : CALL timeset(routineN, handle)
791 :
792 82 : unit_nr_dbcsr = ri_data%unit_nr_dbcsr
793 82 : unit_nr = ri_data%unit_nr
794 :
795 82 : CALL get_qs_env(qs_env, para_env=para_env, blacs_env=blacs_env)
796 :
797 82 : CALL timeset(routineN//"_int", handle2)
798 :
799 2542 : ALLOCATE (t_2c_int(1), t_2c_work(1), t_3c_int_1(1, 1))
800 82 : CALL hfx_ri_pre_scf_calc_tensors(qs_env, ri_data, t_2c_op_RI, t_2c_op_pot, t_3c_int_1)
801 :
802 82 : CALL dbt_copy(t_3c_int_1(1, 1), ri_data%t_3c_int_ctr_3(1, 1), order=[1, 2, 3], move_data=.TRUE.)
803 :
804 82 : CALL dbt_destroy(t_3c_int_1(1, 1))
805 :
806 82 : CALL timestop(handle2)
807 :
808 82 : CALL timeset(routineN//"_2c", handle2)
809 :
810 82 : IF (ri_data%same_op) t_2c_op_RI(1) = t_2c_op_pot(1)
811 82 : CALL dbcsr_create(t_2c_int_mat(1), template=t_2c_op_RI(1), matrix_type=dbcsr_type_no_symmetry)
812 82 : threshold = MAX(ri_data%filter_eps, 1.0e-12_dp)
813 :
814 82 : SELECT CASE (ri_data%t2c_method)
815 : CASE (hfx_ri_do_2c_iter)
816 : CALL invert_hotelling(t_2c_int_mat(1), t_2c_op_RI(1), &
817 0 : threshold=threshold, silent=.FALSE.)
818 : CASE (hfx_ri_do_2c_cholesky)
819 82 : CALL dbcsr_copy(t_2c_int_mat(1), t_2c_op_RI(1))
820 82 : CALL cp_dbcsr_cholesky_decompose(t_2c_int_mat(1), para_env=para_env, blacs_env=blacs_env)
821 82 : CALL cp_dbcsr_cholesky_invert(t_2c_int_mat(1), para_env=para_env, blacs_env=blacs_env, upper_to_full=.TRUE.)
822 : CASE (hfx_ri_do_2c_diag)
823 0 : CALL dbcsr_copy(t_2c_int_mat(1), t_2c_op_RI(1))
824 : CALL cp_dbcsr_power(t_2c_int_mat(1), -1.0_dp, ri_data%eps_eigval, n_dependent, &
825 82 : para_env, blacs_env, verbose=ri_data%unit_nr_dbcsr > 0)
826 : END SELECT
827 :
828 82 : IF (ri_data%check_2c_inv) THEN
829 0 : CALL check_inverse(t_2c_int_mat(1), t_2c_op_RI(1), unit_nr=unit_nr)
830 : END IF
831 :
832 : !Need to save the (P|Q)^-1 tensor for forces (inverse metric if not same_op)
833 82 : CALL dbt_create(t_2c_int_mat(1), t_2c_work(1))
834 82 : CALL dbt_copy_matrix_to_tensor(t_2c_int_mat(1), t_2c_work(1))
835 82 : CALL dbt_copy(t_2c_work(1), ri_data%t_2c_inv(1, 1), move_data=.TRUE.)
836 82 : CALL dbt_destroy(t_2c_work(1))
837 82 : CALL dbt_filter(ri_data%t_2c_inv(1, 1), ri_data%filter_eps)
838 82 : IF (.NOT. ri_data%same_op) THEN
839 : !Also save the RI (P|Q) integral
840 14 : CALL dbt_create(t_2c_op_pot(1), t_2c_work(1))
841 14 : CALL dbt_copy_matrix_to_tensor(t_2c_op_pot(1), t_2c_work(1))
842 14 : CALL dbt_copy(t_2c_work(1), ri_data%t_2c_pot(1, 1), move_data=.TRUE.)
843 14 : CALL dbt_destroy(t_2c_work(1))
844 14 : CALL dbt_filter(ri_data%t_2c_pot(1, 1), ri_data%filter_eps)
845 : END IF
846 :
847 82 : IF (ri_data%same_op) THEN
848 68 : CALL dbcsr_release(t_2c_op_pot(1))
849 : ELSE
850 14 : CALL dbcsr_create(t_2c_tmp(1), template=t_2c_op_RI(1), matrix_type=dbcsr_type_no_symmetry)
851 14 : CALL dbcsr_create(t_2c_tmp_2(1), template=t_2c_op_RI(1), matrix_type=dbcsr_type_no_symmetry)
852 14 : CALL dbcsr_release(t_2c_op_RI(1))
853 : CALL dbcsr_multiply('N', 'N', 1.0_dp, t_2c_int_mat(1), t_2c_op_pot(1), 0.0_dp, t_2c_tmp(1), &
854 14 : filter_eps=ri_data%filter_eps)
855 :
856 14 : CALL dbcsr_release(t_2c_op_pot(1))
857 : CALL dbcsr_multiply('N', 'N', 1.0_dp, t_2c_tmp(1), t_2c_int_mat(1), 0.0_dp, t_2c_tmp_2(1), &
858 14 : filter_eps=ri_data%filter_eps)
859 14 : CALL dbcsr_release(t_2c_tmp(1))
860 14 : CALL dbcsr_release(t_2c_int_mat(1))
861 14 : t_2c_int_mat(1) = t_2c_tmp_2(1)
862 : END IF
863 :
864 82 : CALL dbt_create(t_2c_int_mat(1), t_2c_int(1), name="(RI|RI)")
865 82 : CALL dbt_copy_matrix_to_tensor(t_2c_int_mat(1), t_2c_int(1))
866 82 : CALL dbcsr_release(t_2c_int_mat(1))
867 82 : CALL dbt_copy(t_2c_int(1), ri_data%t_2c_int(1, 1), move_data=.TRUE.)
868 82 : CALL dbt_destroy(t_2c_int(1))
869 82 : CALL dbt_filter(ri_data%t_2c_int(1, 1), ri_data%filter_eps)
870 :
871 82 : CALL timestop(handle2)
872 :
873 82 : CALL dbt_create(ri_data%t_3c_int_ctr_3(1, 1), t_3c_2)
874 :
875 82 : CALL dbt_get_info(ri_data%t_3c_int_ctr_3(1, 1), nfull_total=dims_3c)
876 :
877 82 : memory_3c = 0.0_dp
878 82 : nze_O = 0
879 :
880 246 : ALLOCATE (batch_ranges_RI(ri_data%n_mem_RI + 1))
881 246 : ALLOCATE (batch_ranges_AO(ri_data%n_mem + 1))
882 328 : batch_ranges_RI(:ri_data%n_mem_RI) = ri_data%starts_array_RI_mem_block(:)
883 82 : batch_ranges_RI(ri_data%n_mem_RI + 1) = ri_data%ends_array_RI_mem_block(ri_data%n_mem_RI) + 1
884 328 : batch_ranges_AO(:ri_data%n_mem) = ri_data%starts_array_mem_block(:)
885 82 : batch_ranges_AO(ri_data%n_mem + 1) = ri_data%ends_array_mem_block(ri_data%n_mem) + 1
886 :
887 : CALL dbt_batched_contract_init(ri_data%t_3c_int_ctr_3(1, 1), batch_range_1=batch_ranges_RI, &
888 82 : batch_range_2=batch_ranges_AO)
889 : CALL dbt_batched_contract_init(t_3c_2, batch_range_1=batch_ranges_RI, &
890 82 : batch_range_2=batch_ranges_AO)
891 :
892 328 : DO i_mem = 1, ri_data%n_mem_RI
893 738 : bounds_i(:, 1) = [ri_data%starts_array_RI_mem(i_mem), ri_data%ends_array_RI_mem(i_mem)]
894 :
895 246 : CALL dbt_batched_contract_init(ri_data%t_2c_int(1, 1))
896 984 : DO j_mem = 1, ri_data%n_mem
897 2214 : bounds_j(:, 1) = [ri_data%starts_array_mem(j_mem), ri_data%ends_array_mem(j_mem)]
898 2214 : bounds_j(:, 2) = [1, dims_3c(3)]
899 738 : CALL timeset(routineN//"_RIx3C", handle2)
900 : CALL dbt_contract(1.0_dp, ri_data%t_2c_int(1, 1), ri_data%t_3c_int_ctr_3(1, 1), &
901 : 0.0_dp, t_3c_2, &
902 : contract_1=[2], notcontract_1=[1], &
903 : contract_2=[1], notcontract_2=[2, 3], &
904 : map_1=[1], map_2=[2, 3], filter_eps=ri_data%filter_eps_storage, &
905 : bounds_2=bounds_i, &
906 : bounds_3=bounds_j, &
907 : unit_nr=unit_nr_dbcsr, &
908 738 : flop=nflop)
909 :
910 738 : ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
911 738 : CALL timestop(handle2)
912 :
913 738 : CALL timeset(routineN//"_copy_2", handle2)
914 738 : CALL dbt_copy(t_3c_2, ri_data%t_3c_int_ctr_1(1, 1), order=[2, 1, 3], move_data=.TRUE.)
915 :
916 738 : CALL get_tensor_occupancy(ri_data%t_3c_int_ctr_1(1, 1), nze, occ)
917 738 : nze_O = nze_O + nze
918 :
919 : CALL compress_tensor(ri_data%t_3c_int_ctr_1(1, 1), ri_data%blk_indices(j_mem, i_mem)%ind, &
920 738 : ri_data%store_3c(j_mem, i_mem), ri_data%filter_eps_storage, memory_3c)
921 :
922 3198 : CALL timestop(handle2)
923 : END DO
924 328 : CALL dbt_batched_contract_finalize(ri_data%t_2c_int(1, 1))
925 : END DO
926 82 : CALL dbt_batched_contract_finalize(t_3c_2)
927 82 : CALL dbt_batched_contract_finalize(ri_data%t_3c_int_ctr_3(1, 1))
928 :
929 82 : CALL para_env%sum(memory_3c)
930 82 : compression_factor = REAL(nze_O, dp)*1.0E-06*8.0_dp/memory_3c
931 :
932 82 : IF (unit_nr > 0) THEN
933 : WRITE (UNIT=unit_nr, FMT="((T3,A,T66,F11.2,A4))") &
934 20 : "MEMORY_INFO| Memory for 3-center HFX integrals (compressed):", memory_3c, ' MiB'
935 :
936 : WRITE (UNIT=unit_nr, FMT="((T3,A,T60,F21.2))") &
937 20 : "MEMORY_INFO| Compression factor: ", compression_factor
938 : END IF
939 :
940 82 : CALL dbt_clear(ri_data%t_2c_int(1, 1))
941 82 : CALL dbt_destroy(t_3c_2)
942 :
943 82 : CALL dbt_copy(ri_data%t_3c_int_ctr_3(1, 1), ri_data%t_3c_int_ctr_2(1, 1), order=[2, 1, 3], move_data=.TRUE.)
944 :
945 82 : CALL timestop(handle)
946 738 : END SUBROUTINE
947 :
948 : ! **************************************************************************************************
949 : !> \brief Sorts 2d indices w.r.t. rows and columns
950 : !> \param blk_ind ...
951 : ! **************************************************************************************************
952 0 : SUBROUTINE sort_unique_blkind_2d(blk_ind)
953 : INTEGER, ALLOCATABLE, DIMENSION(:, :), &
954 : INTENT(INOUT) :: blk_ind
955 :
956 : INTEGER :: end_ind, iblk, iblk_all, irow, nblk, &
957 : ncols, start_ind
958 0 : INTEGER, ALLOCATABLE, DIMENSION(:) :: ind_1, ind_2, sort_1, sort_2
959 0 : INTEGER, ALLOCATABLE, DIMENSION(:, :) :: blk_ind_tmp
960 :
961 0 : nblk = SIZE(blk_ind, 1)
962 :
963 0 : ALLOCATE (sort_1(nblk))
964 0 : ALLOCATE (ind_1(nblk))
965 :
966 0 : sort_1(:) = blk_ind(:, 1)
967 0 : CALL sort(sort_1, nblk, ind_1)
968 :
969 0 : blk_ind(:, :) = blk_ind(ind_1, :)
970 :
971 0 : start_ind = 1
972 :
973 0 : DO WHILE (start_ind <= nblk)
974 0 : irow = blk_ind(start_ind, 1)
975 0 : end_ind = start_ind
976 :
977 0 : IF (end_ind + 1 <= nblk) THEN
978 0 : DO WHILE (blk_ind(end_ind + 1, 1) == irow)
979 0 : end_ind = end_ind + 1
980 0 : IF (end_ind + 1 > nblk) EXIT
981 : END DO
982 : END IF
983 :
984 0 : ncols = end_ind - start_ind + 1
985 0 : ALLOCATE (sort_2(ncols))
986 0 : ALLOCATE (ind_2(ncols))
987 0 : sort_2(:) = blk_ind(start_ind:end_ind, 2)
988 0 : CALL sort(sort_2, ncols, ind_2)
989 0 : ind_2 = ind_2 + start_ind - 1
990 :
991 0 : blk_ind(start_ind:end_ind, :) = blk_ind(ind_2, :)
992 0 : start_ind = end_ind + 1
993 :
994 0 : DEALLOCATE (sort_2, ind_2)
995 : END DO
996 :
997 0 : ALLOCATE (blk_ind_tmp(nblk, 2))
998 0 : blk_ind_tmp = 0
999 :
1000 : iblk = 0
1001 0 : DO iblk_all = 1, nblk
1002 0 : IF (iblk >= 1) THEN
1003 0 : IF (ALL(blk_ind_tmp(iblk, :) == blk_ind(iblk_all, :))) THEN
1004 : CYCLE
1005 : END IF
1006 : END IF
1007 0 : iblk = iblk + 1
1008 0 : blk_ind_tmp(iblk, :) = blk_ind(iblk_all, :)
1009 : END DO
1010 0 : nblk = iblk
1011 :
1012 0 : DEALLOCATE (blk_ind)
1013 0 : ALLOCATE (blk_ind(nblk, 2))
1014 :
1015 0 : blk_ind(:, :) = blk_ind_tmp(:nblk, :)
1016 :
1017 0 : END SUBROUTINE
1018 :
1019 : ! **************************************************************************************************
1020 : !> \brief ...
1021 : !> \param qs_env ...
1022 : !> \param ri_data ...
1023 : !> \param ks_matrix ...
1024 : !> \param ehfx ...
1025 : !> \param mos ...
1026 : !> \param rho_ao ...
1027 : !> \param geometry_did_change ...
1028 : !> \param nspins ...
1029 : !> \param hf_fraction ...
1030 : ! **************************************************************************************************
1031 1744 : SUBROUTINE hfx_ri_update_ks(qs_env, ri_data, ks_matrix, ehfx, mos, rho_ao, &
1032 : geometry_did_change, nspins, hf_fraction)
1033 : TYPE(qs_environment_type), POINTER :: qs_env
1034 : TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
1035 : TYPE(dbcsr_p_type), DIMENSION(:, :), POINTER :: ks_matrix
1036 : REAL(KIND=dp), INTENT(OUT) :: ehfx
1037 : TYPE(mo_set_type), DIMENSION(:), INTENT(IN), &
1038 : OPTIONAL :: mos
1039 : TYPE(dbcsr_p_type), DIMENSION(:, :), POINTER :: rho_ao
1040 : LOGICAL, INTENT(IN) :: geometry_did_change
1041 : INTEGER, INTENT(IN) :: nspins
1042 : REAL(KIND=dp), INTENT(IN) :: hf_fraction
1043 :
1044 : CHARACTER(LEN=*), PARAMETER :: routineN = 'hfx_ri_update_ks'
1045 :
1046 : CHARACTER(1) :: mtype
1047 : INTEGER :: handle, handle2, i, ispin, j
1048 : INTEGER(int_8) :: nblks
1049 : INTEGER, DIMENSION(2) :: homo
1050 : LOGICAL :: is_antisymmetric
1051 : REAL(dp) :: etmp, fac
1052 1744 : REAL(KIND=dp), DIMENSION(:), POINTER :: mo_eigenvalues
1053 : TYPE(cp_fm_type), POINTER :: mo_coeff
1054 1744 : TYPE(dbcsr_p_type), DIMENSION(:, :), POINTER :: my_ks_matrix, my_rho_ao
1055 5232 : TYPE(dbcsr_type), DIMENSION(2) :: mo_coeff_b
1056 : TYPE(dbcsr_type), POINTER :: mo_coeff_b_tmp
1057 : TYPE(mp_para_env_type), POINTER :: para_env
1058 :
1059 1744 : CALL timeset(routineN, handle)
1060 :
1061 1744 : IF (nspins == 1) THEN
1062 1380 : fac = 0.5_dp*hf_fraction
1063 : ELSE
1064 364 : fac = 1.0_dp*hf_fraction
1065 : END IF
1066 :
1067 : !If incoming assymetric matrices, need to convert to normal
1068 1744 : NULLIFY (my_ks_matrix, my_rho_ao)
1069 1744 : CALL dbcsr_get_info(ks_matrix(1, 1)%matrix, matrix_type=mtype)
1070 1744 : is_antisymmetric = mtype == dbcsr_type_antisymmetric
1071 1744 : IF (is_antisymmetric) THEN
1072 1104 : ALLOCATE (my_ks_matrix(SIZE(ks_matrix, 1), SIZE(ks_matrix, 2)))
1073 1104 : ALLOCATE (my_rho_ao(SIZE(rho_ao, 1), SIZE(rho_ao, 2)))
1074 :
1075 368 : DO i = 1, SIZE(ks_matrix, 1)
1076 552 : DO j = 1, SIZE(ks_matrix, 2)
1077 184 : ALLOCATE (my_ks_matrix(i, j)%matrix, my_rho_ao(i, j)%matrix)
1078 : CALL dbcsr_create(my_ks_matrix(i, j)%matrix, template=ks_matrix(i, j)%matrix, &
1079 184 : matrix_type=dbcsr_type_no_symmetry)
1080 184 : CALL dbcsr_desymmetrize(ks_matrix(i, j)%matrix, my_ks_matrix(i, j)%matrix)
1081 : CALL dbcsr_create(my_rho_ao(i, j)%matrix, template=rho_ao(i, j)%matrix, &
1082 184 : matrix_type=dbcsr_type_no_symmetry)
1083 368 : CALL dbcsr_desymmetrize(rho_ao(i, j)%matrix, my_rho_ao(i, j)%matrix)
1084 : END DO
1085 : END DO
1086 : ELSE
1087 1560 : my_ks_matrix => ks_matrix
1088 1560 : my_rho_ao => rho_ao
1089 : END IF
1090 :
1091 : !Case analysis on RI_FLAVOR: we switch if the input flavor is MO, there is no provided MO, and
1092 : ! the current flavor is not yet RHO. We switch back to MO if there are
1093 : ! MOs available and the current flavor is actually RHO
1094 1744 : IF (ri_data%input_flavor == ri_mo .AND. (.NOT. PRESENT(mos)) .AND. ri_data%flavor == ri_mo) THEN
1095 12 : CALL switch_ri_flavor(ri_data, qs_env)
1096 1732 : ELSE IF (ri_data%input_flavor == ri_mo .AND. PRESENT(mos) .AND. ri_data%flavor == ri_pmat) THEN
1097 10 : CALL switch_ri_flavor(ri_data, qs_env)
1098 : END IF
1099 :
1100 1972 : SELECT CASE (ri_data%flavor)
1101 : CASE (ri_mo)
1102 228 : CPASSERT(PRESENT(mos))
1103 228 : CALL timeset(routineN//"_MO", handle2)
1104 :
1105 550 : DO ispin = 1, nspins
1106 322 : NULLIFY (mo_coeff_b_tmp)
1107 322 : CPASSERT(mos(ispin)%uniform_occupation)
1108 322 : CALL get_mo_set(mo_set=mos(ispin), mo_coeff=mo_coeff, eigenvalues=mo_eigenvalues, mo_coeff_b=mo_coeff_b_tmp)
1109 :
1110 322 : IF (.NOT. mos(ispin)%use_mo_coeff_b) CALL copy_fm_to_dbcsr(mo_coeff, mo_coeff_b_tmp)
1111 550 : CALL dbcsr_copy(mo_coeff_b(ispin), mo_coeff_b_tmp)
1112 : END DO
1113 :
1114 550 : DO ispin = 1, nspins
1115 322 : CALL dbcsr_scale(mo_coeff_b(ispin), SQRT(mos(ispin)%maxocc))
1116 550 : homo(ispin) = mos(ispin)%homo
1117 : END DO
1118 :
1119 228 : CALL timestop(handle2)
1120 :
1121 : CALL hfx_ri_update_ks_mo(qs_env, ri_data, my_ks_matrix, mo_coeff_b, homo, &
1122 228 : geometry_did_change, nspins, fac)
1123 : CASE (ri_pmat)
1124 :
1125 1516 : NULLIFY (para_env)
1126 1516 : CALL get_qs_env(qs_env, para_env=para_env)
1127 3302 : DO ispin = 1, SIZE(my_rho_ao, 1)
1128 1786 : nblks = dbcsr_get_num_blocks(my_rho_ao(ispin, 1)%matrix)
1129 1786 : CALL para_env%sum(nblks)
1130 3302 : IF (nblks == 0) THEN
1131 0 : CPABORT("received empty density matrix")
1132 : END IF
1133 : END DO
1134 :
1135 : CALL hfx_ri_update_ks_pmat(qs_env, ri_data, my_ks_matrix, my_rho_ao, &
1136 3488 : geometry_did_change, nspins, fac)
1137 :
1138 : END SELECT
1139 :
1140 3852 : DO ispin = 1, nspins
1141 3852 : CALL dbcsr_release(mo_coeff_b(ispin))
1142 : END DO
1143 :
1144 3852 : DO ispin = 1, nspins
1145 3852 : CALL dbcsr_filter(my_ks_matrix(ispin, 1)%matrix, ri_data%filter_eps)
1146 : END DO
1147 :
1148 1744 : CALL timeset(routineN//"_energy", handle2)
1149 : ! Calculate the exchange energy
1150 1744 : ehfx = 0.0_dp
1151 3852 : DO ispin = 1, nspins
1152 : CALL dbcsr_dot(my_ks_matrix(ispin, 1)%matrix, my_rho_ao(ispin, 1)%matrix, &
1153 2108 : etmp)
1154 3852 : ehfx = ehfx + 0.5_dp*etmp
1155 :
1156 : END DO
1157 1744 : CALL timestop(handle2)
1158 :
1159 : !Anti-symmetric case
1160 1744 : IF (is_antisymmetric) THEN
1161 368 : DO i = 1, SIZE(ks_matrix, 1)
1162 552 : DO j = 1, SIZE(ks_matrix, 2)
1163 184 : CALL dbcsr_complete_redistribute(my_ks_matrix(i, j)%matrix, ks_matrix(i, j)%matrix)
1164 368 : CALL dbcsr_complete_redistribute(my_rho_ao(i, j)%matrix, rho_ao(i, j)%matrix)
1165 : END DO
1166 : END DO
1167 184 : CALL dbcsr_deallocate_matrix_set(my_ks_matrix)
1168 184 : CALL dbcsr_deallocate_matrix_set(my_rho_ao)
1169 : END IF
1170 :
1171 1744 : CALL timestop(handle)
1172 1744 : END SUBROUTINE
1173 :
1174 : ! **************************************************************************************************
1175 : !> \brief Calculate Fock (AKA Kohn-Sham) matrix in MO flavor
1176 : !>
1177 : !> C(mu, i) (MO coefficients)
1178 : !> M(mu, i, R) = sum_nu B(mu, nu, R) C(nu, i)
1179 : !> KS(mu, lambda) = sum_{i,R} M(mu, i, R) M(lambda, i, R)
1180 : !> \param qs_env ...
1181 : !> \param ri_data ...
1182 : !> \param ks_matrix ...
1183 : !> \param mo_coeff C(mu, i)
1184 : !> \param homo ...
1185 : !> \param geometry_did_change ...
1186 : !> \param nspins ...
1187 : !> \param fac ...
1188 : ! **************************************************************************************************
1189 228 : SUBROUTINE hfx_ri_update_ks_mo(qs_env, ri_data, ks_matrix, mo_coeff, &
1190 228 : homo, geometry_did_change, nspins, fac)
1191 : TYPE(qs_environment_type), POINTER :: qs_env
1192 : TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
1193 : TYPE(dbcsr_p_type), DIMENSION(:, :) :: ks_matrix
1194 : TYPE(dbcsr_type), DIMENSION(:), INTENT(IN) :: mo_coeff
1195 : INTEGER, DIMENSION(:) :: homo
1196 : LOGICAL, INTENT(IN) :: geometry_did_change
1197 : INTEGER, INTENT(IN) :: nspins
1198 : REAL(dp), INTENT(IN) :: fac
1199 :
1200 : CHARACTER(LEN=*), PARAMETER :: routineN = 'hfx_ri_update_ks_mo'
1201 :
1202 : INTEGER :: bsize, bsum, comm_2d_handle, handle, &
1203 : handle2, i_mem, iblock, iproc, ispin, &
1204 : n_mem, n_mos, nblock, unit_nr_dbcsr
1205 : INTEGER(int_8) :: nblks, nflop
1206 228 : INTEGER, ALLOCATABLE, DIMENSION(:) :: batch_ranges_1, batch_ranges_2, dist1, dist2, dist3, &
1207 228 : mem_end, mem_end_block_1, mem_end_block_2, mem_size, mem_start, mem_start_block_1, &
1208 228 : mem_start_block_2, mo_bsizes_1, mo_bsizes_2
1209 228 : INTEGER, ALLOCATABLE, DIMENSION(:, :) :: bounds
1210 : INTEGER, DIMENSION(2) :: pdims_2d
1211 : INTEGER, DIMENSION(3) :: pdims
1212 : LOGICAL :: do_initialize
1213 : REAL(dp) :: t1, t2
1214 : TYPE(dbcsr_distribution_type) :: ks_dist
1215 1140 : TYPE(dbt_pgrid_type) :: pgrid, pgrid_2d
1216 5700 : TYPE(dbt_type) :: ks_t, ks_t_mat, mo_coeff_t, &
1217 2052 : mo_coeff_t_split
1218 228 : TYPE(dbt_type), ALLOCATABLE, DIMENSION(:, :) :: t_3c_int_mo_1, t_3c_int_mo_2
1219 : TYPE(mp_comm_type) :: comm_2d
1220 : TYPE(mp_para_env_type), POINTER :: para_env
1221 :
1222 228 : CALL timeset(routineN, handle)
1223 :
1224 228 : CPASSERT(SIZE(ks_matrix, 2) == 1)
1225 :
1226 228 : unit_nr_dbcsr = ri_data%unit_nr_dbcsr
1227 :
1228 228 : IF (geometry_did_change) THEN
1229 16 : CALL hfx_ri_pre_scf_mo(qs_env, ri_data, nspins)
1230 : END IF
1231 :
1232 228 : nblks = dbt_get_num_blocks_total(ri_data%t_3c_int_ctr_1(1, 1))
1233 228 : IF (nblks == 0) THEN
1234 0 : CPABORT("3-center integrals are not available (first call requires geometry_did_change=.TRUE.)")
1235 : END IF
1236 :
1237 550 : DO ispin = 1, nspins
1238 322 : nblks = dbt_get_num_blocks_total(ri_data%t_2c_int(ispin, 1))
1239 550 : IF (nblks == 0) THEN
1240 0 : CPABORT("2-center integrals are not available (first call requires geometry_did_change=.TRUE.)")
1241 : END IF
1242 : END DO
1243 :
1244 228 : IF (.NOT. ALLOCATED(ri_data%t_3c_int_mo)) THEN
1245 18 : do_initialize = .TRUE.
1246 18 : CPASSERT(.NOT. ALLOCATED(ri_data%t_3c_ctr_RI))
1247 18 : CPASSERT(.NOT. ALLOCATED(ri_data%t_3c_ctr_KS))
1248 18 : CPASSERT(.NOT. ALLOCATED(ri_data%t_3c_ctr_KS_copy))
1249 256 : ALLOCATE (ri_data%t_3c_int_mo(nspins, 1, 1))
1250 238 : ALLOCATE (ri_data%t_3c_ctr_RI(nspins, 1, 1))
1251 238 : ALLOCATE (ri_data%t_3c_ctr_KS(nspins, 1, 1))
1252 238 : ALLOCATE (ri_data%t_3c_ctr_KS_copy(nspins, 1, 1))
1253 : ELSE
1254 : do_initialize = .FALSE.
1255 : END IF
1256 :
1257 228 : CALL get_qs_env(qs_env, para_env=para_env)
1258 :
1259 228 : ALLOCATE (bounds(2, 1))
1260 :
1261 228 : CALL dbcsr_get_info(ks_matrix(1, 1)%matrix, distribution=ks_dist)
1262 228 : CALL dbcsr_distribution_get(ks_dist, group=comm_2d_handle, nprows=pdims_2d(1), npcols=pdims_2d(2))
1263 :
1264 228 : CALL comm_2d%set_handle(comm_2d_handle)
1265 228 : pgrid_2d = dbt_nd_mp_comm(comm_2d, [1], [2], pdims_2d=pdims_2d)
1266 :
1267 : CALL create_2c_tensor(ks_t, dist1, dist2, pgrid_2d, ri_data%bsizes_AO_fit, ri_data%bsizes_AO_fit, &
1268 228 : name="(AO | AO)")
1269 :
1270 228 : DEALLOCATE (dist1, dist2)
1271 :
1272 228 : CALL para_env%sync()
1273 228 : t1 = m_walltime()
1274 :
1275 5244 : ALLOCATE (t_3c_int_mo_1(1, 1), t_3c_int_mo_2(1, 1))
1276 550 : DO ispin = 1, nspins
1277 :
1278 322 : CALL dbcsr_get_info(mo_coeff(ispin), nfullcols_total=n_mos)
1279 966 : ALLOCATE (mo_bsizes_2(n_mos))
1280 2020 : mo_bsizes_2 = 1
1281 :
1282 : CALL create_tensor_batches(mo_bsizes_2, ri_data%n_mem, mem_start, mem_end, &
1283 322 : mem_start_block_2, mem_end_block_2)
1284 322 : n_mem = ri_data%n_mem
1285 966 : ALLOCATE (mem_size(n_mem))
1286 :
1287 1542 : DO i_mem = 1, n_mem
1288 2918 : bsize = SUM(mo_bsizes_2(mem_start_block_2(i_mem):mem_end_block_2(i_mem)))
1289 1542 : mem_size(i_mem) = bsize
1290 : END DO
1291 :
1292 322 : CALL split_block_sizes(mem_size, mo_bsizes_1, ri_data%max_bsize_MO)
1293 644 : ALLOCATE (mem_start_block_1(n_mem))
1294 644 : ALLOCATE (mem_end_block_1(n_mem))
1295 322 : nblock = SIZE(mo_bsizes_1)
1296 322 : iblock = 0
1297 1542 : DO i_mem = 1, n_mem
1298 : bsum = 0
1299 322 : DO
1300 1220 : iblock = iblock + 1
1301 1220 : CPASSERT(iblock <= nblock)
1302 1220 : bsum = bsum + mo_bsizes_1(iblock)
1303 1220 : IF (bsum == mem_size(i_mem)) THEN
1304 1220 : IF (i_mem == 1) THEN
1305 322 : mem_start_block_1(i_mem) = 1
1306 : ELSE
1307 898 : mem_start_block_1(i_mem) = mem_end_block_1(i_mem - 1) + 1
1308 : END IF
1309 1220 : mem_end_block_1(i_mem) = iblock
1310 : EXIT
1311 : END IF
1312 : END DO
1313 : END DO
1314 :
1315 966 : ALLOCATE (batch_ranges_1(ri_data%n_mem + 1))
1316 1542 : batch_ranges_1(:ri_data%n_mem) = mem_start_block_1(:)
1317 322 : batch_ranges_1(ri_data%n_mem + 1) = mem_end_block_1(ri_data%n_mem) + 1
1318 :
1319 644 : ALLOCATE (batch_ranges_2(ri_data%n_mem + 1))
1320 1542 : batch_ranges_2(:ri_data%n_mem) = mem_start_block_2(:)
1321 322 : batch_ranges_2(ri_data%n_mem + 1) = mem_end_block_2(ri_data%n_mem) + 1
1322 :
1323 322 : iproc = para_env%mepos
1324 :
1325 : CALL create_3c_tensor(t_3c_int_mo_1(1, 1), dist1, dist2, dist3, ri_data%pgrid_1, &
1326 : ri_data%bsizes_AO_split, ri_data%bsizes_RI_split, mo_bsizes_1, &
1327 : [1, 2], [3], &
1328 322 : name="(AO RI | MO)")
1329 :
1330 322 : DEALLOCATE (dist1, dist2, dist3)
1331 :
1332 : CALL create_3c_tensor(t_3c_int_mo_2(1, 1), dist1, dist2, dist3, ri_data%pgrid_2, &
1333 : mo_bsizes_1, ri_data%bsizes_RI_split, ri_data%bsizes_AO_split, &
1334 : [1], [2, 3], &
1335 322 : name="(MO | RI AO)")
1336 :
1337 322 : DEALLOCATE (dist1, dist2, dist3)
1338 :
1339 : CALL create_2c_tensor(mo_coeff_t_split, dist1, dist2, pgrid_2d, ri_data%bsizes_AO_split, mo_bsizes_1, &
1340 : name="(AO | MO)")
1341 :
1342 322 : DEALLOCATE (dist1, dist2)
1343 :
1344 322 : CPASSERT(homo(ispin)/ri_data%n_mem > 0)
1345 :
1346 322 : IF (do_initialize) THEN
1347 22 : pdims(:) = 0
1348 :
1349 : CALL dbt_pgrid_create(para_env, pdims, pgrid, &
1350 : tensor_dims=[SIZE(ri_data%bsizes_RI_fit), &
1351 : (homo(ispin) - 1)/ri_data%n_mem + 1, &
1352 88 : SIZE(ri_data%bsizes_AO_fit)])
1353 : CALL create_3c_tensor(ri_data%t_3c_int_mo(ispin, 1, 1), dist1, dist2, dist3, pgrid, &
1354 : ri_data%bsizes_RI_fit, mo_bsizes_2, ri_data%bsizes_AO_fit, &
1355 : [1], [2, 3], &
1356 22 : name="(RI | MO AO)")
1357 :
1358 22 : DEALLOCATE (dist1, dist2, dist3)
1359 :
1360 : CALL create_3c_tensor(ri_data%t_3c_ctr_KS(ispin, 1, 1), dist1, dist2, dist3, pgrid, &
1361 : ri_data%bsizes_RI_fit, mo_bsizes_2, ri_data%bsizes_AO_fit, &
1362 : [1, 2], [3], &
1363 22 : name="(RI MO | AO)")
1364 22 : DEALLOCATE (dist1, dist2, dist3)
1365 22 : CALL dbt_pgrid_destroy(pgrid)
1366 :
1367 22 : CALL dbt_create(ri_data%t_3c_int_mo(ispin, 1, 1), ri_data%t_3c_ctr_RI(ispin, 1, 1), name="(RI | MO AO)")
1368 22 : CALL dbt_create(ri_data%t_3c_ctr_KS(ispin, 1, 1), ri_data%t_3c_ctr_KS_copy(ispin, 1, 1))
1369 : END IF
1370 :
1371 322 : CALL dbt_create(mo_coeff(ispin), mo_coeff_t, name="MO coeffs")
1372 322 : CALL dbt_copy_matrix_to_tensor(mo_coeff(ispin), mo_coeff_t)
1373 322 : CALL dbt_copy(mo_coeff_t, mo_coeff_t_split, move_data=.TRUE.)
1374 322 : CALL dbt_filter(mo_coeff_t_split, ri_data%filter_eps_mo)
1375 322 : CALL dbt_destroy(mo_coeff_t)
1376 :
1377 322 : CALL dbt_batched_contract_init(ks_t)
1378 322 : CALL dbt_batched_contract_init(ri_data%t_3c_ctr_KS(ispin, 1, 1), batch_range_2=batch_ranges_2)
1379 322 : CALL dbt_batched_contract_init(ri_data%t_3c_ctr_KS_copy(ispin, 1, 1), batch_range_2=batch_ranges_2)
1380 :
1381 322 : CALL dbt_batched_contract_init(ri_data%t_2c_int(ispin, 1))
1382 322 : CALL dbt_batched_contract_init(ri_data%t_3c_int_mo(ispin, 1, 1), batch_range_2=batch_ranges_2)
1383 322 : CALL dbt_batched_contract_init(ri_data%t_3c_ctr_RI(ispin, 1, 1), batch_range_2=batch_ranges_2)
1384 :
1385 1542 : DO i_mem = 1, n_mem
1386 :
1387 3660 : bounds(:, 1) = [mem_start(i_mem), mem_end(i_mem)]
1388 :
1389 1220 : CALL dbt_batched_contract_init(mo_coeff_t_split)
1390 1220 : CALL dbt_batched_contract_init(ri_data%t_3c_int_ctr_1(1, 1))
1391 : CALL dbt_batched_contract_init(t_3c_int_mo_1(1, 1), &
1392 1220 : batch_range_3=batch_ranges_1)
1393 1220 : CALL timeset(routineN//"_MOx3C_R", handle2)
1394 : CALL dbt_contract(1.0_dp, mo_coeff_t_split, ri_data%t_3c_int_ctr_1(1, 1), &
1395 : 0.0_dp, t_3c_int_mo_1(1, 1), &
1396 : contract_1=[1], notcontract_1=[2], &
1397 : contract_2=[3], notcontract_2=[1, 2], &
1398 : map_1=[3], map_2=[1, 2], &
1399 : bounds_2=bounds, &
1400 : filter_eps=ri_data%filter_eps_mo/2, &
1401 : unit_nr=unit_nr_dbcsr, &
1402 : move_data=.FALSE., &
1403 1220 : flop=nflop)
1404 :
1405 1220 : ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
1406 :
1407 1220 : CALL timestop(handle2)
1408 1220 : CALL dbt_batched_contract_finalize(mo_coeff_t_split)
1409 1220 : CALL dbt_batched_contract_finalize(ri_data%t_3c_int_ctr_1(1, 1))
1410 1220 : CALL dbt_batched_contract_finalize(t_3c_int_mo_1(1, 1))
1411 :
1412 1220 : CALL timeset(routineN//"_copy_1", handle2)
1413 1220 : CALL dbt_copy(t_3c_int_mo_1(1, 1), ri_data%t_3c_int_mo(ispin, 1, 1), order=[3, 1, 2], move_data=.TRUE.)
1414 1220 : CALL timestop(handle2)
1415 :
1416 1220 : CALL dbt_batched_contract_init(mo_coeff_t_split)
1417 1220 : CALL dbt_batched_contract_init(ri_data%t_3c_int_ctr_2(1, 1))
1418 : CALL dbt_batched_contract_init(t_3c_int_mo_2(1, 1), &
1419 1220 : batch_range_1=batch_ranges_1)
1420 :
1421 1220 : CALL timeset(routineN//"_MOx3C_L", handle2)
1422 : CALL dbt_contract(1.0_dp, mo_coeff_t_split, ri_data%t_3c_int_ctr_2(1, 1), &
1423 : 0.0_dp, t_3c_int_mo_2(1, 1), &
1424 : contract_1=[1], notcontract_1=[2], &
1425 : contract_2=[1], notcontract_2=[2, 3], &
1426 : map_1=[1], map_2=[2, 3], &
1427 : bounds_2=bounds, &
1428 : filter_eps=ri_data%filter_eps_mo/2, &
1429 : unit_nr=unit_nr_dbcsr, &
1430 : move_data=.FALSE., &
1431 1220 : flop=nflop)
1432 :
1433 1220 : ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
1434 :
1435 1220 : CALL timestop(handle2)
1436 :
1437 1220 : CALL dbt_batched_contract_finalize(mo_coeff_t_split)
1438 1220 : CALL dbt_batched_contract_finalize(ri_data%t_3c_int_ctr_2(1, 1))
1439 1220 : CALL dbt_batched_contract_finalize(t_3c_int_mo_2(1, 1))
1440 :
1441 1220 : CALL timeset(routineN//"_copy_1", handle2)
1442 : CALL dbt_copy(t_3c_int_mo_2(1, 1), ri_data%t_3c_int_mo(ispin, 1, 1), order=[2, 1, 3], &
1443 1220 : summation=.TRUE., move_data=.TRUE.)
1444 :
1445 1220 : CALL dbt_filter(ri_data%t_3c_int_mo(ispin, 1, 1), ri_data%filter_eps_mo)
1446 1220 : CALL timestop(handle2)
1447 :
1448 1220 : CALL timeset(routineN//"_RIx3C", handle2)
1449 :
1450 : CALL dbt_contract(1.0_dp, ri_data%t_2c_int(ispin, 1), ri_data%t_3c_int_mo(ispin, 1, 1), &
1451 : 0.0_dp, ri_data%t_3c_ctr_RI(ispin, 1, 1), &
1452 : contract_1=[1], notcontract_1=[2], &
1453 : contract_2=[1], notcontract_2=[2, 3], &
1454 : map_1=[1], map_2=[2, 3], filter_eps=ri_data%filter_eps, &
1455 : unit_nr=unit_nr_dbcsr, &
1456 1220 : flop=nflop)
1457 :
1458 1220 : ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
1459 :
1460 1220 : CALL timestop(handle2)
1461 :
1462 1220 : CALL timeset(routineN//"_copy_2", handle2)
1463 :
1464 : ! note: this copy should not involve communication (same block sizes, same 3d distribution on same process grid)
1465 1220 : CALL dbt_copy(ri_data%t_3c_ctr_RI(ispin, 1, 1), ri_data%t_3c_ctr_KS(ispin, 1, 1), move_data=.TRUE.)
1466 1220 : CALL dbt_copy(ri_data%t_3c_ctr_KS(ispin, 1, 1), ri_data%t_3c_ctr_KS_copy(ispin, 1, 1))
1467 1220 : CALL timestop(handle2)
1468 :
1469 1220 : CALL timeset(routineN//"_3Cx3C", handle2)
1470 : CALL dbt_contract(-fac, ri_data%t_3c_ctr_KS(ispin, 1, 1), ri_data%t_3c_ctr_KS_copy(ispin, 1, 1), &
1471 : 1.0_dp, ks_t, &
1472 : contract_1=[1, 2], notcontract_1=[3], &
1473 : contract_2=[1, 2], notcontract_2=[3], &
1474 : map_1=[1], map_2=[2], filter_eps=ri_data%filter_eps/n_mem, &
1475 : unit_nr=unit_nr_dbcsr, move_data=.TRUE., &
1476 1220 : flop=nflop)
1477 :
1478 1220 : ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
1479 :
1480 10082 : CALL timestop(handle2)
1481 : END DO
1482 :
1483 322 : CALL dbt_batched_contract_finalize(ks_t)
1484 322 : CALL dbt_batched_contract_finalize(ri_data%t_3c_ctr_KS(ispin, 1, 1))
1485 322 : CALL dbt_batched_contract_finalize(ri_data%t_3c_ctr_KS_copy(ispin, 1, 1))
1486 :
1487 322 : CALL dbt_batched_contract_finalize(ri_data%t_2c_int(ispin, 1))
1488 322 : CALL dbt_batched_contract_finalize(ri_data%t_3c_int_mo(ispin, 1, 1))
1489 322 : CALL dbt_batched_contract_finalize(ri_data%t_3c_ctr_RI(ispin, 1, 1))
1490 :
1491 322 : CALL dbt_destroy(t_3c_int_mo_1(1, 1))
1492 322 : CALL dbt_destroy(t_3c_int_mo_2(1, 1))
1493 322 : CALL dbt_clear(ri_data%t_3c_int_mo(ispin, 1, 1))
1494 :
1495 322 : CALL dbt_destroy(mo_coeff_t_split)
1496 :
1497 322 : CALL dbt_filter(ks_t, ri_data%filter_eps)
1498 :
1499 322 : CALL dbt_create(ks_matrix(ispin, 1)%matrix, ks_t_mat)
1500 322 : CALL dbt_copy(ks_t, ks_t_mat, move_data=.TRUE.)
1501 322 : CALL dbt_copy_tensor_to_matrix(ks_t_mat, ks_matrix(ispin, 1)%matrix, summation=.TRUE.)
1502 322 : CALL dbt_destroy(ks_t_mat)
1503 :
1504 0 : DEALLOCATE (mem_end, mem_start, mo_bsizes_2, mem_size, mem_start_block_1, mem_end_block_1, &
1505 1838 : mem_start_block_2, mem_end_block_2, batch_ranges_1, batch_ranges_2)
1506 :
1507 : END DO
1508 :
1509 228 : CALL dbt_pgrid_destroy(pgrid_2d)
1510 228 : CALL dbt_destroy(ks_t)
1511 :
1512 228 : CALL para_env%sync()
1513 228 : t2 = m_walltime()
1514 :
1515 228 : ri_data%dbcsr_time = ri_data%dbcsr_time + t2 - t1
1516 :
1517 228 : CALL timestop(handle)
1518 :
1519 2052 : END SUBROUTINE
1520 :
1521 : ! **************************************************************************************************
1522 : !> \brief Calculate Fock (AKA Kohn-Sham) matrix in rho flavor
1523 : !>
1524 : !> M(mu, lambda, R) = sum_{nu} int_3c(mu, nu, R) P(nu, lambda)
1525 : !> KS(mu, lambda) = sum_{nu,R} B(mu, nu, R) M(lambda, nu, R)
1526 : !> \param qs_env ...
1527 : !> \param ri_data ...
1528 : !> \param ks_matrix ...
1529 : !> \param rho_ao ...
1530 : !> \param geometry_did_change ...
1531 : !> \param nspins ...
1532 : !> \param fac ...
1533 : ! **************************************************************************************************
1534 1516 : SUBROUTINE hfx_ri_update_ks_Pmat(qs_env, ri_data, ks_matrix, rho_ao, &
1535 : geometry_did_change, nspins, fac)
1536 : TYPE(qs_environment_type), POINTER :: qs_env
1537 : TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
1538 : TYPE(dbcsr_p_type), DIMENSION(:, :) :: ks_matrix, rho_ao
1539 : LOGICAL, INTENT(IN) :: geometry_did_change
1540 : INTEGER, INTENT(IN) :: nspins
1541 : REAL(dp), INTENT(IN) :: fac
1542 :
1543 : CHARACTER(LEN=*), PARAMETER :: routineN = 'hfx_ri_update_ks_Pmat'
1544 :
1545 : INTEGER :: handle, handle2, i_mem, ispin, j_mem, &
1546 : n_mem, n_mem_RI, unit_nr, unit_nr_dbcsr
1547 : INTEGER(int_8) :: flops_ks_max, flops_p_max, nblks, nflop, &
1548 : nze, nze_3c, nze_3c_1, nze_3c_2, &
1549 : nze_ks, nze_rho
1550 1516 : INTEGER, ALLOCATABLE, DIMENSION(:) :: batch_ranges_AO, batch_ranges_RI, dist1, &
1551 1516 : dist2
1552 : INTEGER, DIMENSION(2, 1) :: bounds_i
1553 : INTEGER, DIMENSION(2, 2) :: bounds_ij, bounds_j
1554 : INTEGER, DIMENSION(3) :: dims_3c
1555 : REAL(dp) :: memory_3c, occ, occ_3c, occ_3c_1, &
1556 : occ_3c_2, occ_ks, occ_rho, t1, t2, &
1557 : unused
1558 37900 : TYPE(dbt_type) :: ks_t, ks_tmp, rho_ao_tmp, t_3c_1, &
1559 19708 : t_3c_3, tensor_old
1560 : TYPE(mp_para_env_type), POINTER :: para_env
1561 :
1562 1516 : IF (.NOT. fac > EPSILON(0.0_dp)) RETURN
1563 :
1564 1516 : CALL timeset(routineN, handle)
1565 :
1566 1516 : NULLIFY (para_env)
1567 :
1568 : ! get a useful output_unit
1569 1516 : unit_nr_dbcsr = ri_data%unit_nr_dbcsr
1570 1516 : unit_nr = ri_data%unit_nr
1571 :
1572 1516 : CALL get_qs_env(qs_env, para_env=para_env)
1573 :
1574 1516 : CPASSERT(SIZE(ks_matrix, 2) == 1)
1575 :
1576 1516 : IF (geometry_did_change) THEN
1577 70 : CALL hfx_ri_pre_scf_Pmat(qs_env, ri_data)
1578 160 : DO ispin = 1, nspins
1579 90 : CALL dbt_scale(ri_data%rho_ao_t(ispin, 1), 0.0_dp)
1580 160 : CALL dbt_scale(ri_data%ks_t(ispin, 1), 0.0_dp)
1581 : END DO
1582 : END IF
1583 :
1584 1516 : nblks = dbt_get_num_blocks_total(ri_data%t_3c_int_ctr_2(1, 1))
1585 1516 : IF (nblks == 0) THEN
1586 0 : CPABORT("3-center integrals are not available (first call requires geometry_did_change=.TRUE.)")
1587 : END IF
1588 :
1589 1516 : n_mem = ri_data%n_mem
1590 1516 : n_mem_RI = ri_data%n_mem_RI
1591 :
1592 1516 : CALL dbt_create(ks_matrix(1, 1)%matrix, ks_tmp)
1593 1516 : CALL dbt_create(rho_ao(1, 1)%matrix, rho_ao_tmp)
1594 :
1595 : CALL create_2c_tensor(ks_t, dist1, dist2, ri_data%pgrid_2d, &
1596 : ri_data%bsizes_AO_split, ri_data%bsizes_AO_split, &
1597 : name="(AO | AO)")
1598 1516 : DEALLOCATE (dist1, dist2)
1599 :
1600 1516 : CALL dbt_create(ri_data%t_3c_int_ctr_2(1, 1), t_3c_1)
1601 1516 : CALL dbt_create(ri_data%t_3c_int_ctr_1(1, 1), t_3c_3)
1602 :
1603 1516 : CALL para_env%sync()
1604 1516 : t1 = m_walltime()
1605 :
1606 1516 : flops_ks_max = 0; flops_p_max = 0
1607 :
1608 4548 : ALLOCATE (batch_ranges_RI(ri_data%n_mem_RI + 1))
1609 4548 : ALLOCATE (batch_ranges_AO(ri_data%n_mem + 1))
1610 6064 : batch_ranges_RI(:ri_data%n_mem_RI) = ri_data%starts_array_RI_mem_block(:)
1611 1516 : batch_ranges_RI(ri_data%n_mem_RI + 1) = ri_data%ends_array_RI_mem_block(ri_data%n_mem_RI) + 1
1612 6064 : batch_ranges_AO(:ri_data%n_mem) = ri_data%starts_array_mem_block(:)
1613 1516 : batch_ranges_AO(ri_data%n_mem + 1) = ri_data%ends_array_mem_block(ri_data%n_mem) + 1
1614 :
1615 1516 : memory_3c = 0.0_dp
1616 3302 : DO ispin = 1, nspins
1617 :
1618 1786 : CALL get_tensor_occupancy(ri_data%t_3c_int_ctr_2(1, 1), nze_3c, occ_3c)
1619 :
1620 : nze_rho = 0
1621 : occ_rho = 0.0_dp
1622 1786 : nze_3c_1 = 0
1623 1786 : occ_3c_1 = 0.0_dp
1624 1786 : nze_3c_2 = 0
1625 1786 : occ_3c_2 = 0.0_dp
1626 :
1627 1786 : CALL dbt_copy_matrix_to_tensor(rho_ao(ispin, 1)%matrix, rho_ao_tmp)
1628 :
1629 : !We work with Delta P: the diff between previous SCF step and this one, for increased sparsity
1630 1786 : CALL dbt_scale(ri_data%rho_ao_t(ispin, 1), -1.0_dp)
1631 1786 : CALL dbt_copy(rho_ao_tmp, ri_data%rho_ao_t(ispin, 1), summation=.TRUE., move_data=.TRUE.)
1632 :
1633 1786 : CALL get_tensor_occupancy(ri_data%rho_ao_t(ispin, 1), nze_rho, occ_rho)
1634 :
1635 : CALL dbt_batched_contract_init(ri_data%t_3c_int_ctr_1(1, 1), batch_range_1=batch_ranges_AO, &
1636 1786 : batch_range_2=batch_ranges_RI)
1637 1786 : CALL dbt_batched_contract_init(t_3c_3, batch_range_1=batch_ranges_AO, batch_range_2=batch_ranges_RI)
1638 :
1639 1786 : CALL dbt_create(ri_data%t_3c_int_ctr_1(1, 1), tensor_old)
1640 :
1641 7144 : DO i_mem = 1, n_mem
1642 :
1643 5358 : CALL dbt_batched_contract_init(ri_data%rho_ao_t(ispin, 1))
1644 : CALL dbt_batched_contract_init(ri_data%t_3c_int_ctr_2(1, 1), batch_range_2=batch_ranges_RI, &
1645 5358 : batch_range_3=batch_ranges_AO)
1646 5358 : CALL dbt_batched_contract_init(t_3c_1, batch_range_2=batch_ranges_RI, batch_range_3=batch_ranges_AO)
1647 21432 : DO j_mem = 1, n_mem_RI
1648 :
1649 16074 : CALL timeset(routineN//"_Px3C", handle2)
1650 :
1651 16074 : CALL dbt_get_info(t_3c_1, nfull_total=dims_3c)
1652 48222 : bounds_i(:, 1) = [ri_data%starts_array_mem(i_mem), ri_data%ends_array_mem(i_mem)]
1653 48222 : bounds_j(:, 1) = [1, dims_3c(1)]
1654 48222 : bounds_j(:, 2) = [ri_data%starts_array_RI_mem(j_mem), ri_data%ends_array_RI_mem(j_mem)]
1655 :
1656 : CALL dbt_contract(1.0_dp, ri_data%rho_ao_t(ispin, 1), ri_data%t_3c_int_ctr_2(1, 1), &
1657 : 0.0_dp, t_3c_1, &
1658 : contract_1=[2], notcontract_1=[1], &
1659 : contract_2=[3], notcontract_2=[1, 2], &
1660 : map_1=[3], map_2=[1, 2], filter_eps=ri_data%filter_eps, &
1661 : bounds_2=bounds_i, &
1662 : bounds_3=bounds_j, &
1663 : unit_nr=unit_nr_dbcsr, &
1664 16074 : flop=nflop)
1665 :
1666 16074 : CALL timestop(handle2)
1667 :
1668 16074 : ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
1669 :
1670 16074 : CALL get_tensor_occupancy(t_3c_1, nze, occ)
1671 16074 : nze_3c_1 = nze_3c_1 + nze
1672 16074 : occ_3c_1 = occ_3c_1 + occ
1673 :
1674 16074 : CALL timeset(routineN//"_copy_2", handle2)
1675 16074 : CALL dbt_copy(t_3c_1, t_3c_3, order=[3, 2, 1], move_data=.TRUE.)
1676 16074 : CALL timestop(handle2)
1677 :
1678 48222 : bounds_ij(:, 1) = [ri_data%starts_array_mem(i_mem), ri_data%ends_array_mem(i_mem)]
1679 48222 : bounds_ij(:, 2) = [ri_data%starts_array_RI_mem(j_mem), ri_data%ends_array_RI_mem(j_mem)]
1680 :
1681 : CALL decompress_tensor(tensor_old, ri_data%blk_indices(i_mem, j_mem)%ind, &
1682 16074 : ri_data%store_3c(i_mem, j_mem), ri_data%filter_eps_storage)
1683 :
1684 16074 : CALL dbt_copy(tensor_old, ri_data%t_3c_int_ctr_1(1, 1), move_data=.TRUE.)
1685 :
1686 16074 : CALL get_tensor_occupancy(ri_data%t_3c_int_ctr_1(1, 1), nze, occ)
1687 16074 : nze_3c_2 = nze_3c_2 + nze
1688 16074 : occ_3c_2 = occ_3c_2 + occ
1689 16074 : CALL timeset(routineN//"_KS", handle2)
1690 16074 : CALL dbt_batched_contract_init(ks_t)
1691 : CALL dbt_contract(-fac, ri_data%t_3c_int_ctr_1(1, 1), t_3c_3, &
1692 : 1.0_dp, ks_t, &
1693 : contract_1=[1, 2], notcontract_1=[3], &
1694 : contract_2=[1, 2], notcontract_2=[3], &
1695 : map_1=[1], map_2=[2], filter_eps=ri_data%filter_eps/n_mem, &
1696 : bounds_1=bounds_ij, &
1697 : unit_nr=unit_nr_dbcsr, &
1698 16074 : flop=nflop, move_data=.TRUE.)
1699 :
1700 16074 : CALL dbt_batched_contract_finalize(ks_t, unit_nr=unit_nr_dbcsr)
1701 16074 : CALL timestop(handle2)
1702 :
1703 69654 : ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
1704 :
1705 : END DO
1706 5358 : CALL dbt_batched_contract_finalize(ri_data%rho_ao_t(ispin, 1), unit_nr=unit_nr_dbcsr)
1707 5358 : CALL dbt_batched_contract_finalize(ri_data%t_3c_int_ctr_2(1, 1))
1708 7144 : CALL dbt_batched_contract_finalize(t_3c_1)
1709 : END DO
1710 1786 : CALL dbt_batched_contract_finalize(ri_data%t_3c_int_ctr_1(1, 1))
1711 1786 : CALL dbt_batched_contract_finalize(t_3c_3)
1712 :
1713 7144 : DO i_mem = 1, n_mem
1714 23218 : DO j_mem = 1, n_mem_RI
1715 5358 : ASSOCIATE (blk_indices => ri_data%blk_indices(i_mem, j_mem), t_3c => ri_data%t_3c_int_ctr_1(1, 1))
1716 : CALL decompress_tensor(tensor_old, blk_indices%ind, &
1717 16074 : ri_data%store_3c(i_mem, j_mem), ri_data%filter_eps_storage)
1718 16074 : CALL dbt_copy(tensor_old, t_3c, move_data=.TRUE.)
1719 :
1720 16074 : unused = 0
1721 : CALL compress_tensor(t_3c, blk_indices%ind, ri_data%store_3c(i_mem, j_mem), &
1722 32148 : ri_data%filter_eps_storage, unused)
1723 : END ASSOCIATE
1724 : END DO
1725 : END DO
1726 :
1727 1786 : CALL dbt_destroy(tensor_old)
1728 :
1729 1786 : CALL get_tensor_occupancy(ks_t, nze_ks, occ_ks)
1730 :
1731 : !rho_ao_t holds the density difference, and ks_t is built upon it => need the full picture
1732 1786 : CALL dbt_copy_matrix_to_tensor(rho_ao(ispin, 1)%matrix, rho_ao_tmp)
1733 1786 : CALL dbt_copy(rho_ao_tmp, ri_data%rho_ao_t(ispin, 1), move_data=.TRUE.)
1734 1786 : CALL dbt_copy(ks_t, ri_data%ks_t(ispin, 1), summation=.TRUE., move_data=.TRUE.)
1735 :
1736 1786 : CALL dbt_copy(ri_data%ks_t(ispin, 1), ks_tmp)
1737 1786 : CALL dbt_copy_tensor_to_matrix(ks_tmp, ks_matrix(ispin, 1)%matrix, summation=.TRUE.)
1738 1786 : CALL dbt_clear(ks_tmp)
1739 :
1740 8660 : IF (unit_nr > 0 .AND. geometry_did_change) THEN
1741 : WRITE (unit_nr, '(T6,A,T63,ES7.1,1X,A1,1X,F7.3,A1)') &
1742 28 : 'Occupancy of density matrix P:', REAL(nze_rho, dp), '/', occ_rho*100, '%'
1743 : WRITE (unit_nr, '(T6,A,T63,ES7.1,1X,A1,1X,F7.3,A1)') &
1744 28 : 'Occupancy of 3c ints:', REAL(nze_3c, dp), '/', occ_3c*100, '%'
1745 : WRITE (unit_nr, '(T6,A,T63,ES7.1,1X,A1,1X,F7.3,A1)') &
1746 28 : 'Occupancy after contraction with K:', REAL(nze_3c_2, dp), '/', occ_3c_2*100, '%'
1747 : WRITE (unit_nr, '(T6,A,T63,ES7.1,1X,A1,1X,F7.3,A1)') &
1748 28 : 'Occupancy after contraction with P:', REAL(nze_3c_1, dp), '/', occ_3c_1*100, '%'
1749 : WRITE (unit_nr, '(T6,A,T63,ES7.1,1X,A1,1X,F7.3,A1)') &
1750 28 : 'Occupancy of Kohn-Sham matrix:', REAL(nze_ks, dp), '/', occ_ks*100, '%'
1751 : END IF
1752 :
1753 : END DO
1754 :
1755 1516 : CALL para_env%sync()
1756 1516 : t2 = m_walltime()
1757 :
1758 1516 : ri_data%dbcsr_time = ri_data%dbcsr_time + t2 - t1
1759 :
1760 1516 : CALL dbt_destroy(t_3c_1)
1761 1516 : CALL dbt_destroy(t_3c_3)
1762 :
1763 1516 : CALL dbt_destroy(rho_ao_tmp)
1764 1516 : CALL dbt_destroy(ks_t)
1765 1516 : CALL dbt_destroy(ks_tmp)
1766 :
1767 1516 : CALL timestop(handle)
1768 :
1769 4548 : END SUBROUTINE
1770 :
1771 : ! **************************************************************************************************
1772 : !> \brief Implementation based on the MO flavor
1773 : !> \param qs_env ...
1774 : !> \param ri_data ...
1775 : !> \param nspins ...
1776 : !> \param hf_fraction ...
1777 : !> \param mo_coeff ...
1778 : !> \param use_virial ...
1779 : !> \note There is no response code for forces with the MO flavor
1780 : ! **************************************************************************************************
1781 14 : SUBROUTINE hfx_ri_forces_mo(qs_env, ri_data, nspins, hf_fraction, mo_coeff, use_virial)
1782 :
1783 : TYPE(qs_environment_type), POINTER :: qs_env
1784 : TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
1785 : INTEGER, INTENT(IN) :: nspins
1786 : REAL(dp), INTENT(IN) :: hf_fraction
1787 : TYPE(dbcsr_type), DIMENSION(:), INTENT(IN) :: mo_coeff
1788 : LOGICAL, INTENT(IN), OPTIONAL :: use_virial
1789 :
1790 : CHARACTER(LEN=*), PARAMETER :: routineN = 'hfx_ri_forces_mo'
1791 :
1792 : INTEGER :: dummy_int, handle, i_mem, i_xyz, ibasis, ispin, j_mem, k_mem, n_mem, n_mem_input, &
1793 : n_mem_input_RI, n_mem_RI, n_mem_RI_fit, n_mos, natom, nkind, unit_nr_dbcsr
1794 : INTEGER(int_8) :: nflop
1795 14 : INTEGER, ALLOCATABLE, DIMENSION(:) :: atom_of_kind, batch_blk_end, batch_blk_start, &
1796 14 : batch_end, batch_end_RI, batch_end_RI_fit, batch_ranges, batch_ranges_RI, &
1797 14 : batch_ranges_RI_fit, batch_start, batch_start_RI, batch_start_RI_fit, bsizes_MO, dist1, &
1798 14 : dist2, dist3, idx_to_at_AO, idx_to_at_RI, kind_of
1799 : INTEGER, DIMENSION(2, 1) :: bounds_ctr_1d
1800 : INTEGER, DIMENSION(2, 2) :: bounds_ctr_2d
1801 : INTEGER, DIMENSION(3) :: pdims
1802 : LOGICAL :: use_virial_prv
1803 : REAL(dp) :: pref, spin_fac, t1, t2
1804 14 : TYPE(atomic_kind_type), DIMENSION(:), POINTER :: atomic_kind_set
1805 14 : TYPE(block_ind_type), ALLOCATABLE, DIMENSION(:, :) :: t_3c_der_AO_ind, t_3c_der_RI_ind
1806 : TYPE(cell_type), POINTER :: cell
1807 14 : TYPE(dbcsr_p_type), DIMENSION(:), POINTER :: matrix_s
1808 70 : TYPE(dbt_pgrid_type) :: pgrid_1, pgrid_2
1809 602 : TYPE(dbt_type) :: t_2c_RI, t_2c_RI_inv, t_2c_RI_met, t_2c_RI_PQ, t_2c_tmp, t_3c_0, t_3c_1, &
1810 700 : t_3c_2, t_3c_3, t_3c_4, t_3c_5, t_3c_6, t_3c_ao_ri_ao, t_3c_ao_ri_mo, t_3c_desymm, &
1811 434 : t_3c_mo_ri_ao, t_3c_mo_ri_mo, t_3c_ri_ao_ao, t_3c_RI_ctr, t_3c_ri_mo_mo, &
1812 350 : t_3c_ri_mo_mo_fit, t_3c_work, t_mo_coeff, t_mo_cpy
1813 14 : TYPE(dbt_type), ALLOCATABLE, DIMENSION(:) :: t_2c_der_metric, t_2c_der_RI, t_2c_MO_AO, &
1814 28 : t_2c_MO_AO_ctr, t_3c_der_AO, t_3c_der_AO_ctr_1, t_3c_der_RI, t_3c_der_RI_ctr_1, &
1815 14 : t_3c_der_RI_ctr_2
1816 : TYPE(dft_control_type), POINTER :: dft_control
1817 : TYPE(gto_basis_set_p_type), ALLOCATABLE, &
1818 14 : DIMENSION(:), TARGET :: basis_set_AO, basis_set_RI
1819 : TYPE(gto_basis_set_type), POINTER :: orb_basis, ri_basis
1820 : TYPE(hfx_compression_type), ALLOCATABLE, &
1821 14 : DIMENSION(:, :) :: t_3c_der_AO_comp, t_3c_der_RI_comp
1822 : TYPE(mp_para_env_type), POINTER :: para_env
1823 14 : TYPE(particle_type), DIMENSION(:), POINTER :: particle_set
1824 14 : TYPE(qs_force_type), DIMENSION(:), POINTER :: force
1825 14 : TYPE(qs_kind_type), DIMENSION(:), POINTER :: qs_kind_set
1826 :
1827 : ! 1) Precompute the derivatives that are needed (3c, 3c RI and metric)
1828 : ! 2) Go over batched of occupied MOs so as to save memory and optimize contractions
1829 : ! 3) Contract all 3c integrals and derivatives with MO coeffs
1830 : ! 4) Contract relevant quantities with the inverse 2c RI (metric or pot)
1831 : ! 5) First force contribution with the 2c RI derivative d/dx (Q|R)
1832 : ! 6) If metric, do the additional contraction with S_pq^-1 (Q|R)
1833 : ! 7) Do the force contribution due to 3c integrals (a'b|P) and (ab|P')
1834 : ! 8) If metric, do the last force contribution due to d/dx S^-1 (First contract (ab|P), then S^-1)
1835 :
1836 14 : use_virial_prv = .FALSE.
1837 14 : IF (PRESENT(use_virial)) use_virial_prv = use_virial
1838 14 : IF (use_virial_prv) THEN
1839 0 : CPABORT("Stress tensor with RI-HFX MO flavor not implemented.")
1840 : END IF
1841 :
1842 14 : unit_nr_dbcsr = ri_data%unit_nr_dbcsr
1843 :
1844 : CALL get_qs_env(qs_env, natom=natom, particle_set=particle_set, nkind=nkind, &
1845 : atomic_kind_set=atomic_kind_set, cell=cell, force=force, &
1846 : matrix_s=matrix_s, para_env=para_env, dft_control=dft_control, &
1847 14 : qs_kind_set=qs_kind_set)
1848 :
1849 14 : pdims(:) = 0
1850 : CALL dbt_pgrid_create(para_env, pdims, pgrid_1, tensor_dims=[SIZE(ri_data%bsizes_AO_split), &
1851 : SIZE(ri_data%bsizes_RI_split), &
1852 56 : SIZE(ri_data%bsizes_AO_split)])
1853 14 : pdims(:) = 0
1854 : CALL dbt_pgrid_create(para_env, pdims, pgrid_2, tensor_dims=[SIZE(ri_data%bsizes_RI_split), &
1855 : SIZE(ri_data%bsizes_AO_split), &
1856 56 : SIZE(ri_data%bsizes_AO_split)])
1857 :
1858 : CALL create_3c_tensor(t_3c_ao_ri_ao, dist1, dist2, dist3, pgrid_1, &
1859 : ri_data%bsizes_AO_split, ri_data%bsizes_RI_split, ri_data%bsizes_AO_split, &
1860 14 : [1, 2], [3], name="(AO RI | AO)")
1861 14 : DEALLOCATE (dist1, dist2, dist3)
1862 : CALL create_3c_tensor(t_3c_ri_ao_ao, dist1, dist2, dist3, pgrid_2, &
1863 : ri_data%bsizes_RI_split, ri_data%bsizes_AO_split, ri_data%bsizes_AO_split, &
1864 14 : [1], [2, 3], name="(RI | AO AO)")
1865 14 : DEALLOCATE (dist1, dist2, dist3)
1866 :
1867 104 : ALLOCATE (basis_set_RI(nkind), basis_set_AO(nkind))
1868 14 : CALL basis_set_list_setup(basis_set_RI, ri_data%ri_basis_type, qs_kind_set)
1869 14 : CALL get_particle_set(particle_set, qs_kind_set, basis=basis_set_RI)
1870 14 : CALL basis_set_list_setup(basis_set_AO, ri_data%orb_basis_type, qs_kind_set)
1871 14 : CALL get_particle_set(particle_set, qs_kind_set, basis=basis_set_AO)
1872 :
1873 38 : DO ibasis = 1, SIZE(basis_set_AO)
1874 24 : orb_basis => basis_set_AO(ibasis)%gto_basis_set
1875 24 : CALL init_interaction_radii_orb_basis(orb_basis, ri_data%eps_pgf_orb)
1876 24 : ri_basis => basis_set_RI(ibasis)%gto_basis_set
1877 38 : CALL init_interaction_radii_orb_basis(ri_basis, ri_data%eps_pgf_orb)
1878 : END DO
1879 :
1880 : ALLOCATE (t_2c_der_metric(3), t_2c_der_RI(3), t_2c_MO_AO(3), t_2c_MO_AO_ctr(3), t_3c_der_AO(3), &
1881 1148 : t_3c_der_AO_ctr_1(3), t_3c_der_RI(3), t_3c_der_RI_ctr_1(3), t_3c_der_RI_ctr_2(3))
1882 :
1883 : ! 1) Precompute the derivatives
1884 : CALL precalc_derivatives(t_3c_der_RI_comp, t_3c_der_AO_comp, t_3c_der_RI_ind, t_3c_der_AO_ind, &
1885 : t_2c_der_RI, t_2c_der_metric, t_3c_ri_ao_ao, &
1886 14 : basis_set_AO, basis_set_RI, ri_data, qs_env)
1887 :
1888 38 : DO ibasis = 1, SIZE(basis_set_AO)
1889 24 : orb_basis => basis_set_AO(ibasis)%gto_basis_set
1890 24 : ri_basis => basis_set_RI(ibasis)%gto_basis_set
1891 24 : CALL init_interaction_radii_orb_basis(orb_basis, dft_control%qs_control%eps_pgf_orb)
1892 38 : CALL init_interaction_radii_orb_basis(ri_basis, dft_control%qs_control%eps_pgf_orb)
1893 : END DO
1894 :
1895 14 : n_mem = SIZE(t_3c_der_RI_comp, 1)
1896 56 : DO i_xyz = 1, 3
1897 42 : CALL dbt_create(t_3c_ao_ri_ao, t_3c_der_RI(i_xyz))
1898 42 : CALL dbt_create(t_3c_ao_ri_ao, t_3c_der_AO(i_xyz))
1899 :
1900 194 : DO i_mem = 1, n_mem
1901 : CALL decompress_tensor(t_3c_ri_ao_ao, t_3c_der_RI_ind(i_mem, i_xyz)%ind, &
1902 138 : t_3c_der_RI_comp(i_mem, i_xyz), ri_data%filter_eps_storage)
1903 138 : CALL dbt_copy(t_3c_ri_ao_ao, t_3c_der_RI(i_xyz), order=[2, 1, 3], move_data=.TRUE., summation=.TRUE.)
1904 :
1905 : CALL decompress_tensor(t_3c_ri_ao_ao, t_3c_der_AO_ind(i_mem, i_xyz)%ind, &
1906 138 : t_3c_der_AO_comp(i_mem, i_xyz), ri_data%filter_eps_storage)
1907 180 : CALL dbt_copy(t_3c_ri_ao_ao, t_3c_der_AO(i_xyz), order=[2, 1, 3], move_data=.TRUE., summation=.TRUE.)
1908 : END DO
1909 : END DO
1910 :
1911 56 : DO i_xyz = 1, 3
1912 194 : DO i_mem = 1, n_mem
1913 138 : CALL dealloc_containers(t_3c_der_AO_comp(i_mem, i_xyz), dummy_int)
1914 180 : CALL dealloc_containers(t_3c_der_RI_comp(i_mem, i_xyz), dummy_int)
1915 : END DO
1916 : END DO
1917 290 : DEALLOCATE (t_3c_der_AO_ind, t_3c_der_RI_ind)
1918 :
1919 : ! Get the 3c integrals (desymmetrized)
1920 14 : CALL dbt_create(t_3c_ao_ri_ao, t_3c_desymm)
1921 14 : CALL dbt_copy(ri_data%t_3c_int_ctr_1(1, 1), t_3c_desymm)
1922 : CALL dbt_copy(ri_data%t_3c_int_ctr_1(1, 1), t_3c_desymm, order=[3, 2, 1], &
1923 14 : summation=.TRUE., move_data=.TRUE.)
1924 :
1925 14 : CALL dbt_destroy(t_3c_ao_ri_ao)
1926 14 : CALL dbt_destroy(t_3c_ri_ao_ao)
1927 :
1928 : ! Some utilities
1929 14 : spin_fac = 0.5_dp
1930 14 : IF (nspins == 2) spin_fac = 1.0_dp
1931 :
1932 42 : ALLOCATE (idx_to_at_RI(SIZE(ri_data%bsizes_RI_split)))
1933 14 : CALL get_idx_to_atom(idx_to_at_RI, ri_data%bsizes_RI_split, ri_data%bsizes_RI)
1934 :
1935 42 : ALLOCATE (idx_to_at_AO(SIZE(ri_data%bsizes_AO_split)))
1936 14 : CALL get_idx_to_atom(idx_to_at_AO, ri_data%bsizes_AO_split, ri_data%bsizes_AO)
1937 :
1938 14 : CALL get_atomic_kind_set(atomic_kind_set, kind_of=kind_of, atom_of_kind=atom_of_kind)
1939 :
1940 : ! 2-center RI tensors
1941 : CALL create_2c_tensor(t_2c_RI, dist1, dist2, ri_data%pgrid_2d, &
1942 14 : ri_data%bsizes_RI_split, ri_data%bsizes_RI_split, name="(RI | RI)")
1943 14 : DEALLOCATE (dist1, dist2)
1944 :
1945 : CALL create_2c_tensor(t_2c_RI_PQ, dist1, dist2, ri_data%pgrid_2d, &
1946 : ri_data%bsizes_RI_fit, ri_data%bsizes_RI_fit, name="(RI | RI)")
1947 14 : DEALLOCATE (dist1, dist2)
1948 :
1949 14 : IF (.NOT. ri_data%same_op) THEN
1950 : !precompute the (P|Q)*S^-1 product
1951 4 : CALL dbt_create(t_2c_RI_PQ, t_2c_RI_inv)
1952 4 : CALL dbt_create(t_2c_RI_PQ, t_2c_RI_met)
1953 4 : CALL dbt_create(ri_data%t_2c_inv(1, 1), t_2c_tmp)
1954 :
1955 : CALL dbt_contract(1.0_dp, ri_data%t_2c_inv(1, 1), ri_data%t_2c_pot(1, 1), &
1956 : 0.0_dp, t_2c_tmp, &
1957 : contract_1=[2], notcontract_1=[1], &
1958 : contract_2=[1], notcontract_2=[2], &
1959 : map_1=[1], map_2=[2], filter_eps=ri_data%filter_eps, &
1960 4 : unit_nr=unit_nr_dbcsr, flop=nflop)
1961 4 : ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
1962 :
1963 4 : CALL dbt_copy(t_2c_tmp, t_2c_RI_inv, move_data=.TRUE.)
1964 4 : CALL dbt_destroy(t_2c_tmp)
1965 : END IF
1966 :
1967 : !3 loops in MO force evaluations. To be consistent with input MEMORY_CUT, need to take the cubic root
1968 : !No need to cut memory further because SCF tensors alrady dense
1969 14 : n_mem_input = FLOOR((ri_data%n_mem_input - 0.1_dp)**(1._dp/3._dp)) + 1
1970 14 : n_mem_input_RI = FLOOR((ri_data%n_mem_input - 0.1_dp)/n_mem_input**2) + 1
1971 :
1972 : !batches on RI_split and RI_fit blocks
1973 14 : n_mem_RI = n_mem_input_RI
1974 : CALL create_tensor_batches(ri_data%bsizes_RI_split, n_mem_RI, batch_start_RI, batch_end_RI, &
1975 14 : batch_blk_start, batch_blk_end)
1976 42 : ALLOCATE (batch_ranges_RI(n_mem_RI + 1))
1977 28 : batch_ranges_RI(1:n_mem_RI) = batch_blk_start(1:n_mem_RI)
1978 14 : batch_ranges_RI(n_mem_RI + 1) = batch_blk_end(n_mem_RI) + 1
1979 14 : DEALLOCATE (batch_blk_start, batch_blk_end)
1980 :
1981 14 : n_mem_RI_fit = n_mem_input_RI
1982 : CALL create_tensor_batches(ri_data%bsizes_RI_fit, n_mem_RI_fit, batch_start_RI_fit, batch_end_RI_fit, &
1983 14 : batch_blk_start, batch_blk_end)
1984 42 : ALLOCATE (batch_ranges_RI_fit(n_mem_RI_fit + 1))
1985 28 : batch_ranges_RI_fit(1:n_mem_RI_fit) = batch_blk_start(1:n_mem_RI_fit)
1986 14 : batch_ranges_RI_fit(n_mem_RI_fit + 1) = batch_blk_end(n_mem_RI_fit) + 1
1987 14 : DEALLOCATE (batch_blk_start, batch_blk_end)
1988 :
1989 32 : DO ispin = 1, nspins
1990 :
1991 : ! 2 )Prepare the batches for this spin
1992 18 : CALL dbcsr_get_info(mo_coeff(ispin), nfullcols_total=n_mos)
1993 : !note: optimized GPU block size for SCF is 64x1x64. Here we do 8x8x64
1994 36 : CALL split_block_sizes([n_mos], bsizes_MO, max_size=FLOOR(SQRT(ri_data%max_bsize_MO - 0.1)) + 1)
1995 :
1996 : !batching on MO blocks
1997 18 : n_mem = n_mem_input
1998 : CALL create_tensor_batches(bsizes_MO, n_mem, batch_start, batch_end, &
1999 18 : batch_blk_start, batch_blk_end)
2000 54 : ALLOCATE (batch_ranges(n_mem + 1))
2001 40 : batch_ranges(1:n_mem) = batch_blk_start(1:n_mem)
2002 18 : batch_ranges(n_mem + 1) = batch_blk_end(n_mem) + 1
2003 18 : DEALLOCATE (batch_blk_start, batch_blk_end)
2004 :
2005 : ! Initialize the different tensors needed (Note: keep MO coeffs as (MO | AO) for less transpose)
2006 : CALL create_2c_tensor(t_mo_coeff, dist1, dist2, ri_data%pgrid_2d, bsizes_MO, &
2007 18 : ri_data%bsizes_AO_split, name="MO coeffs")
2008 18 : DEALLOCATE (dist1, dist2)
2009 18 : CALL dbt_create(mo_coeff(ispin), t_2c_tmp, name="MO coeffs")
2010 18 : CALL dbt_copy_matrix_to_tensor(mo_coeff(ispin), t_2c_tmp)
2011 18 : CALL dbt_copy(t_2c_tmp, t_mo_coeff, order=[2, 1], move_data=.TRUE.)
2012 18 : CALL dbt_destroy(t_2c_tmp)
2013 :
2014 18 : CALL dbt_create(t_mo_coeff, t_mo_cpy)
2015 18 : CALL dbt_copy(t_mo_coeff, t_mo_cpy)
2016 72 : DO i_xyz = 1, 3
2017 54 : CALL dbt_create(t_mo_coeff, t_2c_MO_AO_ctr(i_xyz))
2018 72 : CALL dbt_create(t_mo_coeff, t_2c_MO_AO(i_xyz))
2019 : END DO
2020 :
2021 : CALL create_3c_tensor(t_3c_ao_ri_mo, dist1, dist2, dist3, pgrid_1, ri_data%bsizes_AO_split, &
2022 18 : ri_data%bsizes_RI_split, bsizes_MO, [1, 2], [3], name="(AO RI| MO)")
2023 18 : DEALLOCATE (dist1, dist2, dist3)
2024 :
2025 18 : CALL dbt_create(t_3c_ao_ri_mo, t_3c_0)
2026 18 : CALL dbt_destroy(t_3c_ao_ri_mo)
2027 :
2028 : CALL create_3c_tensor(t_3c_mo_ri_ao, dist1, dist2, dist3, pgrid_1, bsizes_MO, ri_data%bsizes_RI_split, &
2029 18 : ri_data%bsizes_AO_split, [1, 2], [3], name="(MO RI | AO)")
2030 18 : DEALLOCATE (dist1, dist2, dist3)
2031 18 : CALL dbt_create(t_3c_mo_ri_ao, t_3c_1)
2032 :
2033 72 : DO i_xyz = 1, 3
2034 54 : CALL dbt_create(t_3c_mo_ri_ao, t_3c_der_RI_ctr_1(i_xyz))
2035 72 : CALL dbt_create(t_3c_mo_ri_ao, t_3c_der_AO_ctr_1(i_xyz))
2036 : END DO
2037 :
2038 : CALL create_3c_tensor(t_3c_mo_ri_mo, dist1, dist2, dist3, pgrid_1, bsizes_MO, &
2039 18 : ri_data%bsizes_RI_split, bsizes_MO, [1, 2], [3], name="(MO RI | MO)")
2040 18 : DEALLOCATE (dist1, dist2, dist3)
2041 18 : CALL dbt_create(t_3c_mo_ri_mo, t_3c_work)
2042 :
2043 : CALL create_3c_tensor(t_3c_ri_mo_mo, dist1, dist2, dist3, pgrid_2, ri_data%bsizes_RI_split, &
2044 18 : bsizes_MO, bsizes_MO, [1], [2, 3], name="(RI| MO MO)")
2045 18 : DEALLOCATE (dist1, dist2, dist3)
2046 :
2047 18 : CALL dbt_create(t_3c_ri_mo_mo, t_3c_2)
2048 18 : CALL dbt_create(t_3c_ri_mo_mo, t_3c_3)
2049 18 : CALL dbt_create(t_3c_ri_mo_mo, t_3c_RI_ctr)
2050 72 : DO i_xyz = 1, 3
2051 72 : CALL dbt_create(t_3c_ri_mo_mo, t_3c_der_RI_ctr_2(i_xyz))
2052 : END DO
2053 :
2054 : !Very large RI_fit blocks => new pgrid to make sure distribution is ideal
2055 18 : pdims(:) = 0
2056 : CALL create_3c_tensor(t_3c_ri_mo_mo_fit, dist1, dist2, dist3, pgrid_2, ri_data%bsizes_RI_fit, &
2057 18 : bsizes_MO, bsizes_MO, [1], [2, 3], name="(RI| MO MO)")
2058 18 : DEALLOCATE (dist1, dist2, dist3)
2059 :
2060 18 : CALL dbt_create(t_3c_ri_mo_mo_fit, t_3c_4)
2061 18 : CALL dbt_create(t_3c_ri_mo_mo_fit, t_3c_5)
2062 18 : CALL dbt_create(t_3c_ri_mo_mo_fit, t_3c_6)
2063 :
2064 18 : CALL dbt_batched_contract_init(t_3c_desymm, batch_range_2=batch_ranges_RI)
2065 18 : CALL dbt_batched_contract_init(t_3c_0, batch_range_2=batch_ranges_RI, batch_range_3=batch_ranges)
2066 :
2067 72 : DO i_xyz = 1, 3
2068 54 : CALL dbt_batched_contract_init(t_3c_der_AO(i_xyz), batch_range_2=batch_ranges_RI)
2069 72 : CALL dbt_batched_contract_init(t_3c_der_RI(i_xyz), batch_range_2=batch_ranges_RI)
2070 : END DO
2071 :
2072 18 : CALL para_env%sync()
2073 18 : t1 = m_walltime()
2074 :
2075 : ! 2) Loop over batches
2076 40 : DO i_mem = 1, n_mem
2077 :
2078 22 : bounds_ctr_1d(1, 1) = batch_start(i_mem)
2079 22 : bounds_ctr_1d(2, 1) = batch_end(i_mem)
2080 :
2081 22 : bounds_ctr_2d(1, 1) = 1
2082 96 : bounds_ctr_2d(2, 1) = SUM(ri_data%bsizes_AO)
2083 :
2084 : ! 3) Do the first AO to MO contraction here
2085 22 : CALL timeset(routineN//"_AO2MO_1", handle)
2086 22 : CALL dbt_batched_contract_init(t_mo_coeff)
2087 44 : DO k_mem = 1, n_mem_RI
2088 22 : bounds_ctr_2d(1, 2) = batch_start_RI(k_mem)
2089 22 : bounds_ctr_2d(2, 2) = batch_end_RI(k_mem)
2090 :
2091 : CALL dbt_contract(1.0_dp, t_mo_coeff, t_3c_desymm, &
2092 : 1.0_dp, t_3c_0, &
2093 : contract_1=[2], notcontract_1=[1], &
2094 : contract_2=[3], notcontract_2=[1, 2], &
2095 : map_1=[3], map_2=[1, 2], filter_eps=ri_data%filter_eps, &
2096 : bounds_2=bounds_ctr_1d, &
2097 : bounds_3=bounds_ctr_2d, &
2098 22 : unit_nr=unit_nr_dbcsr, flop=nflop)
2099 44 : ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
2100 : END DO
2101 22 : CALL dbt_copy(t_3c_0, t_3c_1, order=[3, 2, 1], move_data=.TRUE.)
2102 :
2103 88 : DO i_xyz = 1, 3
2104 132 : DO k_mem = 1, n_mem_RI
2105 66 : bounds_ctr_2d(1, 2) = batch_start_RI(k_mem)
2106 66 : bounds_ctr_2d(2, 2) = batch_end_RI(k_mem)
2107 :
2108 : CALL dbt_contract(1.0_dp, t_mo_coeff, t_3c_der_AO(i_xyz), &
2109 : 1.0_dp, t_3c_0, &
2110 : contract_1=[2], notcontract_1=[1], &
2111 : contract_2=[3], notcontract_2=[1, 2], &
2112 : map_1=[3], map_2=[1, 2], filter_eps=ri_data%filter_eps, &
2113 : bounds_2=bounds_ctr_1d, &
2114 : bounds_3=bounds_ctr_2d, &
2115 66 : unit_nr=unit_nr_dbcsr, flop=nflop)
2116 132 : ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
2117 : END DO
2118 66 : CALL dbt_copy(t_3c_0, t_3c_der_AO_ctr_1(i_xyz), order=[3, 2, 1], move_data=.TRUE.)
2119 :
2120 132 : DO k_mem = 1, n_mem_RI
2121 66 : bounds_ctr_2d(1, 2) = batch_start_RI(k_mem)
2122 66 : bounds_ctr_2d(2, 2) = batch_end_RI(k_mem)
2123 :
2124 : CALL dbt_contract(1.0_dp, t_mo_coeff, t_3c_der_RI(i_xyz), &
2125 : 1.0_dp, t_3c_0, &
2126 : contract_1=[2], notcontract_1=[1], &
2127 : contract_2=[3], notcontract_2=[1, 2], &
2128 : map_1=[3], map_2=[1, 2], filter_eps=ri_data%filter_eps, &
2129 : bounds_2=bounds_ctr_1d, &
2130 : bounds_3=bounds_ctr_2d, &
2131 66 : unit_nr=unit_nr_dbcsr, flop=nflop)
2132 132 : ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
2133 : END DO
2134 88 : CALL dbt_copy(t_3c_0, t_3c_der_RI_ctr_1(i_xyz), order=[3, 2, 1], move_data=.TRUE.)
2135 : END DO
2136 22 : CALL dbt_batched_contract_finalize(t_mo_coeff)
2137 22 : CALL timestop(handle)
2138 :
2139 22 : CALL dbt_batched_contract_init(t_3c_1, batch_range_1=batch_ranges, batch_range_2=batch_ranges_RI)
2140 : CALL dbt_batched_contract_init(t_3c_work, batch_range_1=batch_ranges, batch_range_2=batch_ranges_RI, &
2141 22 : batch_range_3=batch_ranges)
2142 22 : CALL dbt_batched_contract_init(t_3c_2, batch_range_2=batch_ranges, batch_range_3=batch_ranges)
2143 : CALL dbt_batched_contract_init(t_3c_3, batch_range_1=batch_ranges_RI, &
2144 22 : batch_range_2=batch_ranges, batch_range_3=batch_ranges)
2145 :
2146 : CALL dbt_batched_contract_init(t_3c_4, batch_range_1=batch_ranges_RI_fit, &
2147 22 : batch_range_2=batch_ranges, batch_range_3=batch_ranges)
2148 22 : CALL dbt_batched_contract_init(t_3c_5, batch_range_2=batch_ranges, batch_range_3=batch_ranges)
2149 :
2150 88 : DO i_xyz = 1, 3
2151 : CALL dbt_batched_contract_init(t_3c_der_RI_ctr_1(i_xyz), batch_range_1=batch_ranges, &
2152 66 : batch_range_2=batch_ranges_RI)
2153 : CALL dbt_batched_contract_init(t_3c_der_AO_ctr_1(i_xyz), batch_range_1=batch_ranges, &
2154 88 : batch_range_2=batch_ranges_RI)
2155 :
2156 : END DO
2157 :
2158 22 : IF (.NOT. ri_data%same_op) THEN
2159 8 : CALL dbt_batched_contract_init(t_3c_6, batch_range_2=batch_ranges, batch_range_3=batch_ranges)
2160 : END IF
2161 :
2162 52 : DO j_mem = 1, n_mem
2163 :
2164 30 : bounds_ctr_1d(1, 1) = batch_start(j_mem)
2165 30 : bounds_ctr_1d(2, 1) = batch_end(j_mem)
2166 :
2167 30 : bounds_ctr_2d(1, 1) = batch_start(i_mem)
2168 30 : bounds_ctr_2d(2, 1) = batch_end(i_mem)
2169 :
2170 : ! 3) Do the second AO to MO contraction here, followed by the S^-1 contraction
2171 30 : CALL timeset(routineN//"_AO2MO_2", handle)
2172 30 : CALL dbt_batched_contract_init(t_mo_coeff)
2173 60 : DO k_mem = 1, n_mem_RI
2174 30 : bounds_ctr_2d(1, 2) = batch_start_RI(k_mem)
2175 30 : bounds_ctr_2d(2, 2) = batch_end_RI(k_mem)
2176 :
2177 : CALL dbt_contract(1.0_dp, t_mo_coeff, t_3c_1, &
2178 : 1.0_dp, t_3c_work, &
2179 : contract_1=[2], notcontract_1=[1], &
2180 : contract_2=[3], notcontract_2=[1, 2], &
2181 : map_1=[3], map_2=[1, 2], filter_eps=ri_data%filter_eps, &
2182 : bounds_2=bounds_ctr_1d, &
2183 : bounds_3=bounds_ctr_2d, &
2184 30 : unit_nr=unit_nr_dbcsr, flop=nflop)
2185 60 : ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
2186 : END DO
2187 30 : CALL dbt_batched_contract_finalize(t_mo_coeff)
2188 30 : CALL timestop(handle)
2189 :
2190 30 : bounds_ctr_2d(1, 1) = batch_start(i_mem)
2191 30 : bounds_ctr_2d(2, 1) = batch_end(i_mem)
2192 30 : bounds_ctr_2d(1, 2) = batch_start(j_mem)
2193 30 : bounds_ctr_2d(2, 2) = batch_end(j_mem)
2194 :
2195 : ! 4) Contract 3c MO integrals with S^-1 as well
2196 30 : CALL timeset(routineN//"_2c_inv", handle)
2197 30 : CALL dbt_copy(t_3c_work, t_3c_3, order=[2, 1, 3], move_data=.TRUE.)
2198 60 : DO k_mem = 1, n_mem_RI
2199 30 : bounds_ctr_1d(1, 1) = batch_start_RI(k_mem)
2200 30 : bounds_ctr_1d(2, 1) = batch_end_RI(k_mem)
2201 :
2202 30 : CALL dbt_batched_contract_init(ri_data%t_2c_inv(1, 1))
2203 : CALL dbt_contract(1.0_dp, ri_data%t_2c_inv(1, 1), t_3c_3, &
2204 : 1.0_dp, t_3c_2, &
2205 : contract_1=[2], notcontract_1=[1], &
2206 : contract_2=[1], notcontract_2=[2, 3], &
2207 : map_1=[1], map_2=[2, 3], filter_eps=ri_data%filter_eps, &
2208 : bounds_1=bounds_ctr_1d, &
2209 : bounds_3=bounds_ctr_2d, &
2210 30 : unit_nr=unit_nr_dbcsr, flop=nflop)
2211 30 : ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
2212 60 : CALL dbt_batched_contract_finalize(ri_data%t_2c_inv(1, 1))
2213 : END DO
2214 30 : CALL dbt_copy(t_3c_ri_mo_mo, t_3c_3)
2215 30 : CALL timestop(handle)
2216 :
2217 : !Only contract (ab|P') with MO coeffs since need AO rep for the force of (a'b|P)
2218 30 : bounds_ctr_1d(1, 1) = batch_start(j_mem)
2219 30 : bounds_ctr_1d(2, 1) = batch_end(j_mem)
2220 :
2221 30 : bounds_ctr_2d(1, 1) = batch_start(i_mem)
2222 30 : bounds_ctr_2d(2, 1) = batch_end(i_mem)
2223 :
2224 30 : CALL timeset(routineN//"_AO2MO_2", handle)
2225 30 : CALL dbt_batched_contract_init(t_mo_coeff)
2226 120 : DO i_xyz = 1, 3
2227 180 : DO k_mem = 1, n_mem_RI
2228 90 : bounds_ctr_2d(1, 2) = batch_start_RI(k_mem)
2229 90 : bounds_ctr_2d(2, 2) = batch_end_RI(k_mem)
2230 :
2231 : CALL dbt_contract(1.0_dp, t_mo_coeff, t_3c_der_RI_ctr_1(i_xyz), &
2232 : 1.0_dp, t_3c_work, &
2233 : contract_1=[2], notcontract_1=[1], &
2234 : contract_2=[3], notcontract_2=[1, 2], &
2235 : map_1=[3], map_2=[1, 2], filter_eps=ri_data%filter_eps, &
2236 : bounds_2=bounds_ctr_1d, &
2237 : bounds_3=bounds_ctr_2d, &
2238 90 : unit_nr=unit_nr_dbcsr, flop=nflop)
2239 180 : ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
2240 : END DO
2241 120 : CALL dbt_copy(t_3c_work, t_3c_der_RI_ctr_2(i_xyz), order=[2, 1, 3], move_data=.TRUE.)
2242 : END DO
2243 30 : CALL dbt_batched_contract_finalize(t_mo_coeff)
2244 30 : CALL timestop(handle)
2245 :
2246 30 : bounds_ctr_2d(1, 1) = batch_start(i_mem)
2247 30 : bounds_ctr_2d(2, 1) = batch_end(i_mem)
2248 30 : bounds_ctr_2d(1, 2) = batch_start(j_mem)
2249 30 : bounds_ctr_2d(2, 2) = batch_end(j_mem)
2250 :
2251 : ! 5) Force due to d/dx (P|Q)
2252 30 : CALL timeset(routineN//"_PQ_der", handle)
2253 30 : CALL dbt_copy(t_3c_2, t_3c_4, move_data=.TRUE.)
2254 30 : CALL dbt_copy(t_3c_4, t_3c_5)
2255 60 : DO k_mem = 1, n_mem_RI_fit
2256 30 : bounds_ctr_1d(1, 1) = batch_start_RI_fit(k_mem)
2257 30 : bounds_ctr_1d(2, 1) = batch_end_RI_fit(k_mem)
2258 :
2259 30 : CALL dbt_batched_contract_init(t_2c_RI_PQ)
2260 : CALL dbt_contract(1.0_dp, t_3c_4, t_3c_5, &
2261 : 1.0_dp, t_2c_RI_PQ, &
2262 : contract_1=[2, 3], notcontract_1=[1], &
2263 : contract_2=[2, 3], notcontract_2=[1], &
2264 : bounds_1=bounds_ctr_2d, &
2265 : bounds_2=bounds_ctr_1d, &
2266 : map_1=[1], map_2=[2], filter_eps=ri_data%filter_eps, &
2267 30 : unit_nr=unit_nr_dbcsr, flop=nflop)
2268 30 : ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
2269 60 : CALL dbt_batched_contract_finalize(t_2c_RI_PQ)
2270 : END DO
2271 30 : CALL timestop(handle)
2272 :
2273 : ! 6) If metric, do the additional contraction with S_pq^-1 (Q|R) (not on the derivatives)
2274 30 : IF (.NOT. ri_data%same_op) THEN
2275 16 : CALL timeset(routineN//"_metric", handle)
2276 32 : DO k_mem = 1, n_mem_RI_fit
2277 16 : bounds_ctr_1d(1, 1) = batch_start_RI_fit(k_mem)
2278 16 : bounds_ctr_1d(2, 1) = batch_end_RI_fit(k_mem)
2279 :
2280 16 : CALL dbt_batched_contract_init(t_2c_RI_inv)
2281 : CALL dbt_contract(1.0_dp, t_2c_RI_inv, t_3c_4, &
2282 : 1.0_dp, t_3c_6, &
2283 : contract_1=[2], notcontract_1=[1], &
2284 : contract_2=[1], notcontract_2=[2, 3], &
2285 : bounds_1=bounds_ctr_1d, &
2286 : bounds_3=bounds_ctr_2d, &
2287 : map_1=[1], map_2=[2, 3], filter_eps=ri_data%filter_eps, &
2288 16 : unit_nr=unit_nr_dbcsr, flop=nflop)
2289 16 : ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
2290 32 : CALL dbt_batched_contract_finalize(t_2c_RI_inv)
2291 : END DO
2292 16 : CALL dbt_copy(t_3c_6, t_3c_4, move_data=.TRUE.)
2293 :
2294 : ! 8) and get the force due to d/dx S^-1
2295 32 : DO k_mem = 1, n_mem_RI_fit
2296 16 : bounds_ctr_1d(1, 1) = batch_start_RI_fit(k_mem)
2297 16 : bounds_ctr_1d(2, 1) = batch_end_RI_fit(k_mem)
2298 :
2299 16 : CALL dbt_batched_contract_init(t_2c_RI_met)
2300 : CALL dbt_contract(1.0_dp, t_3c_4, t_3c_5, &
2301 : 1.0_dp, t_2c_RI_met, &
2302 : contract_1=[2, 3], notcontract_1=[1], &
2303 : contract_2=[2, 3], notcontract_2=[1], &
2304 : bounds_1=bounds_ctr_2d, &
2305 : bounds_2=bounds_ctr_1d, &
2306 : map_1=[1], map_2=[2], filter_eps=ri_data%filter_eps, &
2307 16 : unit_nr=unit_nr_dbcsr, flop=nflop)
2308 16 : ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
2309 32 : CALL dbt_batched_contract_finalize(t_2c_RI_met)
2310 : END DO
2311 16 : CALL timestop(handle)
2312 : END IF
2313 30 : CALL dbt_copy(t_3c_ri_mo_mo_fit, t_3c_5)
2314 :
2315 : ! 7) Do the force contribution due to 3c integrals (a'b|P) and (ab|P')
2316 :
2317 : ! (ab|P')
2318 30 : CALL timeset(routineN//"_3c_RI", handle)
2319 30 : pref = -0.5_dp*2.0_dp*hf_fraction*spin_fac
2320 30 : CALL dbt_copy(t_3c_4, t_3c_RI_ctr, move_data=.TRUE.)
2321 : CALL get_force_from_3c_trace(force, t_3c_RI_ctr, t_3c_der_RI_ctr_2, atom_of_kind, kind_of, &
2322 30 : idx_to_at_RI, pref)
2323 30 : CALL timestop(handle)
2324 :
2325 : ! (a'b|P) Note that derivative remains in AO rep until the actual force evaluation,
2326 : ! which also prevents doing a direct 3-center trace
2327 30 : bounds_ctr_2d(1, 1) = batch_start(i_mem)
2328 30 : bounds_ctr_2d(2, 1) = batch_end(i_mem)
2329 :
2330 30 : bounds_ctr_1d(1, 1) = batch_start(j_mem)
2331 30 : bounds_ctr_1d(2, 1) = batch_end(j_mem)
2332 :
2333 30 : CALL timeset(routineN//"_3c_AO", handle)
2334 30 : CALL dbt_copy(t_3c_RI_ctr, t_3c_work, order=[2, 1, 3], move_data=.TRUE.)
2335 120 : DO i_xyz = 1, 3
2336 :
2337 90 : CALL dbt_batched_contract_init(t_2c_MO_AO_ctr(i_xyz))
2338 180 : DO k_mem = 1, n_mem_RI
2339 90 : bounds_ctr_2d(1, 2) = batch_start_RI(k_mem)
2340 90 : bounds_ctr_2d(2, 2) = batch_end_RI(k_mem)
2341 :
2342 : CALL dbt_contract(1.0_dp, t_3c_work, t_3c_der_AO_ctr_1(i_xyz), &
2343 : 1.0_dp, t_2c_MO_AO_ctr(i_xyz), &
2344 : contract_1=[1, 2], notcontract_1=[3], &
2345 : contract_2=[1, 2], notcontract_2=[3], &
2346 : map_1=[1], map_2=[2], filter_eps=ri_data%filter_eps, &
2347 : bounds_1=bounds_ctr_2d, &
2348 : bounds_2=bounds_ctr_1d, &
2349 90 : unit_nr=unit_nr_dbcsr, flop=nflop)
2350 180 : ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
2351 : END DO
2352 120 : CALL dbt_batched_contract_finalize(t_2c_MO_AO_ctr(i_xyz))
2353 : END DO
2354 232 : CALL timestop(handle)
2355 :
2356 : END DO !j_mem
2357 22 : CALL dbt_batched_contract_finalize(t_3c_1)
2358 22 : CALL dbt_batched_contract_finalize(t_3c_work)
2359 22 : CALL dbt_batched_contract_finalize(t_3c_2)
2360 22 : CALL dbt_batched_contract_finalize(t_3c_3)
2361 22 : CALL dbt_batched_contract_finalize(t_3c_4)
2362 22 : CALL dbt_batched_contract_finalize(t_3c_5)
2363 :
2364 88 : DO i_xyz = 1, 3
2365 66 : CALL dbt_batched_contract_finalize(t_3c_der_RI_ctr_1(i_xyz))
2366 88 : CALL dbt_batched_contract_finalize(t_3c_der_AO_ctr_1(i_xyz))
2367 : END DO
2368 :
2369 62 : IF (.NOT. ri_data%same_op) THEN
2370 8 : CALL dbt_batched_contract_finalize(t_3c_6)
2371 : END IF
2372 :
2373 : END DO !i_mem
2374 18 : CALL dbt_batched_contract_finalize(t_3c_desymm)
2375 18 : CALL dbt_batched_contract_finalize(t_3c_0)
2376 :
2377 72 : DO i_xyz = 1, 3
2378 54 : CALL dbt_batched_contract_finalize(t_3c_der_AO(i_xyz))
2379 72 : CALL dbt_batched_contract_finalize(t_3c_der_RI(i_xyz))
2380 : END DO
2381 :
2382 : !Force contribution due to 3-center AO derivatives (a'b|P)
2383 18 : pref = -0.5_dp*4.0_dp*hf_fraction*spin_fac
2384 72 : DO i_xyz = 1, 3
2385 54 : CALL dbt_copy(t_2c_MO_AO_ctr(i_xyz), t_2c_MO_AO(i_xyz), move_data=.TRUE.) !ensures matching distributions
2386 54 : CALL get_mo_ao_force(force, t_mo_cpy, t_2c_MO_AO(i_xyz), atom_of_kind, kind_of, idx_to_at_AO, pref, i_xyz)
2387 72 : CALL dbt_clear(t_2c_MO_AO(i_xyz))
2388 : END DO
2389 :
2390 : !Force contribution of d/dx (P|Q)
2391 18 : pref = 0.5_dp*hf_fraction*spin_fac
2392 18 : IF (.NOT. ri_data%same_op) pref = -pref
2393 :
2394 : !Making sure dists of the t_2c_RI tensors match
2395 18 : CALL dbt_copy(t_2c_RI_PQ, t_2c_RI, move_data=.TRUE.)
2396 : CALL get_2c_der_force(force, t_2c_RI, t_2c_der_RI, atom_of_kind, &
2397 18 : kind_of, idx_to_at_RI, pref)
2398 18 : CALL dbt_clear(t_2c_RI)
2399 :
2400 : !Force contribution due to the inverse metric
2401 18 : IF (.NOT. ri_data%same_op) THEN
2402 4 : pref = 0.5_dp*2.0_dp*hf_fraction*spin_fac
2403 :
2404 4 : CALL dbt_copy(t_2c_RI_met, t_2c_RI, move_data=.TRUE.)
2405 : CALL get_2c_der_force(force, t_2c_RI, t_2c_der_metric, atom_of_kind, &
2406 4 : kind_of, idx_to_at_RI, pref)
2407 4 : CALL dbt_clear(t_2c_RI)
2408 : END IF
2409 :
2410 18 : CALL dbt_destroy(t_3c_0)
2411 18 : CALL dbt_destroy(t_3c_1)
2412 18 : CALL dbt_destroy(t_3c_2)
2413 18 : CALL dbt_destroy(t_3c_3)
2414 18 : CALL dbt_destroy(t_3c_4)
2415 18 : CALL dbt_destroy(t_3c_5)
2416 18 : CALL dbt_destroy(t_3c_6)
2417 18 : CALL dbt_destroy(t_3c_work)
2418 18 : CALL dbt_destroy(t_3c_RI_ctr)
2419 18 : CALL dbt_destroy(t_3c_mo_ri_ao)
2420 18 : CALL dbt_destroy(t_3c_mo_ri_mo)
2421 18 : CALL dbt_destroy(t_3c_ri_mo_mo)
2422 18 : CALL dbt_destroy(t_3c_ri_mo_mo_fit)
2423 18 : CALL dbt_destroy(t_mo_coeff)
2424 18 : CALL dbt_destroy(t_mo_cpy)
2425 72 : DO i_xyz = 1, 3
2426 54 : CALL dbt_destroy(t_2c_MO_AO(i_xyz))
2427 54 : CALL dbt_destroy(t_2c_MO_AO_ctr(i_xyz))
2428 54 : CALL dbt_destroy(t_3c_der_RI_ctr_1(i_xyz))
2429 54 : CALL dbt_destroy(t_3c_der_AO_ctr_1(i_xyz))
2430 72 : CALL dbt_destroy(t_3c_der_RI_ctr_2(i_xyz))
2431 : END DO
2432 86 : DEALLOCATE (batch_ranges, batch_start, batch_end)
2433 : END DO !ispin
2434 :
2435 : ! Clean-up
2436 14 : CALL dbt_pgrid_destroy(pgrid_1)
2437 14 : CALL dbt_pgrid_destroy(pgrid_2)
2438 14 : CALL dbt_destroy(t_3c_desymm)
2439 14 : CALL dbt_destroy(t_2c_RI)
2440 14 : CALL dbt_destroy(t_2c_RI_PQ)
2441 14 : IF (.NOT. ri_data%same_op) THEN
2442 4 : CALL dbt_destroy(t_2c_RI_met)
2443 4 : CALL dbt_destroy(t_2c_RI_inv)
2444 : END IF
2445 56 : DO i_xyz = 1, 3
2446 42 : CALL dbt_destroy(t_3c_der_AO(i_xyz))
2447 42 : CALL dbt_destroy(t_3c_der_RI(i_xyz))
2448 42 : CALL dbt_destroy(t_2c_der_RI(i_xyz))
2449 56 : IF (.NOT. ri_data%same_op) CALL dbt_destroy(t_2c_der_metric(i_xyz))
2450 : END DO
2451 14 : CALL dbt_copy(ri_data%t_3c_int_ctr_2(1, 1), ri_data%t_3c_int_ctr_1(1, 1))
2452 :
2453 14 : CALL para_env%sync()
2454 14 : t2 = m_walltime()
2455 :
2456 14 : ri_data%dbcsr_time = ri_data%dbcsr_time + t2 - t1
2457 :
2458 476 : END SUBROUTINE hfx_ri_forces_mo
2459 :
2460 : ! **************************************************************************************************
2461 : !> \brief New sparser implementation
2462 : !> \param qs_env ...
2463 : !> \param ri_data ...
2464 : !> \param nspins ...
2465 : !> \param hf_fraction ...
2466 : !> \param rho_ao ...
2467 : !> \param rho_ao_resp ...
2468 : !> \param use_virial ...
2469 : !> \param resp_only ...
2470 : !> \param rescale_factor ...
2471 : ! **************************************************************************************************
2472 116 : SUBROUTINE hfx_ri_forces_Pmat(qs_env, ri_data, nspins, hf_fraction, rho_ao, rho_ao_resp, &
2473 : use_virial, resp_only, rescale_factor)
2474 :
2475 : TYPE(qs_environment_type), POINTER :: qs_env
2476 : TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
2477 : INTEGER, INTENT(IN) :: nspins
2478 : REAL(dp), INTENT(IN) :: hf_fraction
2479 : TYPE(dbcsr_p_type), DIMENSION(:, :) :: rho_ao
2480 : TYPE(dbcsr_p_type), DIMENSION(:), OPTIONAL :: rho_ao_resp
2481 : LOGICAL, INTENT(IN), OPTIONAL :: use_virial, resp_only
2482 : REAL(dp), INTENT(IN), OPTIONAL :: rescale_factor
2483 :
2484 : CHARACTER(LEN=*), PARAMETER :: routineN = 'hfx_ri_forces_Pmat'
2485 :
2486 : INTEGER :: dummy_int, handle, i_mem, i_spin, i_xyz, &
2487 : ibasis, j_mem, j_xyz, k_mem, k_xyz, &
2488 : n_mem, n_mem_RI, natom, nkind, &
2489 : unit_nr_dbcsr
2490 : INTEGER(int_8) :: nflop
2491 116 : INTEGER, ALLOCATABLE, DIMENSION(:) :: atom_of_kind, batch_end, batch_end_RI, batch_ranges, &
2492 116 : batch_ranges_RI, batch_start, batch_start_RI, dist1, dist2, dist3, idx_to_at_AO, &
2493 116 : idx_to_at_RI, kind_of
2494 : INTEGER, DIMENSION(2, 1) :: ibounds, jbounds, kbounds
2495 : INTEGER, DIMENSION(2, 2) :: ijbounds
2496 : INTEGER, DIMENSION(2, 3) :: bounds_cpy
2497 232 : INTEGER, DIMENSION(:), POINTER :: col_bsize, row_bsize
2498 : LOGICAL :: do_resp, resp_only_prv, use_virial_prv
2499 : REAL(dp) :: pref, spin_fac, t1, t2
2500 : REAL(dp), DIMENSION(3, 3) :: work_virial
2501 116 : TYPE(atomic_kind_type), DIMENSION(:), POINTER :: atomic_kind_set
2502 116 : TYPE(block_ind_type), ALLOCATABLE, DIMENSION(:, :) :: t_3c_der_AO_ind, t_3c_der_RI_ind
2503 : TYPE(cell_type), POINTER :: cell
2504 : TYPE(dbcsr_distribution_type), POINTER :: dbcsr_dist
2505 : TYPE(dbcsr_type) :: dbcsr_tmp, virial_trace
2506 6728 : TYPE(dbt_type) :: rho_ao_1, rho_ao_2, t_2c_RI, t_2c_RI_tmp, t_2c_tmp, t_2c_virial, t_3c_1, &
2507 7656 : t_3c_2, t_3c_3, t_3c_4, t_3c_5, t_3c_ao_ri_ao, t_3c_help_1, t_3c_help_2, t_3c_int, &
2508 5684 : t_3c_int_2, t_3c_ri_ao_ao, t_3c_sparse, t_3c_virial, t_R, t_SVS
2509 116 : TYPE(dbt_type), ALLOCATABLE, DIMENSION(:) :: t_2c_der_metric, t_2c_der_RI, &
2510 116 : t_3c_der_AO, t_3c_der_RI
2511 : TYPE(dft_control_type), POINTER :: dft_control
2512 : TYPE(gto_basis_set_p_type), ALLOCATABLE, &
2513 116 : DIMENSION(:), TARGET :: basis_set_AO, basis_set_RI
2514 : TYPE(gto_basis_set_type), POINTER :: orb_basis, ri_basis
2515 : TYPE(hfx_compression_type), ALLOCATABLE, &
2516 116 : DIMENSION(:, :) :: t_3c_der_AO_comp, t_3c_der_RI_comp
2517 : TYPE(mp_para_env_type), POINTER :: para_env
2518 : TYPE(neighbor_list_3c_type) :: nl_3c
2519 : TYPE(neighbor_list_set_p_type), DIMENSION(:), &
2520 116 : POINTER :: nl_2c_met, nl_2c_pot
2521 116 : TYPE(particle_type), DIMENSION(:), POINTER :: particle_set
2522 116 : TYPE(qs_force_type), DIMENSION(:), POINTER :: force
2523 116 : TYPE(qs_kind_type), DIMENSION(:), POINTER :: qs_kind_set
2524 : TYPE(virial_type), POINTER :: virial
2525 :
2526 : !The idea is the following: we need to compute the gradients
2527 : ! d/dx [P_ab P_cd (acP) S^-1_PQ (Q|R) S^-1_RS (Sbd)]
2528 : ! Which we do in a few steps:
2529 : ! 1) Contract the density matrices with the 3c integrals: M_acS = P_ab P_cd (Sbd)
2530 : ! 2) Calculate the 3c contributions: d/dx (acP) [S^-1_PQ (Q|R) S^-1_RS M_acS]
2531 : ! For maximum perf, we first multiply all 2c matrices together, than contract with retain_sparsity
2532 : ! 3) Contract the 3c integrals and the M tensor together in order to only work with 2c quantities:
2533 : ! R_PS = (acP) M_acS
2534 : ! 4) From there, we can easily calculate the 2c contributions to the force:
2535 : ! Potential: [S^-1*R*S^-1]_QR d/dx (Q|R)
2536 : ! Metric: [S^-1*R*S^-1*(Q|R)*S^-1]_UV d/dx S_UV
2537 :
2538 116 : NULLIFY (particle_set, virial, cell, force, atomic_kind_set, nl_2c_pot, nl_2c_met)
2539 116 : NULLIFY (orb_basis, ri_basis, qs_kind_set, particle_set, dft_control, dbcsr_dist)
2540 :
2541 116 : use_virial_prv = .FALSE.
2542 116 : IF (PRESENT(use_virial)) use_virial_prv = use_virial
2543 :
2544 116 : do_resp = .FALSE.
2545 116 : IF (PRESENT(rho_ao_resp)) THEN
2546 30 : IF (ASSOCIATED(rho_ao_resp(1)%matrix)) do_resp = .TRUE.
2547 : END IF
2548 :
2549 116 : resp_only_prv = .FALSE.
2550 116 : IF (PRESENT(resp_only)) resp_only_prv = resp_only
2551 :
2552 116 : unit_nr_dbcsr = ri_data%unit_nr_dbcsr
2553 :
2554 : CALL get_qs_env(qs_env, natom=natom, particle_set=particle_set, nkind=nkind, &
2555 : atomic_kind_set=atomic_kind_set, virial=virial, &
2556 : cell=cell, force=force, para_env=para_env, dft_control=dft_control, &
2557 116 : qs_kind_set=qs_kind_set, dbcsr_dist=dbcsr_dist)
2558 :
2559 : CALL create_3c_tensor(t_3c_ao_ri_ao, dist1, dist2, dist3, ri_data%pgrid_1, &
2560 : ri_data%bsizes_AO_split, ri_data%bsizes_RI_split, ri_data%bsizes_AO_split, &
2561 116 : [1, 2], [3], name="(AO RI | AO)")
2562 116 : DEALLOCATE (dist1, dist2, dist3)
2563 :
2564 : CALL create_3c_tensor(t_3c_ri_ao_ao, dist1, dist2, dist3, ri_data%pgrid_2, &
2565 : ri_data%bsizes_RI_split, ri_data%bsizes_AO_split, ri_data%bsizes_AO_split, &
2566 116 : [1], [2, 3], name="(RI | AO AO)")
2567 116 : DEALLOCATE (dist1, dist2, dist3)
2568 :
2569 916 : ALLOCATE (basis_set_RI(nkind), basis_set_AO(nkind))
2570 116 : CALL basis_set_list_setup(basis_set_RI, ri_data%ri_basis_type, qs_kind_set)
2571 116 : CALL get_particle_set(particle_set, qs_kind_set, basis=basis_set_RI)
2572 116 : CALL basis_set_list_setup(basis_set_AO, ri_data%orb_basis_type, qs_kind_set)
2573 116 : CALL get_particle_set(particle_set, qs_kind_set, basis=basis_set_AO)
2574 :
2575 342 : DO ibasis = 1, SIZE(basis_set_AO)
2576 226 : orb_basis => basis_set_AO(ibasis)%gto_basis_set
2577 226 : CALL init_interaction_radii_orb_basis(orb_basis, ri_data%eps_pgf_orb)
2578 226 : ri_basis => basis_set_RI(ibasis)%gto_basis_set
2579 342 : CALL init_interaction_radii_orb_basis(ri_basis, ri_data%eps_pgf_orb)
2580 : END DO
2581 :
2582 : ! Precompute the derivatives
2583 5684 : ALLOCATE (t_2c_der_metric(3), t_2c_der_RI(3), t_3c_der_AO(3), t_3c_der_RI(3))
2584 116 : IF (use_virial) THEN
2585 : CALL precalc_derivatives(t_3c_der_RI_comp, t_3c_der_AO_comp, t_3c_der_RI_ind, t_3c_der_AO_ind, &
2586 : t_2c_der_RI, t_2c_der_metric, t_3c_ri_ao_ao, &
2587 : basis_set_AO, basis_set_RI, ri_data, qs_env, &
2588 : nl_2c_pot=nl_2c_pot, nl_2c_met=nl_2c_met, &
2589 4 : nl_3c_out=nl_3c, t_3c_virial=t_3c_virial)
2590 :
2591 16 : ALLOCATE (col_bsize(natom), row_bsize(natom))
2592 16 : col_bsize(:) = ri_data%bsizes_RI
2593 16 : row_bsize(:) = ri_data%bsizes_RI
2594 4 : CALL dbcsr_create(virial_trace, "virial_trace", dbcsr_dist, dbcsr_type_no_symmetry, row_bsize, col_bsize)
2595 4 : CALL dbt_create(virial_trace, t_2c_virial)
2596 4 : DEALLOCATE (col_bsize, row_bsize)
2597 : ELSE
2598 : CALL precalc_derivatives(t_3c_der_RI_comp, t_3c_der_AO_comp, t_3c_der_RI_ind, t_3c_der_AO_ind, &
2599 : t_2c_der_RI, t_2c_der_metric, t_3c_ri_ao_ao, &
2600 112 : basis_set_AO, basis_set_RI, ri_data, qs_env)
2601 : END IF
2602 :
2603 : ! Keep track of derivative sparsity to be able to use retain_sparsity in contraction
2604 116 : CALL dbt_create(t_3c_ri_ao_ao, t_3c_sparse)
2605 464 : DO i_xyz = 1, 3
2606 1490 : DO i_mem = 1, SIZE(t_3c_der_RI_comp, 1)
2607 : CALL decompress_tensor(t_3c_ri_ao_ao, t_3c_der_RI_ind(i_mem, i_xyz)%ind, &
2608 1026 : t_3c_der_RI_comp(i_mem, i_xyz), ri_data%filter_eps_storage)
2609 1026 : CALL dbt_copy(t_3c_ri_ao_ao, t_3c_sparse, summation=.TRUE., move_data=.TRUE.)
2610 :
2611 : CALL decompress_tensor(t_3c_ri_ao_ao, t_3c_der_AO_ind(i_mem, i_xyz)%ind, &
2612 1026 : t_3c_der_AO_comp(i_mem, i_xyz), ri_data%filter_eps_storage)
2613 1026 : CALL dbt_copy(t_3c_ri_ao_ao, t_3c_sparse, summation=.TRUE.)
2614 1374 : CALL dbt_copy(t_3c_ri_ao_ao, t_3c_sparse, order=[1, 3, 2], summation=.TRUE., move_data=.TRUE.)
2615 : END DO
2616 : END DO
2617 :
2618 464 : DO i_xyz = 1, 3
2619 348 : CALL dbt_create(t_3c_ri_ao_ao, t_3c_der_RI(i_xyz))
2620 464 : CALL dbt_create(t_3c_ri_ao_ao, t_3c_der_AO(i_xyz))
2621 : END DO
2622 :
2623 : ! Some utilities
2624 116 : spin_fac = 0.5_dp
2625 116 : IF (nspins == 2) spin_fac = 1.0_dp
2626 116 : IF (PRESENT(rescale_factor)) spin_fac = spin_fac*rescale_factor
2627 :
2628 348 : ALLOCATE (idx_to_at_RI(SIZE(ri_data%bsizes_RI_split)))
2629 116 : CALL get_idx_to_atom(idx_to_at_RI, ri_data%bsizes_RI_split, ri_data%bsizes_RI)
2630 :
2631 348 : ALLOCATE (idx_to_at_AO(SIZE(ri_data%bsizes_AO_split)))
2632 116 : CALL get_idx_to_atom(idx_to_at_AO, ri_data%bsizes_AO_split, ri_data%bsizes_AO)
2633 :
2634 116 : CALL get_atomic_kind_set(atomic_kind_set, kind_of=kind_of, atom_of_kind=atom_of_kind)
2635 :
2636 : ! Go over batches of the 2 AO indices to save memory
2637 116 : n_mem = ri_data%n_mem
2638 464 : ALLOCATE (batch_start(n_mem), batch_end(n_mem))
2639 464 : batch_start(:) = ri_data%starts_array_mem(:)
2640 464 : batch_end(:) = ri_data%ends_array_mem(:)
2641 :
2642 348 : ALLOCATE (batch_ranges(n_mem + 1))
2643 464 : batch_ranges(:n_mem) = ri_data%starts_array_mem_block(:)
2644 116 : batch_ranges(n_mem + 1) = ri_data%ends_array_mem_block(n_mem) + 1
2645 :
2646 116 : n_mem_RI = ri_data%n_mem_RI
2647 464 : ALLOCATE (batch_start_RI(n_mem_RI), batch_end_RI(n_mem_RI))
2648 464 : batch_start_RI(:) = ri_data%starts_array_RI_mem(:)
2649 464 : batch_end_RI(:) = ri_data%ends_array_RI_mem(:)
2650 :
2651 348 : ALLOCATE (batch_ranges_RI(n_mem_RI + 1))
2652 464 : batch_ranges_RI(:n_mem_RI) = ri_data%starts_array_RI_mem_block(:)
2653 116 : batch_ranges_RI(n_mem_RI + 1) = ri_data%ends_array_RI_mem_block(n_mem_RI) + 1
2654 :
2655 : ! Pre-create all the needed tensors
2656 : CALL create_2c_tensor(rho_ao_1, dist1, dist2, ri_data%pgrid_2d, &
2657 : ri_data%bsizes_AO_split, ri_data%bsizes_AO_split, &
2658 116 : name="(AO | AO)")
2659 116 : DEALLOCATE (dist1, dist2)
2660 116 : CALL dbt_create(rho_ao_1, rho_ao_2)
2661 :
2662 : CALL create_2c_tensor(t_2c_RI, dist1, dist2, ri_data%pgrid_2d, &
2663 : ri_data%bsizes_RI_split, ri_data%bsizes_RI_split, name="(RI | RI)")
2664 116 : DEALLOCATE (dist1, dist2)
2665 116 : CALL dbt_create(t_2c_RI, t_SVS)
2666 116 : CALL dbt_create(t_2c_RI, t_R)
2667 116 : CALL dbt_create(t_2c_RI, t_2c_RI_tmp)
2668 :
2669 116 : CALL dbt_create(t_3c_ao_ri_ao, t_3c_1)
2670 116 : CALL dbt_create(t_3c_ao_ri_ao, t_3c_2)
2671 116 : CALL dbt_create(t_3c_ri_ao_ao, t_3c_3)
2672 116 : CALL dbt_create(t_3c_ri_ao_ao, t_3c_4)
2673 116 : CALL dbt_create(t_3c_ri_ao_ao, t_3c_5)
2674 116 : CALL dbt_create(t_3c_ri_ao_ao, t_3c_help_1)
2675 116 : CALL dbt_create(t_3c_ri_ao_ao, t_3c_help_2)
2676 :
2677 116 : CALL dbt_create(t_3c_ao_ri_ao, t_3c_int)
2678 116 : CALL dbt_copy(ri_data%t_3c_int_ctr_2(1, 1), t_3c_int)
2679 :
2680 116 : CALL dbt_create(t_3c_ri_ao_ao, t_3c_int_2)
2681 :
2682 116 : CALL para_env%sync()
2683 116 : t1 = m_walltime()
2684 :
2685 : !Pre-calculate the necessary 2-center quantities
2686 116 : IF (.NOT. ri_data%same_op) THEN
2687 : !S^-1 * V * S^-1
2688 : CALL dbt_contract(1.0_dp, ri_data%t_2c_inv(1, 1), ri_data%t_2c_pot(1, 1), 0.0_dp, t_2c_RI, &
2689 : contract_1=[2], notcontract_1=[1], &
2690 : contract_2=[1], notcontract_2=[2], &
2691 : map_1=[1], map_2=[2], filter_eps=ri_data%filter_eps, &
2692 28 : unit_nr=unit_nr_dbcsr, flop=nflop)
2693 28 : ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
2694 :
2695 : CALL dbt_contract(1.0_dp, t_2c_RI, ri_data%t_2c_inv(1, 1), 0.0_dp, t_SVS, &
2696 : contract_1=[2], notcontract_1=[1], &
2697 : contract_2=[1], notcontract_2=[2], &
2698 : map_1=[1], map_2=[2], filter_eps=ri_data%filter_eps, &
2699 28 : unit_nr=unit_nr_dbcsr, flop=nflop)
2700 28 : ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
2701 : ELSE
2702 : ! Simply V^-1
2703 88 : CALL dbt_copy(ri_data%t_2c_inv(1, 1), t_SVS)
2704 : END IF
2705 :
2706 116 : CALL dbt_batched_contract_init(t_3c_int, batch_range_1=batch_ranges, batch_range_3=batch_ranges)
2707 : CALL dbt_batched_contract_init(t_3c_int_2, batch_range_1=batch_ranges_RI, &
2708 116 : batch_range_2=batch_ranges, batch_range_3=batch_ranges)
2709 116 : CALL dbt_batched_contract_init(t_3c_1, batch_range_1=batch_ranges, batch_range_3=batch_ranges)
2710 116 : CALL dbt_batched_contract_init(t_3c_2, batch_range_1=batch_ranges, batch_range_3=batch_ranges)
2711 : CALL dbt_batched_contract_init(t_3c_3, batch_range_1=batch_ranges_RI, &
2712 116 : batch_range_2=batch_ranges, batch_range_3=batch_ranges)
2713 : CALL dbt_batched_contract_init(t_3c_4, batch_range_1=batch_ranges_RI, &
2714 116 : batch_range_2=batch_ranges, batch_range_3=batch_ranges)
2715 : CALL dbt_batched_contract_init(t_3c_5, batch_range_1=batch_ranges_RI, &
2716 116 : batch_range_2=batch_ranges, batch_range_3=batch_ranges)
2717 : CALL dbt_batched_contract_init(t_3c_sparse, batch_range_1=batch_ranges_RI, &
2718 116 : batch_range_2=batch_ranges, batch_range_3=batch_ranges)
2719 :
2720 242 : DO i_spin = 1, nspins
2721 :
2722 : !Prepare Pmat in tensor format
2723 126 : CALL dbt_create(rho_ao(i_spin, 1)%matrix, t_2c_tmp)
2724 126 : CALL dbt_copy_matrix_to_tensor(rho_ao(i_spin, 1)%matrix, t_2c_tmp)
2725 126 : CALL dbt_copy(t_2c_tmp, rho_ao_1, move_data=.TRUE.)
2726 126 : CALL dbt_destroy(t_2c_tmp)
2727 :
2728 126 : IF (.NOT. do_resp) THEN
2729 94 : CALL dbt_copy(rho_ao_1, rho_ao_2)
2730 32 : ELSE IF (do_resp .AND. resp_only_prv) THEN
2731 :
2732 24 : CALL dbt_create(rho_ao_resp(i_spin)%matrix, t_2c_tmp)
2733 24 : CALL dbt_copy_matrix_to_tensor(rho_ao_resp(i_spin)%matrix, t_2c_tmp)
2734 24 : CALL dbt_copy(t_2c_tmp, rho_ao_2)
2735 : !symmetry allows to take 2*P_resp rasther than explicitely take all cross products
2736 24 : CALL dbt_copy(t_2c_tmp, rho_ao_2, summation=.TRUE., move_data=.TRUE.)
2737 24 : CALL dbt_destroy(t_2c_tmp)
2738 : ELSE
2739 :
2740 : !if not resp_only, need P-P_resp and P+P_resp
2741 8 : CALL dbt_copy(rho_ao_1, rho_ao_2)
2742 8 : CALL dbcsr_create(dbcsr_tmp, template=rho_ao_resp(i_spin)%matrix)
2743 8 : CALL dbcsr_add(dbcsr_tmp, rho_ao_resp(i_spin)%matrix, 0.0_dp, -1.0_dp)
2744 8 : CALL dbt_create(dbcsr_tmp, t_2c_tmp)
2745 8 : CALL dbt_copy_matrix_to_tensor(dbcsr_tmp, t_2c_tmp)
2746 8 : CALL dbt_copy(t_2c_tmp, rho_ao_1, summation=.TRUE., move_data=.TRUE.)
2747 8 : CALL dbcsr_release(dbcsr_tmp)
2748 :
2749 8 : CALL dbt_copy_matrix_to_tensor(rho_ao_resp(i_spin)%matrix, t_2c_tmp)
2750 8 : CALL dbt_copy(t_2c_tmp, rho_ao_2, summation=.TRUE., move_data=.TRUE.)
2751 8 : CALL dbt_destroy(t_2c_tmp)
2752 :
2753 : END IF
2754 126 : work_virial = 0.0_dp
2755 :
2756 126 : CALL timeset(routineN//"_3c", handle)
2757 : !Start looping of the batches
2758 504 : DO i_mem = 1, n_mem
2759 1134 : ibounds(:, 1) = [batch_start(i_mem), batch_end(i_mem)]
2760 :
2761 378 : CALL dbt_batched_contract_init(rho_ao_1)
2762 : CALL dbt_contract(1.0_dp, rho_ao_1, t_3c_int, 0.0_dp, t_3c_1, &
2763 : contract_1=[1], notcontract_1=[2], &
2764 : contract_2=[3], notcontract_2=[1, 2], &
2765 : map_1=[3], map_2=[1, 2], filter_eps=ri_data%filter_eps, &
2766 378 : bounds_2=ibounds, unit_nr=unit_nr_dbcsr, flop=nflop)
2767 378 : ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
2768 378 : CALL dbt_batched_contract_finalize(rho_ao_1)
2769 :
2770 378 : CALL dbt_copy(t_3c_1, t_3c_2, order=[3, 2, 1], move_data=.TRUE.)
2771 :
2772 1512 : DO j_mem = 1, n_mem
2773 3402 : jbounds(:, 1) = [batch_start(j_mem), batch_end(j_mem)]
2774 :
2775 1134 : CALL dbt_batched_contract_init(rho_ao_2)
2776 : CALL dbt_contract(1.0_dp, rho_ao_2, t_3c_2, 0.0_dp, t_3c_1, &
2777 : contract_1=[1], notcontract_1=[2], &
2778 : contract_2=[3], notcontract_2=[1, 2], &
2779 : map_1=[3], map_2=[1, 2], filter_eps=ri_data%filter_eps, &
2780 1134 : bounds_2=jbounds, unit_nr=unit_nr_dbcsr, flop=nflop)
2781 1134 : ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
2782 1134 : CALL dbt_batched_contract_finalize(rho_ao_2)
2783 :
2784 3402 : bounds_cpy(:, 1) = [batch_start(i_mem), batch_end(i_mem)]
2785 6840 : bounds_cpy(:, 2) = [1, SUM(ri_data%bsizes_RI)]
2786 3402 : bounds_cpy(:, 3) = [batch_start(j_mem), batch_end(j_mem)]
2787 1134 : CALL dbt_copy(t_3c_int, t_3c_int_2, order=[2, 1, 3], bounds=bounds_cpy)
2788 1134 : CALL dbt_copy(t_3c_1, t_3c_3, order=[2, 1, 3], move_data=.TRUE.)
2789 :
2790 4914 : DO k_mem = 1, n_mem_RI
2791 10206 : kbounds(:, 1) = [batch_start_RI(k_mem), batch_end_RI(k_mem)]
2792 :
2793 10206 : bounds_cpy(:, 1) = [batch_start_RI(k_mem), batch_end_RI(k_mem)]
2794 10206 : bounds_cpy(:, 2) = [batch_start(i_mem), batch_end(i_mem)]
2795 10206 : bounds_cpy(:, 3) = [batch_start(j_mem), batch_end(j_mem)]
2796 3402 : CALL dbt_copy(t_3c_sparse, t_3c_4, bounds=bounds_cpy)
2797 :
2798 : !Contract with the 2-center product S^-1 * V * S^-1 while keeping sparsity of derivatives
2799 3402 : CALL dbt_batched_contract_init(t_SVS)
2800 : CALL dbt_contract(1.0_dp, t_SVS, t_3c_3, 0.0_dp, t_3c_4, &
2801 : contract_1=[2], notcontract_1=[1], &
2802 : contract_2=[1], notcontract_2=[2, 3], &
2803 : map_1=[1], map_2=[2, 3], filter_eps=ri_data%filter_eps, &
2804 3402 : retain_sparsity=.TRUE., unit_nr=unit_nr_dbcsr, flop=nflop)
2805 3402 : ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
2806 3402 : CALL dbt_batched_contract_finalize(t_SVS)
2807 :
2808 3402 : CALL dbt_copy(t_3c_4, t_3c_5, summation=.TRUE., move_data=.TRUE.)
2809 :
2810 10206 : ijbounds(:, 1) = ibounds(:, 1)
2811 10206 : ijbounds(:, 2) = jbounds(:, 1)
2812 :
2813 : !Contract R_PS = (acP) M_acS
2814 3402 : CALL dbt_batched_contract_init(t_R)
2815 : CALL dbt_contract(1.0_dp, t_3c_int_2, t_3c_3, 1.0_dp, t_R, &
2816 : contract_1=[2, 3], notcontract_1=[1], &
2817 : contract_2=[2, 3], notcontract_2=[1], &
2818 : map_1=[1], map_2=[2], filter_eps=ri_data%filter_eps, &
2819 : bounds_1=ijbounds, bounds_3=kbounds, &
2820 3402 : unit_nr=unit_nr_dbcsr, flop=nflop)
2821 3402 : ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
2822 4536 : CALL dbt_batched_contract_finalize(t_R)
2823 :
2824 : END DO !k_mem
2825 : END DO !j_mem
2826 :
2827 378 : CALL dbt_copy(t_3c_5, t_3c_help_1, move_data=.TRUE.)
2828 :
2829 : !The force from the 3c derivatives
2830 378 : pref = -0.5_dp*2.0_dp*hf_fraction*spin_fac
2831 :
2832 1476 : DO k_mem = 1, SIZE(t_3c_der_RI_comp, 1)
2833 4392 : DO i_xyz = 1, 3
2834 3294 : CALL dbt_clear(t_3c_der_RI(i_xyz))
2835 : CALL decompress_tensor(t_3c_der_RI(i_xyz), t_3c_der_RI_ind(k_mem, i_xyz)%ind, &
2836 4392 : t_3c_der_RI_comp(k_mem, i_xyz), ri_data%filter_eps_storage)
2837 : END DO
2838 : CALL get_force_from_3c_trace(force, t_3c_help_1, t_3c_der_RI, atom_of_kind, kind_of, &
2839 1476 : idx_to_at_RI, pref)
2840 : END DO
2841 :
2842 378 : pref = -0.5_dp*4.0_dp*hf_fraction*spin_fac
2843 378 : IF (do_resp) THEN
2844 96 : pref = 0.5_dp*pref
2845 96 : CALL dbt_copy(t_3c_help_1, t_3c_help_2, order=[1, 3, 2])
2846 : END IF
2847 :
2848 1476 : DO k_mem = 1, SIZE(t_3c_der_AO_comp, 1)
2849 4392 : DO i_xyz = 1, 3
2850 3294 : CALL dbt_clear(t_3c_der_AO(i_xyz))
2851 : CALL decompress_tensor(t_3c_der_AO(i_xyz), t_3c_der_AO_ind(k_mem, i_xyz)%ind, &
2852 4392 : t_3c_der_AO_comp(k_mem, i_xyz), ri_data%filter_eps_storage)
2853 : END DO
2854 : CALL get_force_from_3c_trace(force, t_3c_help_1, t_3c_der_AO, atom_of_kind, kind_of, &
2855 1098 : idx_to_at_AO, pref, deriv_dim=2)
2856 :
2857 1476 : IF (do_resp) THEN
2858 : CALL get_force_from_3c_trace(force, t_3c_help_2, t_3c_der_AO, atom_of_kind, kind_of, &
2859 276 : idx_to_at_AO, pref, deriv_dim=2)
2860 : END IF
2861 : END DO
2862 :
2863 : !The 3c virial contribution. Note: only fraction of integrals correspondig to i_mem calculated
2864 378 : IF (use_virial) THEN
2865 12 : pref = -0.5_dp*2.0_dp*hf_fraction*spin_fac
2866 12 : CALL dbt_copy(t_3c_help_1, t_3c_virial, move_data=.TRUE.)
2867 : CALL calc_3c_virial(work_virial, t_3c_virial, pref, qs_env, nl_3c, basis_set_RI, &
2868 : basis_set_AO, basis_set_AO, ri_data%ri_metric, &
2869 12 : der_eps=ri_data%eps_schwarz_forces, op_pos=1)
2870 :
2871 12 : CALL dbt_clear(t_3c_virial)
2872 : END IF
2873 :
2874 378 : CALL dbt_clear(t_3c_help_1)
2875 504 : CALL dbt_clear(t_3c_help_2)
2876 : END DO !i_mem
2877 126 : CALL timestop(handle)
2878 :
2879 126 : CALL timeset(routineN//"_2c", handle)
2880 : !Now we deal with all the 2-center quantities
2881 : !Calculate S^-1 * R * S^-1
2882 : CALL dbt_contract(1.0_dp, ri_data%t_2c_inv(1, 1), t_R, 0.0_dp, t_2c_RI_tmp, &
2883 : contract_1=[2], notcontract_1=[1], &
2884 : contract_2=[1], notcontract_2=[2], &
2885 : map_1=[1], map_2=[2], filter_eps=ri_data%filter_eps, &
2886 126 : unit_nr=unit_nr_dbcsr, flop=nflop)
2887 126 : ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
2888 :
2889 : CALL dbt_contract(1.0_dp, t_2c_RI_tmp, ri_data%t_2c_inv(1, 1), 0.0_dp, t_2c_RI, &
2890 : contract_1=[2], notcontract_1=[1], &
2891 : contract_2=[1], notcontract_2=[2], &
2892 : map_1=[1], map_2=[2], filter_eps=ri_data%filter_eps, &
2893 126 : unit_nr=unit_nr_dbcsr, flop=nflop)
2894 126 : ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
2895 :
2896 : !Calculate the potential contribution to the force: [S^-1*R*S^-1]_QR d/dx (Q|R)
2897 126 : pref = 0.5_dp*hf_fraction*spin_fac
2898 126 : IF (.NOT. ri_data%same_op) pref = -pref
2899 126 : CALL get_2c_der_force(force, t_2c_RI, t_2c_der_RI, atom_of_kind, kind_of, idx_to_at_RI, pref)
2900 :
2901 : !Calculate the contribution to the virial on the fly
2902 126 : IF (use_virial_prv) THEN
2903 4 : CALL dbt_copy(t_2c_RI, t_2c_virial)
2904 4 : CALL dbt_copy_tensor_to_matrix(t_2c_virial, virial_trace)
2905 : CALL calc_2c_virial(work_virial, virial_trace, pref, qs_env, nl_2c_pot, &
2906 4 : basis_set_RI, basis_set_RI, ri_data%hfx_pot)
2907 : END IF
2908 :
2909 : !And that from the metric: [S^-1*R*S^-1*(Q|R)*S^-1]_UV d/dx S_UV
2910 126 : IF (.NOT. ri_data%same_op) THEN
2911 : CALL dbt_contract(1.0_dp, t_2c_RI, ri_data%t_2c_pot(1, 1), 0.0_dp, t_2c_RI_tmp, &
2912 : contract_1=[2], notcontract_1=[1], &
2913 : contract_2=[1], notcontract_2=[2], &
2914 : map_1=[1], map_2=[2], filter_eps=ri_data%filter_eps, &
2915 28 : unit_nr=unit_nr_dbcsr, flop=nflop)
2916 28 : ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
2917 :
2918 : CALL dbt_contract(1.0_dp, t_2c_RI_tmp, ri_data%t_2c_inv(1, 1), 0.0_dp, t_2c_RI, &
2919 : contract_1=[2], notcontract_1=[1], &
2920 : contract_2=[1], notcontract_2=[2], &
2921 : map_1=[1], map_2=[2], filter_eps=ri_data%filter_eps, &
2922 28 : unit_nr=unit_nr_dbcsr, flop=nflop)
2923 28 : ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
2924 :
2925 28 : pref = 0.5_dp*2.0_dp*hf_fraction*spin_fac
2926 28 : CALL get_2c_der_force(force, t_2c_RI, t_2c_der_metric, atom_of_kind, kind_of, idx_to_at_RI, pref)
2927 :
2928 28 : IF (use_virial_prv) THEN
2929 0 : CALL dbt_copy(t_2c_RI, t_2c_virial)
2930 0 : CALL dbt_copy_tensor_to_matrix(t_2c_virial, virial_trace)
2931 : CALL calc_2c_virial(work_virial, virial_trace, pref, qs_env, nl_2c_met, &
2932 0 : basis_set_RI, basis_set_RI, ri_data%ri_metric)
2933 : END IF
2934 : END IF
2935 126 : CALL dbt_clear(t_2c_RI)
2936 126 : CALL dbt_clear(t_2c_RI_tmp)
2937 126 : CALL dbt_clear(t_R)
2938 126 : CALL dbt_clear(t_3c_help_1)
2939 126 : CALL dbt_clear(t_3c_help_2)
2940 126 : CALL timestop(handle)
2941 :
2942 494 : IF (use_virial_prv) THEN
2943 16 : DO k_xyz = 1, 3
2944 52 : DO j_xyz = 1, 3
2945 156 : DO i_xyz = 1, 3
2946 : virial%pv_fock_4c(i_xyz, j_xyz) = virial%pv_fock_4c(i_xyz, j_xyz) &
2947 144 : + work_virial(i_xyz, k_xyz)*cell%hmat(j_xyz, k_xyz)
2948 : END DO
2949 : END DO
2950 : END DO
2951 : END IF
2952 : END DO !i_spin
2953 :
2954 116 : CALL dbt_batched_contract_finalize(t_3c_int)
2955 116 : CALL dbt_batched_contract_finalize(t_3c_int_2)
2956 116 : CALL dbt_batched_contract_finalize(t_3c_1)
2957 116 : CALL dbt_batched_contract_finalize(t_3c_2)
2958 116 : CALL dbt_batched_contract_finalize(t_3c_3)
2959 116 : CALL dbt_batched_contract_finalize(t_3c_4)
2960 116 : CALL dbt_batched_contract_finalize(t_3c_5)
2961 116 : CALL dbt_batched_contract_finalize(t_3c_sparse)
2962 :
2963 116 : CALL para_env%sync()
2964 116 : t2 = m_walltime()
2965 :
2966 116 : CALL dbt_copy(t_3c_int, ri_data%t_3c_int_ctr_2(1, 1), move_data=.TRUE.)
2967 :
2968 : !clean-up
2969 116 : CALL dbt_destroy(rho_ao_1)
2970 116 : CALL dbt_destroy(rho_ao_2)
2971 116 : CALL dbt_destroy(t_3c_ao_ri_ao)
2972 116 : CALL dbt_destroy(t_3c_ri_ao_ao)
2973 116 : CALL dbt_destroy(t_3c_int)
2974 116 : CALL dbt_destroy(t_3c_int_2)
2975 116 : CALL dbt_destroy(t_3c_1)
2976 116 : CALL dbt_destroy(t_3c_2)
2977 116 : CALL dbt_destroy(t_3c_3)
2978 116 : CALL dbt_destroy(t_3c_4)
2979 116 : CALL dbt_destroy(t_3c_5)
2980 116 : CALL dbt_destroy(t_3c_help_1)
2981 116 : CALL dbt_destroy(t_3c_help_2)
2982 116 : CALL dbt_destroy(t_3c_sparse)
2983 116 : CALL dbt_destroy(t_SVS)
2984 116 : CALL dbt_destroy(t_R)
2985 116 : CALL dbt_destroy(t_2c_RI)
2986 116 : CALL dbt_destroy(t_2c_RI_tmp)
2987 :
2988 464 : DO i_xyz = 1, 3
2989 348 : CALL dbt_destroy(t_3c_der_RI(i_xyz))
2990 348 : CALL dbt_destroy(t_3c_der_AO(i_xyz))
2991 348 : CALL dbt_destroy(t_2c_der_RI(i_xyz))
2992 464 : IF (.NOT. ri_data%same_op) CALL dbt_destroy(t_2c_der_metric(i_xyz))
2993 : END DO
2994 :
2995 464 : DO i_xyz = 1, 3
2996 1490 : DO i_mem = 1, SIZE(t_3c_der_AO_comp, 1)
2997 1026 : CALL dealloc_containers(t_3c_der_AO_comp(i_mem, i_xyz), dummy_int)
2998 1374 : CALL dealloc_containers(t_3c_der_RI_comp(i_mem, i_xyz), dummy_int)
2999 : END DO
3000 : END DO
3001 2168 : DEALLOCATE (t_3c_der_AO_ind, t_3c_der_RI_ind)
3002 :
3003 342 : DO ibasis = 1, SIZE(basis_set_AO)
3004 226 : orb_basis => basis_set_AO(ibasis)%gto_basis_set
3005 226 : ri_basis => basis_set_RI(ibasis)%gto_basis_set
3006 226 : CALL init_interaction_radii_orb_basis(orb_basis, dft_control%qs_control%eps_pgf_orb)
3007 342 : CALL init_interaction_radii_orb_basis(ri_basis, dft_control%qs_control%eps_pgf_orb)
3008 : END DO
3009 :
3010 116 : IF (use_virial) THEN
3011 4 : CALL release_neighbor_list_sets(nl_2c_met)
3012 4 : CALL release_neighbor_list_sets(nl_2c_pot)
3013 4 : CALL neighbor_list_3c_destroy(nl_3c)
3014 4 : CALL dbcsr_release(virial_trace)
3015 4 : CALL dbt_destroy(t_2c_virial)
3016 4 : CALL dbt_destroy(t_3c_virial)
3017 : END IF
3018 :
3019 2552 : END SUBROUTINE hfx_ri_forces_Pmat
3020 :
3021 : ! **************************************************************************************************
3022 : !> \brief the general routine that calls the relevant force code
3023 : !> \param qs_env ...
3024 : !> \param ri_data ...
3025 : !> \param nspins ...
3026 : !> \param hf_fraction ...
3027 : !> \param rho_ao ...
3028 : !> \param rho_ao_resp ...
3029 : !> \param mos ...
3030 : !> \param use_virial ...
3031 : !> \param resp_only ...
3032 : !> \param rescale_factor ...
3033 : ! **************************************************************************************************
3034 130 : SUBROUTINE hfx_ri_update_forces(qs_env, ri_data, nspins, hf_fraction, rho_ao, rho_ao_resp, &
3035 130 : mos, use_virial, resp_only, rescale_factor)
3036 :
3037 : TYPE(qs_environment_type), POINTER :: qs_env
3038 : TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
3039 : INTEGER, INTENT(IN) :: nspins
3040 : REAL(KIND=dp), INTENT(IN) :: hf_fraction
3041 : TYPE(dbcsr_p_type), DIMENSION(:, :), OPTIONAL :: rho_ao
3042 : TYPE(dbcsr_p_type), DIMENSION(:), OPTIONAL :: rho_ao_resp
3043 : TYPE(mo_set_type), DIMENSION(:), INTENT(IN), &
3044 : OPTIONAL :: mos
3045 : LOGICAL, INTENT(IN), OPTIONAL :: use_virial, resp_only
3046 : REAL(dp), INTENT(IN), OPTIONAL :: rescale_factor
3047 :
3048 : CHARACTER(LEN=*), PARAMETER :: routineN = 'hfx_ri_update_forces'
3049 :
3050 : INTEGER :: handle, ispin
3051 : INTEGER, DIMENSION(2) :: homo
3052 130 : REAL(KIND=dp), DIMENSION(:), POINTER :: mo_eigenvalues
3053 : TYPE(cp_fm_type), POINTER :: mo_coeff
3054 390 : TYPE(dbcsr_type), DIMENSION(2) :: mo_coeff_b
3055 : TYPE(dbcsr_type), POINTER :: mo_coeff_b_tmp
3056 :
3057 130 : CALL timeset(routineN, handle)
3058 :
3059 144 : SELECT CASE (ri_data%flavor)
3060 : CASE (ri_mo)
3061 :
3062 32 : DO ispin = 1, nspins
3063 18 : NULLIFY (mo_coeff_b_tmp)
3064 18 : CPASSERT(mos(ispin)%uniform_occupation)
3065 18 : CALL get_mo_set(mo_set=mos(ispin), mo_coeff=mo_coeff, eigenvalues=mo_eigenvalues, mo_coeff_b=mo_coeff_b_tmp)
3066 :
3067 18 : IF (.NOT. mos(ispin)%use_mo_coeff_b) CALL copy_fm_to_dbcsr(mo_coeff, mo_coeff_b_tmp)
3068 32 : CALL dbcsr_copy(mo_coeff_b(ispin), mo_coeff_b_tmp)
3069 : END DO
3070 :
3071 32 : DO ispin = 1, nspins
3072 18 : CALL dbcsr_scale(mo_coeff_b(ispin), SQRT(mos(ispin)%maxocc))
3073 14 : homo(ispin) = mos(ispin)%homo
3074 : END DO
3075 :
3076 14 : CALL hfx_ri_forces_mo(qs_env, ri_data, nspins, hf_fraction, mo_coeff_b, use_virial)
3077 :
3078 : CASE (ri_pmat)
3079 :
3080 : CALL hfx_ri_forces_Pmat(qs_env, ri_data, nspins, hf_fraction, rho_ao, rho_ao_resp, use_virial, &
3081 216 : resp_only, rescale_factor)
3082 : END SELECT
3083 :
3084 274 : DO ispin = 1, nspins
3085 274 : CALL dbcsr_release(mo_coeff_b(ispin))
3086 : END DO
3087 :
3088 130 : CALL timestop(handle)
3089 :
3090 130 : END SUBROUTINE hfx_ri_update_forces
3091 :
3092 : ! **************************************************************************************************
3093 : !> \brief Calculate the derivatives tensors for the force, in a format fit for contractions
3094 : !> \param t_3c_der_RI_comp compressed RI derivatives
3095 : !> \param t_3c_der_AO_comp compressed AO derivatives
3096 : !> \param t_3c_der_RI_ind ...
3097 : !> \param t_3c_der_AO_ind ...
3098 : !> \param t_2c_der_RI format based on standard atomic block sizes
3099 : !> \param t_2c_der_metric format based on standard atomic block sizes
3100 : !> \param ri_ao_ao_template ...
3101 : !> \param basis_set_AO ...
3102 : !> \param basis_set_RI ...
3103 : !> \param ri_data ...
3104 : !> \param qs_env ...
3105 : !> \param nl_2c_pot ...
3106 : !> \param nl_2c_met ...
3107 : !> \param nl_3c_out ...
3108 : !> \param t_3c_virial ...
3109 : ! **************************************************************************************************
3110 4160 : SUBROUTINE precalc_derivatives(t_3c_der_RI_comp, t_3c_der_AO_comp, t_3c_der_RI_ind, t_3c_der_AO_ind, &
3111 : t_2c_der_RI, t_2c_der_metric, ri_ao_ao_template, &
3112 : basis_set_AO, basis_set_RI, ri_data, qs_env, &
3113 : nl_2c_pot, nl_2c_met, nl_3c_out, t_3c_virial)
3114 :
3115 : TYPE(hfx_compression_type), ALLOCATABLE, &
3116 : DIMENSION(:, :), INTENT(INOUT) :: t_3c_der_RI_comp, t_3c_der_AO_comp
3117 : TYPE(block_ind_type), ALLOCATABLE, DIMENSION(:, :) :: t_3c_der_RI_ind, t_3c_der_AO_ind
3118 : TYPE(dbt_type), DIMENSION(3), INTENT(OUT) :: t_2c_der_RI, t_2c_der_metric
3119 : TYPE(dbt_type), INTENT(INOUT) :: ri_ao_ao_template
3120 : TYPE(gto_basis_set_p_type), ALLOCATABLE, &
3121 : DIMENSION(:), TARGET :: basis_set_AO, basis_set_RI
3122 : TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
3123 : TYPE(qs_environment_type), POINTER :: qs_env
3124 : TYPE(neighbor_list_set_p_type), DIMENSION(:), &
3125 : OPTIONAL, POINTER :: nl_2c_pot, nl_2c_met
3126 : TYPE(neighbor_list_3c_type), OPTIONAL :: nl_3c_out
3127 : TYPE(dbt_type), INTENT(INOUT), OPTIONAL :: t_3c_virial
3128 :
3129 : CHARACTER(LEN=*), PARAMETER :: routineN = 'precalc_derivatives'
3130 :
3131 : INTEGER :: handle, i_mem, i_xyz, n_mem, natom, &
3132 : nkind, nthreads
3133 : INTEGER(int_8) :: nze, nze_tot
3134 130 : INTEGER, ALLOCATABLE, DIMENSION(:) :: dist1, dist2, dist_AO_1, dist_AO_2, &
3135 130 : dist_RI, dummy_end, dummy_start, &
3136 260 : end_blocks, start_blocks
3137 : INTEGER, DIMENSION(3) :: pcoord, pdims
3138 260 : INTEGER, DIMENSION(:), POINTER :: col_bsize, row_bsize
3139 : REAL(dp) :: compression_factor, memory, occ
3140 : TYPE(dbcsr_distribution_type) :: dbcsr_dist
3141 1690 : TYPE(dbcsr_type), DIMENSION(1, 3) :: t_2c_der_metric_prv, t_2c_der_RI_prv
3142 390 : TYPE(dbt_pgrid_type) :: pgrid
3143 3380 : TYPE(dbt_type) :: t_2c_template, t_2c_tmp, t_3c_template
3144 130 : TYPE(dbt_type), ALLOCATABLE, DIMENSION(:, :, :) :: t_3c_der_AO_prv, t_3c_der_RI_prv
3145 : TYPE(dft_control_type), POINTER :: dft_control
3146 : TYPE(distribution_2d_type), POINTER :: dist_2d
3147 : TYPE(distribution_3d_type) :: dist_3d, dist_3d_out
3148 130 : TYPE(mp_cart_type) :: mp_comm_t3c, mp_comm_t3c_out
3149 : TYPE(mp_para_env_type), POINTER :: para_env
3150 : TYPE(neighbor_list_3c_type) :: nl_3c
3151 : TYPE(neighbor_list_set_p_type), DIMENSION(:), &
3152 130 : POINTER :: nl_2c
3153 130 : TYPE(particle_type), DIMENSION(:), POINTER :: particle_set
3154 130 : TYPE(qs_kind_type), DIMENSION(:), POINTER :: qs_kind_set
3155 :
3156 130 : NULLIFY (qs_kind_set, dist_2d, nl_2c, particle_set, dft_control, para_env)
3157 :
3158 130 : CALL timeset(routineN, handle)
3159 :
3160 : CALL get_qs_env(qs_env, nkind=nkind, qs_kind_set=qs_kind_set, distribution_2d=dist_2d, natom=natom, &
3161 130 : particle_set=particle_set, dft_control=dft_control, para_env=para_env)
3162 :
3163 : !TODO: is such a pgrid correct?
3164 : !Dealing with the 3c derivatives
3165 130 : nthreads = 1
3166 130 : !$ nthreads = omp_get_num_threads()
3167 130 : pdims = 0
3168 520 : CALL dbt_pgrid_create(para_env, pdims, pgrid, tensor_dims=[MAX(1, natom/(ri_data%n_mem*nthreads)), natom, natom])
3169 :
3170 : CALL create_3c_tensor(t_3c_template, dist_RI, dist_AO_1, dist_AO_2, pgrid, &
3171 : ri_data%bsizes_RI, ri_data%bsizes_AO, ri_data%bsizes_AO, &
3172 : map1=[1], map2=[2, 3], &
3173 130 : name="der (RI AO | AO)")
3174 :
3175 4550 : ALLOCATE (t_3c_der_AO_prv(1, 1, 3), t_3c_der_RI_prv(1, 1, 3))
3176 520 : DO i_xyz = 1, 3
3177 390 : CALL dbt_create(t_3c_template, t_3c_der_RI_prv(1, 1, i_xyz))
3178 520 : CALL dbt_create(t_3c_template, t_3c_der_AO_prv(1, 1, i_xyz))
3179 : END DO
3180 130 : IF (PRESENT(t_3c_virial)) THEN
3181 4 : CALL dbt_create(t_3c_template, t_3c_virial)
3182 : END IF
3183 130 : CALL dbt_destroy(t_3c_template)
3184 :
3185 130 : CALL dbt_mp_environ_pgrid(pgrid, pdims, pcoord)
3186 130 : CALL mp_comm_t3c%create(pgrid%mp_comm_2d, 3, pdims)
3187 : CALL distribution_3d_create(dist_3d, dist_RI, dist_AO_1, dist_AO_2, &
3188 130 : nkind, particle_set, mp_comm_t3c, own_comm=.TRUE.)
3189 :
3190 : CALL build_3c_neighbor_lists(nl_3c, basis_set_RI, basis_set_AO, basis_set_AO, dist_3d, ri_data%ri_metric, &
3191 130 : "HFX_3c_nl", qs_env, op_pos=1, sym_jk=.TRUE., own_dist=.TRUE.)
3192 :
3193 130 : IF (PRESENT(nl_3c_out)) THEN
3194 4 : CALL dbt_mp_environ_pgrid(pgrid, pdims, pcoord)
3195 4 : CALL mp_comm_t3c_out%create(pgrid%mp_comm_2d, 3, pdims)
3196 : CALL distribution_3d_create(dist_3d_out, dist_RI, dist_AO_1, dist_AO_2, &
3197 4 : nkind, particle_set, mp_comm_t3c_out, own_comm=.TRUE.)
3198 : CALL build_3c_neighbor_lists(nl_3c_out, basis_set_RI, basis_set_AO, basis_set_AO, dist_3d_out, &
3199 : ri_data%ri_metric, "HFX_3c_nl", qs_env, op_pos=1, sym_jk=.FALSE., &
3200 4 : own_dist=.TRUE.)
3201 : END IF
3202 130 : DEALLOCATE (dist_RI, dist_AO_1, dist_AO_2)
3203 130 : CALL dbt_pgrid_destroy(pgrid)
3204 :
3205 130 : n_mem = ri_data%n_mem
3206 : CALL create_tensor_batches(ri_data%bsizes_RI, n_mem, dummy_start, dummy_end, &
3207 130 : start_blocks, end_blocks)
3208 130 : DEALLOCATE (dummy_start, dummy_end)
3209 :
3210 553008 : ALLOCATE (t_3c_der_AO_comp(n_mem, 3), t_3c_der_RI_comp(n_mem, 3))
3211 3628 : ALLOCATE (t_3c_der_AO_ind(n_mem, 3), t_3c_der_RI_ind(n_mem, 3))
3212 :
3213 130 : memory = 0.0_dp
3214 130 : nze_tot = 0
3215 518 : DO i_mem = 1, n_mem
3216 : CALL build_3c_derivatives(t_3c_der_RI_prv, t_3c_der_AO_prv, ri_data%filter_eps, qs_env, &
3217 : nl_3c, basis_set_RI, basis_set_AO, basis_set_AO, &
3218 : ri_data%ri_metric, der_eps=ri_data%eps_schwarz_forces, op_pos=1, &
3219 1164 : bounds_i=[start_blocks(i_mem), end_blocks(i_mem)])
3220 :
3221 1682 : DO i_xyz = 1, 3
3222 1164 : CALL dbt_copy(t_3c_der_RI_prv(1, 1, i_xyz), ri_ao_ao_template, move_data=.TRUE.)
3223 1164 : CALL dbt_filter(ri_ao_ao_template, ri_data%filter_eps)
3224 1164 : CALL get_tensor_occupancy(ri_ao_ao_template, nze, occ)
3225 1164 : nze_tot = nze_tot + nze
3226 :
3227 1164 : CALL alloc_containers(t_3c_der_RI_comp(i_mem, i_xyz), 1)
3228 : CALL compress_tensor(ri_ao_ao_template, t_3c_der_RI_ind(i_mem, i_xyz)%ind, &
3229 1164 : t_3c_der_RI_comp(i_mem, i_xyz), ri_data%filter_eps_storage, memory)
3230 1164 : CALL dbt_clear(ri_ao_ao_template)
3231 :
3232 : !put AO derivative as middle index
3233 1164 : CALL dbt_copy(t_3c_der_AO_prv(1, 1, i_xyz), ri_ao_ao_template, order=[1, 3, 2], move_data=.TRUE.)
3234 1164 : CALL dbt_filter(ri_ao_ao_template, ri_data%filter_eps)
3235 1164 : CALL get_tensor_occupancy(ri_ao_ao_template, nze, occ)
3236 1164 : nze_tot = nze_tot + nze
3237 :
3238 1164 : CALL alloc_containers(t_3c_der_AO_comp(i_mem, i_xyz), 1)
3239 : CALL compress_tensor(ri_ao_ao_template, t_3c_der_AO_ind(i_mem, i_xyz)%ind, &
3240 1164 : t_3c_der_AO_comp(i_mem, i_xyz), ri_data%filter_eps_storage, memory)
3241 3880 : CALL dbt_clear(ri_ao_ao_template)
3242 : END DO
3243 : END DO
3244 :
3245 130 : CALL neighbor_list_3c_destroy(nl_3c)
3246 520 : DO i_xyz = 1, 3
3247 390 : CALL dbt_destroy(t_3c_der_RI_prv(1, 1, i_xyz))
3248 520 : CALL dbt_destroy(t_3c_der_AO_prv(1, 1, i_xyz))
3249 : END DO
3250 :
3251 130 : CALL para_env%sum(memory)
3252 130 : compression_factor = REAL(nze_tot, dp)*1.0E-06*8.0_dp/memory
3253 130 : IF (ri_data%unit_nr > 0) THEN
3254 : WRITE (UNIT=ri_data%unit_nr, FMT="((T3,A,T66,F11.2,A4))") &
3255 14 : "MEMORY_INFO| Memory for 3-center HFX derivatives (compressed):", memory, ' MiB'
3256 :
3257 : WRITE (UNIT=ri_data%unit_nr, FMT="((T3,A,T60,F21.2))") &
3258 14 : "MEMORY_INFO| Compression factor: ", compression_factor
3259 : END IF
3260 :
3261 : !Deal with the 2-center derivatives
3262 130 : CALL cp_dbcsr_dist2d_to_dist(dist_2d, dbcsr_dist)
3263 390 : ALLOCATE (row_bsize(SIZE(ri_data%bsizes_RI)))
3264 260 : ALLOCATE (col_bsize(SIZE(ri_data%bsizes_RI)))
3265 530 : row_bsize(:) = ri_data%bsizes_RI
3266 530 : col_bsize(:) = ri_data%bsizes_RI
3267 :
3268 : CALL build_2c_neighbor_lists(nl_2c, basis_set_RI, basis_set_RI, ri_data%hfx_pot, &
3269 130 : "HFX_2c_nl_pot", qs_env, sym_ij=.TRUE., dist_2d=dist_2d)
3270 :
3271 520 : DO i_xyz = 1, 3
3272 : CALL dbcsr_create(t_2c_der_RI_prv(1, i_xyz), "(R|P) HFX der", dbcsr_dist, &
3273 520 : dbcsr_type_antisymmetric, row_bsize, col_bsize)
3274 : END DO
3275 :
3276 : CALL build_2c_derivatives(t_2c_der_RI_prv, ri_data%filter_eps_2c, qs_env, nl_2c, basis_set_RI, &
3277 130 : basis_set_RI, ri_data%hfx_pot)
3278 130 : CALL release_neighbor_list_sets(nl_2c)
3279 :
3280 130 : IF (PRESENT(nl_2c_pot)) THEN
3281 4 : NULLIFY (nl_2c_pot)
3282 : CALL build_2c_neighbor_lists(nl_2c_pot, basis_set_RI, basis_set_RI, ri_data%hfx_pot, &
3283 4 : "HFX_2c_nl_pot", qs_env, sym_ij=.FALSE., dist_2d=dist_2d)
3284 : END IF
3285 :
3286 : !copy 2c derivative tensor into the standard format
3287 : CALL create_2c_tensor(t_2c_template, dist1, dist2, ri_data%pgrid_2d, ri_data%bsizes_RI_split, &
3288 130 : ri_data%bsizes_RI_split, name='(RI| RI)')
3289 130 : DEALLOCATE (dist1, dist2)
3290 :
3291 520 : DO i_xyz = 1, 3
3292 390 : CALL dbt_create(t_2c_der_RI_prv(1, i_xyz), t_2c_tmp)
3293 390 : CALL dbt_copy_matrix_to_tensor(t_2c_der_RI_prv(1, i_xyz), t_2c_tmp)
3294 :
3295 390 : CALL dbt_create(t_2c_template, t_2c_der_RI(i_xyz))
3296 390 : CALL dbt_copy(t_2c_tmp, t_2c_der_RI(i_xyz), move_data=.TRUE.)
3297 :
3298 390 : CALL dbt_destroy(t_2c_tmp)
3299 520 : CALL dbcsr_release(t_2c_der_RI_prv(1, i_xyz))
3300 : END DO
3301 :
3302 : !Repeat with the metric, if required
3303 130 : IF (.NOT. ri_data%same_op) THEN
3304 :
3305 : CALL build_2c_neighbor_lists(nl_2c, basis_set_RI, basis_set_RI, ri_data%ri_metric, &
3306 32 : "HFX_2c_nl_RI", qs_env, sym_ij=.TRUE., dist_2d=dist_2d)
3307 :
3308 128 : DO i_xyz = 1, 3
3309 : CALL dbcsr_create(t_2c_der_metric_prv(1, i_xyz), "(R|P) HFX der", dbcsr_dist, &
3310 128 : dbcsr_type_antisymmetric, row_bsize, col_bsize)
3311 : END DO
3312 :
3313 : CALL build_2c_derivatives(t_2c_der_metric_prv, ri_data%filter_eps_2c, qs_env, nl_2c, &
3314 32 : basis_set_RI, basis_set_RI, ri_data%ri_metric)
3315 32 : CALL release_neighbor_list_sets(nl_2c)
3316 :
3317 32 : IF (PRESENT(nl_2c_met)) THEN
3318 0 : NULLIFY (nl_2c_met)
3319 : CALL build_2c_neighbor_lists(nl_2c_met, basis_set_RI, basis_set_RI, ri_data%ri_metric, &
3320 0 : "HFX_2c_nl_RI", qs_env, sym_ij=.FALSE., dist_2d=dist_2d)
3321 : END IF
3322 :
3323 128 : DO i_xyz = 1, 3
3324 96 : CALL dbt_create(t_2c_der_metric_prv(1, i_xyz), t_2c_tmp)
3325 96 : CALL dbt_copy_matrix_to_tensor(t_2c_der_metric_prv(1, i_xyz), t_2c_tmp)
3326 :
3327 96 : CALL dbt_create(t_2c_template, t_2c_der_metric(i_xyz))
3328 96 : CALL dbt_copy(t_2c_tmp, t_2c_der_metric(i_xyz), move_data=.TRUE.)
3329 :
3330 96 : CALL dbt_destroy(t_2c_tmp)
3331 128 : CALL dbcsr_release(t_2c_der_metric_prv(1, i_xyz))
3332 : END DO
3333 :
3334 : END IF
3335 :
3336 130 : CALL dbt_destroy(t_2c_template)
3337 130 : CALL dbcsr_distribution_release(dbcsr_dist)
3338 130 : DEALLOCATE (row_bsize, col_bsize)
3339 :
3340 130 : CALL timestop(handle)
3341 :
3342 1690 : END SUBROUTINE precalc_derivatives
3343 :
3344 : ! **************************************************************************************************
3345 : !> \brief This routines calculates the force contribution from a trace over 3D tensors, i.e.
3346 : !> force = sum_ijk A_ijk B_ijk. An iteration over the blocks is made, which index determin
3347 : !> the atom on which the force acts
3348 : !> \param force ...
3349 : !> \param t_3c_contr ...
3350 : !> \param t_3c_der ...
3351 : !> \param atom_of_kind ...
3352 : !> \param kind_of ...
3353 : !> \param idx_to_at ...
3354 : !> \param pref ...
3355 : !> \param do_mp2 ...
3356 : !> \param deriv_dim the dimension of the tensor corresponding to the derivative (by default 1)
3357 : ! **************************************************************************************************
3358 3862 : SUBROUTINE get_force_from_3c_trace(force, t_3c_contr, t_3c_der, atom_of_kind, kind_of, idx_to_at, &
3359 : pref, do_mp2, deriv_dim)
3360 :
3361 : TYPE(qs_force_type), DIMENSION(:), POINTER :: force
3362 : TYPE(dbt_type), INTENT(INOUT) :: t_3c_contr
3363 : TYPE(dbt_type), DIMENSION(3), INTENT(INOUT) :: t_3c_der
3364 : INTEGER, DIMENSION(:), INTENT(IN) :: atom_of_kind, kind_of, idx_to_at
3365 : REAL(dp), INTENT(IN) :: pref
3366 : LOGICAL, INTENT(IN), OPTIONAL :: do_mp2
3367 : INTEGER, INTENT(IN), OPTIONAL :: deriv_dim
3368 :
3369 : CHARACTER(LEN=*), PARAMETER :: routineN = 'get_force_from_3c_trace'
3370 :
3371 : INTEGER :: deriv_dim_prv, handle, i_xyz, iat, &
3372 : iat_of_kind, ikind, j_xyz
3373 : INTEGER, DIMENSION(3) :: ind
3374 : LOGICAL :: do_mp2_prv, found
3375 : REAL(dp) :: new_force
3376 3862 : REAL(dp), ALLOCATABLE, DIMENSION(:, :, :), TARGET :: contr_blk, der_blk
3377 : REAL(dp), DIMENSION(3) :: scoord
3378 : TYPE(dbt_iterator_type) :: iter
3379 :
3380 3862 : CALL timeset(routineN, handle)
3381 :
3382 3862 : do_mp2_prv = .FALSE.
3383 3862 : IF (PRESENT(do_mp2)) do_mp2_prv = do_mp2
3384 :
3385 3862 : deriv_dim_prv = 1
3386 3862 : IF (PRESENT(deriv_dim)) deriv_dim_prv = deriv_dim
3387 :
3388 : !$OMP PARALLEL DEFAULT(NONE) &
3389 : !$OMP SHARED(t_3c_der,t_3c_contr,force,do_mp2_prv,deriv_dim_prv,pref,idx_to_at,atom_of_kind,kind_of) &
3390 3862 : !$OMP PRIVATE(i_xyz,j_xyz,iter,ind,der_blk,contr_blk,found,new_force,iat,iat_of_kind,ikind,scoord)
3391 : DO i_xyz = 1, 3
3392 : CALL dbt_iterator_start(iter, t_3c_der(i_xyz))
3393 : DO WHILE (dbt_iterator_blocks_left(iter))
3394 : CALL dbt_iterator_next_block(iter, ind)
3395 :
3396 : CALL dbt_get_block(t_3c_der(i_xyz), ind, der_blk, found)
3397 : CPASSERT(found)
3398 : CALL dbt_get_block(t_3c_contr, ind, contr_blk, found)
3399 :
3400 : IF (found) THEN
3401 :
3402 : !take the trace of the blocks
3403 : new_force = pref*SUM(der_blk(:, :, :)*contr_blk(:, :, :))
3404 :
3405 : !the first index of the derivative tensor defines the atom
3406 : iat = idx_to_at(ind(deriv_dim_prv))
3407 : iat_of_kind = atom_of_kind(iat)
3408 : ikind = kind_of(iat)
3409 :
3410 : IF (.NOT. do_mp2_prv) THEN
3411 : !$OMP ATOMIC
3412 : force(ikind)%fock_4c(i_xyz, iat_of_kind) = force(ikind)%fock_4c(i_xyz, iat_of_kind) &
3413 : + new_force
3414 : ELSE
3415 : !$OMP ATOMIC
3416 : force(ikind)%mp2_non_sep(i_xyz, iat_of_kind) = force(ikind)%mp2_non_sep(i_xyz, iat_of_kind) &
3417 : + new_force
3418 : END IF
3419 :
3420 : DEALLOCATE (contr_blk)
3421 : END IF
3422 : DEALLOCATE (der_blk)
3423 : END DO !iter
3424 : CALL dbt_iterator_stop(iter)
3425 : END DO
3426 : !$OMP END PARALLEL
3427 3862 : CALL timestop(handle)
3428 :
3429 7724 : END SUBROUTINE get_force_from_3c_trace
3430 :
3431 : ! **************************************************************************************************
3432 : !> \brief Update the forces due to the derivative of the a 2-center product d/dR (Q|R)
3433 : !> \param force ...
3434 : !> \param t_2c_contr A precontracted tensor containing sum_abcdPS (ab|P)(P|Q)^-1 (R|S)^-1 (S|cd) P_ac P_bd
3435 : !> \param t_2c_der the d/dR (Q|R) tensor, in all 3 cartesian directions
3436 : !> \param atom_of_kind ...
3437 : !> \param kind_of ...
3438 : !> \param idx_to_at ...
3439 : !> \param pref ...
3440 : !> \param do_mp2 ...
3441 : !> \param do_ovlp ...
3442 : !> \note IMPORTANT: t_tc_contr and t_2c_der need to have the same distribution
3443 : ! **************************************************************************************************
3444 610 : SUBROUTINE get_2c_der_force(force, t_2c_contr, t_2c_der, atom_of_kind, kind_of, idx_to_at, &
3445 : pref, do_mp2, do_ovlp)
3446 :
3447 : TYPE(qs_force_type), DIMENSION(:), POINTER :: force
3448 : TYPE(dbt_type), INTENT(INOUT) :: t_2c_contr
3449 : TYPE(dbt_type), DIMENSION(3), INTENT(INOUT) :: t_2c_der
3450 : INTEGER, DIMENSION(:), INTENT(IN) :: atom_of_kind, kind_of, idx_to_at
3451 : REAL(dp), INTENT(IN) :: pref
3452 : LOGICAL, INTENT(IN), OPTIONAL :: do_mp2, do_ovlp
3453 :
3454 : CHARACTER(LEN=*), PARAMETER :: routineN = 'get_2c_der_force'
3455 :
3456 : INTEGER :: handle, i_xyz, iat, iat_of_kind, ikind, &
3457 : j_xyz, jat, jat_of_kind, jkind
3458 : INTEGER, DIMENSION(2) :: ind
3459 : LOGICAL :: do_mp2_prv, do_ovlp_prv, found
3460 : REAL(dp) :: new_force
3461 610 : REAL(dp), ALLOCATABLE, DIMENSION(:, :), TARGET :: contr_blk, der_blk
3462 : REAL(dp), DIMENSION(3) :: scoord
3463 : TYPE(dbt_iterator_type) :: iter
3464 :
3465 : !Loop over the blocks of d/dR (Q|R), contract with the corresponding block of t_2c_contr and
3466 : !update the relevant force
3467 :
3468 610 : CALL timeset(routineN, handle)
3469 :
3470 610 : do_mp2_prv = .FALSE.
3471 610 : IF (PRESENT(do_mp2)) do_mp2_prv = do_mp2
3472 :
3473 610 : do_ovlp_prv = .FALSE.
3474 610 : IF (PRESENT(do_ovlp)) do_ovlp_prv = do_ovlp
3475 :
3476 : !$OMP PARALLEL DEFAULT(NONE) &
3477 : !$OMP SHARED(t_2c_der,t_2c_contr,force,do_mp2_prv,do_ovlp_prv,pref,idx_to_at,atom_of_kind,kind_of) &
3478 : !$OMP PRIVATE(i_xyz,j_xyz,iter,ind,der_blk,contr_blk,found,new_force) &
3479 610 : !$OMP PRIVATE(iat,jat,iat_of_kind,jat_of_kind,ikind,jkind,scoord)
3480 : DO i_xyz = 1, 3
3481 : CALL dbt_iterator_start(iter, t_2c_der(i_xyz))
3482 : DO WHILE (dbt_iterator_blocks_left(iter))
3483 : CALL dbt_iterator_next_block(iter, ind)
3484 :
3485 : IF (ind(1) == ind(2)) CYCLE
3486 :
3487 : CALL dbt_get_block(t_2c_der(i_xyz), ind, der_blk, found)
3488 : CPASSERT(found)
3489 : CALL dbt_get_block(t_2c_contr, ind, contr_blk, found)
3490 :
3491 : IF (found) THEN
3492 :
3493 : !an element of d/dR (Q|R) corresponds to 2 things because of translational invariance
3494 : !(Q'| R) = - (Q| R'), once wrt the center on Q, and once on R
3495 : new_force = pref*SUM(der_blk(:, :)*contr_blk(:, :))
3496 :
3497 : iat = idx_to_at(ind(1))
3498 : iat_of_kind = atom_of_kind(iat)
3499 : ikind = kind_of(iat)
3500 :
3501 : IF (do_mp2_prv) THEN
3502 : !$OMP ATOMIC
3503 : force(ikind)%mp2_non_sep(i_xyz, iat_of_kind) = force(ikind)%mp2_non_sep(i_xyz, iat_of_kind) &
3504 : + new_force
3505 : ELSE IF (do_ovlp_prv) THEN
3506 : !$OMP ATOMIC
3507 : force(ikind)%overlap(i_xyz, iat_of_kind) = force(ikind)%overlap(i_xyz, iat_of_kind) &
3508 : + new_force
3509 : ELSE
3510 : !$OMP ATOMIC
3511 : force(ikind)%fock_4c(i_xyz, iat_of_kind) = force(ikind)%fock_4c(i_xyz, iat_of_kind) &
3512 : + new_force
3513 : END IF
3514 :
3515 : jat = idx_to_at(ind(2))
3516 : jat_of_kind = atom_of_kind(jat)
3517 : jkind = kind_of(jat)
3518 :
3519 : IF (do_mp2_prv) THEN
3520 : !$OMP ATOMIC
3521 : force(jkind)%mp2_non_sep(i_xyz, jat_of_kind) = force(jkind)%mp2_non_sep(i_xyz, jat_of_kind) &
3522 : - new_force
3523 : ELSE IF (do_ovlp_prv) THEN
3524 : !$OMP ATOMIC
3525 : force(jkind)%overlap(i_xyz, jat_of_kind) = force(jkind)%overlap(i_xyz, jat_of_kind) &
3526 : - new_force
3527 : ELSE
3528 : !$OMP ATOMIC
3529 : force(jkind)%fock_4c(i_xyz, jat_of_kind) = force(jkind)%fock_4c(i_xyz, jat_of_kind) &
3530 : - new_force
3531 : END IF
3532 :
3533 : DEALLOCATE (contr_blk)
3534 : END IF
3535 :
3536 : DEALLOCATE (der_blk)
3537 : END DO !iter
3538 : CALL dbt_iterator_stop(iter)
3539 :
3540 : END DO !i_xyz
3541 : !$OMP END PARALLEL
3542 610 : CALL timestop(handle)
3543 :
3544 1220 : END SUBROUTINE get_2c_der_force
3545 :
3546 : ! **************************************************************************************************
3547 : !> \brief Get the force from a contraction of type SUM_a,beta (a|beta') C_a,beta, where beta is an AO
3548 : !> and a is a MO
3549 : !> \param force ...
3550 : !> \param t_mo_coeff ...
3551 : !> \param t_2c_MO_AO ...
3552 : !> \param atom_of_kind ...
3553 : !> \param kind_of ...
3554 : !> \param idx_to_at ...
3555 : !> \param pref ...
3556 : !> \param i_xyz ...
3557 : ! **************************************************************************************************
3558 54 : SUBROUTINE get_MO_AO_force(force, t_mo_coeff, t_2c_MO_AO, atom_of_kind, kind_of, idx_to_at, pref, i_xyz)
3559 :
3560 : TYPE(qs_force_type), DIMENSION(:), POINTER :: force
3561 : TYPE(dbt_type), INTENT(INOUT) :: t_mo_coeff, t_2c_MO_AO
3562 : INTEGER, DIMENSION(:), INTENT(IN) :: atom_of_kind, kind_of, idx_to_at
3563 : REAL(dp), INTENT(IN) :: pref
3564 : INTEGER, INTENT(IN) :: i_xyz
3565 :
3566 : CHARACTER(LEN=*), PARAMETER :: routineN = 'get_MO_AO_force'
3567 :
3568 : INTEGER :: handle, iat, iat_of_kind, ikind, j_xyz
3569 : INTEGER, DIMENSION(2) :: ind
3570 : LOGICAL :: found
3571 : REAL(dp) :: new_force
3572 54 : REAL(dp), ALLOCATABLE, DIMENSION(:, :), TARGET :: mo_ao_blk, mo_coeff_blk
3573 : REAL(dp), DIMENSION(3) :: scoord
3574 : TYPE(dbt_iterator_type) :: iter
3575 :
3576 54 : CALL timeset(routineN, handle)
3577 :
3578 : !$OMP PARALLEL DEFAULT(NONE) &
3579 : !$OMP SHARED(t_2c_MO_AO,t_mo_coeff,pref,force,idx_to_at,atom_of_kind,kind_of,i_xyz) &
3580 54 : !$OMP PRIVATE(iter,ind,mo_ao_blk,mo_coeff_blk,found,new_force,iat,iat_of_kind,ikind,scoord,j_xyz)
3581 : CALL dbt_iterator_start(iter, t_2c_MO_AO)
3582 : DO WHILE (dbt_iterator_blocks_left(iter))
3583 : CALL dbt_iterator_next_block(iter, ind)
3584 :
3585 : CALL dbt_get_block(t_2c_MO_AO, ind, mo_ao_blk, found)
3586 : CPASSERT(found)
3587 : CALL dbt_get_block(t_mo_coeff, ind, mo_coeff_blk, found)
3588 :
3589 : IF (found) THEN
3590 :
3591 : new_force = pref*SUM(mo_ao_blk(:, :)*mo_coeff_blk(:, :))
3592 :
3593 : iat = idx_to_at(ind(2)) !AO index is column index
3594 : iat_of_kind = atom_of_kind(iat)
3595 : ikind = kind_of(iat)
3596 :
3597 : !$OMP ATOMIC
3598 : force(ikind)%fock_4c(i_xyz, iat_of_kind) = force(ikind)%fock_4c(i_xyz, iat_of_kind) &
3599 : + new_force
3600 :
3601 : DEALLOCATE (mo_coeff_blk)
3602 : END IF
3603 :
3604 : DEALLOCATE (mo_ao_blk)
3605 : END DO !iter
3606 : CALL dbt_iterator_stop(iter)
3607 : !$OMP END PARALLEL
3608 :
3609 54 : CALL timestop(handle)
3610 :
3611 108 : END SUBROUTINE get_MO_AO_force
3612 :
3613 : ! **************************************************************************************************
3614 : !> \brief Print RI-HFX quantities, as required by the PRINT subsection
3615 : !> \param ri_data ...
3616 : !> \param qs_env ...
3617 : ! **************************************************************************************************
3618 148 : SUBROUTINE print_ri_hfx(ri_data, qs_env)
3619 :
3620 : TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
3621 : TYPE(qs_environment_type), POINTER :: qs_env
3622 :
3623 : INTEGER :: i_RI, ibasis, nkind, nspins, unit_nr
3624 296 : INTEGER, DIMENSION(:), POINTER :: col_bsize, row_bsize
3625 : LOGICAL :: mult_by_S, print_density, print_ri_metric
3626 148 : REAL(dp), ALLOCATABLE, DIMENSION(:) :: density_coeffs, density_coeffs_2
3627 : TYPE(cp_blacs_env_type), POINTER :: blacs_env
3628 : TYPE(cp_fm_struct_type), POINTER :: fm_struct
3629 : TYPE(cp_fm_type) :: matrix_s_fm
3630 : TYPE(cp_logger_type), POINTER :: logger
3631 : TYPE(dbcsr_distribution_type) :: dbcsr_dist
3632 148 : TYPE(dbcsr_p_type), DIMENSION(:, :), POINTER :: rho_ao
3633 296 : TYPE(dbcsr_type), DIMENSION(1) :: matrix_s
3634 : TYPE(dft_control_type), POINTER :: dft_control
3635 : TYPE(distribution_2d_type), POINTER :: dist_2d
3636 : TYPE(gto_basis_set_p_type), ALLOCATABLE, &
3637 148 : DIMENSION(:), TARGET :: basis_set_AO, basis_set_RI
3638 : TYPE(gto_basis_set_type), POINTER :: orb_basis, ri_basis
3639 : TYPE(mp_para_env_type), POINTER :: para_env
3640 : TYPE(neighbor_list_set_p_type), DIMENSION(:), &
3641 148 : POINTER :: nl_2c
3642 148 : TYPE(particle_type), DIMENSION(:), POINTER :: particle_set
3643 148 : TYPE(qs_kind_type), DIMENSION(:), POINTER :: qs_kind_set
3644 : TYPE(qs_rho_type), POINTER :: rho
3645 : TYPE(section_vals_type), POINTER :: input, print_section
3646 :
3647 148 : NULLIFY (rho_ao, input, print_section, logger, rho, particle_set, qs_kind_set, ri_basis, nl_2c, &
3648 148 : dist_2d, col_bsize, row_bsize, para_env, blacs_env, fm_struct, orb_basis, dft_control)
3649 :
3650 148 : CALL get_qs_env(qs_env, input=input, dft_control=dft_control)
3651 148 : logger => cp_get_default_logger()
3652 148 : print_density = .FALSE.
3653 148 : print_ri_metric = .FALSE.
3654 :
3655 : !Do we print the RI density coeffs and/or RI_metric 2c integrals?
3656 148 : print_section => section_vals_get_subs_vals(input, "DFT%XC%HF%RI%PRINT")
3657 148 : IF (BTEST(cp_print_key_should_output(logger%iter_info, print_section, "RI_DENSITY_COEFFS"), &
3658 0 : cp_p_file)) print_density = .TRUE.
3659 148 : IF (BTEST(cp_print_key_should_output(logger%iter_info, print_section, "RI_METRIC_2C_INTS"), &
3660 0 : cp_p_file)) print_ri_metric = .TRUE.
3661 :
3662 : !common stuff
3663 148 : IF (print_density .OR. print_ri_metric) THEN
3664 :
3665 : !Re-calculate the 2-center RI_metric integrals (because not stored and cheap)
3666 : !Recalculated the RI_metric 2c-integrals, as it is cheap, and not stored
3667 : CALL get_qs_env(qs_env, nkind=nkind, qs_kind_set=qs_kind_set, particle_set=particle_set, &
3668 0 : distribution_2d=dist_2d, para_env=para_env, blacs_env=blacs_env)
3669 0 : ALLOCATE (basis_set_RI(nkind), basis_set_AO(nkind))
3670 0 : CALL basis_set_list_setup(basis_set_RI, ri_data%ri_basis_type, qs_kind_set)
3671 0 : CALL get_particle_set(particle_set, qs_kind_set, basis=basis_set_RI)
3672 0 : CALL basis_set_list_setup(basis_set_AO, ri_data%orb_basis_type, qs_kind_set)
3673 0 : CALL get_particle_set(particle_set, qs_kind_set, basis=basis_set_AO)
3674 :
3675 0 : DO ibasis = 1, nkind
3676 0 : ri_basis => basis_set_RI(ibasis)%gto_basis_set
3677 0 : CALL init_interaction_radii_orb_basis(ri_basis, ri_data%eps_pgf_orb)
3678 0 : orb_basis => basis_set_AO(ibasis)%gto_basis_set
3679 0 : CALL init_interaction_radii_orb_basis(orb_basis, ri_data%eps_pgf_orb)
3680 : END DO
3681 :
3682 0 : CALL cp_dbcsr_dist2d_to_dist(dist_2d, dbcsr_dist)
3683 0 : ALLOCATE (row_bsize(SIZE(ri_data%bsizes_RI)))
3684 0 : ALLOCATE (col_bsize(SIZE(ri_data%bsizes_RI)))
3685 0 : row_bsize(:) = ri_data%bsizes_RI
3686 0 : col_bsize(:) = ri_data%bsizes_RI
3687 :
3688 0 : CALL dbcsr_create(matrix_s(1), "RI metric", dbcsr_dist, dbcsr_type_symmetric, row_bsize, col_bsize)
3689 :
3690 : CALL build_2c_neighbor_lists(nl_2c, basis_set_RI, basis_set_RI, ri_data%ri_metric, &
3691 0 : "HFX_2c_nl_pot", qs_env, sym_ij=.TRUE., dist_2d=dist_2d)
3692 : CALL build_2c_integrals(matrix_s, ri_data%filter_eps_2c, qs_env, nl_2c, basis_set_RI, &
3693 0 : basis_set_RI, ri_data%ri_metric)
3694 :
3695 0 : CALL release_neighbor_list_sets(nl_2c)
3696 0 : CALL dbcsr_distribution_release(dbcsr_dist)
3697 : END IF
3698 :
3699 0 : IF (print_density) THEN
3700 0 : CALL get_qs_env(qs_env, rho=rho)
3701 0 : CALL qs_rho_get(rho, rho_ao_kp=rho_ao)
3702 0 : nspins = SIZE(rho_ao, 1)
3703 :
3704 0 : CALL section_vals_val_get(print_section, "RI_DENSITY_COEFFS%MULTIPLY_BY_RI_2C_INTEGRALS", l_val=mult_by_s)
3705 :
3706 : CALL get_RI_density_coeffs(density_coeffs, matrix_s(1), rho_ao, 1, basis_set_AO, basis_set_RI, &
3707 0 : mult_by_s, ri_data, qs_env)
3708 0 : IF (nspins == 2) &
3709 : CALL get_RI_density_coeffs(density_coeffs_2, matrix_s(1), rho_ao, 2, basis_set_AO, &
3710 0 : basis_set_RI, mult_by_s, ri_data, qs_env)
3711 :
3712 : unit_nr = cp_print_key_unit_nr(logger, input, "DFT%XC%HF%RI%PRINT%RI_DENSITY_COEFFS", &
3713 : extension=".dat", file_status="REPLACE", &
3714 0 : file_action="WRITE", file_form="FORMATTED")
3715 :
3716 0 : IF (unit_nr > 0) THEN
3717 0 : IF (nspins == 1) THEN
3718 : WRITE (unit_nr, FMT="(A,A,A)") &
3719 0 : "# Coefficients of the electronic density projected on the RI_HFX basis for ", &
3720 0 : TRIM(logger%iter_info%project_name), " project"
3721 0 : DO i_RI = 1, SIZE(density_coeffs)
3722 0 : WRITE (unit_nr, FMT="(F20.12)") density_coeffs(i_RI)
3723 : END DO
3724 : ELSE
3725 : WRITE (unit_nr, FMT="(A,A,A)") &
3726 0 : "# Coefficients of the electronic density projected on the RI_HFX basis for ", &
3727 0 : TRIM(logger%iter_info%project_name), " project. Spin up, spin down"
3728 0 : DO i_RI = 1, SIZE(density_coeffs)
3729 0 : WRITE (unit_nr, FMT="(F20.12,F20.12)") density_coeffs(i_RI), density_coeffs_2(i_RI)
3730 : END DO
3731 : END IF
3732 : END IF
3733 :
3734 0 : CALL cp_print_key_finished_output(unit_nr, logger, input, "DFT%XC%HF%RI%PRINT%RI_DENSITY_COEFFS")
3735 : END IF
3736 :
3737 148 : IF (print_ri_metric) THEN
3738 :
3739 : !convert 2c integrals to fm for dumping
3740 : CALL cp_fm_struct_create(fm_struct, context=blacs_env, para_env=para_env, &
3741 0 : nrow_global=SUM(row_bsize), ncol_global=SUM(col_bsize))
3742 0 : CALL cp_fm_create(matrix_s_fm, fm_struct)
3743 :
3744 0 : CALL copy_dbcsr_to_fm(matrix_s(1), matrix_s_fm)
3745 :
3746 : unit_nr = cp_print_key_unit_nr(logger, input, "DFT%XC%HF%RI%PRINT%RI_METRIC_2C_INTS", &
3747 : extension=".fm", file_status="REPLACE", &
3748 0 : file_action="WRITE", file_form="UNFORMATTED")
3749 0 : CALL cp_fm_write_unformatted(matrix_s_fm, unit_nr)
3750 :
3751 0 : CALL cp_print_key_finished_output(unit_nr, logger, input, "DFT%XC%HF%RI%PRINT%RI_METRIC_2C_INTS")
3752 :
3753 0 : CALL cp_fm_struct_release(fm_struct)
3754 0 : CALL cp_fm_release(matrix_s_fm)
3755 : END IF
3756 :
3757 : !clean-up
3758 148 : IF (print_density .OR. print_ri_metric) THEN
3759 0 : DO ibasis = 1, nkind
3760 0 : ri_basis => basis_set_RI(ibasis)%gto_basis_set
3761 0 : CALL init_interaction_radii_orb_basis(ri_basis, dft_control%qs_control%eps_pgf_orb)
3762 0 : orb_basis => basis_set_AO(ibasis)%gto_basis_set
3763 0 : CALL init_interaction_radii_orb_basis(orb_basis, dft_control%qs_control%eps_pgf_orb)
3764 : END DO
3765 :
3766 0 : CALL dbcsr_release(matrix_s(1))
3767 0 : DEALLOCATE (row_bsize, col_bsize)
3768 : END IF
3769 :
3770 444 : END SUBROUTINE print_ri_hfx
3771 :
3772 : ! **************************************************************************************************
3773 : !> \brief Projects the density on the RI basis and return the array of the RI coefficients
3774 : !> \param density_coeffs ...
3775 : !> \param ri_2c_ints ...
3776 : !> \param rho_ao ...
3777 : !> \param ispin ...
3778 : !> \param basis_set_AO ...
3779 : !> \param basis_set_RI ...
3780 : !> \param multiply_by_S ...
3781 : !> \param ri_data ...
3782 : !> \param qs_env ...
3783 : ! **************************************************************************************************
3784 0 : SUBROUTINE get_RI_density_coeffs(density_coeffs, ri_2c_ints, rho_ao, ispin, basis_set_AO, &
3785 0 : basis_set_RI, multiply_by_S, ri_data, qs_env)
3786 :
3787 : REAL(dp), ALLOCATABLE, DIMENSION(:) :: density_coeffs
3788 : TYPE(dbcsr_type), INTENT(INOUT) :: ri_2c_ints
3789 : TYPE(dbcsr_p_type), DIMENSION(:, :) :: rho_ao
3790 : INTEGER, INTENT(IN) :: ispin
3791 : TYPE(gto_basis_set_p_type), DIMENSION(:) :: basis_set_AO, basis_set_RI
3792 : LOGICAL, INTENT(IN) :: multiply_by_S
3793 : TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
3794 : TYPE(qs_environment_type), POINTER :: qs_env
3795 :
3796 : CHARACTER(LEN=*), PARAMETER :: routineN = 'get_RI_density_coeffs'
3797 :
3798 : INTEGER :: a, b, handle, i_mem, idx, n_dependent, &
3799 : n_mem, n_mem_RI, natom, &
3800 : nblk_per_thread, nblks, nkind
3801 : INTEGER(int_8) :: nze
3802 0 : INTEGER, ALLOCATABLE, DIMENSION(:) :: batch_block_end, batch_block_start, &
3803 0 : dist1, dist2, dist3, dummy1, dummy2, &
3804 0 : idx1, idx2, idx3
3805 : INTEGER, DIMENSION(2) :: ind, pdims_2d
3806 : INTEGER, DIMENSION(2, 3) :: bounds_cpy
3807 : INTEGER, DIMENSION(3) :: dims_3c, pcoord_3d, pdims_3d
3808 : LOGICAL :: calc_ints, found
3809 : REAL(dp) :: occ, threshold
3810 0 : REAL(dp), ALLOCATABLE, DIMENSION(:, :) :: blk
3811 0 : REAL(dp), ALLOCATABLE, DIMENSION(:, :, :) :: blk_3d
3812 : TYPE(cp_blacs_env_type), POINTER :: blacs_env
3813 : TYPE(dbcsr_type) :: ri_2c_inv
3814 0 : TYPE(dbt_distribution_type) :: dist_2d, dist_3d
3815 : TYPE(dbt_iterator_type) :: iter
3816 0 : TYPE(dbt_pgrid_type) :: pgrid_2d, pgrid_3d
3817 0 : TYPE(dbt_type) :: density_coeffs_t, density_tmp, rho_ao_t, &
3818 0 : rho_ao_t_3d, rho_ao_tmp, t2c_ri_ints, &
3819 0 : t2c_ri_inv, t2c_ri_tmp
3820 0 : TYPE(dbt_type), ALLOCATABLE, DIMENSION(:, :) :: t_3c_int_batched
3821 : TYPE(dft_control_type), POINTER :: dft_control
3822 : TYPE(distribution_3d_type) :: dist_nl3c
3823 0 : TYPE(mp_cart_type) :: mp_comm_t3c
3824 : TYPE(mp_para_env_type), POINTER :: para_env
3825 : TYPE(neighbor_list_3c_type) :: nl_3c
3826 0 : TYPE(particle_type), DIMENSION(:), POINTER :: particle_set
3827 0 : TYPE(qs_kind_type), DIMENSION(:), POINTER :: qs_kind_set
3828 :
3829 0 : NULLIFY (dft_control, para_env, blacs_env, particle_set, qs_kind_set)
3830 :
3831 0 : CALL timeset(routineN, handle)
3832 :
3833 : ! Projection of the density on the RI basis: n(r) = sum_pq sum_munu P_pq (pq|mu) (mu|nu)^-1 nu(r)
3834 : ! = sum_nu d_nu nu(r)
3835 : ! the (pq|mu) (mu|nu)^-1 contraction is already stored in compressed format
3836 :
3837 0 : IF (.NOT. ri_data%flavor == ri_pmat) THEN
3838 0 : CPABORT("Can only calculate and print the RI density coefficients within the RHO flavor of RI-HFX")
3839 : END IF
3840 :
3841 : CALL get_qs_env(qs_env, dft_control=dft_control, para_env=para_env, blacs_env=blacs_env, nkind=nkind, &
3842 0 : particle_set=particle_set, qs_kind_set=qs_kind_set, natom=natom)
3843 0 : n_mem = ri_data%n_mem
3844 0 : n_mem_RI = ri_data%n_mem_RI
3845 :
3846 : ! The RI 2c int tensor and its inverse
3847 0 : CALL dbcsr_create(ri_2c_inv, template=ri_2c_ints, matrix_type=dbcsr_type_no_symmetry)
3848 :
3849 0 : SELECT CASE (ri_data%t2c_method)
3850 : CASE (hfx_ri_do_2c_iter)
3851 0 : threshold = MAX(ri_data%filter_eps, 1.0e-12_dp)
3852 0 : CALL invert_hotelling(ri_2c_inv, ri_2c_ints, threshold=threshold, silent=.FALSE.)
3853 : CASE (hfx_ri_do_2c_cholesky)
3854 0 : CALL dbcsr_copy(ri_2c_inv, ri_2c_ints)
3855 0 : CALL cp_dbcsr_cholesky_decompose(ri_2c_inv, para_env=para_env, blacs_env=blacs_env)
3856 0 : CALL cp_dbcsr_cholesky_invert(ri_2c_inv, para_env=para_env, blacs_env=blacs_env, upper_to_full=.TRUE.)
3857 : CASE (hfx_ri_do_2c_diag)
3858 0 : CALL dbcsr_copy(ri_2c_inv, ri_2c_ints)
3859 : CALL cp_dbcsr_power(ri_2c_inv, -1.0_dp, ri_data%eps_eigval, n_dependent, &
3860 0 : para_env, blacs_env, verbose=ri_data%unit_nr_dbcsr > 0)
3861 : END SELECT
3862 :
3863 0 : CALL dbt_create(ri_2c_ints, t2c_ri_tmp)
3864 : CALL create_2c_tensor(t2c_ri_ints, dist1, dist2, ri_data%pgrid_2d, &
3865 : ri_data%bsizes_RI_split, ri_data%bsizes_RI_split, &
3866 : name="(RI | RI)")
3867 0 : CALL dbt_create(t2c_ri_ints, t2c_ri_inv)
3868 :
3869 0 : CALL dbt_copy_matrix_to_tensor(ri_2c_ints, t2c_ri_tmp)
3870 0 : CALL dbt_copy(t2c_ri_tmp, t2c_ri_ints, move_data=.TRUE.)
3871 0 : CALL dbt_filter(t2c_ri_ints, ri_data%filter_eps)
3872 :
3873 0 : CALL dbt_copy_matrix_to_tensor(ri_2c_inv, t2c_ri_tmp)
3874 0 : CALL dbt_copy(t2c_ri_tmp, t2c_ri_inv, move_data=.TRUE.)
3875 0 : CALL dbt_filter(t2c_ri_inv, ri_data%filter_eps)
3876 :
3877 0 : CALL dbcsr_release(ri_2c_inv)
3878 0 : CALL dbt_destroy(t2c_ri_tmp)
3879 0 : DEALLOCATE (dist1, dist2)
3880 :
3881 : ! The AO density tensor
3882 0 : CALL dbt_create(rho_ao(ispin, 1)%matrix, rho_ao_tmp)
3883 : CALL create_2c_tensor(rho_ao_t, dist1, dist2, ri_data%pgrid_2d, &
3884 : ri_data%bsizes_AO_split, ri_data%bsizes_AO_split, &
3885 : name="(AO | AO)")
3886 0 : DEALLOCATE (dist1, dist2)
3887 :
3888 0 : CALL dbt_copy_matrix_to_tensor(rho_ao(ispin, 1)%matrix, rho_ao_tmp)
3889 0 : CALL dbt_copy(rho_ao_tmp, rho_ao_t, move_data=.TRUE.)
3890 0 : CALL dbt_filter(rho_ao_t, ri_data%filter_eps)
3891 0 : CALL dbt_destroy(rho_ao_tmp)
3892 :
3893 : ! Put in in 3D
3894 0 : ALLOCATE (dist1(SIZE(ri_data%bsizes_AO_split)), dist2(SIZE(ri_data%bsizes_AO_split)), dist3(1))
3895 0 : dist3(1) = 0
3896 0 : CALL dbt_get_info(rho_ao_t, pdims=pdims_2d, proc_dist_1=dist1, proc_dist_2=dist2)
3897 0 : CALL dbt_default_distvec(1, 1, [1], dist3)
3898 :
3899 0 : pdims_3d(1) = pdims_2d(1)
3900 0 : pdims_3d(2) = pdims_2d(2)
3901 0 : pdims_3d(3) = 1
3902 :
3903 0 : CALL dbt_pgrid_create(para_env, pdims_3d, pgrid_3d)
3904 0 : CALL dbt_distribution_new(dist_3d, pgrid_3d, dist1, dist2, dist3)
3905 : CALL dbt_create(rho_ao_t_3d, "rho_ao_3d", dist_3d, [1, 2], [3], ri_data%bsizes_AO_split, &
3906 0 : ri_data%bsizes_AO_split, [1])
3907 0 : DEALLOCATE (dist1, dist2, dist3)
3908 0 : CALL dbt_pgrid_destroy(pgrid_3d)
3909 0 : CALL dbt_distribution_destroy(dist_3d)
3910 :
3911 : ! copy density
3912 0 : nblks = 0
3913 : !$OMP PARALLEL DEFAULT(NONE) &
3914 : !$OMP SHARED(rho_ao_t,nblks) &
3915 0 : !$OMP PRIVATE(iter,ind,blk,found)
3916 : CALL dbt_iterator_start(iter, rho_ao_t)
3917 : DO WHILE (dbt_iterator_blocks_left(iter))
3918 : CALL dbt_iterator_next_block(iter, ind)
3919 : CALL dbt_get_block(rho_ao_t, ind, blk, found)
3920 : IF (found) THEN
3921 : !$OMP ATOMIC
3922 : nblks = nblks + 1
3923 : DEALLOCATE (blk)
3924 : END IF
3925 : END DO
3926 : CALL dbt_iterator_stop(iter)
3927 : !$OMP END PARALLEL
3928 :
3929 0 : ALLOCATE (idx1(nblks), idx2(nblks), idx3(nblks))
3930 0 : idx3 = 1
3931 0 : nblks = 0
3932 : !$OMP PARALLEL DEFAULT(NONE) &
3933 : !$OMP SHARED(rho_ao_t,nblks,idx1,idx2) &
3934 0 : !$OMP PRIVATE(iter,ind,blk,found)
3935 : CALL dbt_iterator_start(iter, rho_ao_t)
3936 : DO WHILE (dbt_iterator_blocks_left(iter))
3937 : CALL dbt_iterator_next_block(iter, ind)
3938 : CALL dbt_get_block(rho_ao_t, ind, blk, found)
3939 : IF (found) THEN
3940 : !$OMP CRITICAL
3941 : nblks = nblks + 1
3942 : idx1(nblks) = ind(1)
3943 : idx2(nblks) = ind(2)
3944 : !$OMP END CRITICAL
3945 : DEALLOCATE (blk)
3946 : END IF
3947 : END DO
3948 : CALL dbt_iterator_stop(iter)
3949 : !$OMP END PARALLEL
3950 :
3951 0 : !$OMP PARALLEL DEFAULT(NONE) SHARED(rho_ao_t_3d,nblks,idx1,idx2,idx3) PRIVATE(nblk_per_thread,A,b)
3952 : nblk_per_thread = nblks/omp_get_num_threads() + 1
3953 : a = omp_get_thread_num()*nblk_per_thread + 1
3954 : b = MIN(a + nblk_per_thread, nblks)
3955 : CALL dbt_reserve_blocks(rho_ao_t_3d, idx1(a:b), idx2(a:b), idx3(a:b))
3956 : !$OMP END PARALLEL
3957 :
3958 : !$OMP PARALLEL DEFAULT(NONE) &
3959 : !$OMP SHARED(rho_ao_t,rho_ao_t_3d) &
3960 0 : !$OMP PRIVATE(iter,ind,blk,found,blk_3d)
3961 : CALL dbt_iterator_start(iter, rho_ao_t)
3962 : DO WHILE (dbt_iterator_blocks_left(iter))
3963 : CALL dbt_iterator_next_block(iter, ind)
3964 : CALL dbt_get_block(rho_ao_t, ind, blk, found)
3965 : IF (found) THEN
3966 : ALLOCATE (blk_3d(SIZE(blk, 1), SIZE(blk, 2), 1))
3967 : blk_3d(:, :, 1) = blk(:, :)
3968 : !$OMP CRITICAL
3969 : CALL dbt_put_block(rho_ao_t_3d, [ind(1), ind(2), 1], [SIZE(blk, 1), SIZE(blk, 2), 1], blk_3d)
3970 : !$OMP END CRITICAL
3971 : DEALLOCATE (blk, blk_3d)
3972 : END IF
3973 : END DO
3974 : CALL dbt_iterator_stop(iter)
3975 : !$OMP END PARALLEL
3976 :
3977 : ! The 1D tensor with the density coeffs
3978 0 : pdims_2d(1) = para_env%num_pe
3979 0 : pdims_2d(2) = 1
3980 :
3981 0 : ALLOCATE (dist1(SIZE(ri_data%bsizes_RI_split)), dist2(1))
3982 0 : CALL dbt_default_distvec(SIZE(ri_data%bsizes_RI_split), pdims_2d(1), ri_data%bsizes_RI_split, dist1)
3983 0 : CALL dbt_default_distvec(1, pdims_2d(2), [1], dist2)
3984 :
3985 0 : CALL dbt_pgrid_create(para_env, pdims_2d, pgrid_2d)
3986 0 : CALL dbt_distribution_new(dist_2d, pgrid_2d, dist1, dist2)
3987 0 : CALL dbt_create(density_coeffs_t, "density_coeffs", dist_2d, [1], [2], ri_data%bsizes_RI_split, [1])
3988 0 : CALL dbt_create(density_coeffs_t, density_tmp)
3989 0 : DEALLOCATE (dist1, dist2)
3990 0 : CALL dbt_pgrid_destroy(pgrid_2d)
3991 0 : CALL dbt_distribution_destroy(dist_2d)
3992 :
3993 0 : CALL dbt_get_info(ri_data%t_3c_int_ctr_3(1, 1), nfull_total=dims_3c)
3994 :
3995 : ! The 3c integrals tensor, in case we compute them here
3996 0 : pdims_3d = 0
3997 0 : CALL dbt_pgrid_create(para_env, pdims_3d, pgrid_3d, tensor_dims=[MAX(1, natom/n_mem), natom, natom])
3998 0 : ALLOCATE (t_3c_int_batched(1, 1))
3999 : CALL create_3c_tensor(t_3c_int_batched(1, 1), dist1, dist2, dist3, pgrid_3d, &
4000 : ri_data%bsizes_RI, ri_data%bsizes_AO, ri_data%bsizes_AO, map1=[1], map2=[2, 3], &
4001 0 : name="(RI | AO AO)")
4002 :
4003 0 : CALL dbt_mp_environ_pgrid(pgrid_3d, pdims_3d, pcoord_3d)
4004 0 : CALL mp_comm_t3c%create(pgrid_3d%mp_comm_2d, 3, pdims_3d)
4005 : CALL distribution_3d_create(dist_nl3c, dist1, dist2, dist3, nkind, particle_set, &
4006 0 : mp_comm_t3c, own_comm=.TRUE.)
4007 0 : DEALLOCATE (dist1, dist2, dist3)
4008 0 : CALL dbt_pgrid_destroy(pgrid_3d)
4009 :
4010 : CALL build_3c_neighbor_lists(nl_3c, basis_set_RI, basis_set_AO, basis_set_AO, dist_nl3c, ri_data%ri_metric, &
4011 0 : "HFX_3c_nl", qs_env, op_pos=1, sym_jk=.TRUE., own_dist=.TRUE.)
4012 :
4013 0 : n_mem = ri_data%n_mem
4014 0 : CALL create_tensor_batches(ri_data%bsizes_RI, n_mem, dummy1, dummy2, batch_block_start, batch_block_end)
4015 :
4016 0 : calc_ints = .FALSE.
4017 0 : CALL get_tensor_occupancy(ri_data%t_3c_int_ctr_2(1, 1), nze, occ)
4018 0 : IF (nze == 0) calc_ints = .TRUE.
4019 :
4020 0 : DO i_mem = 1, n_mem
4021 0 : IF (calc_ints) THEN
4022 : CALL build_3c_integrals(t_3c_int_batched, ri_data%filter_eps, qs_env, nl_3c, &
4023 : basis_set_RI, basis_set_AO, basis_set_AO, &
4024 : ri_data%ri_metric, int_eps=ri_data%eps_schwarz, op_pos=1, &
4025 : desymmetrize=.FALSE., &
4026 0 : bounds_i=[batch_block_start(i_mem), batch_block_end(i_mem)])
4027 0 : CALL dbt_copy(t_3c_int_batched(1, 1), ri_data%t_3c_int_ctr_3(1, 1), order=[1, 3, 2])
4028 0 : CALL dbt_copy(t_3c_int_batched(1, 1), ri_data%t_3c_int_ctr_3(1, 1), move_data=.TRUE., summation=.TRUE.)
4029 0 : CALL dbt_filter(ri_data%t_3c_int_ctr_3(1, 1), ri_data%filter_eps)
4030 : ELSE
4031 : bounds_cpy(:, 2) = [SUM(ri_data%bsizes_RI(1:batch_block_start(i_mem) - 1)) + 1, &
4032 0 : SUM(ri_data%bsizes_RI(1:batch_block_end(i_mem)))]
4033 0 : bounds_cpy(:, 1) = [1, SUM(ri_data%bsizes_AO)]
4034 0 : bounds_cpy(:, 3) = [1, SUM(ri_data%bsizes_AO)]
4035 : CALL dbt_copy(ri_data%t_3c_int_ctr_2(1, 1), ri_data%t_3c_int_ctr_3(1, 1), &
4036 0 : order=[2, 1, 3], bounds=bounds_cpy)
4037 : END IF
4038 :
4039 : !contract the integrals with the density P_pq (pq|R)
4040 : CALL dbt_contract(1.0_dp, ri_data%t_3c_int_ctr_3(1, 1), rho_ao_t_3d, 0.0_dp, density_tmp, &
4041 : contract_1=[2, 3], notcontract_1=[1], &
4042 : contract_2=[1, 2], notcontract_2=[3], &
4043 0 : map_1=[1], map_2=[2], filter_eps=ri_data%filter_eps)
4044 0 : CALL dbt_clear(ri_data%t_3c_int_ctr_3(1, 1))
4045 :
4046 : !contract the above vector with the inverse metric
4047 : CALL dbt_contract(1.0_dp, t2c_ri_inv, density_tmp, 1.0_dp, density_coeffs_t, &
4048 : contract_1=[2], notcontract_1=[1], &
4049 : contract_2=[1], notcontract_2=[2], &
4050 0 : map_1=[1], map_2=[2], filter_eps=ri_data%filter_eps)
4051 :
4052 : END DO
4053 0 : CALL neighbor_list_3c_destroy(nl_3c)
4054 :
4055 0 : IF (multiply_by_s) THEN
4056 : CALL dbt_contract(1.0_dp, t2c_ri_ints, density_coeffs_t, 0.0_dp, density_tmp, &
4057 : contract_1=[2], notcontract_1=[1], &
4058 : contract_2=[1], notcontract_2=[2], &
4059 0 : map_1=[1], map_2=[2], filter_eps=ri_data%filter_eps)
4060 0 : CALL dbt_copy(density_tmp, density_coeffs_t, move_data=.TRUE.)
4061 : END IF
4062 :
4063 0 : ALLOCATE (density_coeffs(SUM(ri_data%bsizes_RI)))
4064 0 : density_coeffs = 0.0
4065 :
4066 : !$OMP PARALLEL DEFAULT(NONE) &
4067 : !$OMP SHARED(density_coeffs_t,ri_data,density_coeffs) &
4068 0 : !$OMP PRIVATE(iter,ind,blk,found,idx)
4069 : CALL dbt_iterator_start(iter, density_coeffs_t)
4070 : DO WHILE (dbt_iterator_blocks_left(iter))
4071 : CALL dbt_iterator_next_block(iter, ind)
4072 : CALL dbt_get_block(density_coeffs_t, ind, blk, found)
4073 : IF (found) THEN
4074 :
4075 : idx = SUM(ri_data%bsizes_RI_split(1:ind(1) - 1))
4076 : !$OMP CRITICAL
4077 : density_coeffs(idx + 1:idx + ri_data%bsizes_RI_split(ind(1))) = blk(:, 1)
4078 : !$OMP END CRITICAL
4079 : DEALLOCATE (blk)
4080 : END IF
4081 : END DO
4082 : CALL dbt_iterator_stop(iter)
4083 : !$OMP END PARALLEL
4084 0 : CALL para_env%sum(density_coeffs)
4085 :
4086 0 : CALL dbt_destroy(t2c_ri_ints)
4087 0 : CALL dbt_destroy(t2c_ri_inv)
4088 0 : CALL dbt_destroy(density_tmp)
4089 0 : CALL dbt_destroy(rho_ao_t)
4090 0 : CALL dbt_destroy(rho_ao_t_3d)
4091 0 : CALL dbt_destroy(density_coeffs_t)
4092 0 : CALL dbt_destroy(t_3c_int_batched(1, 1))
4093 :
4094 0 : CALL timestop(handle)
4095 :
4096 0 : END SUBROUTINE get_RI_density_coeffs
4097 :
4098 : ! **************************************************************************************************
4099 : !> \brief a small utility function that returns the atom corresponding to a block of a split tensor
4100 : !> \param idx_to_at ...
4101 : !> \param bsizes_split ...
4102 : !> \param bsizes_orig ...
4103 : !> \return ...
4104 : ! **************************************************************************************************
4105 936 : SUBROUTINE get_idx_to_atom(idx_to_at, bsizes_split, bsizes_orig)
4106 : INTEGER, DIMENSION(:), INTENT(INOUT) :: idx_to_at
4107 : INTEGER, DIMENSION(:), INTENT(IN) :: bsizes_split, bsizes_orig
4108 :
4109 : INTEGER :: full_sum, iat, iblk, split_sum
4110 :
4111 936 : iat = 1
4112 936 : full_sum = bsizes_orig(iat)
4113 936 : split_sum = 0
4114 4662 : DO iblk = 1, SIZE(bsizes_split)
4115 3726 : split_sum = split_sum + bsizes_split(iblk)
4116 :
4117 3726 : IF (split_sum .GT. full_sum) THEN
4118 1340 : iat = iat + 1
4119 1340 : full_sum = full_sum + bsizes_orig(iat)
4120 : END IF
4121 :
4122 4662 : idx_to_at(iblk) = iat
4123 : END DO
4124 :
4125 936 : END SUBROUTINE get_idx_to_atom
4126 :
4127 : ! **************************************************************************************************
4128 : !> \brief Function for calculating sqrt of a matrix
4129 : !> \param values ...
4130 : !> \return ...
4131 : ! **************************************************************************************************
4132 0 : FUNCTION my_sqrt(values)
4133 : REAL(KIND=dp), DIMENSION(:), INTENT(IN) :: values
4134 : REAL(KIND=dp), DIMENSION(SIZE(values)) :: my_sqrt
4135 :
4136 0 : my_sqrt = SQRT(values)
4137 : END FUNCTION
4138 :
4139 : ! **************************************************************************************************
4140 : !> \brief Function for calculation inverse sqrt of a matrix
4141 : !> \param values ...
4142 : !> \return ...
4143 : ! **************************************************************************************************
4144 0 : FUNCTION my_invsqrt(values)
4145 : REAL(KIND=dp), DIMENSION(:), INTENT(IN) :: values
4146 : REAL(KIND=dp), DIMENSION(SIZE(values)) :: my_invsqrt
4147 :
4148 0 : my_invsqrt = SQRT(1.0_dp/values)
4149 : END FUNCTION
4150 228 : END MODULE
|