Line data Source code
1 : !--------------------------------------------------------------------------------------------------!
2 : ! CP2K: A general program to perform molecular dynamics simulations !
3 : ! Copyright 2000-2024 CP2K developers group <https://cp2k.org> !
4 : ! !
5 : ! SPDX-License-Identifier: GPL-2.0-or-later !
6 : !--------------------------------------------------------------------------------------------------!
7 :
8 : ! **************************************************************************************************
9 : !> \brief Utility methods to build 3-center integral tensors of various types.
10 : ! **************************************************************************************************
11 : MODULE qs_tensors
12 : USE OMP_LIB, ONLY: omp_get_num_threads,&
13 : omp_get_thread_num
14 : USE ai_contraction, ONLY: block_add
15 : USE ai_contraction_sphi, ONLY: ab_contract,&
16 : abc_contract_xsmm
17 : USE atomic_kind_types, ONLY: atomic_kind_type
18 : USE basis_set_types, ONLY: get_gto_basis_set,&
19 : gto_basis_set_p_type,&
20 : gto_basis_set_type
21 : USE block_p_types, ONLY: block_p_type
22 : USE cell_types, ONLY: cell_type,&
23 : real_to_scaled
24 : USE cp_array_utils, ONLY: cp_2d_r_p_type
25 : USE cp_control_types, ONLY: dft_control_type
26 : USE cp_dbcsr_api, ONLY: dbcsr_filter,&
27 : dbcsr_finalize,&
28 : dbcsr_get_block_p,&
29 : dbcsr_get_matrix_type,&
30 : dbcsr_has_symmetry,&
31 : dbcsr_type,&
32 : dbcsr_type_antisymmetric,&
33 : dbcsr_type_no_symmetry
34 : USE cp_dbcsr_cp2k_link, ONLY: cp_dbcsr_alloc_block_from_nbl
35 : USE cp_files, ONLY: close_file,&
36 : open_file
37 : USE dbt_api, ONLY: &
38 : dbt_blk_sizes, dbt_clear, dbt_copy, dbt_create, dbt_destroy, dbt_filter, dbt_get_block, &
39 : dbt_get_info, dbt_get_num_blocks, dbt_get_nze_total, dbt_iterator_next_block, &
40 : dbt_iterator_num_blocks, dbt_iterator_start, dbt_iterator_stop, dbt_iterator_type, &
41 : dbt_ndims, dbt_put_block, dbt_reserve_blocks, dbt_type
42 : USE distribution_1d_types, ONLY: distribution_1d_type
43 : USE distribution_2d_types, ONLY: distribution_2d_type
44 : USE gamma, ONLY: init_md_ftable
45 : USE hfx_compression_methods, ONLY: hfx_add_mult_cache_elements,&
46 : hfx_add_single_cache_element,&
47 : hfx_decompress_first_cache,&
48 : hfx_flush_last_cache,&
49 : hfx_get_mult_cache_elements,&
50 : hfx_get_single_cache_element,&
51 : hfx_reset_cache_and_container
52 : USE hfx_types, ONLY: alloc_containers,&
53 : dealloc_containers,&
54 : hfx_cache_type,&
55 : hfx_compression_type,&
56 : hfx_container_type,&
57 : hfx_init_container
58 : USE input_constants, ONLY: do_potential_coulomb,&
59 : do_potential_id,&
60 : do_potential_short,&
61 : do_potential_truncated
62 : USE input_section_types, ONLY: section_vals_val_get
63 : USE kinds, ONLY: dp,&
64 : int_8
65 : USE kpoint_types, ONLY: get_kpoint_info,&
66 : kpoint_type
67 : USE libint_2c_3c, ONLY: cutoff_screen_factor,&
68 : eri_2center,&
69 : eri_2center_derivs,&
70 : eri_3center,&
71 : eri_3center_derivs,&
72 : libint_potential_type
73 : USE libint_wrapper, ONLY: &
74 : cp_libint_cleanup_2eri, cp_libint_cleanup_2eri1, cp_libint_cleanup_3eri, &
75 : cp_libint_cleanup_3eri1, cp_libint_init_2eri, cp_libint_init_2eri1, cp_libint_init_3eri, &
76 : cp_libint_init_3eri1, cp_libint_set_contrdepth, cp_libint_t
77 : USE message_passing, ONLY: mp_para_env_type
78 : USE molecule_types, ONLY: molecule_type
79 : USE orbital_pointers, ONLY: ncoset
80 : USE particle_types, ONLY: particle_type
81 : USE qs_environment_types, ONLY: get_qs_env,&
82 : qs_environment_type
83 : USE qs_kind_types, ONLY: qs_kind_type
84 : USE qs_neighbor_list_types, ONLY: &
85 : get_iterator_info, get_neighbor_list_set_p, neighbor_list_iterate, &
86 : neighbor_list_iterator_create, neighbor_list_iterator_p_type, &
87 : neighbor_list_iterator_release, neighbor_list_set_p_type, nl_sub_iterate, &
88 : release_neighbor_list_sets
89 : USE qs_neighbor_lists, ONLY: atom2d_build,&
90 : atom2d_cleanup,&
91 : build_neighbor_lists,&
92 : local_atoms_type,&
93 : pair_radius_setup
94 : USE qs_tensors_types, ONLY: &
95 : distribution_3d_destroy, distribution_3d_type, neighbor_list_3c_iterator_type, &
96 : neighbor_list_3c_type, symmetric_ij, symmetric_ijk, symmetric_jk, symmetric_none, &
97 : symmetrik_ik
98 : USE t_c_g0, ONLY: get_lmax_init,&
99 : init
100 : USE util, ONLY: get_limit
101 :
102 : !$ USE OMP_LIB, ONLY: omp_get_max_threads, omp_get_thread_num
103 : #include "./base/base_uses.f90"
104 :
105 : IMPLICIT NONE
106 :
107 : PRIVATE
108 :
109 : CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'qs_tensors'
110 :
111 : PUBLIC :: build_3c_neighbor_lists, &
112 : neighbor_list_3c_destroy, neighbor_list_3c_iterate, neighbor_list_3c_iterator_create, &
113 : neighbor_list_3c_iterator_destroy, get_3c_iterator_info, build_3c_integrals, &
114 : build_2c_neighbor_lists, build_2c_integrals, cutoff_screen_factor, &
115 : get_tensor_occupancy, compress_tensor, decompress_tensor, &
116 : build_3c_derivatives, build_2c_derivatives, calc_2c_virial, calc_3c_virial
117 :
118 : TYPE one_dim_int_array
119 : INTEGER, DIMENSION(:), ALLOCATABLE :: array
120 : END TYPE
121 :
122 : ! cache size for integral compression
123 : INTEGER, PARAMETER, PRIVATE :: cache_size = 1024
124 :
125 : CONTAINS
126 :
127 : ! **************************************************************************************************
128 : !> \brief Build 2-center neighborlists adapted to different operators
129 : !> This mainly wraps build_neighbor_lists for consistency with build_3c_neighbor_lists
130 : !> \param ij_list 2c neighbor list for atom pairs i, j
131 : !> \param basis_i basis object for atoms i
132 : !> \param basis_j basis object for atoms j
133 : !> \param potential_parameter ...
134 : !> \param name name of 2c neighbor list
135 : !> \param qs_env ...
136 : !> \param sym_ij Symmetry in i, j (default .TRUE.)
137 : !> \param molecular ...
138 : !> \param dist_2d optionally a custom 2d distribution
139 : !> \param pot_to_rad which radius (1 for i, 2 for j) should be adapted to incorporate potential
140 : ! **************************************************************************************************
141 9978 : SUBROUTINE build_2c_neighbor_lists(ij_list, basis_i, basis_j, potential_parameter, name, qs_env, &
142 : sym_ij, molecular, dist_2d, pot_to_rad)
143 : TYPE(neighbor_list_set_p_type), DIMENSION(:), &
144 : POINTER :: ij_list
145 : TYPE(gto_basis_set_p_type), DIMENSION(:) :: basis_i, basis_j
146 : TYPE(libint_potential_type), INTENT(IN) :: potential_parameter
147 : CHARACTER(LEN=*), INTENT(IN) :: name
148 : TYPE(qs_environment_type), POINTER :: qs_env
149 : LOGICAL, INTENT(IN), OPTIONAL :: sym_ij, molecular
150 : TYPE(distribution_2d_type), OPTIONAL, POINTER :: dist_2d
151 : INTEGER, INTENT(IN), OPTIONAL :: pot_to_rad
152 :
153 : INTEGER :: ikind, nkind, pot_to_rad_prv
154 : LOGICAL, ALLOCATABLE, DIMENSION(:) :: i_present, j_present
155 9978 : REAL(dp), ALLOCATABLE, DIMENSION(:, :) :: pair_radius
156 : REAL(kind=dp) :: subcells
157 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:) :: i_radius, j_radius
158 9978 : TYPE(atomic_kind_type), DIMENSION(:), POINTER :: atomic_kind_set
159 : TYPE(cell_type), POINTER :: cell
160 : TYPE(distribution_1d_type), POINTER :: local_particles
161 : TYPE(distribution_2d_type), POINTER :: dist_2d_prv
162 9978 : TYPE(local_atoms_type), ALLOCATABLE, DIMENSION(:) :: atom2d
163 9978 : TYPE(molecule_type), DIMENSION(:), POINTER :: molecule_set
164 9978 : TYPE(particle_type), DIMENSION(:), POINTER :: particle_set
165 :
166 9978 : NULLIFY (atomic_kind_set, cell, local_particles, molecule_set, &
167 9978 : particle_set, dist_2d_prv)
168 :
169 9978 : IF (PRESENT(pot_to_rad)) THEN
170 1348 : pot_to_rad_prv = pot_to_rad
171 : ELSE
172 : pot_to_rad_prv = 1
173 : END IF
174 :
175 : CALL get_qs_env(qs_env, &
176 : nkind=nkind, &
177 : cell=cell, &
178 : particle_set=particle_set, &
179 : atomic_kind_set=atomic_kind_set, &
180 : local_particles=local_particles, &
181 : distribution_2d=dist_2d_prv, &
182 9978 : molecule_set=molecule_set)
183 :
184 9978 : CALL section_vals_val_get(qs_env%input, "DFT%SUBCELLS", r_val=subcells)
185 :
186 39912 : ALLOCATE (i_present(nkind), j_present(nkind))
187 39912 : ALLOCATE (i_radius(nkind), j_radius(nkind))
188 :
189 25899 : i_present = .FALSE.
190 25899 : j_present = .FALSE.
191 25899 : i_radius = 0.0_dp
192 25899 : j_radius = 0.0_dp
193 :
194 9978 : IF (PRESENT(dist_2d)) dist_2d_prv => dist_2d
195 :
196 : ! Set up the radii, depending on the operator type
197 9978 : IF (potential_parameter%potential_type == do_potential_id) THEN
198 :
199 : !overlap => use the kind radius for both i and j
200 21297 : DO ikind = 1, nkind
201 13101 : IF (ASSOCIATED(basis_i(ikind)%gto_basis_set)) THEN
202 13101 : i_present(ikind) = .TRUE.
203 13101 : CALL get_gto_basis_set(basis_i(ikind)%gto_basis_set, kind_radius=i_radius(ikind))
204 : END IF
205 21297 : IF (ASSOCIATED(basis_j(ikind)%gto_basis_set)) THEN
206 13101 : j_present(ikind) = .TRUE.
207 13101 : CALL get_gto_basis_set(basis_j(ikind)%gto_basis_set, kind_radius=j_radius(ikind))
208 : END IF
209 : END DO
210 :
211 1782 : ELSE IF (potential_parameter%potential_type == do_potential_coulomb) THEN
212 :
213 : !Coulomb operator, virtually infinite range => set j_radius to arbitrarily large number
214 1110 : DO ikind = 1, nkind
215 736 : IF (ASSOCIATED(basis_i(ikind)%gto_basis_set)) THEN
216 736 : i_present(ikind) = .TRUE.
217 736 : IF (pot_to_rad_prv == 1) i_radius(ikind) = 1000000.0_dp
218 : END IF
219 1110 : IF (ASSOCIATED(basis_j(ikind)%gto_basis_set)) THEN
220 736 : j_present(ikind) = .TRUE.
221 736 : IF (pot_to_rad_prv == 2) j_radius(ikind) = 1000000.0_dp
222 : END IF
223 : END DO !ikind
224 :
225 1408 : ELSE IF (potential_parameter%potential_type == do_potential_truncated .OR. &
226 : potential_parameter%potential_type == do_potential_short) THEN
227 :
228 : !Truncated coulomb/short range: set j_radius to r_cutoff + the kind_radii
229 3492 : DO ikind = 1, nkind
230 2084 : IF (ASSOCIATED(basis_i(ikind)%gto_basis_set)) THEN
231 2084 : i_present(ikind) = .TRUE.
232 2084 : CALL get_gto_basis_set(basis_i(ikind)%gto_basis_set, kind_radius=i_radius(ikind))
233 2084 : IF (pot_to_rad_prv == 1) i_radius(ikind) = i_radius(ikind) + cutoff_screen_factor*potential_parameter%cutoff_radius
234 : END IF
235 3492 : IF (ASSOCIATED(basis_j(ikind)%gto_basis_set)) THEN
236 2084 : j_present(ikind) = .TRUE.
237 2084 : CALL get_gto_basis_set(basis_j(ikind)%gto_basis_set, kind_radius=j_radius(ikind))
238 2084 : IF (pot_to_rad_prv == 2) j_radius(ikind) = j_radius(ikind) + cutoff_screen_factor*potential_parameter%cutoff_radius
239 : END IF
240 : END DO
241 :
242 : ELSE
243 0 : CPABORT("Operator not implemented.")
244 : END IF
245 :
246 39912 : ALLOCATE (pair_radius(nkind, nkind))
247 53742 : pair_radius = 0.0_dp
248 9978 : CALL pair_radius_setup(i_present, j_present, i_radius, j_radius, pair_radius)
249 :
250 45855 : ALLOCATE (atom2d(nkind))
251 :
252 : CALL atom2d_build(atom2d, local_particles, dist_2d_prv, atomic_kind_set, &
253 9978 : molecule_set, molecule_only=.FALSE., particle_set=particle_set)
254 : CALL build_neighbor_lists(ij_list, particle_set, atom2d, cell, pair_radius, subcells, &
255 9978 : symmetric=sym_ij, molecular=molecular, nlname=TRIM(name))
256 :
257 9978 : CALL atom2d_cleanup(atom2d)
258 :
259 29934 : END SUBROUTINE
260 :
261 : ! **************************************************************************************************
262 : !> \brief Build a 3-center neighbor list
263 : !> \param ijk_list 3c neighbor list for atom triples i, j, k
264 : !> \param basis_i basis object for atoms i
265 : !> \param basis_j basis object for atoms j
266 : !> \param basis_k basis object for atoms k
267 : !> \param dist_3d 3d distribution object
268 : !> \param potential_parameter ...
269 : !> \param name name of 3c neighbor list
270 : !> \param qs_env ...
271 : !> \param sym_ij Symmetry in i, j (default .FALSE.)
272 : !> \param sym_jk Symmetry in j, k (default .FALSE.)
273 : !> \param sym_ik Symmetry in i, k (default .FALSE.)
274 : !> \param molecular ??? not tested
275 : !> \param op_pos ...
276 : !> \param own_dist ...
277 : ! **************************************************************************************************
278 674 : SUBROUTINE build_3c_neighbor_lists(ijk_list, basis_i, basis_j, basis_k, &
279 : dist_3d, potential_parameter, name, qs_env, &
280 : sym_ij, sym_jk, sym_ik, molecular, op_pos, &
281 : own_dist)
282 : TYPE(neighbor_list_3c_type), INTENT(OUT) :: ijk_list
283 : TYPE(gto_basis_set_p_type), DIMENSION(:) :: basis_i, basis_j, basis_k
284 : TYPE(distribution_3d_type), INTENT(IN) :: dist_3d
285 : TYPE(libint_potential_type), INTENT(IN) :: potential_parameter
286 : CHARACTER(LEN=*), INTENT(IN) :: name
287 : TYPE(qs_environment_type), POINTER :: qs_env
288 : LOGICAL, INTENT(IN), OPTIONAL :: sym_ij, sym_jk, sym_ik, molecular
289 : INTEGER, INTENT(IN), OPTIONAL :: op_pos
290 : LOGICAL, INTENT(IN), OPTIONAL :: own_dist
291 :
292 : CHARACTER(len=*), PARAMETER :: routineN = 'build_3c_neighbor_lists'
293 :
294 : INTEGER :: handle, op_pos_prv, sym_level
295 : TYPE(libint_potential_type) :: pot_par_1, pot_par_2
296 :
297 674 : CALL timeset(routineN, handle)
298 :
299 674 : IF (PRESENT(op_pos)) THEN
300 478 : op_pos_prv = op_pos
301 : ELSE
302 : op_pos_prv = 1
303 : END IF
304 :
305 492 : SELECT CASE (op_pos_prv)
306 : CASE (1)
307 492 : pot_par_1 = potential_parameter
308 492 : pot_par_2%potential_type = do_potential_id
309 : CASE (2)
310 182 : pot_par_2 = potential_parameter
311 478 : pot_par_1%potential_type = do_potential_id
312 : END SELECT
313 :
314 : CALL build_2c_neighbor_lists(ijk_list%ij_list, basis_i, basis_j, pot_par_1, TRIM(name)//"_sub_1", &
315 : qs_env, sym_ij=.FALSE., molecular=molecular, &
316 674 : dist_2d=dist_3d%dist_2d_1, pot_to_rad=1)
317 :
318 : CALL build_2c_neighbor_lists(ijk_list%jk_list, basis_j, basis_k, pot_par_2, TRIM(name)//"_sub_2", &
319 : qs_env, sym_ij=.FALSE., molecular=molecular, &
320 674 : dist_2d=dist_3d%dist_2d_2, pot_to_rad=2)
321 :
322 674 : ijk_list%sym = symmetric_none
323 :
324 674 : sym_level = 0
325 674 : IF (PRESENT(sym_ij)) THEN
326 140 : IF (sym_ij) THEN
327 0 : ijk_list%sym = symmetric_ij
328 0 : sym_level = sym_level + 1
329 : END IF
330 : END IF
331 :
332 674 : IF (PRESENT(sym_jk)) THEN
333 472 : IF (sym_jk) THEN
334 418 : ijk_list%sym = symmetric_jk
335 418 : sym_level = sym_level + 1
336 : END IF
337 : END IF
338 :
339 674 : IF (PRESENT(sym_ik)) THEN
340 0 : IF (sym_ik) THEN
341 0 : ijk_list%sym = symmetrik_ik
342 0 : sym_level = sym_level + 1
343 : END IF
344 : END IF
345 :
346 674 : IF (sym_level >= 2) THEN
347 0 : ijk_list%sym = symmetric_ijk
348 : END IF
349 :
350 674 : ijk_list%dist_3d = dist_3d
351 674 : IF (PRESENT(own_dist)) THEN
352 674 : ijk_list%owns_dist = own_dist
353 : ELSE
354 0 : ijk_list%owns_dist = .FALSE.
355 : END IF
356 :
357 674 : CALL timestop(handle)
358 674 : END SUBROUTINE
359 :
360 : ! **************************************************************************************************
361 : !> \brief Symmetry criterion
362 : !> \param a ...
363 : !> \param b ...
364 : !> \return ...
365 : ! **************************************************************************************************
366 3048714 : PURE FUNCTION include_symmetric(a, b)
367 : INTEGER, INTENT(IN) :: a, b
368 : LOGICAL :: include_symmetric
369 :
370 3048714 : IF (a > b) THEN
371 1171848 : include_symmetric = (MODULO(a + b, 2) /= 0)
372 : ELSE
373 1876866 : include_symmetric = (MODULO(a + b, 2) == 0)
374 : END IF
375 :
376 3048714 : END FUNCTION
377 :
378 : ! **************************************************************************************************
379 : !> \brief Destroy 3c neighborlist
380 : !> \param ijk_list ...
381 : ! **************************************************************************************************
382 674 : SUBROUTINE neighbor_list_3c_destroy(ijk_list)
383 : TYPE(neighbor_list_3c_type), INTENT(INOUT) :: ijk_list
384 :
385 674 : CALL release_neighbor_list_sets(ijk_list%ij_list)
386 674 : CALL release_neighbor_list_sets(ijk_list%jk_list)
387 :
388 674 : IF (ijk_list%owns_dist) THEN
389 674 : CALL distribution_3d_destroy(ijk_list%dist_3d)
390 : END IF
391 :
392 674 : END SUBROUTINE
393 :
394 : ! **************************************************************************************************
395 : !> \brief Create a 3-center neighborlist iterator
396 : !> \param iterator ...
397 : !> \param ijk_nl ...
398 : ! **************************************************************************************************
399 66114 : SUBROUTINE neighbor_list_3c_iterator_create(iterator, ijk_nl)
400 : TYPE(neighbor_list_3c_iterator_type), INTENT(OUT) :: iterator
401 : TYPE(neighbor_list_3c_type), INTENT(IN) :: ijk_nl
402 :
403 : CHARACTER(len=*), PARAMETER :: routineN = 'neighbor_list_3c_iterator_create'
404 :
405 : INTEGER :: handle
406 :
407 7346 : CALL timeset(routineN, handle)
408 :
409 7346 : CALL neighbor_list_iterator_create(iterator%iter_ij, ijk_nl%ij_list)
410 7346 : CALL neighbor_list_iterator_create(iterator%iter_jk, ijk_nl%jk_list, search=.TRUE.)
411 :
412 7346 : iterator%iter_level = 0
413 7346 : iterator%ijk_nl = ijk_nl
414 :
415 22038 : iterator%bounds_i = 0
416 22038 : iterator%bounds_j = 0
417 22038 : iterator%bounds_k = 0
418 :
419 7346 : CALL timestop(handle)
420 7346 : END SUBROUTINE
421 :
422 : ! **************************************************************************************************
423 : !> \brief impose atomic upper and lower bounds
424 : !> \param iterator ...
425 : !> \param bounds_i ...
426 : !> \param bounds_j ...
427 : !> \param bounds_k ...
428 : ! **************************************************************************************************
429 7276 : SUBROUTINE nl_3c_iter_set_bounds(iterator, bounds_i, bounds_j, bounds_k)
430 : TYPE(neighbor_list_3c_iterator_type), &
431 : INTENT(INOUT) :: iterator
432 : INTEGER, DIMENSION(2), INTENT(IN), OPTIONAL :: bounds_i, bounds_j, bounds_k
433 :
434 23200 : IF (PRESENT(bounds_i)) iterator%bounds_i = bounds_i
435 13324 : IF (PRESENT(bounds_j)) iterator%bounds_j = bounds_j
436 16942 : IF (PRESENT(bounds_k)) iterator%bounds_k = bounds_k
437 :
438 7276 : END SUBROUTINE
439 :
440 : ! **************************************************************************************************
441 : !> \brief Destroy 3c-nl iterator
442 : !> \param iterator ...
443 : ! **************************************************************************************************
444 7346 : SUBROUTINE neighbor_list_3c_iterator_destroy(iterator)
445 : TYPE(neighbor_list_3c_iterator_type), &
446 : INTENT(INOUT) :: iterator
447 :
448 : CHARACTER(len=*), PARAMETER :: routineN = 'neighbor_list_3c_iterator_destroy'
449 :
450 : INTEGER :: handle
451 :
452 7346 : CALL timeset(routineN, handle)
453 7346 : CALL neighbor_list_iterator_release(iterator%iter_ij)
454 7346 : CALL neighbor_list_iterator_release(iterator%iter_jk)
455 7346 : NULLIFY (iterator%iter_ij)
456 7346 : NULLIFY (iterator%iter_jk)
457 :
458 7346 : CALL timestop(handle)
459 7346 : END SUBROUTINE
460 :
461 : ! **************************************************************************************************
462 : !> \brief Iterate 3c-nl iterator
463 : !> \param iterator ...
464 : !> \return 0 if successful; 1 if end was reached
465 : ! **************************************************************************************************
466 4538971 : RECURSIVE FUNCTION neighbor_list_3c_iterate(iterator) RESULT(iter_stat)
467 : TYPE(neighbor_list_3c_iterator_type), &
468 : INTENT(INOUT) :: iterator
469 : INTEGER :: iter_stat
470 :
471 : INTEGER :: iatom, iter_level, jatom, jatom_1, &
472 : jatom_2, katom
473 : LOGICAL :: skip_this
474 :
475 4538971 : iter_level = iterator%iter_level
476 :
477 4538971 : IF (iter_level == 0) THEN
478 174231 : iter_stat = neighbor_list_iterate(iterator%iter_ij)
479 :
480 174231 : IF (iter_stat /= 0) THEN
481 : RETURN
482 : END IF
483 :
484 166885 : CALL get_iterator_info(iterator%iter_ij, iatom=iatom, jatom=jatom)
485 166885 : skip_this = .FALSE.
486 : IF ((iterator%bounds_i(1) > 0 .AND. iatom < iterator%bounds_i(1)) &
487 166885 : .OR. (iterator%bounds_i(2) > 0 .AND. iatom > iterator%bounds_i(2))) skip_this = .TRUE.
488 : IF ((iterator%bounds_j(1) > 0 .AND. jatom < iterator%bounds_j(1)) &
489 166885 : .OR. (iterator%bounds_j(2) > 0 .AND. jatom > iterator%bounds_j(2))) skip_this = .TRUE.
490 :
491 166885 : IF (skip_this) THEN
492 70662 : iter_stat = neighbor_list_3c_iterate(iterator)
493 70662 : RETURN
494 : END IF
495 :
496 : END IF
497 4460963 : iter_stat = nl_sub_iterate(iterator%iter_jk, iterator%iter_ij)
498 4460963 : IF (iter_stat /= 0) THEN
499 96223 : iterator%iter_level = 0
500 96223 : iter_stat = neighbor_list_3c_iterate(iterator)
501 96223 : RETURN
502 : ELSE
503 4364740 : iterator%iter_level = 1
504 : END IF
505 :
506 : CPASSERT(iter_stat == 0)
507 : CPASSERT(iterator%iter_level == 1)
508 4364740 : CALL get_iterator_info(iterator%iter_ij, iatom=iatom, jatom=jatom_1)
509 4364740 : CALL get_iterator_info(iterator%iter_jk, iatom=jatom_2, jatom=katom)
510 :
511 4364740 : CPASSERT(jatom_1 == jatom_2)
512 4364740 : jatom = jatom_1
513 :
514 4364740 : skip_this = .FALSE.
515 : IF ((iterator%bounds_k(1) > 0 .AND. katom < iterator%bounds_k(1)) &
516 4364740 : .OR. (iterator%bounds_k(2) > 0 .AND. katom > iterator%bounds_k(2))) skip_this = .TRUE.
517 :
518 : IF (skip_this) THEN
519 240840 : iter_stat = neighbor_list_3c_iterate(iterator)
520 240840 : RETURN
521 : END IF
522 :
523 4123900 : SELECT CASE (iterator%ijk_nl%sym)
524 : CASE (symmetric_none)
525 0 : skip_this = .FALSE.
526 : CASE (symmetric_ij)
527 0 : skip_this = .NOT. include_symmetric(iatom, jatom)
528 : CASE (symmetric_jk)
529 3048714 : skip_this = .NOT. include_symmetric(jatom, katom)
530 : CASE (symmetrik_ik)
531 0 : skip_this = .NOT. include_symmetric(iatom, katom)
532 : CASE (symmetric_ijk)
533 0 : skip_this = .NOT. include_symmetric(iatom, jatom) .OR. .NOT. include_symmetric(jatom, katom)
534 : CASE DEFAULT
535 4123900 : CPABORT("should not happen")
536 : END SELECT
537 :
538 3048714 : IF (skip_this) THEN
539 1173987 : iter_stat = neighbor_list_3c_iterate(iterator)
540 1173987 : RETURN
541 : END IF
542 :
543 : END FUNCTION
544 :
545 : ! **************************************************************************************************
546 : !> \brief Get info of current iteration
547 : !> \param iterator ...
548 : !> \param ikind ...
549 : !> \param jkind ...
550 : !> \param kkind ...
551 : !> \param nkind ...
552 : !> \param iatom ...
553 : !> \param jatom ...
554 : !> \param katom ...
555 : !> \param rij ...
556 : !> \param rjk ...
557 : !> \param rik ...
558 : !> \param cell_j ...
559 : !> \param cell_k ...
560 : !> \return ...
561 : ! **************************************************************************************************
562 2949913 : SUBROUTINE get_3c_iterator_info(iterator, ikind, jkind, kkind, nkind, iatom, jatom, katom, &
563 : rij, rjk, rik, cell_j, cell_k)
564 : TYPE(neighbor_list_3c_iterator_type), &
565 : INTENT(INOUT) :: iterator
566 : INTEGER, INTENT(OUT), OPTIONAL :: ikind, jkind, kkind, nkind, iatom, &
567 : jatom, katom
568 : REAL(KIND=dp), DIMENSION(3), INTENT(OUT), OPTIONAL :: rij, rjk, rik
569 : INTEGER, DIMENSION(3), INTENT(OUT), OPTIONAL :: cell_j, cell_k
570 :
571 : INTEGER, DIMENSION(2) :: atoms_1, atoms_2, kinds_1, kinds_2
572 : INTEGER, DIMENSION(3) :: cell_1, cell_2
573 : REAL(KIND=dp), DIMENSION(3) :: r_1, r_2
574 :
575 2949913 : CPASSERT(iterator%iter_level == 1)
576 :
577 : CALL get_iterator_info(iterator%iter_ij, &
578 : ikind=kinds_1(1), jkind=kinds_1(2), nkind=nkind, &
579 : iatom=atoms_1(1), jatom=atoms_1(2), r=r_1, &
580 2949913 : cell=cell_1)
581 :
582 : CALL get_iterator_info(iterator%iter_jk, &
583 : ikind=kinds_2(1), jkind=kinds_2(2), &
584 : iatom=atoms_2(1), jatom=atoms_2(2), r=r_2, &
585 2949913 : cell=cell_2)
586 :
587 2949913 : IF (PRESENT(ikind)) ikind = kinds_1(1)
588 2949913 : IF (PRESENT(jkind)) jkind = kinds_1(2)
589 2949913 : IF (PRESENT(kkind)) kkind = kinds_2(2)
590 2949913 : IF (PRESENT(iatom)) iatom = atoms_1(1)
591 2949913 : IF (PRESENT(jatom)) jatom = atoms_1(2)
592 2949913 : IF (PRESENT(katom)) katom = atoms_2(2)
593 :
594 2949913 : IF (PRESENT(rij)) rij = r_1
595 2949913 : IF (PRESENT(rjk)) rjk = r_2
596 14749565 : IF (PRESENT(rik)) rik = r_1 + r_2
597 :
598 2949913 : IF (PRESENT(cell_j)) cell_j = cell_1
599 14070365 : IF (PRESENT(cell_k)) cell_k = cell_1 + cell_2
600 :
601 2949913 : END SUBROUTINE
602 :
603 : ! **************************************************************************************************
604 : !> \brief Allocate blocks of a 3-center tensor based on neighborlist
605 : !> \param t3c empty DBCSR tensor
606 : !> Should be of shape (1,1) if no kpoints are used and of shape (nimages, nimages)
607 : !> if k-points are used
608 : !> \param nl_3c 3-center neighborlist
609 : !> \param basis_i ...
610 : !> \param basis_j ...
611 : !> \param basis_k ...
612 : !> \param qs_env ...
613 : !> \param potential_parameter ...
614 : !> \param op_pos ...
615 : !> \param do_kpoints ...
616 : !> \param do_hfx_kpoints ...
617 : !> \param bounds_i ...
618 : !> \param bounds_j ...
619 : !> \param bounds_k ...
620 : !> \param RI_range ...
621 : !> \param img_to_RI_cell ...
622 : !> \param cell_to_index ...
623 : !> \param cell_sym ...
624 : ! **************************************************************************************************
625 2420 : SUBROUTINE alloc_block_3c(t3c, nl_3c, basis_i, basis_j, basis_k, qs_env, potential_parameter, op_pos, &
626 : do_kpoints, do_hfx_kpoints, bounds_i, bounds_j, bounds_k, RI_range, &
627 2420 : img_to_RI_cell, cell_to_index, cell_sym)
628 : TYPE(dbt_type), DIMENSION(:, :), INTENT(INOUT) :: t3c
629 : TYPE(neighbor_list_3c_type), INTENT(INOUT) :: nl_3c
630 : TYPE(gto_basis_set_p_type), DIMENSION(:) :: basis_i, basis_j, basis_k
631 : TYPE(qs_environment_type), POINTER :: qs_env
632 : TYPE(libint_potential_type), INTENT(IN) :: potential_parameter
633 : INTEGER, INTENT(IN), OPTIONAL :: op_pos
634 : LOGICAL, INTENT(IN), OPTIONAL :: do_kpoints, do_hfx_kpoints
635 : INTEGER, DIMENSION(2), INTENT(IN), OPTIONAL :: bounds_i, bounds_j, bounds_k
636 : REAL(dp), INTENT(IN), OPTIONAL :: RI_range
637 : INTEGER, DIMENSION(:), INTENT(IN), OPTIONAL :: img_to_RI_cell
638 : INTEGER, DIMENSION(:, :, :), OPTIONAL, POINTER :: cell_to_index
639 : LOGICAL, INTENT(IN), OPTIONAL :: cell_sym
640 :
641 : CHARACTER(LEN=*), PARAMETER :: routineN = 'alloc_block_3c'
642 :
643 : INTEGER :: handle, iatom, ikind, j_img, jatom, &
644 : jcell, jkind, k_img, katom, kcell, &
645 : kkind, natom, ncell_RI, nimg, op_ij, &
646 : op_jk, op_pos_prv
647 : INTEGER(int_8) :: a, b, nblk_per_thread
648 2420 : INTEGER(int_8), ALLOCATABLE, DIMENSION(:, :) :: nblk
649 2420 : INTEGER, ALLOCATABLE, DIMENSION(:) :: img_to_RI_cell_prv
650 : INTEGER, DIMENSION(3) :: blk_idx, cell_j, cell_k, &
651 : kp_index_lbounds, kp_index_ubounds
652 : LOGICAL :: cell_sym_prv, do_hfx_kpoints_prv, &
653 : do_kpoints_prv
654 : REAL(KIND=dp) :: dij, dik, djk, dr_ij, dr_ik, dr_jk, &
655 : kind_radius_i, kind_radius_j, &
656 : kind_radius_k
657 : REAL(KIND=dp), DIMENSION(3) :: rij, rik, rjk
658 2420 : TYPE(atomic_kind_type), DIMENSION(:), POINTER :: atomic_kind_set
659 : TYPE(cell_type), POINTER :: cell
660 : TYPE(dft_control_type), POINTER :: dft_control
661 : TYPE(mp_para_env_type), POINTER :: para_env
662 : TYPE(neighbor_list_3c_iterator_type) :: nl_3c_iter
663 : TYPE(one_dim_int_array), ALLOCATABLE, &
664 2420 : DIMENSION(:, :) :: alloc_i, alloc_j, alloc_k
665 2420 : TYPE(qs_kind_type), DIMENSION(:), POINTER :: qs_kind_set
666 :
667 2420 : CALL timeset(routineN, handle)
668 2420 : NULLIFY (qs_kind_set, atomic_kind_set, cell)
669 :
670 2420 : IF (PRESENT(do_kpoints)) THEN
671 442 : do_kpoints_prv = do_kpoints
672 : ELSE
673 : do_kpoints_prv = .FALSE.
674 : END IF
675 2420 : IF (PRESENT(do_hfx_kpoints)) THEN
676 196 : do_hfx_kpoints_prv = do_hfx_kpoints
677 : ELSE
678 : do_hfx_kpoints_prv = .FALSE.
679 : END IF
680 196 : IF (do_hfx_kpoints_prv) THEN
681 196 : CPASSERT(do_kpoints_prv)
682 196 : CPASSERT(PRESENT(RI_range))
683 196 : CPASSERT(PRESENT(img_to_RI_cell))
684 : END IF
685 :
686 2224 : IF (PRESENT(img_to_RI_cell)) THEN
687 588 : ALLOCATE (img_to_RI_cell_prv(SIZE(img_to_RI_cell)))
688 9564 : img_to_RI_cell_prv(:) = img_to_RI_cell
689 : END IF
690 :
691 2420 : IF (PRESENT(cell_sym)) THEN
692 1858 : cell_sym_prv = cell_sym
693 : ELSE
694 : cell_sym_prv = .FALSE.
695 : END IF
696 :
697 2420 : dr_ij = 0.0_dp; dr_jk = 0.0_dp; dr_ik = 0.0_dp
698 :
699 2420 : op_ij = do_potential_id; op_jk = do_potential_id
700 :
701 2420 : IF (PRESENT(op_pos)) THEN
702 2420 : op_pos_prv = op_pos
703 : ELSE
704 : op_pos_prv = 1
705 : END IF
706 :
707 2224 : SELECT CASE (op_pos_prv)
708 : CASE (1)
709 2224 : op_ij = potential_parameter%potential_type
710 : CASE (2)
711 2420 : op_jk = potential_parameter%potential_type
712 : END SELECT
713 :
714 2420 : IF (op_ij == do_potential_truncated .OR. op_ij == do_potential_short) THEN
715 1398 : dr_ij = potential_parameter%cutoff_radius*cutoff_screen_factor
716 1398 : dr_ik = potential_parameter%cutoff_radius*cutoff_screen_factor
717 1022 : ELSEIF (op_ij == do_potential_coulomb) THEN
718 436 : dr_ij = 1000000.0_dp
719 436 : dr_ik = 1000000.0_dp
720 : END IF
721 :
722 2420 : IF (op_jk == do_potential_truncated .OR. op_jk == do_potential_short) THEN
723 20 : dr_jk = potential_parameter%cutoff_radius*cutoff_screen_factor
724 20 : dr_ik = potential_parameter%cutoff_radius*cutoff_screen_factor
725 2400 : ELSEIF (op_jk == do_potential_coulomb) THEN
726 0 : dr_jk = 1000000.0_dp
727 0 : dr_ik = 1000000.0_dp
728 : END IF
729 :
730 : CALL get_qs_env(qs_env, atomic_kind_set=atomic_kind_set, qs_kind_set=qs_kind_set, natom=natom, &
731 2420 : dft_control=dft_control, para_env=para_env, cell=cell)
732 :
733 2420 : IF (do_kpoints_prv) THEN
734 208 : CPASSERT(PRESENT(cell_to_index))
735 208 : CPASSERT(ASSOCIATED(cell_to_index))
736 : ! nimg = dft_control%nimages
737 30768 : nimg = MAXVAL(cell_to_index)
738 208 : ncell_RI = nimg
739 208 : IF (do_hfx_kpoints_prv) THEN
740 196 : nimg = SIZE(t3c, 1)
741 196 : ncell_RI = SIZE(t3c, 2)
742 : END IF
743 : ELSE
744 : nimg = 1
745 : ncell_RI = 1
746 : END IF
747 :
748 2420 : IF (do_kpoints_prv) THEN
749 832 : kp_index_lbounds = LBOUND(cell_to_index)
750 832 : kp_index_ubounds = UBOUND(cell_to_index)
751 : END IF
752 :
753 : !Do a first loop over the nl and count the blocks present
754 9680 : ALLOCATE (nblk(nimg, ncell_RI))
755 12820 : nblk(:, :) = 0
756 :
757 2420 : CALL neighbor_list_3c_iterator_create(nl_3c_iter, nl_3c)
758 :
759 2420 : CALL nl_3c_iter_set_bounds(nl_3c_iter, bounds_i, bounds_j, bounds_k)
760 :
761 909645 : DO WHILE (neighbor_list_3c_iterate(nl_3c_iter) == 0)
762 : CALL get_3c_iterator_info(nl_3c_iter, iatom=iatom, ikind=ikind, jkind=jkind, kkind=kkind, &
763 907225 : rij=rij, rjk=rjk, rik=rik, cell_j=cell_j, cell_k=cell_k)
764 :
765 3628900 : djk = NORM2(rjk)
766 3628900 : dij = NORM2(rij)
767 3628900 : dik = NORM2(rik)
768 :
769 907225 : IF (do_kpoints_prv) THEN
770 :
771 619038 : IF (ANY([cell_j(1), cell_j(2), cell_j(3)] < kp_index_lbounds) .OR. &
772 : ANY([cell_j(1), cell_j(2), cell_j(3)] > kp_index_ubounds)) CYCLE
773 :
774 88434 : jcell = cell_to_index(cell_j(1), cell_j(2), cell_j(3))
775 88434 : IF (jcell > nimg .OR. jcell < 1) CYCLE
776 :
777 529972 : IF (ANY([cell_k(1), cell_k(2), cell_k(3)] < kp_index_lbounds) .OR. &
778 : ANY([cell_k(1), cell_k(2), cell_k(3)] > kp_index_ubounds)) CYCLE
779 :
780 71837 : kcell = cell_to_index(cell_k(1), cell_k(2), cell_k(3))
781 71837 : IF (kcell > nimg .OR. kcell < 1) CYCLE
782 51759 : IF (do_hfx_kpoints_prv) THEN
783 49497 : IF (dik > RI_range) CYCLE
784 : kcell = 1
785 : END IF
786 : ELSE
787 : jcell = 1; kcell = 1
788 : END IF
789 :
790 829279 : CALL get_gto_basis_set(basis_i(ikind)%gto_basis_set, kind_radius=kind_radius_i)
791 829279 : CALL get_gto_basis_set(basis_j(jkind)%gto_basis_set, kind_radius=kind_radius_j)
792 829279 : CALL get_gto_basis_set(basis_k(kkind)%gto_basis_set, kind_radius=kind_radius_k)
793 :
794 829279 : IF (kind_radius_j + kind_radius_i + dr_ij < dij) CYCLE
795 829279 : IF (kind_radius_j + kind_radius_k + dr_jk < djk) CYCLE
796 829279 : IF (kind_radius_k + kind_radius_i + dr_ik < dik) CYCLE
797 :
798 901529 : nblk(jcell, kcell) = nblk(jcell, kcell) + 1
799 : END DO
800 2420 : CALL neighbor_list_3c_iterator_destroy(nl_3c_iter)
801 :
802 : !Do a second loop over the nl to give block indices
803 20080 : ALLOCATE (alloc_i(nimg, ncell_RI))
804 17660 : ALLOCATE (alloc_j(nimg, ncell_RI))
805 17660 : ALLOCATE (alloc_k(nimg, ncell_RI))
806 4902 : DO k_img = 1, ncell_RI
807 12820 : DO j_img = 1, nimg
808 20864 : ALLOCATE (alloc_i(j_img, k_img)%array(nblk(j_img, k_img)))
809 20864 : ALLOCATE (alloc_j(j_img, k_img)%array(nblk(j_img, k_img)))
810 23346 : ALLOCATE (alloc_k(j_img, k_img)%array(nblk(j_img, k_img)))
811 : END DO
812 : END DO
813 12820 : nblk(:, :) = 0
814 :
815 2420 : CALL neighbor_list_3c_iterator_create(nl_3c_iter, nl_3c)
816 :
817 2420 : CALL nl_3c_iter_set_bounds(nl_3c_iter, bounds_i, bounds_j, bounds_k)
818 :
819 909645 : DO WHILE (neighbor_list_3c_iterate(nl_3c_iter) == 0)
820 : CALL get_3c_iterator_info(nl_3c_iter, ikind=ikind, jkind=jkind, kkind=kkind, &
821 : iatom=iatom, jatom=jatom, katom=katom, &
822 907225 : rij=rij, rjk=rjk, rik=rik, cell_j=cell_j, cell_k=cell_k)
823 :
824 3628900 : djk = NORM2(rjk)
825 3628900 : dij = NORM2(rij)
826 3628900 : dik = NORM2(rik)
827 :
828 907225 : IF (do_kpoints_prv) THEN
829 :
830 619038 : IF (ANY([cell_j(1), cell_j(2), cell_j(3)] < kp_index_lbounds) .OR. &
831 : ANY([cell_j(1), cell_j(2), cell_j(3)] > kp_index_ubounds)) CYCLE
832 :
833 88434 : jcell = cell_to_index(cell_j(1), cell_j(2), cell_j(3))
834 88434 : IF (jcell > nimg .OR. jcell < 1) CYCLE
835 :
836 529972 : IF (ANY([cell_k(1), cell_k(2), cell_k(3)] < kp_index_lbounds) .OR. &
837 : ANY([cell_k(1), cell_k(2), cell_k(3)] > kp_index_ubounds)) CYCLE
838 :
839 71837 : kcell = cell_to_index(cell_k(1), cell_k(2), cell_k(3))
840 71837 : IF (kcell > nimg .OR. kcell < 1) CYCLE
841 51759 : IF (do_hfx_kpoints_prv) THEN
842 49497 : IF (dik > RI_range) CYCLE
843 8226 : kcell = img_to_RI_cell_prv(kcell)
844 : END IF
845 : ELSE
846 : jcell = 1; kcell = 1
847 : END IF
848 :
849 3317116 : blk_idx = [iatom, jatom, katom]
850 829279 : IF (do_hfx_kpoints_prv) THEN
851 8226 : blk_idx(3) = (kcell - 1)*natom + katom
852 8226 : kcell = 1
853 : END IF
854 :
855 829279 : CALL get_gto_basis_set(basis_i(ikind)%gto_basis_set, kind_radius=kind_radius_i)
856 829279 : CALL get_gto_basis_set(basis_j(jkind)%gto_basis_set, kind_radius=kind_radius_j)
857 829279 : CALL get_gto_basis_set(basis_k(kkind)%gto_basis_set, kind_radius=kind_radius_k)
858 :
859 829279 : IF (kind_radius_j + kind_radius_i + dr_ij < dij) CYCLE
860 829279 : IF (kind_radius_j + kind_radius_k + dr_jk < djk) CYCLE
861 829279 : IF (kind_radius_k + kind_radius_i + dr_ik < dik) CYCLE
862 :
863 415244 : nblk(jcell, kcell) = nblk(jcell, kcell) + 1
864 :
865 : !Note: there may be repeated indices due to periodic images => dbt_reserve_blocks takes care of it
866 415244 : alloc_i(jcell, kcell)%array(nblk(jcell, kcell)) = blk_idx(1)
867 415244 : alloc_j(jcell, kcell)%array(nblk(jcell, kcell)) = blk_idx(2)
868 901529 : alloc_k(jcell, kcell)%array(nblk(jcell, kcell)) = blk_idx(3)
869 :
870 : END DO
871 2420 : CALL neighbor_list_3c_iterator_destroy(nl_3c_iter)
872 :
873 : !TODO: Parallelize creation of block list.
874 : !$OMP PARALLEL DEFAULT(NONE) SHARED(t3c,nimg,nblk,alloc_i,alloc_j,alloc_k,ncell_RI,cell_sym_prv) &
875 2420 : !$OMP PRIVATE(k_img,j_img,nblk_per_thread,A,b)
876 : DO k_img = 1, ncell_RI
877 : DO j_img = 1, nimg
878 : IF (cell_sym_prv .AND. j_img < k_img) CYCLE
879 : IF (ALLOCATED(alloc_i(j_img, k_img)%array)) THEN
880 : nblk_per_thread = nblk(j_img, k_img)/omp_get_num_threads() + 1
881 : a = omp_get_thread_num()*nblk_per_thread + 1
882 : b = MIN(a + nblk_per_thread, nblk(j_img, k_img))
883 : CALL dbt_reserve_blocks(t3c(j_img, k_img), &
884 : alloc_i(j_img, k_img)%array(a:b), &
885 : alloc_j(j_img, k_img)%array(a:b), &
886 : alloc_k(j_img, k_img)%array(a:b))
887 : END IF
888 : END DO
889 : END DO
890 : !$OMP END PARALLEL
891 :
892 2420 : CALL timestop(handle)
893 :
894 47954 : END SUBROUTINE
895 :
896 : ! **************************************************************************************************
897 : !> \brief Build 3-center derivative tensors
898 : !> \param t3c_der_i empty DBCSR tensor which will contain the 1st center derivatives
899 : !> \param t3c_der_k empty DBCSR tensor which will contain the 3rd center derivatives
900 : !> \param filter_eps Filter threshold for tensor blocks
901 : !> \param qs_env ...
902 : !> \param nl_3c 3-center neighborlist
903 : !> \param basis_i ...
904 : !> \param basis_j ...
905 : !> \param basis_k ...
906 : !> \param potential_parameter ...
907 : !> \param der_eps neglect integrals smaller than der_eps
908 : !> \param op_pos operator position.
909 : !> 1: calculate (i|jk) integrals,
910 : !> 2: calculate (ij|k) integrals
911 : !> \param do_kpoints ...
912 : !> this routine requires that libint has been static initialised somewhere else
913 : !> \param do_hfx_kpoints ...
914 : !> \param bounds_i ...
915 : !> \param bounds_j ...
916 : !> \param bounds_k ...
917 : !> \param RI_range ...
918 : !> \param img_to_RI_cell ...
919 : ! **************************************************************************************************
920 562 : SUBROUTINE build_3c_derivatives(t3c_der_i, t3c_der_k, filter_eps, qs_env, &
921 562 : nl_3c, basis_i, basis_j, basis_k, &
922 : potential_parameter, der_eps, &
923 : op_pos, do_kpoints, do_hfx_kpoints, &
924 : bounds_i, bounds_j, bounds_k, &
925 562 : RI_range, img_to_RI_cell)
926 :
927 : TYPE(dbt_type), DIMENSION(:, :, :), INTENT(INOUT) :: t3c_der_i, t3c_der_k
928 : REAL(KIND=dp), INTENT(IN) :: filter_eps
929 : TYPE(qs_environment_type), POINTER :: qs_env
930 : TYPE(neighbor_list_3c_type), INTENT(INOUT) :: nl_3c
931 : TYPE(gto_basis_set_p_type), DIMENSION(:) :: basis_i, basis_j, basis_k
932 : TYPE(libint_potential_type), INTENT(IN) :: potential_parameter
933 : REAL(KIND=dp), INTENT(IN), OPTIONAL :: der_eps
934 : INTEGER, INTENT(IN), OPTIONAL :: op_pos
935 : LOGICAL, INTENT(IN), OPTIONAL :: do_kpoints, do_hfx_kpoints
936 : INTEGER, DIMENSION(2), INTENT(IN), OPTIONAL :: bounds_i, bounds_j, bounds_k
937 : REAL(dp), INTENT(IN), OPTIONAL :: RI_range
938 : INTEGER, DIMENSION(:), INTENT(IN), OPTIONAL :: img_to_RI_cell
939 :
940 : CHARACTER(LEN=*), PARAMETER :: routineN = 'build_3c_derivatives'
941 :
942 : INTEGER :: block_end_i, block_end_j, block_end_k, block_start_i, block_start_j, &
943 : block_start_k, egfi, handle, handle2, i, i_img, i_xyz, iatom, ibasis, ikind, ilist, imax, &
944 : iset, j_img, jatom, jcell, jkind, jset, katom, kcell, kkind, kset, m_max, max_ncoj, &
945 : max_nset, max_nsgfi, maxli, maxlj, maxlk, mepos, natom, nbasis, ncell_RI, ncoi, ncoj, &
946 : ncok, nimg, nseti, nsetj, nsetk, nthread, op_ij, op_jk, op_pos_prv, sgfi, sgfj, sgfk, &
947 : unit_id
948 562 : INTEGER, ALLOCATABLE, DIMENSION(:) :: img_to_RI_cell_prv
949 : INTEGER, DIMENSION(2) :: bo
950 : INTEGER, DIMENSION(3) :: blk_idx, blk_size, cell_j, cell_k, &
951 : kp_index_lbounds, kp_index_ubounds, sp
952 562 : INTEGER, DIMENSION(:), POINTER :: lmax_i, lmax_j, lmax_k, lmin_i, lmin_j, &
953 1124 : lmin_k, npgfi, npgfj, npgfk, nsgfi, &
954 562 : nsgfj, nsgfk
955 562 : INTEGER, DIMENSION(:, :), POINTER :: first_sgf_i, first_sgf_j, first_sgf_k
956 562 : INTEGER, DIMENSION(:, :, :), POINTER :: cell_to_index
957 : LOGICAL :: do_hfx_kpoints_prv, do_kpoints_prv, &
958 : found, skip
959 : LOGICAL, DIMENSION(3) :: block_j_not_zero, block_k_not_zero, &
960 : der_j_zero, der_k_zero
961 : REAL(dp), DIMENSION(3) :: der_ext_i, der_ext_j, der_ext_k
962 : REAL(KIND=dp) :: dij, dik, djk, dr_ij, dr_ik, dr_jk, &
963 : kind_radius_i, kind_radius_j, &
964 : kind_radius_k, prefac
965 562 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:) :: ccp_buffer, cpp_buffer, &
966 562 : max_contraction_i, max_contraction_j, &
967 562 : max_contraction_k
968 562 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :) :: dijk_contr, dummy_block_t
969 562 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :, :) :: block_t_i, block_t_j, block_t_k, dijk_j, &
970 1124 : dijk_k, tmp_ijk_i, tmp_ijk_j
971 : REAL(KIND=dp), DIMENSION(3) :: ri, rij, rik, rj, rjk, rk
972 562 : REAL(KIND=dp), DIMENSION(:), POINTER :: set_radius_i, set_radius_j, set_radius_k
973 562 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: rpgf_i, rpgf_j, rpgf_k, sphi_i, sphi_j, &
974 1124 : sphi_k, zeti, zetj, zetk
975 562 : TYPE(atomic_kind_type), DIMENSION(:), POINTER :: atomic_kind_set
976 1124 : TYPE(cp_2d_r_p_type), DIMENSION(:, :), POINTER :: spi, spk, tspj
977 : TYPE(cp_libint_t) :: lib
978 3934 : TYPE(dbt_type) :: t3c_tmp
979 562 : TYPE(dbt_type), ALLOCATABLE, DIMENSION(:, :) :: t3c_template
980 562 : TYPE(dbt_type), ALLOCATABLE, DIMENSION(:, :, :) :: t3c_der_j
981 : TYPE(dft_control_type), POINTER :: dft_control
982 : TYPE(gto_basis_set_type), POINTER :: basis_set
983 : TYPE(kpoint_type), POINTER :: kpoints
984 : TYPE(mp_para_env_type), POINTER :: para_env
985 : TYPE(neighbor_list_3c_iterator_type) :: nl_3c_iter
986 562 : TYPE(qs_kind_type), DIMENSION(:), POINTER :: qs_kind_set
987 :
988 562 : CALL timeset(routineN, handle)
989 :
990 562 : IF (PRESENT(do_kpoints)) THEN
991 74 : do_kpoints_prv = do_kpoints
992 : ELSE
993 : do_kpoints_prv = .FALSE.
994 : END IF
995 :
996 562 : IF (PRESENT(do_hfx_kpoints)) THEN
997 74 : do_hfx_kpoints_prv = do_hfx_kpoints
998 : ELSE
999 : do_hfx_kpoints_prv = .FALSE.
1000 : END IF
1001 74 : IF (do_hfx_kpoints_prv) THEN
1002 74 : CPASSERT(do_kpoints_prv)
1003 74 : CPASSERT(PRESENT(RI_range))
1004 74 : CPASSERT(PRESENT(img_to_RI_cell))
1005 : END IF
1006 :
1007 488 : IF (PRESENT(img_to_RI_cell)) THEN
1008 222 : ALLOCATE (img_to_RI_cell_prv(SIZE(img_to_RI_cell)))
1009 3586 : img_to_RI_cell_prv(:) = img_to_RI_cell
1010 : END IF
1011 :
1012 562 : op_ij = do_potential_id; op_jk = do_potential_id
1013 :
1014 562 : IF (PRESENT(op_pos)) THEN
1015 562 : op_pos_prv = op_pos
1016 : ELSE
1017 0 : op_pos_prv = 1
1018 : END IF
1019 :
1020 488 : SELECT CASE (op_pos_prv)
1021 : CASE (1)
1022 488 : op_ij = potential_parameter%potential_type
1023 : CASE (2)
1024 562 : op_jk = potential_parameter%potential_type
1025 : END SELECT
1026 :
1027 562 : dr_ij = 0.0_dp; dr_jk = 0.0_dp; dr_ik = 0.0_dp
1028 :
1029 562 : IF (op_ij == do_potential_truncated .OR. op_ij == do_potential_short) THEN
1030 50 : dr_ij = potential_parameter%cutoff_radius*cutoff_screen_factor
1031 50 : dr_ik = potential_parameter%cutoff_radius*cutoff_screen_factor
1032 512 : ELSEIF (op_ij == do_potential_coulomb) THEN
1033 244 : dr_ij = 1000000.0_dp
1034 244 : dr_ik = 1000000.0_dp
1035 : END IF
1036 :
1037 562 : IF (op_jk == do_potential_truncated .OR. op_jk == do_potential_short) THEN
1038 8 : dr_jk = potential_parameter%cutoff_radius*cutoff_screen_factor
1039 8 : dr_ik = potential_parameter%cutoff_radius*cutoff_screen_factor
1040 554 : ELSEIF (op_jk == do_potential_coulomb) THEN
1041 0 : dr_jk = 1000000.0_dp
1042 0 : dr_ik = 1000000.0_dp
1043 : END IF
1044 :
1045 562 : NULLIFY (qs_kind_set, atomic_kind_set)
1046 :
1047 : ! get stuff
1048 : CALL get_qs_env(qs_env, atomic_kind_set=atomic_kind_set, qs_kind_set=qs_kind_set, &
1049 562 : natom=natom, kpoints=kpoints, dft_control=dft_control, para_env=para_env)
1050 :
1051 562 : IF (do_kpoints_prv) THEN
1052 74 : nimg = dft_control%nimages
1053 74 : ncell_RI = nimg
1054 74 : IF (do_hfx_kpoints_prv) THEN
1055 74 : nimg = SIZE(t3c_der_k, 1)
1056 74 : ncell_RI = SIZE(t3c_der_k, 2)
1057 : END IF
1058 74 : CALL get_kpoint_info(kpoints, cell_to_index=cell_to_index)
1059 : ELSE
1060 : nimg = 1
1061 : ncell_RI = 1
1062 : END IF
1063 :
1064 562 : IF (do_hfx_kpoints_prv) THEN
1065 74 : CPASSERT(op_pos_prv == 2)
1066 : ELSE
1067 1952 : CPASSERT(ALL(SHAPE(t3c_der_k) == [nimg, ncell_RI, 3]))
1068 : END IF
1069 :
1070 8590 : ALLOCATE (t3c_template(nimg, ncell_RI))
1071 1124 : DO j_img = 1, ncell_RI
1072 3532 : DO i_img = 1, nimg
1073 2970 : CALL dbt_create(t3c_der_i(i_img, j_img, 1), t3c_template(i_img, j_img))
1074 : END DO
1075 : END DO
1076 :
1077 : CALL alloc_block_3c(t3c_template, nl_3c, basis_i, basis_j, basis_k, qs_env, potential_parameter, &
1078 : op_pos=op_pos_prv, do_kpoints=do_kpoints, do_hfx_kpoints=do_hfx_kpoints, &
1079 : bounds_i=bounds_i, bounds_j=bounds_j, bounds_k=bounds_k, &
1080 1050 : RI_range=RI_range, img_to_RI_cell=img_to_RI_cell, cell_to_index=cell_to_index)
1081 2248 : DO i_xyz = 1, 3
1082 3934 : DO j_img = 1, ncell_RI
1083 10596 : DO i_img = 1, nimg
1084 7224 : CALL dbt_copy(t3c_template(i_img, j_img), t3c_der_i(i_img, j_img, i_xyz))
1085 8910 : CALL dbt_copy(t3c_template(i_img, j_img), t3c_der_k(i_img, j_img, i_xyz))
1086 : END DO
1087 : END DO
1088 : END DO
1089 :
1090 1124 : DO j_img = 1, ncell_RI
1091 3532 : DO i_img = 1, nimg
1092 2970 : CALL dbt_destroy(t3c_template(i_img, j_img))
1093 : END DO
1094 : END DO
1095 2970 : DEALLOCATE (t3c_template)
1096 :
1097 562 : IF (nl_3c%sym == symmetric_jk) THEN
1098 9760 : ALLOCATE (t3c_der_j(nimg, ncell_RI, 3))
1099 1952 : DO i_xyz = 1, 3
1100 3416 : DO j_img = 1, ncell_RI
1101 4392 : DO i_img = 1, nimg
1102 1464 : CALL dbt_create(t3c_der_k(i_img, j_img, i_xyz), t3c_der_j(i_img, j_img, i_xyz))
1103 2928 : CALL dbt_copy(t3c_der_k(i_img, j_img, i_xyz), t3c_der_j(i_img, j_img, i_xyz))
1104 : END DO
1105 : END DO
1106 : END DO
1107 : END IF
1108 :
1109 : !Need the max l for each basis for libint and max nset, nco and nsgf for LIBXSMM contraction
1110 562 : nbasis = SIZE(basis_i)
1111 562 : max_nsgfi = 0
1112 562 : max_nset = 0
1113 562 : maxli = 0
1114 1614 : DO ibasis = 1, nbasis
1115 : CALL get_gto_basis_set(gto_basis_set=basis_i(ibasis)%gto_basis_set, maxl=imax, &
1116 1052 : nset=iset, nsgf_set=nsgfi, npgf=npgfi)
1117 1052 : maxli = MAX(maxli, imax)
1118 1052 : max_nset = MAX(max_nset, iset)
1119 5740 : max_nsgfi = MAX(max_nsgfi, MAXVAL(nsgfi))
1120 : END DO
1121 : max_ncoj = 0
1122 : maxlj = 0
1123 1614 : DO ibasis = 1, nbasis
1124 : CALL get_gto_basis_set(gto_basis_set=basis_j(ibasis)%gto_basis_set, maxl=imax, &
1125 1052 : nset=jset, nsgf_set=nsgfj, npgf=npgfj)
1126 1052 : maxlj = MAX(maxlj, imax)
1127 1052 : max_nset = MAX(max_nset, jset)
1128 4432 : max_ncoj = MAX(max_ncoj, MAXVAL(npgfj)*ncoset(maxlj))
1129 : END DO
1130 : maxlk = 0
1131 1614 : DO ibasis = 1, nbasis
1132 : CALL get_gto_basis_set(gto_basis_set=basis_k(ibasis)%gto_basis_set, maxl=imax, &
1133 1052 : nset=kset, nsgf_set=nsgfk, npgf=npgfk)
1134 1052 : maxlk = MAX(maxlk, imax)
1135 1614 : max_nset = MAX(max_nset, kset)
1136 : END DO
1137 562 : m_max = maxli + maxlj + maxlk + 1
1138 :
1139 : !To minimize expensive memory ops and generally optimize contraction, pre-allocate
1140 : !contiguous sphi arrays (and transposed in the case of sphi_i)
1141 :
1142 562 : NULLIFY (tspj, spi, spk)
1143 26396 : ALLOCATE (spi(max_nset, nbasis), tspj(max_nset, nbasis), spk(max_nset, nbasis))
1144 :
1145 1614 : DO ibasis = 1, nbasis
1146 7862 : DO iset = 1, max_nset
1147 6248 : NULLIFY (spi(iset, ibasis)%array)
1148 6248 : NULLIFY (tspj(iset, ibasis)%array)
1149 7300 : NULLIFY (spk(iset, ibasis)%array)
1150 : END DO
1151 : END DO
1152 :
1153 2248 : DO ilist = 1, 3
1154 5404 : DO ibasis = 1, nbasis
1155 3156 : IF (ilist == 1) basis_set => basis_i(ibasis)%gto_basis_set
1156 3156 : IF (ilist == 2) basis_set => basis_j(ibasis)%gto_basis_set
1157 3156 : IF (ilist == 3) basis_set => basis_k(ibasis)%gto_basis_set
1158 :
1159 14404 : DO iset = 1, basis_set%nset
1160 :
1161 9562 : ncoi = basis_set%npgf(iset)*ncoset(basis_set%lmax(iset))
1162 9562 : sgfi = basis_set%first_sgf(1, iset)
1163 9562 : egfi = sgfi + basis_set%nsgf_set(iset) - 1
1164 :
1165 12718 : IF (ilist == 1) THEN
1166 16504 : ALLOCATE (spi(iset, ibasis)%array(ncoi, basis_set%nsgf_set(iset)))
1167 3073846 : spi(iset, ibasis)%array(:, :) = basis_set%sphi(1:ncoi, sgfi:egfi)
1168 :
1169 5436 : ELSE IF (ilist == 2) THEN
1170 11272 : ALLOCATE (tspj(iset, ibasis)%array(basis_set%nsgf_set(iset), ncoi))
1171 176530 : tspj(iset, ibasis)%array(:, :) = TRANSPOSE(basis_set%sphi(1:ncoi, sgfi:egfi))
1172 :
1173 : ELSE
1174 10472 : ALLOCATE (spk(iset, ibasis)%array(ncoi, basis_set%nsgf_set(iset)))
1175 833426 : spk(iset, ibasis)%array(:, :) = basis_set%sphi(1:ncoi, sgfi:egfi)
1176 : END IF
1177 :
1178 : END DO !iset
1179 : END DO !ibasis
1180 : END DO !ilist
1181 :
1182 : !Init the truncated Coulomb operator
1183 562 : IF (op_ij == do_potential_truncated .OR. op_jk == do_potential_truncated) THEN
1184 :
1185 58 : IF (m_max > get_lmax_init()) THEN
1186 8 : IF (para_env%mepos == 0) THEN
1187 4 : CALL open_file(unit_number=unit_id, file_name=potential_parameter%filename)
1188 : END IF
1189 8 : CALL init(m_max, unit_id, para_env%mepos, para_env)
1190 8 : IF (para_env%mepos == 0) THEN
1191 4 : CALL close_file(unit_id)
1192 : END IF
1193 : END IF
1194 : END IF
1195 :
1196 562 : CALL init_md_ftable(nmax=m_max)
1197 :
1198 562 : IF (do_kpoints_prv) THEN
1199 296 : kp_index_lbounds = LBOUND(cell_to_index)
1200 296 : kp_index_ubounds = UBOUND(cell_to_index)
1201 : END IF
1202 :
1203 562 : nthread = 1
1204 562 : !$ nthread = omp_get_max_threads()
1205 :
1206 : !$OMP PARALLEL DEFAULT(NONE) &
1207 : !$OMP SHARED (nthread,do_kpoints_prv,kp_index_lbounds,kp_index_ubounds,maxli,maxlk,maxlj,bounds_i,&
1208 : !$OMP bounds_j,bounds_k,nimg,basis_i,basis_j,basis_k,dr_ij,dr_jk,dr_ik,ncoset,op_pos_prv,&
1209 : !$OMP potential_parameter,der_eps,tspj,spi,spk,cell_to_index,RI_range,natom,nl_3c,&
1210 : !$OMP t3c_der_i,t3c_der_k,t3c_der_j,ncell_RI,img_to_RI_cell_prv,do_hfx_kpoints_prv) &
1211 : !$OMP PRIVATE (lib,nl_3c_iter,ikind,jkind,kkind,iatom,jatom,katom,rij,rjk,rik,cell_j,cell_k,&
1212 : !$OMP prefac,jcell,kcell,first_sgf_i,lmax_i,lmin_i,npgfi,nseti,nsgfi,rpgf_i,set_radius_i,&
1213 : !$OMP sphi_i,zeti,kind_radius_i,first_sgf_j,lmax_j,lmin_j,npgfj,nsetj,nsgfj,rpgf_j,&
1214 : !$OMP set_radius_j,sphi_j,zetj,kind_radius_j,first_sgf_k,lmax_k,lmin_k,npgfk,nsetk,nsgfk,&
1215 : !$OMP rpgf_k,set_radius_k,sphi_k,zetk,kind_radius_k,djk,dij,dik,ncoi,ncoj,ncok,sgfi,sgfj,&
1216 : !$OMP sgfk,dijk_j,dijk_k,ri,rj,rk,max_contraction_i,max_contraction_j,blk_idx,&
1217 : !$OMP max_contraction_k,iset,jset,kset,block_t_i,blk_size,dijk_contr,cpp_buffer,ccp_buffer,&
1218 : !$OMP block_start_j,block_end_j,block_start_k,block_end_k,block_start_i,block_end_i,found,&
1219 : !$OMP dummy_block_t,sp,handle2,mepos,bo,block_t_k,der_ext_i,der_ext_j,der_ext_k,tmp_ijk_i,&
1220 562 : !$OMP block_k_not_zero,der_k_zero,skip,der_j_zero,block_t_j,block_j_not_zero,tmp_ijk_j,i)
1221 :
1222 : mepos = 0
1223 : !$ mepos = omp_get_thread_num()
1224 :
1225 : CALL cp_libint_init_3eri1(lib, MAX(maxli, maxlj, maxlk))
1226 : CALL cp_libint_set_contrdepth(lib, 1)
1227 : CALL neighbor_list_3c_iterator_create(nl_3c_iter, nl_3c)
1228 :
1229 : !We split the provided bounds among the threads such that each threads works on a different set of atoms
1230 : IF (PRESENT(bounds_i)) THEN
1231 : bo = get_limit(bounds_i(2) - bounds_i(1) + 1, nthread, mepos)
1232 : bo(:) = bo(:) + bounds_i(1) - 1
1233 : CALL nl_3c_iter_set_bounds(nl_3c_iter, bo, bounds_j, bounds_k)
1234 : ELSE IF (PRESENT(bounds_j)) THEN
1235 : bo = get_limit(bounds_j(2) - bounds_j(1) + 1, nthread, mepos)
1236 : bo(:) = bo(:) + bounds_j(1) - 1
1237 : CALL nl_3c_iter_set_bounds(nl_3c_iter, bounds_i, bo, bounds_k)
1238 : ELSE IF (PRESENT(bounds_k)) THEN
1239 : bo = get_limit(bounds_k(2) - bounds_k(1) + 1, nthread, mepos)
1240 : bo(:) = bo(:) + bounds_k(1) - 1
1241 : CALL nl_3c_iter_set_bounds(nl_3c_iter, bounds_i, bounds_j, bo)
1242 : ELSE
1243 : bo = get_limit(natom, nthread, mepos)
1244 : CALL nl_3c_iter_set_bounds(nl_3c_iter, bo, bounds_j, bounds_k)
1245 : END IF
1246 :
1247 : skip = .FALSE.
1248 : IF (bo(1) > bo(2)) skip = .TRUE.
1249 :
1250 : DO WHILE (neighbor_list_3c_iterate(nl_3c_iter) == 0)
1251 : CALL get_3c_iterator_info(nl_3c_iter, ikind=ikind, jkind=jkind, kkind=kkind, &
1252 : iatom=iatom, jatom=jatom, katom=katom, &
1253 : rij=rij, rjk=rjk, rik=rik, cell_j=cell_j, cell_k=cell_k)
1254 : IF (skip) EXIT
1255 :
1256 : djk = NORM2(rjk)
1257 : dij = NORM2(rij)
1258 : dik = NORM2(rik)
1259 :
1260 : IF (do_kpoints_prv) THEN
1261 : prefac = 0.5_dp
1262 : ELSEIF (nl_3c%sym == symmetric_jk) THEN
1263 : IF (jatom == katom) THEN
1264 : prefac = 0.5_dp
1265 : ELSE
1266 : prefac = 1.0_dp
1267 : END IF
1268 : ELSE
1269 : prefac = 1.0_dp
1270 : END IF
1271 : IF (do_hfx_kpoints_prv) prefac = 1.0_dp
1272 :
1273 : IF (do_kpoints_prv) THEN
1274 :
1275 : IF (ANY([cell_j(1), cell_j(2), cell_j(3)] < kp_index_lbounds) .OR. &
1276 : ANY([cell_j(1), cell_j(2), cell_j(3)] > kp_index_ubounds)) CYCLE
1277 :
1278 : jcell = cell_to_index(cell_j(1), cell_j(2), cell_j(3))
1279 : IF (jcell > nimg .OR. jcell < 1) CYCLE
1280 :
1281 : IF (ANY([cell_k(1), cell_k(2), cell_k(3)] < kp_index_lbounds) .OR. &
1282 : ANY([cell_k(1), cell_k(2), cell_k(3)] > kp_index_ubounds)) CYCLE
1283 :
1284 : kcell = cell_to_index(cell_k(1), cell_k(2), cell_k(3))
1285 : IF (kcell > nimg .OR. kcell < 1) CYCLE
1286 : !In case of HFX k-points, we only consider P^k if d_ik <= RI_range
1287 : IF (do_hfx_kpoints_prv) THEN
1288 : IF (dik > RI_range) CYCLE
1289 : kcell = img_to_RI_cell_prv(kcell)
1290 : END IF
1291 : ELSE
1292 : jcell = 1; kcell = 1
1293 : END IF
1294 :
1295 : blk_idx = [iatom, jatom, katom]
1296 : IF (do_hfx_kpoints_prv) THEN
1297 : blk_idx(3) = (kcell - 1)*natom + katom
1298 : kcell = 1
1299 : END IF
1300 :
1301 : CALL get_gto_basis_set(basis_i(ikind)%gto_basis_set, first_sgf=first_sgf_i, lmax=lmax_i, lmin=lmin_i, &
1302 : npgf=npgfi, nset=nseti, nsgf_set=nsgfi, pgf_radius=rpgf_i, set_radius=set_radius_i, &
1303 : sphi=sphi_i, zet=zeti, kind_radius=kind_radius_i)
1304 :
1305 : CALL get_gto_basis_set(basis_j(jkind)%gto_basis_set, first_sgf=first_sgf_j, lmax=lmax_j, lmin=lmin_j, &
1306 : npgf=npgfj, nset=nsetj, nsgf_set=nsgfj, pgf_radius=rpgf_j, set_radius=set_radius_j, &
1307 : sphi=sphi_j, zet=zetj, kind_radius=kind_radius_j)
1308 :
1309 : CALL get_gto_basis_set(basis_k(kkind)%gto_basis_set, first_sgf=first_sgf_k, lmax=lmax_k, lmin=lmin_k, &
1310 : npgf=npgfk, nset=nsetk, nsgf_set=nsgfk, pgf_radius=rpgf_k, set_radius=set_radius_k, &
1311 : sphi=sphi_k, zet=zetk, kind_radius=kind_radius_k)
1312 :
1313 : IF (kind_radius_j + kind_radius_i + dr_ij < dij) CYCLE
1314 : IF (kind_radius_j + kind_radius_k + dr_jk < djk) CYCLE
1315 : IF (kind_radius_k + kind_radius_i + dr_ik < dik) CYCLE
1316 :
1317 : ALLOCATE (max_contraction_i(nseti))
1318 : max_contraction_i = 0.0_dp
1319 : DO iset = 1, nseti
1320 : sgfi = first_sgf_i(1, iset)
1321 : max_contraction_i(iset) = MAXVAL((/(SUM(ABS(sphi_i(:, i))), i=sgfi, sgfi + nsgfi(iset) - 1)/))
1322 : END DO
1323 :
1324 : ALLOCATE (max_contraction_j(nsetj))
1325 : max_contraction_j = 0.0_dp
1326 : DO jset = 1, nsetj
1327 : sgfj = first_sgf_j(1, jset)
1328 : max_contraction_j(jset) = MAXVAL((/(SUM(ABS(sphi_j(:, i))), i=sgfj, sgfj + nsgfj(jset) - 1)/))
1329 : END DO
1330 :
1331 : ALLOCATE (max_contraction_k(nsetk))
1332 : max_contraction_k = 0.0_dp
1333 : DO kset = 1, nsetk
1334 : sgfk = first_sgf_k(1, kset)
1335 : max_contraction_k(kset) = MAXVAL((/(SUM(ABS(sphi_k(:, i))), i=sgfk, sgfk + nsgfk(kset) - 1)/))
1336 : END DO
1337 :
1338 : CALL dbt_blk_sizes(t3c_der_i(jcell, kcell, 1), blk_idx, blk_size)
1339 :
1340 : ALLOCATE (block_t_i(blk_size(2), blk_size(3), blk_size(1), 3))
1341 : ALLOCATE (block_t_j(blk_size(2), blk_size(3), blk_size(1), 3))
1342 : ALLOCATE (block_t_k(blk_size(2), blk_size(3), blk_size(1), 3))
1343 :
1344 : block_t_i = 0.0_dp
1345 : block_t_j = 0.0_dp
1346 : block_t_k = 0.0_dp
1347 : block_j_not_zero = .FALSE.
1348 : block_k_not_zero = .FALSE.
1349 :
1350 : DO iset = 1, nseti
1351 :
1352 : DO jset = 1, nsetj
1353 :
1354 : IF (set_radius_j(jset) + set_radius_i(iset) + dr_ij < dij) CYCLE
1355 :
1356 : DO kset = 1, nsetk
1357 :
1358 : IF (set_radius_j(jset) + set_radius_k(kset) + dr_jk < djk) CYCLE
1359 : IF (set_radius_k(kset) + set_radius_i(iset) + dr_ik < dik) CYCLE
1360 :
1361 : ncoi = npgfi(iset)*ncoset(lmax_i(iset))
1362 : ncoj = npgfj(jset)*ncoset(lmax_j(jset))
1363 : ncok = npgfk(kset)*ncoset(lmax_k(kset))
1364 :
1365 : sgfi = first_sgf_i(1, iset)
1366 : sgfj = first_sgf_j(1, jset)
1367 : sgfk = first_sgf_k(1, kset)
1368 :
1369 : IF (ncoj*ncok*ncoi > 0) THEN
1370 : ALLOCATE (dijk_j(ncoj, ncok, ncoi, 3))
1371 : ALLOCATE (dijk_k(ncoj, ncok, ncoi, 3))
1372 : dijk_j(:, :, :, :) = 0.0_dp
1373 : dijk_k(:, :, :, :) = 0.0_dp
1374 :
1375 : der_j_zero = .FALSE.
1376 : der_k_zero = .FALSE.
1377 :
1378 : !need positions for libint. Only relative positions are needed => set ri to 0.0
1379 : ri = 0.0_dp
1380 : rj = rij ! ri + rij
1381 : rk = rik ! ri + rik
1382 :
1383 : IF (op_pos_prv == 1) THEN
1384 : CALL eri_3center_derivs(dijk_j, dijk_k, &
1385 : lmin_j(jset), lmax_j(jset), npgfj(jset), zetj(:, jset), rpgf_j(:, jset), rj, &
1386 : lmin_k(kset), lmax_k(kset), npgfk(kset), zetk(:, kset), rpgf_k(:, kset), rk, &
1387 : lmin_i(iset), lmax_i(iset), npgfi(iset), zeti(:, iset), rpgf_i(:, iset), ri, &
1388 : djk, dij, dik, lib, potential_parameter, &
1389 : der_abc_1_ext=der_ext_j, der_abc_2_ext=der_ext_k)
1390 : ELSE
1391 : ALLOCATE (tmp_ijk_i(ncoi, ncoj, ncok, 3))
1392 : ALLOCATE (tmp_ijk_j(ncoi, ncoj, ncok, 3))
1393 : tmp_ijk_i(:, :, :, :) = 0.0_dp
1394 : tmp_ijk_j(:, :, :, :) = 0.0_dp
1395 :
1396 : CALL eri_3center_derivs(tmp_ijk_i, tmp_ijk_j, &
1397 : lmin_i(iset), lmax_i(iset), npgfi(iset), zeti(:, iset), rpgf_i(:, iset), ri, &
1398 : lmin_j(jset), lmax_j(jset), npgfj(jset), zetj(:, jset), rpgf_j(:, jset), rj, &
1399 : lmin_k(kset), lmax_k(kset), npgfk(kset), zetk(:, kset), rpgf_k(:, kset), rk, &
1400 : dij, dik, djk, lib, potential_parameter, &
1401 : der_abc_1_ext=der_ext_i, der_abc_2_ext=der_ext_j)
1402 :
1403 : !TODO: is that inefficient?
1404 : der_ext_k = 0
1405 : DO i_xyz = 1, 3
1406 : DO i = 1, ncoi
1407 : dijk_j(:, :, i, i_xyz) = tmp_ijk_j(i, :, :, i_xyz)
1408 : dijk_k(:, :, i, i_xyz) = -(dijk_j(:, :, i, i_xyz) + tmp_ijk_i(i, :, :, i_xyz))
1409 : der_ext_k(i_xyz) = MAX(der_ext_k(i_xyz), MAXVAL(ABS(dijk_k(:, :, i, i_xyz))))
1410 : END DO
1411 : END DO
1412 : DEALLOCATE (tmp_ijk_i, tmp_ijk_j)
1413 :
1414 : END IF
1415 :
1416 : IF (PRESENT(der_eps)) THEN
1417 : DO i_xyz = 1, 3
1418 : IF (der_eps > der_ext_j(i_xyz)*(max_contraction_i(iset)* &
1419 : max_contraction_j(jset)* &
1420 : max_contraction_k(kset))) THEN
1421 : der_j_zero(i_xyz) = .TRUE.
1422 : END IF
1423 : END DO
1424 :
1425 : DO i_xyz = 1, 3
1426 : IF (der_eps > der_ext_k(i_xyz)*(max_contraction_i(iset)* &
1427 : max_contraction_j(jset)* &
1428 : max_contraction_k(kset))) THEN
1429 : der_k_zero(i_xyz) = .TRUE.
1430 : END IF
1431 : END DO
1432 : IF (ALL(der_j_zero) .AND. ALL(der_k_zero)) THEN
1433 : DEALLOCATE (dijk_j, dijk_k)
1434 : CYCLE
1435 : END IF
1436 : END IF
1437 :
1438 : ALLOCATE (dijk_contr(nsgfj(jset), nsgfk(kset), nsgfi(iset)))
1439 :
1440 : block_start_j = sgfj
1441 : block_end_j = sgfj + nsgfj(jset) - 1
1442 : block_start_k = sgfk
1443 : block_end_k = sgfk + nsgfk(kset) - 1
1444 : block_start_i = sgfi
1445 : block_end_i = sgfi + nsgfi(iset) - 1
1446 :
1447 : DO i_xyz = 1, 3
1448 : IF (der_j_zero(i_xyz)) CYCLE
1449 :
1450 : block_j_not_zero(i_xyz) = .TRUE.
1451 : CALL abc_contract_xsmm(dijk_contr, dijk_j(:, :, :, i_xyz), tspj(jset, jkind)%array, &
1452 : spk(kset, kkind)%array, spi(iset, ikind)%array, &
1453 : ncoj, ncok, ncoi, nsgfj(jset), nsgfk(kset), &
1454 : nsgfi(iset), cpp_buffer, ccp_buffer, prefac)
1455 :
1456 : block_t_j(block_start_j:block_end_j, &
1457 : block_start_k:block_end_k, &
1458 : block_start_i:block_end_i, i_xyz) = &
1459 : block_t_j(block_start_j:block_end_j, &
1460 : block_start_k:block_end_k, &
1461 : block_start_i:block_end_i, i_xyz) + &
1462 : dijk_contr(:, :, :)
1463 :
1464 : END DO
1465 :
1466 : DO i_xyz = 1, 3
1467 : IF (der_k_zero(i_xyz)) CYCLE
1468 :
1469 : block_k_not_zero(i_xyz) = .TRUE.
1470 : CALL abc_contract_xsmm(dijk_contr, dijk_k(:, :, :, i_xyz), tspj(jset, jkind)%array, &
1471 : spk(kset, kkind)%array, spi(iset, ikind)%array, &
1472 : ncoj, ncok, ncoi, nsgfj(jset), nsgfk(kset), &
1473 : nsgfi(iset), cpp_buffer, ccp_buffer, prefac)
1474 :
1475 : block_t_k(block_start_j:block_end_j, &
1476 : block_start_k:block_end_k, &
1477 : block_start_i:block_end_i, i_xyz) = &
1478 : block_t_k(block_start_j:block_end_j, &
1479 : block_start_k:block_end_k, &
1480 : block_start_i:block_end_i, i_xyz) + &
1481 : dijk_contr(:, :, :)
1482 :
1483 : END DO
1484 :
1485 : DEALLOCATE (dijk_j, dijk_k, dijk_contr)
1486 : END IF ! number of triples > 0
1487 : END DO
1488 : END DO
1489 : END DO
1490 :
1491 : CALL timeset(routineN//"_put_dbcsr", handle2)
1492 : !$OMP CRITICAL
1493 : sp = SHAPE(block_t_i(:, :, :, 1))
1494 : sp([2, 3, 1]) = sp
1495 :
1496 : DO i_xyz = 1, 3
1497 : !Derivatives wrt to center i can be obtained by translational invariance
1498 : IF ((.NOT. block_j_not_zero(i_xyz)) .AND. (.NOT. block_k_not_zero(i_xyz))) CYCLE
1499 : block_t_i(:, :, :, i_xyz) = -(block_t_j(:, :, :, i_xyz) + block_t_k(:, :, :, i_xyz))
1500 :
1501 : CALL dbt_put_block(t3c_der_i(jcell, kcell, i_xyz), blk_idx, sp, &
1502 : RESHAPE(block_t_i(:, :, :, i_xyz), SHAPE=sp, ORDER=[2, 3, 1]), &
1503 : summation=.TRUE.)
1504 : END DO
1505 :
1506 : sp = SHAPE(block_t_k(:, :, :, 1))
1507 : sp([2, 3, 1]) = sp
1508 :
1509 : DO i_xyz = 1, 3
1510 : IF (.NOT. block_k_not_zero(i_xyz)) CYCLE
1511 : CALL dbt_put_block(t3c_der_k(jcell, kcell, i_xyz), blk_idx, sp, &
1512 : RESHAPE(block_t_k(:, :, :, i_xyz), SHAPE=sp, ORDER=[2, 3, 1]), &
1513 : summation=.TRUE.)
1514 : END DO
1515 : !$OMP END CRITICAL
1516 :
1517 : IF (nl_3c%sym == symmetric_jk) THEN
1518 : sp = SHAPE(block_t_j(:, :, :, 1))
1519 : sp([2, 3, 1]) = sp
1520 : !$OMP CRITICAL
1521 : DO i_xyz = 1, 3
1522 : IF (.NOT. block_j_not_zero(i_xyz)) CYCLE
1523 : CALL dbt_put_block(t3c_der_j(jcell, kcell, i_xyz), blk_idx, sp, &
1524 : RESHAPE(block_t_j(:, :, :, i_xyz), SHAPE=sp, ORDER=[2, 3, 1]), &
1525 : summation=.TRUE.)
1526 : END DO
1527 : !$OMP END CRITICAL
1528 : END IF
1529 :
1530 : CALL timestop(handle2)
1531 :
1532 : DEALLOCATE (block_t_i, block_t_j, block_t_k)
1533 : DEALLOCATE (max_contraction_i, max_contraction_j, max_contraction_k)
1534 : END DO
1535 :
1536 : IF (ALLOCATED(ccp_buffer)) DEALLOCATE (ccp_buffer)
1537 : IF (ALLOCATED(cpp_buffer)) DEALLOCATE (cpp_buffer)
1538 :
1539 : CALL cp_libint_cleanup_3eri1(lib)
1540 : CALL neighbor_list_3c_iterator_destroy(nl_3c_iter)
1541 : !$OMP END PARALLEL
1542 :
1543 562 : IF (do_kpoints_prv .AND. .NOT. do_hfx_kpoints_prv) THEN
1544 0 : DO i_xyz = 1, 3
1545 0 : DO kcell = 1, nimg
1546 0 : DO jcell = 1, nimg
1547 : ! need half of filter eps because afterwards we add transposed tensor
1548 0 : CALL dbt_filter(t3c_der_i(jcell, kcell, i_xyz), filter_eps/2)
1549 0 : CALL dbt_filter(t3c_der_k(jcell, kcell, i_xyz), filter_eps/2)
1550 : END DO
1551 : END DO
1552 : END DO
1553 :
1554 562 : ELSEIF (nl_3c%sym == symmetric_jk) THEN
1555 : !Add the transpose of t3c_der_j to t3c_der_k to get the fully populated tensor
1556 488 : CALL dbt_create(t3c_der_k(1, 1, 1), t3c_tmp)
1557 1952 : DO i_xyz = 1, 3
1558 3416 : DO kcell = 1, nimg
1559 4392 : DO jcell = 1, nimg
1560 : CALL dbt_copy(t3c_der_j(jcell, kcell, i_xyz), t3c_der_k(jcell, kcell, i_xyz), &
1561 1464 : order=[1, 3, 2], move_data=.TRUE., summation=.TRUE.)
1562 1464 : CALL dbt_filter(t3c_der_k(jcell, kcell, i_xyz), filter_eps)
1563 :
1564 1464 : CALL dbt_copy(t3c_der_i(jcell, kcell, i_xyz), t3c_tmp)
1565 : CALL dbt_copy(t3c_tmp, t3c_der_i(jcell, kcell, i_xyz), &
1566 1464 : order=[1, 3, 2], move_data=.TRUE., summation=.TRUE.)
1567 2928 : CALL dbt_filter(t3c_der_i(jcell, kcell, i_xyz), filter_eps)
1568 : END DO
1569 : END DO
1570 : END DO
1571 488 : CALL dbt_destroy(t3c_tmp)
1572 :
1573 74 : ELSEIF (nl_3c%sym == symmetric_none) THEN
1574 296 : DO i_xyz = 1, 3
1575 518 : DO kcell = 1, ncell_RI
1576 6204 : DO jcell = 1, nimg
1577 5760 : CALL dbt_filter(t3c_der_i(jcell, kcell, i_xyz), filter_eps)
1578 5982 : CALL dbt_filter(t3c_der_k(jcell, kcell, i_xyz), filter_eps)
1579 : END DO
1580 : END DO
1581 : END DO
1582 : ELSE
1583 0 : CPABORT("requested symmetric case not implemented")
1584 : END IF
1585 :
1586 562 : IF (nl_3c%sym == symmetric_jk) THEN
1587 1952 : DO i_xyz = 1, 3
1588 3416 : DO j_img = 1, nimg
1589 4392 : DO i_img = 1, nimg
1590 2928 : CALL dbt_destroy(t3c_der_j(i_img, j_img, i_xyz))
1591 : END DO
1592 : END DO
1593 : END DO
1594 : END IF
1595 :
1596 3846 : DO iset = 1, max_nset
1597 10094 : DO ibasis = 1, nbasis
1598 6248 : IF (ASSOCIATED(spi(iset, ibasis)%array)) DEALLOCATE (spi(iset, ibasis)%array)
1599 6248 : IF (ASSOCIATED(tspj(iset, ibasis)%array)) DEALLOCATE (tspj(iset, ibasis)%array)
1600 9532 : IF (ASSOCIATED(spk(iset, ibasis)%array)) DEALLOCATE (spk(iset, ibasis)%array)
1601 : END DO
1602 : END DO
1603 :
1604 562 : DEALLOCATE (spi, tspj, spk)
1605 :
1606 562 : CALL timestop(handle)
1607 6522 : END SUBROUTINE build_3c_derivatives
1608 :
1609 : ! **************************************************************************************************
1610 : !> \brief Calculates the 3c virial contributions on the fly
1611 : !> \param work_virial ...
1612 : !> \param t3c_trace the tensor with which the trace should be taken
1613 : !> \param pref ...
1614 : !> \param qs_env ...
1615 : !> \param nl_3c 3-center neighborlist, with a distribution matching that of t3c_trace
1616 : !> \param basis_i ...
1617 : !> \param basis_j ...
1618 : !> \param basis_k ...
1619 : !> \param potential_parameter ...
1620 : !> \param der_eps neglect integrals smaller than der_eps
1621 : !> \param op_pos operator position.
1622 : !> 1: calculate (i|jk) integrals,
1623 : !> 2: calculate (ij|k) integrals
1624 : !> this routine requires that libint has been static initialised somewhere else
1625 : ! **************************************************************************************************
1626 16 : SUBROUTINE calc_3c_virial(work_virial, t3c_trace, pref, qs_env, &
1627 16 : nl_3c, basis_i, basis_j, basis_k, &
1628 : potential_parameter, der_eps, op_pos)
1629 :
1630 : REAL(dp), DIMENSION(3, 3), INTENT(INOUT) :: work_virial
1631 : TYPE(dbt_type), INTENT(INOUT) :: t3c_trace
1632 : REAL(KIND=dp), INTENT(IN) :: pref
1633 : TYPE(qs_environment_type), POINTER :: qs_env
1634 : TYPE(neighbor_list_3c_type), INTENT(INOUT) :: nl_3c
1635 : TYPE(gto_basis_set_p_type), DIMENSION(:) :: basis_i, basis_j, basis_k
1636 : TYPE(libint_potential_type), INTENT(IN) :: potential_parameter
1637 : REAL(KIND=dp), INTENT(IN), OPTIONAL :: der_eps
1638 : INTEGER, INTENT(IN), OPTIONAL :: op_pos
1639 :
1640 : CHARACTER(LEN=*), PARAMETER :: routineN = 'calc_3c_virial'
1641 :
1642 : INTEGER :: block_end_i, block_end_j, block_end_k, block_start_i, block_start_j, &
1643 : block_start_k, egfi, handle, i, i_xyz, iatom, ibasis, ikind, ilist, imax, iset, j_xyz, &
1644 : jatom, jkind, jset, katom, kkind, kset, m_max, max_ncoj, max_nset, max_nsgfi, maxli, &
1645 : maxlj, maxlk, mepos, natom, nbasis, ncoi, ncoj, ncok, nseti, nsetj, nsetk, nthread, &
1646 : op_ij, op_jk, op_pos_prv, sgfi, sgfj, sgfk, unit_id
1647 : INTEGER, DIMENSION(2) :: bo
1648 : INTEGER, DIMENSION(3) :: blk_size, sp
1649 16 : INTEGER, DIMENSION(:), POINTER :: lmax_i, lmax_j, lmax_k, lmin_i, lmin_j, &
1650 32 : lmin_k, npgfi, npgfj, npgfk, nsgfi, &
1651 16 : nsgfj, nsgfk
1652 16 : INTEGER, DIMENSION(:, :), POINTER :: first_sgf_i, first_sgf_j, first_sgf_k
1653 : LOGICAL :: found, skip
1654 : LOGICAL, DIMENSION(3) :: block_j_not_zero, block_k_not_zero, &
1655 : der_j_zero, der_k_zero
1656 : REAL(dp) :: force
1657 : REAL(dp), DIMENSION(3) :: der_ext_i, der_ext_j, der_ext_k
1658 : REAL(KIND=dp) :: dij, dik, djk, dr_ij, dr_ik, dr_jk, &
1659 : kind_radius_i, kind_radius_j, &
1660 : kind_radius_k
1661 16 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:) :: ccp_buffer, cpp_buffer, &
1662 16 : max_contraction_i, max_contraction_j, &
1663 16 : max_contraction_k
1664 32 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :) :: ablock, dijk_contr, tmp_block
1665 16 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :, :) :: block_t_i, block_t_j, block_t_k, dijk_j, &
1666 16 : dijk_k
1667 : REAL(KIND=dp), DIMENSION(3) :: ri, rij, rik, rj, rjk, rk, scoord
1668 16 : REAL(KIND=dp), DIMENSION(:), POINTER :: set_radius_i, set_radius_j, set_radius_k
1669 16 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: rpgf_i, rpgf_j, rpgf_k, sphi_i, sphi_j, &
1670 16 : sphi_k, zeti, zetj, zetk
1671 16 : TYPE(atomic_kind_type), DIMENSION(:), POINTER :: atomic_kind_set
1672 : TYPE(cell_type), POINTER :: cell
1673 16 : TYPE(cp_2d_r_p_type), DIMENSION(:, :), POINTER :: spi, spk, tspj
1674 : TYPE(cp_libint_t) :: lib
1675 : TYPE(dft_control_type), POINTER :: dft_control
1676 : TYPE(gto_basis_set_type), POINTER :: basis_set
1677 : TYPE(mp_para_env_type), POINTER :: para_env
1678 : TYPE(neighbor_list_3c_iterator_type) :: nl_3c_iter
1679 16 : TYPE(particle_type), DIMENSION(:), POINTER :: particle_set
1680 16 : TYPE(qs_kind_type), DIMENSION(:), POINTER :: qs_kind_set
1681 :
1682 16 : CALL timeset(routineN, handle)
1683 :
1684 16 : op_ij = do_potential_id; op_jk = do_potential_id
1685 :
1686 16 : IF (PRESENT(op_pos)) THEN
1687 16 : op_pos_prv = op_pos
1688 : ELSE
1689 : op_pos_prv = 1
1690 : END IF
1691 16 : CPASSERT(op_pos == 1)
1692 16 : CPASSERT(.NOT. nl_3c%sym == symmetric_jk)
1693 :
1694 16 : SELECT CASE (op_pos_prv)
1695 : CASE (1)
1696 16 : op_ij = potential_parameter%potential_type
1697 : CASE (2)
1698 16 : op_jk = potential_parameter%potential_type
1699 : END SELECT
1700 :
1701 16 : dr_ij = 0.0_dp; dr_jk = 0.0_dp; dr_ik = 0.0_dp
1702 :
1703 16 : IF (op_ij == do_potential_truncated .OR. op_ij == do_potential_short) THEN
1704 6 : dr_ij = potential_parameter%cutoff_radius*cutoff_screen_factor
1705 6 : dr_ik = potential_parameter%cutoff_radius*cutoff_screen_factor
1706 10 : ELSEIF (op_ij == do_potential_coulomb) THEN
1707 0 : dr_ij = 1000000.0_dp
1708 0 : dr_ik = 1000000.0_dp
1709 : END IF
1710 :
1711 16 : IF (op_jk == do_potential_truncated .OR. op_jk == do_potential_short) THEN
1712 0 : dr_jk = potential_parameter%cutoff_radius*cutoff_screen_factor
1713 0 : dr_ik = potential_parameter%cutoff_radius*cutoff_screen_factor
1714 16 : ELSEIF (op_jk == do_potential_coulomb) THEN
1715 0 : dr_jk = 1000000.0_dp
1716 0 : dr_ik = 1000000.0_dp
1717 : END IF
1718 :
1719 16 : NULLIFY (qs_kind_set, atomic_kind_set)
1720 :
1721 : ! get stuff
1722 : CALL get_qs_env(qs_env, atomic_kind_set=atomic_kind_set, qs_kind_set=qs_kind_set, &
1723 : natom=natom, dft_control=dft_control, para_env=para_env, &
1724 16 : particle_set=particle_set, cell=cell)
1725 :
1726 : !Need the max l for each basis for libint and max nset, nco and nsgf for LIBXSMM contraction
1727 16 : nbasis = SIZE(basis_i)
1728 16 : max_nsgfi = 0
1729 16 : max_nset = 0
1730 16 : maxli = 0
1731 48 : DO ibasis = 1, nbasis
1732 : CALL get_gto_basis_set(gto_basis_set=basis_i(ibasis)%gto_basis_set, maxl=imax, &
1733 32 : nset=iset, nsgf_set=nsgfi, npgf=npgfi)
1734 32 : maxli = MAX(maxli, imax)
1735 32 : max_nset = MAX(max_nset, iset)
1736 182 : max_nsgfi = MAX(max_nsgfi, MAXVAL(nsgfi))
1737 : END DO
1738 : max_ncoj = 0
1739 : maxlj = 0
1740 48 : DO ibasis = 1, nbasis
1741 : CALL get_gto_basis_set(gto_basis_set=basis_j(ibasis)%gto_basis_set, maxl=imax, &
1742 32 : nset=jset, nsgf_set=nsgfj, npgf=npgfj)
1743 32 : maxlj = MAX(maxlj, imax)
1744 32 : max_nset = MAX(max_nset, jset)
1745 138 : max_ncoj = MAX(max_ncoj, MAXVAL(npgfj)*ncoset(maxlj))
1746 : END DO
1747 : maxlk = 0
1748 48 : DO ibasis = 1, nbasis
1749 : CALL get_gto_basis_set(gto_basis_set=basis_k(ibasis)%gto_basis_set, maxl=imax, &
1750 32 : nset=kset, nsgf_set=nsgfk, npgf=npgfk)
1751 32 : maxlk = MAX(maxlk, imax)
1752 48 : max_nset = MAX(max_nset, kset)
1753 : END DO
1754 16 : m_max = maxli + maxlj + maxlk + 1
1755 :
1756 : !To minimize expensive memory opsand generally optimize contraction, pre-allocate
1757 : !contiguous sphi arrays (and transposed in the cas of sphi_i)
1758 :
1759 16 : NULLIFY (tspj, spi, spk)
1760 920 : ALLOCATE (spi(max_nset, nbasis), tspj(max_nset, nbasis), spk(max_nset, nbasis))
1761 :
1762 48 : DO ibasis = 1, nbasis
1763 280 : DO iset = 1, max_nset
1764 232 : NULLIFY (spi(iset, ibasis)%array)
1765 232 : NULLIFY (tspj(iset, ibasis)%array)
1766 :
1767 264 : NULLIFY (spk(iset, ibasis)%array)
1768 : END DO
1769 : END DO
1770 :
1771 64 : DO ilist = 1, 3
1772 160 : DO ibasis = 1, nbasis
1773 96 : IF (ilist == 1) basis_set => basis_i(ibasis)%gto_basis_set
1774 96 : IF (ilist == 2) basis_set => basis_j(ibasis)%gto_basis_set
1775 96 : IF (ilist == 3) basis_set => basis_k(ibasis)%gto_basis_set
1776 :
1777 458 : DO iset = 1, basis_set%nset
1778 :
1779 314 : ncoi = basis_set%npgf(iset)*ncoset(basis_set%lmax(iset))
1780 314 : sgfi = basis_set%first_sgf(1, iset)
1781 314 : egfi = sgfi + basis_set%nsgf_set(iset) - 1
1782 :
1783 410 : IF (ilist == 1) THEN
1784 536 : ALLOCATE (spi(iset, ibasis)%array(ncoi, basis_set%nsgf_set(iset)))
1785 177990 : spi(iset, ibasis)%array(:, :) = basis_set%sphi(1:ncoi, sgfi:egfi)
1786 :
1787 180 : ELSE IF (ilist == 2) THEN
1788 360 : ALLOCATE (tspj(iset, ibasis)%array(basis_set%nsgf_set(iset), ncoi))
1789 5194 : tspj(iset, ibasis)%array(:, :) = TRANSPOSE(basis_set%sphi(1:ncoi, sgfi:egfi))
1790 :
1791 : ELSE
1792 360 : ALLOCATE (spk(iset, ibasis)%array(ncoi, basis_set%nsgf_set(iset)))
1793 4442 : spk(iset, ibasis)%array(:, :) = basis_set%sphi(1:ncoi, sgfi:egfi)
1794 : END IF
1795 :
1796 : END DO !iset
1797 : END DO !ibasis
1798 : END DO !ilist
1799 :
1800 : !Init the truncated Coulomb operator
1801 16 : IF (op_ij == do_potential_truncated .OR. op_jk == do_potential_truncated) THEN
1802 :
1803 6 : IF (m_max > get_lmax_init()) THEN
1804 0 : IF (para_env%mepos == 0) THEN
1805 0 : CALL open_file(unit_number=unit_id, file_name=potential_parameter%filename)
1806 : END IF
1807 0 : CALL init(m_max, unit_id, para_env%mepos, para_env)
1808 0 : IF (para_env%mepos == 0) THEN
1809 0 : CALL close_file(unit_id)
1810 : END IF
1811 : END IF
1812 : END IF
1813 :
1814 16 : CALL init_md_ftable(nmax=m_max)
1815 :
1816 16 : nthread = 1
1817 16 : !$ nthread = omp_get_max_threads()
1818 :
1819 : !$OMP PARALLEL DEFAULT(NONE) &
1820 : !$OMP SHARED (nthread,maxli,maxlk,maxlj,i,work_virial,pref,basis_i,basis_j,basis_k,dr_ij,dr_jk,dr_ik,&
1821 : !$OMP ncoset,potential_parameter,der_eps,tspj,spi,spk,natom,nl_3c,t3c_trace,cell,particle_set) &
1822 : !$OMP PRIVATE (lib,nl_3c_iter,ikind,jkind,kkind,iatom,jatom,katom,rij,rjk,rik,i_xyz,j_xyz,first_sgf_i,&
1823 : !$OMP lmax_i,lmin_i,npgfi,nseti,nsgfi,rpgf_i,set_radius_i,sphi_i,zeti,kind_radius_i,first_sgf_j,&
1824 : !$OMP lmax_j,lmin_j,npgfj,nsetj,nsgfj,rpgf_j,set_radius_j,sphi_j,zetj,kind_radius_j,first_sgf_k,&
1825 : !$OMP lmax_k,lmin_k,npgfk,nsetk,nsgfk,rpgf_k,set_radius_k,sphi_k,zetk,kind_radius_k,djk,dij,dik,&
1826 : !$OMP ncoi,ncoj,ncok,sgfi,sgfj,sgfk,dijk_j,dijk_k,ri,rj,rk,max_contraction_i,max_contraction_j,&
1827 : !$OMP tmp_block,max_contraction_k,iset,jset,kset,block_t_i,blk_size,dijk_contr,found,sp,mepos,&
1828 : !$OMP block_start_j,block_end_j,block_start_k,block_end_k,block_start_i,block_end_i,block_t_k,&
1829 : !$OMP bo,der_ext_i,der_ext_j,der_ext_k,ablock,force,scoord,skip,cpp_buffer,ccp_buffer,&
1830 16 : !$OMP block_k_not_zero,der_k_zero,der_j_zero,block_t_j,block_j_not_zero)
1831 :
1832 : mepos = 0
1833 : !$ mepos = omp_get_thread_num()
1834 :
1835 : CALL cp_libint_init_3eri1(lib, MAX(maxli, maxlj, maxlk))
1836 : CALL cp_libint_set_contrdepth(lib, 1)
1837 : CALL neighbor_list_3c_iterator_create(nl_3c_iter, nl_3c)
1838 :
1839 : !We split the provided bounds among the threads such that each threads works on a different set of atoms
1840 :
1841 : bo = get_limit(natom, nthread, mepos)
1842 : CALL nl_3c_iter_set_bounds(nl_3c_iter, bounds_i=bo)
1843 :
1844 : skip = .FALSE.
1845 : IF (bo(1) > bo(2)) skip = .TRUE.
1846 :
1847 : DO WHILE (neighbor_list_3c_iterate(nl_3c_iter) == 0)
1848 : CALL get_3c_iterator_info(nl_3c_iter, ikind=ikind, jkind=jkind, kkind=kkind, &
1849 : iatom=iatom, jatom=jatom, katom=katom, &
1850 : rij=rij, rjk=rjk, rik=rik)
1851 : IF (skip) EXIT
1852 :
1853 : CALL dbt_get_block(t3c_trace, [iatom, jatom, katom], tmp_block, found)
1854 : IF (.NOT. found) CYCLE
1855 :
1856 : CALL get_gto_basis_set(basis_i(ikind)%gto_basis_set, first_sgf=first_sgf_i, lmax=lmax_i, lmin=lmin_i, &
1857 : npgf=npgfi, nset=nseti, nsgf_set=nsgfi, pgf_radius=rpgf_i, set_radius=set_radius_i, &
1858 : sphi=sphi_i, zet=zeti, kind_radius=kind_radius_i)
1859 :
1860 : CALL get_gto_basis_set(basis_j(jkind)%gto_basis_set, first_sgf=first_sgf_j, lmax=lmax_j, lmin=lmin_j, &
1861 : npgf=npgfj, nset=nsetj, nsgf_set=nsgfj, pgf_radius=rpgf_j, set_radius=set_radius_j, &
1862 : sphi=sphi_j, zet=zetj, kind_radius=kind_radius_j)
1863 :
1864 : CALL get_gto_basis_set(basis_k(kkind)%gto_basis_set, first_sgf=first_sgf_k, lmax=lmax_k, lmin=lmin_k, &
1865 : npgf=npgfk, nset=nsetk, nsgf_set=nsgfk, pgf_radius=rpgf_k, set_radius=set_radius_k, &
1866 : sphi=sphi_k, zet=zetk, kind_radius=kind_radius_k)
1867 :
1868 : djk = NORM2(rjk)
1869 : dij = NORM2(rij)
1870 : dik = NORM2(rik)
1871 :
1872 : IF (kind_radius_j + kind_radius_i + dr_ij < dij) CYCLE
1873 : IF (kind_radius_j + kind_radius_k + dr_jk < djk) CYCLE
1874 : IF (kind_radius_k + kind_radius_i + dr_ik < dik) CYCLE
1875 :
1876 : ALLOCATE (max_contraction_i(nseti))
1877 : max_contraction_i = 0.0_dp
1878 : DO iset = 1, nseti
1879 : sgfi = first_sgf_i(1, iset)
1880 : max_contraction_i(iset) = MAXVAL((/(SUM(ABS(sphi_i(:, i))), i=sgfi, sgfi + nsgfi(iset) - 1)/))
1881 : END DO
1882 :
1883 : ALLOCATE (max_contraction_j(nsetj))
1884 : max_contraction_j = 0.0_dp
1885 : DO jset = 1, nsetj
1886 : sgfj = first_sgf_j(1, jset)
1887 : max_contraction_j(jset) = MAXVAL((/(SUM(ABS(sphi_j(:, i))), i=sgfj, sgfj + nsgfj(jset) - 1)/))
1888 : END DO
1889 :
1890 : ALLOCATE (max_contraction_k(nsetk))
1891 : max_contraction_k = 0.0_dp
1892 : DO kset = 1, nsetk
1893 : sgfk = first_sgf_k(1, kset)
1894 : max_contraction_k(kset) = MAXVAL((/(SUM(ABS(sphi_k(:, i))), i=sgfk, sgfk + nsgfk(kset) - 1)/))
1895 : END DO
1896 :
1897 : CALL dbt_blk_sizes(t3c_trace, [iatom, jatom, katom], blk_size)
1898 :
1899 : ALLOCATE (block_t_i(blk_size(2), blk_size(3), blk_size(1), 3))
1900 : ALLOCATE (block_t_j(blk_size(2), blk_size(3), blk_size(1), 3))
1901 : ALLOCATE (block_t_k(blk_size(2), blk_size(3), blk_size(1), 3))
1902 :
1903 : ALLOCATE (ablock(blk_size(2), blk_size(3), blk_size(1)))
1904 : DO i = 1, blk_size(1)
1905 : ablock(:, :, i) = tmp_block(i, :, :)
1906 : END DO
1907 : DEALLOCATE (tmp_block)
1908 :
1909 : block_t_i = 0.0_dp
1910 : block_t_j = 0.0_dp
1911 : block_t_k = 0.0_dp
1912 : block_j_not_zero = .FALSE.
1913 : block_k_not_zero = .FALSE.
1914 :
1915 : DO iset = 1, nseti
1916 :
1917 : DO jset = 1, nsetj
1918 :
1919 : IF (set_radius_j(jset) + set_radius_i(iset) + dr_ij < dij) CYCLE
1920 :
1921 : DO kset = 1, nsetk
1922 :
1923 : IF (set_radius_j(jset) + set_radius_k(kset) + dr_jk < djk) CYCLE
1924 : IF (set_radius_k(kset) + set_radius_i(iset) + dr_ik < dik) CYCLE
1925 :
1926 : ncoi = npgfi(iset)*ncoset(lmax_i(iset))
1927 : ncoj = npgfj(jset)*ncoset(lmax_j(jset))
1928 : ncok = npgfk(kset)*ncoset(lmax_k(kset))
1929 :
1930 : sgfi = first_sgf_i(1, iset)
1931 : sgfj = first_sgf_j(1, jset)
1932 : sgfk = first_sgf_k(1, kset)
1933 :
1934 : IF (ncoj*ncok*ncoi > 0) THEN
1935 : ALLOCATE (dijk_j(ncoj, ncok, ncoi, 3))
1936 : ALLOCATE (dijk_k(ncoj, ncok, ncoi, 3))
1937 : dijk_j(:, :, :, :) = 0.0_dp
1938 : dijk_k(:, :, :, :) = 0.0_dp
1939 :
1940 : der_j_zero = .FALSE.
1941 : der_k_zero = .FALSE.
1942 :
1943 : !need positions for libint. Only relative positions are needed => set ri to 0.0
1944 : ri = 0.0_dp
1945 : rj = rij ! ri + rij
1946 : rk = rik ! ri + rik
1947 :
1948 : CALL eri_3center_derivs(dijk_j, dijk_k, &
1949 : lmin_j(jset), lmax_j(jset), npgfj(jset), zetj(:, jset), rpgf_j(:, jset), rj, &
1950 : lmin_k(kset), lmax_k(kset), npgfk(kset), zetk(:, kset), rpgf_k(:, kset), rk, &
1951 : lmin_i(iset), lmax_i(iset), npgfi(iset), zeti(:, iset), rpgf_i(:, iset), ri, &
1952 : djk, dij, dik, lib, potential_parameter, &
1953 : der_abc_1_ext=der_ext_j, der_abc_2_ext=der_ext_k)
1954 :
1955 : IF (PRESENT(der_eps)) THEN
1956 : DO i_xyz = 1, 3
1957 : IF (der_eps > der_ext_j(i_xyz)*(max_contraction_i(iset)* &
1958 : max_contraction_j(jset)* &
1959 : max_contraction_k(kset))) THEN
1960 : der_j_zero(i_xyz) = .TRUE.
1961 : END IF
1962 : END DO
1963 :
1964 : DO i_xyz = 1, 3
1965 : IF (der_eps > der_ext_k(i_xyz)*(max_contraction_i(iset)* &
1966 : max_contraction_j(jset)* &
1967 : max_contraction_k(kset))) THEN
1968 : der_k_zero(i_xyz) = .TRUE.
1969 : END IF
1970 : END DO
1971 : IF (ALL(der_j_zero) .AND. ALL(der_k_zero)) THEN
1972 : DEALLOCATE (dijk_j, dijk_k)
1973 : CYCLE
1974 : END IF
1975 : END IF
1976 :
1977 : ALLOCATE (dijk_contr(nsgfj(jset), nsgfk(kset), nsgfi(iset)))
1978 :
1979 : block_start_j = sgfj
1980 : block_end_j = sgfj + nsgfj(jset) - 1
1981 : block_start_k = sgfk
1982 : block_end_k = sgfk + nsgfk(kset) - 1
1983 : block_start_i = sgfi
1984 : block_end_i = sgfi + nsgfi(iset) - 1
1985 :
1986 : DO i_xyz = 1, 3
1987 : IF (der_j_zero(i_xyz)) CYCLE
1988 :
1989 : block_j_not_zero(i_xyz) = .TRUE.
1990 : CALL abc_contract_xsmm(dijk_contr, dijk_j(:, :, :, i_xyz), tspj(jset, jkind)%array, &
1991 : spk(kset, kkind)%array, spi(iset, ikind)%array, &
1992 : ncoj, ncok, ncoi, nsgfj(jset), nsgfk(kset), &
1993 : nsgfi(iset), cpp_buffer, ccp_buffer)
1994 :
1995 : block_t_j(block_start_j:block_end_j, &
1996 : block_start_k:block_end_k, &
1997 : block_start_i:block_end_i, i_xyz) = &
1998 : block_t_j(block_start_j:block_end_j, &
1999 : block_start_k:block_end_k, &
2000 : block_start_i:block_end_i, i_xyz) + &
2001 : dijk_contr(:, :, :)
2002 :
2003 : END DO
2004 :
2005 : DO i_xyz = 1, 3
2006 : IF (der_k_zero(i_xyz)) CYCLE
2007 :
2008 : block_k_not_zero(i_xyz) = .TRUE.
2009 : CALL abc_contract_xsmm(dijk_contr, dijk_k(:, :, :, i_xyz), tspj(jset, jkind)%array, &
2010 : spk(kset, kkind)%array, spi(iset, ikind)%array, &
2011 : ncoj, ncok, ncoi, nsgfj(jset), nsgfk(kset), &
2012 : nsgfi(iset), cpp_buffer, ccp_buffer)
2013 :
2014 : block_t_k(block_start_j:block_end_j, &
2015 : block_start_k:block_end_k, &
2016 : block_start_i:block_end_i, i_xyz) = &
2017 : block_t_k(block_start_j:block_end_j, &
2018 : block_start_k:block_end_k, &
2019 : block_start_i:block_end_i, i_xyz) + &
2020 : dijk_contr(:, :, :)
2021 :
2022 : END DO
2023 :
2024 : DEALLOCATE (dijk_j, dijk_k, dijk_contr)
2025 : END IF ! number of triples > 0
2026 : END DO
2027 : END DO
2028 : END DO
2029 :
2030 : !We obtain the derivative wrt to first center using translational invariance
2031 : DO i_xyz = 1, 3
2032 : block_t_i(:, :, :, i_xyz) = -block_t_j(:, :, :, i_xyz) - block_t_k(:, :, :, i_xyz)
2033 : END DO
2034 :
2035 : !virial contribution coming from deriv wrt to first center
2036 : DO i_xyz = 1, 3
2037 : force = pref*SUM(ablock(:, :, :)*block_t_i(:, :, :, i_xyz))
2038 : CALL real_to_scaled(scoord, particle_set(iatom)%r, cell)
2039 : DO j_xyz = 1, 3
2040 : !$OMP ATOMIC
2041 : work_virial(i_xyz, j_xyz) = work_virial(i_xyz, j_xyz) + force*scoord(j_xyz)
2042 : END DO
2043 : END DO
2044 :
2045 : !second center
2046 : DO i_xyz = 1, 3
2047 : force = pref*SUM(ablock(:, :, :)*block_t_j(:, :, :, i_xyz))
2048 : CALL real_to_scaled(scoord, particle_set(iatom)%r + rij, cell)
2049 : DO j_xyz = 1, 3
2050 : !$OMP ATOMIC
2051 : work_virial(i_xyz, j_xyz) = work_virial(i_xyz, j_xyz) + force*scoord(j_xyz)
2052 : END DO
2053 : END DO
2054 :
2055 : !third center
2056 : DO i_xyz = 1, 3
2057 : force = pref*SUM(ablock(:, :, :)*block_t_k(:, :, :, i_xyz))
2058 : CALL real_to_scaled(scoord, particle_set(iatom)%r + rik, cell)
2059 : DO j_xyz = 1, 3
2060 : !$OMP ATOMIC
2061 : work_virial(i_xyz, j_xyz) = work_virial(i_xyz, j_xyz) + force*scoord(j_xyz)
2062 : END DO
2063 : END DO
2064 :
2065 : DEALLOCATE (block_t_i, block_t_j, block_t_k)
2066 : DEALLOCATE (max_contraction_i, max_contraction_j, max_contraction_k, ablock)
2067 : END DO
2068 :
2069 : IF (ALLOCATED(ccp_buffer)) DEALLOCATE (ccp_buffer)
2070 : IF (ALLOCATED(cpp_buffer)) DEALLOCATE (cpp_buffer)
2071 :
2072 : CALL cp_libint_cleanup_3eri1(lib)
2073 : CALL neighbor_list_3c_iterator_destroy(nl_3c_iter)
2074 : !$OMP END PARALLEL
2075 :
2076 132 : DO iset = 1, max_nset
2077 364 : DO ibasis = 1, nbasis
2078 232 : IF (ASSOCIATED(spi(iset, ibasis)%array)) DEALLOCATE (spi(iset, ibasis)%array)
2079 232 : IF (ASSOCIATED(tspj(iset, ibasis)%array)) DEALLOCATE (tspj(iset, ibasis)%array)
2080 348 : IF (ASSOCIATED(spk(iset, ibasis)%array)) DEALLOCATE (spk(iset, ibasis)%array)
2081 : END DO
2082 : END DO
2083 :
2084 16 : DEALLOCATE (spi, tspj, spk)
2085 :
2086 16 : CALL timestop(handle)
2087 128 : END SUBROUTINE calc_3c_virial
2088 :
2089 : ! **************************************************************************************************
2090 : !> \brief Build 3-center integral tensor
2091 : !> \param t3c empty DBCSR tensor
2092 : !> Should be of shape (1,1) if no kpoints are used and of shape (nimages, nimages)
2093 : !> if k-points are used
2094 : !> \param filter_eps Filter threshold for tensor blocks
2095 : !> \param qs_env ...
2096 : !> \param nl_3c 3-center neighborlist
2097 : !> \param basis_i ...
2098 : !> \param basis_j ...
2099 : !> \param basis_k ...
2100 : !> \param potential_parameter ...
2101 : !> \param int_eps neglect integrals smaller than int_eps
2102 : !> \param op_pos operator position.
2103 : !> 1: calculate (i|jk) integrals,
2104 : !> 2: calculate (ij|k) integrals
2105 : !> \param do_kpoints ...
2106 : !> this routine requires that libint has been static initialised somewhere else
2107 : !> \param do_hfx_kpoints ...
2108 : !> \param desymmetrize ...
2109 : !> \param cell_sym ...
2110 : !> \param bounds_i ...
2111 : !> \param bounds_j ...
2112 : !> \param bounds_k ...
2113 : !> \param RI_range ...
2114 : !> \param img_to_RI_cell ...
2115 : !> \param cell_to_index_ext ...
2116 : ! **************************************************************************************************
2117 1858 : SUBROUTINE build_3c_integrals(t3c, filter_eps, qs_env, &
2118 1858 : nl_3c, basis_i, basis_j, basis_k, &
2119 : potential_parameter, int_eps, &
2120 : op_pos, do_kpoints, do_hfx_kpoints, desymmetrize, cell_sym, &
2121 : bounds_i, bounds_j, bounds_k, &
2122 1858 : RI_range, img_to_RI_cell, cell_to_index_ext)
2123 :
2124 : TYPE(dbt_type), DIMENSION(:, :), INTENT(INOUT) :: t3c
2125 : REAL(KIND=dp), INTENT(IN) :: filter_eps
2126 : TYPE(qs_environment_type), POINTER :: qs_env
2127 : TYPE(neighbor_list_3c_type), INTENT(INOUT) :: nl_3c
2128 : TYPE(gto_basis_set_p_type), DIMENSION(:) :: basis_i, basis_j, basis_k
2129 : TYPE(libint_potential_type), INTENT(IN) :: potential_parameter
2130 : REAL(KIND=dp), INTENT(IN), OPTIONAL :: int_eps
2131 : INTEGER, INTENT(IN), OPTIONAL :: op_pos
2132 : LOGICAL, INTENT(IN), OPTIONAL :: do_kpoints, do_hfx_kpoints, &
2133 : desymmetrize, cell_sym
2134 : INTEGER, DIMENSION(2), INTENT(IN), OPTIONAL :: bounds_i, bounds_j, bounds_k
2135 : REAL(dp), INTENT(IN), OPTIONAL :: RI_range
2136 : INTEGER, DIMENSION(:), INTENT(IN), OPTIONAL :: img_to_RI_cell
2137 : INTEGER, DIMENSION(:, :, :), OPTIONAL, POINTER :: cell_to_index_ext
2138 :
2139 : CHARACTER(LEN=*), PARAMETER :: routineN = 'build_3c_integrals'
2140 :
2141 : INTEGER :: block_end_i, block_end_j, block_end_k, block_start_i, block_start_j, &
2142 : block_start_k, egfi, handle, handle2, i, iatom, ibasis, ikind, ilist, imax, iset, jatom, &
2143 : jcell, jkind, jset, katom, kcell, kkind, kset, m_max, max_ncoj, max_nset, max_nsgfi, &
2144 : maxli, maxlj, maxlk, mepos, natom, nbasis, ncell_RI, ncoi, ncoj, ncok, nimg, nseti, &
2145 : nsetj, nsetk, nthread, op_ij, op_jk, op_pos_prv, sgfi, sgfj, sgfk, unit_id
2146 1858 : INTEGER, ALLOCATABLE, DIMENSION(:) :: img_to_RI_cell_prv
2147 : INTEGER, DIMENSION(2) :: bo
2148 : INTEGER, DIMENSION(3) :: blk_idx, blk_size, cell_j, cell_k, &
2149 : kp_index_lbounds, kp_index_ubounds, sp
2150 1858 : INTEGER, DIMENSION(:), POINTER :: lmax_i, lmax_j, lmax_k, lmin_i, lmin_j, &
2151 3716 : lmin_k, npgfi, npgfj, npgfk, nsgfi, &
2152 1858 : nsgfj, nsgfk
2153 1858 : INTEGER, DIMENSION(:, :), POINTER :: first_sgf_i, first_sgf_j, first_sgf_k
2154 1858 : INTEGER, DIMENSION(:, :, :), POINTER :: cell_to_index
2155 : LOGICAL :: block_not_zero, cell_sym_prv, debug, &
2156 : desymmetrize_prv, do_hfx_kpoints_prv, &
2157 : do_kpoints_prv, found, skip
2158 : REAL(KIND=dp) :: dij, dik, djk, dr_ij, dr_ik, dr_jk, &
2159 : kind_radius_i, kind_radius_j, &
2160 : kind_radius_k, max_contraction_i, &
2161 : prefac, sijk_ext
2162 1858 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:) :: ccp_buffer, cpp_buffer, &
2163 1858 : max_contraction_j, max_contraction_k
2164 3716 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :) :: block_t, dummy_block_t, sijk, &
2165 3716 : sijk_contr, tmp_ijk
2166 : REAL(KIND=dp), DIMENSION(1, 1, 1) :: counter
2167 : REAL(KIND=dp), DIMENSION(3) :: ri, rij, rik, rj, rjk, rk
2168 1858 : REAL(KIND=dp), DIMENSION(:), POINTER :: set_radius_i, set_radius_j, set_radius_k
2169 1858 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: rpgf_i, rpgf_j, rpgf_k, sphi_i, sphi_j, &
2170 3716 : sphi_k, zeti, zetj, zetk
2171 1858 : TYPE(atomic_kind_type), DIMENSION(:), POINTER :: atomic_kind_set
2172 : TYPE(cell_type), POINTER :: cell
2173 3716 : TYPE(cp_2d_r_p_type), DIMENSION(:, :), POINTER :: spi, spk, tspj
2174 : TYPE(cp_libint_t) :: lib
2175 13006 : TYPE(dbt_type) :: t_3c_tmp
2176 : TYPE(dft_control_type), POINTER :: dft_control
2177 : TYPE(gto_basis_set_type), POINTER :: basis_set
2178 : TYPE(kpoint_type), POINTER :: kpoints
2179 : TYPE(mp_para_env_type), POINTER :: para_env
2180 : TYPE(neighbor_list_3c_iterator_type) :: nl_3c_iter
2181 1858 : TYPE(qs_kind_type), DIMENSION(:), POINTER :: qs_kind_set
2182 :
2183 1858 : CALL timeset(routineN, handle)
2184 :
2185 1858 : debug = .FALSE.
2186 :
2187 1858 : IF (PRESENT(do_kpoints)) THEN
2188 368 : do_kpoints_prv = do_kpoints
2189 : ELSE
2190 : do_kpoints_prv = .FALSE.
2191 : END IF
2192 :
2193 1858 : IF (PRESENT(do_hfx_kpoints)) THEN
2194 122 : do_hfx_kpoints_prv = do_hfx_kpoints
2195 : ELSE
2196 : do_hfx_kpoints_prv = .FALSE.
2197 : END IF
2198 122 : IF (do_hfx_kpoints_prv) THEN
2199 122 : CPASSERT(do_kpoints_prv)
2200 122 : CPASSERT(PRESENT(RI_range))
2201 122 : CPASSERT(PRESENT(img_to_RI_cell))
2202 : END IF
2203 :
2204 1736 : IF (PRESENT(img_to_RI_cell)) THEN
2205 366 : ALLOCATE (img_to_RI_cell_prv(SIZE(img_to_RI_cell)))
2206 5978 : img_to_RI_cell_prv(:) = img_to_RI_cell
2207 : END IF
2208 :
2209 1858 : IF (PRESENT(desymmetrize)) THEN
2210 1858 : desymmetrize_prv = desymmetrize
2211 : ELSE
2212 : desymmetrize_prv = .TRUE.
2213 : END IF
2214 :
2215 1858 : IF (PRESENT(cell_sym)) THEN
2216 6 : cell_sym_prv = cell_sym
2217 : ELSE
2218 1852 : cell_sym_prv = .FALSE.
2219 : END IF
2220 :
2221 1858 : op_ij = do_potential_id; op_jk = do_potential_id
2222 :
2223 1858 : IF (PRESENT(op_pos)) THEN
2224 438 : op_pos_prv = op_pos
2225 : ELSE
2226 1420 : op_pos_prv = 1
2227 : END IF
2228 :
2229 1736 : SELECT CASE (op_pos_prv)
2230 : CASE (1)
2231 1736 : op_ij = potential_parameter%potential_type
2232 : CASE (2)
2233 1858 : op_jk = potential_parameter%potential_type
2234 : END SELECT
2235 :
2236 1858 : dr_ij = 0.0_dp; dr_jk = 0.0_dp; dr_ik = 0.0_dp
2237 :
2238 1858 : IF (op_ij == do_potential_truncated .OR. op_ij == do_potential_short) THEN
2239 1348 : dr_ij = potential_parameter%cutoff_radius*cutoff_screen_factor
2240 1348 : dr_ik = potential_parameter%cutoff_radius*cutoff_screen_factor
2241 510 : ELSEIF (op_ij == do_potential_coulomb) THEN
2242 192 : dr_ij = 1000000.0_dp
2243 192 : dr_ik = 1000000.0_dp
2244 : END IF
2245 :
2246 1858 : IF (op_jk == do_potential_truncated .OR. op_jk == do_potential_short) THEN
2247 12 : dr_jk = potential_parameter%cutoff_radius*cutoff_screen_factor
2248 12 : dr_ik = potential_parameter%cutoff_radius*cutoff_screen_factor
2249 1846 : ELSEIF (op_jk == do_potential_coulomb) THEN
2250 0 : dr_jk = 1000000.0_dp
2251 0 : dr_ik = 1000000.0_dp
2252 : END IF
2253 :
2254 1858 : NULLIFY (qs_kind_set, atomic_kind_set)
2255 :
2256 : ! get stuff
2257 : CALL get_qs_env(qs_env, atomic_kind_set=atomic_kind_set, qs_kind_set=qs_kind_set, cell=cell, &
2258 1858 : natom=natom, dft_control=dft_control, para_env=para_env)
2259 :
2260 1858 : IF (do_kpoints_prv) THEN
2261 134 : IF (PRESENT(cell_to_index_ext)) THEN
2262 6 : cell_to_index => cell_to_index_ext
2263 240 : nimg = MAXVAL(cell_to_index)
2264 : ELSE
2265 128 : CALL get_qs_env(qs_env, kpoints=kpoints)
2266 128 : CALL get_kpoint_info(kpoints, cell_to_index=cell_to_index)
2267 128 : nimg = dft_control%nimages
2268 : END IF
2269 134 : ncell_RI = nimg
2270 134 : IF (do_hfx_kpoints_prv) THEN
2271 122 : nimg = SIZE(t3c, 1)
2272 122 : ncell_RI = SIZE(t3c, 2)
2273 : END IF
2274 : ELSE
2275 : nimg = 1
2276 : ncell_RI = 1
2277 : END IF
2278 :
2279 : CALL alloc_block_3c(t3c, nl_3c, basis_i, basis_j, basis_k, qs_env, potential_parameter, &
2280 : op_pos=op_pos_prv, do_kpoints=do_kpoints, do_hfx_kpoints=do_hfx_kpoints, &
2281 : bounds_i=bounds_i, bounds_j=bounds_j, bounds_k=bounds_k, &
2282 : RI_range=RI_range, img_to_RI_cell=img_to_RI_cell, cell_sym=cell_sym_prv, &
2283 3594 : cell_to_index=cell_to_index)
2284 :
2285 1858 : IF (do_hfx_kpoints_prv) THEN
2286 122 : CPASSERT(op_pos_prv == 2)
2287 122 : CPASSERT(.NOT. desymmetrize_prv)
2288 1736 : ELSE IF (do_kpoints_prv) THEN
2289 36 : CPASSERT(ALL(SHAPE(t3c) == [nimg, ncell_RI]))
2290 : END IF
2291 :
2292 : !Need the max l for each basis for libint and max nset, nco and nsgf for LIBXSMM contraction
2293 1858 : nbasis = SIZE(basis_i)
2294 1858 : max_nsgfi = 0
2295 1858 : max_nset = 0
2296 1858 : maxli = 0
2297 4798 : DO ibasis = 1, nbasis
2298 : CALL get_gto_basis_set(gto_basis_set=basis_i(ibasis)%gto_basis_set, maxl=imax, &
2299 2940 : nset=iset, nsgf_set=nsgfi, npgf=npgfi)
2300 2940 : maxli = MAX(maxli, imax)
2301 2940 : max_nset = MAX(max_nset, iset)
2302 16592 : max_nsgfi = MAX(max_nsgfi, MAXVAL(nsgfi))
2303 : END DO
2304 : max_ncoj = 0
2305 : maxlj = 0
2306 4798 : DO ibasis = 1, nbasis
2307 : CALL get_gto_basis_set(gto_basis_set=basis_j(ibasis)%gto_basis_set, maxl=imax, &
2308 2940 : nset=jset, nsgf_set=nsgfj, npgf=npgfj)
2309 2940 : maxlj = MAX(maxlj, imax)
2310 2940 : max_nset = MAX(max_nset, jset)
2311 11338 : max_ncoj = MAX(max_ncoj, MAXVAL(npgfj)*ncoset(maxlj))
2312 : END DO
2313 : maxlk = 0
2314 4798 : DO ibasis = 1, nbasis
2315 : CALL get_gto_basis_set(gto_basis_set=basis_k(ibasis)%gto_basis_set, maxl=imax, &
2316 2940 : nset=kset, nsgf_set=nsgfk, npgf=npgfk)
2317 2940 : maxlk = MAX(maxlk, imax)
2318 4798 : max_nset = MAX(max_nset, kset)
2319 : END DO
2320 1858 : m_max = maxli + maxlj + maxlk
2321 :
2322 : !To minimize expensive memory ops and generally optimize contraction, pre-allocate
2323 : !contiguous sphi arrays (and transposed in the case of sphi_i)
2324 :
2325 1858 : NULLIFY (tspj, spi, spk)
2326 73304 : ALLOCATE (spi(max_nset, nbasis), tspj(max_nset, nbasis), spk(max_nset, nbasis))
2327 :
2328 4798 : DO ibasis = 1, nbasis
2329 21338 : DO iset = 1, max_nset
2330 16540 : NULLIFY (spi(iset, ibasis)%array)
2331 16540 : NULLIFY (tspj(iset, ibasis)%array)
2332 :
2333 19480 : NULLIFY (spk(iset, ibasis)%array)
2334 : END DO
2335 : END DO
2336 :
2337 7432 : DO ilist = 1, 3
2338 16252 : DO ibasis = 1, nbasis
2339 8820 : IF (ilist == 1) basis_set => basis_i(ibasis)%gto_basis_set
2340 8820 : IF (ilist == 2) basis_set => basis_j(ibasis)%gto_basis_set
2341 8820 : IF (ilist == 3) basis_set => basis_k(ibasis)%gto_basis_set
2342 :
2343 38968 : DO iset = 1, basis_set%nset
2344 :
2345 24574 : ncoi = basis_set%npgf(iset)*ncoset(basis_set%lmax(iset))
2346 24574 : sgfi = basis_set%first_sgf(1, iset)
2347 24574 : egfi = sgfi + basis_set%nsgf_set(iset) - 1
2348 :
2349 33394 : IF (ilist == 1) THEN
2350 47176 : ALLOCATE (spi(iset, ibasis)%array(ncoi, basis_set%nsgf_set(iset)))
2351 3277042 : spi(iset, ibasis)%array(:, :) = basis_set%sphi(1:ncoi, sgfi:egfi)
2352 :
2353 12780 : ELSE IF (ilist == 2) THEN
2354 26160 : ALLOCATE (tspj(iset, ibasis)%array(basis_set%nsgf_set(iset), ncoi))
2355 376292 : tspj(iset, ibasis)%array(:, :) = TRANSPOSE(basis_set%sphi(1:ncoi, sgfi:egfi))
2356 :
2357 : ELSE
2358 24960 : ALLOCATE (spk(iset, ibasis)%array(ncoi, basis_set%nsgf_set(iset)))
2359 1443220 : spk(iset, ibasis)%array(:, :) = basis_set%sphi(1:ncoi, sgfi:egfi)
2360 : END IF
2361 :
2362 : END DO !iset
2363 : END DO !ibasis
2364 : END DO !ilist
2365 :
2366 : !Init the truncated Coulomb operator
2367 1858 : IF (op_ij == do_potential_truncated .OR. op_jk == do_potential_truncated) THEN
2368 :
2369 1354 : IF (m_max > get_lmax_init()) THEN
2370 86 : IF (para_env%mepos == 0) THEN
2371 43 : CALL open_file(unit_number=unit_id, file_name=potential_parameter%filename)
2372 : END IF
2373 86 : CALL init(m_max, unit_id, para_env%mepos, para_env)
2374 86 : IF (para_env%mepos == 0) THEN
2375 43 : CALL close_file(unit_id)
2376 : END IF
2377 : END IF
2378 : END IF
2379 :
2380 1858 : CALL init_md_ftable(nmax=m_max)
2381 :
2382 1858 : IF (do_kpoints_prv) THEN
2383 536 : kp_index_lbounds = LBOUND(cell_to_index)
2384 536 : kp_index_ubounds = UBOUND(cell_to_index)
2385 : END IF
2386 :
2387 7432 : counter = 1.0_dp
2388 :
2389 1858 : nthread = 1
2390 1858 : !$ nthread = omp_get_max_threads()
2391 :
2392 : !$OMP PARALLEL DEFAULT(NONE) &
2393 : !$OMP SHARED (nthread,do_kpoints_prv,kp_index_lbounds,kp_index_ubounds,maxli,maxlk,maxlj,bounds_i,&
2394 : !$OMP bounds_j,bounds_k,nimg,basis_i,basis_j,basis_k,dr_ij,dr_jk,dr_ik,ncoset,&
2395 : !$OMP potential_parameter,int_eps,t3c,tspj,spi,spk,debug,cell_to_index,&
2396 : !$OMP natom,nl_3c,cell,op_pos_prv,do_hfx_kpoints_prv,RI_range,ncell_RI, &
2397 : !$OMP img_to_RI_cell_prv, cell_sym_prv) &
2398 : !$OMP PRIVATE (lib,nl_3c_iter,ikind,jkind,kkind,iatom,jatom,katom,rij,rjk,rik,cell_j,cell_k,&
2399 : !$OMP prefac,jcell,kcell,first_sgf_i,lmax_i,lmin_i,npgfi,nseti,nsgfi,rpgf_i,set_radius_i,&
2400 : !$OMP sphi_i,zeti,kind_radius_i,first_sgf_j,lmax_j,lmin_j,npgfj,nsetj,nsgfj,rpgf_j,&
2401 : !$OMP set_radius_j,sphi_j,zetj,kind_radius_j,first_sgf_k,lmax_k,lmin_k,npgfk,nsetk,nsgfk,&
2402 : !$OMP rpgf_k,set_radius_k,sphi_k,zetk,kind_radius_k,djk,dij,dik,ncoi,ncoj,ncok,sgfi,sgfj,&
2403 : !$OMP sgfk,sijk,ri,rj,rk,sijk_ext,block_not_zero,max_contraction_i,max_contraction_j,&
2404 : !$OMP max_contraction_k,iset,jset,kset,block_t,blk_size,sijk_contr,cpp_buffer,ccp_buffer,&
2405 : !$OMP block_start_j,block_end_j,block_start_k,block_end_k,block_start_i,block_end_i,found,&
2406 1858 : !$OMP dummy_block_t,sp,handle2,mepos,bo,skip,tmp_ijk,i,blk_idx)
2407 :
2408 : mepos = 0
2409 : !$ mepos = omp_get_thread_num()
2410 :
2411 : CALL cp_libint_init_3eri(lib, MAX(maxli, maxlj, maxlk))
2412 : CALL cp_libint_set_contrdepth(lib, 1)
2413 : CALL neighbor_list_3c_iterator_create(nl_3c_iter, nl_3c)
2414 :
2415 : !We split the provided bounds among the threads such that each threads works on a different set of atoms
2416 : IF (PRESENT(bounds_i)) THEN
2417 : bo = get_limit(bounds_i(2) - bounds_i(1) + 1, nthread, mepos)
2418 : bo(:) = bo(:) + bounds_i(1) - 1
2419 : CALL nl_3c_iter_set_bounds(nl_3c_iter, bo, bounds_j, bounds_k)
2420 : ELSE IF (PRESENT(bounds_j)) THEN
2421 :
2422 : bo = get_limit(bounds_j(2) - bounds_j(1) + 1, nthread, mepos)
2423 : bo(:) = bo(:) + bounds_j(1) - 1
2424 : CALL nl_3c_iter_set_bounds(nl_3c_iter, bounds_i, bo, bounds_k)
2425 : ELSE IF (PRESENT(bounds_k)) THEN
2426 : bo = get_limit(bounds_k(2) - bounds_k(1) + 1, nthread, mepos)
2427 : bo(:) = bo(:) + bounds_k(1) - 1
2428 : CALL nl_3c_iter_set_bounds(nl_3c_iter, bounds_i, bounds_j, bo)
2429 : ELSE
2430 : bo = get_limit(natom, nthread, mepos)
2431 : CALL nl_3c_iter_set_bounds(nl_3c_iter, bo, bounds_j, bounds_k)
2432 : END IF
2433 :
2434 : skip = .FALSE.
2435 : IF (bo(1) > bo(2)) skip = .TRUE.
2436 :
2437 : DO WHILE (neighbor_list_3c_iterate(nl_3c_iter) == 0)
2438 : CALL get_3c_iterator_info(nl_3c_iter, ikind=ikind, jkind=jkind, kkind=kkind, &
2439 : iatom=iatom, jatom=jatom, katom=katom, &
2440 : rij=rij, rjk=rjk, rik=rik, cell_j=cell_j, cell_k=cell_k)
2441 : IF (skip) EXIT
2442 :
2443 : djk = NORM2(rjk)
2444 : dij = NORM2(rij)
2445 : dik = NORM2(rik)
2446 :
2447 : IF (nl_3c%sym == symmetric_jk) THEN
2448 : IF (jatom == katom) THEN
2449 : ! factor 0.5 due to double-counting of diagonal blocks
2450 : ! (we desymmetrize by adding transpose)
2451 : prefac = 0.5_dp
2452 : ELSE
2453 : prefac = 1.0_dp
2454 : END IF
2455 : ELSEIF (nl_3c%sym == symmetric_ij) THEN
2456 : IF (iatom == jatom) THEN
2457 : ! factor 0.5 due to double-counting of diagonal blocks
2458 : ! (we desymmetrize by adding transpose)
2459 : prefac = 0.5_dp
2460 : ELSE
2461 : prefac = 1.0_dp
2462 : END IF
2463 : ELSE
2464 : prefac = 1.0_dp
2465 : END IF
2466 : IF (do_kpoints_prv) prefac = 1.0_dp
2467 :
2468 : IF (do_kpoints_prv) THEN
2469 :
2470 : IF (ANY([cell_j(1), cell_j(2), cell_j(3)] < kp_index_lbounds) .OR. &
2471 : ANY([cell_j(1), cell_j(2), cell_j(3)] > kp_index_ubounds)) CYCLE
2472 :
2473 : jcell = cell_to_index(cell_j(1), cell_j(2), cell_j(3))
2474 : IF (jcell > nimg .OR. jcell < 1) CYCLE
2475 :
2476 : IF (ANY([cell_k(1), cell_k(2), cell_k(3)] < kp_index_lbounds) .OR. &
2477 : ANY([cell_k(1), cell_k(2), cell_k(3)] > kp_index_ubounds)) CYCLE
2478 :
2479 : kcell = cell_to_index(cell_k(1), cell_k(2), cell_k(3))
2480 : IF (kcell > nimg .OR. kcell < 1) CYCLE
2481 : !In case of HFX k-points, we only consider P^k if d_ik <= RI_range
2482 : IF (do_hfx_kpoints_prv) THEN
2483 : IF (dik > RI_range) CYCLE
2484 : kcell = img_to_RI_cell_prv(kcell)
2485 : END IF
2486 : ELSE
2487 : jcell = 1; kcell = 1
2488 : END IF
2489 :
2490 : IF (cell_sym_prv .AND. jcell < kcell) CYCLE
2491 :
2492 : blk_idx = [iatom, jatom, katom]
2493 : IF (do_hfx_kpoints_prv) THEN
2494 : blk_idx(3) = (kcell - 1)*natom + katom
2495 : kcell = 1
2496 : END IF
2497 :
2498 : CALL get_gto_basis_set(basis_i(ikind)%gto_basis_set, first_sgf=first_sgf_i, lmax=lmax_i, lmin=lmin_i, &
2499 : npgf=npgfi, nset=nseti, nsgf_set=nsgfi, pgf_radius=rpgf_i, set_radius=set_radius_i, &
2500 : sphi=sphi_i, zet=zeti, kind_radius=kind_radius_i)
2501 :
2502 : CALL get_gto_basis_set(basis_j(jkind)%gto_basis_set, first_sgf=first_sgf_j, lmax=lmax_j, lmin=lmin_j, &
2503 : npgf=npgfj, nset=nsetj, nsgf_set=nsgfj, pgf_radius=rpgf_j, set_radius=set_radius_j, &
2504 : sphi=sphi_j, zet=zetj, kind_radius=kind_radius_j)
2505 :
2506 : CALL get_gto_basis_set(basis_k(kkind)%gto_basis_set, first_sgf=first_sgf_k, lmax=lmax_k, lmin=lmin_k, &
2507 : npgf=npgfk, nset=nsetk, nsgf_set=nsgfk, pgf_radius=rpgf_k, set_radius=set_radius_k, &
2508 : sphi=sphi_k, zet=zetk, kind_radius=kind_radius_k)
2509 :
2510 : IF (kind_radius_j + kind_radius_i + dr_ij < dij) CYCLE
2511 : IF (kind_radius_j + kind_radius_k + dr_jk < djk) CYCLE
2512 : IF (kind_radius_k + kind_radius_i + dr_ik < dik) CYCLE
2513 :
2514 : ALLOCATE (max_contraction_j(nsetj))
2515 : DO jset = 1, nsetj
2516 : sgfj = first_sgf_j(1, jset)
2517 : max_contraction_j(jset) = MAXVAL((/(SUM(ABS(sphi_j(:, i))), i=sgfj, sgfj + nsgfj(jset) - 1)/))
2518 : END DO
2519 :
2520 : ALLOCATE (max_contraction_k(nsetk))
2521 : DO kset = 1, nsetk
2522 : sgfk = first_sgf_k(1, kset)
2523 : max_contraction_k(kset) = MAXVAL((/(SUM(ABS(sphi_k(:, i))), i=sgfk, sgfk + nsgfk(kset) - 1)/))
2524 : END DO
2525 :
2526 : CALL dbt_blk_sizes(t3c(jcell, kcell), blk_idx, blk_size)
2527 :
2528 : ALLOCATE (block_t(blk_size(2), blk_size(3), blk_size(1)))
2529 :
2530 : block_t = 0.0_dp
2531 : block_not_zero = .FALSE.
2532 : DO iset = 1, nseti
2533 :
2534 : sgfi = first_sgf_i(1, iset)
2535 : max_contraction_i = MAXVAL((/(SUM(ABS(sphi_i(:, i))), i=sgfi, sgfi + nsgfi(iset) - 1)/))
2536 :
2537 : DO jset = 1, nsetj
2538 :
2539 : IF (set_radius_j(jset) + set_radius_i(iset) + dr_ij < dij) CYCLE
2540 :
2541 : DO kset = 1, nsetk
2542 :
2543 : IF (set_radius_j(jset) + set_radius_k(kset) + dr_jk < djk) CYCLE
2544 : IF (set_radius_k(kset) + set_radius_i(iset) + dr_ik < dik) CYCLE
2545 :
2546 : ncoi = npgfi(iset)*ncoset(lmax_i(iset))
2547 : ncoj = npgfj(jset)*ncoset(lmax_j(jset))
2548 : ncok = npgfk(kset)*ncoset(lmax_k(kset))
2549 :
2550 : !ensure non-zero number of triples below
2551 : IF (ncoj*ncok*ncoi == 0) CYCLE
2552 :
2553 : !need positions for libint. Only relative positions are needed => set ri to 0.0
2554 : ri = 0.0_dp
2555 : rj = rij ! ri + rij
2556 : rk = rik ! ri + rik
2557 :
2558 : ALLOCATE (sijk(ncoj, ncok, ncoi))
2559 : IF (op_pos_prv == 1) THEN
2560 : sijk(:, :, :) = 0.0_dp
2561 : CALL eri_3center(sijk, &
2562 : lmin_j(jset), lmax_j(jset), npgfj(jset), zetj(:, jset), rpgf_j(:, jset), rj, &
2563 : lmin_k(kset), lmax_k(kset), npgfk(kset), zetk(:, kset), rpgf_k(:, kset), rk, &
2564 : lmin_i(iset), lmax_i(iset), npgfi(iset), zeti(:, iset), rpgf_i(:, iset), ri, &
2565 : djk, dij, dik, lib, potential_parameter, int_abc_ext=sijk_ext)
2566 : ELSE
2567 : ALLOCATE (tmp_ijk(ncoi, ncoj, ncok))
2568 : tmp_ijk(:, :, :) = 0.0_dp
2569 : CALL eri_3center(tmp_ijk, &
2570 : lmin_i(iset), lmax_i(iset), npgfi(iset), zeti(:, iset), rpgf_i(:, iset), ri, &
2571 : lmin_j(jset), lmax_j(jset), npgfj(jset), zetj(:, jset), rpgf_j(:, jset), rj, &
2572 : lmin_k(kset), lmax_k(kset), npgfk(kset), zetk(:, kset), rpgf_k(:, kset), rk, &
2573 : dij, dik, djk, lib, potential_parameter, int_abc_ext=sijk_ext)
2574 :
2575 : !F08: sijk = RESHAPE(tmp_ijk, [ncoj, ncok, ncoi], ORDER=[2, 3, 1]) with sijk not allocated
2576 : DO i = 1, ncoi !TODO: revise/check for efficiency
2577 : sijk(:, :, i) = tmp_ijk(i, :, :)
2578 : END DO
2579 : DEALLOCATE (tmp_ijk)
2580 : END IF
2581 :
2582 : IF (PRESENT(int_eps)) THEN
2583 : IF (int_eps > sijk_ext*(max_contraction_i* &
2584 : max_contraction_j(jset)* &
2585 : max_contraction_k(kset))) THEN
2586 : DEALLOCATE (sijk)
2587 : CYCLE
2588 : END IF
2589 : END IF
2590 :
2591 : block_not_zero = .TRUE.
2592 : ALLOCATE (sijk_contr(nsgfj(jset), nsgfk(kset), nsgfi(iset)))
2593 : CALL abc_contract_xsmm(sijk_contr, sijk, tspj(jset, jkind)%array, &
2594 : spk(kset, kkind)%array, spi(iset, ikind)%array, &
2595 : ncoj, ncok, ncoi, nsgfj(jset), nsgfk(kset), &
2596 : nsgfi(iset), cpp_buffer, ccp_buffer, prefac)
2597 : DEALLOCATE (sijk)
2598 :
2599 : sgfj = first_sgf_j(1, jset)
2600 : sgfk = first_sgf_k(1, kset)
2601 :
2602 : block_start_j = sgfj
2603 : block_end_j = sgfj + nsgfj(jset) - 1
2604 : block_start_k = sgfk
2605 : block_end_k = sgfk + nsgfk(kset) - 1
2606 : block_start_i = sgfi
2607 : block_end_i = sgfi + nsgfi(iset) - 1
2608 :
2609 : block_t(block_start_j:block_end_j, &
2610 : block_start_k:block_end_k, &
2611 : block_start_i:block_end_i) = &
2612 : block_t(block_start_j:block_end_j, &
2613 : block_start_k:block_end_k, &
2614 : block_start_i:block_end_i) + &
2615 : sijk_contr(:, :, :)
2616 : DEALLOCATE (sijk_contr)
2617 :
2618 : END DO
2619 :
2620 : END DO
2621 :
2622 : END DO
2623 :
2624 : IF (block_not_zero) THEN
2625 : !$OMP CRITICAL
2626 : CALL timeset(routineN//"_put_dbcsr", handle2)
2627 : IF (debug) THEN
2628 : CALL dbt_get_block(t3c(jcell, kcell), blk_idx, dummy_block_t, found=found)
2629 : CPASSERT(found)
2630 : END IF
2631 :
2632 : sp = SHAPE(block_t)
2633 : sp([2, 3, 1]) = sp ! sp = sp([2, 3, 1]) performs worse
2634 :
2635 : CALL dbt_put_block(t3c(jcell, kcell), blk_idx, sp, &
2636 : RESHAPE(block_t, SHAPE=sp, ORDER=[2, 3, 1]), summation=.TRUE.)
2637 :
2638 : CALL timestop(handle2)
2639 : !$OMP END CRITICAL
2640 : END IF
2641 :
2642 : DEALLOCATE (block_t)
2643 : DEALLOCATE (max_contraction_j, max_contraction_k)
2644 : END DO
2645 :
2646 : IF (ALLOCATED(ccp_buffer)) DEALLOCATE (ccp_buffer)
2647 : IF (ALLOCATED(cpp_buffer)) DEALLOCATE (cpp_buffer)
2648 :
2649 : CALL cp_libint_cleanup_3eri(lib)
2650 : CALL neighbor_list_3c_iterator_destroy(nl_3c_iter)
2651 : !$OMP END PARALLEL
2652 :
2653 : !TODO: deal with hfx_kpoints, because should not filter by 1/2
2654 1858 : IF (nl_3c%sym == symmetric_jk .OR. do_kpoints_prv) THEN
2655 :
2656 684 : IF (.NOT. do_hfx_kpoints_prv) THEN
2657 1186 : DO kcell = 1, nimg
2658 2274 : DO jcell = 1, nimg
2659 : ! need half of filter eps because afterwards we add transposed tensor
2660 1712 : CALL dbt_filter(t3c(jcell, kcell), filter_eps/2)
2661 : END DO
2662 : END DO
2663 : ELSE
2664 244 : DO kcell = 1, ncell_RI
2665 3492 : DO jcell = 1, nimg
2666 3370 : CALL dbt_filter(t3c(jcell, kcell), filter_eps)
2667 : END DO
2668 : END DO
2669 : END IF
2670 :
2671 684 : IF (desymmetrize_prv) THEN
2672 : ! add transposed of overlap integrals
2673 0 : CALL dbt_create(t3c(1, 1), t_3c_tmp)
2674 0 : DO kcell = 1, jcell
2675 0 : DO jcell = 1, nimg
2676 0 : CALL dbt_copy(t3c(jcell, kcell), t_3c_tmp)
2677 0 : CALL dbt_copy(t_3c_tmp, t3c(kcell, jcell), order=[1, 3, 2], summation=.TRUE., move_data=.TRUE.)
2678 0 : CALL dbt_filter(t3c(kcell, jcell), filter_eps)
2679 : END DO
2680 : END DO
2681 0 : DO kcell = jcell + 1, nimg
2682 0 : DO jcell = 1, nimg
2683 0 : CALL dbt_copy(t3c(jcell, kcell), t_3c_tmp)
2684 0 : CALL dbt_copy(t_3c_tmp, t3c(kcell, jcell), order=[1, 3, 2], summation=.FALSE., move_data=.TRUE.)
2685 0 : CALL dbt_filter(t3c(kcell, jcell), filter_eps)
2686 : END DO
2687 : END DO
2688 0 : CALL dbt_destroy(t_3c_tmp)
2689 : END IF
2690 1174 : ELSEIF (nl_3c%sym == symmetric_ij) THEN
2691 0 : DO kcell = 1, nimg
2692 0 : DO jcell = 1, nimg
2693 0 : CALL dbt_filter(t3c(jcell, kcell), filter_eps/2)
2694 : END DO
2695 : END DO
2696 1174 : ELSEIF (nl_3c%sym == symmetric_none) THEN
2697 2348 : DO kcell = 1, nimg
2698 3522 : DO jcell = 1, nimg
2699 2348 : CALL dbt_filter(t3c(jcell, kcell), filter_eps)
2700 : END DO
2701 : END DO
2702 : ELSE
2703 0 : CPABORT("requested symmetric case not implemented")
2704 : END IF
2705 :
2706 12036 : DO iset = 1, max_nset
2707 28576 : DO ibasis = 1, nbasis
2708 16540 : IF (ASSOCIATED(spi(iset, ibasis)%array)) DEALLOCATE (spi(iset, ibasis)%array)
2709 16540 : IF (ASSOCIATED(tspj(iset, ibasis)%array)) DEALLOCATE (tspj(iset, ibasis)%array)
2710 26718 : IF (ASSOCIATED(spk(iset, ibasis)%array)) DEALLOCATE (spk(iset, ibasis)%array)
2711 : END DO
2712 : END DO
2713 1858 : DEALLOCATE (spi, tspj, spk)
2714 :
2715 1858 : CALL timestop(handle)
2716 16722 : END SUBROUTINE build_3c_integrals
2717 :
2718 : ! **************************************************************************************************
2719 : !> \brief Calculates the derivatives of 2-center integrals, wrt to the first center
2720 : !> \param t2c_der ...
2721 : !> this routine requires that libint has been static initialised somewhere else
2722 : !> \param filter_eps Filter threshold for matrix blocks
2723 : !> \param qs_env ...
2724 : !> \param nl_2c 2-center neighborlist
2725 : !> \param basis_i ...
2726 : !> \param basis_j ...
2727 : !> \param potential_parameter ...
2728 : !> \param do_kpoints ...
2729 : ! **************************************************************************************************
2730 372 : SUBROUTINE build_2c_derivatives(t2c_der, filter_eps, qs_env, &
2731 372 : nl_2c, basis_i, basis_j, &
2732 : potential_parameter, do_kpoints)
2733 :
2734 : TYPE(dbcsr_type), DIMENSION(:, :), INTENT(INOUT) :: t2c_der
2735 : REAL(KIND=dp), INTENT(IN) :: filter_eps
2736 : TYPE(qs_environment_type), POINTER :: qs_env
2737 : TYPE(neighbor_list_set_p_type), DIMENSION(:), &
2738 : POINTER :: nl_2c
2739 : TYPE(gto_basis_set_p_type), DIMENSION(:) :: basis_i, basis_j
2740 : TYPE(libint_potential_type), INTENT(IN) :: potential_parameter
2741 : LOGICAL, INTENT(IN), OPTIONAL :: do_kpoints
2742 :
2743 : CHARACTER(len=*), PARAMETER :: routineN = 'build_2c_derivatives'
2744 :
2745 : INTEGER :: handle, i_xyz, iatom, ibasis, icol, ikind, imax, img, irow, iset, jatom, jkind, &
2746 : jset, m_max, maxli, maxlj, natom, ncoi, ncoj, nimg, nseti, nsetj, op_prv, sgfi, sgfj, &
2747 : unit_id
2748 : INTEGER, DIMENSION(3) :: cell_j, kp_index_lbounds, &
2749 : kp_index_ubounds
2750 372 : INTEGER, DIMENSION(:), POINTER :: lmax_i, lmax_j, lmin_i, lmin_j, npgfi, &
2751 372 : npgfj, nsgfi, nsgfj
2752 372 : INTEGER, DIMENSION(:, :), POINTER :: first_sgf_i, first_sgf_j
2753 372 : INTEGER, DIMENSION(:, :, :), POINTER :: cell_to_index
2754 : LOGICAL :: do_kpoints_prv, do_symmetric, found, &
2755 : trans
2756 : REAL(KIND=dp) :: dab
2757 372 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :) :: dij_contr, dij_rs
2758 372 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :) :: dij
2759 : REAL(KIND=dp), DIMENSION(3) :: ri, rij, rj
2760 372 : REAL(KIND=dp), DIMENSION(:), POINTER :: set_radius_i, set_radius_j
2761 372 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: rpgf_i, rpgf_j, sphi_i, sphi_j, zeti, &
2762 372 : zetj
2763 372 : TYPE(atomic_kind_type), DIMENSION(:), POINTER :: atomic_kind_set
2764 1488 : TYPE(block_p_type), DIMENSION(3) :: block_t
2765 : TYPE(cp_libint_t) :: lib
2766 : TYPE(dft_control_type), POINTER :: dft_control
2767 : TYPE(kpoint_type), POINTER :: kpoints
2768 : TYPE(mp_para_env_type), POINTER :: para_env
2769 : TYPE(neighbor_list_iterator_p_type), &
2770 372 : DIMENSION(:), POINTER :: nl_iterator
2771 372 : TYPE(qs_kind_type), DIMENSION(:), POINTER :: qs_kind_set
2772 :
2773 372 : CALL timeset(routineN, handle)
2774 :
2775 372 : IF (PRESENT(do_kpoints)) THEN
2776 84 : do_kpoints_prv = do_kpoints
2777 : ELSE
2778 : do_kpoints_prv = .FALSE.
2779 : END IF
2780 :
2781 372 : op_prv = potential_parameter%potential_type
2782 :
2783 372 : NULLIFY (qs_kind_set, atomic_kind_set, block_t(1)%block, block_t(2)%block, block_t(3)%block, cell_to_index)
2784 :
2785 : ! get stuff
2786 : CALL get_qs_env(qs_env, atomic_kind_set=atomic_kind_set, qs_kind_set=qs_kind_set, &
2787 372 : natom=natom, kpoints=kpoints, dft_control=dft_control, para_env=para_env)
2788 :
2789 372 : IF (do_kpoints_prv) THEN
2790 84 : nimg = SIZE(t2c_der, 1)
2791 84 : CALL get_kpoint_info(kpoints, cell_to_index=cell_to_index)
2792 336 : kp_index_lbounds = LBOUND(cell_to_index)
2793 336 : kp_index_ubounds = UBOUND(cell_to_index)
2794 : ELSE
2795 : nimg = 1
2796 : END IF
2797 :
2798 : ! check for symmetry
2799 372 : CPASSERT(SIZE(nl_2c) > 0)
2800 372 : CALL get_neighbor_list_set_p(neighbor_list_sets=nl_2c, symmetric=do_symmetric)
2801 :
2802 372 : IF (do_symmetric) THEN
2803 576 : DO img = 1, nimg
2804 : !Derivtive matrix is assymetric
2805 1440 : DO i_xyz = 1, 3
2806 1152 : CPASSERT(dbcsr_get_matrix_type(t2c_der(img, i_xyz)) == dbcsr_type_antisymmetric)
2807 : END DO
2808 : END DO
2809 : ELSE
2810 2104 : DO img = 1, nimg
2811 8164 : DO i_xyz = 1, 3
2812 8080 : CPASSERT(dbcsr_get_matrix_type(t2c_der(img, i_xyz)) == dbcsr_type_no_symmetry)
2813 : END DO
2814 : END DO
2815 : END IF
2816 :
2817 2680 : DO img = 1, nimg
2818 9604 : DO i_xyz = 1, 3
2819 9232 : CALL cp_dbcsr_alloc_block_from_nbl(t2c_der(img, i_xyz), nl_2c)
2820 : END DO
2821 : END DO
2822 :
2823 372 : maxli = 0
2824 1062 : DO ibasis = 1, SIZE(basis_i)
2825 690 : CALL get_gto_basis_set(gto_basis_set=basis_i(ibasis)%gto_basis_set, maxl=imax)
2826 1062 : maxli = MAX(maxli, imax)
2827 : END DO
2828 372 : maxlj = 0
2829 1062 : DO ibasis = 1, SIZE(basis_j)
2830 690 : CALL get_gto_basis_set(gto_basis_set=basis_j(ibasis)%gto_basis_set, maxl=imax)
2831 1062 : maxlj = MAX(maxlj, imax)
2832 : END DO
2833 :
2834 372 : m_max = maxli + maxlj + 1
2835 :
2836 : !Init the truncated Coulomb operator
2837 372 : IF (op_prv == do_potential_truncated) THEN
2838 :
2839 54 : IF (m_max > get_lmax_init()) THEN
2840 18 : IF (para_env%mepos == 0) THEN
2841 9 : CALL open_file(unit_number=unit_id, file_name=potential_parameter%filename)
2842 : END IF
2843 18 : CALL init(m_max, unit_id, para_env%mepos, para_env)
2844 18 : IF (para_env%mepos == 0) THEN
2845 9 : CALL close_file(unit_id)
2846 : END IF
2847 : END IF
2848 : END IF
2849 :
2850 372 : CALL init_md_ftable(nmax=m_max)
2851 :
2852 372 : CALL cp_libint_init_2eri1(lib, MAX(maxli, maxlj))
2853 372 : CALL cp_libint_set_contrdepth(lib, 1)
2854 :
2855 372 : CALL neighbor_list_iterator_create(nl_iterator, nl_2c)
2856 14603 : DO WHILE (neighbor_list_iterate(nl_iterator) == 0)
2857 :
2858 : CALL get_iterator_info(nl_iterator, ikind=ikind, jkind=jkind, &
2859 14231 : iatom=iatom, jatom=jatom, r=rij, cell=cell_j)
2860 14231 : IF (do_kpoints_prv) THEN
2861 10010 : IF (ANY([cell_j(1), cell_j(2), cell_j(3)] < kp_index_lbounds) .OR. &
2862 : ANY([cell_j(1), cell_j(2), cell_j(3)] > kp_index_ubounds)) CYCLE
2863 1430 : img = cell_to_index(cell_j(1), cell_j(2), cell_j(3))
2864 1430 : IF (img > nimg .OR. img < 1) CYCLE
2865 : ELSE
2866 : img = 1
2867 : END IF
2868 :
2869 : CALL get_gto_basis_set(basis_i(ikind)%gto_basis_set, first_sgf=first_sgf_i, lmax=lmax_i, lmin=lmin_i, &
2870 : npgf=npgfi, nset=nseti, nsgf_set=nsgfi, pgf_radius=rpgf_i, set_radius=set_radius_i, &
2871 14203 : sphi=sphi_i, zet=zeti)
2872 :
2873 : CALL get_gto_basis_set(basis_j(jkind)%gto_basis_set, first_sgf=first_sgf_j, lmax=lmax_j, lmin=lmin_j, &
2874 : npgf=npgfj, nset=nsetj, nsgf_set=nsgfj, pgf_radius=rpgf_j, set_radius=set_radius_j, &
2875 14203 : sphi=sphi_j, zet=zetj)
2876 :
2877 14203 : IF (do_symmetric) THEN
2878 12801 : IF (iatom <= jatom) THEN
2879 7824 : irow = iatom
2880 7824 : icol = jatom
2881 : ELSE
2882 4977 : irow = jatom
2883 4977 : icol = iatom
2884 : END IF
2885 : ELSE
2886 1402 : irow = iatom
2887 1402 : icol = jatom
2888 : END IF
2889 :
2890 56812 : dab = NORM2(rij)
2891 14203 : trans = do_symmetric .AND. (iatom > jatom)
2892 :
2893 56812 : DO i_xyz = 1, 3
2894 : CALL dbcsr_get_block_p(matrix=t2c_der(img, i_xyz), &
2895 42609 : row=irow, col=icol, BLOCK=block_t(i_xyz)%block, found=found)
2896 56812 : CPASSERT(found)
2897 : END DO
2898 :
2899 48405 : DO iset = 1, nseti
2900 :
2901 33830 : ncoi = npgfi(iset)*ncoset(lmax_i(iset))
2902 33830 : sgfi = first_sgf_i(1, iset)
2903 :
2904 165921 : DO jset = 1, nsetj
2905 :
2906 117860 : ncoj = npgfj(jset)*ncoset(lmax_j(jset))
2907 117860 : sgfj = first_sgf_j(1, jset)
2908 :
2909 151690 : IF (ncoi*ncoj > 0) THEN
2910 471440 : ALLOCATE (dij_contr(nsgfi(iset), nsgfj(jset)))
2911 589300 : ALLOCATE (dij(ncoi, ncoj, 3))
2912 312725222 : dij(:, :, :) = 0.0_dp
2913 :
2914 117860 : ri = 0.0_dp
2915 117860 : rj = rij
2916 :
2917 : CALL eri_2center_derivs(dij, lmin_i(iset), lmax_i(iset), npgfi(iset), zeti(:, iset), &
2918 : rpgf_i(:, iset), ri, lmin_j(jset), lmax_j(jset), npgfj(jset), zetj(:, jset), &
2919 117860 : rpgf_j(:, jset), rj, dab, lib, potential_parameter)
2920 :
2921 471440 : DO i_xyz = 1, 3
2922 :
2923 123133524 : dij_contr(:, :) = 0.0_dp
2924 : CALL ab_contract(dij_contr, dij(:, :, i_xyz), &
2925 : sphi_i(:, sgfi:), sphi_j(:, sgfj:), &
2926 353580 : ncoi, ncoj, nsgfi(iset), nsgfj(jset))
2927 :
2928 353580 : IF (trans) THEN
2929 : !if transpose, then -1 factor for antisymmetry
2930 448116 : ALLOCATE (dij_rs(nsgfj(jset), nsgfi(iset)))
2931 44937846 : dij_rs(:, :) = -1.0_dp*TRANSPOSE(dij_contr)
2932 : ELSE
2933 966204 : ALLOCATE (dij_rs(nsgfi(iset), nsgfj(jset)))
2934 78086691 : dij_rs(:, :) = dij_contr
2935 : END IF
2936 :
2937 : CALL block_add("IN", dij_rs, &
2938 : nsgfi(iset), nsgfj(jset), block_t(i_xyz)%block, &
2939 353580 : sgfi, sgfj, trans=trans)
2940 471440 : DEALLOCATE (dij_rs)
2941 : END DO
2942 :
2943 117860 : DEALLOCATE (dij, dij_contr)
2944 : END IF
2945 : END DO
2946 : END DO
2947 : END DO
2948 :
2949 372 : CALL cp_libint_cleanup_2eri1(lib)
2950 372 : CALL neighbor_list_iterator_release(nl_iterator)
2951 :
2952 2680 : DO img = 1, nimg
2953 9604 : DO i_xyz = 1, 3
2954 6924 : CALL dbcsr_finalize(t2c_der(img, i_xyz))
2955 9232 : CALL dbcsr_filter(t2c_der(img, i_xyz), filter_eps)
2956 : END DO
2957 : END DO
2958 :
2959 372 : CALL timestop(handle)
2960 :
2961 1116 : END SUBROUTINE build_2c_derivatives
2962 :
2963 : ! **************************************************************************************************
2964 : !> \brief Calculates the virial coming from 2c derivatives on the fly
2965 : !> \param work_virial ...
2966 : !> \param t2c_trace the 2c tensor that we should trace with the derivatives
2967 : !> \param pref ...
2968 : !> \param qs_env ...
2969 : !> \param nl_2c 2-center neighborlist. Assumed to have compatible distribution with t2c_trace,
2970 : !> and to be non-symmetric
2971 : !> \param basis_i ...
2972 : !> \param basis_j ...
2973 : !> \param potential_parameter ...
2974 : ! **************************************************************************************************
2975 12 : SUBROUTINE calc_2c_virial(work_virial, t2c_trace, pref, qs_env, nl_2c, basis_i, basis_j, potential_parameter)
2976 : REAL(dp), DIMENSION(3, 3), INTENT(INOUT) :: work_virial
2977 : TYPE(dbcsr_type), INTENT(INOUT) :: t2c_trace
2978 : REAL(KIND=dp), INTENT(IN) :: pref
2979 : TYPE(qs_environment_type), POINTER :: qs_env
2980 : TYPE(neighbor_list_set_p_type), DIMENSION(:), &
2981 : POINTER :: nl_2c
2982 : TYPE(gto_basis_set_p_type), DIMENSION(:) :: basis_i, basis_j
2983 : TYPE(libint_potential_type), INTENT(IN) :: potential_parameter
2984 :
2985 : CHARACTER(len=*), PARAMETER :: routineN = 'calc_2c_virial'
2986 :
2987 : INTEGER :: handle, i_xyz, iatom, ibasis, ikind, imax, iset, j_xyz, jatom, jkind, jset, &
2988 : m_max, maxli, maxlj, natom, ncoi, ncoj, nseti, nsetj, op_prv, sgfi, sgfj, unit_id
2989 12 : INTEGER, DIMENSION(:), POINTER :: lmax_i, lmax_j, lmin_i, lmin_j, npgfi, &
2990 12 : npgfj, nsgfi, nsgfj
2991 12 : INTEGER, DIMENSION(:, :), POINTER :: first_sgf_i, first_sgf_j
2992 : LOGICAL :: do_symmetric, found
2993 : REAL(dp) :: force
2994 12 : REAL(dp), DIMENSION(:, :), POINTER :: pblock
2995 : REAL(KIND=dp) :: dab
2996 12 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :) :: dij_contr
2997 12 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :) :: dij
2998 : REAL(KIND=dp), DIMENSION(3) :: ri, rij, rj, scoord
2999 12 : REAL(KIND=dp), DIMENSION(:), POINTER :: set_radius_i, set_radius_j
3000 12 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: rpgf_i, rpgf_j, sphi_i, sphi_j, zeti, &
3001 12 : zetj
3002 12 : TYPE(atomic_kind_type), DIMENSION(:), POINTER :: atomic_kind_set
3003 : TYPE(cell_type), POINTER :: cell
3004 : TYPE(cp_libint_t) :: lib
3005 : TYPE(dft_control_type), POINTER :: dft_control
3006 : TYPE(mp_para_env_type), POINTER :: para_env
3007 : TYPE(neighbor_list_iterator_p_type), &
3008 12 : DIMENSION(:), POINTER :: nl_iterator
3009 12 : TYPE(particle_type), DIMENSION(:), POINTER :: particle_set
3010 12 : TYPE(qs_kind_type), DIMENSION(:), POINTER :: qs_kind_set
3011 :
3012 12 : CALL timeset(routineN, handle)
3013 :
3014 12 : op_prv = potential_parameter%potential_type
3015 :
3016 12 : NULLIFY (qs_kind_set, atomic_kind_set, pblock, particle_set, cell)
3017 :
3018 : ! get stuff
3019 : CALL get_qs_env(qs_env, atomic_kind_set=atomic_kind_set, qs_kind_set=qs_kind_set, &
3020 : natom=natom, dft_control=dft_control, para_env=para_env, &
3021 12 : particle_set=particle_set, cell=cell)
3022 :
3023 : ! check for symmetry
3024 12 : CPASSERT(SIZE(nl_2c) > 0)
3025 12 : CALL get_neighbor_list_set_p(neighbor_list_sets=nl_2c, symmetric=do_symmetric)
3026 12 : CPASSERT(.NOT. do_symmetric)
3027 :
3028 12 : maxli = 0
3029 36 : DO ibasis = 1, SIZE(basis_i)
3030 24 : CALL get_gto_basis_set(gto_basis_set=basis_i(ibasis)%gto_basis_set, maxl=imax)
3031 36 : maxli = MAX(maxli, imax)
3032 : END DO
3033 12 : maxlj = 0
3034 36 : DO ibasis = 1, SIZE(basis_j)
3035 24 : CALL get_gto_basis_set(gto_basis_set=basis_j(ibasis)%gto_basis_set, maxl=imax)
3036 36 : maxlj = MAX(maxlj, imax)
3037 : END DO
3038 :
3039 12 : m_max = maxli + maxlj + 1
3040 :
3041 : !Init the truncated Coulomb operator
3042 12 : IF (op_prv == do_potential_truncated) THEN
3043 :
3044 2 : IF (m_max > get_lmax_init()) THEN
3045 0 : IF (para_env%mepos == 0) THEN
3046 0 : CALL open_file(unit_number=unit_id, file_name=potential_parameter%filename)
3047 : END IF
3048 0 : CALL init(m_max, unit_id, para_env%mepos, para_env)
3049 0 : IF (para_env%mepos == 0) THEN
3050 0 : CALL close_file(unit_id)
3051 : END IF
3052 : END IF
3053 : END IF
3054 :
3055 12 : CALL init_md_ftable(nmax=m_max)
3056 :
3057 12 : CALL cp_libint_init_2eri1(lib, MAX(maxli, maxlj))
3058 12 : CALL cp_libint_set_contrdepth(lib, 1)
3059 :
3060 12 : CALL neighbor_list_iterator_create(nl_iterator, nl_2c)
3061 1644 : DO WHILE (neighbor_list_iterate(nl_iterator) == 0)
3062 :
3063 : CALL get_iterator_info(nl_iterator, ikind=ikind, jkind=jkind, &
3064 1632 : iatom=iatom, jatom=jatom, r=rij)
3065 :
3066 : CALL get_gto_basis_set(basis_i(ikind)%gto_basis_set, first_sgf=first_sgf_i, lmax=lmax_i, lmin=lmin_i, &
3067 : npgf=npgfi, nset=nseti, nsgf_set=nsgfi, pgf_radius=rpgf_i, set_radius=set_radius_i, &
3068 1632 : sphi=sphi_i, zet=zeti)
3069 :
3070 : CALL get_gto_basis_set(basis_j(jkind)%gto_basis_set, first_sgf=first_sgf_j, lmax=lmax_j, lmin=lmin_j, &
3071 : npgf=npgfj, nset=nsetj, nsgf_set=nsgfj, pgf_radius=rpgf_j, set_radius=set_radius_j, &
3072 1632 : sphi=sphi_j, zet=zetj)
3073 :
3074 6528 : dab = NORM2(rij)
3075 :
3076 1632 : CALL dbcsr_get_block_p(t2c_trace, iatom, jatom, pblock, found)
3077 1632 : IF (.NOT. found) CYCLE
3078 :
3079 6113 : DO iset = 1, nseti
3080 :
3081 4469 : ncoi = npgfi(iset)*ncoset(lmax_i(iset))
3082 4469 : sgfi = first_sgf_i(1, iset)
3083 :
3084 25808 : DO jset = 1, nsetj
3085 :
3086 19707 : ncoj = npgfj(jset)*ncoset(lmax_j(jset))
3087 19707 : sgfj = first_sgf_j(1, jset)
3088 :
3089 24176 : IF (ncoi*ncoj > 0) THEN
3090 78828 : ALLOCATE (dij_contr(nsgfi(iset), nsgfj(jset)))
3091 98535 : ALLOCATE (dij(ncoi, ncoj, 3))
3092 5649186 : dij(:, :, :) = 0.0_dp
3093 :
3094 19707 : ri = 0.0_dp
3095 19707 : rj = rij
3096 :
3097 : CALL eri_2center_derivs(dij, lmin_i(iset), lmax_i(iset), npgfi(iset), zeti(:, iset), &
3098 : rpgf_i(:, iset), ri, lmin_j(jset), lmax_j(jset), npgfj(jset), zetj(:, jset), &
3099 19707 : rpgf_j(:, jset), rj, dab, lib, potential_parameter)
3100 :
3101 78828 : DO i_xyz = 1, 3
3102 :
3103 2522610 : dij_contr(:, :) = 0.0_dp
3104 : CALL ab_contract(dij_contr, dij(:, :, i_xyz), &
3105 : sphi_i(:, sgfi:), sphi_j(:, sgfj:), &
3106 59121 : ncoi, ncoj, nsgfi(iset), nsgfj(jset))
3107 :
3108 2522610 : force = SUM(pblock(sgfi:sgfi + nsgfi(iset) - 1, sgfj:sgfj + nsgfj(jset) - 1)*dij_contr(:, :))
3109 59121 : force = pref*force
3110 :
3111 : !iatom virial
3112 59121 : CALL real_to_scaled(scoord, particle_set(iatom)%r, cell)
3113 236484 : DO j_xyz = 1, 3
3114 236484 : work_virial(i_xyz, j_xyz) = work_virial(i_xyz, j_xyz) + force*scoord(j_xyz)
3115 : END DO
3116 :
3117 : !jatom virial
3118 236484 : CALL real_to_scaled(scoord, particle_set(iatom)%r + rij, cell)
3119 256191 : DO j_xyz = 1, 3
3120 236484 : work_virial(i_xyz, j_xyz) = work_virial(i_xyz, j_xyz) - force*scoord(j_xyz)
3121 : END DO
3122 : END DO
3123 :
3124 19707 : DEALLOCATE (dij, dij_contr)
3125 : END IF
3126 : END DO
3127 : END DO
3128 : END DO
3129 :
3130 12 : CALL neighbor_list_iterator_release(nl_iterator)
3131 12 : CALL cp_libint_cleanup_2eri1(lib)
3132 :
3133 12 : CALL timestop(handle)
3134 :
3135 24 : END SUBROUTINE calc_2c_virial
3136 :
3137 : ! **************************************************************************************************
3138 : !> \brief ...
3139 : !> \param t2c empty DBCSR matrix
3140 : !> \param filter_eps Filter threshold for matrix blocks
3141 : !> \param qs_env ...
3142 : !> \param nl_2c 2-center neighborlist
3143 : !> \param basis_i ...
3144 : !> \param basis_j ...
3145 : !> \param potential_parameter ...
3146 : !> \param do_kpoints ...
3147 : !> \param do_hfx_kpoints ...
3148 : !> this routine requires that libint has been static initialised somewhere else
3149 : !> \param ext_kpoints ...
3150 : !> \param regularization_RI ...
3151 : ! **************************************************************************************************
3152 908 : SUBROUTINE build_2c_integrals(t2c, filter_eps, qs_env, &
3153 908 : nl_2c, basis_i, basis_j, &
3154 : potential_parameter, do_kpoints, &
3155 : do_hfx_kpoints, ext_kpoints, regularization_RI)
3156 :
3157 : TYPE(dbcsr_type), DIMENSION(:), INTENT(INOUT) :: t2c
3158 : REAL(KIND=dp), INTENT(IN) :: filter_eps
3159 : TYPE(qs_environment_type), POINTER :: qs_env
3160 : TYPE(neighbor_list_set_p_type), DIMENSION(:), &
3161 : POINTER :: nl_2c
3162 : TYPE(gto_basis_set_p_type), DIMENSION(:) :: basis_i, basis_j
3163 : TYPE(libint_potential_type), INTENT(IN) :: potential_parameter
3164 : LOGICAL, INTENT(IN), OPTIONAL :: do_kpoints, do_hfx_kpoints
3165 : TYPE(kpoint_type), OPTIONAL, POINTER :: ext_kpoints
3166 : REAL(KIND=dp), OPTIONAL :: regularization_RI
3167 :
3168 : CHARACTER(len=*), PARAMETER :: routineN = 'build_2c_integrals'
3169 :
3170 : INTEGER :: handle, i_diag, iatom, ibasis, icol, ikind, imax, img, irow, iset, jatom, jkind, &
3171 : jset, m_max, maxli, maxlj, natom, ncoi, ncoj, nimg, nseti, nsetj, op_prv, sgfi, sgfj, &
3172 : unit_id
3173 : INTEGER, DIMENSION(3) :: cell_j, kp_index_lbounds, &
3174 : kp_index_ubounds
3175 908 : INTEGER, DIMENSION(:), POINTER :: lmax_i, lmax_j, lmin_i, lmin_j, npgfi, &
3176 908 : npgfj, nsgfi, nsgfj
3177 908 : INTEGER, DIMENSION(:, :), POINTER :: first_sgf_i, first_sgf_j
3178 908 : INTEGER, DIMENSION(:, :, :), POINTER :: cell_to_index
3179 : LOGICAL :: do_hfx_kpoints_prv, do_kpoints_prv, &
3180 : do_symmetric, found, trans
3181 : REAL(KIND=dp) :: dab, min_zet
3182 908 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :) :: sij, sij_contr, sij_rs
3183 : REAL(KIND=dp), DIMENSION(3) :: ri, rij, rj
3184 908 : REAL(KIND=dp), DIMENSION(:), POINTER :: set_radius_i, set_radius_j
3185 908 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: rpgf_i, rpgf_j, sphi_i, sphi_j, zeti, &
3186 908 : zetj
3187 908 : TYPE(atomic_kind_type), DIMENSION(:), POINTER :: atomic_kind_set
3188 : TYPE(block_p_type) :: block_t
3189 : TYPE(cell_type), POINTER :: cell
3190 : TYPE(cp_libint_t) :: lib
3191 : TYPE(dft_control_type), POINTER :: dft_control
3192 : TYPE(kpoint_type), POINTER :: kpoints
3193 : TYPE(mp_para_env_type), POINTER :: para_env
3194 : TYPE(neighbor_list_iterator_p_type), &
3195 908 : DIMENSION(:), POINTER :: nl_iterator
3196 908 : TYPE(qs_kind_type), DIMENSION(:), POINTER :: qs_kind_set
3197 :
3198 908 : CALL timeset(routineN, handle)
3199 :
3200 908 : IF (PRESENT(do_kpoints)) THEN
3201 858 : do_kpoints_prv = do_kpoints
3202 : ELSE
3203 : do_kpoints_prv = .FALSE.
3204 : END IF
3205 :
3206 908 : IF (PRESENT(do_hfx_kpoints)) THEN
3207 266 : do_hfx_kpoints_prv = do_hfx_kpoints
3208 : ELSE
3209 : do_hfx_kpoints_prv = .FALSE.
3210 : END IF
3211 266 : IF (do_hfx_kpoints_prv) THEN
3212 140 : CPASSERT(do_kpoints_prv)
3213 : END IF
3214 :
3215 908 : op_prv = potential_parameter%potential_type
3216 :
3217 908 : NULLIFY (qs_kind_set, atomic_kind_set, block_t%block, cell_to_index)
3218 :
3219 : ! get stuff
3220 : CALL get_qs_env(qs_env, atomic_kind_set=atomic_kind_set, qs_kind_set=qs_kind_set, cell=cell, &
3221 908 : natom=natom, dft_control=dft_control, para_env=para_env, kpoints=kpoints)
3222 :
3223 908 : IF (PRESENT(ext_kpoints)) kpoints => ext_kpoints
3224 :
3225 908 : IF (do_kpoints_prv) THEN
3226 504 : nimg = SIZE(t2c)
3227 504 : CALL get_kpoint_info(kpoints, cell_to_index=cell_to_index)
3228 2016 : kp_index_lbounds = LBOUND(cell_to_index)
3229 2016 : kp_index_ubounds = UBOUND(cell_to_index)
3230 : ELSE
3231 : nimg = 1
3232 : END IF
3233 :
3234 : ! check for symmetry
3235 908 : CPASSERT(SIZE(nl_2c) > 0)
3236 908 : CALL get_neighbor_list_set_p(neighbor_list_sets=nl_2c, symmetric=do_symmetric)
3237 :
3238 908 : IF (do_symmetric) THEN
3239 2454 : DO img = 1, nimg
3240 2454 : CPASSERT(dbcsr_has_symmetry(t2c(img)))
3241 : END DO
3242 : ELSE
3243 4380 : DO img = 1, nimg
3244 4380 : CPASSERT(.NOT. dbcsr_has_symmetry(t2c(img)))
3245 : END DO
3246 : END IF
3247 :
3248 6834 : DO img = 1, nimg
3249 6834 : CALL cp_dbcsr_alloc_block_from_nbl(t2c(img), nl_2c)
3250 : END DO
3251 :
3252 908 : maxli = 0
3253 2578 : DO ibasis = 1, SIZE(basis_i)
3254 1670 : CALL get_gto_basis_set(gto_basis_set=basis_i(ibasis)%gto_basis_set, maxl=imax)
3255 2578 : maxli = MAX(maxli, imax)
3256 : END DO
3257 908 : maxlj = 0
3258 2578 : DO ibasis = 1, SIZE(basis_j)
3259 1670 : CALL get_gto_basis_set(gto_basis_set=basis_j(ibasis)%gto_basis_set, maxl=imax)
3260 2578 : maxlj = MAX(maxlj, imax)
3261 : END DO
3262 :
3263 908 : m_max = maxli + maxlj
3264 :
3265 : !Init the truncated Coulomb operator
3266 908 : IF (op_prv == do_potential_truncated) THEN
3267 :
3268 586 : IF (m_max > get_lmax_init()) THEN
3269 100 : IF (para_env%mepos == 0) THEN
3270 50 : CALL open_file(unit_number=unit_id, file_name=potential_parameter%filename)
3271 : END IF
3272 100 : CALL init(m_max, unit_id, para_env%mepos, para_env)
3273 100 : IF (para_env%mepos == 0) THEN
3274 50 : CALL close_file(unit_id)
3275 : END IF
3276 : END IF
3277 : END IF
3278 :
3279 908 : CALL init_md_ftable(nmax=m_max)
3280 :
3281 908 : CALL cp_libint_init_2eri(lib, MAX(maxli, maxlj))
3282 908 : CALL cp_libint_set_contrdepth(lib, 1)
3283 :
3284 908 : CALL neighbor_list_iterator_create(nl_iterator, nl_2c)
3285 26850 : DO WHILE (neighbor_list_iterate(nl_iterator) == 0)
3286 :
3287 : CALL get_iterator_info(nl_iterator, ikind=ikind, jkind=jkind, &
3288 25942 : iatom=iatom, jatom=jatom, r=rij, cell=cell_j)
3289 25942 : IF (do_kpoints_prv) THEN
3290 34674 : IF (ANY([cell_j(1), cell_j(2), cell_j(3)] < kp_index_lbounds) .OR. &
3291 : ANY([cell_j(1), cell_j(2), cell_j(3)] > kp_index_ubounds)) CYCLE
3292 4930 : img = cell_to_index(cell_j(1), cell_j(2), cell_j(3))
3293 4930 : IF (img > nimg .OR. img < 1) CYCLE
3294 : ELSE
3295 : img = 1
3296 : END IF
3297 :
3298 : CALL get_gto_basis_set(basis_i(ikind)%gto_basis_set, first_sgf=first_sgf_i, lmax=lmax_i, lmin=lmin_i, &
3299 : npgf=npgfi, nset=nseti, nsgf_set=nsgfi, pgf_radius=rpgf_i, set_radius=set_radius_i, &
3300 25860 : sphi=sphi_i, zet=zeti)
3301 :
3302 : CALL get_gto_basis_set(basis_j(jkind)%gto_basis_set, first_sgf=first_sgf_j, lmax=lmax_j, lmin=lmin_j, &
3303 : npgf=npgfj, nset=nsetj, nsgf_set=nsgfj, pgf_radius=rpgf_j, set_radius=set_radius_j, &
3304 25860 : sphi=sphi_j, zet=zetj)
3305 :
3306 25860 : IF (do_symmetric) THEN
3307 22706 : IF (iatom <= jatom) THEN
3308 13668 : irow = iatom
3309 13668 : icol = jatom
3310 : ELSE
3311 9038 : irow = jatom
3312 9038 : icol = iatom
3313 : END IF
3314 : ELSE
3315 3154 : irow = iatom
3316 3154 : icol = jatom
3317 : END IF
3318 :
3319 103440 : dab = NORM2(rij)
3320 :
3321 : CALL dbcsr_get_block_p(matrix=t2c(img), &
3322 25860 : row=irow, col=icol, BLOCK=block_t%block, found=found)
3323 25860 : CPASSERT(found)
3324 25860 : trans = do_symmetric .AND. (iatom > jatom)
3325 :
3326 100744 : DO iset = 1, nseti
3327 :
3328 73976 : ncoi = npgfi(iset)*ncoset(lmax_i(iset))
3329 73976 : sgfi = first_sgf_i(1, iset)
3330 :
3331 484451 : DO jset = 1, nsetj
3332 :
3333 384565 : ncoj = npgfj(jset)*ncoset(lmax_j(jset))
3334 384565 : sgfj = first_sgf_j(1, jset)
3335 :
3336 458541 : IF (ncoi*ncoj > 0) THEN
3337 1538260 : ALLOCATE (sij_contr(nsgfi(iset), nsgfj(jset)))
3338 76490739 : sij_contr(:, :) = 0.0_dp
3339 :
3340 1538260 : ALLOCATE (sij(ncoi, ncoj))
3341 199917867 : sij(:, :) = 0.0_dp
3342 :
3343 384565 : ri = 0.0_dp
3344 384565 : rj = rij
3345 :
3346 : CALL eri_2center(sij, lmin_i(iset), lmax_i(iset), npgfi(iset), zeti(:, iset), &
3347 : rpgf_i(:, iset), ri, lmin_j(jset), lmax_j(jset), npgfj(jset), zetj(:, jset), &
3348 384565 : rpgf_j(:, jset), rj, dab, lib, potential_parameter)
3349 :
3350 : CALL ab_contract(sij_contr, sij, &
3351 : sphi_i(:, sgfi:), sphi_j(:, sgfj:), &
3352 384565 : ncoi, ncoj, nsgfi(iset), nsgfj(jset))
3353 :
3354 384565 : DEALLOCATE (sij)
3355 384565 : IF (trans) THEN
3356 479264 : ALLOCATE (sij_rs(nsgfj(jset), nsgfi(iset)))
3357 29082749 : sij_rs(:, :) = TRANSPOSE(sij_contr)
3358 : ELSE
3359 1058996 : ALLOCATE (sij_rs(nsgfi(iset), nsgfj(jset)))
3360 47335804 : sij_rs(:, :) = sij_contr
3361 : END IF
3362 :
3363 384565 : DEALLOCATE (sij_contr)
3364 :
3365 : ! RI regularization
3366 : IF (.NOT. do_hfx_kpoints_prv .AND. PRESENT(regularization_RI) .AND. &
3367 : iatom == jatom .AND. iset == jset .AND. &
3368 384565 : cell_j(1) == 0 .AND. cell_j(2) == 0 .AND. cell_j(3) == 0) THEN
3369 12174 : DO i_diag = 1, nsgfi(iset)
3370 26346 : min_zet = MINVAL(zeti(:, iset))
3371 8782 : CPASSERT(min_zet > 1.0E-10_dp)
3372 : sij_rs(i_diag, i_diag) = sij_rs(i_diag, i_diag) + &
3373 12174 : regularization_RI*MAX(1.0_dp, 1.0_dp/min_zet)
3374 : END DO
3375 : END IF
3376 :
3377 : CALL block_add("IN", sij_rs, &
3378 : nsgfi(iset), nsgfj(jset), block_t%block, &
3379 384565 : sgfi, sgfj, trans=trans)
3380 384565 : DEALLOCATE (sij_rs)
3381 :
3382 : END IF
3383 : END DO
3384 : END DO
3385 : END DO
3386 :
3387 908 : CALL cp_libint_cleanup_2eri(lib)
3388 :
3389 908 : CALL neighbor_list_iterator_release(nl_iterator)
3390 6834 : DO img = 1, nimg
3391 5926 : CALL dbcsr_finalize(t2c(img))
3392 6834 : CALL dbcsr_filter(t2c(img), filter_eps)
3393 : END DO
3394 :
3395 908 : CALL timestop(handle)
3396 :
3397 1816 : END SUBROUTINE build_2c_integrals
3398 :
3399 : ! **************************************************************************************************
3400 : !> \brief ...
3401 : !> \param tensor tensor with data. Data is cleared after compression.
3402 : !> \param blk_indices ...
3403 : !> \param compressed compressed tensor data
3404 : !> \param eps all entries < eps are discarded
3405 : !> \param memory ...
3406 : ! **************************************************************************************************
3407 20870 : SUBROUTINE compress_tensor(tensor, blk_indices, compressed, eps, memory)
3408 : TYPE(dbt_type), INTENT(INOUT) :: tensor
3409 : INTEGER, ALLOCATABLE, DIMENSION(:, :), &
3410 : INTENT(INOUT) :: blk_indices
3411 : TYPE(hfx_compression_type), INTENT(INOUT) :: compressed
3412 : REAL(dp), INTENT(IN) :: eps
3413 : REAL(dp), INTENT(INOUT) :: memory
3414 :
3415 : INTEGER :: buffer_left, buffer_size, buffer_start, &
3416 : i, iblk, memory_usage, nbits, nblk, &
3417 : nints, offset, shared_offset
3418 : INTEGER(int_8) :: estimate_to_store_int, &
3419 : storage_counter_integrals
3420 : INTEGER, DIMENSION(3) :: ind
3421 : LOGICAL :: found
3422 : REAL(dp) :: spherical_estimate
3423 20870 : REAL(dp), ALLOCATABLE, DIMENSION(:, :, :), TARGET :: blk_data
3424 20870 : REAL(dp), DIMENSION(:), POINTER :: blk_data_1d
3425 : TYPE(dbt_iterator_type) :: iter
3426 20870 : TYPE(hfx_cache_type), DIMENSION(:), POINTER :: integral_caches
3427 : TYPE(hfx_cache_type), POINTER :: maxval_cache
3428 20870 : TYPE(hfx_container_type), DIMENSION(:), POINTER :: integral_containers
3429 : TYPE(hfx_container_type), POINTER :: maxval_container
3430 :
3431 20870 : CALL dealloc_containers(compressed, memory_usage)
3432 20870 : CALL alloc_containers(compressed, 1)
3433 :
3434 20870 : maxval_container => compressed%maxval_container(1)
3435 20870 : integral_containers => compressed%integral_containers(:, 1)
3436 :
3437 20870 : CALL hfx_init_container(maxval_container, memory_usage, .FALSE.)
3438 1356550 : DO i = 1, 64
3439 1356550 : CALL hfx_init_container(integral_containers(i), memory_usage, .FALSE.)
3440 : END DO
3441 :
3442 20870 : maxval_cache => compressed%maxval_cache(1)
3443 20870 : integral_caches => compressed%integral_caches(:, 1)
3444 :
3445 20870 : IF (ALLOCATED(blk_indices)) DEALLOCATE (blk_indices)
3446 55485 : ALLOCATE (blk_indices(dbt_get_num_blocks(tensor), 3))
3447 20870 : shared_offset = 0
3448 : !$OMP PARALLEL DEFAULT(NONE) SHARED(tensor,blk_indices,shared_offset) &
3449 20870 : !$OMP PRIVATE(iter,ind,offset,nblk,iblk)
3450 : CALL dbt_iterator_start(iter, tensor)
3451 : nblk = dbt_iterator_num_blocks(iter)
3452 : !$OMP CRITICAL
3453 : offset = shared_offset
3454 : shared_offset = shared_offset + nblk
3455 : !$OMP END CRITICAL
3456 : DO iblk = 1, nblk
3457 : CALL dbt_iterator_next_block(iter, ind)
3458 : blk_indices(offset + iblk, :) = ind(:)
3459 : END DO
3460 : CALL dbt_iterator_stop(iter)
3461 : !$OMP END PARALLEL
3462 :
3463 : ! Can not use the tensor iterator here because the order of the blocks is not guaranteed.
3464 315126 : DO i = 1, SIZE(blk_indices, 1)
3465 1177024 : ind = blk_indices(i, :)
3466 294256 : CALL dbt_get_block(tensor, ind, blk_data, found)
3467 294256 : CPASSERT(found)
3468 1177024 : nints = SIZE(blk_data)
3469 294256 : blk_data_1d(1:nints) => blk_data
3470 143160217 : spherical_estimate = MAXVAL(ABS(blk_data_1d))
3471 294256 : IF (spherical_estimate == 0.0_dp) spherical_estimate = TINY(spherical_estimate)
3472 294256 : estimate_to_store_int = EXPONENT(spherical_estimate)
3473 294256 : estimate_to_store_int = MAX(estimate_to_store_int, -15_int_8)
3474 :
3475 : CALL hfx_add_single_cache_element(estimate_to_store_int, 6, &
3476 : maxval_cache, maxval_container, memory_usage, &
3477 294256 : .FALSE.)
3478 :
3479 294256 : spherical_estimate = SET_EXPONENT(1.0_dp, estimate_to_store_int + 1)
3480 :
3481 294256 : nbits = EXPONENT(ANINT(spherical_estimate/eps)) + 1
3482 294256 : IF (nbits > 64) THEN
3483 : CALL cp_abort(__LOCATION__, &
3484 0 : "Overflow during tensor compression. Please use a larger EPS_FILTER or EPS_STORAGE_SCALING")
3485 : END IF
3486 :
3487 : buffer_left = nints
3488 : buffer_start = 1
3489 :
3490 645931 : DO WHILE (buffer_left > 0)
3491 351675 : buffer_size = MIN(buffer_left, cache_size)
3492 : CALL hfx_add_mult_cache_elements(blk_data_1d(buffer_start:), &
3493 : buffer_size, nbits, &
3494 : integral_caches(nbits), &
3495 : integral_containers(nbits), &
3496 : eps, 1.0_dp, &
3497 : memory_usage, &
3498 351675 : .FALSE.)
3499 351675 : buffer_left = buffer_left - buffer_size
3500 351675 : buffer_start = buffer_start + buffer_size
3501 : END DO
3502 :
3503 609382 : NULLIFY (blk_data_1d); DEALLOCATE (blk_data)
3504 : END DO
3505 :
3506 20870 : CALL dbt_clear(tensor)
3507 :
3508 20870 : storage_counter_integrals = memory_usage*cache_size
3509 20870 : memory = memory + REAL(storage_counter_integrals, dp)/1024/128
3510 : !WRITE (UNIT=iw, FMT="((T3,A,T60,I21))") &
3511 : ! "HFX_MEM_INFO| Total memory consumption ERI's RAM [MiB]: ", memory
3512 :
3513 : CALL hfx_flush_last_cache(6, maxval_cache, maxval_container, memory_usage, &
3514 20870 : .FALSE.)
3515 1356550 : DO i = 1, 64
3516 : CALL hfx_flush_last_cache(i, integral_caches(i), integral_containers(i), &
3517 1356550 : memory_usage, .FALSE.)
3518 : END DO
3519 :
3520 20870 : CALL hfx_reset_cache_and_container(maxval_cache, maxval_container, memory_usage, .FALSE.)
3521 1356550 : DO i = 1, 64
3522 : CALL hfx_reset_cache_and_container(integral_caches(i), integral_containers(i), &
3523 1356550 : memory_usage, .FALSE.)
3524 : END DO
3525 :
3526 41740 : END SUBROUTINE
3527 :
3528 : ! **************************************************************************************************
3529 : !> \brief ...
3530 : !> \param tensor empty tensor which is filled by decompressed data
3531 : !> \param blk_indices indices of blocks to be reserved
3532 : !> \param compressed compressed data
3533 : !> \param eps all entries < eps are discarded
3534 : ! **************************************************************************************************
3535 61068 : SUBROUTINE decompress_tensor(tensor, blk_indices, compressed, eps)
3536 :
3537 : TYPE(dbt_type), INTENT(INOUT) :: tensor
3538 : INTEGER, DIMENSION(:, :) :: blk_indices
3539 : TYPE(hfx_compression_type), INTENT(INOUT) :: compressed
3540 : REAL(dp), INTENT(IN) :: eps
3541 :
3542 : INTEGER :: A, b, buffer_left, buffer_size, &
3543 : buffer_start, i, memory_usage, nbits, &
3544 : nblk_per_thread, nints
3545 : INTEGER(int_8) :: estimate_to_store_int
3546 : INTEGER, DIMENSION(3) :: blk_size, ind
3547 : REAL(dp) :: spherical_estimate
3548 61068 : REAL(dp), ALLOCATABLE, DIMENSION(:), TARGET :: blk_data
3549 61068 : REAL(dp), DIMENSION(:, :, :), POINTER :: blk_data_3d
3550 61068 : TYPE(hfx_cache_type), DIMENSION(:), POINTER :: integral_caches
3551 : TYPE(hfx_cache_type), POINTER :: maxval_cache
3552 61068 : TYPE(hfx_container_type), DIMENSION(:), POINTER :: integral_containers
3553 : TYPE(hfx_container_type), POINTER :: maxval_container
3554 :
3555 61068 : maxval_cache => compressed%maxval_cache(1)
3556 61068 : maxval_container => compressed%maxval_container(1)
3557 61068 : integral_caches => compressed%integral_caches(:, 1)
3558 61068 : integral_containers => compressed%integral_containers(:, 1)
3559 :
3560 61068 : memory_usage = 0
3561 :
3562 61068 : CALL hfx_decompress_first_cache(6, maxval_cache, maxval_container, memory_usage, .FALSE.)
3563 :
3564 3969420 : DO i = 1, 64
3565 : CALL hfx_decompress_first_cache(i, integral_caches(i), integral_containers(i), &
3566 3969420 : memory_usage, .FALSE.)
3567 : END DO
3568 :
3569 : !TODO: Parallelize creation of block list.
3570 61068 : !$OMP PARALLEL DEFAULT(NONE) SHARED(tensor,blk_indices) PRIVATE(nblk_per_thread,A,b)
3571 : nblk_per_thread = SIZE(blk_indices, 1)/omp_get_num_threads() + 1
3572 : a = omp_get_thread_num()*nblk_per_thread + 1
3573 : b = MIN(a + nblk_per_thread, SIZE(blk_indices, 1))
3574 : CALL dbt_reserve_blocks(tensor, blk_indices(a:b, :))
3575 : !$OMP END PARALLEL
3576 :
3577 : ! Can not use the tensor iterator here because the order of the blocks is not guaranteed.
3578 1227609 : DO i = 1, SIZE(blk_indices, 1)
3579 4666164 : ind = blk_indices(i, :)
3580 1166541 : CALL dbt_blk_sizes(tensor, ind, blk_size)
3581 4666164 : nints = PRODUCT(blk_size)
3582 : CALL hfx_get_single_cache_element( &
3583 : estimate_to_store_int, 6, &
3584 : maxval_cache, maxval_container, memory_usage, &
3585 1166541 : .FALSE.)
3586 :
3587 1166541 : spherical_estimate = SET_EXPONENT(1.0_dp, estimate_to_store_int + 1)
3588 :
3589 1166541 : nbits = EXPONENT(ANINT(spherical_estimate/eps)) + 1
3590 :
3591 1166541 : buffer_left = nints
3592 1166541 : buffer_start = 1
3593 :
3594 3499623 : ALLOCATE (blk_data(nints))
3595 2448223 : DO WHILE (buffer_left > 0)
3596 1281682 : buffer_size = MIN(buffer_left, cache_size)
3597 : CALL hfx_get_mult_cache_elements(blk_data(buffer_start), &
3598 : buffer_size, nbits, &
3599 : integral_caches(nbits), &
3600 : integral_containers(nbits), &
3601 : eps, 1.0_dp, &
3602 : memory_usage, &
3603 1281682 : .FALSE.)
3604 1281682 : buffer_left = buffer_left - buffer_size
3605 1281682 : buffer_start = buffer_start + buffer_size
3606 : END DO
3607 :
3608 1166541 : blk_data_3d(1:blk_size(1), 1:blk_size(2), 1:blk_size(3)) => blk_data
3609 1166541 : CALL dbt_put_block(tensor, ind, blk_size, blk_data_3d)
3610 1227609 : NULLIFY (blk_data_3d); DEALLOCATE (blk_data)
3611 : END DO
3612 :
3613 61068 : CALL hfx_reset_cache_and_container(maxval_cache, maxval_container, memory_usage, .FALSE.)
3614 3969420 : DO i = 1, 64
3615 : CALL hfx_reset_cache_and_container(integral_caches(i), integral_containers(i), &
3616 3969420 : memory_usage, .FALSE.)
3617 : END DO
3618 122136 : END SUBROUTINE
3619 :
3620 : ! **************************************************************************************************
3621 : !> \brief ...
3622 : !> \param tensor ...
3623 : !> \param nze ...
3624 : !> \param occ ...
3625 : ! **************************************************************************************************
3626 113501 : SUBROUTINE get_tensor_occupancy(tensor, nze, occ)
3627 : TYPE(dbt_type), INTENT(IN) :: tensor
3628 : INTEGER(int_8), INTENT(OUT) :: nze
3629 : REAL(dp), INTENT(OUT) :: occ
3630 :
3631 227002 : INTEGER, DIMENSION(dbt_ndims(tensor)) :: dims
3632 :
3633 113501 : nze = dbt_get_nze_total(tensor)
3634 113501 : CALL dbt_get_info(tensor, nfull_total=dims)
3635 437238 : occ = REAL(nze, dp)/PRODUCT(REAL(dims, dp))
3636 :
3637 113501 : END SUBROUTINE
3638 :
3639 0 : END MODULE
|