Line data Source code
1 : !--------------------------------------------------------------------------------------------------!
2 : ! CP2K: A general program to perform molecular dynamics simulations !
3 : ! Copyright 2000-2024 CP2K developers group <https://cp2k.org> !
4 : ! !
5 : ! SPDX-License-Identifier: GPL-2.0-or-later !
6 : !--------------------------------------------------------------------------------------------------!
7 :
8 : ! **************************************************************************************************
9 : !> \brief Routines to calculate derivatives with respect to basis function origin
10 : !> \par History
11 : !> 04.2008 created [Manuel Guidon]
12 : !> \author Manuel Guidon
13 : ! **************************************************************************************************
14 : MODULE hfx_derivatives
15 : USE admm_types, ONLY: admm_type
16 : USE atomic_kind_types, ONLY: atomic_kind_type, &
17 : get_atomic_kind_set
18 : USE cell_types, ONLY: cell_type, &
19 : pbc
20 : USE cp_control_types, ONLY: dft_control_type
21 : USE cp_files, ONLY: close_file, &
22 : open_file
23 : USE cp_log_handling, ONLY: cp_get_default_logger, &
24 : cp_logger_type
25 : USE cp_output_handling, ONLY: cp_p_file, &
26 : cp_print_key_finished_output, &
27 : cp_print_key_should_output, &
28 : cp_print_key_unit_nr
29 : USE message_passing, ONLY: mp_para_env_type
30 : USE cp_dbcsr_api, ONLY: dbcsr_get_matrix_type, &
31 : dbcsr_p_type, &
32 : dbcsr_type_antisymmetric
33 : USE gamma, ONLY: init_md_ftable
34 : USE hfx_communication, ONLY: get_full_density
35 : USE hfx_compression_methods, ONLY: hfx_add_mult_cache_elements, &
36 : hfx_add_single_cache_element, &
37 : hfx_decompress_first_cache, &
38 : hfx_flush_last_cache, &
39 : hfx_get_mult_cache_elements, &
40 : hfx_get_single_cache_element, &
41 : hfx_reset_cache_and_container
42 : USE hfx_libint_interface, ONLY: evaluate_deriv_eri
43 : USE hfx_load_balance_methods, ONLY: collect_load_balance_info, &
44 : hfx_load_balance, &
45 : hfx_update_load_balance
46 : USE hfx_pair_list_methods, ONLY: build_atomic_pair_list, &
47 : build_pair_list, &
48 : build_pair_list_pgf, &
49 : build_pgf_product_list, &
50 : pgf_product_list_size
51 : USE hfx_screening_methods, ONLY: update_pmax_mat
52 : USE hfx_types, ONLY: &
53 : alloc_containers, dealloc_containers, hfx_basis_info_type, hfx_basis_type, hfx_cache_type, &
54 : hfx_cell_type, hfx_container_type, hfx_distribution, hfx_general_type, hfx_init_container, &
55 : hfx_load_balance_type, hfx_memory_type, hfx_p_kind, hfx_pgf_list, hfx_pgf_product_list, &
56 : hfx_potential_type, hfx_screen_coeff_type, hfx_screening_type, hfx_task_list_type, &
57 : hfx_type, init_t_c_g0_lmax, log_zero, pair_list_type, pair_set_list_type
58 : USE input_constants, ONLY: do_potential_mix_cl_trunc, &
59 : do_potential_truncated, &
60 : hfx_do_eval_forces
61 : USE input_section_types, ONLY: section_vals_type
62 : USE kinds, ONLY: dp, &
63 : int_8
64 : USE libint_wrapper, ONLY: cp_libint_t
65 : USE machine, ONLY: m_walltime
66 : USE mathconstants, ONLY: fac
67 : USE orbital_pointers, ONLY: nco, &
68 : ncoset, &
69 : nso
70 : USE particle_types, ONLY: particle_type
71 : USE qs_environment_types, ONLY: get_qs_env, &
72 : qs_environment_type
73 : USE qs_force_types, ONLY: qs_force_type
74 : USE t_c_g0, ONLY: init
75 : USE util, ONLY: sort
76 : USE virial_types, ONLY: virial_type
77 :
78 : !$ USE OMP_LIB, ONLY: omp_get_max_threads, omp_get_thread_num, omp_get_num_threads
79 :
80 : #include "./base/base_uses.f90"
81 :
82 : IMPLICIT NONE
83 : PRIVATE
84 :
85 : PUBLIC :: derivatives_four_center
86 :
87 : CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'hfx_derivatives'
88 :
89 : !***
90 :
91 : CONTAINS
92 :
93 : ! **************************************************************************************************
94 : !> \brief computes four center derivatives for a full basis set and updates the
95 : !> forces%fock_4c arrays. Uses all 8 eri symmetries
96 : !> \param qs_env ...
97 : !> \param rho_ao density matrix
98 : !> \param rho_ao_resp relaxed density matrix from response
99 : !> \param hfx_section HFX input section
100 : !> \param para_env para_env
101 : !> \param irep ID of HFX replica
102 : !> \param use_virial ...
103 : !> \param adiabatic_rescale_factor parameter used for MCY3 hybrid
104 : !> \param resp_only Calculate only forces from response density
105 : !> \par History
106 : !> 06.2007 created [Manuel Guidon]
107 : !> 08.2007 optimized load balance [Manuel Guidon]
108 : !> 02.2009 completely rewritten screening part [Manuel Guidon]
109 : !> 12.2017 major bug fix. removed wrong cycle that was causing segfault.
110 : !> see https://groups.google.com/forum/#!topic/cp2k/pc6B14XOALY
111 : !> [Tobias Binninger + Valery Weber]
112 : !> \author Manuel Guidon
113 : ! **************************************************************************************************
114 1788 : SUBROUTINE derivatives_four_center(qs_env, rho_ao, rho_ao_resp, hfx_section, para_env, &
115 : irep, use_virial, adiabatic_rescale_factor, resp_only, &
116 : external_x_data)
117 :
118 : TYPE(qs_environment_type), POINTER :: qs_env
119 : TYPE(dbcsr_p_type), DIMENSION(:, :), POINTER :: rho_ao
120 : TYPE(dbcsr_p_type), DIMENSION(:), POINTER :: rho_ao_resp
121 : TYPE(section_vals_type), POINTER :: hfx_section
122 : TYPE(mp_para_env_type), POINTER :: para_env
123 : INTEGER, INTENT(IN) :: irep
124 : LOGICAL, INTENT(IN) :: use_virial
125 : REAL(dp), INTENT(IN), OPTIONAL :: adiabatic_rescale_factor
126 : LOGICAL, INTENT(IN), OPTIONAL :: resp_only
127 : TYPE(hfx_type), DIMENSION(:, :), POINTER, OPTIONAL :: external_x_data
128 :
129 : CHARACTER(LEN=*), PARAMETER :: routineN = 'derivatives_four_center'
130 :
131 : INTEGER :: atomic_offset_ac, atomic_offset_ad, atomic_offset_bc, atomic_offset_bd, bin, &
132 : bits_max_val, buffer_left, buffer_size, buffer_start, cache_size, coord, current_counter, &
133 : forces_map(4, 2), handle, handle_bin, handle_getP, handle_load, handle_main, i, i_atom, &
134 : i_list_ij, i_list_kl, i_set_list_ij, i_set_list_ij_start, i_set_list_ij_stop, &
135 : i_set_list_kl, i_set_list_kl_start, i_set_list_kl_stop, i_thread, iatom, iatom_block, &
136 : iatom_end, iatom_start, ikind, iset, iw, j, j_atom, jatom, jatom_block, jatom_end, &
137 : jatom_start, jkind, jset, k, k_atom, katom, katom_block, katom_end, katom_start
138 : INTEGER :: kind_kind_idx, kkind, kset, l_atom, l_max, latom, latom_block, latom_end, &
139 : latom_start, lkind, lset, max_am, max_pgf, max_set, my_bin_id, my_bin_size, my_thread_id, &
140 : n_threads, natom, nbits, ncob, ncos_max, nints, nkimages, nkind, nneighbors, nseta, &
141 : nsetb, nsgf_max, nspins, sgfb, shm_task_counter, shm_total_bins, sphi_a_u1, sphi_a_u2, &
142 : sphi_a_u3, sphi_b_u1, sphi_b_u2, sphi_b_u3, sphi_c_u1, sphi_c_u2, sphi_c_u3, sphi_d_u1, &
143 : sphi_d_u2, sphi_d_u3, swap_id, tmp_i4, unit_id
144 : INTEGER(int_8) :: atom_block, counter, estimate_to_store_int, max_val_memory, &
145 : mem_compression_counter, mem_eris, mem_max_val, my_current_counter, my_istart, &
146 : n_processes, nblocks, ncpu, neris_incore, neris_onthefly, neris_tmp, neris_total, &
147 : nprim_ints, shm_neris_incore, shm_neris_onthefly, shm_neris_total, shm_nprim_ints, &
148 : shm_stor_count_max_val, shm_storage_counter_integrals, stor_count_max_val, &
149 : storage_counter_integrals, tmp_block, tmp_i8(6)
150 1788 : INTEGER(int_8), ALLOCATABLE, DIMENSION(:) :: tmp_task_list_cost
151 1788 : INTEGER, ALLOCATABLE, DIMENSION(:) :: atom_of_kind, kind_of, last_sgf_global, &
152 1788 : nimages, tmp_index
153 1788 : INTEGER, DIMENSION(:), POINTER :: la_max, la_min, lb_max, lb_min, lc_max, lc_min, ld_max, &
154 3576 : ld_min, npgfa, npgfb, npgfc, npgfd, nsgfa, nsgfb, nsgfc, nsgfd, shm_block_offset
155 3576 : INTEGER, DIMENSION(:, :), POINTER :: first_sgfb, nsgfl_a, nsgfl_b, nsgfl_c, nsgfl_d, &
156 1788 : offset_ac_set, offset_ad_set, offset_bc_set, offset_bd_set, shm_atomic_block_offset
157 : INTEGER, DIMENSION(:, :), POINTER, SAVE :: shm_is_assoc_atomic_block
158 1788 : INTEGER, DIMENSION(:, :, :, :), POINTER :: shm_set_offset
159 : INTEGER, SAVE :: shm_number_of_p_entries
160 : LOGICAL :: bins_left, buffer_overflow, do_dynamic_load_balancing, do_it, do_periodic, &
161 : do_print_load_balance_info, is_anti_symmetric, my_resp_only, screen_pmat_forces, &
162 : treat_forces_in_core, use_disk_storage, with_resp_density
163 1788 : LOGICAL, DIMENSION(:, :), POINTER :: shm_atomic_pair_list
164 : REAL(dp) :: bintime_start, bintime_stop, cartesian_estimate, cartesian_estimate_virial, &
165 : compression_factor, eps_schwarz, eps_storage, fac, hf_fraction, ln_10, log10_eps_schwarz, &
166 : log10_pmax, log_2, max_contraction_val, max_val1, max_val2, max_val2_set, &
167 : my_adiabatic_rescale_factor, pmax_atom, pmax_blocks, pmax_entry, pmax_tmp, ra(3), rab2, &
168 : rb(3), rc(3), rcd2, rd(3), spherical_estimate, spherical_estimate_virial, symm_fac, &
169 : tmp_virial(3, 3)
170 1788 : REAL(dp), ALLOCATABLE, DIMENSION(:) :: ede_buffer1, ede_buffer2, ede_primitives_tmp, &
171 1788 : ede_primitives_tmp_virial, ede_work, ede_work2, ede_work2_virial, ede_work_forces, &
172 3576 : ede_work_virial, pac_buf, pac_buf_beta, pac_buf_resp, pac_buf_resp_beta, pad_buf, &
173 1788 : pad_buf_beta, pad_buf_resp, pad_buf_resp_beta, pbc_buf, pbc_buf_beta, pbc_buf_resp, &
174 1788 : pbc_buf_resp_beta, pbd_buf, pbd_buf_beta, pbd_buf_resp, pbd_buf_resp_beta
175 1788 : REAL(dp), ALLOCATABLE, DIMENSION(:), TARGET :: primitive_forces, primitive_forces_virial
176 1788 : REAL(dp), DIMENSION(:), POINTER :: full_density_resp, &
177 3576 : full_density_resp_beta, T2
178 1788 : REAL(dp), DIMENSION(:, :), POINTER :: full_density_alpha, full_density_beta, &
179 1788 : max_contraction, ptr_p_1, ptr_p_2, ptr_p_3, ptr_p_4, shm_pmax_atom, shm_pmax_block, &
180 1788 : sphi_b, zeta, zetb, zetc, zetd
181 1788 : REAL(dp), DIMENSION(:, :, :), POINTER :: sphi_a_ext_set, sphi_b_ext_set, &
182 1788 : sphi_c_ext_set, sphi_d_ext_set
183 1788 : REAL(dp), DIMENSION(:, :, :, :), POINTER :: sphi_a_ext, sphi_b_ext, sphi_c_ext, &
184 1788 : sphi_d_ext
185 : REAL(KIND=dp) :: coeffs_kind_max0
186 : TYPE(admm_type), POINTER :: admm_env
187 1788 : TYPE(atomic_kind_type), DIMENSION(:), POINTER :: atomic_kind_set
188 : TYPE(cell_type), POINTER :: cell
189 : TYPE(cp_libint_t) :: private_deriv
190 : TYPE(cp_logger_type), POINTER :: logger
191 : TYPE(dft_control_type), POINTER :: dft_control
192 : TYPE(hfx_basis_info_type), POINTER :: basis_info
193 1788 : TYPE(hfx_basis_type), DIMENSION(:), POINTER :: basis_parameter
194 1788 : TYPE(hfx_cache_type), DIMENSION(:), POINTER :: integral_caches
195 : TYPE(hfx_cache_type), POINTER :: maxval_cache
196 1788 : TYPE(hfx_container_type), DIMENSION(:), POINTER :: integral_containers
197 : TYPE(hfx_container_type), POINTER :: maxval_container
198 : TYPE(hfx_distribution), POINTER :: distribution_forces
199 : TYPE(hfx_general_type) :: general_parameter
200 : TYPE(hfx_load_balance_type), POINTER :: load_balance_parameter
201 : TYPE(hfx_memory_type), POINTER :: memory_parameter
202 1788 : TYPE(hfx_p_kind), DIMENSION(:), POINTER :: shm_initial_p
203 1788 : TYPE(hfx_pgf_list), ALLOCATABLE, DIMENSION(:) :: pgf_list_ij, pgf_list_kl
204 : TYPE(hfx_pgf_product_list), ALLOCATABLE, &
205 1788 : DIMENSION(:) :: pgf_product_list
206 : TYPE(hfx_potential_type) :: potential_parameter
207 : TYPE(hfx_screen_coeff_type), DIMENSION(:, :), &
208 1788 : POINTER :: screen_coeffs_kind, tmp_R_1, tmp_R_2, &
209 1788 : tmp_screen_pgf1, tmp_screen_pgf2
210 : TYPE(hfx_screen_coeff_type), &
211 1788 : DIMENSION(:, :, :, :), POINTER :: screen_coeffs_set
212 : TYPE(hfx_screen_coeff_type), &
213 1788 : DIMENSION(:, :, :, :, :, :), POINTER :: radii_pgf, screen_coeffs_pgf
214 : TYPE(hfx_screening_type) :: screening_parameter
215 1788 : TYPE(hfx_task_list_type), DIMENSION(:), POINTER :: shm_task_list, tmp_task_list
216 : TYPE(hfx_type), POINTER :: actual_x_data
217 1788 : TYPE(hfx_type), DIMENSION(:, :), POINTER :: my_x_data
218 : TYPE(pair_list_type) :: list_ij, list_kl
219 : TYPE(pair_set_list_type), ALLOCATABLE, &
220 1788 : DIMENSION(:) :: set_list_ij, set_list_kl
221 1788 : TYPE(particle_type), DIMENSION(:), POINTER :: particle_set
222 1788 : TYPE(qs_force_type), DIMENSION(:), POINTER :: force
223 : TYPE(virial_type), POINTER :: virial
224 :
225 1788 : NULLIFY (dft_control, admm_env)
226 :
227 1788 : with_resp_density = .FALSE.
228 622 : IF (ASSOCIATED(rho_ao_resp)) with_resp_density = .TRUE.
229 :
230 1788 : my_resp_only = .FALSE.
231 1788 : IF (PRESENT(resp_only)) my_resp_only = resp_only
232 :
233 1788 : is_anti_symmetric = dbcsr_get_matrix_type(rho_ao(1, 1)%matrix) .EQ. dbcsr_type_antisymmetric
234 :
235 : CALL get_qs_env(qs_env, &
236 : admm_env=admm_env, &
237 : particle_set=particle_set, &
238 : atomic_kind_set=atomic_kind_set, &
239 : cell=cell, &
240 1788 : dft_control=dft_control)
241 :
242 1788 : nspins = dft_control%nspins
243 1788 : nkimages = dft_control%nimages
244 1788 : CPASSERT(nkimages == 1)
245 :
246 1788 : my_x_data => qs_env%x_data
247 1788 : IF (PRESENT(external_x_data)) my_x_data => external_x_data
248 :
249 : !! One atom systems have no contribution to forces
250 1788 : IF (SIZE(particle_set, 1) == 1) THEN
251 4 : IF (.NOT. use_virial) RETURN
252 : END IF
253 :
254 1784 : CALL timeset(routineN, handle)
255 : !! Calculate l_max used in fgamma , because init_md_ftable is definitely not thread safe
256 1784 : nkind = SIZE(atomic_kind_set, 1)
257 1784 : l_max = 0
258 5106 : DO ikind = 1, nkind
259 14080 : l_max = MAX(l_max, MAXVAL(my_x_data(1, 1)%basis_parameter(ikind)%lmax))
260 : END DO
261 1784 : l_max = 4*l_max + 1
262 1784 : CALL init_md_ftable(l_max)
263 :
264 1784 : IF (my_x_data(1, 1)%potential_parameter%potential_type == do_potential_truncated .OR. &
265 : my_x_data(1, 1)%potential_parameter%potential_type == do_potential_mix_cl_trunc) THEN
266 720 : IF (l_max > init_t_c_g0_lmax) THEN
267 194 : CALL open_file(unit_number=unit_id, file_name=my_x_data(1, 1)%potential_parameter%filename)
268 194 : CALL init(l_max, unit_id, para_env%mepos, para_env)
269 194 : CALL close_file(unit_id)
270 194 : init_t_c_g0_lmax = l_max
271 : END IF
272 : END IF
273 :
274 1784 : n_threads = 1
275 1784 : !$ n_threads = omp_get_max_threads()
276 :
277 1784 : shm_neris_total = 0
278 1784 : shm_nprim_ints = 0
279 1784 : shm_neris_onthefly = 0
280 1784 : shm_storage_counter_integrals = 0
281 1784 : shm_neris_incore = 0
282 1784 : shm_stor_count_max_val = 0
283 :
284 : !! get force array
285 1784 : CALL get_qs_env(qs_env=qs_env, force=force, virial=virial)
286 :
287 1784 : my_adiabatic_rescale_factor = 1.0_dp
288 1784 : IF (PRESENT(adiabatic_rescale_factor)) THEN
289 208 : my_adiabatic_rescale_factor = adiabatic_rescale_factor
290 : END IF
291 :
292 : !$OMP PARALLEL DEFAULT(OMP_DEFAULT_NONE_WITH_OOP) &
293 : !$OMP SHARED(qs_env,&
294 : !$OMP rho_ao,&
295 : !$OMP rho_ao_resp,&
296 : !$OMP with_resp_density,&
297 : !$OMP hfx_section,&
298 : !$OMP para_env,&
299 : !$OMP irep,&
300 : !$OMP my_resp_only,&
301 : !$OMP ncoset,&
302 : !$OMP nco,&
303 : !$OMP nso,&
304 : !$OMP n_threads,&
305 : !$OMP full_density_alpha,&
306 : !$OMP full_density_resp,&
307 : !$OMP full_density_resp_beta,&
308 : !$OMP full_density_beta,&
309 : !$OMP shm_initial_p,&
310 : !$OMP shm_is_assoc_atomic_block,&
311 : !$OMP shm_number_of_p_entries,&
312 : !$OMP force,&
313 : !$OMP virial,&
314 : !$OMP my_adiabatic_rescale_factor,&
315 : !$OMP shm_neris_total,&
316 : !$OMP shm_neris_onthefly,&
317 : !$OMP shm_storage_counter_integrals,&
318 : !$OMP shm_neris_incore,&
319 : !$OMP shm_nprim_ints,&
320 : !$OMP shm_stor_count_max_val,&
321 : !$OMP cell,&
322 : !$OMP screen_coeffs_set,&
323 : !$OMP screen_coeffs_kind,&
324 : !$OMP screen_coeffs_pgf,&
325 : !$OMP pgf_product_list_size,&
326 : !$OMP nkind,&
327 : !$OMP is_anti_symmetric,&
328 : !$OMP radii_pgf,&
329 : !$OMP shm_atomic_block_offset,&
330 : !$OMP shm_set_offset,&
331 : !$OMP shm_block_offset,&
332 : !$OMP shm_task_counter,&
333 : !$OMP shm_task_list,&
334 : !$OMP shm_total_bins,&
335 : !$OMP shm_pmax_block,&
336 : !$OMP shm_atomic_pair_list,&
337 : !$OMP shm_pmax_atom,&
338 : !$OMP use_virial,&
339 : !$OMP do_print_load_balance_info,&
340 : !$OMP nkimages,nspins,my_x_data)&
341 : !$OMP PRIVATE(actual_x_data,atomic_kind_set,atomic_offset_ac,atomic_offset_ad,atomic_offset_bc,&
342 : !$OMP atomic_offset_bd,atom_of_kind,basis_info,&
343 : !$OMP basis_parameter,bin,bins_left,bintime_start,bintime_stop,bits_max_val,buffer_left,buffer_overflow,buffer_size,&
344 : !$OMP buffer_start,cache_size,cartesian_estimate,cartesian_estimate_virial,&
345 : !$OMP coeffs_kind_max0,compression_factor,counter,current_counter,&
346 : !$OMP distribution_forces,do_dynamic_load_balancing,do_it,do_periodic,ede_buffer1,ede_buffer2,&
347 : !$OMP ede_primitives_tmp,ede_primitives_tmp_virial,&
348 : !$OMP ede_work,ede_work2,ede_work2_virial,ede_work_forces,ede_work_virial,eps_schwarz,eps_storage,estimate_to_store_int,&
349 : !$OMP fac,first_sgfb,forces_map,general_parameter,handle_bin,handle_getp,handle_load,&
350 : !$OMP handle_main,hf_fraction,i_atom,iatom_end,iatom_start,ikind,integral_caches,integral_containers,&
351 : !$OMP i_set_list_ij_start,i_set_list_ij_stop,i_set_list_kl_start,i_set_list_kl_stop,i_thread,iw,j_atom,jatom_end,&
352 : !$OMP jatom_start,jkind,katom,k_atom,katom_block,katom_end,katom_start,kind_kind_idx,&
353 : !$OMP kind_of,kkind,kset,la_max,la_min,last_sgf_global,latom,l_atom,&
354 : !$OMP latom_block,latom_end,latom_start,lb_max,lb_min,lc_max,lc_min,ld_max,&
355 : !$OMP ld_min,list_ij,list_kl,lkind,ln_10,load_balance_parameter,log10_eps_schwarz,log10_pmax,&
356 : !$OMP log_2,logger,lset,max_am,max_contraction,max_contraction_val,max_pgf,max_set,&
357 : !$OMP max_val1,max_val2,max_val2_set,maxval_cache,maxval_container,max_val_memory,mem_compression_counter,mem_eris,&
358 : !$OMP mem_max_val,memory_parameter,my_bin_id,my_bin_size,my_current_counter,my_istart,my_thread_id,natom,&
359 : !$OMP nbits,nblocks,ncob,ncos_max,ncpu,neris_incore,neris_onthefly,neris_tmp,&
360 : !$OMP neris_total,nimages,nints,nneighbors,npgfa,npgfb,npgfc,npgfd,&
361 : !$OMP nprim_ints,n_processes,nseta,nsetb,nsgfa,nsgfb,nsgfc,nsgfd,&
362 : !$OMP nsgfl_a,nsgfl_b,nsgfl_c,nsgfl_d,nsgf_max,offset_ac_set,offset_ad_set,&
363 : !$OMP offset_bc_set,offset_bd_set,pac_buf,pac_buf_beta,pac_buf_resp,pac_buf_resp_beta,pad_buf,pad_buf_beta,pad_buf_resp,&
364 : !$OMP pad_buf_resp_beta,particle_set,pbc_buf,pbc_buf_beta,pbc_buf_resp,pbc_buf_resp_beta,pbd_buf,pbd_buf_beta, &
365 : !$OMP pbd_buf_resp, pbd_buf_resp_beta, pgf_list_ij,&
366 : !$OMP pgf_list_kl,pgf_product_list,pmax_atom,pmax_blocks,pmax_entry,pmax_tmp,potential_parameter,primitive_forces,&
367 : !$OMP primitive_forces_virial,private_deriv,ptr_p_1,ptr_p_2,ptr_p_3,ptr_p_4,ra,rab2,&
368 : !$OMP rb,rc,rcd2,rd,screening_parameter,screen_pmat_forces,set_list_ij,set_list_kl,&
369 : !$OMP sgfb,spherical_estimate,spherical_estimate_virial,sphi_a_ext,sphi_a_ext_set,sphi_a_u1,sphi_a_u2,sphi_a_u3,&
370 : !$OMP sphi_b,sphi_b_ext,sphi_b_ext_set,sphi_b_u1,sphi_b_u2,sphi_b_u3,sphi_c_ext,sphi_c_ext_set,&
371 : !$OMP sphi_c_u1,sphi_c_u2,sphi_c_u3,sphi_d_ext,sphi_d_ext_set,sphi_d_u1,sphi_d_u2,sphi_d_u3,&
372 : !$OMP storage_counter_integrals,stor_count_max_val,swap_id,symm_fac,t2,tmp_block,tmp_i8, tmp_i4,&
373 : !$OMP tmp_index,tmp_r_1,tmp_r_2,tmp_screen_pgf1,tmp_screen_pgf2,tmp_task_list,tmp_task_list_cost,tmp_virial,&
374 1784 : !$OMP treat_forces_in_core,use_disk_storage,zeta,zetb,zetc,zetd)
375 :
376 : i_thread = 0
377 : !$ i_thread = omp_get_thread_num()
378 :
379 : ln_10 = LOG(10.0_dp)
380 : log_2 = LOG10(2.0_dp)
381 : actual_x_data => my_x_data(irep, i_thread + 1)
382 : do_periodic = actual_x_data%periodic_parameter%do_periodic
383 :
384 : screening_parameter = actual_x_data%screening_parameter
385 : general_parameter = actual_x_data%general_parameter
386 : potential_parameter = actual_x_data%potential_parameter
387 : basis_info => actual_x_data%basis_info
388 :
389 : load_balance_parameter => actual_x_data%load_balance_parameter
390 : basis_parameter => actual_x_data%basis_parameter
391 :
392 : ! IF(with_resp_density) THEN
393 : ! ! here we also do a copy of the original load_balance_parameter
394 : ! ! since if MP2 forces are required after the calculation of the HFX
395 : ! ! derivatives we need to calculate again the SCF energy.
396 : ! ALLOCATE(load_balance_parameter_energy)
397 : ! load_balance_parameter_energy = actual_x_data%load_balance_parameter
398 : ! END IF
399 :
400 : memory_parameter => actual_x_data%memory_parameter
401 :
402 : cache_size = memory_parameter%cache_size
403 : bits_max_val = memory_parameter%bits_max_val
404 :
405 : use_disk_storage = .FALSE.
406 :
407 : CALL get_qs_env(qs_env=qs_env, &
408 : atomic_kind_set=atomic_kind_set, &
409 : particle_set=particle_set)
410 :
411 : max_set = basis_info%max_set
412 : max_am = basis_info%max_am
413 : natom = SIZE(particle_set, 1)
414 :
415 : treat_forces_in_core = memory_parameter%treat_forces_in_core
416 :
417 : hf_fraction = general_parameter%fraction
418 : hf_fraction = hf_fraction*my_adiabatic_rescale_factor
419 : eps_schwarz = screening_parameter%eps_schwarz_forces
420 : IF (eps_schwarz < 0.0_dp) THEN
421 : log10_eps_schwarz = log_zero
422 : ELSE
423 : !! ** make eps_schwarz tighter in case of a stress calculation
424 : IF (use_virial) eps_schwarz = eps_schwarz/actual_x_data%periodic_parameter%R_max_stress
425 : log10_eps_schwarz = LOG10(eps_schwarz)
426 : END IF
427 : screen_pmat_forces = screening_parameter%do_p_screening_forces
428 : ! don't do density screening in case of response forces
429 : IF (with_resp_density) screen_pmat_forces = .FALSE.
430 :
431 : eps_storage = eps_schwarz*memory_parameter%eps_storage_scaling
432 :
433 : counter = 0_int_8
434 :
435 : !! Copy the libint_guy
436 : private_deriv = actual_x_data%lib_deriv
437 :
438 : !! Get screening parameter
439 :
440 : CALL get_atomic_kind_set(atomic_kind_set=atomic_kind_set, &
441 : atom_of_kind=atom_of_kind, &
442 : kind_of=kind_of)
443 :
444 : !! Create helper arrray for mapping local basis functions to global ones
445 : ALLOCATE (last_sgf_global(0:natom))
446 : last_sgf_global(0) = 0
447 : DO iatom = 1, natom
448 : ikind = kind_of(iatom)
449 : last_sgf_global(iatom) = last_sgf_global(iatom - 1) + basis_parameter(ikind)%nsgf_total
450 : END DO
451 :
452 : ALLOCATE (max_contraction(max_set, natom))
453 :
454 : max_contraction = 0.0_dp
455 : max_pgf = 0
456 : DO jatom = 1, natom
457 : jkind = kind_of(jatom)
458 : lb_max => basis_parameter(jkind)%lmax
459 : npgfb => basis_parameter(jkind)%npgf
460 : nsetb = basis_parameter(jkind)%nset
461 : first_sgfb => basis_parameter(jkind)%first_sgf
462 : sphi_b => basis_parameter(jkind)%sphi
463 : nsgfb => basis_parameter(jkind)%nsgf
464 : DO jset = 1, nsetb
465 : ! takes the primitive to contracted transformation into account
466 : ncob = npgfb(jset)*ncoset(lb_max(jset))
467 : sgfb = first_sgfb(1, jset)
468 : ! if the primitives are assumed to be all of max_val2, max_val2*p2s_b becomes
469 : ! the maximum value after multiplication with sphi_b
470 : max_contraction(jset, jatom) = MAXVAL((/(SUM(ABS(sphi_b(1:ncob, i))), i=sgfb, sgfb + nsgfb(jset) - 1)/))
471 : max_pgf = MAX(max_pgf, npgfb(jset))
472 : END DO
473 : END DO
474 :
475 : ncpu = para_env%num_pe
476 : n_processes = ncpu*n_threads
477 :
478 : !! initialize some counters
479 : neris_total = 0_int_8
480 : neris_incore = 0_int_8
481 : neris_onthefly = 0_int_8
482 : mem_eris = 0_int_8
483 : mem_max_val = 0_int_8
484 : compression_factor = 0.0_dp
485 : nprim_ints = 0_int_8
486 : neris_tmp = 0_int_8
487 : max_val_memory = 1_int_8
488 :
489 : !$OMP MASTER
490 : !! Set pointer for is_assoc helper
491 : shm_is_assoc_atomic_block => actual_x_data%is_assoc_atomic_block
492 : shm_number_of_p_entries = actual_x_data%number_of_p_entries
493 : shm_atomic_block_offset => actual_x_data%atomic_block_offset
494 : shm_set_offset => actual_x_data%set_offset
495 : shm_block_offset => actual_x_data%block_offset
496 :
497 : NULLIFY (full_density_alpha)
498 : NULLIFY (full_density_beta)
499 : ALLOCATE (full_density_alpha(shm_block_offset(ncpu + 1), nkimages))
500 :
501 : !! Get the full density from all the processors
502 : CALL timeset(routineN//"_getP", handle_getP)
503 : CALL get_full_density(para_env, full_density_alpha(:, 1), rho_ao(1, 1)%matrix, shm_number_of_p_entries, &
504 : shm_block_offset, &
505 : kind_of, basis_parameter, get_max_vals_spin=.FALSE., &
506 : antisymmetric=is_anti_symmetric)
507 : ! for now only closed shell case
508 : IF (with_resp_density) THEN
509 : NULLIFY (full_density_resp)
510 : ALLOCATE (full_density_resp(shm_block_offset(ncpu + 1)))
511 : CALL get_full_density(para_env, full_density_resp, rho_ao_resp(1)%matrix, shm_number_of_p_entries, &
512 : shm_block_offset, &
513 : kind_of, basis_parameter, get_max_vals_spin=.FALSE., &
514 : antisymmetric=is_anti_symmetric)
515 : END IF
516 :
517 : IF (nspins == 2) THEN
518 : ALLOCATE (full_density_beta(shm_block_offset(ncpu + 1), 1))
519 : CALL get_full_density(para_env, full_density_beta(:, 1), rho_ao(2, 1)%matrix, shm_number_of_p_entries, &
520 : shm_block_offset, &
521 : kind_of, basis_parameter, get_max_vals_spin=.FALSE., &
522 : antisymmetric=is_anti_symmetric)
523 : ! With resp density
524 : IF (with_resp_density) THEN
525 : NULLIFY (full_density_resp_beta)
526 : ALLOCATE (full_density_resp_beta(shm_block_offset(ncpu + 1)))
527 : CALL get_full_density(para_env, full_density_resp_beta, rho_ao_resp(2)%matrix, shm_number_of_p_entries, &
528 : shm_block_offset, &
529 : kind_of, basis_parameter, get_max_vals_spin=.FALSE., &
530 : antisymmetric=is_anti_symmetric)
531 : END IF
532 :
533 : END IF
534 : CALL timestop(handle_getP)
535 :
536 : !! Calculate max entries for screening on actual density. If screen_p_mat_forces = FALSE, the
537 : !! matrix is initialized to 1.0
538 : IF (screen_pmat_forces) THEN
539 : NULLIFY (shm_initial_p)
540 : shm_initial_p => actual_x_data%initial_p_forces
541 : shm_pmax_atom => actual_x_data%pmax_atom_forces
542 : IF (memory_parameter%recalc_forces) THEN
543 : CALL update_pmax_mat(shm_initial_p, &
544 : actual_x_data%map_atom_to_kind_atom, &
545 : actual_x_data%set_offset, &
546 : actual_x_data%atomic_block_offset, &
547 : shm_pmax_atom, &
548 : full_density_alpha, full_density_beta, &
549 : natom, kind_of, basis_parameter, &
550 : nkind, actual_x_data%is_assoc_atomic_block)
551 : END IF
552 : END IF
553 :
554 : ! restore as full density the HF density
555 : ! maybe in the future
556 : IF (with_resp_density .AND. .NOT. my_resp_only) THEN
557 : full_density_alpha(:, 1) = full_density_alpha(:, 1) - full_density_resp
558 : IF (nspins == 2) THEN
559 : full_density_beta(:, 1) = &
560 : full_density_beta(:, 1) - full_density_resp_beta
561 : END IF
562 : ! full_density_resp=full_density+full_density_resp
563 : END IF
564 :
565 : screen_coeffs_set => actual_x_data%screen_funct_coeffs_set
566 : screen_coeffs_kind => actual_x_data%screen_funct_coeffs_kind
567 : screen_coeffs_pgf => actual_x_data%screen_funct_coeffs_pgf
568 : radii_pgf => actual_x_data%pair_dist_radii_pgf
569 : shm_pmax_block => actual_x_data%pmax_block
570 :
571 : !$OMP END MASTER
572 : !$OMP BARRIER
573 :
574 : !$OMP MASTER
575 : CALL timeset(routineN//"_load", handle_load)
576 : !$OMP END MASTER
577 : !! Load balance the work
578 : IF (actual_x_data%memory_parameter%recalc_forces) THEN
579 : IF (actual_x_data%b_first_load_balance_forces) THEN
580 : CALL hfx_load_balance(actual_x_data, eps_schwarz, particle_set, max_set, para_env, &
581 : screen_coeffs_set, screen_coeffs_kind, &
582 : shm_is_assoc_atomic_block, do_periodic, &
583 : load_balance_parameter, kind_of, basis_parameter, shm_initial_p, &
584 : shm_pmax_atom, i_thread, n_threads, &
585 : cell, screen_pmat_forces, actual_x_data%map_atom_to_kind_atom, &
586 : nkind, hfx_do_eval_forces, shm_pmax_block, use_virial)
587 : actual_x_data%b_first_load_balance_forces = .FALSE.
588 : ELSE
589 : CALL hfx_update_load_balance(actual_x_data, para_env, &
590 : load_balance_parameter, &
591 : i_thread, n_threads, hfx_do_eval_forces)
592 : END IF
593 : END IF
594 : !$OMP MASTER
595 : CALL timestop(handle_load)
596 : !$OMP END MASTER
597 :
598 : !! precompute maximum nco and allocate scratch
599 : ncos_max = 0
600 : nsgf_max = 0
601 : DO iatom = 1, natom
602 : ikind = kind_of(iatom)
603 : nseta = basis_parameter(ikind)%nset
604 : npgfa => basis_parameter(ikind)%npgf
605 : la_max => basis_parameter(ikind)%lmax
606 : nsgfa => basis_parameter(ikind)%nsgf
607 : DO iset = 1, nseta
608 : ncos_max = MAX(ncos_max, ncoset(la_max(iset)))
609 : nsgf_max = MAX(nsgf_max, nsgfa(iset))
610 : END DO
611 : END DO
612 :
613 : !! Allocate work arrays
614 : ALLOCATE (primitive_forces(12*nsgf_max**4))
615 : primitive_forces = 0.0_dp
616 :
617 : ! ** Allocate buffers for pgf_lists
618 : nneighbors = SIZE(actual_x_data%neighbor_cells)
619 : ALLOCATE (pgf_list_ij(max_pgf**2))
620 : ALLOCATE (pgf_list_kl(max_pgf**2))
621 : !$OMP ATOMIC READ
622 : tmp_i4 = pgf_product_list_size
623 : ALLOCATE (pgf_product_list(tmp_i4))
624 : ALLOCATE (nimages(max_pgf**2))
625 :
626 : DO i = 1, max_pgf**2
627 : ALLOCATE (pgf_list_ij(i)%image_list(nneighbors))
628 : ALLOCATE (pgf_list_kl(i)%image_list(nneighbors))
629 : END DO
630 :
631 : ALLOCATE (pbd_buf(nsgf_max**2))
632 : ALLOCATE (pbc_buf(nsgf_max**2))
633 : ALLOCATE (pad_buf(nsgf_max**2))
634 : ALLOCATE (pac_buf(nsgf_max**2))
635 : IF (with_resp_density) THEN
636 : ALLOCATE (pbd_buf_resp(nsgf_max**2))
637 : ALLOCATE (pbc_buf_resp(nsgf_max**2))
638 : ALLOCATE (pad_buf_resp(nsgf_max**2))
639 : ALLOCATE (pac_buf_resp(nsgf_max**2))
640 : END IF
641 : IF (nspins == 2) THEN
642 : ALLOCATE (pbd_buf_beta(nsgf_max**2))
643 : ALLOCATE (pbc_buf_beta(nsgf_max**2))
644 : ALLOCATE (pad_buf_beta(nsgf_max**2))
645 : ALLOCATE (pac_buf_beta(nsgf_max**2))
646 : IF (with_resp_density) THEN
647 : ALLOCATE (pbd_buf_resp_beta(nsgf_max**2))
648 : ALLOCATE (pbc_buf_resp_beta(nsgf_max**2))
649 : ALLOCATE (pad_buf_resp_beta(nsgf_max**2))
650 : ALLOCATE (pac_buf_resp_beta(nsgf_max**2))
651 : END IF
652 : END IF
653 :
654 : ALLOCATE (ede_work(ncos_max**4*12))
655 : ALLOCATE (ede_work2(ncos_max**4*12))
656 : ALLOCATE (ede_work_forces(ncos_max**4*12))
657 : ALLOCATE (ede_buffer1(ncos_max**4))
658 : ALLOCATE (ede_buffer2(ncos_max**4))
659 : ALLOCATE (ede_primitives_tmp(nsgf_max**4))
660 :
661 : IF (use_virial) THEN
662 : ALLOCATE (primitive_forces_virial(12*nsgf_max**4*3))
663 : primitive_forces_virial = 0.0_dp
664 : ALLOCATE (ede_work_virial(ncos_max**4*12*3))
665 : ALLOCATE (ede_work2_virial(ncos_max**4*12*3))
666 : ALLOCATE (ede_primitives_tmp_virial(nsgf_max**4))
667 : tmp_virial = 0.0_dp
668 : ELSE
669 : ! dummy allocation
670 : ALLOCATE (primitive_forces_virial(1))
671 : primitive_forces_virial = 0.0_dp
672 : ALLOCATE (ede_work_virial(1))
673 : ALLOCATE (ede_work2_virial(1))
674 : ALLOCATE (ede_primitives_tmp_virial(1))
675 : END IF
676 :
677 : !! Start caluclating integrals of the form (ab|cd) or (ij|kl)
678 : !! In order to do so, there is a main four-loop structure that takes into account the two symmetries
679 : !!
680 : !! (ab|cd) = (ba|cd) = (ab|dc) = (ba|dc)
681 : !!
682 : !! by iterating in the following way
683 : !!
684 : !! DO iatom=1,natom and DO katom=1,natom
685 : !! DO jatom=iatom,natom DO latom=katom,natom
686 : !!
687 : !! The third symmetry
688 : !!
689 : !! (ab|cd) = (cd|ab)
690 : !!
691 : !! is taken into account by the following criterion:
692 : !!
693 : !! IF(katom+latom<=iatom+jatom) THEN
694 : !! IF( ((iatom+jatom).EQ.(katom+latom) ) .AND.(katom<iatom)) CYCLE
695 : !!
696 : !! Furthermore, if iatom==jatom==katom==latom we cycle, because the derivatives are zero anyway.
697 : !!
698 : !! Depending on the degeneracy of an integral the exchange contribution is multiplied by a corresponding
699 : !! factor ( symm_fac ).
700 : !!
701 : !! If one quartet does not pass the screening we CYCLE on the outer most possible loop. Thats why we use
702 : !! different hierarchies of short range screening matrices.
703 : !!
704 : !! If we do a parallel run, each process owns a unique array of workloads. Here, a workload is
705 : !! defined as :
706 : !!
707 : !! istart, jstart, kstart, lstart, number_of_atom_quartets, initial_cpu_id
708 : !!
709 : !! This tells the process where to start the main loops and how many bunches of integrals it has to
710 : !! calculate. The original parallelization is a simple modulo distribution that is binned and
711 : !! optimized in the load_balance routines. Since the Monte Carlo routines can swap processors,
712 : !! we need to know which was the initial cpu_id.
713 : !! Furthermore, the indices iatom, jatom, katom, latom have to be set to istart, jstart, kstart and
714 : !! lstart only the first time the loop is executed. All subsequent loops have to start with one or
715 : !! iatom and katom respectively. Therefore, we use flags like first_j_loop etc.
716 :
717 : !$OMP BARRIER
718 : !$OMP MASTER
719 : CALL timeset(routineN//"_main", handle_main)
720 : !$OMP END MASTER
721 : !$OMP BARRIER
722 :
723 : coeffs_kind_max0 = MAXVAL(screen_coeffs_kind(:, :)%x(2))
724 : ALLOCATE (set_list_ij((max_set*load_balance_parameter%block_size)**2))
725 : ALLOCATE (set_list_kl((max_set*load_balance_parameter%block_size)**2))
726 :
727 : !$OMP BARRIER
728 :
729 : IF (actual_x_data%memory_parameter%recalc_forces) THEN
730 : actual_x_data%distribution_forces%ram_counter = HUGE(distribution_forces%ram_counter)
731 : memory_parameter%ram_counter_forces = HUGE(memory_parameter%ram_counter_forces)
732 : END IF
733 :
734 : IF (treat_forces_in_core) THEN
735 : buffer_overflow = .FALSE.
736 : ELSE
737 : buffer_overflow = .TRUE.
738 : END IF
739 :
740 : do_dynamic_load_balancing = .TRUE.
741 : IF (n_threads == 1) do_dynamic_load_balancing = .FALSE.
742 :
743 : IF (do_dynamic_load_balancing) THEN
744 : my_bin_size = SIZE(actual_x_data%distribution_forces)
745 : ELSE
746 : my_bin_size = 1
747 : END IF
748 : !! We do not need the containers if MAX_MEM = 0
749 : IF (treat_forces_in_core) THEN
750 : !! IF new md step -> reinitialize containers
751 : IF (actual_x_data%memory_parameter%recalc_forces) THEN
752 : CALL dealloc_containers(actual_x_data%store_forces, memory_parameter%actual_memory_usage)
753 : CALL alloc_containers(actual_x_data%store_forces, my_bin_size)
754 :
755 : DO bin = 1, my_bin_size
756 : maxval_container => actual_x_data%store_forces%maxval_container(bin)
757 : integral_containers => actual_x_data%store_forces%integral_containers(:, bin)
758 : CALL hfx_init_container(maxval_container, memory_parameter%actual_memory_usage, .FALSE.)
759 : DO i = 1, 64
760 : CALL hfx_init_container(integral_containers(i), memory_parameter%actual_memory_usage, .FALSE.)
761 : END DO
762 : END DO
763 : END IF
764 :
765 : !! Decompress the first cache for maxvals and integrals
766 : IF (.NOT. actual_x_data%memory_parameter%recalc_forces) THEN
767 : DO bin = 1, my_bin_size
768 : maxval_cache => actual_x_data%store_forces%maxval_cache(bin)
769 : maxval_container => actual_x_data%store_forces%maxval_container(bin)
770 : integral_caches => actual_x_data%store_forces%integral_caches(:, bin)
771 : integral_containers => actual_x_data%store_forces%integral_containers(:, bin)
772 : CALL hfx_decompress_first_cache(bits_max_val, maxval_cache, maxval_container, &
773 : memory_parameter%actual_memory_usage, .FALSE.)
774 : DO i = 1, 64
775 : CALL hfx_decompress_first_cache(i, integral_caches(i), integral_containers(i), &
776 : memory_parameter%actual_memory_usage, .FALSE.)
777 : END DO
778 : END DO
779 : END IF
780 : END IF
781 :
782 : !$OMP BARRIER
783 : !$OMP MASTER
784 : IF (do_dynamic_load_balancing) THEN
785 : ! ** Lets construct the task list
786 : shm_total_bins = 0
787 : DO i = 1, n_threads
788 : shm_total_bins = shm_total_bins + SIZE(my_x_data(irep, i)%distribution_forces)
789 : END DO
790 : ALLOCATE (tmp_task_list(shm_total_bins))
791 : shm_task_counter = 0
792 : DO i = 1, n_threads
793 : DO bin = 1, SIZE(my_x_data(irep, i)%distribution_forces)
794 : shm_task_counter = shm_task_counter + 1
795 : tmp_task_list(shm_task_counter)%thread_id = i
796 : tmp_task_list(shm_task_counter)%bin_id = bin
797 : tmp_task_list(shm_task_counter)%cost = my_x_data(irep, i)%distribution_forces(bin)%cost
798 : END DO
799 : END DO
800 :
801 : ! ** Now sort the task list
802 : ALLOCATE (tmp_task_list_cost(shm_total_bins))
803 : ALLOCATE (tmp_index(shm_total_bins))
804 :
805 : DO i = 1, shm_total_bins
806 : tmp_task_list_cost(i) = tmp_task_list(i)%cost
807 : END DO
808 :
809 : CALL sort(tmp_task_list_cost, shm_total_bins, tmp_index)
810 :
811 : ALLOCATE (actual_x_data%task_list(shm_total_bins))
812 :
813 : DO i = 1, shm_total_bins
814 : actual_x_data%task_list(i) = tmp_task_list(tmp_index(shm_total_bins - i + 1))
815 : END DO
816 :
817 : shm_task_list => actual_x_data%task_list
818 : shm_task_counter = 0
819 :
820 : DEALLOCATE (tmp_task_list_cost, tmp_index, tmp_task_list)
821 : END IF
822 :
823 : !! precalculate maximum density matrix elements in blocks
824 : shm_pmax_block => actual_x_data%pmax_block
825 : actual_x_data%pmax_block = 0.0_dp
826 : IF (screen_pmat_forces) THEN
827 : DO iatom_block = 1, SIZE(actual_x_data%blocks)
828 : iatom_start = actual_x_data%blocks(iatom_block)%istart
829 : iatom_end = actual_x_data%blocks(iatom_block)%iend
830 : DO jatom_block = 1, SIZE(actual_x_data%blocks)
831 : jatom_start = actual_x_data%blocks(jatom_block)%istart
832 : jatom_end = actual_x_data%blocks(jatom_block)%iend
833 : shm_pmax_block(iatom_block, jatom_block) = MAXVAL(shm_pmax_atom(iatom_start:iatom_end, jatom_start:jatom_end))
834 : END DO
835 : END DO
836 : END IF
837 : shm_atomic_pair_list => actual_x_data%atomic_pair_list_forces
838 : IF (actual_x_data%memory_parameter%recalc_forces) THEN
839 : CALL build_atomic_pair_list(natom, shm_atomic_pair_list, kind_of, basis_parameter, particle_set, &
840 : do_periodic, screen_coeffs_kind, coeffs_kind_max0, log10_eps_schwarz, cell, &
841 : actual_x_data%blocks)
842 : END IF
843 :
844 : my_bin_size = SIZE(actual_x_data%distribution_forces)
845 : DO bin = 1, my_bin_size
846 : actual_x_data%distribution_forces(bin)%time_forces = 0.0_dp
847 : END DO
848 : !$OMP END MASTER
849 : !$OMP BARRIER
850 :
851 : IF (treat_forces_in_core) THEN
852 : IF (my_bin_size > 0) THEN
853 : maxval_container => actual_x_data%store_forces%maxval_container(1)
854 : maxval_cache => actual_x_data%store_forces%maxval_cache(1)
855 :
856 : integral_containers => actual_x_data%store_forces%integral_containers(:, 1)
857 : integral_caches => actual_x_data%store_forces%integral_caches(:, 1)
858 : END IF
859 : END IF
860 :
861 : nblocks = load_balance_parameter%nblocks
862 :
863 : bins_left = .TRUE.
864 : do_it = .TRUE.
865 : bin = 0
866 : DO WHILE (bins_left)
867 : IF (.NOT. do_dynamic_load_balancing) THEN
868 : bin = bin + 1
869 : IF (bin > my_bin_size) THEN
870 : do_it = .FALSE.
871 : bins_left = .FALSE.
872 : ELSE
873 : do_it = .TRUE.
874 : bins_left = .TRUE.
875 : distribution_forces => actual_x_data%distribution_forces(bin)
876 : END IF
877 : ELSE
878 : !$OMP CRITICAL(hfxderivatives_critical)
879 : shm_task_counter = shm_task_counter + 1
880 : IF (shm_task_counter <= shm_total_bins) THEN
881 : my_thread_id = shm_task_list(shm_task_counter)%thread_id
882 : my_bin_id = shm_task_list(shm_task_counter)%bin_id
883 : IF (treat_forces_in_core) THEN
884 : maxval_cache => my_x_data(irep, my_thread_id)%store_forces%maxval_cache(my_bin_id)
885 : maxval_container => my_x_data(irep, my_thread_id)%store_forces%maxval_container(my_bin_id)
886 : integral_caches => my_x_data(irep, my_thread_id)%store_forces%integral_caches(:, my_bin_id)
887 : integral_containers => my_x_data(irep, my_thread_id)%store_forces%integral_containers(:, my_bin_id)
888 : END IF
889 : distribution_forces => my_x_data(irep, my_thread_id)%distribution_forces(my_bin_id)
890 : do_it = .TRUE.
891 : bins_left = .TRUE.
892 : IF (actual_x_data%memory_parameter%recalc_forces) THEN
893 : distribution_forces%ram_counter = HUGE(distribution_forces%ram_counter)
894 : END IF
895 : counter = 0_Int_8
896 : ELSE
897 : do_it = .FALSE.
898 : bins_left = .FALSE.
899 : END IF
900 : !$OMP END CRITICAL(hfxderivatives_critical)
901 : END IF
902 :
903 : IF (.NOT. do_it) CYCLE
904 : !$OMP MASTER
905 : CALL timeset(routineN//"_bin", handle_bin)
906 : !$OMP END MASTER
907 : bintime_start = m_walltime()
908 : my_istart = distribution_forces%istart
909 : my_current_counter = 0
910 : IF (distribution_forces%number_of_atom_quartets == 0 .OR. &
911 : my_istart == -1_int_8) my_istart = nblocks**4
912 : atomic_blocks: DO atom_block = my_istart, nblocks**4 - 1, n_processes
913 : latom_block = INT(MODULO(atom_block, nblocks)) + 1
914 : tmp_block = atom_block/nblocks
915 : katom_block = INT(MODULO(tmp_block, nblocks)) + 1
916 : IF (latom_block < katom_block) CYCLE
917 : tmp_block = tmp_block/nblocks
918 : jatom_block = INT(MODULO(tmp_block, nblocks)) + 1
919 : tmp_block = tmp_block/nblocks
920 : iatom_block = INT(MODULO(tmp_block, nblocks)) + 1
921 : IF (jatom_block < iatom_block) CYCLE
922 : my_current_counter = my_current_counter + 1
923 : IF (my_current_counter > distribution_forces%number_of_atom_quartets) EXIT atomic_blocks
924 :
925 : iatom_start = actual_x_data%blocks(iatom_block)%istart
926 : iatom_end = actual_x_data%blocks(iatom_block)%iend
927 : jatom_start = actual_x_data%blocks(jatom_block)%istart
928 : jatom_end = actual_x_data%blocks(jatom_block)%iend
929 : katom_start = actual_x_data%blocks(katom_block)%istart
930 : katom_end = actual_x_data%blocks(katom_block)%iend
931 : latom_start = actual_x_data%blocks(latom_block)%istart
932 : latom_end = actual_x_data%blocks(latom_block)%iend
933 :
934 : pmax_blocks = MAX(shm_pmax_block(katom_block, iatom_block) + &
935 : shm_pmax_block(latom_block, jatom_block), &
936 : shm_pmax_block(latom_block, iatom_block) + &
937 : shm_pmax_block(katom_block, jatom_block))
938 :
939 : IF (2.0_dp*coeffs_kind_max0 + pmax_blocks < log10_eps_schwarz) CYCLE
940 :
941 : CALL build_pair_list(natom, list_ij, set_list_ij, iatom_start, iatom_end, &
942 : jatom_start, jatom_end, &
943 : kind_of, basis_parameter, particle_set, &
944 : do_periodic, screen_coeffs_set, screen_coeffs_kind, coeffs_kind_max0, &
945 : log10_eps_schwarz, cell, pmax_blocks, shm_atomic_pair_list)
946 :
947 : CALL build_pair_list(natom, list_kl, set_list_kl, katom_start, katom_end, &
948 : latom_start, latom_end, &
949 : kind_of, basis_parameter, particle_set, &
950 : do_periodic, screen_coeffs_set, screen_coeffs_kind, coeffs_kind_max0, &
951 : log10_eps_schwarz, cell, pmax_blocks, shm_atomic_pair_list)
952 :
953 : DO i_list_ij = 1, list_ij%n_element
954 : iatom = list_ij%elements(i_list_ij)%pair(1)
955 : jatom = list_ij%elements(i_list_ij)%pair(2)
956 : i_set_list_ij_start = list_ij%elements(i_list_ij)%set_bounds(1)
957 : i_set_list_ij_stop = list_ij%elements(i_list_ij)%set_bounds(2)
958 : ikind = list_ij%elements(i_list_ij)%kind_pair(1)
959 : jkind = list_ij%elements(i_list_ij)%kind_pair(2)
960 : ra = list_ij%elements(i_list_ij)%r1
961 : rb = list_ij%elements(i_list_ij)%r2
962 : rab2 = list_ij%elements(i_list_ij)%dist2
963 :
964 : la_max => basis_parameter(ikind)%lmax
965 : la_min => basis_parameter(ikind)%lmin
966 : npgfa => basis_parameter(ikind)%npgf
967 : nseta = basis_parameter(ikind)%nset
968 : zeta => basis_parameter(ikind)%zet
969 : nsgfa => basis_parameter(ikind)%nsgf
970 : sphi_a_ext => basis_parameter(ikind)%sphi_ext(:, :, :, :)
971 : nsgfl_a => basis_parameter(ikind)%nsgfl
972 : sphi_a_u1 = UBOUND(sphi_a_ext, 1)
973 : sphi_a_u2 = UBOUND(sphi_a_ext, 2)
974 : sphi_a_u3 = UBOUND(sphi_a_ext, 3)
975 :
976 : lb_max => basis_parameter(jkind)%lmax
977 : lb_min => basis_parameter(jkind)%lmin
978 : npgfb => basis_parameter(jkind)%npgf
979 : nsetb = basis_parameter(jkind)%nset
980 : zetb => basis_parameter(jkind)%zet
981 : nsgfb => basis_parameter(jkind)%nsgf
982 : sphi_b_ext => basis_parameter(jkind)%sphi_ext(:, :, :, :)
983 : nsgfl_b => basis_parameter(jkind)%nsgfl
984 : sphi_b_u1 = UBOUND(sphi_b_ext, 1)
985 : sphi_b_u2 = UBOUND(sphi_b_ext, 2)
986 : sphi_b_u3 = UBOUND(sphi_b_ext, 3)
987 :
988 : i_atom = atom_of_kind(iatom)
989 : forces_map(1, 1) = ikind
990 : forces_map(1, 2) = i_atom
991 : j_atom = atom_of_kind(jatom)
992 : forces_map(2, 1) = jkind
993 : forces_map(2, 2) = j_atom
994 :
995 : DO i_list_kl = 1, list_kl%n_element
996 : katom = list_kl%elements(i_list_kl)%pair(1)
997 : latom = list_kl%elements(i_list_kl)%pair(2)
998 :
999 : !All four centers equivalent => zero-contribution
1000 : !VIIRAL IF((iatom==jatom .AND. iatom==katom .AND. iatom==latom)) CYCLE
1001 : IF (.NOT. (katom + latom <= iatom + jatom)) CYCLE
1002 : IF (((iatom + jatom) .EQ. (katom + latom)) .AND. (katom < iatom)) CYCLE
1003 :
1004 : i_set_list_kl_start = list_kl%elements(i_list_kl)%set_bounds(1)
1005 : i_set_list_kl_stop = list_kl%elements(i_list_kl)%set_bounds(2)
1006 : kkind = list_kl%elements(i_list_kl)%kind_pair(1)
1007 : lkind = list_kl%elements(i_list_kl)%kind_pair(2)
1008 : rc = list_kl%elements(i_list_kl)%r1
1009 : rd = list_kl%elements(i_list_kl)%r2
1010 : rcd2 = list_kl%elements(i_list_kl)%dist2
1011 :
1012 : IF (screen_pmat_forces) THEN
1013 : pmax_atom = MAX(shm_pmax_atom(katom, iatom) + &
1014 : shm_pmax_atom(latom, jatom), &
1015 : shm_pmax_atom(latom, iatom) + &
1016 : shm_pmax_atom(katom, jatom))
1017 : ELSE
1018 : pmax_atom = 0.0_dp
1019 : END IF
1020 :
1021 : IF ((screen_coeffs_kind(jkind, ikind)%x(1)*rab2 + &
1022 : screen_coeffs_kind(jkind, ikind)%x(2)) + &
1023 : (screen_coeffs_kind(lkind, kkind)%x(1)*rcd2 + &
1024 : screen_coeffs_kind(lkind, kkind)%x(2)) + pmax_atom < log10_eps_schwarz) CYCLE
1025 :
1026 : IF (.NOT. (shm_is_assoc_atomic_block(latom, iatom) >= 1 .AND. &
1027 : shm_is_assoc_atomic_block(katom, iatom) >= 1 .AND. &
1028 : shm_is_assoc_atomic_block(katom, jatom) >= 1 .AND. &
1029 : shm_is_assoc_atomic_block(latom, jatom) >= 1)) CYCLE
1030 : k_atom = atom_of_kind(katom)
1031 : forces_map(3, 1) = kkind
1032 : forces_map(3, 2) = k_atom
1033 :
1034 : l_atom = atom_of_kind(latom)
1035 : forces_map(4, 1) = lkind
1036 : forces_map(4, 2) = l_atom
1037 :
1038 : IF (nspins == 1) THEN
1039 : fac = 0.25_dp*hf_fraction
1040 : ELSE
1041 : fac = 0.5_dp*hf_fraction
1042 : END IF
1043 : !calculate symmetry_factor
1044 : symm_fac = 0.25_dp
1045 : IF (iatom == jatom) symm_fac = symm_fac*2.0_dp
1046 : IF (katom == latom) symm_fac = symm_fac*2.0_dp
1047 : IF (iatom == katom .AND. jatom == latom .AND. &
1048 : iatom /= jatom .AND. katom /= latom) symm_fac = symm_fac*2.0_dp
1049 : IF (iatom == katom .AND. iatom == jatom .AND. &
1050 : katom == latom) symm_fac = symm_fac*2.0_dp
1051 :
1052 : symm_fac = 1.0_dp/symm_fac
1053 : fac = fac*symm_fac
1054 :
1055 : lc_max => basis_parameter(kkind)%lmax
1056 : lc_min => basis_parameter(kkind)%lmin
1057 : npgfc => basis_parameter(kkind)%npgf
1058 : zetc => basis_parameter(kkind)%zet
1059 : nsgfc => basis_parameter(kkind)%nsgf
1060 : sphi_c_ext => basis_parameter(kkind)%sphi_ext(:, :, :, :)
1061 : nsgfl_c => basis_parameter(kkind)%nsgfl
1062 : sphi_c_u1 = UBOUND(sphi_c_ext, 1)
1063 : sphi_c_u2 = UBOUND(sphi_c_ext, 2)
1064 : sphi_c_u3 = UBOUND(sphi_c_ext, 3)
1065 :
1066 : ld_max => basis_parameter(lkind)%lmax
1067 : ld_min => basis_parameter(lkind)%lmin
1068 : npgfd => basis_parameter(lkind)%npgf
1069 : zetd => basis_parameter(lkind)%zet
1070 : nsgfd => basis_parameter(lkind)%nsgf
1071 : sphi_d_ext => basis_parameter(lkind)%sphi_ext(:, :, :, :)
1072 : nsgfl_d => basis_parameter(lkind)%nsgfl
1073 : sphi_d_u1 = UBOUND(sphi_d_ext, 1)
1074 : sphi_d_u2 = UBOUND(sphi_d_ext, 2)
1075 : sphi_d_u3 = UBOUND(sphi_d_ext, 3)
1076 :
1077 : atomic_offset_bd = shm_atomic_block_offset(jatom, latom)
1078 : atomic_offset_bc = shm_atomic_block_offset(jatom, katom)
1079 : atomic_offset_ad = shm_atomic_block_offset(iatom, latom)
1080 : atomic_offset_ac = shm_atomic_block_offset(iatom, katom)
1081 :
1082 : IF (jatom < latom) THEN
1083 : offset_bd_set => shm_set_offset(:, :, lkind, jkind)
1084 : ELSE
1085 : offset_bd_set => shm_set_offset(:, :, jkind, lkind)
1086 : END IF
1087 : IF (jatom < katom) THEN
1088 : offset_bc_set => shm_set_offset(:, :, kkind, jkind)
1089 : ELSE
1090 : offset_bc_set => shm_set_offset(:, :, jkind, kkind)
1091 : END IF
1092 : IF (iatom < latom) THEN
1093 : offset_ad_set => shm_set_offset(:, :, lkind, ikind)
1094 : ELSE
1095 : offset_ad_set => shm_set_offset(:, :, ikind, lkind)
1096 : END IF
1097 : IF (iatom < katom) THEN
1098 : offset_ac_set => shm_set_offset(:, :, kkind, ikind)
1099 : ELSE
1100 : offset_ac_set => shm_set_offset(:, :, ikind, kkind)
1101 : END IF
1102 :
1103 : IF (screen_pmat_forces) THEN
1104 : swap_id = 16
1105 : kind_kind_idx = INT(get_1D_idx(kkind, ikind, INT(nkind, int_8)))
1106 : IF (ikind >= kkind) THEN
1107 : ptr_p_1 => shm_initial_p(kind_kind_idx)%p_kind(:, :, &
1108 : actual_x_data%map_atom_to_kind_atom(katom), &
1109 : actual_x_data%map_atom_to_kind_atom(iatom))
1110 : ELSE
1111 : ptr_p_1 => shm_initial_p(kind_kind_idx)%p_kind(:, :, &
1112 : actual_x_data%map_atom_to_kind_atom(iatom), &
1113 : actual_x_data%map_atom_to_kind_atom(katom))
1114 : swap_id = swap_id + 1
1115 : END IF
1116 : kind_kind_idx = INT(get_1D_idx(lkind, jkind, INT(nkind, int_8)))
1117 : IF (jkind >= lkind) THEN
1118 : ptr_p_2 => shm_initial_p(kind_kind_idx)%p_kind(:, :, &
1119 : actual_x_data%map_atom_to_kind_atom(latom), &
1120 : actual_x_data%map_atom_to_kind_atom(jatom))
1121 : ELSE
1122 : ptr_p_2 => shm_initial_p(kind_kind_idx)%p_kind(:, :, &
1123 : actual_x_data%map_atom_to_kind_atom(jatom), &
1124 : actual_x_data%map_atom_to_kind_atom(latom))
1125 : swap_id = swap_id + 2
1126 : END IF
1127 : kind_kind_idx = INT(get_1D_idx(lkind, ikind, INT(nkind, int_8)))
1128 : IF (ikind >= lkind) THEN
1129 : ptr_p_3 => shm_initial_p(kind_kind_idx)%p_kind(:, :, &
1130 : actual_x_data%map_atom_to_kind_atom(latom), &
1131 : actual_x_data%map_atom_to_kind_atom(iatom))
1132 : ELSE
1133 : ptr_p_3 => shm_initial_p(kind_kind_idx)%p_kind(:, :, &
1134 : actual_x_data%map_atom_to_kind_atom(iatom), &
1135 : actual_x_data%map_atom_to_kind_atom(latom))
1136 : swap_id = swap_id + 4
1137 : END IF
1138 : kind_kind_idx = INT(get_1D_idx(kkind, jkind, INT(nkind, int_8)))
1139 : IF (jkind >= kkind) THEN
1140 : ptr_p_4 => shm_initial_p(kind_kind_idx)%p_kind(:, :, &
1141 : actual_x_data%map_atom_to_kind_atom(katom), &
1142 : actual_x_data%map_atom_to_kind_atom(jatom))
1143 : ELSE
1144 : ptr_p_4 => shm_initial_p(kind_kind_idx)%p_kind(:, :, &
1145 : actual_x_data%map_atom_to_kind_atom(jatom), &
1146 : actual_x_data%map_atom_to_kind_atom(katom))
1147 : swap_id = swap_id + 8
1148 : END IF
1149 : END IF
1150 :
1151 : !! At this stage, check for memory used in compression
1152 :
1153 : IF (actual_x_data%memory_parameter%recalc_forces) THEN
1154 : IF (treat_forces_in_core) THEN
1155 : ! ** We know the maximum amount of integrals that we can store per MPI-process
1156 : ! ** Now we need to sum the current memory usage among all openMP threads
1157 : ! ** We can just read what is currently stored on the corresponding x_data type
1158 : ! ** If this is thread i and it tries to read the data from thread j, that is
1159 : ! ** currently writing that data, we just dont care, because the possible error
1160 : ! ** is of the order of CACHE_SIZE
1161 : mem_compression_counter = 0
1162 : DO i = 1, n_threads
1163 : !$OMP ATOMIC READ
1164 : tmp_i4 = my_x_data(irep, i)%memory_parameter%actual_memory_usage
1165 : mem_compression_counter = mem_compression_counter + &
1166 : tmp_i4*memory_parameter%cache_size
1167 : END DO
1168 : IF (mem_compression_counter + memory_parameter%final_comp_counter_energy &
1169 : > memory_parameter%max_compression_counter) THEN
1170 : buffer_overflow = .TRUE.
1171 : IF (do_dynamic_load_balancing) THEN
1172 : distribution_forces%ram_counter = counter
1173 : ELSE
1174 : memory_parameter%ram_counter_forces = counter
1175 : END IF
1176 : ELSE
1177 : counter = counter + 1
1178 : buffer_overflow = .FALSE.
1179 : END IF
1180 : END IF
1181 : ELSE
1182 : IF (treat_forces_in_core) THEN
1183 : IF (do_dynamic_load_balancing) THEN
1184 : IF (distribution_forces%ram_counter == counter) THEN
1185 : buffer_overflow = .TRUE.
1186 : ELSE
1187 : counter = counter + 1
1188 : buffer_overflow = .FALSE.
1189 : END IF
1190 : ELSE
1191 : IF (memory_parameter%ram_counter_forces == counter) THEN
1192 : buffer_overflow = .TRUE.
1193 : ELSE
1194 : counter = counter + 1
1195 : buffer_overflow = .FALSE.
1196 : END IF
1197 : END IF
1198 : END IF
1199 : END IF
1200 :
1201 : DO i_set_list_ij = i_set_list_ij_start, i_set_list_ij_stop
1202 : iset = set_list_ij(i_set_list_ij)%pair(1)
1203 : jset = set_list_ij(i_set_list_ij)%pair(2)
1204 :
1205 : ncob = npgfb(jset)*ncoset(lb_max(jset))
1206 : max_val1 = screen_coeffs_set(jset, iset, jkind, ikind)%x(1)*rab2 + &
1207 : screen_coeffs_set(jset, iset, jkind, ikind)%x(2)
1208 : !! Near field screening
1209 : IF (max_val1 + (screen_coeffs_kind(lkind, kkind)%x(1)*rcd2 + &
1210 : screen_coeffs_kind(lkind, kkind)%x(2)) + pmax_atom < log10_eps_schwarz) CYCLE
1211 : sphi_a_ext_set => sphi_a_ext(:, :, :, iset)
1212 : sphi_b_ext_set => sphi_b_ext(:, :, :, jset)
1213 :
1214 : DO i_set_list_kl = i_set_list_kl_start, i_set_list_kl_stop
1215 : kset = set_list_kl(i_set_list_kl)%pair(1)
1216 : lset = set_list_kl(i_set_list_kl)%pair(2)
1217 :
1218 : max_val2_set = (screen_coeffs_set(lset, kset, lkind, kkind)%x(1)*rcd2 + &
1219 : screen_coeffs_set(lset, kset, lkind, kkind)%x(2))
1220 : max_val2 = max_val1 + max_val2_set
1221 :
1222 : !! Near field screening
1223 : IF (max_val2 + pmax_atom < log10_eps_schwarz) CYCLE
1224 : sphi_c_ext_set => sphi_c_ext(:, :, :, kset)
1225 : sphi_d_ext_set => sphi_d_ext(:, :, :, lset)
1226 :
1227 : IF (screen_pmat_forces) THEN
1228 : CALL get_pmax_val(ptr_p_1, ptr_p_2, ptr_p_3, ptr_p_4, &
1229 : iset, jset, kset, lset, &
1230 : pmax_tmp, swap_id)
1231 :
1232 : log10_pmax = log_2 + pmax_tmp
1233 : ELSE
1234 : log10_pmax = 0.0_dp
1235 : END IF
1236 :
1237 : max_val2 = max_val2 + log10_pmax
1238 : IF (max_val2 < log10_eps_schwarz) CYCLE
1239 :
1240 : pmax_entry = EXP(log10_pmax*ln_10)
1241 : !! store current number of integrals, update total number and number of integrals in buffer
1242 : current_counter = nsgfa(iset)*nsgfb(jset)*nsgfc(kset)*nsgfd(lset)*12
1243 : IF (buffer_overflow) THEN
1244 : neris_onthefly = neris_onthefly + current_counter
1245 : END IF
1246 :
1247 : !! Get integrals from buffer and update Kohn-Sham matrix
1248 : IF (.NOT. buffer_overflow .AND. .NOT. actual_x_data%memory_parameter%recalc_forces) THEN
1249 : nints = current_counter
1250 : CALL hfx_get_single_cache_element(estimate_to_store_int, 6, &
1251 : maxval_cache, maxval_container, memory_parameter%actual_memory_usage, &
1252 : use_disk_storage)
1253 : spherical_estimate = SET_EXPONENT(1.0_dp, estimate_to_store_int + 1)
1254 : ! IF (spherical_estimate*pmax_entry < eps_schwarz) CYCLE
1255 : IF (.NOT. use_virial) THEN
1256 : IF (spherical_estimate*pmax_entry < eps_schwarz) CYCLE
1257 : END IF
1258 : nbits = EXPONENT(ANINT(spherical_estimate*pmax_entry/eps_storage)) + 1
1259 : buffer_left = nints
1260 : buffer_start = 1
1261 : neris_incore = neris_incore + INT(nints, int_8)
1262 : DO WHILE (buffer_left > 0)
1263 : buffer_size = MIN(buffer_left, cache_size)
1264 : CALL hfx_get_mult_cache_elements(primitive_forces(buffer_start), &
1265 : buffer_size, nbits, &
1266 : integral_caches(nbits), &
1267 : integral_containers(nbits), &
1268 : eps_storage, pmax_entry, &
1269 : memory_parameter%actual_memory_usage, &
1270 : use_disk_storage)
1271 : buffer_left = buffer_left - buffer_size
1272 : buffer_start = buffer_start + buffer_size
1273 : END DO
1274 : END IF
1275 : !! Calculate integrals if we run out of buffer or the geometry did change
1276 : IF (actual_x_data%memory_parameter%recalc_forces .OR. buffer_overflow) THEN
1277 : max_contraction_val = max_contraction(iset, iatom)* &
1278 : max_contraction(jset, jatom)* &
1279 : max_contraction(kset, katom)* &
1280 : max_contraction(lset, latom)* &
1281 : pmax_entry
1282 : tmp_R_1 => radii_pgf(:, :, jset, iset, jkind, ikind)
1283 : tmp_R_2 => radii_pgf(:, :, lset, kset, lkind, kkind)
1284 : tmp_screen_pgf1 => screen_coeffs_pgf(:, :, jset, iset, jkind, ikind)
1285 : tmp_screen_pgf2 => screen_coeffs_pgf(:, :, lset, kset, lkind, kkind)
1286 :
1287 : CALL forces4(private_deriv, ra, rb, rc, rd, npgfa(iset), npgfb(jset), &
1288 : npgfc(kset), npgfd(lset), &
1289 : la_min(iset), la_max(iset), lb_min(jset), lb_max(jset), &
1290 : lc_min(kset), lc_max(kset), ld_min(lset), ld_max(lset), &
1291 : nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
1292 : sphi_a_u1, sphi_a_u2, sphi_a_u3, &
1293 : sphi_b_u1, sphi_b_u2, sphi_b_u3, &
1294 : sphi_c_u1, sphi_c_u2, sphi_c_u3, &
1295 : sphi_d_u1, sphi_d_u2, sphi_d_u3, &
1296 : zeta(1:npgfa(iset), iset), zetb(1:npgfb(jset), jset), &
1297 : zetc(1:npgfc(kset), kset), zetd(1:npgfd(lset), lset), &
1298 : primitive_forces, &
1299 : potential_parameter, &
1300 : actual_x_data%neighbor_cells, &
1301 : screen_coeffs_set(jset, iset, jkind, ikind)%x, &
1302 : screen_coeffs_set(lset, kset, lkind, kkind)%x, eps_schwarz, &
1303 : max_contraction_val, cartesian_estimate, cell, neris_tmp, &
1304 : log10_pmax, log10_eps_schwarz, &
1305 : tmp_R_1, tmp_R_2, tmp_screen_pgf1, tmp_screen_pgf2, &
1306 : pgf_list_ij, pgf_list_kl, pgf_product_list, &
1307 : nsgfl_a(:, iset), nsgfl_b(:, jset), &
1308 : nsgfl_c(:, kset), nsgfl_d(:, lset), &
1309 : sphi_a_ext_set, sphi_b_ext_set, sphi_c_ext_set, sphi_d_ext_set, &
1310 : ede_work, ede_work2, ede_work_forces, &
1311 : ede_buffer1, ede_buffer2, ede_primitives_tmp, &
1312 : nimages, do_periodic, use_virial, ede_work_virial, ede_work2_virial, &
1313 : ede_primitives_tmp_virial, primitive_forces_virial, &
1314 : cartesian_estimate_virial)
1315 :
1316 : nints = nsgfa(iset)*nsgfb(jset)*nsgfc(kset)*nsgfd(lset)*12
1317 : neris_total = neris_total + nints
1318 : nprim_ints = nprim_ints + neris_tmp
1319 : ! IF (cartesian_estimate == 0.0_dp) cartesian_estimate = TINY(cartesian_estimate)
1320 : ! estimate_to_store_int = EXPONENT(cartesian_estimate)
1321 : ! estimate_to_store_int = MAX(estimate_to_store_int, -15_int_8)
1322 : ! cartesian_estimate = SET_EXPONENT(1.0_dp, estimate_to_store_int+1)
1323 : ! IF (.NOT. buffer_overflow .AND. actual_x_data%memory_parameter%recalc_forces) THEN
1324 : ! IF (cartesian_estimate < eps_schwarz) THEN
1325 : ! CALL hfx_add_single_cache_element( &
1326 : ! estimate_to_store_int, 6, &
1327 : ! maxval_cache, maxval_container, memory_parameter%actual_memory_usage, &
1328 : ! use_disk_storage, max_val_memory)
1329 : ! END IF
1330 : ! END IF
1331 : !
1332 : ! IF (.NOT. use_virial) THEN
1333 : ! IF (cartesian_estimate < eps_schwarz) CYCLE
1334 : ! END IF
1335 :
1336 : !! Compress the array for storage
1337 : spherical_estimate = 0.0_dp
1338 : DO i = 1, nints
1339 : spherical_estimate = MAX(spherical_estimate, ABS(primitive_forces(i)))
1340 : END DO
1341 :
1342 : IF (use_virial) THEN
1343 : spherical_estimate_virial = 0.0_dp
1344 : DO i = 1, 3*nints
1345 : spherical_estimate_virial = MAX(spherical_estimate_virial, ABS(primitive_forces_virial(i)))
1346 : END DO
1347 : END IF
1348 :
1349 : IF (spherical_estimate == 0.0_dp) spherical_estimate = TINY(spherical_estimate)
1350 : estimate_to_store_int = EXPONENT(spherical_estimate)
1351 : estimate_to_store_int = MAX(estimate_to_store_int, -15_int_8)
1352 : IF (.NOT. buffer_overflow .AND. actual_x_data%memory_parameter%recalc_forces) THEN
1353 : CALL hfx_add_single_cache_element(estimate_to_store_int, 6, &
1354 : maxval_cache, maxval_container, &
1355 : memory_parameter%actual_memory_usage, &
1356 : use_disk_storage, max_val_memory)
1357 : END IF
1358 : spherical_estimate = SET_EXPONENT(1.0_dp, estimate_to_store_int + 1)
1359 : IF (.NOT. use_virial) THEN
1360 : IF (spherical_estimate*pmax_entry < eps_schwarz) CYCLE
1361 : END IF
1362 : IF (.NOT. buffer_overflow) THEN
1363 : nbits = EXPONENT(ANINT(spherical_estimate*pmax_entry/eps_storage)) + 1
1364 : buffer_left = nints
1365 : buffer_start = 1
1366 : ! neris_incore = neris_incore+nints
1367 : neris_incore = neris_incore + INT(nints, int_8)
1368 :
1369 : DO WHILE (buffer_left > 0)
1370 : buffer_size = MIN(buffer_left, CACHE_SIZE)
1371 : CALL hfx_add_mult_cache_elements(primitive_forces(buffer_start), &
1372 : buffer_size, nbits, &
1373 : integral_caches(nbits), &
1374 : integral_containers(nbits), &
1375 : eps_storage, pmax_entry, &
1376 : memory_parameter%actual_memory_usage, &
1377 : use_disk_storage)
1378 : buffer_left = buffer_left - buffer_size
1379 : buffer_start = buffer_start + buffer_size
1380 : END DO
1381 : ELSE
1382 : !! In order to be consistent with in-core part, round all the eris wrt. eps_schwarz
1383 : !! but only if we treat forces in-core
1384 : IF (treat_forces_in_core) THEN
1385 : DO i = 1, nints
1386 : primitive_forces(i) = primitive_forces(i)*pmax_entry
1387 : IF (ABS(primitive_forces(i)) > eps_storage) THEN
1388 : primitive_forces(i) = ANINT(primitive_forces(i)/eps_storage, dp)*eps_storage/pmax_entry
1389 : ELSE
1390 : primitive_forces(i) = 0.0_dp
1391 : END IF
1392 : END DO
1393 : END IF
1394 : END IF
1395 : END IF
1396 : IF (with_resp_density) THEN
1397 : CALL prefetch_density_matrix(nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
1398 : full_density_alpha(:, 1), pbd_buf, pbc_buf, pad_buf, pac_buf, &
1399 : iatom, jatom, katom, latom, &
1400 : iset, jset, kset, lset, offset_bd_set, offset_bc_set, &
1401 : offset_ad_set, offset_ac_set, atomic_offset_bd, atomic_offset_bc, &
1402 : atomic_offset_ad, atomic_offset_ac, is_anti_symmetric)
1403 : CALL prefetch_density_matrix(nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
1404 : full_density_resp, pbd_buf_resp, pbc_buf_resp, pad_buf_resp, pac_buf_resp, &
1405 : iatom, jatom, katom, latom, &
1406 : iset, jset, kset, lset, offset_bd_set, offset_bc_set, &
1407 : offset_ad_set, offset_ac_set, atomic_offset_bd, atomic_offset_bc, &
1408 : atomic_offset_ad, atomic_offset_ac, is_anti_symmetric)
1409 : ELSE
1410 : CALL prefetch_density_matrix(nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
1411 : full_density_alpha(:, 1), pbd_buf, pbc_buf, pad_buf, pac_buf, &
1412 : iatom, jatom, katom, latom, &
1413 : iset, jset, kset, lset, offset_bd_set, offset_bc_set, &
1414 : offset_ad_set, offset_ac_set, atomic_offset_bd, atomic_offset_bc, &
1415 : atomic_offset_ad, atomic_offset_ac, is_anti_symmetric)
1416 : END IF
1417 : IF (nspins == 2) THEN
1418 : IF (with_resp_density) THEN
1419 : CALL prefetch_density_matrix(nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
1420 : full_density_beta(:, 1), pbd_buf_beta, pbc_buf_beta, &
1421 : pad_buf_beta, pac_buf_beta, iatom, jatom, katom, latom, &
1422 : iset, jset, kset, lset, offset_bd_set, offset_bc_set, &
1423 : offset_ad_set, offset_ac_set, atomic_offset_bd, atomic_offset_bc, &
1424 : atomic_offset_ad, atomic_offset_ac, is_anti_symmetric)
1425 : CALL prefetch_density_matrix(nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
1426 : full_density_resp_beta, pbd_buf_resp_beta, pbc_buf_resp_beta, &
1427 : pad_buf_resp_beta, pac_buf_resp_beta, iatom, jatom, katom, latom, &
1428 : iset, jset, kset, lset, offset_bd_set, offset_bc_set, &
1429 : offset_ad_set, offset_ac_set, atomic_offset_bd, atomic_offset_bc, &
1430 : atomic_offset_ad, atomic_offset_ac, is_anti_symmetric)
1431 : ELSE
1432 : CALL prefetch_density_matrix(nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
1433 : full_density_beta(:, 1), pbd_buf_beta, pbc_buf_beta, pad_buf_beta, &
1434 : pac_buf_beta, iatom, jatom, katom, latom, &
1435 : iset, jset, kset, lset, offset_bd_set, offset_bc_set, &
1436 : offset_ad_set, offset_ac_set, atomic_offset_bd, atomic_offset_bc, &
1437 : atomic_offset_ad, atomic_offset_ac, is_anti_symmetric)
1438 : END IF
1439 : END IF
1440 : IF (spherical_estimate*pmax_entry >= eps_schwarz) THEN
1441 : DO coord = 1, 12
1442 : T2 => primitive_forces((coord - 1)*nsgfa(iset)*nsgfb(jset)*nsgfc(kset)*nsgfd(lset) + 1: &
1443 : coord*nsgfa(iset)*nsgfb(jset)*nsgfc(kset)*nsgfd(lset))
1444 :
1445 : IF (with_resp_density) THEN
1446 : CALL update_forces(nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
1447 : pbd_buf, pbc_buf, pad_buf, pac_buf, fac, &
1448 : T2, force, forces_map, coord, &
1449 : pbd_buf_resp, pbc_buf_resp, pad_buf_resp, pac_buf_resp, &
1450 : my_resp_only)
1451 : ELSE
1452 : CALL update_forces(nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
1453 : pbd_buf, pbc_buf, pad_buf, pac_buf, fac, &
1454 : T2, force, forces_map, coord)
1455 : END IF
1456 : IF (nspins == 2) THEN
1457 : IF (with_resp_density) THEN
1458 : CALL update_forces(nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
1459 : pbd_buf_beta, pbc_buf_beta, pad_buf_beta, pac_buf_beta, &
1460 : fac, T2, force, forces_map, coord, &
1461 : pbd_buf_resp_beta, pbc_buf_resp_beta, &
1462 : pad_buf_resp_beta, pac_buf_resp_beta, &
1463 : my_resp_only)
1464 : ELSE
1465 : CALL update_forces(nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
1466 : pbd_buf_beta, pbc_buf_beta, pad_buf_beta, pac_buf_beta, fac, &
1467 : T2, force, forces_map, coord)
1468 : END IF
1469 : END IF
1470 : END DO !coord
1471 : END IF
1472 : IF (use_virial .AND. spherical_estimate_virial*pmax_entry >= eps_schwarz) THEN
1473 : DO coord = 1, 12
1474 : DO i = 1, 3
1475 : T2 => primitive_forces_virial( &
1476 : ((i - 1)*12 + coord - 1)*nsgfa(iset)*nsgfb(jset)*nsgfc(kset)*nsgfd(lset) + 1: &
1477 : ((i - 1)*12 + coord)*nsgfa(iset)*nsgfb(jset)*nsgfc(kset)*nsgfd(lset))
1478 : IF (with_resp_density) THEN
1479 : CALL update_virial(nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
1480 : pbd_buf, pbc_buf, pad_buf, pac_buf, fac, &
1481 : T2, tmp_virial, coord, i, &
1482 : pbd_buf_resp, pbc_buf_resp, pad_buf_resp, &
1483 : pac_buf_resp, my_resp_only)
1484 : ELSE
1485 : CALL update_virial(nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
1486 : pbd_buf, pbc_buf, pad_buf, pac_buf, fac, &
1487 : T2, tmp_virial, coord, i)
1488 : END IF
1489 : IF (nspins == 2) THEN
1490 : IF (with_resp_density) THEN
1491 : CALL update_virial(nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
1492 : pbd_buf_beta, pbc_buf_beta, pad_buf_beta, pac_buf_beta, &
1493 : fac, T2, tmp_virial, coord, i, &
1494 : pbd_buf_resp_beta, pbc_buf_resp_beta, &
1495 : pad_buf_resp_beta, pac_buf_resp_beta, &
1496 : my_resp_only)
1497 : ELSE
1498 : CALL update_virial(nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
1499 : pbd_buf_beta, pbc_buf_beta, pad_buf_beta, pac_buf_beta, &
1500 : fac, T2, tmp_virial, coord, i)
1501 : END IF
1502 : END IF
1503 : END DO
1504 : END DO !coord
1505 : END IF
1506 :
1507 : END DO !i_set_list_kl
1508 : END DO !i_set_list_ij
1509 : END DO !i_list_kl
1510 : END DO !i_list_ij
1511 : END DO atomic_blocks
1512 : bintime_stop = m_walltime()
1513 : distribution_forces%time_forces = bintime_stop - bintime_start
1514 : !$OMP MASTER
1515 : CALL timestop(handle_bin)
1516 : !$OMP END MASTER
1517 : END DO !bin
1518 :
1519 : !$OMP MASTER
1520 : logger => cp_get_default_logger()
1521 : do_print_load_balance_info = .FALSE.
1522 : do_print_load_balance_info = BTEST(cp_print_key_should_output(logger%iter_info, hfx_section, &
1523 : "LOAD_BALANCE%PRINT/LOAD_BALANCE_INFO"), cp_p_file)
1524 : !$OMP END MASTER
1525 : !$OMP BARRIER
1526 : IF (do_print_load_balance_info) THEN
1527 : iw = -1
1528 : !$OMP MASTER
1529 : iw = cp_print_key_unit_nr(logger, hfx_section, "LOAD_BALANCE%PRINT/LOAD_BALANCE_INFO", &
1530 : extension=".scfLog")
1531 : !$OMP END MASTER
1532 :
1533 : CALL collect_load_balance_info(para_env, actual_x_data, iw, n_threads, i_thread, &
1534 : hfx_do_eval_forces)
1535 :
1536 : !$OMP MASTER
1537 : CALL cp_print_key_finished_output(iw, logger, hfx_section, &
1538 : "LOAD_BALANCE%PRINT/LOAD_BALANCE_INFO")
1539 : !$OMP END MASTER
1540 : END IF
1541 :
1542 : IF (use_virial) THEN
1543 : DO i = 1, 3
1544 : DO j = 1, 3
1545 : DO k = 1, 3
1546 : !$OMP ATOMIC
1547 : virial%pv_fock_4c(i, j) = virial%pv_fock_4c(i, j) + tmp_virial(i, k)*cell%hmat(j, k)
1548 : END DO
1549 : END DO
1550 : END DO
1551 : END IF
1552 :
1553 : !$OMP BARRIER
1554 : !! Get some number about ERIS
1555 : !$OMP ATOMIC
1556 : shm_neris_total = shm_neris_total + neris_total
1557 : !$OMP ATOMIC
1558 : shm_neris_onthefly = shm_neris_onthefly + neris_onthefly
1559 : !$OMP ATOMIC
1560 : shm_nprim_ints = shm_nprim_ints + nprim_ints
1561 :
1562 : storage_counter_integrals = memory_parameter%actual_memory_usage* &
1563 : memory_parameter%cache_size
1564 : stor_count_max_val = max_val_memory*memory_parameter%cache_size
1565 : !$OMP ATOMIC
1566 : shm_storage_counter_integrals = shm_storage_counter_integrals + storage_counter_integrals
1567 : !$OMP ATOMIC
1568 : shm_neris_incore = shm_neris_incore + neris_incore
1569 : !$OMP ATOMIC
1570 : shm_stor_count_max_val = shm_stor_count_max_val + stor_count_max_val
1571 : !$OMP BARRIER
1572 :
1573 : ! IF(with_resp_density) THEN
1574 : ! ! restore original load_balance_parameter
1575 : ! actual_x_data%load_balance_parameter = load_balance_parameter_energy
1576 : ! DEALLOCATE(load_balance_parameter_energy)
1577 : ! END IF
1578 :
1579 : !$OMP MASTER
1580 : !! Print some memeory information if this is the first step
1581 : IF (actual_x_data%memory_parameter%recalc_forces) THEN
1582 : tmp_i8(1:6) = (/shm_storage_counter_integrals, shm_neris_onthefly, shm_neris_incore, &
1583 : shm_neris_total, shm_nprim_ints, shm_stor_count_max_val/)
1584 : CALL para_env%sum(tmp_i8)
1585 : shm_storage_counter_integrals = tmp_i8(1)
1586 : shm_neris_onthefly = tmp_i8(2)
1587 : shm_neris_incore = tmp_i8(3)
1588 : shm_neris_total = tmp_i8(4)
1589 : shm_nprim_ints = tmp_i8(5)
1590 : shm_stor_count_max_val = tmp_i8(6)
1591 : mem_eris = (shm_storage_counter_integrals + 128*1024 - 1)/1024/128
1592 : compression_factor = REAL(shm_neris_incore, dp)/REAL(shm_storage_counter_integrals, dp)
1593 : mem_max_val = (shm_stor_count_max_val + 128*1024 - 1)/1024/128
1594 :
1595 : IF (shm_neris_incore == 0) THEN
1596 : mem_eris = 0
1597 : compression_factor = 0.0_dp
1598 : END IF
1599 :
1600 : logger => cp_get_default_logger()
1601 :
1602 : iw = cp_print_key_unit_nr(logger, hfx_section, "HF_INFO", &
1603 : extension=".scfLog")
1604 : IF (iw > 0) THEN
1605 : WRITE (UNIT=iw, FMT="(/,(T3,A,T65,I16))") &
1606 : "HFX_MEM_INFO| Number of cart. primitive DERIV's calculated: ", shm_nprim_ints
1607 :
1608 : WRITE (UNIT=iw, FMT="((T3,A,T60,I21))") &
1609 : "HFX_MEM_INFO| Number of sph. DERIV's calculated: ", shm_neris_total
1610 :
1611 : WRITE (UNIT=iw, FMT="((T3,A,T60,I21))") &
1612 : "HFX_MEM_INFO| Number of sph. DERIV's stored in-core: ", shm_neris_incore
1613 :
1614 : WRITE (UNIT=iw, FMT="((T3,A,T62,I19))") &
1615 : "HFX_MEM_INFO| Number of sph. DERIV's calculated on the fly: ", shm_neris_onthefly
1616 :
1617 : WRITE (UNIT=iw, FMT="((T3,A,T60,I21))") &
1618 : "HFX_MEM_INFO| Total memory consumption DERIV's RAM [MiB]: ", mem_eris
1619 :
1620 : WRITE (UNIT=iw, FMT="((T3,A,T60,I21))") &
1621 : "HFX_MEM_INFO| Whereof max-vals [MiB]: ", mem_max_val
1622 :
1623 : WRITE (UNIT=iw, FMT="((T3,A,T60,F21.2),/)") &
1624 : "HFX_MEM_INFO| Total compression factor DERIV's RAM: ", compression_factor
1625 : END IF
1626 :
1627 : CALL cp_print_key_finished_output(iw, logger, hfx_section, &
1628 : "HF_INFO")
1629 : END IF
1630 : !$OMP END MASTER
1631 :
1632 : !! flush caches if the geometry changed
1633 : IF (do_dynamic_load_balancing) THEN
1634 : my_bin_size = SIZE(actual_x_data%distribution_forces)
1635 : ELSE
1636 : my_bin_size = 1
1637 : END IF
1638 :
1639 : IF (actual_x_data%memory_parameter%recalc_forces) THEN
1640 : IF (treat_forces_in_core) THEN
1641 : DO bin = 1, my_bin_size
1642 : maxval_cache => actual_x_data%store_forces%maxval_cache(bin)
1643 : maxval_container => actual_x_data%store_forces%maxval_container(bin)
1644 : integral_caches => actual_x_data%store_forces%integral_caches(:, bin)
1645 : integral_containers => actual_x_data%store_forces%integral_containers(:, bin)
1646 : CALL hfx_flush_last_cache(bits_max_val, maxval_cache, maxval_container, memory_parameter%actual_memory_usage, &
1647 : .FALSE.)
1648 : DO i = 1, 64
1649 : CALL hfx_flush_last_cache(i, integral_caches(i), integral_containers(i), &
1650 : memory_parameter%actual_memory_usage, .FALSE.)
1651 : END DO
1652 : END DO
1653 : END IF
1654 : END IF
1655 : !! reset all caches except we calculate all on the fly
1656 : IF (treat_forces_in_core) THEN
1657 : DO bin = 1, my_bin_size
1658 : maxval_cache => actual_x_data%store_forces%maxval_cache(bin)
1659 : maxval_container => actual_x_data%store_forces%maxval_container(bin)
1660 : integral_caches => actual_x_data%store_forces%integral_caches(:, bin)
1661 : integral_containers => actual_x_data%store_forces%integral_containers(:, bin)
1662 :
1663 : CALL hfx_reset_cache_and_container(maxval_cache, maxval_container, memory_parameter%actual_memory_usage, .FALSE.)
1664 : DO i = 1, 64
1665 : CALL hfx_reset_cache_and_container(integral_caches(i), integral_containers(i), &
1666 : memory_parameter%actual_memory_usage, &
1667 : .FALSE.)
1668 : END DO
1669 : END DO
1670 : END IF
1671 :
1672 : IF (actual_x_data%memory_parameter%recalc_forces) THEN
1673 : actual_x_data%memory_parameter%recalc_forces = .FALSE.
1674 : END IF
1675 :
1676 : !$OMP BARRIER
1677 : !$OMP MASTER
1678 : CALL timestop(handle_main)
1679 : !$OMP END MASTER
1680 : !$OMP BARRIER
1681 : DEALLOCATE (last_sgf_global)
1682 : !$OMP MASTER
1683 : DEALLOCATE (full_density_alpha)
1684 : IF (with_resp_density) THEN
1685 : DEALLOCATE (full_density_resp)
1686 : END IF
1687 : IF (nspins == 2) THEN
1688 : DEALLOCATE (full_density_beta)
1689 : IF (with_resp_density) THEN
1690 : DEALLOCATE (full_density_resp_beta)
1691 : END IF
1692 : END IF
1693 : IF (do_dynamic_load_balancing) THEN
1694 : DEALLOCATE (actual_x_data%task_list)
1695 : END IF
1696 : !$OMP END MASTER
1697 : DEALLOCATE (primitive_forces)
1698 : DEALLOCATE (atom_of_kind, kind_of)
1699 : DEALLOCATE (max_contraction)
1700 : DEALLOCATE (pbd_buf)
1701 : DEALLOCATE (pbc_buf)
1702 : DEALLOCATE (pad_buf)
1703 : DEALLOCATE (pac_buf)
1704 :
1705 : IF (with_resp_density) THEN
1706 : DEALLOCATE (pbd_buf_resp)
1707 : DEALLOCATE (pbc_buf_resp)
1708 : DEALLOCATE (pad_buf_resp)
1709 : DEALLOCATE (pac_buf_resp)
1710 : END IF
1711 :
1712 : DO i = 1, max_pgf**2
1713 : DEALLOCATE (pgf_list_ij(i)%image_list)
1714 : DEALLOCATE (pgf_list_kl(i)%image_list)
1715 : END DO
1716 :
1717 : DEALLOCATE (pgf_list_ij)
1718 : DEALLOCATE (pgf_list_kl)
1719 : DEALLOCATE (pgf_product_list)
1720 :
1721 : DEALLOCATE (set_list_ij, set_list_kl)
1722 :
1723 : DEALLOCATE (primitive_forces_virial)
1724 :
1725 : DEALLOCATE (ede_work, ede_work2, ede_work_forces, ede_buffer1, ede_buffer2, ede_primitives_tmp)
1726 :
1727 : DEALLOCATE (ede_work_virial, ede_work2_virial, ede_primitives_tmp_virial)
1728 :
1729 : DEALLOCATE (nimages)
1730 :
1731 : IF (nspins == 2) THEN
1732 : DEALLOCATE (pbd_buf_beta, pbc_buf_beta, pad_buf_beta, pac_buf_beta)
1733 : IF (with_resp_density) THEN
1734 : DEALLOCATE (pbd_buf_resp_beta)
1735 : DEALLOCATE (pbc_buf_resp_beta)
1736 : DEALLOCATE (pad_buf_resp_beta)
1737 : DEALLOCATE (pac_buf_resp_beta)
1738 : END IF
1739 : END IF
1740 :
1741 : !
1742 : ! this wraps to free_libint, but currently causes a segfault
1743 : ! as a result, we don't call it, but some memory remains allocated
1744 : !
1745 : ! CALL terminate_libderiv(private_deriv)
1746 :
1747 : !$OMP END PARALLEL
1748 :
1749 1784 : CALL timestop(handle)
1750 :
1751 3708312 : END SUBROUTINE derivatives_four_center
1752 :
1753 : ! **************************************************************************************************
1754 : !> \brief ...
1755 : !> \param deriv ...
1756 : !> \param ra ...
1757 : !> \param rb ...
1758 : !> \param rc ...
1759 : !> \param rd ...
1760 : !> \param npgfa ...
1761 : !> \param npgfb ...
1762 : !> \param npgfc ...
1763 : !> \param npgfd ...
1764 : !> \param la_min ...
1765 : !> \param la_max ...
1766 : !> \param lb_min ...
1767 : !> \param lb_max ...
1768 : !> \param lc_min ...
1769 : !> \param lc_max ...
1770 : !> \param ld_min ...
1771 : !> \param ld_max ...
1772 : !> \param nsgfa ...
1773 : !> \param nsgfb ...
1774 : !> \param nsgfc ...
1775 : !> \param nsgfd ...
1776 : !> \param sphi_a_u1 ...
1777 : !> \param sphi_a_u2 ...
1778 : !> \param sphi_a_u3 ...
1779 : !> \param sphi_b_u1 ...
1780 : !> \param sphi_b_u2 ...
1781 : !> \param sphi_b_u3 ...
1782 : !> \param sphi_c_u1 ...
1783 : !> \param sphi_c_u2 ...
1784 : !> \param sphi_c_u3 ...
1785 : !> \param sphi_d_u1 ...
1786 : !> \param sphi_d_u2 ...
1787 : !> \param sphi_d_u3 ...
1788 : !> \param zeta ...
1789 : !> \param zetb ...
1790 : !> \param zetc ...
1791 : !> \param zetd ...
1792 : !> \param primitive_integrals ...
1793 : !> \param potential_parameter ...
1794 : !> \param neighbor_cells ...
1795 : !> \param screen1 ...
1796 : !> \param screen2 ...
1797 : !> \param eps_schwarz ...
1798 : !> \param max_contraction_val ...
1799 : !> \param cart_estimate ...
1800 : !> \param cell ...
1801 : !> \param neris_tmp ...
1802 : !> \param log10_pmax ...
1803 : !> \param log10_eps_schwarz ...
1804 : !> \param R1_pgf ...
1805 : !> \param R2_pgf ...
1806 : !> \param pgf1 ...
1807 : !> \param pgf2 ...
1808 : !> \param pgf_list_ij ...
1809 : !> \param pgf_list_kl ...
1810 : !> \param pgf_product_list ...
1811 : !> \param nsgfl_a ...
1812 : !> \param nsgfl_b ...
1813 : !> \param nsgfl_c ...
1814 : !> \param nsgfl_d ...
1815 : !> \param sphi_a_ext ...
1816 : !> \param sphi_b_ext ...
1817 : !> \param sphi_c_ext ...
1818 : !> \param sphi_d_ext ...
1819 : !> \param ede_work ...
1820 : !> \param ede_work2 ...
1821 : !> \param ede_work_forces ...
1822 : !> \param ede_buffer1 ...
1823 : !> \param ede_buffer2 ...
1824 : !> \param ede_primitives_tmp ...
1825 : !> \param nimages ...
1826 : !> \param do_periodic ...
1827 : !> \param use_virial ...
1828 : !> \param ede_work_virial ...
1829 : !> \param ede_work2_virial ...
1830 : !> \param ede_primitives_tmp_virial ...
1831 : !> \param primitive_integrals_virial ...
1832 : !> \param cart_estimate_virial ...
1833 : ! **************************************************************************************************
1834 1610523 : SUBROUTINE forces4(deriv, ra, rb, rc, rd, npgfa, npgfb, npgfc, npgfd, &
1835 : la_min, la_max, lb_min, lb_max, &
1836 : lc_min, lc_max, ld_min, ld_max, nsgfa, nsgfb, nsgfc, nsgfd, &
1837 : sphi_a_u1, sphi_a_u2, sphi_a_u3, &
1838 : sphi_b_u1, sphi_b_u2, sphi_b_u3, &
1839 : sphi_c_u1, sphi_c_u2, sphi_c_u3, &
1840 : sphi_d_u1, sphi_d_u2, sphi_d_u3, &
1841 1610523 : zeta, zetb, zetc, zetd, &
1842 1610523 : primitive_integrals, &
1843 : potential_parameter, neighbor_cells, &
1844 : screen1, screen2, eps_schwarz, max_contraction_val, &
1845 : cart_estimate, cell, neris_tmp, log10_pmax, &
1846 : log10_eps_schwarz, R1_pgf, R2_pgf, pgf1, pgf2, &
1847 : pgf_list_ij, pgf_list_kl, &
1848 : pgf_product_list, &
1849 1610523 : nsgfl_a, nsgfl_b, nsgfl_c, &
1850 1610523 : nsgfl_d, &
1851 1610523 : sphi_a_ext, sphi_b_ext, sphi_c_ext, sphi_d_ext, &
1852 : ede_work, ede_work2, ede_work_forces, &
1853 : ede_buffer1, ede_buffer2, ede_primitives_tmp, &
1854 : nimages, do_periodic, use_virial, ede_work_virial, &
1855 1610523 : ede_work2_virial, ede_primitives_tmp_virial, primitive_integrals_virial, &
1856 : cart_estimate_virial)
1857 :
1858 : TYPE(cp_libint_t) :: deriv
1859 : REAL(dp), INTENT(IN) :: ra(3), rb(3), rc(3), rd(3)
1860 : INTEGER, INTENT(IN) :: npgfa, npgfb, npgfc, npgfd, la_min, la_max, lb_min, lb_max, lc_min, &
1861 : lc_max, ld_min, ld_max, nsgfa, nsgfb, nsgfc, nsgfd, sphi_a_u1, sphi_a_u2, sphi_a_u3, &
1862 : sphi_b_u1, sphi_b_u2, sphi_b_u3, sphi_c_u1, sphi_c_u2, sphi_c_u3, sphi_d_u1, sphi_d_u2, &
1863 : sphi_d_u3
1864 : REAL(dp), DIMENSION(1:npgfa), INTENT(IN) :: zeta
1865 : REAL(dp), DIMENSION(1:npgfb), INTENT(IN) :: zetb
1866 : REAL(dp), DIMENSION(1:npgfc), INTENT(IN) :: zetc
1867 : REAL(dp), DIMENSION(1:npgfd), INTENT(IN) :: zetd
1868 : REAL(dp), &
1869 : DIMENSION(nsgfa, nsgfb, nsgfc, nsgfd, 12) :: primitive_integrals
1870 : TYPE(hfx_potential_type) :: potential_parameter
1871 : TYPE(hfx_cell_type), DIMENSION(:), POINTER :: neighbor_cells
1872 : REAL(dp), INTENT(IN) :: screen1(2), screen2(2), eps_schwarz, &
1873 : max_contraction_val
1874 : REAL(dp) :: cart_estimate
1875 : TYPE(cell_type), POINTER :: cell
1876 : INTEGER(int_8) :: neris_tmp
1877 : REAL(dp), INTENT(IN) :: log10_pmax, log10_eps_schwarz
1878 : TYPE(hfx_screen_coeff_type), DIMENSION(:, :), &
1879 : POINTER :: R1_pgf, R2_pgf, pgf1, pgf2
1880 : TYPE(hfx_pgf_list), DIMENSION(*) :: pgf_list_ij, pgf_list_kl
1881 : TYPE(hfx_pgf_product_list), ALLOCATABLE, &
1882 : DIMENSION(:), INTENT(INOUT) :: pgf_product_list
1883 : INTEGER, DIMENSION(0:), INTENT(IN) :: nsgfl_a, nsgfl_b, nsgfl_c, nsgfl_d
1884 : REAL(dp), INTENT(IN) :: sphi_a_ext(sphi_a_u1, sphi_a_u2, sphi_a_u3), &
1885 : sphi_b_ext(sphi_b_u1, sphi_b_u2, sphi_b_u3), sphi_c_ext(sphi_c_u1, sphi_c_u2, sphi_c_u3), &
1886 : sphi_d_ext(sphi_d_u1, sphi_d_u2, sphi_d_u3)
1887 : REAL(dp), DIMENSION(*) :: ede_work, ede_work2, ede_work_forces, &
1888 : ede_buffer1, ede_buffer2, &
1889 : ede_primitives_tmp
1890 : INTEGER, DIMENSION(*) :: nimages
1891 : LOGICAL, INTENT(IN) :: do_periodic, use_virial
1892 : REAL(dp), DIMENSION(*) :: ede_work_virial, ede_work2_virial, &
1893 : ede_primitives_tmp_virial
1894 : REAL(dp), &
1895 : DIMENSION(nsgfa, nsgfb, nsgfc, nsgfd, 12, 3) :: primitive_integrals_virial
1896 : REAL(dp) :: cart_estimate_virial
1897 :
1898 : INTEGER :: ipgf, jpgf, kpgf, la, lb, lc, ld, list_ij, list_kl, lpgf, max_l, ncoa, ncob, &
1899 : ncoc, ncod, nelements_ij, nelements_kl, nproducts, nsgfla, nsgflb, nsgflc, nsgfld, nsoa, &
1900 : nsob, nsoc, nsod, s_offset_a, s_offset_b, s_offset_c, s_offset_d
1901 : REAL(dp) :: EtaInv, tmp_max, tmp_max_virial, Zeta_A, &
1902 : Zeta_B, Zeta_C, Zeta_D, ZetaInv
1903 :
1904 : CALL build_pair_list_pgf(npgfa, npgfb, pgf_list_ij, zeta, zetb, screen1, screen2, &
1905 : pgf1, R1_pgf, log10_pmax, log10_eps_schwarz, ra, rb, &
1906 : nelements_ij, &
1907 1610523 : neighbor_cells, nimages, do_periodic)
1908 : CALL build_pair_list_pgf(npgfc, npgfd, pgf_list_kl, zetc, zetd, screen2, screen1, &
1909 : pgf2, R2_pgf, log10_pmax, log10_eps_schwarz, rc, rd, &
1910 : nelements_kl, &
1911 1610523 : neighbor_cells, nimages, do_periodic)
1912 :
1913 1610523 : cart_estimate = 0.0_dp
1914 802089051 : primitive_integrals = 0.0_dp
1915 1610523 : IF (use_virial) THEN
1916 285087896 : primitive_integrals_virial = 0.0_dp
1917 198587 : cart_estimate_virial = 0.0_dp
1918 : END IF
1919 1610523 : neris_tmp = 0_int_8
1920 1610523 : max_l = la_max + lb_max + lc_max + ld_max + 1
1921 4313491 : DO list_ij = 1, nelements_ij
1922 2702968 : Zeta_A = pgf_list_ij(list_ij)%zeta
1923 2702968 : Zeta_B = pgf_list_ij(list_ij)%zetb
1924 2702968 : ZetaInv = pgf_list_ij(list_ij)%ZetaInv
1925 2702968 : ipgf = pgf_list_ij(list_ij)%ipgf
1926 2702968 : jpgf = pgf_list_ij(list_ij)%jpgf
1927 :
1928 17526790 : DO list_kl = 1, nelements_kl
1929 13213299 : Zeta_C = pgf_list_kl(list_kl)%zeta
1930 13213299 : Zeta_D = pgf_list_kl(list_kl)%zetb
1931 13213299 : EtaInv = pgf_list_kl(list_kl)%ZetaInv
1932 13213299 : kpgf = pgf_list_kl(list_kl)%ipgf
1933 13213299 : lpgf = pgf_list_kl(list_kl)%jpgf
1934 :
1935 : CALL build_pgf_product_list(pgf_list_ij(list_ij), pgf_list_kl(list_kl), pgf_product_list, &
1936 : nproducts, log10_pmax, log10_eps_schwarz, neighbor_cells, cell, &
1937 13213299 : potential_parameter, max_l, do_periodic)
1938 13213299 : s_offset_a = 0
1939 31848436 : DO la = la_min, la_max
1940 15932169 : s_offset_b = 0
1941 15932169 : ncoa = nco(la)
1942 15932169 : nsgfla = nsgfl_a(la)
1943 15932169 : nsoa = nso(la)
1944 :
1945 32841129 : DO lb = lb_min, lb_max
1946 16908960 : s_offset_c = 0
1947 16908960 : ncob = nco(lb)
1948 16908960 : nsgflb = nsgfl_b(lb)
1949 16908960 : nsob = nso(lb)
1950 :
1951 41807806 : DO lc = lc_min, lc_max
1952 24898846 : s_offset_d = 0
1953 24898846 : ncoc = nco(lc)
1954 24898846 : nsgflc = nsgfl_c(lc)
1955 24898846 : nsoc = nso(lc)
1956 :
1957 56680298 : DO ld = ld_min, ld_max
1958 31781452 : ncod = nco(ld)
1959 31781452 : nsgfld = nsgfl_d(ld)
1960 31781452 : nsod = nso(ld)
1961 31781452 : tmp_max = 0.0_dp
1962 31781452 : tmp_max_virial = 0.0_dp
1963 : CALL evaluate_deriv_eri(deriv, nproducts, pgf_product_list, &
1964 : la, lb, lc, ld, &
1965 : ncoa, ncob, ncoc, ncod, &
1966 : nsgfa, nsgfb, nsgfc, nsgfd, &
1967 : primitive_integrals, &
1968 : max_contraction_val, tmp_max, eps_schwarz, neris_tmp, &
1969 : Zeta_A, Zeta_B, Zeta_C, Zeta_D, ZetaInv, EtaInv, &
1970 : s_offset_a, s_offset_b, s_offset_c, s_offset_d, &
1971 : nsgfla, nsgflb, nsgflc, nsgfld, nsoa, nsob, nsoc, nsod, &
1972 : sphi_a_ext(1, la + 1, ipgf), &
1973 : sphi_b_ext(1, lb + 1, jpgf), &
1974 : sphi_c_ext(1, lc + 1, kpgf), &
1975 : sphi_d_ext(1, ld + 1, lpgf), &
1976 : ede_work, ede_work2, ede_work_forces, &
1977 : ede_buffer1, ede_buffer2, ede_primitives_tmp, use_virial, &
1978 : ede_work_virial, ede_work2_virial, ede_primitives_tmp_virial, &
1979 31781452 : primitive_integrals_virial, cell, tmp_max_virial)
1980 31781452 : cart_estimate = MAX(tmp_max, cart_estimate)
1981 31781452 : IF (use_virial) cart_estimate_virial = MAX(tmp_max_virial, cart_estimate_virial)
1982 56680298 : s_offset_d = s_offset_d + nsod*nsgfld
1983 : END DO !ld
1984 41807806 : s_offset_c = s_offset_c + nsoc*nsgflc
1985 : END DO !lc
1986 32841129 : s_offset_b = s_offset_b + nsob*nsgflb
1987 : END DO !lb
1988 29145468 : s_offset_a = s_offset_a + nsoa*nsgfla
1989 : END DO !la
1990 : END DO
1991 : END DO
1992 :
1993 1610523 : END SUBROUTINE forces4
1994 :
1995 : ! **************************************************************************************************
1996 : !> \brief Given a 2d index pair, this function returns a 1d index pair for
1997 : !> a symmetric upper triangle NxN matrix
1998 : !> The compiler should inline this function, therefore it appears in
1999 : !> several modules
2000 : !> \param i 2d index
2001 : !> \param j 2d index
2002 : !> \param N matrix size
2003 : !> \return ...
2004 : !> \par History
2005 : !> 03.2009 created [Manuel Guidon]
2006 : !> \author Manuel Guidon
2007 : ! **************************************************************************************************
2008 57408 : PURE FUNCTION get_1D_idx(i, j, N)
2009 : INTEGER, INTENT(IN) :: i, j
2010 : INTEGER(int_8), INTENT(IN) :: N
2011 : INTEGER(int_8) :: get_1D_idx
2012 :
2013 : INTEGER(int_8) :: min_ij
2014 :
2015 57408 : min_ij = MIN(i, j)
2016 57408 : get_1D_idx = min_ij*N + MAX(i, j) - (min_ij - 1)*min_ij/2 - N
2017 :
2018 57408 : END FUNCTION get_1D_idx
2019 :
2020 : ! **************************************************************************************************
2021 : !> \brief This routine prefetches density matrix elements, i.e. reshuffles the
2022 : !> data in a way that they can be accessed later on in a cache friendly
2023 : !> way
2024 : !> \param ma_max Size of matrix blocks
2025 : !> \param mb_max Size of matrix blocks
2026 : !> \param mc_max Size of matrix blocks
2027 : !> \param md_max Size of matrix blocks
2028 : !> \param density ...
2029 : !> \param pbd buffer that will contain P(b,d)
2030 : !> \param pbc buffer that will contain P(b,c)
2031 : !> \param pad buffer that will contain P(a,d)
2032 : !> \param pac buffer that will contain P(a,c)
2033 : !> \param iatom ...
2034 : !> \param jatom ...
2035 : !> \param katom ...
2036 : !> \param latom ...
2037 : !> \param iset ...
2038 : !> \param jset ...
2039 : !> \param kset ...
2040 : !> \param lset ...
2041 : !> \param offset_bd_set ...
2042 : !> \param offset_bc_set ...
2043 : !> \param offset_ad_set ...
2044 : !> \param offset_ac_set ...
2045 : !> \param atomic_offset_bd ...
2046 : !> \param atomic_offset_bc ...
2047 : !> \param atomic_offset_ad ...
2048 : !> \param atomic_offset_ac ...
2049 : !> \param anti_symmetric ...
2050 : !> \par History
2051 : !> 03.2009 created [Manuel Guidon]
2052 : !> \author Manuel Guidon
2053 : ! **************************************************************************************************
2054 2876282 : SUBROUTINE prefetch_density_matrix(ma_max, mb_max, mc_max, md_max, &
2055 2876282 : density, pbd, pbc, pad, pac, &
2056 : iatom, jatom, katom, latom, &
2057 : iset, jset, kset, lset, offset_bd_set, offset_bc_set, &
2058 : offset_ad_set, offset_ac_set, atomic_offset_bd, atomic_offset_bc, &
2059 : atomic_offset_ad, atomic_offset_ac, anti_symmetric)
2060 :
2061 : INTEGER, INTENT(IN) :: ma_max, mb_max, mc_max, md_max
2062 : REAL(dp), DIMENSION(:), INTENT(IN) :: density
2063 : REAL(dp), DIMENSION(*), INTENT(INOUT) :: pbd, pbc, pad, pac
2064 : INTEGER, INTENT(IN) :: iatom, jatom, katom, latom, iset, jset, &
2065 : kset, lset
2066 : INTEGER, DIMENSION(:, :), POINTER :: offset_bd_set, offset_bc_set, &
2067 : offset_ad_set, offset_ac_set
2068 : INTEGER, INTENT(IN) :: atomic_offset_bd, atomic_offset_bc, &
2069 : atomic_offset_ad, atomic_offset_ac
2070 : LOGICAL :: anti_symmetric
2071 :
2072 : INTEGER :: i, j, ma, mb, mc, md, offset_ac, &
2073 : offset_ad, offset_bc, offset_bd
2074 : REAL(dp) :: fac
2075 :
2076 2876282 : fac = 1.0_dp
2077 2876282 : IF (anti_symmetric) fac = -1.0_dp
2078 2876282 : IF (jatom >= latom) THEN
2079 2870724 : i = 1
2080 2870724 : offset_bd = offset_bd_set(jset, lset) + atomic_offset_bd - 1
2081 2870724 : j = offset_bd
2082 8389115 : DO md = 1, md_max
2083 17939961 : DO mb = 1, mb_max
2084 9550846 : pbd(i) = density(j)*fac
2085 9550846 : i = i + 1
2086 15069237 : j = j + 1
2087 : END DO
2088 : END DO
2089 : ELSE
2090 5558 : i = 1
2091 5558 : offset_bd = offset_bd_set(lset, jset) + atomic_offset_bd - 1
2092 13394 : DO md = 1, md_max
2093 7836 : j = offset_bd + md - 1
2094 25078 : DO mb = 1, mb_max
2095 11684 : pbd(i) = density(j)
2096 11684 : i = i + 1
2097 19520 : j = j + md_max
2098 : END DO
2099 : END DO
2100 : END IF
2101 2876282 : IF (jatom >= katom) THEN
2102 2876282 : i = 1
2103 2876282 : offset_bc = offset_bc_set(jset, kset) + atomic_offset_bc - 1
2104 2876282 : j = offset_bc
2105 9384950 : DO mc = 1, mc_max
2106 20324752 : DO mb = 1, mb_max
2107 10939802 : pbc(i) = density(j)*fac
2108 10939802 : i = i + 1
2109 17448470 : j = j + 1
2110 : END DO
2111 : END DO
2112 : ELSE
2113 0 : i = 1
2114 0 : offset_bc = offset_bc_set(kset, jset) + atomic_offset_bc - 1
2115 0 : DO mc = 1, mc_max
2116 0 : j = offset_bc + mc - 1
2117 0 : DO mb = 1, mb_max
2118 0 : pbc(i) = density(j)
2119 0 : i = i + 1
2120 0 : j = j + mc_max
2121 : END DO
2122 : END DO
2123 : END IF
2124 2876282 : IF (iatom >= latom) THEN
2125 2217722 : i = 1
2126 2217722 : offset_ad = offset_ad_set(iset, lset) + atomic_offset_ad - 1
2127 2217722 : j = offset_ad
2128 6810939 : DO md = 1, md_max
2129 16353419 : DO ma = 1, ma_max
2130 9542480 : pad(i) = density(j)*fac
2131 9542480 : i = i + 1
2132 14135697 : j = j + 1
2133 : END DO
2134 : END DO
2135 : ELSE
2136 658560 : i = 1
2137 658560 : offset_ad = offset_ad_set(lset, iset) + atomic_offset_ad - 1
2138 1591570 : DO md = 1, md_max
2139 933010 : j = offset_ad + md - 1
2140 3850832 : DO ma = 1, ma_max
2141 2259262 : pad(i) = density(j)
2142 2259262 : i = i + 1
2143 3192272 : j = j + md_max
2144 : END DO
2145 : END DO
2146 : END IF
2147 2876282 : IF (iatom >= katom) THEN
2148 2799034 : i = 1
2149 2799034 : offset_ac = offset_ac_set(iset, kset) + atomic_offset_ac - 1
2150 2799034 : j = offset_ac
2151 9199648 : DO mc = 1, mc_max
2152 22880997 : DO ma = 1, ma_max
2153 13681349 : pac(i) = density(j)*fac
2154 13681349 : i = i + 1
2155 20081963 : j = j + 1
2156 : END DO
2157 : END DO
2158 : ELSE
2159 77248 : i = 1
2160 77248 : offset_ac = offset_ac_set(kset, iset) + atomic_offset_ac - 1
2161 185302 : DO mc = 1, mc_max
2162 108054 : j = offset_ac + mc - 1
2163 511211 : DO ma = 1, ma_max
2164 325909 : pac(i) = density(j)
2165 325909 : i = i + 1
2166 433963 : j = j + mc_max
2167 : END DO
2168 : END DO
2169 : END IF
2170 2876282 : END SUBROUTINE prefetch_density_matrix
2171 :
2172 : ! **************************************************************************************************
2173 : !> \brief This routine updates the forces using buffered density matrices
2174 : !> \param ma_max Size of matrix blocks
2175 : !> \param mb_max Size of matrix blocks
2176 : !> \param mc_max Size of matrix blocks
2177 : !> \param md_max Size of matrix blocks
2178 : !> \param pbd buffer that will contain P(b,d)
2179 : !> \param pbc buffer that will contain P(b,c)
2180 : !> \param pad buffer that will contain P(a,d)
2181 : !> \param pac buffer that will contain P(a,c)
2182 : !> \param fac mulitplication factor (spin, symmetry)
2183 : !> \param prim primitive forces
2184 : !> \param force storage loacation for forces
2185 : !> \param forces_map index table
2186 : !> \param coord which of the 12 coords to be updated
2187 : !> \param pbd_resp ...
2188 : !> \param pbc_resp ...
2189 : !> \param pad_resp ...
2190 : !> \param pac_resp ...
2191 : !> \param resp_only ...
2192 : !> \par History
2193 : !> 03.2009 created [Manuel Guidon]
2194 : !> \author Manuel Guidon
2195 : ! **************************************************************************************************
2196 22815552 : SUBROUTINE update_forces(ma_max, mb_max, mc_max, md_max, &
2197 : pbd, pbc, pad, pac, fac, &
2198 22815552 : prim, force, forces_map, coord, &
2199 : pbd_resp, pbc_resp, pad_resp, pac_resp, resp_only)
2200 :
2201 : INTEGER, INTENT(IN) :: ma_max, mb_max, mc_max, md_max
2202 : REAL(dp), DIMENSION(*), INTENT(IN) :: pbd, pbc, pad, pac
2203 : REAL(dp), INTENT(IN) :: fac
2204 : REAL(dp), DIMENSION(ma_max*mb_max*mc_max*md_max), &
2205 : INTENT(IN) :: prim
2206 : TYPE(qs_force_type), DIMENSION(:), POINTER :: force
2207 : INTEGER, INTENT(IN) :: forces_map(4, 2), coord
2208 : REAL(dp), DIMENSION(*), INTENT(IN), OPTIONAL :: pbd_resp, pbc_resp, pad_resp, pac_resp
2209 : LOGICAL, INTENT(IN), OPTIONAL :: resp_only
2210 :
2211 : INTEGER :: ma, mb, mc, md, p_index
2212 : LOGICAL :: full_force, with_resp_density
2213 : REAL(dp) :: temp1, temp1_resp, temp3, temp3_resp, &
2214 : temp4, teresp
2215 :
2216 22815552 : with_resp_density = .FALSE.
2217 : IF (PRESENT(pbd_resp) .AND. &
2218 : PRESENT(pbc_resp) .AND. &
2219 22815552 : PRESENT(pad_resp) .AND. &
2220 : PRESENT(pac_resp)) with_resp_density = .TRUE.
2221 :
2222 : IF (with_resp_density) THEN
2223 11668620 : full_force = .TRUE.
2224 11668620 : IF (PRESENT(resp_only)) full_force = .NOT. resp_only
2225 11668620 : p_index = 0
2226 11668620 : temp4 = 0.0_dp
2227 33818064 : DO md = 1, md_max
2228 87186876 : DO mc = 1, mc_max
2229 176449044 : DO mb = 1, mb_max
2230 100930788 : temp1 = pbc((mc - 1)*mb_max + mb)*fac
2231 100930788 : temp3 = pbd((md - 1)*mb_max + mb)*fac
2232 100930788 : temp1_resp = pbc_resp((mc - 1)*mb_max + mb)*fac
2233 100930788 : temp3_resp = pbd_resp((md - 1)*mb_max + mb)*fac
2234 433302132 : DO ma = 1, ma_max
2235 279002532 : p_index = p_index + 1
2236 : ! HF-SCF
2237 279002532 : IF (full_force) THEN
2238 : teresp = temp1*pad((md - 1)*ma_max + ma) + &
2239 98082384 : temp3*pac((mc - 1)*ma_max + ma)
2240 : ELSE
2241 : teresp = 0.0_dp
2242 : END IF
2243 : ! RESP+HF
2244 : teresp = teresp + &
2245 : pac((mc - 1)*ma_max + ma)*temp3_resp + &
2246 : pac_resp((mc - 1)*ma_max + ma)*temp3 + &
2247 : pad((md - 1)*ma_max + ma)*temp1_resp + &
2248 279002532 : pad_resp((md - 1)*ma_max + ma)*temp1
2249 379933320 : temp4 = temp4 + teresp*prim(p_index)
2250 : END DO !ma
2251 : END DO !mb
2252 : END DO !mc
2253 : END DO !md
2254 : ELSE
2255 : p_index = 0
2256 : temp4 = 0.0_dp
2257 33124572 : DO md = 1, md_max
2258 92439276 : DO mc = 1, mc_max
2259 198196464 : DO mb = 1, mb_max
2260 116904120 : temp1 = pbc((mc - 1)*mb_max + mb)*fac
2261 116904120 : temp3 = pbd((md - 1)*mb_max + mb)*fac
2262 569013240 : DO ma = 1, ma_max
2263 392794416 : p_index = p_index + 1
2264 : teresp = temp1*pad((md - 1)*ma_max + ma) + &
2265 392794416 : temp3*pac((mc - 1)*ma_max + ma)
2266 509698536 : temp4 = temp4 + teresp*prim(p_index)
2267 : END DO !ma
2268 : END DO !mb
2269 : END DO !mc
2270 : END DO !md
2271 : END IF
2272 :
2273 22815552 : !$OMP ATOMIC
2274 : force(forces_map((coord - 1)/3 + 1, 1))%fock_4c(MOD(coord - 1, 3) + 1, &
2275 : forces_map((coord - 1)/3 + 1, 2)) = &
2276 : force(forces_map((coord - 1)/3 + 1, 1))%fock_4c(MOD(coord - 1, 3) + 1, &
2277 : forces_map((coord - 1)/3 + 1, 2)) - &
2278 : temp4
2279 :
2280 22815552 : END SUBROUTINE update_forces
2281 :
2282 : ! **************************************************************************************************
2283 : !> \brief This routine updates the virial using buffered density matrices
2284 : !> \param ma_max Size of matrix blocks
2285 : !> \param mb_max Size of matrix blocks
2286 : !> \param mc_max Size of matrix blocks
2287 : !> \param md_max Size of matrix blocks
2288 : !> \param pbd buffer that will contain P(b,d)
2289 : !> \param pbc buffer that will contain P(b,c)
2290 : !> \param pad buffer that will contain P(a,d)
2291 : !> \param pac buffer that will contain P(a,c)
2292 : !> \param fac mulitplication factor (spin, symmetry)
2293 : !> \param prim primitive forces
2294 : !> \param tmp_virial ...
2295 : !> \param coord which of the 12 coords to be updated
2296 : !> \param l ...
2297 : !> \param pbd_resp ...
2298 : !> \param pbc_resp ...
2299 : !> \param pad_resp ...
2300 : !> \param pac_resp ...
2301 : !> \par History
2302 : !> 03.2009 created [Manuel Guidon]
2303 : !> \author Manuel Guidon
2304 : ! **************************************************************************************************
2305 6323868 : SUBROUTINE update_virial(ma_max, mb_max, mc_max, md_max, &
2306 : pbd, pbc, pad, pac, fac, &
2307 6323868 : prim, tmp_virial, coord, l, &
2308 : pbd_resp, pbc_resp, pad_resp, pac_resp, resp_only)
2309 :
2310 : INTEGER, INTENT(IN) :: ma_max, mb_max, mc_max, md_max
2311 : REAL(dp), DIMENSION(*), INTENT(IN) :: pbd, pbc, pad, pac
2312 : REAL(dp), INTENT(IN) :: fac
2313 : REAL(dp), DIMENSION(ma_max*mb_max*mc_max*md_max), &
2314 : INTENT(IN) :: prim
2315 : REAL(dp) :: tmp_virial(3, 3)
2316 : INTEGER, INTENT(IN) :: coord, l
2317 : REAL(dp), DIMENSION(*), INTENT(IN), OPTIONAL :: pbd_resp, pbc_resp, pad_resp, pac_resp
2318 : LOGICAL, INTENT(IN), OPTIONAL :: resp_only
2319 :
2320 : INTEGER :: i, j, ma, mb, mc, md, p_index
2321 : LOGICAL :: with_resp_density, full_force
2322 : REAL(dp) :: temp1, temp1_resp, temp3, temp3_resp, &
2323 : temp4, teresp
2324 :
2325 6323868 : with_resp_density = .FALSE.
2326 : IF (PRESENT(pbd_resp) .AND. &
2327 : PRESENT(pbc_resp) .AND. &
2328 6323868 : PRESENT(pad_resp) .AND. &
2329 : PRESENT(pac_resp)) with_resp_density = .TRUE.
2330 :
2331 : IF (with_resp_density) THEN
2332 5587128 : full_force = .TRUE.
2333 5587128 : IF (PRESENT(resp_only)) full_force = .NOT. resp_only
2334 5587128 : p_index = 0
2335 5587128 : temp4 = 0.0_dp
2336 16806312 : DO md = 1, md_max
2337 41978952 : DO mc = 1, mc_max
2338 91230912 : DO mb = 1, mb_max
2339 54839088 : temp1 = pbc((mc - 1)*mb_max + mb)*fac
2340 54839088 : temp3 = pbd((md - 1)*mb_max + mb)*fac
2341 54839088 : temp1_resp = pbc_resp((mc - 1)*mb_max + mb)*fac
2342 54839088 : temp3_resp = pbd_resp((md - 1)*mb_max + mb)*fac
2343 236616804 : DO ma = 1, ma_max
2344 156605076 : p_index = p_index + 1
2345 : ! HF-SCF
2346 156605076 : IF (full_force) THEN
2347 : teresp = temp1*pad((md - 1)*ma_max + ma) + &
2348 25169472 : temp3*pac((mc - 1)*ma_max + ma)
2349 : ELSE
2350 : teresp = 0.0_dp
2351 : END IF
2352 : ! RESP+HF
2353 : teresp = teresp + &
2354 : pac((mc - 1)*ma_max + ma)*temp3_resp + &
2355 : pac_resp((mc - 1)*ma_max + ma)*temp3 + &
2356 : pad((md - 1)*ma_max + ma)*temp1_resp + &
2357 156605076 : pad_resp((md - 1)*ma_max + ma)*temp1
2358 211444164 : temp4 = temp4 + teresp*prim(p_index)
2359 : END DO !ma
2360 : END DO !mb
2361 : END DO !mc
2362 : END DO !md
2363 : ELSE
2364 : p_index = 0
2365 : temp4 = 0.0_dp
2366 1830528 : DO md = 1, md_max
2367 3917916 : DO mc = 1, mc_max
2368 5776452 : DO mb = 1, mb_max
2369 2595276 : temp1 = pbc((mc - 1)*mb_max + mb)*fac
2370 2595276 : temp3 = pbd((md - 1)*mb_max + mb)*fac
2371 9136800 : DO ma = 1, ma_max
2372 4454136 : p_index = p_index + 1
2373 : teresp = temp1*pad((md - 1)*ma_max + ma) + &
2374 4454136 : temp3*pac((mc - 1)*ma_max + ma)
2375 7049412 : temp4 = temp4 + teresp*prim(p_index)
2376 : END DO !ma
2377 : END DO !mb
2378 : END DO !mc
2379 : END DO !md
2380 : END IF
2381 :
2382 6323868 : j = l
2383 6323868 : i = MOD(coord - 1, 3) + 1
2384 6323868 : tmp_virial(i, j) = tmp_virial(i, j) - temp4
2385 6323868 : END SUBROUTINE update_virial
2386 :
2387 : #:include 'hfx_get_pmax_val.fypp'
2388 :
2389 : END MODULE hfx_derivatives
|