Line data Source code
1 : !--------------------------------------------------------------------------------------------------!
2 : ! CP2K: A general program to perform molecular dynamics simulations !
3 : ! Copyright 2000-2024 CP2K developers group <https://cp2k.org> !
4 : ! !
5 : ! SPDX-License-Identifier: GPL-2.0-or-later !
6 : !--------------------------------------------------------------------------------------------------!
7 :
8 : ! **************************************************************************************************
9 : !> \brief Routines to calculate HFX energy and potential
10 : !> \par History
11 : !> 11.2006 created [Manuel Guidon]
12 : !> \author Manuel Guidon
13 : ! **************************************************************************************************
14 : MODULE hfx_energy_potential
15 : USE admm_types, ONLY: get_admm_env
16 : USE atomic_kind_types, ONLY: atomic_kind_type, &
17 : get_atomic_kind_set
18 : USE bibliography, ONLY: cite_reference, &
19 : guidon2008, &
20 : guidon2009
21 : USE cell_types, ONLY: cell_type, &
22 : pbc
23 : USE cp_control_types, ONLY: dft_control_type
24 : USE cp_files, ONLY: close_file, &
25 : open_file
26 : USE cp_log_handling, ONLY: cp_get_default_logger, &
27 : cp_logger_type
28 : USE cp_output_handling, ONLY: cp_p_file, &
29 : cp_print_key_finished_output, &
30 : cp_print_key_should_output, &
31 : cp_print_key_unit_nr
32 : USE message_passing, ONLY: mp_para_env_type
33 : USE cp_dbcsr_api, ONLY: dbcsr_copy, &
34 : dbcsr_dot, &
35 : dbcsr_get_matrix_type, &
36 : dbcsr_p_type, &
37 : dbcsr_type_antisymmetric
38 : USE gamma, ONLY: init_md_ftable
39 : USE hfx_communication, ONLY: distribute_ks_matrix, &
40 : get_atomic_block_maps, &
41 : get_full_density
42 : USE hfx_compression_methods, ONLY: hfx_add_mult_cache_elements, &
43 : hfx_add_single_cache_element, &
44 : hfx_decompress_first_cache, &
45 : hfx_flush_last_cache, &
46 : hfx_get_mult_cache_elements, &
47 : hfx_get_single_cache_element, &
48 : hfx_reset_cache_and_container
49 : USE hfx_contract_block, ONLY: contract_block
50 : USE hfx_libint_interface, ONLY: evaluate_eri
51 : USE hfx_load_balance_methods, ONLY: collect_load_balance_info, &
52 : hfx_load_balance, &
53 : hfx_update_load_balance
54 : USE hfx_pair_list_methods, ONLY: build_atomic_pair_list, &
55 : build_pair_list, &
56 : build_pair_list_pgf, &
57 : build_pgf_product_list, &
58 : pgf_product_list_size
59 : USE hfx_screening_methods, ONLY: calc_pair_dist_radii, &
60 : calc_screening_functions, &
61 : update_pmax_mat
62 : USE hfx_types, ONLY: &
63 : alloc_containers, dealloc_containers, hfx_basis_info_type, hfx_basis_type, hfx_cache_type, &
64 : hfx_cell_type, hfx_container_type, hfx_create_neighbor_cells, hfx_distribution, &
65 : hfx_general_type, hfx_init_container, hfx_load_balance_type, hfx_memory_type, hfx_p_kind, &
66 : hfx_pgf_list, hfx_pgf_product_list, hfx_potential_type, hfx_reset_memory_usage_counter, &
67 : hfx_screen_coeff_type, hfx_screening_type, hfx_task_list_type, hfx_type, init_t_c_g0_lmax, &
68 : log_zero, pair_list_type, pair_set_list_type
69 : USE input_constants, ONLY: do_potential_mix_cl_trunc, &
70 : do_potential_truncated, &
71 : hfx_do_eval_energy
72 : USE input_section_types, ONLY: section_vals_type
73 : USE kinds, ONLY: default_string_length, &
74 : dp, &
75 : int_8
76 : USE kpoint_types, ONLY: get_kpoint_info, &
77 : kpoint_type
78 : USE libint_wrapper, ONLY: cp_libint_t
79 : USE machine, ONLY: m_flush, &
80 : m_memory, &
81 : m_walltime
82 : USE mathconstants, ONLY: fac
83 : USE orbital_pointers, ONLY: nco, &
84 : ncoset, &
85 : nso
86 : USE particle_types, ONLY: particle_type
87 : USE qs_environment_types, ONLY: get_qs_env, &
88 : qs_environment_type
89 : USE qs_ks_types, ONLY: get_ks_env, &
90 : qs_ks_env_type
91 : USE t_c_g0, ONLY: init
92 : USE util, ONLY: sort
93 :
94 : !$ USE OMP_LIB, ONLY: omp_get_max_threads, omp_get_thread_num, omp_get_num_threads
95 :
96 : #include "./base/base_uses.f90"
97 :
98 : IMPLICIT NONE
99 : PRIVATE
100 :
101 : PUBLIC :: integrate_four_center, coulomb4
102 :
103 : CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'hfx_energy_potential'
104 :
105 : !***
106 :
107 : CONTAINS
108 :
109 : ! **************************************************************************************************
110 : !> \brief computes four center integrals for a full basis set and updates the
111 : !> Kohn-Sham-Matrix and energy. Uses all 8 eri symmetries
112 : !> \param qs_env ...
113 : !> \param x_data ...
114 : !> \param ks_matrix ...
115 : !> \param ehfx energy calculated with the updated HFX matrix
116 : !> \param rho_ao density matrix in ao basis
117 : !> \param hfx_section input_section HFX
118 : !> \param para_env ...
119 : !> \param geometry_did_change flag that indicates we have to recalc integrals
120 : !> \param irep Index for the HFX replica
121 : !> \param distribute_fock_matrix Flag that indicates whether to communicate the
122 : !> new fock matrix or not
123 : !> \param ispin ...
124 : !> \par History
125 : !> 06.2007 created [Manuel Guidon]
126 : !> 08.2007 optimized load balance [Manuel Guidon]
127 : !> 09.2007 new parallelization [Manuel Guidon]
128 : !> 02.2009 completely rewritten screening part [Manuel Guidon]
129 : !> 12.2017 major bug fix. removed wrong cycle that was caussing segfault.
130 : !> see https://groups.google.com/forum/#!topic/cp2k/pc6B14XOALY
131 : !> [Tobias Binninger + Valery Weber]
132 : !> \author Manuel Guidon
133 : ! **************************************************************************************************
134 34235 : SUBROUTINE integrate_four_center(qs_env, x_data, ks_matrix, ehfx, rho_ao, hfx_section, para_env, &
135 : geometry_did_change, irep, distribute_fock_matrix, &
136 : ispin)
137 :
138 : TYPE(qs_environment_type), POINTER :: qs_env
139 : TYPE(hfx_type), DIMENSION(:, :), POINTER :: x_data
140 : TYPE(dbcsr_p_type), DIMENSION(:, :), POINTER :: ks_matrix
141 : REAL(KIND=dp), INTENT(OUT) :: ehfx
142 : TYPE(dbcsr_p_type), DIMENSION(:, :), POINTER :: rho_ao
143 : TYPE(section_vals_type), POINTER :: hfx_section
144 : TYPE(mp_para_env_type), POINTER :: para_env
145 : LOGICAL :: geometry_did_change
146 : INTEGER :: irep
147 : LOGICAL, INTENT(IN) :: distribute_fock_matrix
148 : INTEGER, INTENT(IN) :: ispin
149 :
150 : CHARACTER(LEN=*), PARAMETER :: routineN = 'integrate_four_center'
151 :
152 : CHARACTER(len=default_string_length) :: eps_scaling_str, eps_schwarz_min_str
153 : INTEGER :: act_atomic_block_offset, act_set_offset, atomic_offset_ac, atomic_offset_ad, &
154 : atomic_offset_bc, atomic_offset_bd, bin, bits_max_val, buffer_left, buffer_size, &
155 : buffer_start, cache_size, current_counter, handle, handle_bin, handle_dist_ks, &
156 : handle_getP, handle_load, handle_main, i, i_list_ij, i_list_kl, i_set_list_ij, &
157 : i_set_list_ij_start, i_set_list_ij_stop, i_set_list_kl, i_set_list_kl_start, &
158 : i_set_list_kl_stop, i_thread, iatom, iatom_block, iatom_end, iatom_start, ikind, img, &
159 : iset, iw, j, jatom, jatom_block, jatom_end, jatom_start, jkind, jset, katom, katom_block, &
160 : katom_end
161 : INTEGER :: katom_start, kind_kind_idx, kkind, kset, l_max, latom, latom_block, latom_end, &
162 : latom_start, lkind, lset, ma, max_am, max_pgf, max_set, mb, my_bin_id, my_bin_size, &
163 : my_thread_id, n_threads, natom, nbits, ncob, ncos_max, nints, nkimages, nkind, &
164 : nneighbors, nseta, nsetb, nsgf_max, nspins, pa, sgfb, shm_task_counter, shm_total_bins, &
165 : sphi_a_u1, sphi_a_u2, sphi_a_u3, sphi_b_u1, sphi_b_u2, sphi_b_u3, sphi_c_u1, sphi_c_u2, &
166 : sphi_c_u3, sphi_d_u1, sphi_d_u2, sphi_d_u3, swap_id, tmp_i4, unit_id
167 : INTEGER(int_8) :: atom_block, counter, estimate_to_store_int, max_val_memory, &
168 : mb_size_buffers, mb_size_f, mb_size_p, mem_compression_counter, &
169 : mem_compression_counter_disk, mem_eris, mem_eris_disk, mem_max_val, memsize_after, &
170 : memsize_before, my_current_counter, my_istart, n_processes, nblocks, ncpu, neris_disk, &
171 : neris_incore, neris_onthefly, neris_tmp, neris_total, nprim_ints, &
172 : shm_mem_compression_counter, shm_neris_disk, shm_neris_incore, shm_neris_onthefly, &
173 : shm_neris_total, shm_nprim_ints, shm_stor_count_int_disk, shm_stor_count_max_val, &
174 : shm_storage_counter_integrals, stor_count_int_disk
175 : INTEGER(int_8) :: stor_count_max_val, storage_counter_integrals, subtr_size_mb, tmp_block, &
176 : tmp_i8(8)
177 34235 : INTEGER(int_8), ALLOCATABLE, DIMENSION(:) :: tmp_task_list_cost
178 68470 : INTEGER, ALLOCATABLE, DIMENSION(:) :: kind_of, last_sgf_global, nimages, &
179 34235 : tmp_index
180 34235 : INTEGER, DIMENSION(:), POINTER :: la_max, la_min, lb_max, lb_min, lc_max, lc_min, ld_max, &
181 68470 : ld_min, npgfa, npgfb, npgfc, npgfd, nsgfa, nsgfb, nsgfc, nsgfd, shm_block_offset
182 68470 : INTEGER, DIMENSION(:, :), POINTER :: first_sgfb, nsgfl_a, nsgfl_b, nsgfl_c, nsgfl_d, &
183 34235 : offset_ac_set, offset_ad_set, offset_bc_set, offset_bd_set, shm_atomic_block_offset
184 : INTEGER, DIMENSION(:, :), POINTER, SAVE :: shm_is_assoc_atomic_block
185 34235 : INTEGER, DIMENSION(:, :, :), POINTER :: cell_to_index
186 34235 : INTEGER, DIMENSION(:, :, :, :), POINTER :: shm_set_offset
187 : INTEGER, SAVE :: shm_number_of_p_entries
188 : LOGICAL :: bins_left, buffer_overflow, do_disk_storage, do_dynamic_load_balancing, do_it, &
189 : do_kpoints, do_p_screening, do_periodic, do_print_load_balance_info, is_anti_symmetric, &
190 : ks_fully_occ, my_geo_change, treat_lsd_in_core, use_disk_storage
191 34235 : LOGICAL, DIMENSION(:, :), POINTER :: shm_atomic_pair_list
192 : REAL(dp) :: afac, bintime_start, bintime_stop, cartesian_estimate, compression_factor, &
193 : compression_factor_disk, ene_x_aa, ene_x_aa_diag, ene_x_bb, ene_x_bb_diag, eps_schwarz, &
194 : eps_storage, etmp, fac, hf_fraction, ln_10, log10_eps_schwarz, log10_pmax, &
195 : max_contraction_val, max_val1, max_val2, max_val2_set, pmax_atom, pmax_blocks, &
196 : pmax_entry, ra(3), rab2, rb(3), rc(3), rcd2, rd(3), screen_kind_ij, screen_kind_kl, &
197 : spherical_estimate, symm_fac
198 34235 : REAL(dp), ALLOCATABLE, DIMENSION(:) :: ee_buffer1, ee_buffer2, ee_primitives_tmp, ee_work, &
199 68470 : ee_work2, kac_buf, kad_buf, kbc_buf, kbd_buf, pac_buf, pad_buf, pbc_buf, pbd_buf, &
200 34235 : primitive_integrals
201 34235 : REAL(dp), DIMENSION(:), POINTER :: p_work
202 34235 : REAL(dp), DIMENSION(:, :), POINTER :: full_density_alpha, full_density_beta, full_ks_alpha, &
203 68470 : full_ks_beta, max_contraction, ptr_p_1, ptr_p_2, ptr_p_3, ptr_p_4, shm_pmax_atom, &
204 34235 : shm_pmax_block, sphi_b, zeta, zetb, zetc, zetd
205 34235 : REAL(dp), DIMENSION(:, :, :), POINTER :: sphi_a_ext_set, sphi_b_ext_set, &
206 34235 : sphi_c_ext_set, sphi_d_ext_set
207 34235 : REAL(dp), DIMENSION(:, :, :, :), POINTER :: sphi_a_ext, sphi_b_ext, sphi_c_ext, &
208 34235 : sphi_d_ext
209 : REAL(KIND=dp) :: coeffs_kind_max0
210 34235 : TYPE(atomic_kind_type), DIMENSION(:), POINTER :: atomic_kind_set
211 : TYPE(cell_type), POINTER :: cell
212 : TYPE(cp_libint_t) :: private_lib
213 : TYPE(cp_logger_type), POINTER :: logger
214 34235 : TYPE(dbcsr_p_type), DIMENSION(:), POINTER :: matrix_ks_aux_fit_hfx
215 : TYPE(dft_control_type), POINTER :: dft_control
216 : TYPE(hfx_basis_info_type), POINTER :: basis_info
217 34235 : TYPE(hfx_basis_type), DIMENSION(:), POINTER :: basis_parameter
218 34235 : TYPE(hfx_cache_type), DIMENSION(:), POINTER :: integral_caches, integral_caches_disk
219 : TYPE(hfx_cache_type), POINTER :: maxval_cache, maxval_cache_disk
220 34235 : TYPE(hfx_container_type), DIMENSION(:), POINTER :: integral_containers, &
221 34235 : integral_containers_disk
222 : TYPE(hfx_container_type), POINTER :: maxval_container, maxval_container_disk
223 : TYPE(hfx_distribution), POINTER :: distribution_energy
224 : TYPE(hfx_general_type) :: general_parameter
225 : TYPE(hfx_load_balance_type), POINTER :: load_balance_parameter
226 : TYPE(hfx_memory_type), POINTER :: memory_parameter
227 34235 : TYPE(hfx_p_kind), DIMENSION(:), POINTER :: shm_initial_p
228 34235 : TYPE(hfx_pgf_list), ALLOCATABLE, DIMENSION(:) :: pgf_list_ij, pgf_list_kl
229 : TYPE(hfx_pgf_product_list), ALLOCATABLE, &
230 34235 : DIMENSION(:) :: pgf_product_list
231 : TYPE(hfx_potential_type) :: potential_parameter
232 : TYPE(hfx_screen_coeff_type), DIMENSION(:, :), &
233 34235 : POINTER :: screen_coeffs_kind, tmp_R_1, tmp_R_2, &
234 34235 : tmp_screen_pgf1, tmp_screen_pgf2
235 : TYPE(hfx_screen_coeff_type), &
236 34235 : DIMENSION(:, :, :, :), POINTER :: screen_coeffs_set
237 : TYPE(hfx_screen_coeff_type), &
238 34235 : DIMENSION(:, :, :, :, :, :), POINTER :: radii_pgf, screen_coeffs_pgf
239 : TYPE(hfx_screening_type) :: screening_parameter
240 34235 : TYPE(hfx_task_list_type), DIMENSION(:), POINTER :: shm_task_list, tmp_task_list
241 : TYPE(hfx_type), POINTER :: actual_x_data, shm_master_x_data
242 : TYPE(kpoint_type), POINTER :: kpoints
243 : TYPE(pair_list_type) :: list_ij, list_kl
244 : TYPE(pair_set_list_type), ALLOCATABLE, &
245 34235 : DIMENSION(:) :: set_list_ij, set_list_kl
246 34235 : TYPE(particle_type), DIMENSION(:), POINTER :: particle_set
247 : TYPE(qs_ks_env_type), POINTER :: ks_env
248 :
249 34235 : NULLIFY (dft_control, matrix_ks_aux_fit_hfx)
250 :
251 34235 : CALL timeset(routineN, handle)
252 :
253 34235 : CALL cite_reference(Guidon2008)
254 34235 : CALL cite_reference(Guidon2009)
255 :
256 34235 : ehfx = 0.0_dp
257 :
258 : ! ** This is not very clean, but effective. ispin can only be 2 if we do the beta spin part in core
259 34235 : my_geo_change = geometry_did_change
260 34235 : IF (ispin == 2) my_geo_change = .FALSE.
261 :
262 34235 : logger => cp_get_default_logger()
263 :
264 34235 : is_anti_symmetric = dbcsr_get_matrix_type(rho_ao(1, 1)%matrix) .EQ. dbcsr_type_antisymmetric
265 :
266 34235 : IF (my_geo_change) THEN
267 2098 : CALL m_memory(memsize_before)
268 2098 : CALL para_env%max(memsize_before)
269 : iw = cp_print_key_unit_nr(logger, hfx_section, "HF_INFO", &
270 2098 : extension=".scfLog")
271 2098 : IF (iw > 0) THEN
272 : WRITE (UNIT=iw, FMT="(/,(T3,A,T60,I21))") &
273 388 : "HFX_MEM_INFO| Est. max. program size before HFX [MiB]:", memsize_before/(1024*1024)
274 388 : CALL m_flush(iw)
275 : END IF
276 : CALL cp_print_key_finished_output(iw, logger, hfx_section, &
277 2098 : "HF_INFO")
278 : END IF
279 :
280 34235 : CALL get_qs_env(qs_env=qs_env, atomic_kind_set=atomic_kind_set, cell=cell)
281 :
282 34235 : NULLIFY (cell_to_index)
283 34235 : CALL get_qs_env(qs_env=qs_env, do_kpoints=do_kpoints)
284 34235 : IF (do_kpoints) THEN
285 0 : CALL get_qs_env(qs_env=qs_env, ks_env=ks_env)
286 0 : CALL get_ks_env(ks_env=ks_env, kpoints=kpoints)
287 0 : CALL get_kpoint_info(kpoint=kpoints, cell_to_index=cell_to_index)
288 : END IF
289 :
290 : !! Calculate l_max used in fgamma , because init_md_ftable is definitely not thread safe
291 34235 : nkind = SIZE(atomic_kind_set, 1)
292 34235 : l_max = 0
293 97023 : DO ikind = 1, nkind
294 287887 : l_max = MAX(l_max, MAXVAL(x_data(1, 1)%basis_parameter(ikind)%lmax))
295 : END DO
296 34235 : l_max = 4*l_max
297 34235 : CALL init_md_ftable(l_max)
298 :
299 34235 : IF (x_data(1, 1)%potential_parameter%potential_type == do_potential_truncated .OR. &
300 : x_data(1, 1)%potential_parameter%potential_type == do_potential_mix_cl_trunc) THEN
301 9162 : IF (l_max > init_t_c_g0_lmax) THEN
302 298 : IF (para_env%is_source()) THEN
303 149 : CALL open_file(unit_number=unit_id, file_name=x_data(1, 1)%potential_parameter%filename)
304 : END IF
305 298 : CALL init(l_max, unit_id, para_env%mepos, para_env)
306 298 : IF (para_env%is_source()) THEN
307 149 : CALL close_file(unit_id)
308 : END IF
309 298 : init_t_c_g0_lmax = l_max
310 : END IF
311 : END IF
312 :
313 34235 : n_threads = 1
314 34235 : !$ n_threads = omp_get_max_threads()
315 :
316 34235 : shm_neris_total = 0
317 34235 : shm_nprim_ints = 0
318 34235 : shm_neris_onthefly = 0
319 34235 : shm_storage_counter_integrals = 0
320 34235 : shm_stor_count_int_disk = 0
321 34235 : shm_neris_incore = 0
322 34235 : shm_neris_disk = 0
323 34235 : shm_stor_count_max_val = 0
324 :
325 : !$OMP PARALLEL DEFAULT(OMP_DEFAULT_NONE_WITH_OOP) &
326 : !$OMP SHARED(qs_env,&
327 : !$OMP x_data,&
328 : !$OMP ks_matrix,&
329 : !$OMP ehfx,&
330 : !$OMP rho_ao,&
331 : !$OMP matrix_ks_aux_fit_hfx,&
332 : !$OMP hfx_section,&
333 : !$OMP para_env,&
334 : !$OMP my_geo_change,&
335 : !$OMP irep,&
336 : !$OMP distribute_fock_matrix,&
337 : !$OMP cell_to_index,&
338 : !$OMP ncoset,&
339 : !$OMP nso,&
340 : !$OMP nco,&
341 : !$OMP full_ks_alpha,&
342 : !$OMP full_ks_beta,&
343 : !$OMP n_threads,&
344 : !$OMP full_density_alpha,&
345 : !$OMP full_density_beta,&
346 : !$OMP shm_initial_p,&
347 : !$OMP shm_is_assoc_atomic_block,&
348 : !$OMP shm_number_of_p_entries,&
349 : !$OMP shm_neris_total,&
350 : !$OMP shm_neris_onthefly,&
351 : !$OMP shm_storage_counter_integrals,&
352 : !$OMP shm_stor_count_int_disk,&
353 : !$OMP shm_neris_incore,&
354 : !$OMP shm_neris_disk,&
355 : !$OMP shm_nprim_ints,&
356 : !$OMP shm_stor_count_max_val,&
357 : !$OMP cell,&
358 : !$OMP screen_coeffs_set,&
359 : !$OMP screen_coeffs_kind,&
360 : !$OMP screen_coeffs_pgf,&
361 : !$OMP pgf_product_list_size,&
362 : !$OMP radii_pgf,&
363 : !$OMP nkind,&
364 : !$OMP ispin,&
365 : !$OMP is_anti_symmetric,&
366 : !$OMP shm_atomic_block_offset,&
367 : !$OMP shm_set_offset,&
368 : !$OMP shm_block_offset,&
369 : !$OMP shm_task_counter,&
370 : !$OMP shm_task_list,&
371 : !$OMP shm_total_bins,&
372 : !$OMP shm_master_x_data,&
373 : !$OMP shm_pmax_atom,&
374 : !$OMP shm_pmax_block,&
375 : !$OMP shm_atomic_pair_list,&
376 : !$OMP shm_mem_compression_counter,&
377 : !$OMP do_print_load_balance_info) &
378 : !$OMP PRIVATE(ln_10,i_thread,actual_x_data,do_periodic,screening_parameter,potential_parameter,&
379 : !$OMP general_parameter,load_balance_parameter,memory_parameter,cache_size,bits_max_val,&
380 : !$OMP basis_parameter,basis_info,treat_lsd_in_core,ncpu,n_processes,neris_total,neris_incore,&
381 : !$OMP neris_disk,neris_onthefly,mem_eris,mem_eris_disk,mem_max_val,compression_factor,&
382 : !$OMP compression_factor_disk,nprim_ints,neris_tmp,max_val_memory,max_am,do_p_screening,&
383 : !$OMP max_set,particle_set,atomic_kind_set,natom,kind_of,ncos_max,nsgf_max,ikind,&
384 : !$OMP nseta,npgfa,la_max,nsgfa,primitive_integrals,pbd_buf,pbc_buf,pad_buf,pac_buf,kbd_buf,kbc_buf,&
385 : !$OMP kad_buf,kac_buf,ee_work,ee_work2,ee_buffer1,ee_buffer2,ee_primitives_tmp,nspins,max_contraction,&
386 : !$OMP max_pgf,jkind,lb_max,nsetb,npgfb,first_sgfb,sphi_b,nsgfb,ncob,sgfb,nneighbors,pgf_list_ij,pgf_list_kl,&
387 : !$OMP pgf_product_list,nimages,ks_fully_occ,subtr_size_mb,use_disk_storage,counter,do_disk_storage,&
388 : !$OMP maxval_container_disk,maxval_cache_disk,integral_containers_disk,integral_caches_disk,eps_schwarz,&
389 : !$OMP log10_eps_schwarz,eps_storage,hf_fraction,buffer_overflow,logger,private_lib,last_sgf_global,handle_getp,&
390 : !$OMP p_work,fac,handle_load,do_dynamic_load_balancing,my_bin_size,maxval_container,integral_containers,maxval_cache,&
391 : !$OMP integral_caches,tmp_task_list,tmp_task_list_cost,tmp_index,handle_main,coeffs_kind_max0,set_list_ij,&
392 : !$OMP set_list_kl,iatom_start,iatom_end,jatom_start,jatom_end,nblocks,bins_left,do_it,distribution_energy,&
393 : !$OMP my_thread_id,my_bin_id,handle_bin,bintime_start,my_istart,my_current_counter,latom_block,tmp_block,&
394 : !$OMP katom_block,katom_start,katom_end,latom_start,latom_end,pmax_blocks,list_ij,list_kl,i_set_list_ij_start,&
395 : !$OMP i_set_list_ij_stop,ra,rb,rab2,la_min,zeta,sphi_a_ext,nsgfl_a,sphi_a_u1,sphi_a_u2,sphi_a_u3,&
396 : !$OMP lb_min,zetb,sphi_b_ext,nsgfl_b,sphi_b_u1,sphi_b_u2,sphi_b_u3,katom,latom,i_set_list_kl_start,i_set_list_kl_stop,&
397 : !$OMP kkind,lkind,rc,rd,rcd2,pmax_atom,screen_kind_ij,screen_kind_kl,symm_fac,lc_max,lc_min,npgfc,zetc,nsgfc,sphi_c_ext,&
398 : !$OMP nsgfl_c,sphi_c_u1,sphi_c_u2,sphi_c_u3,ld_max,ld_min,npgfd,zetd,nsgfd,sphi_d_ext,nsgfl_d,sphi_d_u1,sphi_d_u2,&
399 : !$OMP sphi_d_u3,atomic_offset_bd,atomic_offset_bc,atomic_offset_ad,atomic_offset_ac,offset_bd_set,offset_bc_set,&
400 : !$OMP offset_ad_set,offset_ac_set,swap_id,kind_kind_idx,ptr_p_1,ptr_p_2,ptr_p_3,ptr_p_4,mem_compression_counter,&
401 : !$OMP mem_compression_counter_disk,max_val1,sphi_a_ext_set,sphi_b_ext_set,kset,lset,max_val2_set,max_val2,&
402 : !$OMP sphi_c_ext_set,sphi_d_ext_set,pmax_entry,log10_pmax,current_counter,nints,estimate_to_store_int,&
403 : !$OMP spherical_estimate,nbits,buffer_left,buffer_start,buffer_size,max_contraction_val,tmp_r_1,tmp_r_2,&
404 : !$OMP tmp_screen_pgf1,tmp_screen_pgf2,cartesian_estimate,bintime_stop,iw,memsize_after,storage_counter_integrals,&
405 : !$OMP stor_count_int_disk,stor_count_max_val,ene_x_aa,ene_x_bb,mb_size_p,mb_size_f,mb_size_buffers,afac,ene_x_aa_diag,&
406 : !$OMP ene_x_bb_diag,act_atomic_block_offset,act_set_offset,j,handle_dist_ks,tmp_i8,tmp_i4,dft_control,&
407 34235 : !$OMP etmp,nkimages,img,bin,eps_scaling_str,eps_schwarz_min_str)
408 :
409 : ln_10 = LOG(10.0_dp)
410 : i_thread = 0
411 : !$ i_thread = omp_get_thread_num()
412 :
413 : actual_x_data => x_data(irep, i_thread + 1)
414 : !$OMP MASTER
415 : shm_master_x_data => x_data(irep, 1)
416 : !$OMP END MASTER
417 : !$OMP BARRIER
418 :
419 : do_periodic = actual_x_data%periodic_parameter%do_periodic
420 :
421 : IF (do_periodic) THEN
422 : ! ** Rebuild neighbor lists in case the cell has changed (i.e. NPT MD)
423 : actual_x_data%periodic_parameter%number_of_shells = actual_x_data%periodic_parameter%mode
424 : CALL hfx_create_neighbor_cells(actual_x_data, actual_x_data%periodic_parameter%number_of_shells_from_input, &
425 : cell, i_thread)
426 : END IF
427 :
428 : screening_parameter = actual_x_data%screening_parameter
429 : potential_parameter = actual_x_data%potential_parameter
430 :
431 : general_parameter = actual_x_data%general_parameter
432 : load_balance_parameter => actual_x_data%load_balance_parameter
433 : memory_parameter => actual_x_data%memory_parameter
434 :
435 : cache_size = memory_parameter%cache_size
436 : bits_max_val = memory_parameter%bits_max_val
437 :
438 : basis_parameter => actual_x_data%basis_parameter
439 : basis_info => actual_x_data%basis_info
440 :
441 : treat_lsd_in_core = general_parameter%treat_lsd_in_core
442 :
443 : ncpu = para_env%num_pe
444 : n_processes = ncpu*n_threads
445 :
446 : !! initialize some counters
447 : neris_total = 0_int_8
448 : neris_incore = 0_int_8
449 : neris_disk = 0_int_8
450 : neris_onthefly = 0_int_8
451 : mem_eris = 0_int_8
452 : mem_eris_disk = 0_int_8
453 : mem_max_val = 0_int_8
454 : compression_factor = 0.0_dp
455 : compression_factor_disk = 0.0_dp
456 : nprim_ints = 0_int_8
457 : neris_tmp = 0_int_8
458 : max_val_memory = 1_int_8
459 :
460 : max_am = basis_info%max_am
461 :
462 : CALL get_qs_env(qs_env=qs_env, &
463 : atomic_kind_set=atomic_kind_set, &
464 : particle_set=particle_set, &
465 : dft_control=dft_control)
466 : IF (dft_control%do_admm) CALL get_admm_env(qs_env%admm_env, matrix_ks_aux_fit_hfx=matrix_ks_aux_fit_hfx)
467 :
468 : do_p_screening = screening_parameter%do_initial_p_screening
469 : ! Special treatment for MP2 with initial density screening
470 : IF (do_p_screening) THEN
471 : IF (ASSOCIATED(qs_env%mp2_env)) THEN
472 : IF ((qs_env%mp2_env%ri_grad%free_hfx_buffer)) THEN
473 : do_p_screening = ((qs_env%mp2_env%p_screen) .AND. (qs_env%mp2_env%not_last_hfx))
474 : ELSE
475 : do_p_screening = .FALSE.
476 : END IF
477 : END IF
478 : END IF
479 : max_set = basis_info%max_set
480 : natom = SIZE(particle_set, 1)
481 :
482 : ! Number of image matrices in k-point calculations (nkimages==1 -> no kpoints)
483 : nkimages = dft_control%nimages
484 : CPASSERT(nkimages == 1)
485 :
486 : CALL get_atomic_kind_set(atomic_kind_set=atomic_kind_set, kind_of=kind_of)
487 :
488 : !! precompute maximum nco and allocate scratch
489 : ncos_max = 0
490 : nsgf_max = 0
491 : DO iatom = 1, natom
492 : ikind = kind_of(iatom)
493 : nseta = basis_parameter(ikind)%nset
494 : npgfa => basis_parameter(ikind)%npgf
495 : la_max => basis_parameter(ikind)%lmax
496 : nsgfa => basis_parameter(ikind)%nsgf
497 : DO iset = 1, nseta
498 : ncos_max = MAX(ncos_max, ncoset(la_max(iset)))
499 : nsgf_max = MAX(nsgf_max, nsgfa(iset))
500 : END DO
501 : END DO
502 : !! Allocate the arrays for the integrals.
503 : ALLOCATE (primitive_integrals(nsgf_max**4))
504 : primitive_integrals = 0.0_dp
505 :
506 : ALLOCATE (pbd_buf(nsgf_max**2))
507 : ALLOCATE (pbc_buf(nsgf_max**2))
508 : ALLOCATE (pad_buf(nsgf_max**2))
509 : ALLOCATE (pac_buf(nsgf_max**2))
510 : ALLOCATE (kbd_buf(nsgf_max**2))
511 : ALLOCATE (kbc_buf(nsgf_max**2))
512 : ALLOCATE (kad_buf(nsgf_max**2))
513 : ALLOCATE (kac_buf(nsgf_max**2))
514 : ALLOCATE (ee_work(ncos_max**4))
515 : ALLOCATE (ee_work2(ncos_max**4))
516 : ALLOCATE (ee_buffer1(ncos_max**4))
517 : ALLOCATE (ee_buffer2(ncos_max**4))
518 : ALLOCATE (ee_primitives_tmp(nsgf_max**4))
519 :
520 : nspins = dft_control%nspins
521 :
522 : ALLOCATE (max_contraction(max_set, natom))
523 :
524 : max_contraction = 0.0_dp
525 : max_pgf = 0
526 : DO jatom = 1, natom
527 : jkind = kind_of(jatom)
528 : lb_max => basis_parameter(jkind)%lmax
529 : nsetb = basis_parameter(jkind)%nset
530 : npgfb => basis_parameter(jkind)%npgf
531 : first_sgfb => basis_parameter(jkind)%first_sgf
532 : sphi_b => basis_parameter(jkind)%sphi
533 : nsgfb => basis_parameter(jkind)%nsgf
534 : DO jset = 1, nsetb
535 : ! takes the primitive to contracted transformation into account
536 : ncob = npgfb(jset)*ncoset(lb_max(jset))
537 : sgfb = first_sgfb(1, jset)
538 : ! if the primitives are assumed to be all of max_val2, max_val2*p2s_b becomes
539 : ! the maximum value after multiplication with sphi_b
540 : max_contraction(jset, jatom) = MAXVAL((/(SUM(ABS(sphi_b(1:ncob, i))), i=sgfb, sgfb + nsgfb(jset) - 1)/))
541 : max_pgf = MAX(max_pgf, npgfb(jset))
542 : END DO
543 : END DO
544 :
545 : ! ** Allocate buffers for pgf_lists
546 : nneighbors = SIZE(actual_x_data%neighbor_cells)
547 : ALLOCATE (pgf_list_ij(max_pgf**2))
548 : ALLOCATE (pgf_list_kl(max_pgf**2))
549 : ! the size of pgf_product_list is allocated and resized as needed. The initial guess grows as needed
550 : !$OMP ATOMIC READ
551 : tmp_i4 = pgf_product_list_size
552 : ALLOCATE (pgf_product_list(tmp_i4))
553 : ALLOCATE (nimages(max_pgf**2))
554 :
555 : DO i = 1, max_pgf**2
556 : ALLOCATE (pgf_list_ij(i)%image_list(nneighbors))
557 : ALLOCATE (pgf_list_kl(i)%image_list(nneighbors))
558 : END DO
559 : !$OMP BARRIER
560 : !$OMP MASTER
561 : !! Calculate helper array that stores if a certain atomic pair is associated in the KS matrix
562 : IF (my_geo_change) THEN
563 : CALL get_atomic_block_maps(ks_matrix(1, 1)%matrix, basis_parameter, kind_of, &
564 : shm_master_x_data%is_assoc_atomic_block, &
565 : shm_master_x_data%number_of_p_entries, &
566 : para_env, &
567 : shm_master_x_data%atomic_block_offset, &
568 : shm_master_x_data%set_offset, &
569 : shm_master_x_data%block_offset, &
570 : shm_master_x_data%map_atoms_to_cpus, &
571 : nkind)
572 :
573 : shm_is_assoc_atomic_block => shm_master_x_data%is_assoc_atomic_block
574 :
575 : !! Get occupation of KS-matrix
576 : ks_fully_occ = .TRUE.
577 : outer: DO iatom = 1, natom
578 : DO jatom = iatom, natom
579 : IF (shm_is_assoc_atomic_block(jatom, iatom) == 0) THEN
580 : ks_fully_occ = .FALSE.
581 : EXIT outer
582 : END IF
583 : END DO
584 : END DO outer
585 :
586 : IF (.NOT. ks_fully_occ) THEN
587 : CALL cp_warn(__LOCATION__, &
588 : "The Kohn Sham matrix is not 100% occupied. This "// &
589 : "may result in incorrect Hartree-Fock results. Setting "// &
590 : "MIN_PAIR_LIST_RADIUS to -1 in the QS section ensures a "// &
591 : "fully occupied KS matrix. For more information "// &
592 : "see FAQ: https://www.cp2k.org/faq:hfx_eps_warning")
593 : END IF
594 : END IF
595 :
596 : ! ** Set pointers
597 : shm_number_of_p_entries = shm_master_x_data%number_of_p_entries
598 : shm_is_assoc_atomic_block => shm_master_x_data%is_assoc_atomic_block
599 : shm_atomic_block_offset => shm_master_x_data%atomic_block_offset
600 : shm_set_offset => shm_master_x_data%set_offset
601 : shm_block_offset => shm_master_x_data%block_offset
602 : !$OMP END MASTER
603 : !$OMP BARRIER
604 :
605 : ! ** Reset storage counter given by MAX_MEMORY by subtracting all buffers
606 : ! ** Fock and density Matrices (shared)
607 : subtr_size_mb = 2_int_8*shm_block_offset(ncpu + 1)
608 : ! ** if non restricted ==> alpha/beta spin
609 : IF (.NOT. treat_lsd_in_core) THEN
610 : IF (nspins == 2) subtr_size_mb = subtr_size_mb*2_int_8
611 : END IF
612 : ! ** Initial P only MAX(alpha,beta) (shared)
613 : IF (do_p_screening .OR. screening_parameter%do_p_screening_forces) THEN
614 : subtr_size_mb = subtr_size_mb + memory_parameter%size_p_screen
615 : END IF
616 : ! ** In core forces require their own initial P
617 : IF (screening_parameter%do_p_screening_forces) THEN
618 : IF (memory_parameter%treat_forces_in_core) THEN
619 : subtr_size_mb = subtr_size_mb + memory_parameter%size_p_screen
620 : END IF
621 : END IF
622 : ! ** primitive buffer (not shared by the threads)
623 : subtr_size_mb = subtr_size_mb + nsgf_max**4*n_threads
624 : ! ** density + fock buffers
625 : subtr_size_mb = subtr_size_mb + 8_int_8*nsgf_max**2*n_threads
626 : ! ** screening functions (shared)
627 : ! ** coeffs_pgf
628 : subtr_size_mb = subtr_size_mb + max_pgf**2*max_set**2*nkind**2
629 : ! ** coeffs_set
630 : subtr_size_mb = subtr_size_mb + max_set**2*nkind**2
631 : ! ** coeffs_kind
632 : subtr_size_mb = subtr_size_mb + nkind**2
633 : ! ** radii_pgf
634 : subtr_size_mb = subtr_size_mb + max_pgf**2*max_set**2*nkind**2
635 :
636 : ! ** is_assoc (shared)
637 : subtr_size_mb = subtr_size_mb + natom**2
638 :
639 : ! ** pmax_atom (shared)
640 : IF (do_p_screening) THEN
641 : subtr_size_mb = subtr_size_mb + natom**2
642 : END IF
643 : IF (screening_parameter%do_p_screening_forces) THEN
644 : IF (memory_parameter%treat_forces_in_core) THEN
645 : subtr_size_mb = subtr_size_mb + natom**2
646 : END IF
647 : END IF
648 :
649 : ! ** Convert into MiB's
650 : subtr_size_mb = subtr_size_mb*8_int_8/1024_int_8/1024_int_8
651 :
652 : ! ** Subtracting all these buffers from MAX_MEMORY yields the amount
653 : ! ** of RAM that is left for the compressed integrals. When using threads
654 : ! ** all the available memory is shared among all n_threads. i.e. the faster
655 : ! ** ones can steal from the slower ones
656 :
657 : CALL hfx_reset_memory_usage_counter(memory_parameter, subtr_size_mb)
658 :
659 : use_disk_storage = .FALSE.
660 : counter = 0_int_8
661 : do_disk_storage = memory_parameter%do_disk_storage
662 : IF (do_disk_storage) THEN
663 : maxval_container_disk => actual_x_data%store_ints%maxval_container_disk
664 : maxval_cache_disk => actual_x_data%store_ints%maxval_cache_disk
665 :
666 : integral_containers_disk => actual_x_data%store_ints%integral_containers_disk
667 : integral_caches_disk => actual_x_data%store_ints%integral_caches_disk
668 : END IF
669 :
670 : IF (my_geo_change) THEN
671 : memory_parameter%ram_counter = HUGE(memory_parameter%ram_counter)
672 : END IF
673 :
674 : IF (my_geo_change) THEN
675 : memory_parameter%recalc_forces = .TRUE.
676 : ELSE
677 : IF (.NOT. memory_parameter%treat_forces_in_core) memory_parameter%recalc_forces = .TRUE.
678 : END IF
679 :
680 : !! Get screening parameter
681 : eps_schwarz = screening_parameter%eps_schwarz
682 : IF (eps_schwarz <= 0.0_dp) THEN
683 : log10_eps_schwarz = log_zero
684 : ELSE
685 : log10_eps_schwarz = LOG10(eps_schwarz)
686 : END IF
687 : !! get storage epsilon
688 : eps_storage = eps_schwarz*memory_parameter%eps_storage_scaling
689 :
690 : !! If we have a hybrid functional, we may need only a fraction of exact exchange
691 : hf_fraction = general_parameter%fraction
692 :
693 : !! The number of integrals that fit into the given MAX_MEMORY
694 :
695 : !! Parameters related to the potential 1/r, erf(wr)/r, erfc(wr/r)
696 : potential_parameter = actual_x_data%potential_parameter
697 :
698 : !! Variable to check if we calculate the integrals in-core or on the fly
699 : !! If TRUE -> on the fly
700 : IF (.NOT. memory_parameter%do_all_on_the_fly) THEN
701 : buffer_overflow = .FALSE.
702 : ELSE
703 : buffer_overflow = .TRUE.
704 : END IF
705 : logger => cp_get_default_logger()
706 :
707 : private_lib = actual_x_data%lib
708 :
709 : !! Helper array to map local basis function indices to global ones
710 : ALLOCATE (last_sgf_global(0:natom))
711 : last_sgf_global(0) = 0
712 : DO iatom = 1, natom
713 : ikind = kind_of(iatom)
714 : last_sgf_global(iatom) = last_sgf_global(iatom - 1) + basis_parameter(ikind)%nsgf_total
715 : END DO
716 : !$OMP BARRIER
717 : !$OMP MASTER
718 : !! Let master thread get the density (avoid problems with MPI)
719 : !! Get the full density from all the processors
720 : NULLIFY (full_density_alpha, full_density_beta)
721 : ALLOCATE (full_density_alpha(shm_block_offset(ncpu + 1), nkimages))
722 : IF (.NOT. treat_lsd_in_core .OR. nspins == 1) THEN
723 : CALL timeset(routineN//"_getP", handle_getP)
724 : DO img = 1, nkimages
725 : CALL get_full_density(para_env, full_density_alpha(:, img), rho_ao(ispin, img)%matrix, shm_number_of_p_entries, &
726 : shm_master_x_data%block_offset, &
727 : kind_of, basis_parameter, get_max_vals_spin=.FALSE., antisymmetric=is_anti_symmetric)
728 : END DO
729 :
730 : IF (nspins == 2) THEN
731 : ALLOCATE (full_density_beta(shm_block_offset(ncpu + 1), nkimages))
732 : DO img = 1, nkimages
733 : CALL get_full_density(para_env, full_density_beta(:, img), rho_ao(2, img)%matrix, shm_number_of_p_entries, &
734 : shm_master_x_data%block_offset, &
735 : kind_of, basis_parameter, get_max_vals_spin=.FALSE., antisymmetric=is_anti_symmetric)
736 : END DO
737 : END IF
738 : CALL timestop(handle_getP)
739 :
740 : !! Calculate the max values of the density matrix actual_pmax stores the data from the actual density matrix
741 : !! and x_data%initial_p stores the same for the initial guess. The initial guess is updated only in the case of
742 : !! a changed geometry
743 : NULLIFY (shm_initial_p)
744 : IF (do_p_screening) THEN
745 : shm_initial_p => shm_master_x_data%initial_p
746 : shm_pmax_atom => shm_master_x_data%pmax_atom
747 : IF (my_geo_change) THEN
748 : CALL update_pmax_mat(shm_master_x_data%initial_p, &
749 : shm_master_x_data%map_atom_to_kind_atom, &
750 : shm_master_x_data%set_offset, &
751 : shm_master_x_data%atomic_block_offset, &
752 : shm_pmax_atom, &
753 : full_density_alpha, full_density_beta, &
754 : natom, kind_of, basis_parameter, &
755 : nkind, shm_master_x_data%is_assoc_atomic_block)
756 : END IF
757 : END IF
758 : ELSE
759 : IF (do_p_screening) THEN
760 : CALL timeset(routineN//"_getP", handle_getP)
761 : DO img = 1, nkimages
762 : CALL get_full_density(para_env, full_density_alpha(:, img), rho_ao(1, img)%matrix, shm_number_of_p_entries, &
763 : shm_master_x_data%block_offset, &
764 : kind_of, basis_parameter, get_max_vals_spin=.TRUE., &
765 : rho_beta=rho_ao(2, img)%matrix, antisymmetric=is_anti_symmetric)
766 : END DO
767 : CALL timestop(handle_getP)
768 :
769 : !! Calculate the max values of the density matrix actual_pmax stores the data from the actual density matrix
770 : !! and x_data%initial_p stores the same for the initial guess. The initial guess is updated only in the case of
771 : !! a changed geometry
772 : NULLIFY (shm_initial_p)
773 : shm_initial_p => actual_x_data%initial_p
774 : shm_pmax_atom => shm_master_x_data%pmax_atom
775 : IF (my_geo_change) THEN
776 : CALL update_pmax_mat(shm_master_x_data%initial_p, &
777 : shm_master_x_data%map_atom_to_kind_atom, &
778 : shm_master_x_data%set_offset, &
779 : shm_master_x_data%atomic_block_offset, &
780 : shm_pmax_atom, &
781 : full_density_alpha, full_density_beta, &
782 : natom, kind_of, basis_parameter, &
783 : nkind, shm_master_x_data%is_assoc_atomic_block)
784 : END IF
785 : END IF
786 : ! ** Now get the density(ispin)
787 : DO img = 1, nkimages
788 : CALL get_full_density(para_env, full_density_alpha(:, img), rho_ao(ispin, img)%matrix, shm_number_of_p_entries, &
789 : shm_master_x_data%block_offset, &
790 : kind_of, basis_parameter, get_max_vals_spin=.FALSE., &
791 : antisymmetric=is_anti_symmetric)
792 : END DO
793 : END IF
794 :
795 : NULLIFY (full_ks_alpha, full_ks_beta)
796 : ALLOCATE (shm_master_x_data%full_ks_alpha(shm_block_offset(ncpu + 1), nkimages))
797 : full_ks_alpha => shm_master_x_data%full_ks_alpha
798 : full_ks_alpha = 0.0_dp
799 :
800 : IF (.NOT. treat_lsd_in_core) THEN
801 : IF (nspins == 2) THEN
802 : ALLOCATE (shm_master_x_data%full_ks_beta(shm_block_offset(ncpu + 1), nkimages))
803 : full_ks_beta => shm_master_x_data%full_ks_beta
804 : full_ks_beta = 0.0_dp
805 : END IF
806 : END IF
807 :
808 : !! Initialize schwarz screening matrices for near field estimates and boxing screening matrices
809 : !! for far field estimates. The update is only performed if the geomtry of the system changed.
810 : !! If the system is periodic, then the corresponding routines are called and some variables
811 : !! are initialized
812 :
813 : !$OMP END MASTER
814 : !$OMP BARRIER
815 :
816 : IF (.NOT. shm_master_x_data%screen_funct_is_initialized) THEN
817 : CALL calc_pair_dist_radii(qs_env, basis_parameter, &
818 : shm_master_x_data%pair_dist_radii_pgf, max_set, max_pgf, eps_schwarz, &
819 : n_threads, i_thread)
820 : !$OMP BARRIER
821 : CALL calc_screening_functions(qs_env, basis_parameter, private_lib, shm_master_x_data%potential_parameter, &
822 : shm_master_x_data%screen_funct_coeffs_set, &
823 : shm_master_x_data%screen_funct_coeffs_kind, &
824 : shm_master_x_data%screen_funct_coeffs_pgf, &
825 : shm_master_x_data%pair_dist_radii_pgf, &
826 : max_set, max_pgf, n_threads, i_thread, p_work)
827 :
828 : !$OMP MASTER
829 : shm_master_x_data%screen_funct_is_initialized = .TRUE.
830 : !$OMP END MASTER
831 : END IF
832 : !$OMP BARRIER
833 :
834 : !$OMP MASTER
835 : screen_coeffs_set => shm_master_x_data%screen_funct_coeffs_set
836 : screen_coeffs_kind => shm_master_x_data%screen_funct_coeffs_kind
837 : screen_coeffs_pgf => shm_master_x_data%screen_funct_coeffs_pgf
838 : radii_pgf => shm_master_x_data%pair_dist_radii_pgf
839 : !$OMP END MASTER
840 : !$OMP BARRIER
841 :
842 : !! Initialize a prefactor depending on the fraction and the number of spins
843 : IF (nspins == 1) THEN
844 : fac = 0.5_dp*hf_fraction
845 : ELSE
846 : fac = 1.0_dp*hf_fraction
847 : END IF
848 :
849 : !! Call routines that distribute the load on all processes. If we want to screen on a initial density matrix, there is
850 : !! an optional parameter. Of course, this is only done if the geometry did change
851 : !$OMP BARRIER
852 : !$OMP MASTER
853 : CALL timeset(routineN//"_load", handle_load)
854 : !$OMP END MASTER
855 : !$OMP BARRIER
856 : IF (my_geo_change) THEN
857 : IF (actual_x_data%b_first_load_balance_energy) THEN
858 : CALL hfx_load_balance(actual_x_data, eps_schwarz, particle_set, max_set, para_env, &
859 : screen_coeffs_set, screen_coeffs_kind, &
860 : shm_is_assoc_atomic_block, do_periodic, load_balance_parameter, &
861 : kind_of, basis_parameter, shm_initial_p, shm_pmax_atom, i_thread, n_threads, &
862 : cell, do_p_screening, actual_x_data%map_atom_to_kind_atom, &
863 : nkind, hfx_do_eval_energy, shm_pmax_block, use_virial=.FALSE.)
864 : actual_x_data%b_first_load_balance_energy = .FALSE.
865 : ELSE
866 : CALL hfx_update_load_balance(actual_x_data, para_env, &
867 : load_balance_parameter, &
868 : i_thread, n_threads, hfx_do_eval_energy)
869 : END IF
870 : END IF
871 : !$OMP BARRIER
872 : !$OMP MASTER
873 : CALL timestop(handle_load)
874 : !$OMP END MASTER
875 : !$OMP BARRIER
876 :
877 : !! Start caluclating integrals of the form (ab|cd) or (ij|kl)
878 : !! In order to do so, there is a main four-loop structure that takes into account the two symmetries
879 : !!
880 : !! (ab|cd) = (ba|cd) = (ab|dc) = (ba|dc)
881 : !!
882 : !! by iterating in the following way
883 : !!
884 : !! DO iatom=1,natom and DO katom=1,natom
885 : !! DO jatom=iatom,natom DO latom=katom,natom
886 : !!
887 : !! The third symmetry
888 : !!
889 : !! (ab|cd) = (cd|ab)
890 : !!
891 : !! is taken into account by the following criterion:
892 : !!
893 : !! IF(katom+latom<=iatom+jatom) THEN
894 : !! IF( ((iatom+jatom).EQ.(katom+latom) ) .AND.(katom<iatom)) CYCLE
895 : !!
896 : !! Depending on the degeneracy of an integral the exchange contribution is multiplied by a corresponding
897 : !! factor ( symm_fac ).
898 : !!
899 : !! If one quartet does not pass the screening we CYCLE on the outer most possible loop. Thats why we use
900 : !! different hierarchies of short range screening matrices.
901 : !!
902 : !! If we do a parallel run, each process owns a unique array of workloads. Here, a workload is
903 : !! defined as :
904 : !!
905 : !! istart, jstart, kstart, lstart, number_of_atom_quartets, initial_cpu_id
906 : !!
907 : !! This tells the process where to start the main loops and how many bunches of integrals it has to
908 : !! calculate. The original parallelization is a simple modulo distribution that is binned and
909 : !! optimized in the load_balance routines. Since the Monte Carlo routines can swap processors,
910 : !! we need to know which was the initial cpu_id.
911 : !! Furthermore, the indices iatom, jatom, katom, latom have to be set to istart, jstart, kstart and
912 : !! lstart only the first time the loop is executed. All subsequent loops have to start with one or
913 : !! iatom and katom respectively. Therefore, we use flags like first_j_loop etc.
914 :
915 : do_dynamic_load_balancing = .TRUE.
916 :
917 : IF (n_threads == 1 .OR. do_disk_storage) do_dynamic_load_balancing = .FALSE.
918 :
919 : IF (do_dynamic_load_balancing) THEN
920 : my_bin_size = SIZE(actual_x_data%distribution_energy)
921 : ELSE
922 : my_bin_size = 1
923 : END IF
924 : !! We do not need the containers if MAX_MEM = 0
925 : IF (.NOT. memory_parameter%do_all_on_the_fly) THEN
926 : !! IF new md step -> reinitialize containers
927 : IF (my_geo_change) THEN
928 : CALL dealloc_containers(actual_x_data%store_ints, memory_parameter%actual_memory_usage)
929 : CALL alloc_containers(actual_x_data%store_ints, my_bin_size)
930 :
931 : DO bin = 1, my_bin_size
932 : maxval_container => actual_x_data%store_ints%maxval_container(bin)
933 : integral_containers => actual_x_data%store_ints%integral_containers(:, bin)
934 : CALL hfx_init_container(maxval_container, memory_parameter%actual_memory_usage, .FALSE.)
935 : DO i = 1, 64
936 : CALL hfx_init_container(integral_containers(i), memory_parameter%actual_memory_usage, .FALSE.)
937 : END DO
938 : END DO
939 : END IF
940 :
941 : !! Decompress the first cache for maxvals and integrals
942 : IF (.NOT. my_geo_change) THEN
943 : DO bin = 1, my_bin_size
944 : maxval_cache => actual_x_data%store_ints%maxval_cache(bin)
945 : maxval_container => actual_x_data%store_ints%maxval_container(bin)
946 : integral_caches => actual_x_data%store_ints%integral_caches(:, bin)
947 : integral_containers => actual_x_data%store_ints%integral_containers(:, bin)
948 : CALL hfx_decompress_first_cache(bits_max_val, maxval_cache, maxval_container, &
949 : memory_parameter%actual_memory_usage, .FALSE.)
950 : DO i = 1, 64
951 : CALL hfx_decompress_first_cache(i, integral_caches(i), integral_containers(i), &
952 : memory_parameter%actual_memory_usage, .FALSE.)
953 : END DO
954 : END DO
955 : END IF
956 : END IF
957 :
958 : !! Since the I/O routines are no thread-safe, i.e. the procedure to get the unit number, put a lock here
959 : !$OMP CRITICAL(hfxenergy_io_critical)
960 : !! If we do disk storage, we have to initialize the containers/caches
961 : IF (do_disk_storage) THEN
962 : !! IF new md step -> reinitialize containers
963 : IF (my_geo_change) THEN
964 : CALL hfx_init_container(maxval_container_disk, memory_parameter%actual_memory_usage_disk, do_disk_storage)
965 : DO i = 1, 64
966 : CALL hfx_init_container(integral_containers_disk(i), memory_parameter%actual_memory_usage_disk, do_disk_storage)
967 : END DO
968 : END IF
969 : !! Decompress the first cache for maxvals and integrals
970 : IF (.NOT. my_geo_change) THEN
971 : CALL hfx_decompress_first_cache(bits_max_val, maxval_cache_disk, maxval_container_disk, &
972 : memory_parameter%actual_memory_usage_disk, .TRUE.)
973 : DO i = 1, 64
974 : CALL hfx_decompress_first_cache(i, integral_caches_disk(i), integral_containers_disk(i), &
975 : memory_parameter%actual_memory_usage_disk, .TRUE.)
976 : END DO
977 : END IF
978 : END IF
979 : !$OMP END CRITICAL(hfxenergy_io_critical)
980 :
981 : !$OMP BARRIER
982 : !$OMP MASTER
983 :
984 : IF (do_dynamic_load_balancing) THEN
985 : ! ** Lets construct the task list
986 : shm_total_bins = 0
987 : DO i = 1, n_threads
988 : shm_total_bins = shm_total_bins + SIZE(x_data(irep, i)%distribution_energy)
989 : END DO
990 : ALLOCATE (tmp_task_list(shm_total_bins))
991 : shm_task_counter = 0
992 : DO i = 1, n_threads
993 : DO bin = 1, SIZE(x_data(irep, i)%distribution_energy)
994 : shm_task_counter = shm_task_counter + 1
995 : tmp_task_list(shm_task_counter)%thread_id = i
996 : tmp_task_list(shm_task_counter)%bin_id = bin
997 : tmp_task_list(shm_task_counter)%cost = x_data(irep, i)%distribution_energy(bin)%cost
998 : END DO
999 : END DO
1000 :
1001 : ! ** Now sort the task list
1002 : ALLOCATE (tmp_task_list_cost(shm_total_bins))
1003 : ALLOCATE (tmp_index(shm_total_bins))
1004 :
1005 : DO i = 1, shm_total_bins
1006 : tmp_task_list_cost(i) = tmp_task_list(i)%cost
1007 : END DO
1008 :
1009 : CALL sort(tmp_task_list_cost, shm_total_bins, tmp_index)
1010 :
1011 : ALLOCATE (shm_master_x_data%task_list(shm_total_bins))
1012 :
1013 : DO i = 1, shm_total_bins
1014 : shm_master_x_data%task_list(i) = tmp_task_list(tmp_index(shm_total_bins - i + 1))
1015 : END DO
1016 :
1017 : shm_task_list => shm_master_x_data%task_list
1018 : shm_task_counter = 0
1019 :
1020 : DEALLOCATE (tmp_task_list_cost, tmp_index, tmp_task_list)
1021 : END IF
1022 : !$OMP END MASTER
1023 : !$OMP BARRIER
1024 :
1025 : IF (my_bin_size > 0) THEN
1026 : maxval_container => actual_x_data%store_ints%maxval_container(1)
1027 : maxval_cache => actual_x_data%store_ints%maxval_cache(1)
1028 :
1029 : integral_containers => actual_x_data%store_ints%integral_containers(:, 1)
1030 : integral_caches => actual_x_data%store_ints%integral_caches(:, 1)
1031 : END IF
1032 :
1033 : !$OMP BARRIER
1034 : !$OMP MASTER
1035 : CALL timeset(routineN//"_main", handle_main)
1036 : !$OMP END MASTER
1037 : !$OMP BARRIER
1038 :
1039 : coeffs_kind_max0 = MAXVAL(screen_coeffs_kind(:, :)%x(2))
1040 : ALLOCATE (set_list_ij((max_set*load_balance_parameter%block_size)**2))
1041 : ALLOCATE (set_list_kl((max_set*load_balance_parameter%block_size)**2))
1042 :
1043 : !$OMP BARRIER
1044 : !$OMP MASTER
1045 :
1046 : !! precalculate maximum density matrix elements in blocks
1047 : actual_x_data%pmax_block = 0.0_dp
1048 : shm_pmax_block => actual_x_data%pmax_block
1049 : IF (do_p_screening) THEN
1050 : DO iatom_block = 1, SIZE(actual_x_data%blocks)
1051 : iatom_start = actual_x_data%blocks(iatom_block)%istart
1052 : iatom_end = actual_x_data%blocks(iatom_block)%iend
1053 : DO jatom_block = 1, SIZE(actual_x_data%blocks)
1054 : jatom_start = actual_x_data%blocks(jatom_block)%istart
1055 : jatom_end = actual_x_data%blocks(jatom_block)%iend
1056 : shm_pmax_block(iatom_block, jatom_block) = MAXVAL(shm_pmax_atom(iatom_start:iatom_end, jatom_start:jatom_end))
1057 : END DO
1058 : END DO
1059 : END IF
1060 : shm_atomic_pair_list => actual_x_data%atomic_pair_list
1061 : IF (my_geo_change) THEN
1062 : CALL build_atomic_pair_list(natom, shm_atomic_pair_list, kind_of, basis_parameter, particle_set, &
1063 : do_periodic, screen_coeffs_kind, coeffs_kind_max0, log10_eps_schwarz, cell, &
1064 : actual_x_data%blocks)
1065 : END IF
1066 :
1067 : my_bin_size = SIZE(actual_x_data%distribution_energy)
1068 : ! reset timings for the new SCF round
1069 : IF (my_geo_change) THEN
1070 : DO bin = 1, my_bin_size
1071 : actual_x_data%distribution_energy(bin)%time_first_scf = 0.0_dp
1072 : actual_x_data%distribution_energy(bin)%time_other_scf = 0.0_dp
1073 : actual_x_data%distribution_energy(bin)%time_forces = 0.0_dp
1074 : END DO
1075 : END IF
1076 : !$OMP END MASTER
1077 : !$OMP BARRIER
1078 :
1079 : my_bin_size = SIZE(actual_x_data%distribution_energy)
1080 : nblocks = load_balance_parameter%nblocks
1081 :
1082 : bins_left = .TRUE.
1083 : do_it = .TRUE.
1084 : bin = 0
1085 : DO WHILE (bins_left)
1086 : IF (.NOT. do_dynamic_load_balancing) THEN
1087 : bin = bin + 1
1088 : IF (bin > my_bin_size) THEN
1089 : do_it = .FALSE.
1090 : bins_left = .FALSE.
1091 : ELSE
1092 : do_it = .TRUE.
1093 : bins_left = .TRUE.
1094 : distribution_energy => actual_x_data%distribution_energy(bin)
1095 : END IF
1096 : ELSE
1097 : !$OMP CRITICAL(hfxenergy_critical)
1098 : shm_task_counter = shm_task_counter + 1
1099 : IF (shm_task_counter <= shm_total_bins) THEN
1100 : my_thread_id = shm_task_list(shm_task_counter)%thread_id
1101 : my_bin_id = shm_task_list(shm_task_counter)%bin_id
1102 : IF (.NOT. memory_parameter%do_all_on_the_fly) THEN
1103 : maxval_cache => x_data(irep, my_thread_id)%store_ints%maxval_cache(my_bin_id)
1104 : maxval_container => x_data(irep, my_thread_id)%store_ints%maxval_container(my_bin_id)
1105 : integral_caches => x_data(irep, my_thread_id)%store_ints%integral_caches(:, my_bin_id)
1106 : integral_containers => x_data(irep, my_thread_id)%store_ints%integral_containers(:, my_bin_id)
1107 : END IF
1108 : distribution_energy => x_data(irep, my_thread_id)%distribution_energy(my_bin_id)
1109 : do_it = .TRUE.
1110 : bins_left = .TRUE.
1111 : IF (my_geo_change) THEN
1112 : distribution_energy%ram_counter = HUGE(distribution_energy%ram_counter)
1113 : END IF
1114 : counter = 0_Int_8
1115 : ELSE
1116 : do_it = .FALSE.
1117 : bins_left = .FALSE.
1118 : END IF
1119 : !$OMP END CRITICAL(hfxenergy_critical)
1120 : END IF
1121 :
1122 : IF (.NOT. do_it) CYCLE
1123 : !$OMP MASTER
1124 : CALL timeset(routineN//"_bin", handle_bin)
1125 : !$OMP END MASTER
1126 : bintime_start = m_walltime()
1127 : my_istart = distribution_energy%istart
1128 : my_current_counter = 0
1129 : IF (distribution_energy%number_of_atom_quartets == 0 .OR. &
1130 : my_istart == -1_int_8) my_istart = nblocks**4
1131 : atomic_blocks: DO atom_block = my_istart, nblocks**4 - 1, n_processes
1132 : latom_block = INT(MODULO(atom_block, nblocks)) + 1
1133 : tmp_block = atom_block/nblocks
1134 : katom_block = INT(MODULO(tmp_block, nblocks)) + 1
1135 : IF (latom_block < katom_block) CYCLE
1136 : tmp_block = tmp_block/nblocks
1137 : jatom_block = INT(MODULO(tmp_block, nblocks)) + 1
1138 : tmp_block = tmp_block/nblocks
1139 : iatom_block = INT(MODULO(tmp_block, nblocks)) + 1
1140 : IF (jatom_block < iatom_block) CYCLE
1141 : my_current_counter = my_current_counter + 1
1142 : IF (my_current_counter > distribution_energy%number_of_atom_quartets) EXIT atomic_blocks
1143 :
1144 : iatom_start = actual_x_data%blocks(iatom_block)%istart
1145 : iatom_end = actual_x_data%blocks(iatom_block)%iend
1146 : jatom_start = actual_x_data%blocks(jatom_block)%istart
1147 : jatom_end = actual_x_data%blocks(jatom_block)%iend
1148 : katom_start = actual_x_data%blocks(katom_block)%istart
1149 : katom_end = actual_x_data%blocks(katom_block)%iend
1150 : latom_start = actual_x_data%blocks(latom_block)%istart
1151 : latom_end = actual_x_data%blocks(latom_block)%iend
1152 :
1153 : pmax_blocks = MAX(shm_pmax_block(katom_block, iatom_block), &
1154 : shm_pmax_block(latom_block, jatom_block), &
1155 : shm_pmax_block(latom_block, iatom_block), &
1156 : shm_pmax_block(katom_block, jatom_block))
1157 :
1158 : IF (2.0_dp*coeffs_kind_max0 + pmax_blocks < log10_eps_schwarz) CYCLE
1159 :
1160 : CALL build_pair_list(natom, list_ij, set_list_ij, iatom_start, iatom_end, &
1161 : jatom_start, jatom_end, &
1162 : kind_of, basis_parameter, particle_set, &
1163 : do_periodic, screen_coeffs_set, screen_coeffs_kind, &
1164 : coeffs_kind_max0, log10_eps_schwarz, cell, pmax_blocks, &
1165 : shm_atomic_pair_list)
1166 :
1167 : CALL build_pair_list(natom, list_kl, set_list_kl, katom_start, katom_end, &
1168 : latom_start, latom_end, &
1169 : kind_of, basis_parameter, particle_set, &
1170 : do_periodic, screen_coeffs_set, screen_coeffs_kind, &
1171 : coeffs_kind_max0, log10_eps_schwarz, cell, pmax_blocks, &
1172 : shm_atomic_pair_list)
1173 :
1174 : DO i_list_ij = 1, list_ij%n_element
1175 :
1176 : iatom = list_ij%elements(i_list_ij)%pair(1)
1177 : jatom = list_ij%elements(i_list_ij)%pair(2)
1178 : i_set_list_ij_start = list_ij%elements(i_list_ij)%set_bounds(1)
1179 : i_set_list_ij_stop = list_ij%elements(i_list_ij)%set_bounds(2)
1180 : ikind = list_ij%elements(i_list_ij)%kind_pair(1)
1181 : jkind = list_ij%elements(i_list_ij)%kind_pair(2)
1182 : ra = list_ij%elements(i_list_ij)%r1
1183 : rb = list_ij%elements(i_list_ij)%r2
1184 : rab2 = list_ij%elements(i_list_ij)%dist2
1185 :
1186 : la_max => basis_parameter(ikind)%lmax
1187 : la_min => basis_parameter(ikind)%lmin
1188 : npgfa => basis_parameter(ikind)%npgf
1189 : nseta = basis_parameter(ikind)%nset
1190 : zeta => basis_parameter(ikind)%zet
1191 : nsgfa => basis_parameter(ikind)%nsgf
1192 : sphi_a_ext => basis_parameter(ikind)%sphi_ext(:, :, :, :)
1193 : nsgfl_a => basis_parameter(ikind)%nsgfl
1194 : sphi_a_u1 = UBOUND(sphi_a_ext, 1)
1195 : sphi_a_u2 = UBOUND(sphi_a_ext, 2)
1196 : sphi_a_u3 = UBOUND(sphi_a_ext, 3)
1197 :
1198 : lb_max => basis_parameter(jkind)%lmax
1199 : lb_min => basis_parameter(jkind)%lmin
1200 : npgfb => basis_parameter(jkind)%npgf
1201 : nsetb = basis_parameter(jkind)%nset
1202 : zetb => basis_parameter(jkind)%zet
1203 : nsgfb => basis_parameter(jkind)%nsgf
1204 : sphi_b_ext => basis_parameter(jkind)%sphi_ext(:, :, :, :)
1205 : nsgfl_b => basis_parameter(jkind)%nsgfl
1206 : sphi_b_u1 = UBOUND(sphi_b_ext, 1)
1207 : sphi_b_u2 = UBOUND(sphi_b_ext, 2)
1208 : sphi_b_u3 = UBOUND(sphi_b_ext, 3)
1209 :
1210 : DO i_list_kl = 1, list_kl%n_element
1211 : katom = list_kl%elements(i_list_kl)%pair(1)
1212 : latom = list_kl%elements(i_list_kl)%pair(2)
1213 :
1214 : IF (.NOT. (katom + latom <= iatom + jatom)) CYCLE
1215 : IF (((iatom + jatom) .EQ. (katom + latom)) .AND. (katom < iatom)) CYCLE
1216 : i_set_list_kl_start = list_kl%elements(i_list_kl)%set_bounds(1)
1217 : i_set_list_kl_stop = list_kl%elements(i_list_kl)%set_bounds(2)
1218 : kkind = list_kl%elements(i_list_kl)%kind_pair(1)
1219 : lkind = list_kl%elements(i_list_kl)%kind_pair(2)
1220 : rc = list_kl%elements(i_list_kl)%r1
1221 : rd = list_kl%elements(i_list_kl)%r2
1222 : rcd2 = list_kl%elements(i_list_kl)%dist2
1223 :
1224 : IF (do_p_screening) THEN
1225 : pmax_atom = MAX(shm_pmax_atom(katom, iatom), &
1226 : shm_pmax_atom(latom, jatom), &
1227 : shm_pmax_atom(latom, iatom), &
1228 : shm_pmax_atom(katom, jatom))
1229 : ELSE
1230 : pmax_atom = 0.0_dp
1231 : END IF
1232 :
1233 : screen_kind_ij = screen_coeffs_kind(jkind, ikind)%x(1)*rab2 + &
1234 : screen_coeffs_kind(jkind, ikind)%x(2)
1235 : screen_kind_kl = screen_coeffs_kind(lkind, kkind)%x(1)*rcd2 + &
1236 : screen_coeffs_kind(lkind, kkind)%x(2)
1237 :
1238 : IF (screen_kind_ij + screen_kind_kl + pmax_atom < log10_eps_schwarz) CYCLE
1239 :
1240 : !! we want to be consistent with the KS matrix. If none of the atomic indices
1241 : !! is associated cycle
1242 : IF (.NOT. (shm_is_assoc_atomic_block(latom, iatom) >= 1 .AND. &
1243 : shm_is_assoc_atomic_block(katom, iatom) >= 1 .AND. &
1244 : shm_is_assoc_atomic_block(katom, jatom) >= 1 .AND. &
1245 : shm_is_assoc_atomic_block(latom, jatom) >= 1)) CYCLE
1246 :
1247 : !! calculate symmetry_factor according to degeneracy of atomic quartet
1248 : symm_fac = 0.5_dp
1249 : IF (iatom == jatom) symm_fac = symm_fac*2.0_dp
1250 : IF (katom == latom) symm_fac = symm_fac*2.0_dp
1251 : IF (iatom == katom .AND. jatom == latom .AND. iatom /= jatom .AND. katom /= latom) symm_fac = symm_fac*2.0_dp
1252 : IF (iatom == katom .AND. iatom == jatom .AND. katom == latom) symm_fac = symm_fac*2.0_dp
1253 : symm_fac = 1.0_dp/symm_fac
1254 :
1255 : lc_max => basis_parameter(kkind)%lmax
1256 : lc_min => basis_parameter(kkind)%lmin
1257 : npgfc => basis_parameter(kkind)%npgf
1258 : zetc => basis_parameter(kkind)%zet
1259 : nsgfc => basis_parameter(kkind)%nsgf
1260 : sphi_c_ext => basis_parameter(kkind)%sphi_ext(:, :, :, :)
1261 : nsgfl_c => basis_parameter(kkind)%nsgfl
1262 : sphi_c_u1 = UBOUND(sphi_c_ext, 1)
1263 : sphi_c_u2 = UBOUND(sphi_c_ext, 2)
1264 : sphi_c_u3 = UBOUND(sphi_c_ext, 3)
1265 :
1266 : ld_max => basis_parameter(lkind)%lmax
1267 : ld_min => basis_parameter(lkind)%lmin
1268 : npgfd => basis_parameter(lkind)%npgf
1269 : zetd => basis_parameter(lkind)%zet
1270 : nsgfd => basis_parameter(lkind)%nsgf
1271 : sphi_d_ext => basis_parameter(lkind)%sphi_ext(:, :, :, :)
1272 : nsgfl_d => basis_parameter(lkind)%nsgfl
1273 : sphi_d_u1 = UBOUND(sphi_d_ext, 1)
1274 : sphi_d_u2 = UBOUND(sphi_d_ext, 2)
1275 : sphi_d_u3 = UBOUND(sphi_d_ext, 3)
1276 :
1277 : atomic_offset_bd = shm_atomic_block_offset(jatom, latom)
1278 : atomic_offset_bc = shm_atomic_block_offset(jatom, katom)
1279 : atomic_offset_ad = shm_atomic_block_offset(iatom, latom)
1280 : atomic_offset_ac = shm_atomic_block_offset(iatom, katom)
1281 :
1282 : IF (jatom < latom) THEN
1283 : offset_bd_set => shm_set_offset(:, :, lkind, jkind)
1284 : ELSE
1285 : offset_bd_set => shm_set_offset(:, :, jkind, lkind)
1286 : END IF
1287 : IF (jatom < katom) THEN
1288 : offset_bc_set => shm_set_offset(:, :, kkind, jkind)
1289 : ELSE
1290 : offset_bc_set => shm_set_offset(:, :, jkind, kkind)
1291 : END IF
1292 : IF (iatom < latom) THEN
1293 : offset_ad_set => shm_set_offset(:, :, lkind, ikind)
1294 : ELSE
1295 : offset_ad_set => shm_set_offset(:, :, ikind, lkind)
1296 : END IF
1297 : IF (iatom < katom) THEN
1298 : offset_ac_set => shm_set_offset(:, :, kkind, ikind)
1299 : ELSE
1300 : offset_ac_set => shm_set_offset(:, :, ikind, kkind)
1301 : END IF
1302 :
1303 : IF (do_p_screening) THEN
1304 : swap_id = 0
1305 : kind_kind_idx = INT(get_1D_idx(kkind, ikind, INT(nkind, int_8)))
1306 : IF (ikind >= kkind) THEN
1307 : ptr_p_1 => shm_initial_p(kind_kind_idx)%p_kind(:, :, &
1308 : actual_x_data%map_atom_to_kind_atom(katom), &
1309 : actual_x_data%map_atom_to_kind_atom(iatom))
1310 : ELSE
1311 : ptr_p_1 => shm_initial_p(kind_kind_idx)%p_kind(:, :, &
1312 : actual_x_data%map_atom_to_kind_atom(iatom), &
1313 : actual_x_data%map_atom_to_kind_atom(katom))
1314 : swap_id = swap_id + 1
1315 : END IF
1316 : kind_kind_idx = INT(get_1D_idx(lkind, jkind, INT(nkind, int_8)))
1317 : IF (jkind >= lkind) THEN
1318 : ptr_p_2 => shm_initial_p(kind_kind_idx)%p_kind(:, :, &
1319 : actual_x_data%map_atom_to_kind_atom(latom), &
1320 : actual_x_data%map_atom_to_kind_atom(jatom))
1321 : ELSE
1322 : ptr_p_2 => shm_initial_p(kind_kind_idx)%p_kind(:, :, &
1323 : actual_x_data%map_atom_to_kind_atom(jatom), &
1324 : actual_x_data%map_atom_to_kind_atom(latom))
1325 : swap_id = swap_id + 2
1326 : END IF
1327 : kind_kind_idx = INT(get_1D_idx(lkind, ikind, INT(nkind, int_8)))
1328 : IF (ikind >= lkind) THEN
1329 : ptr_p_3 => shm_initial_p(kind_kind_idx)%p_kind(:, :, &
1330 : actual_x_data%map_atom_to_kind_atom(latom), &
1331 : actual_x_data%map_atom_to_kind_atom(iatom))
1332 : ELSE
1333 : ptr_p_3 => shm_initial_p(kind_kind_idx)%p_kind(:, :, &
1334 : actual_x_data%map_atom_to_kind_atom(iatom), &
1335 : actual_x_data%map_atom_to_kind_atom(latom))
1336 : swap_id = swap_id + 4
1337 : END IF
1338 : kind_kind_idx = INT(get_1D_idx(kkind, jkind, INT(nkind, int_8)))
1339 : IF (jkind >= kkind) THEN
1340 : ptr_p_4 => shm_initial_p(kind_kind_idx)%p_kind(:, :, &
1341 : actual_x_data%map_atom_to_kind_atom(katom), &
1342 : actual_x_data%map_atom_to_kind_atom(jatom))
1343 : ELSE
1344 : ptr_p_4 => shm_initial_p(kind_kind_idx)%p_kind(:, :, &
1345 : actual_x_data%map_atom_to_kind_atom(jatom), &
1346 : actual_x_data%map_atom_to_kind_atom(katom))
1347 : swap_id = swap_id + 8
1348 : END IF
1349 : END IF
1350 :
1351 : !! At this stage, check for memory used in compression
1352 :
1353 : IF (my_geo_change) THEN
1354 : IF (.NOT. memory_parameter%do_all_on_the_fly) THEN
1355 : ! ** We know the maximum amount of integrals that we can store per MPI-process
1356 : ! ** Now we need to sum the current memory usage among all openMP threads
1357 : ! ** We can just read what is currently stored on the corresponding x_data type
1358 : ! ** If this is thread i and it tries to read the data from thread j, that is
1359 : ! ** currently writing that data, we just dont care, because the possible error
1360 : ! ** is of the order of CACHE_SIZE
1361 : mem_compression_counter = 0
1362 : DO i = 1, n_threads
1363 : !$OMP ATOMIC READ
1364 : tmp_i4 = x_data(irep, i)%memory_parameter%actual_memory_usage
1365 : mem_compression_counter = mem_compression_counter + &
1366 : tmp_i4*memory_parameter%cache_size
1367 : END DO
1368 : IF (mem_compression_counter > memory_parameter%max_compression_counter) THEN
1369 : buffer_overflow = .TRUE.
1370 : IF (do_dynamic_load_balancing) THEN
1371 : distribution_energy%ram_counter = counter
1372 : ELSE
1373 : memory_parameter%ram_counter = counter
1374 : END IF
1375 : ELSE
1376 : counter = counter + 1
1377 : buffer_overflow = .FALSE.
1378 : END IF
1379 : END IF
1380 : ELSE
1381 : IF (.NOT. memory_parameter%do_all_on_the_fly) THEN
1382 : IF (do_dynamic_load_balancing) THEN
1383 : IF (distribution_energy%ram_counter == counter) THEN
1384 : buffer_overflow = .TRUE.
1385 : ELSE
1386 : counter = counter + 1
1387 : buffer_overflow = .FALSE.
1388 : END IF
1389 :
1390 : ELSE
1391 : IF (memory_parameter%ram_counter == counter) THEN
1392 : buffer_overflow = .TRUE.
1393 : ELSE
1394 : counter = counter + 1
1395 : buffer_overflow = .FALSE.
1396 : END IF
1397 : END IF
1398 : END IF
1399 : END IF
1400 :
1401 : IF (buffer_overflow .AND. do_disk_storage) THEN
1402 : use_disk_storage = .TRUE.
1403 : buffer_overflow = .FALSE.
1404 : END IF
1405 :
1406 : IF (use_disk_storage) THEN
1407 : !$OMP ATOMIC READ
1408 : tmp_i4 = memory_parameter%actual_memory_usage_disk
1409 : mem_compression_counter_disk = tmp_i4*memory_parameter%cache_size
1410 : IF (mem_compression_counter_disk > memory_parameter%max_compression_counter_disk) THEN
1411 : buffer_overflow = .TRUE.
1412 : use_disk_storage = .FALSE.
1413 : END IF
1414 : END IF
1415 :
1416 : DO i_set_list_ij = i_set_list_ij_start, i_set_list_ij_stop
1417 : iset = set_list_ij(i_set_list_ij)%pair(1)
1418 : jset = set_list_ij(i_set_list_ij)%pair(2)
1419 :
1420 : ncob = npgfb(jset)*ncoset(lb_max(jset))
1421 : max_val1 = screen_coeffs_set(jset, iset, jkind, ikind)%x(1)*rab2 + &
1422 : screen_coeffs_set(jset, iset, jkind, ikind)%x(2)
1423 :
1424 : IF (max_val1 + screen_kind_kl + pmax_atom < log10_eps_schwarz) CYCLE
1425 :
1426 : sphi_a_ext_set => sphi_a_ext(:, :, :, iset)
1427 : sphi_b_ext_set => sphi_b_ext(:, :, :, jset)
1428 : DO i_set_list_kl = i_set_list_kl_start, i_set_list_kl_stop
1429 : kset = set_list_kl(i_set_list_kl)%pair(1)
1430 : lset = set_list_kl(i_set_list_kl)%pair(2)
1431 :
1432 : max_val2_set = (screen_coeffs_set(lset, kset, lkind, kkind)%x(1)*rcd2 + &
1433 : screen_coeffs_set(lset, kset, lkind, kkind)%x(2))
1434 : max_val2 = max_val1 + max_val2_set
1435 :
1436 : !! Near field screening
1437 : IF (max_val2 + pmax_atom < log10_eps_schwarz) CYCLE
1438 : sphi_c_ext_set => sphi_c_ext(:, :, :, kset)
1439 : sphi_d_ext_set => sphi_d_ext(:, :, :, lset)
1440 : !! get max_vals if we screen on initial density
1441 : IF (do_p_screening) THEN
1442 : CALL get_pmax_val(ptr_p_1, ptr_p_2, ptr_p_3, ptr_p_4, &
1443 : iset, jset, kset, lset, &
1444 : pmax_entry, swap_id)
1445 : ELSE
1446 : pmax_entry = 0.0_dp
1447 : END IF
1448 : log10_pmax = pmax_entry
1449 : max_val2 = max_val2 + log10_pmax
1450 : IF (max_val2 < log10_eps_schwarz) CYCLE
1451 : pmax_entry = EXP(log10_pmax*ln_10)
1452 :
1453 : !! store current number of integrals, update total number and number of integrals in buffer
1454 : current_counter = nsgfa(iset)*nsgfb(jset)*nsgfc(kset)*nsgfd(lset)
1455 : IF (buffer_overflow) THEN
1456 : neris_onthefly = neris_onthefly + current_counter
1457 : END IF
1458 :
1459 : !! Get integrals from buffer and update Kohn-Sham matrix
1460 : IF (.NOT. buffer_overflow .AND. .NOT. my_geo_change) THEN
1461 : nints = current_counter
1462 : IF (.NOT. use_disk_storage) THEN
1463 : CALL hfx_get_single_cache_element( &
1464 : estimate_to_store_int, 6, &
1465 : maxval_cache, maxval_container, memory_parameter%actual_memory_usage, &
1466 : use_disk_storage)
1467 : ELSE
1468 : CALL hfx_get_single_cache_element( &
1469 : estimate_to_store_int, 6, &
1470 : maxval_cache_disk, maxval_container_disk, memory_parameter%actual_memory_usage_disk, &
1471 : use_disk_storage)
1472 : END IF
1473 : spherical_estimate = SET_EXPONENT(1.0_dp, estimate_to_store_int + 1)
1474 : IF (spherical_estimate*pmax_entry < eps_schwarz) CYCLE
1475 : nbits = EXPONENT(ANINT(spherical_estimate*pmax_entry/eps_storage)) + 1
1476 : buffer_left = nints
1477 : buffer_start = 1
1478 : IF (.NOT. use_disk_storage) THEN
1479 : neris_incore = neris_incore + INT(nints, int_8)
1480 : ELSE
1481 : neris_disk = neris_disk + INT(nints, int_8)
1482 : END IF
1483 : DO WHILE (buffer_left > 0)
1484 : buffer_size = MIN(buffer_left, cache_size)
1485 : IF (.NOT. use_disk_storage) THEN
1486 : CALL hfx_get_mult_cache_elements(primitive_integrals(buffer_start), &
1487 : buffer_size, nbits, &
1488 : integral_caches(nbits), &
1489 : integral_containers(nbits), &
1490 : eps_storage, pmax_entry, &
1491 : memory_parameter%actual_memory_usage, &
1492 : use_disk_storage)
1493 : ELSE
1494 : CALL hfx_get_mult_cache_elements(primitive_integrals(buffer_start), &
1495 : buffer_size, nbits, &
1496 : integral_caches_disk(nbits), &
1497 : integral_containers_disk(nbits), &
1498 : eps_storage, pmax_entry, &
1499 : memory_parameter%actual_memory_usage_disk, &
1500 : use_disk_storage)
1501 : END IF
1502 : buffer_left = buffer_left - buffer_size
1503 : buffer_start = buffer_start + buffer_size
1504 : END DO
1505 : END IF
1506 : !! Calculate integrals if we run out of buffer or the geometry did change
1507 : IF (my_geo_change .OR. buffer_overflow) THEN
1508 : max_contraction_val = max_contraction(iset, iatom)* &
1509 : max_contraction(jset, jatom)* &
1510 : max_contraction(kset, katom)* &
1511 : max_contraction(lset, latom)*pmax_entry
1512 : tmp_R_1 => radii_pgf(:, :, jset, iset, jkind, ikind)
1513 : tmp_R_2 => radii_pgf(:, :, lset, kset, lkind, kkind)
1514 : tmp_screen_pgf1 => screen_coeffs_pgf(:, :, jset, iset, jkind, ikind)
1515 : tmp_screen_pgf2 => screen_coeffs_pgf(:, :, lset, kset, lkind, kkind)
1516 :
1517 : CALL coulomb4(private_lib, ra, rb, rc, rd, npgfa(iset), npgfb(jset), npgfc(kset), npgfd(lset), &
1518 : la_min(iset), la_max(iset), lb_min(jset), lb_max(jset), &
1519 : lc_min(kset), lc_max(kset), ld_min(lset), ld_max(lset), &
1520 : nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
1521 : sphi_a_u1, sphi_a_u2, sphi_a_u3, &
1522 : sphi_b_u1, sphi_b_u2, sphi_b_u3, &
1523 : sphi_c_u1, sphi_c_u2, sphi_c_u3, &
1524 : sphi_d_u1, sphi_d_u2, sphi_d_u3, &
1525 : zeta(1:npgfa(iset), iset), zetb(1:npgfb(jset), jset), &
1526 : zetc(1:npgfc(kset), kset), zetd(1:npgfd(lset), lset), &
1527 : primitive_integrals, &
1528 : potential_parameter, &
1529 : actual_x_data%neighbor_cells, screen_coeffs_set(jset, iset, jkind, ikind)%x, &
1530 : screen_coeffs_set(lset, kset, lkind, kkind)%x, eps_schwarz, &
1531 : max_contraction_val, cartesian_estimate, cell, neris_tmp, &
1532 : log10_pmax, log10_eps_schwarz, &
1533 : tmp_R_1, tmp_R_2, tmp_screen_pgf1, tmp_screen_pgf2, &
1534 : pgf_list_ij, pgf_list_kl, pgf_product_list, &
1535 : nsgfl_a(:, iset), nsgfl_b(:, jset), &
1536 : nsgfl_c(:, kset), nsgfl_d(:, lset), &
1537 : sphi_a_ext_set, &
1538 : sphi_b_ext_set, &
1539 : sphi_c_ext_set, &
1540 : sphi_d_ext_set, &
1541 : ee_work, ee_work2, ee_buffer1, ee_buffer2, ee_primitives_tmp, &
1542 : nimages, do_periodic, p_work)
1543 :
1544 : nints = nsgfa(iset)*nsgfb(jset)*nsgfc(kset)*nsgfd(lset)
1545 : neris_total = neris_total + nints
1546 : nprim_ints = nprim_ints + neris_tmp
1547 : ! IF (cartesian_estimate == 0.0_dp) cartesian_estimate = TINY(cartesian_estimate)
1548 : ! estimate_to_store_int = EXPONENT(cartesian_estimate)
1549 : ! estimate_to_store_int = MAX(estimate_to_store_int, -15_int_8)
1550 : ! cartesian_estimate = SET_EXPONENT(1.0_dp, estimate_to_store_int+1)
1551 : ! IF (.NOT. buffer_overflow .AND. my_geo_change) THEN
1552 : ! IF (cartesian_estimate < eps_schwarz) THEN
1553 : ! IF (.NOT. use_disk_storage) THEN
1554 : ! CALL hfx_add_single_cache_element( &
1555 : ! estimate_to_store_int, 6, &
1556 : ! maxval_cache, maxval_container, memory_parameter%actual_memory_usage, &
1557 : ! use_disk_storage, max_val_memory)
1558 : ! ELSE
1559 : ! CALL hfx_add_single_cache_element( &
1560 : ! estimate_to_store_int, 6, &
1561 : ! maxval_cache_disk, maxval_container_disk, memory_parameter%actual_memory_usage_disk, &
1562 : ! use_disk_storage)
1563 : ! END IF
1564 : ! END IF
1565 : ! END IF
1566 : !
1567 : ! IF (cartesian_estimate < eps_schwarz) CYCLE
1568 :
1569 : !! Compress the array for storage
1570 : spherical_estimate = 0.0_dp
1571 : DO i = 1, nints
1572 : spherical_estimate = MAX(spherical_estimate, ABS(primitive_integrals(i)))
1573 : END DO
1574 :
1575 : IF (spherical_estimate == 0.0_dp) spherical_estimate = TINY(spherical_estimate)
1576 : estimate_to_store_int = EXPONENT(spherical_estimate)
1577 : estimate_to_store_int = MAX(estimate_to_store_int, -15_int_8)
1578 :
1579 : IF (.NOT. buffer_overflow .AND. my_geo_change) THEN
1580 : IF (.NOT. use_disk_storage) THEN
1581 : CALL hfx_add_single_cache_element( &
1582 : estimate_to_store_int, 6, &
1583 : maxval_cache, maxval_container, memory_parameter%actual_memory_usage, &
1584 : use_disk_storage, max_val_memory)
1585 : ELSE
1586 : CALL hfx_add_single_cache_element( &
1587 : estimate_to_store_int, 6, &
1588 : maxval_cache_disk, maxval_container_disk, memory_parameter%actual_memory_usage_disk, &
1589 : use_disk_storage)
1590 : END IF
1591 : END IF
1592 : spherical_estimate = SET_EXPONENT(1.0_dp, estimate_to_store_int + 1)
1593 : IF (spherical_estimate*pmax_entry < eps_schwarz) CYCLE
1594 : IF (.NOT. buffer_overflow) THEN
1595 : nbits = EXPONENT(ANINT(spherical_estimate*pmax_entry/eps_storage)) + 1
1596 :
1597 : ! In case of a tight eps_storage threshold the number of significant
1598 : ! bits in the integer number NINT(value*pmax_entry/eps_storage) may
1599 : ! exceed the width of the storage element. As the compression algorithm
1600 : ! is designed for IEEE 754 double precision numbers, a 64-bit signed
1601 : ! integer variable which is used to store the result of this float-to-
1602 : ! integer conversion (we have no wish to use more memory for storing
1603 : ! compressed ERIs than it is needed for uncompressed ERIs) may overflow.
1604 : ! Abort with a meaningful message when it happens.
1605 : !
1606 : ! The magic number 63 stands for the number of magnitude bits
1607 : ! (64 bits minus one sign bit).
1608 : IF (nbits > 63) THEN
1609 : WRITE (eps_schwarz_min_str, '(ES10.3E2)') &
1610 : spherical_estimate*pmax_entry/ &
1611 : (SET_EXPONENT(1.0_dp, 63)*memory_parameter%eps_storage_scaling)
1612 :
1613 : WRITE (eps_scaling_str, '(ES10.3E2)') &
1614 : spherical_estimate*pmax_entry/(SET_EXPONENT(1.0_dp, 63)*eps_schwarz)
1615 :
1616 : CALL cp_abort(__LOCATION__, &
1617 : "Overflow during ERI's compression. Please use a larger "// &
1618 : "EPS_SCHWARZ threshold (above "//TRIM(ADJUSTL(eps_schwarz_min_str))// &
1619 : ") or increase the EPS_STORAGE_SCALING factor above "// &
1620 : TRIM(ADJUSTL(eps_scaling_str))//".")
1621 : END IF
1622 :
1623 : buffer_left = nints
1624 : buffer_start = 1
1625 : IF (.NOT. use_disk_storage) THEN
1626 : neris_incore = neris_incore + INT(nints, int_8)
1627 : ! neris_incore = neris_incore+nints
1628 : ELSE
1629 : neris_disk = neris_disk + INT(nints, int_8)
1630 : ! neris_disk = neris_disk+nints
1631 : END IF
1632 : DO WHILE (buffer_left > 0)
1633 : buffer_size = MIN(buffer_left, CACHE_SIZE)
1634 : IF (.NOT. use_disk_storage) THEN
1635 : CALL hfx_add_mult_cache_elements(primitive_integrals(buffer_start), &
1636 : buffer_size, nbits, &
1637 : integral_caches(nbits), &
1638 : integral_containers(nbits), &
1639 : eps_storage, pmax_entry, &
1640 : memory_parameter%actual_memory_usage, &
1641 : use_disk_storage)
1642 : ELSE
1643 : CALL hfx_add_mult_cache_elements(primitive_integrals(buffer_start), &
1644 : buffer_size, nbits, &
1645 : integral_caches_disk(nbits), &
1646 : integral_containers_disk(nbits), &
1647 : eps_storage, pmax_entry, &
1648 : memory_parameter%actual_memory_usage_disk, &
1649 : use_disk_storage)
1650 : END IF
1651 : buffer_left = buffer_left - buffer_size
1652 : buffer_start = buffer_start + buffer_size
1653 : END DO
1654 : ELSE
1655 : !! In order to be consistent with in-core part, round all the eris wrt. eps_schwarz
1656 : DO i = 1, nints
1657 : primitive_integrals(i) = primitive_integrals(i)*pmax_entry
1658 : IF (ABS(primitive_integrals(i)) > eps_storage) THEN
1659 : primitive_integrals(i) = ANINT(primitive_integrals(i)/eps_storage, dp)*eps_storage/pmax_entry
1660 : ELSE
1661 : primitive_integrals(i) = 0.0_dp
1662 : END IF
1663 : END DO
1664 : END IF
1665 : END IF
1666 : !!! DEBUG, print out primitive integrals and indices. Only works serial no OMP !!!
1667 : IF (.FALSE.) THEN
1668 : CALL print_integrals( &
1669 : iatom, jatom, katom, latom, shm_set_offset, shm_atomic_block_offset, &
1670 : iset, jset, kset, lset, nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), primitive_integrals)
1671 : END IF
1672 : IF (.NOT. is_anti_symmetric) THEN
1673 : !! Update Kohn-Sham matrix
1674 : CALL update_fock_matrix( &
1675 : nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
1676 : fac, symm_fac, full_density_alpha(:, 1), full_ks_alpha(:, 1), &
1677 : primitive_integrals, pbd_buf, pbc_buf, pad_buf, pac_buf, kbd_buf, &
1678 : kbc_buf, kad_buf, kac_buf, iatom, jatom, katom, latom, &
1679 : iset, jset, kset, lset, offset_bd_set, offset_bc_set, offset_ad_set, offset_ac_set, &
1680 : atomic_offset_bd, atomic_offset_bc, atomic_offset_ad, atomic_offset_ac)
1681 : IF (.NOT. treat_lsd_in_core) THEN
1682 : IF (nspins == 2) THEN
1683 : CALL update_fock_matrix( &
1684 : nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
1685 : fac, symm_fac, full_density_beta(:, 1), full_ks_beta(:, 1), &
1686 : primitive_integrals, pbd_buf, pbc_buf, pad_buf, pac_buf, kbd_buf, &
1687 : kbc_buf, kad_buf, kac_buf, iatom, jatom, katom, latom, &
1688 : iset, jset, kset, lset, offset_bd_set, offset_bc_set, offset_ad_set, offset_ac_set, &
1689 : atomic_offset_bd, atomic_offset_bc, atomic_offset_ad, atomic_offset_ac)
1690 : END IF
1691 : END IF
1692 : ELSE
1693 : !! Update Kohn-Sham matrix
1694 : CALL update_fock_matrix_as( &
1695 : nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
1696 : fac, symm_fac, full_density_alpha(:, 1), full_ks_alpha(:, 1), &
1697 : primitive_integrals, pbd_buf, pbc_buf, pad_buf, pac_buf, kbd_buf, &
1698 : kbc_buf, kad_buf, kac_buf, iatom, jatom, katom, latom, &
1699 : iset, jset, kset, lset, offset_bd_set, offset_bc_set, offset_ad_set, offset_ac_set, &
1700 : atomic_offset_bd, atomic_offset_bc, atomic_offset_ad, atomic_offset_ac)
1701 : IF (.NOT. treat_lsd_in_core) THEN
1702 : IF (nspins == 2) THEN
1703 : CALL update_fock_matrix_as( &
1704 : nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
1705 : fac, symm_fac, full_density_beta(:, 1), full_ks_beta(:, 1), &
1706 : primitive_integrals, pbd_buf, pbc_buf, pad_buf, pac_buf, kbd_buf, &
1707 : kbc_buf, kad_buf, kac_buf, iatom, jatom, katom, latom, &
1708 : iset, jset, kset, lset, offset_bd_set, offset_bc_set, offset_ad_set, offset_ac_set, &
1709 : atomic_offset_bd, atomic_offset_bc, atomic_offset_ad, atomic_offset_ac)
1710 : END IF
1711 : END IF
1712 : END IF
1713 : END DO ! i_set_list_kl
1714 : END DO ! i_set_list_ij
1715 : IF (do_disk_storage) THEN
1716 : buffer_overflow = .TRUE.
1717 : END IF
1718 : END DO !i_list_ij
1719 : END DO !i_list_kl
1720 : END DO atomic_blocks
1721 : bintime_stop = m_walltime()
1722 : IF (my_geo_change) THEN
1723 : distribution_energy%time_first_scf = bintime_stop - bintime_start
1724 : ELSE
1725 : distribution_energy%time_other_scf = &
1726 : distribution_energy%time_other_scf + bintime_stop - bintime_start
1727 : END IF
1728 : !$OMP MASTER
1729 : CALL timestop(handle_bin)
1730 : !$OMP END MASTER
1731 : END DO !bin
1732 :
1733 : !$OMP MASTER
1734 : logger => cp_get_default_logger()
1735 : do_print_load_balance_info = .FALSE.
1736 : do_print_load_balance_info = BTEST(cp_print_key_should_output(logger%iter_info, hfx_section, &
1737 : "LOAD_BALANCE%PRINT/LOAD_BALANCE_INFO"), cp_p_file)
1738 : !$OMP END MASTER
1739 : !$OMP BARRIER
1740 : IF (do_print_load_balance_info) THEN
1741 : iw = -1
1742 : !$OMP MASTER
1743 : iw = cp_print_key_unit_nr(logger, hfx_section, "LOAD_BALANCE%PRINT/LOAD_BALANCE_INFO", &
1744 : extension=".scfLog")
1745 : !$OMP END MASTER
1746 :
1747 : CALL collect_load_balance_info(para_env, actual_x_data, iw, n_threads, i_thread, &
1748 : hfx_do_eval_energy)
1749 :
1750 : !$OMP MASTER
1751 : CALL cp_print_key_finished_output(iw, logger, hfx_section, &
1752 : "LOAD_BALANCE%PRINT/LOAD_BALANCE_INFO")
1753 : !$OMP END MASTER
1754 : END IF
1755 :
1756 : !$OMP BARRIER
1757 : !$OMP MASTER
1758 : CALL m_memory(memsize_after)
1759 : !$OMP END MASTER
1760 : !$OMP BARRIER
1761 :
1762 : DEALLOCATE (primitive_integrals)
1763 : !$OMP BARRIER
1764 : !! Get some number about ERIS
1765 : !$OMP ATOMIC
1766 : shm_neris_total = shm_neris_total + neris_total
1767 : !$OMP ATOMIC
1768 : shm_neris_onthefly = shm_neris_onthefly + neris_onthefly
1769 : !$OMP ATOMIC
1770 : shm_nprim_ints = shm_nprim_ints + nprim_ints
1771 :
1772 : storage_counter_integrals = memory_parameter%actual_memory_usage* &
1773 : memory_parameter%cache_size
1774 : stor_count_int_disk = memory_parameter%actual_memory_usage_disk* &
1775 : memory_parameter%cache_size
1776 : stor_count_max_val = max_val_memory*memory_parameter%cache_size
1777 : !$OMP ATOMIC
1778 : shm_storage_counter_integrals = shm_storage_counter_integrals + storage_counter_integrals
1779 : !$OMP ATOMIC
1780 : shm_stor_count_int_disk = shm_stor_count_int_disk + stor_count_int_disk
1781 : !$OMP ATOMIC
1782 : shm_neris_incore = shm_neris_incore + neris_incore
1783 : !$OMP ATOMIC
1784 : shm_neris_disk = shm_neris_disk + neris_disk
1785 : !$OMP ATOMIC
1786 : shm_stor_count_max_val = shm_stor_count_max_val + stor_count_max_val
1787 : !$OMP BARRIER
1788 :
1789 : ! ** Calculate how much memory has already been used (might be needed for in-core forces
1790 : !$OMP MASTER
1791 : shm_mem_compression_counter = 0
1792 : DO i = 1, n_threads
1793 : !$OMP ATOMIC READ
1794 : tmp_i4 = x_data(irep, i)%memory_parameter%actual_memory_usage
1795 : shm_mem_compression_counter = shm_mem_compression_counter + &
1796 : tmp_i4*memory_parameter%cache_size
1797 : END DO
1798 : !$OMP END MASTER
1799 : !$OMP BARRIER
1800 : actual_x_data%memory_parameter%final_comp_counter_energy = shm_mem_compression_counter
1801 :
1802 : !$OMP MASTER
1803 : !! Calculate the exchange energies from the Kohn-Sham matrix. Before we can go on, we have to symmetrize.
1804 : ene_x_aa = 0.0_dp
1805 : ene_x_bb = 0.0_dp
1806 :
1807 : mb_size_p = shm_block_offset(ncpu + 1)/1024/128
1808 : mb_size_f = shm_block_offset(ncpu + 1)/1024/128
1809 : IF (.NOT. treat_lsd_in_core) THEN
1810 : IF (nspins == 2) THEN
1811 : mb_size_f = mb_size_f*2
1812 : mb_size_p = mb_size_p*2
1813 : END IF
1814 : END IF
1815 : !! size of primitive_integrals(not shared)
1816 : mb_size_buffers = INT(nsgf_max, int_8)**4*n_threads
1817 : !! fock density buffers (not shared)
1818 : mb_size_buffers = mb_size_buffers + INT(nsgf_max, int_8)**2*n_threads
1819 : subtr_size_mb = subtr_size_mb + 8_int_8*nsgf_max**2*n_threads
1820 : !! size of screening functions (shared)
1821 : mb_size_buffers = mb_size_buffers + max_pgf**2*max_set**2*nkind**2 &
1822 : + max_set**2*nkind**2 &
1823 : + nkind**2 &
1824 : + max_pgf**2*max_set**2*nkind**2
1825 : !! is_assoc (shared)
1826 : mb_size_buffers = mb_size_buffers + natom**2
1827 : ! ** pmax_atom (shared)
1828 : IF (do_p_screening) THEN
1829 : mb_size_buffers = mb_size_buffers + natom**2
1830 : END IF
1831 : IF (screening_parameter%do_p_screening_forces) THEN
1832 : IF (memory_parameter%treat_forces_in_core) THEN
1833 : mb_size_buffers = mb_size_buffers + natom**2
1834 : END IF
1835 : END IF
1836 : ! ** Initial P only MAX(alpha,beta) (shared)
1837 : IF (do_p_screening .OR. screening_parameter%do_p_screening_forces) THEN
1838 : mb_size_buffers = mb_size_buffers + memory_parameter%size_p_screen
1839 : END IF
1840 : ! ** In core forces require their own initial P
1841 : IF (screening_parameter%do_p_screening_forces) THEN
1842 : IF (memory_parameter%treat_forces_in_core) THEN
1843 : mb_size_buffers = mb_size_buffers + memory_parameter%size_p_screen
1844 : END IF
1845 : END IF
1846 :
1847 : !! mb
1848 : mb_size_buffers = mb_size_buffers/1024/128
1849 :
1850 : afac = 1.0_dp
1851 : IF (is_anti_symmetric) afac = -1.0_dp
1852 : CALL timestop(handle_main)
1853 : ene_x_aa_diag = 0.0_dp
1854 : ene_x_bb_diag = 0.0_dp
1855 : DO iatom = 1, natom
1856 : ikind = kind_of(iatom)
1857 : nseta = basis_parameter(ikind)%nset
1858 : nsgfa => basis_parameter(ikind)%nsgf
1859 : jatom = iatom
1860 : jkind = kind_of(jatom)
1861 : nsetb = basis_parameter(jkind)%nset
1862 : nsgfb => basis_parameter(jkind)%nsgf
1863 : act_atomic_block_offset = shm_atomic_block_offset(jatom, iatom)
1864 : DO img = 1, nkimages
1865 : DO iset = 1, nseta
1866 : DO jset = 1, nsetb
1867 : act_set_offset = shm_set_offset(jset, iset, jkind, ikind)
1868 : i = act_set_offset + act_atomic_block_offset - 1
1869 : DO ma = 1, nsgfa(iset)
1870 : j = shm_set_offset(iset, jset, jkind, ikind) + act_atomic_block_offset - 1 + ma - 1
1871 : DO mb = 1, nsgfb(jset)
1872 : IF (i > j) THEN
1873 : full_ks_alpha(i, img) = (full_ks_alpha(i, img) + full_ks_alpha(j, img)*afac)
1874 : full_ks_alpha(j, img) = full_ks_alpha(i, img)*afac
1875 : IF (.NOT. treat_lsd_in_core .AND. nspins == 2) THEN
1876 : full_ks_beta(i, img) = (full_ks_beta(i, img) + full_ks_beta(j, img)*afac)
1877 : full_ks_beta(j, img) = full_ks_beta(i, img)*afac
1878 : END IF
1879 : END IF
1880 : ! ** For adiabatically rescaled functionals we need the energy coming from the diagonal elements
1881 : IF (i == j) THEN
1882 : ene_x_aa_diag = ene_x_aa_diag + full_ks_alpha(i, img)*full_density_alpha(i, img)
1883 : IF (.NOT. treat_lsd_in_core .AND. nspins == 2) THEN
1884 : ene_x_bb_diag = ene_x_bb_diag + full_ks_beta(i, img)*full_density_beta(i, img)
1885 : END IF
1886 : END IF
1887 : i = i + 1
1888 : j = j + nsgfa(iset)
1889 : END DO
1890 : END DO
1891 : END DO
1892 : END DO
1893 : END DO
1894 : END DO
1895 :
1896 : CALL para_env%sync()
1897 : afac = 1.0_dp
1898 : IF (is_anti_symmetric) afac = 0._dp
1899 : IF (distribute_fock_matrix) THEN
1900 : !! Distribute the current KS-matrix to all the processes
1901 : CALL timeset(routineN//"_dist_KS", handle_dist_ks)
1902 : DO img = 1, nkimages
1903 : CALL distribute_ks_matrix(para_env, full_ks_alpha(:, img), ks_matrix(ispin, img)%matrix, shm_number_of_p_entries, &
1904 : shm_block_offset, kind_of, basis_parameter, &
1905 : off_diag_fac=0.5_dp, diag_fac=afac)
1906 : END DO
1907 :
1908 : NULLIFY (full_ks_alpha)
1909 : DEALLOCATE (shm_master_x_data%full_ks_alpha)
1910 : IF (.NOT. treat_lsd_in_core) THEN
1911 : IF (nspins == 2) THEN
1912 : DO img = 1, nkimages
1913 : CALL distribute_ks_matrix(para_env, full_ks_beta(:, img), ks_matrix(2, img)%matrix, shm_number_of_p_entries, &
1914 : shm_block_offset, kind_of, basis_parameter, &
1915 : off_diag_fac=0.5_dp, diag_fac=afac)
1916 : END DO
1917 : NULLIFY (full_ks_beta)
1918 : DEALLOCATE (shm_master_x_data%full_ks_beta)
1919 : END IF
1920 : END IF
1921 : CALL timestop(handle_dist_ks)
1922 : END IF
1923 :
1924 : IF (distribute_fock_matrix) THEN
1925 : !! ** Calculate the exchange energy
1926 : ene_x_aa = 0.0_dp
1927 : DO img = 1, nkimages
1928 : CALL dbcsr_dot(ks_matrix(ispin, img)%matrix, rho_ao(ispin, img)%matrix, &
1929 : etmp)
1930 : ene_x_aa = ene_x_aa + etmp
1931 : END DO
1932 : !for ADMMS, we need the exchange matrix k(d) for both spins
1933 : IF (dft_control%do_admm) THEN
1934 : CPASSERT(nkimages == 1)
1935 : CALL dbcsr_copy(matrix_ks_aux_fit_hfx(ispin)%matrix, ks_matrix(ispin, 1)%matrix, &
1936 : name="HF exch. part of matrix_ks_aux_fit for ADMMS")
1937 : END IF
1938 :
1939 : ene_x_bb = 0.0_dp
1940 : IF (nspins == 2 .AND. .NOT. treat_lsd_in_core) THEN
1941 : DO img = 1, nkimages
1942 : CALL dbcsr_dot(ks_matrix(2, img)%matrix, rho_ao(2, img)%matrix, &
1943 : etmp)
1944 : ene_x_bb = ene_x_bb + etmp
1945 : END DO
1946 : !for ADMMS, we need the exchange matrix k(d) for both spins
1947 : IF (dft_control%do_admm) THEN
1948 : CPASSERT(nkimages == 1)
1949 : CALL dbcsr_copy(matrix_ks_aux_fit_hfx(2)%matrix, ks_matrix(2, 1)%matrix, &
1950 : name="HF exch. part of matrix_ks_aux_fit for ADMMS")
1951 : END IF
1952 : END IF
1953 :
1954 : !! Update energy type
1955 : ehfx = 0.5_dp*(ene_x_aa + ene_x_bb)
1956 : ELSE
1957 : ! ** It is easier to correct the following expression by the diagonal energy contribution,
1958 : ! ** than explicitly going throuhg the diagonal elements
1959 : DO img = 1, nkimages
1960 : DO pa = 1, SIZE(full_ks_alpha, 1)
1961 : ene_x_aa = ene_x_aa + full_ks_alpha(pa, img)*full_density_alpha(pa, img)
1962 : END DO
1963 : END DO
1964 : ! ** Now correct
1965 : ene_x_aa = (ene_x_aa + ene_x_aa_diag)*0.5_dp
1966 : IF (nspins == 2) THEN
1967 : DO img = 1, nkimages
1968 : DO pa = 1, SIZE(full_ks_beta, 1)
1969 : ene_x_bb = ene_x_bb + full_ks_beta(pa, img)*full_density_beta(pa, img)
1970 : END DO
1971 : END DO
1972 : ! ** Now correct
1973 : ene_x_bb = (ene_x_bb + ene_x_bb_diag)*0.5_dp
1974 : END IF
1975 : CALL para_env%sum(ene_x_aa)
1976 : IF (nspins == 2) CALL para_env%sum(ene_x_bb)
1977 : ehfx = 0.5_dp*(ene_x_aa + ene_x_bb)
1978 : END IF
1979 :
1980 : !! Print some memeory information if this is the first step
1981 : IF (my_geo_change) THEN
1982 : tmp_i8(1:8) = (/shm_storage_counter_integrals, shm_neris_onthefly, shm_neris_incore, shm_neris_disk, &
1983 : shm_neris_total, shm_stor_count_int_disk, shm_nprim_ints, shm_stor_count_max_val/)
1984 : CALL para_env%sum(tmp_i8)
1985 : shm_storage_counter_integrals = tmp_i8(1)
1986 : shm_neris_onthefly = tmp_i8(2)
1987 : shm_neris_incore = tmp_i8(3)
1988 : shm_neris_disk = tmp_i8(4)
1989 : shm_neris_total = tmp_i8(5)
1990 : shm_stor_count_int_disk = tmp_i8(6)
1991 : shm_nprim_ints = tmp_i8(7)
1992 : shm_stor_count_max_val = tmp_i8(8)
1993 : CALL para_env%max(memsize_after)
1994 : mem_eris = (shm_storage_counter_integrals + 128*1024 - 1)/1024/128
1995 : compression_factor = REAL(shm_neris_incore, dp)/REAL(shm_storage_counter_integrals, dp)
1996 : mem_eris_disk = (shm_stor_count_int_disk + 128*1024 - 1)/1024/128
1997 : compression_factor_disk = REAL(shm_neris_disk, dp)/REAL(shm_stor_count_int_disk, dp)
1998 : mem_max_val = (shm_stor_count_max_val + 128*1024 - 1)/1024/128
1999 :
2000 : IF (shm_neris_incore == 0) THEN
2001 : mem_eris = 0
2002 : compression_factor = 0.0_dp
2003 : END IF
2004 : IF (shm_neris_disk == 0) THEN
2005 : mem_eris_disk = 0
2006 : compression_factor_disk = 0.0_dp
2007 : END IF
2008 :
2009 : iw = cp_print_key_unit_nr(logger, hfx_section, "HF_INFO", &
2010 : extension=".scfLog")
2011 : IF (iw > 0) THEN
2012 : WRITE (UNIT=iw, FMT="((T3,A,T60,I21))") &
2013 : "HFX_MEM_INFO| Number of cart. primitive ERI's calculated: ", shm_nprim_ints
2014 :
2015 : WRITE (UNIT=iw, FMT="((T3,A,T60,I21))") &
2016 : "HFX_MEM_INFO| Number of sph. ERI's calculated: ", shm_neris_total
2017 :
2018 : WRITE (UNIT=iw, FMT="((T3,A,T60,I21))") &
2019 : "HFX_MEM_INFO| Number of sph. ERI's stored in-core: ", shm_neris_incore
2020 :
2021 : WRITE (UNIT=iw, FMT="((T3,A,T60,I21))") &
2022 : "HFX_MEM_INFO| Number of sph. ERI's stored on disk: ", shm_neris_disk
2023 :
2024 : WRITE (UNIT=iw, FMT="((T3,A,T60,I21))") &
2025 : "HFX_MEM_INFO| Number of sph. ERI's calculated on the fly: ", shm_neris_onthefly
2026 :
2027 : WRITE (UNIT=iw, FMT="((T3,A,T60,I21))") &
2028 : "HFX_MEM_INFO| Total memory consumption ERI's RAM [MiB]: ", mem_eris
2029 :
2030 : WRITE (UNIT=iw, FMT="((T3,A,T60,I21))") &
2031 : "HFX_MEM_INFO| Whereof max-vals [MiB]: ", mem_max_val
2032 :
2033 : WRITE (UNIT=iw, FMT="((T3,A,T60,F21.2))") &
2034 : "HFX_MEM_INFO| Total compression factor ERI's RAM: ", compression_factor
2035 :
2036 : WRITE (UNIT=iw, FMT="((T3,A,T60,I21))") &
2037 : "HFX_MEM_INFO| Total memory consumption ERI's disk [MiB]: ", mem_eris_disk
2038 :
2039 : WRITE (UNIT=iw, FMT="((T3,A,T60,F21.2))") &
2040 : "HFX_MEM_INFO| Total compression factor ERI's disk: ", compression_factor_disk
2041 :
2042 : WRITE (UNIT=iw, FMT="((T3,A,T60,I21))") &
2043 : "HFX_MEM_INFO| Size of density/Fock matrix [MiB]: ", 2_int_8*mb_size_p
2044 :
2045 : IF (do_periodic) THEN
2046 : WRITE (UNIT=iw, FMT="((T3,A,T60,I21))") &
2047 : "HFX_MEM_INFO| Size of buffers [MiB]: ", mb_size_buffers
2048 : WRITE (UNIT=iw, FMT="((T3,A,T60,I21))") &
2049 : "HFX_MEM_INFO| Number of periodic image cells considered: ", SIZE(shm_master_x_data%neighbor_cells)
2050 : ELSE
2051 : WRITE (UNIT=iw, FMT="((T3,A,T60,I21))") &
2052 : "HFX_MEM_INFO| Size of buffers [MiB]: ", mb_size_buffers
2053 : END IF
2054 : WRITE (UNIT=iw, FMT="((T3,A,T60,I21),/)") &
2055 : "HFX_MEM_INFO| Est. max. program size after HFX [MiB]:", memsize_after/(1024*1024)
2056 : CALL m_flush(iw)
2057 : END IF
2058 :
2059 : CALL cp_print_key_finished_output(iw, logger, hfx_section, &
2060 : "HF_INFO")
2061 : END IF
2062 : !$OMP END MASTER
2063 :
2064 : !! flush caches if the geometry changed
2065 : IF (do_dynamic_load_balancing) THEN
2066 : my_bin_size = SIZE(actual_x_data%distribution_energy)
2067 : ELSE
2068 : my_bin_size = 1
2069 : END IF
2070 :
2071 : IF (my_geo_change) THEN
2072 : IF (.NOT. memory_parameter%do_all_on_the_fly) THEN
2073 : DO bin = 1, my_bin_size
2074 : maxval_cache => actual_x_data%store_ints%maxval_cache(bin)
2075 : maxval_container => actual_x_data%store_ints%maxval_container(bin)
2076 : integral_caches => actual_x_data%store_ints%integral_caches(:, bin)
2077 : integral_containers => actual_x_data%store_ints%integral_containers(:, bin)
2078 : CALL hfx_flush_last_cache(bits_max_val, maxval_cache, maxval_container, memory_parameter%actual_memory_usage, &
2079 : .FALSE.)
2080 : DO i = 1, 64
2081 : CALL hfx_flush_last_cache(i, integral_caches(i), integral_containers(i), &
2082 : memory_parameter%actual_memory_usage, .FALSE.)
2083 : END DO
2084 : END DO
2085 : END IF
2086 : END IF
2087 : !! reset all caches except we calculate all on the fly
2088 : IF (.NOT. memory_parameter%do_all_on_the_fly) THEN
2089 : DO bin = 1, my_bin_size
2090 : maxval_cache => actual_x_data%store_ints%maxval_cache(bin)
2091 : maxval_container => actual_x_data%store_ints%maxval_container(bin)
2092 : integral_caches => actual_x_data%store_ints%integral_caches(:, bin)
2093 : integral_containers => actual_x_data%store_ints%integral_containers(:, bin)
2094 :
2095 : CALL hfx_reset_cache_and_container(maxval_cache, maxval_container, memory_parameter%actual_memory_usage, .FALSE.)
2096 : DO i = 1, 64
2097 : CALL hfx_reset_cache_and_container(integral_caches(i), integral_containers(i), &
2098 : memory_parameter%actual_memory_usage, &
2099 : .FALSE.)
2100 : END DO
2101 : END DO
2102 : END IF
2103 :
2104 : !! Since the I/O routines are no thread-safe, i.e. the procedure to get the unit number, put a lock here
2105 : !$OMP CRITICAL(hfxenergy_out_critical)
2106 : IF (do_disk_storage) THEN
2107 : !! flush caches if the geometry changed
2108 : IF (my_geo_change) THEN
2109 : CALL hfx_flush_last_cache(bits_max_val, maxval_cache_disk, maxval_container_disk, &
2110 : memory_parameter%actual_memory_usage_disk, .TRUE.)
2111 : DO i = 1, 64
2112 : CALL hfx_flush_last_cache(i, integral_caches_disk(i), integral_containers_disk(i), &
2113 : memory_parameter%actual_memory_usage_disk, .TRUE.)
2114 : END DO
2115 : END IF
2116 : !! reset all caches except we calculate all on the fly
2117 : CALL hfx_reset_cache_and_container(maxval_cache_disk, maxval_container_disk, memory_parameter%actual_memory_usage_disk, &
2118 : do_disk_storage)
2119 : DO i = 1, 64
2120 : CALL hfx_reset_cache_and_container(integral_caches_disk(i), integral_containers_disk(i), &
2121 : memory_parameter%actual_memory_usage_disk, do_disk_storage)
2122 : END DO
2123 : END IF
2124 : !$OMP END CRITICAL(hfxenergy_out_critical)
2125 : !$OMP BARRIER
2126 : !! Clean up
2127 : DEALLOCATE (last_sgf_global)
2128 : !$OMP MASTER
2129 : DEALLOCATE (full_density_alpha)
2130 : IF (.NOT. treat_lsd_in_core) THEN
2131 : IF (nspins == 2) THEN
2132 : DEALLOCATE (full_density_beta)
2133 : END IF
2134 : END IF
2135 : IF (do_dynamic_load_balancing) THEN
2136 : DEALLOCATE (shm_master_x_data%task_list)
2137 : END IF
2138 : !$OMP END MASTER
2139 : DEALLOCATE (pbd_buf, pbc_buf, pad_buf, pac_buf)
2140 : DEALLOCATE (kbd_buf, kbc_buf, kad_buf, kac_buf)
2141 : DEALLOCATE (set_list_ij, set_list_kl)
2142 :
2143 : DO i = 1, max_pgf**2
2144 : DEALLOCATE (pgf_list_ij(i)%image_list)
2145 : DEALLOCATE (pgf_list_kl(i)%image_list)
2146 : END DO
2147 :
2148 : DEALLOCATE (pgf_list_ij)
2149 : DEALLOCATE (pgf_list_kl)
2150 : DEALLOCATE (pgf_product_list)
2151 :
2152 : DEALLOCATE (max_contraction, kind_of)
2153 :
2154 : DEALLOCATE (ee_work, ee_work2, ee_buffer1, ee_buffer2, ee_primitives_tmp)
2155 :
2156 : DEALLOCATE (nimages)
2157 :
2158 : !$OMP BARRIER
2159 : !$OMP END PARALLEL
2160 :
2161 34235 : CALL timestop(handle)
2162 71003390 : END SUBROUTINE integrate_four_center
2163 :
2164 : ! **************************************************************************************************
2165 : !> \brief calculates two-electron integrals of a quartet/shell using the library
2166 : !> lib_int in the periodic case
2167 : !> \param lib ...
2168 : !> \param ra ...
2169 : !> \param rb ...
2170 : !> \param rc ...
2171 : !> \param rd ...
2172 : !> \param npgfa ...
2173 : !> \param npgfb ...
2174 : !> \param npgfc ...
2175 : !> \param npgfd ...
2176 : !> \param la_min ...
2177 : !> \param la_max ...
2178 : !> \param lb_min ...
2179 : !> \param lb_max ...
2180 : !> \param lc_min ...
2181 : !> \param lc_max ...
2182 : !> \param ld_min ...
2183 : !> \param ld_max ...
2184 : !> \param nsgfa ...
2185 : !> \param nsgfb ...
2186 : !> \param nsgfc ...
2187 : !> \param nsgfd ...
2188 : !> \param sphi_a_u1 ...
2189 : !> \param sphi_a_u2 ...
2190 : !> \param sphi_a_u3 ...
2191 : !> \param sphi_b_u1 ...
2192 : !> \param sphi_b_u2 ...
2193 : !> \param sphi_b_u3 ...
2194 : !> \param sphi_c_u1 ...
2195 : !> \param sphi_c_u2 ...
2196 : !> \param sphi_c_u3 ...
2197 : !> \param sphi_d_u1 ...
2198 : !> \param sphi_d_u2 ...
2199 : !> \param sphi_d_u3 ...
2200 : !> \param zeta ...
2201 : !> \param zetb ...
2202 : !> \param zetc ...
2203 : !> \param zetd ...
2204 : !> \param primitive_integrals array of primitive_integrals
2205 : !> \param potential_parameter contains info for libint
2206 : !> \param neighbor_cells Periodic images
2207 : !> \param screen1 set based coefficients for near field screening
2208 : !> \param screen2 set based coefficients for near field screening
2209 : !> \param eps_schwarz threshold
2210 : !> \param max_contraction_val maximum multiplication factor for cart -> sph
2211 : !> \param cart_estimate maximum calculated integral value
2212 : !> \param cell cell
2213 : !> \param neris_tmp counter for calculated cart integrals
2214 : !> \param log10_pmax logarithm of initial p matrix max element
2215 : !> \param log10_eps_schwarz log of threshold
2216 : !> \param R1_pgf coefficients for radii of product distribution function
2217 : !> \param R2_pgf coefficients for radii of product distribution function
2218 : !> \param pgf1 schwarz coefficients pgf basid
2219 : !> \param pgf2 schwarz coefficients pgf basid
2220 : !> \param pgf_list_ij ...
2221 : !> \param pgf_list_kl ...
2222 : !> \param pgf_product_list ...
2223 : !> \param nsgfl_a ...
2224 : !> \param nsgfl_b ...
2225 : !> \param nsgfl_c ...
2226 : !> \param nsgfl_d ...
2227 : !> \param sphi_a_ext ...
2228 : !> \param sphi_b_ext ...
2229 : !> \param sphi_c_ext ...
2230 : !> \param sphi_d_ext ...
2231 : !> \param ee_work ...
2232 : !> \param ee_work2 ...
2233 : !> \param ee_buffer1 ...
2234 : !> \param ee_buffer2 ...
2235 : !> \param ee_primitives_tmp ...
2236 : !> \param nimages ...
2237 : !> \param do_periodic ...
2238 : !> \param p_work ...
2239 : !> \par History
2240 : !> 11.2006 created [Manuel Guidon]
2241 : !> 02.2009 completely rewritten screening part [Manuel Guidon]
2242 : !> \author Manuel Guidon
2243 : ! **************************************************************************************************
2244 7016573 : SUBROUTINE coulomb4(lib, ra, rb, rc, rd, npgfa, npgfb, npgfc, npgfd, &
2245 : la_min, la_max, lb_min, lb_max, &
2246 : lc_min, lc_max, ld_min, ld_max, nsgfa, nsgfb, nsgfc, nsgfd, &
2247 : sphi_a_u1, sphi_a_u2, sphi_a_u3, &
2248 : sphi_b_u1, sphi_b_u2, sphi_b_u3, &
2249 : sphi_c_u1, sphi_c_u2, sphi_c_u3, &
2250 : sphi_d_u1, sphi_d_u2, sphi_d_u3, &
2251 7016573 : zeta, zetb, zetc, zetd, &
2252 7016573 : primitive_integrals, &
2253 : potential_parameter, neighbor_cells, &
2254 : screen1, screen2, eps_schwarz, max_contraction_val, &
2255 : cart_estimate, cell, neris_tmp, log10_pmax, &
2256 : log10_eps_schwarz, R1_pgf, R2_pgf, pgf1, pgf2, &
2257 : pgf_list_ij, pgf_list_kl, &
2258 : pgf_product_list, &
2259 7016573 : nsgfl_a, nsgfl_b, nsgfl_c, &
2260 7016573 : nsgfl_d, &
2261 7016573 : sphi_a_ext, sphi_b_ext, sphi_c_ext, sphi_d_ext, &
2262 : ee_work, ee_work2, ee_buffer1, ee_buffer2, ee_primitives_tmp, &
2263 : nimages, do_periodic, p_work)
2264 :
2265 : TYPE(cp_libint_t) :: lib
2266 : REAL(dp), INTENT(IN) :: ra(3), rb(3), rc(3), rd(3)
2267 : INTEGER, INTENT(IN) :: npgfa, npgfb, npgfc, npgfd, la_min, la_max, lb_min, lb_max, lc_min, &
2268 : lc_max, ld_min, ld_max, nsgfa, nsgfb, nsgfc, nsgfd, sphi_a_u1, sphi_a_u2, sphi_a_u3, &
2269 : sphi_b_u1, sphi_b_u2, sphi_b_u3, sphi_c_u1, sphi_c_u2, sphi_c_u3, sphi_d_u1, sphi_d_u2, &
2270 : sphi_d_u3
2271 : REAL(dp), DIMENSION(1:npgfa), INTENT(IN) :: zeta
2272 : REAL(dp), DIMENSION(1:npgfb), INTENT(IN) :: zetb
2273 : REAL(dp), DIMENSION(1:npgfc), INTENT(IN) :: zetc
2274 : REAL(dp), DIMENSION(1:npgfd), INTENT(IN) :: zetd
2275 : REAL(dp), DIMENSION(nsgfa, nsgfb, nsgfc, nsgfd) :: primitive_integrals
2276 : TYPE(hfx_potential_type) :: potential_parameter
2277 : TYPE(hfx_cell_type), DIMENSION(:), POINTER :: neighbor_cells
2278 : REAL(dp), INTENT(IN) :: screen1(2), screen2(2), eps_schwarz, &
2279 : max_contraction_val
2280 : REAL(dp) :: cart_estimate
2281 : TYPE(cell_type), POINTER :: cell
2282 : INTEGER(int_8) :: neris_tmp
2283 : REAL(dp), INTENT(IN) :: log10_pmax, log10_eps_schwarz
2284 : TYPE(hfx_screen_coeff_type), DIMENSION(:, :), &
2285 : POINTER :: R1_pgf, R2_pgf, pgf1, pgf2
2286 : TYPE(hfx_pgf_list), DIMENSION(*) :: pgf_list_ij, pgf_list_kl
2287 : TYPE(hfx_pgf_product_list), ALLOCATABLE, &
2288 : DIMENSION(:), INTENT(INOUT) :: pgf_product_list
2289 : INTEGER, DIMENSION(0:), INTENT(IN) :: nsgfl_a, nsgfl_b, nsgfl_c, nsgfl_d
2290 : REAL(dp), INTENT(IN) :: sphi_a_ext(sphi_a_u1, sphi_a_u2, sphi_a_u3), &
2291 : sphi_b_ext(sphi_b_u1, sphi_b_u2, sphi_b_u3), sphi_c_ext(sphi_c_u1, sphi_c_u2, sphi_c_u3), &
2292 : sphi_d_ext(sphi_d_u1, sphi_d_u2, sphi_d_u3)
2293 : REAL(dp), DIMENSION(*) :: ee_work, ee_work2, ee_buffer1, &
2294 : ee_buffer2, ee_primitives_tmp
2295 : INTEGER, DIMENSION(*) :: nimages
2296 : LOGICAL, INTENT(IN) :: do_periodic
2297 : REAL(dp), DIMENSION(:), POINTER :: p_work
2298 :
2299 : INTEGER :: ipgf, jpgf, kpgf, la, lb, lc, ld, list_ij, list_kl, lpgf, max_l, ncoa, ncob, &
2300 : ncoc, ncod, nelements_ij, nelements_kl, nproducts, nsgfla, nsgflb, nsgflc, nsgfld, nsoa, &
2301 : nsob, nsoc, nsod, s_offset_a, s_offset_b, s_offset_c, s_offset_d
2302 : REAL(dp) :: EtaInv, tmp_max, ZetaInv
2303 :
2304 : CALL build_pair_list_pgf(npgfa, npgfb, pgf_list_ij, zeta, zetb, screen1, screen2, &
2305 : pgf1, R1_pgf, log10_pmax, log10_eps_schwarz, ra, rb, &
2306 : nelements_ij, &
2307 7016573 : neighbor_cells, nimages, do_periodic)
2308 : CALL build_pair_list_pgf(npgfc, npgfd, pgf_list_kl, zetc, zetd, screen2, screen1, &
2309 : pgf2, R2_pgf, log10_pmax, log10_eps_schwarz, rc, rd, &
2310 : nelements_kl, &
2311 7016573 : neighbor_cells, nimages, do_periodic)
2312 :
2313 7016573 : cart_estimate = 0.0_dp
2314 7016573 : neris_tmp = 0
2315 479755951 : primitive_integrals = 0.0_dp
2316 7016573 : max_l = la_max + lb_max + lc_max + ld_max
2317 23683142 : DO list_ij = 1, nelements_ij
2318 16666569 : ZetaInv = pgf_list_ij(list_ij)%ZetaInv
2319 16666569 : ipgf = pgf_list_ij(list_ij)%ipgf
2320 16666569 : jpgf = pgf_list_ij(list_ij)%jpgf
2321 :
2322 85286365 : DO list_kl = 1, nelements_kl
2323 61603223 : EtaInv = pgf_list_kl(list_kl)%ZetaInv
2324 61603223 : kpgf = pgf_list_kl(list_kl)%ipgf
2325 61603223 : lpgf = pgf_list_kl(list_kl)%jpgf
2326 :
2327 : CALL build_pgf_product_list(pgf_list_ij(list_ij), pgf_list_kl(list_kl), pgf_product_list, &
2328 : nproducts, log10_pmax, log10_eps_schwarz, neighbor_cells, cell, &
2329 61603223 : potential_parameter, max_l, do_periodic)
2330 :
2331 61603223 : s_offset_a = 0
2332 151446538 : DO la = la_min, la_max
2333 73176746 : s_offset_b = 0
2334 73176746 : ncoa = nco(la)
2335 73176746 : nsgfla = nsgfl_a(la)
2336 73176746 : nsoa = nso(la)
2337 :
2338 156574258 : DO lb = lb_min, lb_max
2339 83397512 : s_offset_c = 0
2340 83397512 : ncob = nco(lb)
2341 83397512 : nsgflb = nsgfl_b(lb)
2342 83397512 : nsob = nso(lb)
2343 :
2344 196341204 : DO lc = lc_min, lc_max
2345 112943692 : s_offset_d = 0
2346 112943692 : ncoc = nco(lc)
2347 112943692 : nsgflc = nsgfl_c(lc)
2348 112943692 : nsoc = nso(lc)
2349 :
2350 264419122 : DO ld = ld_min, ld_max
2351 151475430 : ncod = nco(ld)
2352 151475430 : nsgfld = nsgfl_d(ld)
2353 151475430 : nsod = nso(ld)
2354 :
2355 151475430 : tmp_max = 0.0_dp
2356 : CALL evaluate_eri(lib, nproducts, pgf_product_list, &
2357 : la, lb, lc, ld, &
2358 : ncoa, ncob, ncoc, ncod, &
2359 : nsgfa, nsgfb, nsgfc, nsgfd, &
2360 : primitive_integrals, &
2361 : max_contraction_val, tmp_max, eps_schwarz, &
2362 : neris_tmp, ZetaInv, EtaInv, &
2363 : s_offset_a, s_offset_b, s_offset_c, s_offset_d, &
2364 : nsgfla, nsgflb, nsgflc, nsgfld, nsoa, nsob, nsoc, nsod, &
2365 : sphi_a_ext(1, la + 1, ipgf), &
2366 : sphi_b_ext(1, lb + 1, jpgf), &
2367 : sphi_c_ext(1, lc + 1, kpgf), &
2368 : sphi_d_ext(1, ld + 1, lpgf), &
2369 : ee_work, ee_work2, ee_buffer1, ee_buffer2, ee_primitives_tmp, &
2370 151475430 : p_work)
2371 151475430 : cart_estimate = MAX(tmp_max, cart_estimate)
2372 264419122 : s_offset_d = s_offset_d + nsod*nsgfld
2373 : END DO !ld
2374 196341204 : s_offset_c = s_offset_c + nsoc*nsgflc
2375 : END DO !lc
2376 156574258 : s_offset_b = s_offset_b + nsob*nsgflb
2377 : END DO !lb
2378 134779969 : s_offset_a = s_offset_a + nsoa*nsgfla
2379 : END DO !la
2380 : END DO
2381 : END DO
2382 :
2383 7016573 : END SUBROUTINE coulomb4
2384 :
2385 : ! **************************************************************************************************
2386 : !> \brief Given a 2d index pair, this function returns a 1d index pair for
2387 : !> a symmetric upper triangle NxN matrix
2388 : !> The compiler should inline this function, therefore it appears in
2389 : !> several modules
2390 : !> \param i 2d index
2391 : !> \param j 2d index
2392 : !> \param N matrix size
2393 : !> \return ...
2394 : !> \par History
2395 : !> 03.2009 created [Manuel Guidon]
2396 : !> \author Manuel Guidon
2397 : ! **************************************************************************************************
2398 28264 : PURE FUNCTION get_1D_idx(i, j, N)
2399 : INTEGER, INTENT(IN) :: i, j
2400 : INTEGER(int_8), INTENT(IN) :: N
2401 : INTEGER(int_8) :: get_1D_idx
2402 :
2403 : INTEGER(int_8) :: min_ij
2404 :
2405 28264 : min_ij = MIN(i, j)
2406 28264 : get_1D_idx = min_ij*N + MAX(i, j) - (min_ij - 1)*min_ij/2 - N
2407 :
2408 28264 : END FUNCTION get_1D_idx
2409 :
2410 : ! **************************************************************************************************
2411 : !> \brief This routine prefetches density/fock matrix elements and stores them
2412 : !> in cache friendly arrays. These buffers are then used to update the
2413 : !> fock matrix
2414 : !> \param ma_max Size of matrix blocks
2415 : !> \param mb_max Size of matrix blocks
2416 : !> \param mc_max Size of matrix blocks
2417 : !> \param md_max Size of matrix blocks
2418 : !> \param fac multiplication factor (spin)
2419 : !> \param symm_fac multiplication factor (symmetry)
2420 : !> \param density upper triangular density matrix
2421 : !> \param ks upper triangular fock matrix
2422 : !> \param prim primitive integrals
2423 : !> \param pbd buffer that will contain P(b,d)
2424 : !> \param pbc buffer that will contain P(b,c)
2425 : !> \param pad buffer that will contain P(a,d)
2426 : !> \param pac buffer that will contain P(a,c)
2427 : !> \param kbd buffer for KS(b,d)
2428 : !> \param kbc buffer for KS(b,c)
2429 : !> \param kad buffer for KS(a,d)
2430 : !> \param kac buffer for KS(a,c)
2431 : !> \param iatom ...
2432 : !> \param jatom ...
2433 : !> \param katom ...
2434 : !> \param latom ...
2435 : !> \param iset ...
2436 : !> \param jset ...
2437 : !> \param kset ...
2438 : !> \param lset ...
2439 : !> \param offset_bd_set ...
2440 : !> \param offset_bc_set ...
2441 : !> \param offset_ad_set ...
2442 : !> \param offset_ac_set ...
2443 : !> \param atomic_offset_bd ...
2444 : !> \param atomic_offset_bc ...
2445 : !> \param atomic_offset_ad ...
2446 : !> \param atomic_offset_ac ...
2447 : !> \par History
2448 : !> 03.2009 created [Manuel Guidon]
2449 : !> \author Manuel Guidon
2450 : ! **************************************************************************************************
2451 :
2452 66580461 : SUBROUTINE update_fock_matrix(ma_max, mb_max, mc_max, md_max, &
2453 66580461 : fac, symm_fac, density, ks, prim, &
2454 : pbd, pbc, pad, pac, kbd, kbc, kad, kac, &
2455 : iatom, jatom, katom, latom, &
2456 : iset, jset, kset, lset, offset_bd_set, offset_bc_set, offset_ad_set, &
2457 : offset_ac_set, atomic_offset_bd, atomic_offset_bc, atomic_offset_ad, &
2458 : atomic_offset_ac)
2459 :
2460 : INTEGER, INTENT(IN) :: ma_max, mb_max, mc_max, md_max
2461 : REAL(dp), INTENT(IN) :: fac, symm_fac
2462 : REAL(dp), DIMENSION(:), INTENT(IN) :: density
2463 : REAL(dp), DIMENSION(:), INTENT(INOUT) :: ks
2464 : REAL(dp), DIMENSION(ma_max*mb_max*mc_max*md_max), &
2465 : INTENT(IN) :: prim
2466 : REAL(dp), DIMENSION(*), INTENT(INOUT) :: pbd, pbc, pad, pac, kbd, kbc, kad, kac
2467 : INTEGER, INTENT(IN) :: iatom, jatom, katom, latom, iset, jset, &
2468 : kset, lset
2469 : INTEGER, DIMENSION(:, :), POINTER, INTENT(IN) :: offset_bd_set, offset_bc_set, &
2470 : offset_ad_set, offset_ac_set
2471 : INTEGER, INTENT(IN) :: atomic_offset_bd, atomic_offset_bc, &
2472 : atomic_offset_ad, atomic_offset_ac
2473 :
2474 : INTEGER :: i, j, ma, mb, mc, md, offset_ac, &
2475 : offset_ad, offset_bc, offset_bd
2476 : REAL(dp) :: ki
2477 :
2478 66580461 : IF (jatom >= latom) THEN
2479 65812845 : i = 1
2480 65812845 : offset_bd = offset_bd_set(jset, lset) + atomic_offset_bd - 1
2481 65812845 : j = offset_bd
2482 196772710 : DO md = 1, md_max
2483 436191493 : DO mb = 1, mb_max
2484 239418783 : pbd(i) = density(j)
2485 239418783 : i = i + 1
2486 370378648 : j = j + 1
2487 : END DO
2488 : END DO
2489 : ELSE
2490 767616 : i = 1
2491 767616 : offset_bd = offset_bd_set(lset, jset) + atomic_offset_bd - 1
2492 2235460 : DO md = 1, md_max
2493 1467844 : j = offset_bd + md - 1
2494 5429209 : DO mb = 1, mb_max
2495 3193749 : pbd(i) = density(j)
2496 3193749 : i = i + 1
2497 4661593 : j = j + md_max
2498 : END DO
2499 : END DO
2500 : END IF
2501 66580461 : IF (jatom >= katom) THEN
2502 66580461 : i = 1
2503 66580461 : offset_bc = offset_bc_set(jset, kset) + atomic_offset_bc - 1
2504 66580461 : j = offset_bc
2505 220267495 : DO mc = 1, mc_max
2506 492875354 : DO mb = 1, mb_max
2507 272607859 : pbc(i) = density(j)
2508 272607859 : i = i + 1
2509 426294893 : j = j + 1
2510 : END DO
2511 : END DO
2512 : ELSE
2513 0 : i = 1
2514 0 : offset_bc = offset_bc_set(kset, jset) + atomic_offset_bc - 1
2515 0 : DO mc = 1, mc_max
2516 0 : j = offset_bc + mc - 1
2517 0 : DO mb = 1, mb_max
2518 0 : pbc(i) = density(j)
2519 0 : i = i + 1
2520 0 : j = j + mc_max
2521 : END DO
2522 : END DO
2523 : END IF
2524 66580461 : IF (iatom >= latom) THEN
2525 48619876 : i = 1
2526 48619876 : offset_ad = offset_ad_set(iset, lset) + atomic_offset_ad - 1
2527 48619876 : j = offset_ad
2528 152379450 : DO md = 1, md_max
2529 375294635 : DO ma = 1, ma_max
2530 222915185 : pad(i) = density(j)
2531 222915185 : i = i + 1
2532 326674759 : j = j + 1
2533 : END DO
2534 : END DO
2535 : ELSE
2536 17960585 : i = 1
2537 17960585 : offset_ad = offset_ad_set(lset, iset) + atomic_offset_ad - 1
2538 46628720 : DO md = 1, md_max
2539 28668135 : j = offset_ad + md - 1
2540 115178882 : DO ma = 1, ma_max
2541 68550162 : pad(i) = density(j)
2542 68550162 : i = i + 1
2543 97218297 : j = j + md_max
2544 : END DO
2545 : END DO
2546 : END IF
2547 66580461 : IF (iatom >= katom) THEN
2548 63747509 : i = 1
2549 63747509 : offset_ac = offset_ac_set(iset, kset) + atomic_offset_ac - 1
2550 63747509 : j = offset_ac
2551 212470156 : DO mc = 1, mc_max
2552 536259188 : DO ma = 1, ma_max
2553 323789032 : pac(i) = density(j)
2554 323789032 : i = i + 1
2555 472511679 : j = j + 1
2556 : END DO
2557 : END DO
2558 : ELSE
2559 2832952 : i = 1
2560 2832952 : offset_ac = offset_ac_set(kset, iset) + atomic_offset_ac - 1
2561 7797339 : DO mc = 1, mc_max
2562 4964387 : j = offset_ac + mc - 1
2563 21020183 : DO ma = 1, ma_max
2564 13222844 : pac(i) = density(j)
2565 13222844 : i = i + 1
2566 18187231 : j = j + mc_max
2567 : END DO
2568 : END DO
2569 : END IF
2570 :
2571 66580461 : CALL contract_block(ma_max, mb_max, mc_max, md_max, kbd, kbc, kad, kac, pbd, pbc, pad, pac, prim, fac*symm_fac)
2572 66580461 : IF (jatom >= latom) THEN
2573 : i = 1
2574 : j = offset_bd
2575 196772710 : DO md = 1, md_max
2576 436191493 : DO mb = 1, mb_max
2577 239418783 : ki = kbd(i)
2578 239418783 : !$OMP ATOMIC
2579 : ks(j) = ks(j) + ki
2580 239418783 : i = i + 1
2581 370378648 : j = j + 1
2582 : END DO
2583 : END DO
2584 : ELSE
2585 : i = 1
2586 2235460 : DO md = 1, md_max
2587 1467844 : j = offset_bd + md - 1
2588 5429209 : DO mb = 1, mb_max
2589 3193749 : ki = kbd(i)
2590 3193749 : !$OMP ATOMIC
2591 : ks(j) = ks(j) + ki
2592 3193749 : i = i + 1
2593 4661593 : j = j + md_max
2594 : END DO
2595 : END DO
2596 : END IF
2597 66580461 : IF (jatom >= katom) THEN
2598 : i = 1
2599 : j = offset_bc
2600 220267495 : DO mc = 1, mc_max
2601 492875354 : DO mb = 1, mb_max
2602 272607859 : ki = kbc(i)
2603 272607859 : !$OMP ATOMIC
2604 : ks(j) = ks(j) + ki
2605 272607859 : i = i + 1
2606 426294893 : j = j + 1
2607 : END DO
2608 : END DO
2609 : ELSE
2610 : i = 1
2611 0 : DO mc = 1, mc_max
2612 0 : j = offset_bc + mc - 1
2613 0 : DO mb = 1, mb_max
2614 0 : ki = kbc(i)
2615 0 : !$OMP ATOMIC
2616 : ks(j) = ks(j) + ki
2617 0 : i = i + 1
2618 0 : j = j + mc_max
2619 : END DO
2620 : END DO
2621 : END IF
2622 66580461 : IF (iatom >= latom) THEN
2623 : i = 1
2624 : j = offset_ad
2625 152379450 : DO md = 1, md_max
2626 375294635 : DO ma = 1, ma_max
2627 222915185 : ki = kad(i)
2628 222915185 : !$OMP ATOMIC
2629 : ks(j) = ks(j) + ki
2630 222915185 : i = i + 1
2631 326674759 : j = j + 1
2632 : END DO
2633 : END DO
2634 : ELSE
2635 : i = 1
2636 46628720 : DO md = 1, md_max
2637 28668135 : j = offset_ad + md - 1
2638 115178882 : DO ma = 1, ma_max
2639 68550162 : ki = kad(i)
2640 68550162 : !$OMP ATOMIC
2641 : ks(j) = ks(j) + ki
2642 68550162 : i = i + 1
2643 97218297 : j = j + md_max
2644 : END DO
2645 : END DO
2646 : END IF
2647 66580461 : IF (iatom >= katom) THEN
2648 : i = 1
2649 : j = offset_ac
2650 212470156 : DO mc = 1, mc_max
2651 536259188 : DO ma = 1, ma_max
2652 323789032 : ki = kac(i)
2653 323789032 : !$OMP ATOMIC
2654 : ks(j) = ks(j) + ki
2655 323789032 : i = i + 1
2656 472511679 : j = j + 1
2657 : END DO
2658 : END DO
2659 : ELSE
2660 : i = 1
2661 7797339 : DO mc = 1, mc_max
2662 4964387 : j = offset_ac + mc - 1
2663 21020183 : DO ma = 1, ma_max
2664 13222844 : ki = kac(i)
2665 13222844 : !$OMP ATOMIC
2666 : ks(j) = ks(j) + ki
2667 13222844 : i = i + 1
2668 18187231 : j = j + mc_max
2669 : END DO
2670 : END DO
2671 : END IF
2672 66580461 : END SUBROUTINE update_fock_matrix
2673 :
2674 : ! **************************************************************************************************
2675 : !> \brief ...
2676 : !> \param ma_max ...
2677 : !> \param mb_max ...
2678 : !> \param mc_max ...
2679 : !> \param md_max ...
2680 : !> \param fac ...
2681 : !> \param symm_fac ...
2682 : !> \param density ...
2683 : !> \param ks ...
2684 : !> \param prim ...
2685 : !> \param pbd ...
2686 : !> \param pbc ...
2687 : !> \param pad ...
2688 : !> \param pac ...
2689 : !> \param kbd ...
2690 : !> \param kbc ...
2691 : !> \param kad ...
2692 : !> \param kac ...
2693 : !> \param iatom ...
2694 : !> \param jatom ...
2695 : !> \param katom ...
2696 : !> \param latom ...
2697 : !> \param iset ...
2698 : !> \param jset ...
2699 : !> \param kset ...
2700 : !> \param lset ...
2701 : !> \param offset_bd_set ...
2702 : !> \param offset_bc_set ...
2703 : !> \param offset_ad_set ...
2704 : !> \param offset_ac_set ...
2705 : !> \param atomic_offset_bd ...
2706 : !> \param atomic_offset_bc ...
2707 : !> \param atomic_offset_ad ...
2708 : !> \param atomic_offset_ac ...
2709 : ! **************************************************************************************************
2710 4975109 : SUBROUTINE update_fock_matrix_as(ma_max, mb_max, mc_max, md_max, &
2711 4975109 : fac, symm_fac, density, ks, prim, &
2712 : pbd, pbc, pad, pac, kbd, kbc, kad, kac, &
2713 : iatom, jatom, katom, latom, &
2714 : iset, jset, kset, lset, offset_bd_set, offset_bc_set, offset_ad_set, &
2715 : offset_ac_set, atomic_offset_bd, atomic_offset_bc, atomic_offset_ad, &
2716 : atomic_offset_ac)
2717 :
2718 : INTEGER, INTENT(IN) :: ma_max, mb_max, mc_max, md_max
2719 : REAL(dp), INTENT(IN) :: fac, symm_fac
2720 : REAL(dp), DIMENSION(:), INTENT(IN) :: density
2721 : REAL(dp), DIMENSION(:), INTENT(INOUT) :: ks
2722 : REAL(dp), DIMENSION(ma_max*mb_max*mc_max*md_max), &
2723 : INTENT(IN) :: prim
2724 : REAL(dp), DIMENSION(*), INTENT(INOUT) :: pbd, pbc, pad, pac, kbd, kbc, kad, kac
2725 : INTEGER, INTENT(IN) :: iatom, jatom, katom, latom, iset, jset, &
2726 : kset, lset
2727 : INTEGER, DIMENSION(:, :), POINTER :: offset_bd_set, offset_bc_set, &
2728 : offset_ad_set, offset_ac_set
2729 : INTEGER, INTENT(IN) :: atomic_offset_bd, atomic_offset_bc, &
2730 : atomic_offset_ad, atomic_offset_ac
2731 :
2732 : INTEGER :: i, j, ma, mb, mc, md, offset_ac, &
2733 : offset_ad, offset_bc, offset_bd
2734 :
2735 4975109 : IF (jatom >= latom) THEN
2736 4969175 : i = 1
2737 4969175 : offset_bd = offset_bd_set(jset, lset) + atomic_offset_bd - 1
2738 4969175 : j = offset_bd
2739 13542415 : DO md = 1, md_max
2740 25967993 : DO mb = 1, mb_max
2741 12425578 : pbd(i) = +density(j)
2742 12425578 : i = i + 1
2743 20998818 : j = j + 1
2744 : END DO
2745 : END DO
2746 : ELSE
2747 5934 : i = 1
2748 5934 : offset_bd = offset_bd_set(lset, jset) + atomic_offset_bd - 1
2749 13154 : DO md = 1, md_max
2750 7220 : j = offset_bd + md - 1
2751 23570 : DO mb = 1, mb_max
2752 10416 : pbd(i) = -density(j)
2753 10416 : i = i + 1
2754 17636 : j = j + md_max
2755 : END DO
2756 : END DO
2757 : END IF
2758 4975109 : IF (jatom >= katom) THEN
2759 4975109 : i = 1
2760 4975109 : offset_bc = offset_bc_set(jset, kset) + atomic_offset_bc - 1
2761 4975109 : j = offset_bc
2762 15134036 : DO mc = 1, mc_max
2763 29447906 : DO mb = 1, mb_max
2764 14313870 : pbc(i) = -density(j)
2765 14313870 : i = i + 1
2766 24472797 : j = j + 1
2767 : END DO
2768 : END DO
2769 : ELSE
2770 0 : i = 1
2771 0 : offset_bc = offset_bc_set(kset, jset) + atomic_offset_bc - 1
2772 0 : DO mc = 1, mc_max
2773 0 : j = offset_bc + mc - 1
2774 0 : DO mb = 1, mb_max
2775 0 : pbc(i) = density(j)
2776 0 : i = i + 1
2777 0 : j = j + mc_max
2778 : END DO
2779 : END DO
2780 : END IF
2781 4975109 : IF (iatom >= latom) THEN
2782 3754017 : i = 1
2783 3754017 : offset_ad = offset_ad_set(iset, lset) + atomic_offset_ad - 1
2784 3754017 : j = offset_ad
2785 10877962 : DO md = 1, md_max
2786 24230763 : DO ma = 1, ma_max
2787 13352801 : pad(i) = -density(j)
2788 13352801 : i = i + 1
2789 20476746 : j = j + 1
2790 : END DO
2791 : END DO
2792 : ELSE
2793 1221092 : i = 1
2794 1221092 : offset_ad = offset_ad_set(lset, iset) + atomic_offset_ad - 1
2795 2677607 : DO md = 1, md_max
2796 1456515 : j = offset_ad + md - 1
2797 5893296 : DO ma = 1, ma_max
2798 3215689 : pad(i) = density(j)
2799 3215689 : i = i + 1
2800 4672204 : j = j + md_max
2801 : END DO
2802 : END DO
2803 : END IF
2804 4975109 : IF (iatom >= katom) THEN
2805 4800293 : i = 1
2806 4800293 : offset_ac = offset_ac_set(iset, kset) + atomic_offset_ac - 1
2807 4800293 : j = offset_ac
2808 14707836 : DO mc = 1, mc_max
2809 33684431 : DO ma = 1, ma_max
2810 18976595 : pac(i) = +density(j)
2811 18976595 : i = i + 1
2812 28884138 : j = j + 1
2813 : END DO
2814 : END DO
2815 : ELSE
2816 174816 : i = 1
2817 174816 : offset_ac = offset_ac_set(kset, iset) + atomic_offset_ac - 1
2818 426200 : DO mc = 1, mc_max
2819 251384 : j = offset_ac + mc - 1
2820 1062297 : DO ma = 1, ma_max
2821 636097 : pac(i) = -density(j)
2822 636097 : i = i + 1
2823 887481 : j = j + mc_max
2824 : END DO
2825 : END DO
2826 : END IF
2827 :
2828 4975109 : CALL contract_block(ma_max, mb_max, mc_max, md_max, kbd, kbc, kad, kac, pbd, pbc, pad, pac, prim, fac*symm_fac)
2829 :
2830 4975109 : IF (jatom >= latom) THEN
2831 : i = 1
2832 : j = offset_bd
2833 13542415 : DO md = 1, md_max
2834 25967993 : DO mb = 1, mb_max
2835 12425578 : !$OMP ATOMIC
2836 : ks(j) = ks(j) + kbd(i)
2837 12425578 : i = i + 1
2838 20998818 : j = j + 1
2839 : END DO
2840 : END DO
2841 : ELSE
2842 : i = 1
2843 13154 : DO md = 1, md_max
2844 7220 : j = offset_bd + md - 1
2845 23570 : DO mb = 1, mb_max
2846 10416 : !$OMP ATOMIC
2847 : ks(j) = ks(j) - kbd(i)
2848 10416 : i = i + 1
2849 17636 : j = j + md_max
2850 : END DO
2851 : END DO
2852 : END IF
2853 4975109 : IF (jatom >= katom) THEN
2854 : i = 1
2855 : j = offset_bc
2856 15134036 : DO mc = 1, mc_max
2857 29447906 : DO mb = 1, mb_max
2858 14313870 : !$OMP ATOMIC
2859 : ks(j) = ks(j) - kbc(i)
2860 14313870 : i = i + 1
2861 24472797 : j = j + 1
2862 : END DO
2863 : END DO
2864 : ELSE
2865 : i = 1
2866 0 : DO mc = 1, mc_max
2867 0 : j = offset_bc + mc - 1
2868 0 : DO mb = 1, mb_max
2869 0 : !$OMP ATOMIC
2870 : ks(j) = ks(j) + kbc(i)
2871 0 : i = i + 1
2872 0 : j = j + mc_max
2873 : END DO
2874 : END DO
2875 : END IF
2876 4975109 : IF (iatom >= latom) THEN
2877 : i = 1
2878 : j = offset_ad
2879 10877962 : DO md = 1, md_max
2880 24230763 : DO ma = 1, ma_max
2881 13352801 : !$OMP ATOMIC
2882 : ks(j) = ks(j) - kad(i)
2883 13352801 : i = i + 1
2884 20476746 : j = j + 1
2885 : END DO
2886 : END DO
2887 : ELSE
2888 : i = 1
2889 2677607 : DO md = 1, md_max
2890 1456515 : j = offset_ad + md - 1
2891 5893296 : DO ma = 1, ma_max
2892 3215689 : !$OMP ATOMIC
2893 : ks(j) = ks(j) + kad(i)
2894 3215689 : i = i + 1
2895 4672204 : j = j + md_max
2896 : END DO
2897 : END DO
2898 : END IF
2899 : ! XXXXXXXXXXXXXXXXXXXXXXXX
2900 4975109 : IF (iatom >= katom) THEN
2901 : i = 1
2902 : j = offset_ac
2903 14707836 : DO mc = 1, mc_max
2904 33684431 : DO ma = 1, ma_max
2905 18976595 : !$OMP ATOMIC
2906 : ks(j) = ks(j) + kac(i)
2907 18976595 : i = i + 1
2908 28884138 : j = j + 1
2909 : END DO
2910 : END DO
2911 : ELSE
2912 : i = 1
2913 426200 : DO mc = 1, mc_max
2914 251384 : j = offset_ac + mc - 1
2915 1062297 : DO ma = 1, ma_max
2916 636097 : !$OMP ATOMIC
2917 : ks(j) = ks(j) - kac(i)
2918 636097 : i = i + 1
2919 887481 : j = j + mc_max
2920 : END DO
2921 : END DO
2922 : END IF
2923 :
2924 4975109 : END SUBROUTINE update_fock_matrix_as
2925 :
2926 : ! **************************************************************************************************
2927 : !> \brief ...
2928 : !> \param i ...
2929 : !> \param j ...
2930 : !> \param k ...
2931 : !> \param l ...
2932 : !> \param set_offsets ...
2933 : !> \param atom_offsets ...
2934 : !> \param iset ...
2935 : !> \param jset ...
2936 : !> \param kset ...
2937 : !> \param lset ...
2938 : !> \param ma_max ...
2939 : !> \param mb_max ...
2940 : !> \param mc_max ...
2941 : !> \param md_max ...
2942 : !> \param prim ...
2943 : ! **************************************************************************************************
2944 0 : SUBROUTINE print_integrals(i, j, k, l, set_offsets, atom_offsets, iset, jset, kset, lset, ma_max, mb_max, mc_max, md_max, prim)
2945 : INTEGER :: i, j, k, l
2946 : INTEGER, DIMENSION(:, :, :, :), POINTER :: set_offsets
2947 : INTEGER, DIMENSION(:, :), POINTER :: atom_offsets
2948 : INTEGER :: iset, jset, kset, lset, ma_max, mb_max, &
2949 : mc_max, md_max
2950 : REAL(dp), DIMENSION(ma_max*mb_max*mc_max*md_max), &
2951 : INTENT(IN) :: prim
2952 :
2953 : INTEGER :: iint, ma, mb, mc, md
2954 :
2955 0 : iint = 0
2956 0 : DO md = 1, md_max
2957 0 : DO mc = 1, mc_max
2958 0 : DO mb = 1, mb_max
2959 0 : DO ma = 1, ma_max
2960 0 : iint = iint + 1
2961 0 : IF (ABS(prim(iint)) .GT. 0.0000000000001) &
2962 0 : WRITE (99, *) atom_offsets(i, 1) + ma + set_offsets(iset, 1, i, 1) - 1, &
2963 0 : atom_offsets(j, 1) + ma + set_offsets(jset, 1, j, 1) - 1, &
2964 0 : atom_offsets(k, 1) + ma + set_offsets(kset, 1, k, 1) - 1, &
2965 0 : atom_offsets(l, 1) + ma + set_offsets(lset, 1, l, 1) - 1, &
2966 0 : prim(iint)
2967 : END DO
2968 : END DO
2969 : END DO
2970 : END DO
2971 :
2972 0 : END SUBROUTINE print_integrals
2973 :
2974 : #:include "hfx_get_pmax_val.fypp"
2975 :
2976 : END MODULE hfx_energy_potential
|