Line data Source code
1 : !--------------------------------------------------------------------------------------------------!
2 : ! CP2K: A general program to perform molecular dynamics simulations !
3 : ! Copyright 2000-2024 CP2K developers group <https://cp2k.org> !
4 : ! !
5 : ! SPDX-License-Identifier: GPL-2.0-or-later !
6 : !--------------------------------------------------------------------------------------------------!
7 :
8 : ! **************************************************************************************************
9 : !> \brief Routines to calculate MP2 energy
10 : !> \par History
11 : !> 06.2011 created [Mauro Del Ben]
12 : !> \author Mauro Del Ben
13 : ! **************************************************************************************************
14 : MODULE mp2_direct_method
15 : USE cell_types, ONLY: cell_type
16 : USE cp_log_handling, ONLY: cp_get_default_logger,&
17 : cp_logger_type
18 : USE hfx_energy_potential, ONLY: coulomb4
19 : USE hfx_load_balance_methods, ONLY: cost_model,&
20 : p1_energy,&
21 : p2_energy,&
22 : p3_energy
23 : USE hfx_pair_list_methods, ONLY: build_pair_list_mp2
24 : USE hfx_types, ONLY: hfx_basis_type,&
25 : hfx_pgf_list,&
26 : hfx_pgf_product_list,&
27 : hfx_potential_type,&
28 : hfx_screen_coeff_type,&
29 : hfx_type,&
30 : pair_set_list_type
31 : USE input_constants, ONLY: do_potential_TShPSC
32 : USE kinds, ONLY: dp,&
33 : int_8
34 : USE libint_wrapper, ONLY: cp_libint_t
35 : USE machine, ONLY: m_flush
36 : USE mathconstants, ONLY: zero
37 : USE message_passing, ONLY: mp_para_env_release,&
38 : mp_para_env_type
39 : USE mp2_ri_libint, ONLY: prepare_integral_calc
40 : USE mp2_types, ONLY: init_TShPSC_lmax,&
41 : mp2_biel_type,&
42 : mp2_type,&
43 : pair_list_type_mp2
44 : USE orbital_pointers, ONLY: ncoset
45 : USE particle_types, ONLY: particle_type
46 : USE qs_environment_types, ONLY: qs_environment_type
47 : USE t_sh_p_s_c, ONLY: free_C0
48 : #include "./base/base_uses.f90"
49 :
50 : IMPLICIT NONE
51 : PRIVATE
52 :
53 : PUBLIC :: mp2_direct_energy, mp2_canonical_direct_single_batch
54 :
55 : CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'mp2_direct_method'
56 :
57 : !***
58 :
59 : CONTAINS
60 :
61 : ! **************************************************************************************************
62 : !> \brief ...
63 : !> \param dimen ...
64 : !> \param occ_i ...
65 : !> \param occ_j ...
66 : !> \param mp2_biel ...
67 : !> \param mp2_env ...
68 : !> \param C_i ...
69 : !> \param Auto_i ...
70 : !> \param Emp2 ...
71 : !> \param Emp2_Cou ...
72 : !> \param Emp2_ex ...
73 : !> \param qs_env ...
74 : !> \param para_env ...
75 : !> \param unit_nr ...
76 : !> \param C_j ...
77 : !> \param Auto_j ...
78 : ! **************************************************************************************************
79 26 : SUBROUTINE mp2_direct_energy(dimen, occ_i, occ_j, mp2_biel, mp2_env, C_i, Auto_i, Emp2, Emp2_Cou, Emp2_ex, &
80 26 : qs_env, para_env, unit_nr, C_j, Auto_j)
81 : INTEGER :: dimen, occ_i, occ_j
82 : TYPE(mp2_biel_type) :: mp2_biel
83 : TYPE(mp2_type) :: mp2_env
84 : REAL(KIND=dp), DIMENSION(dimen, dimen) :: C_i
85 : REAL(KIND=dp), DIMENSION(dimen) :: Auto_i
86 : REAL(KIND=dp) :: Emp2, Emp2_Cou, Emp2_ex
87 : TYPE(qs_environment_type), POINTER :: qs_env
88 : TYPE(mp_para_env_type), POINTER :: para_env
89 : INTEGER :: unit_nr
90 : REAL(KIND=dp), DIMENSION(dimen, dimen), OPTIONAL :: C_j
91 : REAL(KIND=dp), DIMENSION(dimen), OPTIONAL :: Auto_j
92 :
93 : CHARACTER(LEN=*), PARAMETER :: routineN = 'mp2_direct_energy'
94 : REAL(KIND=dp), PARAMETER :: zero = 0.D+00
95 :
96 : INTEGER :: batch_number, color_sub, counter, elements_ij_proc, group_counter, handle, i, &
97 : i_batch, i_batch_start, i_group_counter, j, j_batch_start, j_group_counter, last_batch, &
98 : max_batch_number, max_batch_size, max_set, minimum_memory_needed, my_batch_size, &
99 : my_I_batch_size, my_I_occupied_end, my_I_occupied_start, my_J_batch_size, &
100 : my_J_occupied_end, my_J_occupied_start, natom, Ni_occupied, Nj_occupied, number_groups, &
101 : number_i_subset, number_j_subset, one, sqrt_number_groups, total_I_size_batch_group, &
102 : total_J_size_batch_group, virt_i, virt_j
103 26 : INTEGER, ALLOCATABLE, DIMENSION(:) :: batch_sizes, batch_sizes_tmp, &
104 26 : vector_batch_I_size_group, &
105 26 : vector_batch_J_size_group
106 26 : INTEGER, ALLOCATABLE, DIMENSION(:, :) :: ij_list_proc, ij_list_proc_temp, &
107 26 : ij_matrix
108 : LOGICAL :: alpha_beta_case
109 : TYPE(mp_para_env_type), POINTER :: para_env_sub
110 :
111 26 : CALL timeset(routineN, handle)
112 :
113 26 : alpha_beta_case = .FALSE.
114 26 : IF (PRESENT(C_j) .AND. PRESENT(Auto_j)) alpha_beta_case = .TRUE.
115 :
116 26 : IF (unit_nr > 0 .AND. mp2_env%potential_parameter%potential_type == do_potential_TShPSC) THEN
117 1 : IF (unit_nr > 0) WRITE (unit_nr, '(T3,A,T64,F12.6,A5)') 'Truncated MP2 method, Rt=', &
118 2 : mp2_env%potential_parameter%cutoff_radius, ' Bohr'
119 : END IF
120 :
121 : ! create the local para env
122 : ! each para_env_sub corresponds to a group that is going to compute
123 : ! all the integrals. To each group a batch I is assigned and the
124 : ! communication takes place only inside the group
125 26 : number_groups = para_env%num_pe/mp2_env%mp2_num_proc
126 26 : IF (number_groups*mp2_env%mp2_num_proc /= para_env%num_pe) THEN
127 0 : CPABORT(" The number of processors needs to be a multiple of the processors per group. ")
128 : END IF
129 26 : IF (number_groups > occ_i*occ_j) THEN
130 2 : IF (unit_nr > 0) WRITE (unit_nr, '(T3,A)') 'Number of groups greater then the number of IJ pairs!'
131 2 : IF (unit_nr > 0) WRITE (unit_nr, '(T3,A)') 'Consider using more processors per group for improved efficiency'
132 : END IF
133 :
134 26 : color_sub = para_env%mepos/mp2_env%mp2_num_proc
135 26 : ALLOCATE (para_env_sub)
136 26 : CALL para_env_sub%from_split(para_env, color_sub)
137 :
138 : ! calculate the maximal size of the batch, according to the maximum RS size
139 26 : max_set = SIZE(mp2_biel%index_table, 2)
140 26 : minimum_memory_needed = (8*(max_set**4))/1024**2
141 26 : IF (minimum_memory_needed > mp2_env%mp2_memory) THEN
142 0 : IF (unit_nr > 0) WRITE (unit_nr, '(T3,A,T66,F12.6,A3)') 'Memory required below the minimum, new memory:', &
143 0 : minimum_memory_needed, 'MiB'
144 0 : mp2_env%mp2_memory = minimum_memory_needed
145 : END IF
146 :
147 : ! Distribute the batches over the groups in
148 : ! a rectangular fashion, bigger size for J index
149 : ! the sizes of the I batches should be as small as possible
150 26 : sqrt_number_groups = INT(SQRT(REAL(number_groups, KIND=dp)))
151 26 : DO i = 1, number_groups
152 26 : IF (MOD(number_groups, i) == 0) THEN
153 26 : IF (sqrt_number_groups/i <= 1) THEN
154 : number_j_subset = i
155 : EXIT
156 : END IF
157 : END IF
158 : END DO
159 26 : number_i_subset = number_groups/number_j_subset
160 :
161 26 : IF (number_i_subset < number_j_subset) THEN
162 0 : number_i_subset = number_j_subset
163 0 : number_j_subset = number_groups/number_i_subset
164 : END IF
165 :
166 : ! Distribute the I index and the J index over groups
167 26 : total_I_size_batch_group = occ_i/number_i_subset
168 26 : IF (total_I_size_batch_group < 1) total_I_size_batch_group = 1
169 78 : ALLOCATE (vector_batch_I_size_group(0:number_i_subset - 1))
170 :
171 78 : vector_batch_I_size_group = 0
172 78 : DO i = 0, number_i_subset - 1
173 78 : vector_batch_I_size_group(i) = total_I_size_batch_group
174 : END DO
175 78 : IF (SUM(vector_batch_I_size_group) /= occ_i) THEN
176 54 : one = 1
177 54 : IF (SUM(vector_batch_I_size_group) > occ_i) one = -1
178 18 : i = -1
179 : DO
180 18 : i = i + 1
181 18 : vector_batch_I_size_group(i) = vector_batch_I_size_group(i) + one
182 54 : IF (SUM(vector_batch_I_size_group) == occ_i) EXIT
183 18 : IF (i == number_i_subset - 1) i = -1
184 : END DO
185 : END IF
186 :
187 26 : total_J_size_batch_group = occ_j/number_j_subset
188 26 : IF (total_J_size_batch_group < 1) total_J_size_batch_group = 1
189 78 : ALLOCATE (vector_batch_J_size_group(0:number_j_subset - 1))
190 :
191 52 : vector_batch_J_size_group = 0
192 52 : DO i = 0, number_J_subset - 1
193 52 : vector_batch_J_size_group(i) = total_J_size_batch_group
194 : END DO
195 52 : IF (SUM(vector_batch_J_size_group) /= occ_j) THEN
196 0 : one = 1
197 0 : IF (SUM(vector_batch_J_size_group) > occ_j) one = -1
198 0 : i = -1
199 : DO
200 0 : i = i + 1
201 0 : vector_batch_J_size_group(i) = vector_batch_J_size_group(i) + one
202 0 : IF (SUM(vector_batch_J_size_group) == occ_j) EXIT
203 0 : IF (i == number_J_subset - 1) i = -1
204 : END DO
205 : END IF
206 :
207 : ! now the starting and ending I and J occupied orbitals are assigned to each group
208 26 : group_counter = 0
209 26 : i_group_counter = 0
210 26 : my_I_occupied_start = 1
211 39 : DO i = 0, number_i_subset - 1
212 : my_J_occupied_start = 1
213 : j_group_counter = 0
214 52 : DO j = 0, number_j_subset - 1
215 39 : group_counter = group_counter + 1
216 39 : IF (color_sub == group_counter - 1) EXIT
217 13 : my_J_occupied_start = my_J_occupied_start + vector_batch_J_size_group(j)
218 52 : j_group_counter = j_group_counter + 1
219 : END DO
220 39 : IF (color_sub == group_counter - 1) EXIT
221 13 : my_I_occupied_start = my_I_occupied_start + vector_batch_I_size_group(i)
222 39 : i_group_counter = i_group_counter + 1
223 : END DO
224 26 : my_I_occupied_end = my_I_occupied_start + vector_batch_I_size_group(i_group_counter) - 1
225 26 : my_I_batch_size = vector_batch_I_size_group(i_group_counter)
226 26 : my_J_occupied_end = my_J_occupied_start + vector_batch_J_size_group(j_group_counter) - 1
227 26 : my_J_batch_size = vector_batch_J_size_group(j_group_counter)
228 :
229 26 : DEALLOCATE (vector_batch_I_size_group)
230 26 : DEALLOCATE (vector_batch_J_size_group)
231 :
232 : max_batch_size = MIN( &
233 : MAX(1, &
234 : INT(mp2_env%mp2_memory*INT(1024, KIND=int_8)**2/ &
235 : (8*(2*dimen - occ_i)*INT(dimen, KIND=int_8)*my_J_batch_size/para_env_sub%num_pe))) &
236 26 : , my_I_batch_size)
237 26 : IF (max_batch_size < 1) THEN
238 1 : max_batch_size = INT((8*(occ_i + 1)*INT(dimen, KIND=int_8)**2/para_env%num_pe)/1024**2)
239 1 : IF (unit_nr > 0) WRITE (unit_nr, '(T3,A,T72,I6,A3)') 'More memory required, at least:', max_batch_size, 'MiB'
240 1 : max_batch_size = 1
241 : END IF
242 :
243 : ! create the size of the batches inside the group
244 26 : my_batch_size = my_I_batch_size
245 77 : ALLOCATE (batch_sizes(my_batch_size))
246 :
247 85 : batch_sizes = -HUGE(0)
248 : batch_number = 0
249 51 : DO i = 1, my_batch_size
250 45 : IF (i*max_batch_size > my_batch_size) EXIT
251 25 : batch_number = batch_number + 1
252 51 : batch_sizes(i) = max_batch_size
253 : END DO
254 26 : last_batch = my_batch_size - max_batch_size*batch_number
255 26 : IF (last_batch > 0) THEN
256 0 : batch_number = batch_number + 1
257 0 : batch_sizes(batch_number) = last_batch
258 : END IF
259 :
260 77 : ALLOCATE (batch_sizes_tmp(batch_number))
261 51 : batch_sizes_tmp(1:batch_number) = batch_sizes(1:batch_number)
262 26 : DEALLOCATE (batch_sizes)
263 51 : ALLOCATE (batch_sizes(batch_number))
264 51 : batch_sizes(:) = batch_sizes_tmp
265 26 : DEALLOCATE (batch_sizes_tmp)
266 :
267 51 : max_batch_size = MAXVAL(batch_sizes)
268 26 : CALL para_env%max(max_batch_size)
269 26 : max_batch_number = batch_number
270 26 : CALL para_env%max(max_batch_number)
271 26 : IF (unit_nr > 0) THEN
272 13 : WRITE (unit_nr, '(T3,A,T76,I5)') 'Maximum used batch size: ', max_batch_size
273 13 : WRITE (unit_nr, '(T3,A,T76,I5)') 'Number of integral recomputations: ', max_batch_number
274 13 : CALL m_flush(unit_nr)
275 : END IF
276 :
277 : ! Batches sizes exceed the occupied orbitals allocated for group
278 51 : CPASSERT(SUM(batch_sizes) <= my_batch_size)
279 :
280 26 : virt_i = dimen - occ_i
281 26 : virt_j = dimen - occ_j
282 26 : natom = SIZE(mp2_biel%index_table, 1)
283 :
284 26 : CALL para_env%sync()
285 26 : Emp2 = zero
286 26 : Emp2_Cou = zero
287 26 : Emp2_ex = zero
288 26 : i_batch_start = my_I_occupied_start - 1
289 26 : j_batch_start = my_J_occupied_start - 1
290 26 : Nj_occupied = my_J_batch_size
291 51 : DO i_batch = 1, batch_number
292 :
293 25 : Ni_occupied = batch_sizes(i_batch)
294 :
295 25 : counter = -1
296 100 : ALLOCATE (ij_matrix(Ni_occupied, Nj_occupied))
297 :
298 456 : ij_matrix = 0
299 84 : DO i = 1, Ni_occupied
300 402 : DO j = 1, Nj_occupied
301 318 : counter = counter + 1
302 377 : IF (MOD(counter, para_env_sub%num_pe) == para_env_sub%mepos) THEN
303 318 : ij_matrix(i, j) = ij_matrix(i, j) + 1
304 : END IF
305 : END DO
306 : END DO
307 :
308 75 : ALLOCATE (ij_list_proc_temp(Ni_occupied*occ_j, 2))
309 :
310 25 : elements_ij_proc = 0
311 84 : DO i = 1, Ni_occupied
312 402 : DO j = 1, Nj_occupied
313 318 : IF (ij_matrix(i, j) == 0) CYCLE
314 318 : elements_ij_proc = elements_ij_proc + 1
315 318 : ij_list_proc_temp(elements_ij_proc, 1) = i
316 377 : ij_list_proc_temp(elements_ij_proc, 2) = j
317 : END DO
318 : END DO
319 25 : DEALLOCATE (ij_matrix)
320 :
321 75 : ALLOCATE (ij_list_proc(elements_ij_proc, 2))
322 343 : DO i = 1, elements_ij_proc
323 318 : ij_list_proc(i, 1) = ij_list_proc_temp(i, 1)
324 343 : ij_list_proc(i, 2) = ij_list_proc_temp(i, 2)
325 : END DO
326 25 : DEALLOCATE (ij_list_proc_temp)
327 :
328 25 : IF (.NOT. alpha_beta_case) THEN
329 : CALL mp2_canonical_direct_single_batch(Emp2, Emp2_Cou, Emp2_ex, mp2_env, qs_env, para_env_sub, &
330 : mp2_biel, dimen, C_i, Auto_i, i_batch_start, Ni_occupied, occ_i, &
331 21 : elements_ij_proc, ij_list_proc, Nj_occupied, j_batch_start)
332 : ELSE
333 : CALL mp2_canonical_direct_single_batch(Emp2, Emp2_Cou, Emp2_ex, mp2_env, qs_env, para_env_sub, &
334 : mp2_biel, dimen, C_i, Auto_i, i_batch_start, Ni_occupied, occ_i, &
335 : elements_ij_proc, ij_list_proc, Nj_occupied, j_batch_start, &
336 4 : occ_j, C_j, Auto_j)
337 : END IF
338 :
339 25 : i_batch_start = i_batch_start + Ni_occupied
340 :
341 51 : DEALLOCATE (ij_list_proc)
342 :
343 : END DO
344 :
345 26 : CALL para_env%sum(Emp2_Cou)
346 26 : CALL para_env%sum(Emp2_Ex)
347 26 : CALL para_env%sum(Emp2)
348 :
349 26 : CALL mp_para_env_release(para_env_sub)
350 :
351 26 : CALL timestop(handle)
352 :
353 52 : END SUBROUTINE mp2_direct_energy
354 :
355 : ! **************************************************************************************************
356 : !> \brief ...
357 : !> \param Emp2 ...
358 : !> \param Emp2_Cou ...
359 : !> \param Emp2_ex ...
360 : !> \param mp2_env ...
361 : !> \param qs_env ...
362 : !> \param para_env ...
363 : !> \param mp2_biel ...
364 : !> \param dimen ...
365 : !> \param C ...
366 : !> \param Auto ...
367 : !> \param i_batch_start ...
368 : !> \param Ni_occupied ...
369 : !> \param occupied ...
370 : !> \param elements_ij_proc ...
371 : !> \param ij_list_proc ...
372 : !> \param Nj_occupied ...
373 : !> \param j_batch_start ...
374 : !> \param occupied_beta ...
375 : !> \param C_beta ...
376 : !> \param Auto_beta ...
377 : !> \param Integ_MP2 ...
378 : !> \par History
379 : !> 06.2011 created [Mauro Del Ben]
380 : !> \author Mauro Del Ben
381 : ! **************************************************************************************************
382 43 : SUBROUTINE mp2_canonical_direct_single_batch(Emp2, Emp2_Cou, Emp2_ex, mp2_env, qs_env, para_env, &
383 86 : mp2_biel, dimen, C, Auto, i_batch_start, Ni_occupied, &
384 43 : occupied, elements_ij_proc, ij_list_proc, Nj_occupied, j_batch_start, &
385 10 : occupied_beta, C_beta, Auto_beta, Integ_MP2)
386 :
387 : REAL(KIND=dp), INTENT(INOUT) :: Emp2, Emp2_Cou, Emp2_ex
388 : TYPE(mp2_type) :: mp2_env
389 : TYPE(qs_environment_type), POINTER :: qs_env
390 : TYPE(mp_para_env_type), INTENT(IN) :: para_env
391 : TYPE(mp2_biel_type), INTENT(IN) :: mp2_biel
392 : INTEGER, INTENT(IN) :: dimen
393 : REAL(KIND=dp), DIMENSION(dimen, dimen), INTENT(IN) :: C
394 : REAL(KIND=dp), DIMENSION(dimen), INTENT(IN) :: Auto
395 : INTEGER, INTENT(IN) :: i_batch_start, Ni_occupied, occupied, &
396 : elements_ij_proc
397 : INTEGER, DIMENSION(elements_ij_proc, 2), &
398 : INTENT(IN) :: ij_list_proc
399 : INTEGER, INTENT(IN) :: Nj_occupied, j_batch_start
400 : INTEGER, INTENT(IN), OPTIONAL :: occupied_beta
401 : REAL(KIND=dp), DIMENSION(dimen, dimen), &
402 : INTENT(IN), OPTIONAL :: C_beta
403 : REAL(KIND=dp), DIMENSION(dimen), INTENT(IN), &
404 : OPTIONAL :: Auto_beta
405 : REAL(KIND=dp), ALLOCATABLE, &
406 : DIMENSION(:, :, :, :), INTENT(OUT), OPTIONAL :: Integ_MP2
407 :
408 : CHARACTER(LEN=*), PARAMETER :: routineN = 'mp2_canonical_direct_single_batch'
409 :
410 : INTEGER :: case_index, counter_proc, elements_ij_proc_rec, elements_kl_proc, global_counter, &
411 : handle, i, i_list_ij, i_list_kl, i_set_list_ij, i_set_list_ij_start, i_set_list_ij_stop, &
412 : i_set_list_kl, i_set_list_kl_start, i_set_list_kl_stop, i_start, iatom, iatom_end, &
413 : iatom_start, iiB, ij_elem_max, ikind, index_ij_rec, index_ij_send, index_kl, &
414 : index_proc_ij, index_proc_shift, iset, jatom, jatom_end, jatom_start, jjB, jkind, jset, &
415 : katom, katom_end, katom_start, kkB, kkind, kset, latom, latom_end, latom_start, lkind, &
416 : llB, lset, max_num_call_sec_transf, max_pgf, max_set, multiple
417 : INTEGER :: my_num_call_sec_transf, natom, ncob, nints, nseta, nsetb, nsgf_max, nspins, &
418 : primitive_counter, proc_num, proc_receive, proc_send, R_offset_rec, Rsize_rec, &
419 : S_offset_rec, same_size_kl_index, sphi_a_u1, sphi_a_u2, sphi_a_u3, sphi_b_u1, sphi_b_u2, &
420 : sphi_b_u3, sphi_c_u1, sphi_c_u2, sphi_c_u3, sphi_d_u1, sphi_d_u2, sphi_d_u3, Ssize_rec, &
421 : step_size, total_num_RS_task
422 : INTEGER(int_8) :: estimate_to_store_int, neris_tmp, &
423 : neris_total, nprim_ints
424 43 : INTEGER, ALLOCATABLE, DIMENSION(:) :: kind_of, nimages, proc_num_task, &
425 43 : same_size_kl_elements_counter
426 43 : INTEGER, ALLOCATABLE, DIMENSION(:, :) :: kl_list_proc, task_counter_RS, &
427 43 : task_counter_RS_temp
428 : INTEGER, DIMENSION(4) :: RS_counter_temp
429 : INTEGER, DIMENSION(5) :: size_parameter_rec, size_parameter_send
430 43 : INTEGER, DIMENSION(:), POINTER :: la_max, la_min, lb_max, lb_min, lc_max, &
431 43 : lc_min, ld_max, ld_min, npgfa, npgfb, &
432 43 : npgfc, npgfd, nsgfa, nsgfb, nsgfc, &
433 43 : nsgfd
434 43 : INTEGER, DIMENSION(:, :), POINTER :: nsgfl_a, nsgfl_b, nsgfl_c, nsgfl_d
435 : LOGICAL :: alpha_beta_case, case_send_receive, &
436 : copy_integrals, do_periodic
437 43 : LOGICAL, DIMENSION(:, :), POINTER :: shm_atomic_pair_list
438 : REAL(KIND=dp) :: cartesian_estimate, coeffs_kind_max0, cost_tmp, eps_schwarz, ln_10, &
439 : log10_eps_schwarz, log10_pmax, max_contraction_val, max_val1, max_val2, max_val2_set, &
440 : pmax_atom, pmax_entry, ra(3), rab2, rb(3), rc(3), rcd2, rd(3), screen_kind_ij, &
441 : screen_kind_kl
442 43 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:) :: cost_RS, cost_RS_temp, ee_buffer1, &
443 43 : ee_buffer2, ee_primitives_tmp, &
444 43 : ee_work, ee_work2, primitive_integrals
445 43 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :) :: BIb_RS_mat_rec, C_beta_T, max_contraction
446 43 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :) :: BIb, BIb_RS_mat_rec_big, zero_mat_big
447 43 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :, :) :: BI1, MNRS
448 43 : REAL(KIND=dp), DIMENSION(:), POINTER :: p_work
449 43 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: shm_pmax_block, zeta, zetb, zetc, zetd
450 43 : REAL(KIND=dp), DIMENSION(:, :, :), POINTER :: sphi_a_ext_set, sphi_b_ext_set, &
451 43 : sphi_c_ext_set, sphi_d_ext_set
452 43 : REAL(KIND=dp), DIMENSION(:, :, :, :), POINTER :: sphi_a_ext, sphi_b_ext, sphi_c_ext, &
453 43 : sphi_d_ext
454 86 : REAL(KIND=dp), DIMENSION(dimen, 2) :: zero_mat
455 86 : REAL(KIND=dp), DIMENSION(dimen, dimen) :: C_T
456 : TYPE(cell_type), POINTER :: cell
457 : TYPE(cp_libint_t) :: private_lib
458 : TYPE(cp_logger_type), POINTER :: logger
459 43 : TYPE(hfx_basis_type), DIMENSION(:), POINTER :: basis_parameter
460 43 : TYPE(hfx_pgf_list), ALLOCATABLE, DIMENSION(:) :: pgf_list_ij, pgf_list_kl
461 : TYPE(hfx_pgf_product_list), ALLOCATABLE, &
462 43 : DIMENSION(:) :: pgf_product_list
463 : TYPE(hfx_potential_type) :: mp2_potential_parameter
464 : TYPE(hfx_screen_coeff_type), DIMENSION(:, :), &
465 43 : POINTER :: screen_coeffs_kind, tmp_R_1, tmp_R_2, &
466 43 : tmp_screen_pgf1, tmp_screen_pgf2
467 : TYPE(hfx_screen_coeff_type), &
468 43 : DIMENSION(:, :, :, :), POINTER :: screen_coeffs_set
469 : TYPE(hfx_screen_coeff_type), &
470 43 : DIMENSION(:, :, :, :, :, :), POINTER :: radii_pgf, screen_coeffs_pgf
471 : TYPE(hfx_type), POINTER :: actual_x_data
472 43 : TYPE(pair_list_type_mp2) :: list_ij, list_kl
473 : TYPE(pair_set_list_type), ALLOCATABLE, &
474 43 : DIMENSION(:) :: set_list_ij, set_list_kl
475 43 : TYPE(particle_type), DIMENSION(:), POINTER :: particle_set
476 :
477 43 : CALL timeset(routineN, handle)
478 :
479 : ! The Integ_MP2 will contain the (ia|jb) integrals, necessary for example
480 : ! for the RI-MP2 basis optimization. In this case the number of ij batches
481 : ! has to be equal to 1 (all integrals over molecular orbitals are computed
482 : ! in a single step).
483 43 : copy_integrals = .FALSE.
484 43 : IF (PRESENT(Integ_MP2)) copy_integrals = .TRUE.
485 :
486 43 : alpha_beta_case = .FALSE.
487 :
488 : CALL prepare_integral_calc(cell, qs_env, mp2_env, para_env, mp2_potential_parameter, actual_x_data, &
489 : do_periodic, basis_parameter, max_set, particle_set, natom, kind_of, &
490 : nsgf_max, primitive_integrals, ee_work, ee_work2, ee_buffer1, ee_buffer2, &
491 : ee_primitives_tmp, nspins, max_contraction, max_pgf, pgf_list_ij, &
492 : pgf_list_kl, pgf_product_list, nimages, eps_schwarz, log10_eps_schwarz, &
493 : private_lib, p_work, screen_coeffs_set, screen_coeffs_kind, screen_coeffs_pgf, &
494 43 : radii_pgf)
495 :
496 43 : ln_10 = LOG(10.0_dp)
497 :
498 43 : neris_tmp = 0_int_8
499 43 : neris_total = 0_int_8
500 43 : nprim_ints = 0_int_8
501 :
502 : !!!!!!!!!
503 880 : ALLOCATE (list_ij%elements(natom**2))
504 837 : ALLOCATE (list_kl%elements(natom**2))
505 : !!!!!!!!!
506 :
507 201 : coeffs_kind_max0 = MAXVAL(screen_coeffs_kind(:, :)%x(2))
508 15190 : ALLOCATE (set_list_ij((max_set*natom)**2))
509 15147 : ALLOCATE (set_list_kl((max_set*natom)**2))
510 :
511 : !! precalculate maximum density matrix elements in blocks
512 365 : actual_x_data%pmax_block = 0.0_dp
513 43 : shm_pmax_block => actual_x_data%pmax_block
514 :
515 43 : shm_atomic_pair_list => actual_x_data%atomic_pair_list
516 :
517 43 : iatom_start = 1
518 43 : iatom_end = natom
519 43 : jatom_start = 1
520 43 : jatom_end = natom
521 43 : katom_start = 1
522 43 : katom_end = natom
523 43 : latom_start = 1
524 43 : latom_end = natom
525 :
526 : CALL build_pair_list_mp2(natom, list_ij, set_list_ij, iatom_start, iatom_end, &
527 : jatom_start, jatom_end, &
528 : kind_of, basis_parameter, particle_set, &
529 : do_periodic, screen_coeffs_set, screen_coeffs_kind, &
530 : coeffs_kind_max0, log10_eps_schwarz, cell, 0.D+00, &
531 43 : shm_atomic_pair_list)
532 :
533 : CALL build_pair_list_mp2(natom, list_kl, set_list_kl, katom_start, katom_end, &
534 : latom_start, latom_end, &
535 : kind_of, basis_parameter, particle_set, &
536 : do_periodic, screen_coeffs_set, screen_coeffs_kind, &
537 : coeffs_kind_max0, log10_eps_schwarz, cell, 0.D+00, &
538 43 : shm_atomic_pair_list)
539 :
540 215 : ALLOCATE (BIb(dimen, dimen, elements_ij_proc))
541 1296103 : BIb = 0.0D+00
542 64195 : C_T = TRANSPOSE(C)
543 :
544 43 : IF (PRESENT(occupied_beta) .AND. PRESENT(C_beta) .AND. PRESENT(Auto_beta)) THEN
545 40 : ALLOCATE (C_beta_T(dimen, dimen))
546 9322 : C_beta_T(:, :) = TRANSPOSE(C_beta)
547 : alpha_beta_case = .TRUE.
548 : END IF
549 :
550 43 : ij_elem_max = elements_ij_proc
551 43 : CALL para_env%max(ij_elem_max)
552 :
553 : ! calculate the minimum multiple of num_pe >= to Ni_occupied*occupied, in such a way
554 : ! that the i, j loop is performed exactly the same number of time for each procewssor
555 43 : multiple = 0
556 : DO
557 402 : multiple = multiple + para_env%num_pe
558 402 : IF (multiple >= Ni_occupied*Nj_occupied) EXIT
559 : END DO
560 :
561 : ! proc_num_task contains the number of times the second occupied
562 : ! orbital transformation is called for each processor, needs for balancing
563 : ! the point to point send
564 129 : ALLOCATE (proc_num_task(0:para_env%num_pe - 1))
565 :
566 104 : proc_num_task = 0
567 :
568 43 : counter_proc = 0
569 204 : DO i_list_ij = 1, list_ij%n_element
570 161 : iatom = list_ij%elements(i_list_ij)%pair(1)
571 161 : jatom = list_ij%elements(i_list_ij)%pair(2)
572 161 : i_set_list_ij_start = list_ij%elements(i_list_ij)%set_bounds(1)
573 161 : i_set_list_ij_stop = list_ij%elements(i_list_ij)%set_bounds(2)
574 161 : ikind = list_ij%elements(i_list_ij)%kind_pair(1)
575 161 : jkind = list_ij%elements(i_list_ij)%kind_pair(2)
576 :
577 161 : nsgfb => basis_parameter(jkind)%nsgf
578 161 : nsgfa => basis_parameter(ikind)%nsgf
579 :
580 6381 : DO i_set_list_ij = i_set_list_ij_start, i_set_list_ij_stop
581 6177 : iset = set_list_ij(i_set_list_ij)%pair(1)
582 6177 : jset = set_list_ij(i_set_list_ij)%pair(2)
583 6177 : IF (iatom == jatom .AND. jset < iset) CYCLE
584 :
585 4707 : counter_proc = counter_proc + 1
586 4707 : proc_num = MOD(counter_proc, para_env%num_pe)
587 :
588 6338 : proc_num_task(proc_num) = proc_num_task(proc_num) + 1
589 :
590 : END DO
591 : END DO
592 : ! calculate the exact maximum number of calls to the second occupied
593 : ! orbital transformation
594 : ! max_num_call_sec_transf=MAXVAL(proc_num_task)
595 :
596 : ! distribute the RS pair over all processor
597 129 : ALLOCATE (kl_list_proc(proc_num_task(para_env%mepos), 3))
598 :
599 14131 : kl_list_proc = 0
600 :
601 : counter_proc = 0
602 : elements_kl_proc = 0
603 204 : DO i_list_ij = 1, list_ij%n_element
604 161 : iatom = list_ij%elements(i_list_ij)%pair(1)
605 161 : jatom = list_ij%elements(i_list_ij)%pair(2)
606 161 : i_set_list_ij_start = list_ij%elements(i_list_ij)%set_bounds(1)
607 161 : i_set_list_ij_stop = list_ij%elements(i_list_ij)%set_bounds(2)
608 161 : ikind = list_ij%elements(i_list_ij)%kind_pair(1)
609 161 : jkind = list_ij%elements(i_list_ij)%kind_pair(2)
610 :
611 161 : nsgfb => basis_parameter(jkind)%nsgf
612 161 : nsgfa => basis_parameter(ikind)%nsgf
613 :
614 6381 : DO i_set_list_ij = i_set_list_ij_start, i_set_list_ij_stop
615 6177 : iset = set_list_ij(i_set_list_ij)%pair(1)
616 6177 : jset = set_list_ij(i_set_list_ij)%pair(2)
617 6177 : IF (iatom == jatom .AND. jset < iset) CYCLE
618 :
619 4707 : counter_proc = counter_proc + 1
620 4707 : proc_num = MOD(counter_proc, para_env%num_pe)
621 :
622 4868 : IF (proc_num == para_env%mepos) THEN
623 4653 : elements_kl_proc = elements_kl_proc + 1
624 4653 : kl_list_proc(elements_kl_proc, 1) = i_list_ij
625 4653 : kl_list_proc(elements_kl_proc, 2) = i_set_list_ij
626 4653 : kl_list_proc(elements_kl_proc, 3) = counter_proc
627 : END IF
628 :
629 : END DO
630 : END DO
631 :
632 104 : total_num_RS_task = SUM(proc_num_task)
633 129 : ALLOCATE (task_counter_RS(total_num_RS_task, 4))
634 :
635 129 : ALLOCATE (cost_RS(total_num_RS_task))
636 :
637 19043 : task_counter_RS = 0
638 4750 : cost_RS = 0.0_dp
639 :
640 129 : DO case_index = 1, 2
641 :
642 : my_num_call_sec_transf = 0
643 9392 : DO index_kl = 1, elements_kl_proc
644 :
645 9306 : i_list_ij = kl_list_proc(index_kl, 1)
646 9306 : i_set_list_ij = kl_list_proc(index_kl, 2)
647 :
648 9306 : iatom = list_ij%elements(i_list_ij)%pair(1)
649 9306 : jatom = list_ij%elements(i_list_ij)%pair(2)
650 9306 : i_set_list_ij_start = list_ij%elements(i_list_ij)%set_bounds(1)
651 9306 : i_set_list_ij_stop = list_ij%elements(i_list_ij)%set_bounds(2)
652 9306 : ikind = list_ij%elements(i_list_ij)%kind_pair(1)
653 9306 : jkind = list_ij%elements(i_list_ij)%kind_pair(2)
654 37224 : ra = list_ij%elements(i_list_ij)%r1
655 37224 : rb = list_ij%elements(i_list_ij)%r2
656 9306 : rab2 = list_ij%elements(i_list_ij)%dist2
657 :
658 9306 : la_max => basis_parameter(ikind)%lmax
659 9306 : la_min => basis_parameter(ikind)%lmin
660 9306 : npgfa => basis_parameter(ikind)%npgf
661 9306 : nseta = basis_parameter(ikind)%nset
662 9306 : zeta => basis_parameter(ikind)%zet
663 9306 : nsgfa => basis_parameter(ikind)%nsgf
664 9306 : sphi_a_ext => basis_parameter(ikind)%sphi_ext(:, :, :, :)
665 9306 : nsgfl_a => basis_parameter(ikind)%nsgfl
666 9306 : sphi_a_u1 = UBOUND(sphi_a_ext, 1)
667 9306 : sphi_a_u2 = UBOUND(sphi_a_ext, 2)
668 9306 : sphi_a_u3 = UBOUND(sphi_a_ext, 3)
669 :
670 9306 : lb_max => basis_parameter(jkind)%lmax
671 9306 : lb_min => basis_parameter(jkind)%lmin
672 9306 : npgfb => basis_parameter(jkind)%npgf
673 9306 : nsetb = basis_parameter(jkind)%nset
674 9306 : zetb => basis_parameter(jkind)%zet
675 9306 : nsgfb => basis_parameter(jkind)%nsgf
676 9306 : sphi_b_ext => basis_parameter(jkind)%sphi_ext(:, :, :, :)
677 9306 : nsgfl_b => basis_parameter(jkind)%nsgfl
678 9306 : sphi_b_u1 = UBOUND(sphi_b_ext, 1)
679 9306 : sphi_b_u2 = UBOUND(sphi_b_ext, 2)
680 9306 : sphi_b_u3 = UBOUND(sphi_b_ext, 3)
681 :
682 9306 : iset = set_list_ij(i_set_list_ij)%pair(1)
683 9306 : jset = set_list_ij(i_set_list_ij)%pair(2)
684 :
685 9306 : ncob = npgfb(jset)*ncoset(lb_max(jset))
686 : max_val1 = screen_coeffs_set(jset, iset, jkind, ikind)%x(1)*rab2 + &
687 9306 : screen_coeffs_set(jset, iset, jkind, ikind)%x(2)
688 :
689 9306 : sphi_a_ext_set => sphi_a_ext(:, :, :, iset)
690 9306 : sphi_b_ext_set => sphi_b_ext(:, :, :, jset)
691 :
692 9306 : IF (case_index == 1) THEN
693 4653 : global_counter = kl_list_proc(index_kl, 3)
694 4653 : task_counter_RS(global_counter, 1) = i_list_ij
695 4653 : task_counter_RS(global_counter, 2) = i_set_list_ij
696 4653 : task_counter_RS(global_counter, 3) = nsgfb(jset)*nsgfa(iset)
697 : END IF
698 :
699 9306 : IF (ALLOCATED(BI1)) DEALLOCATE (BI1)
700 55836 : ALLOCATE (BI1(dimen, Ni_occupied, nsgfb(jset), nsgfa(iset)))
701 :
702 12485758 : BI1 = 0.D+00
703 :
704 71604 : DO i_list_kl = 1, list_kl%n_element
705 :
706 62298 : katom = list_kl%elements(i_list_kl)%pair(1)
707 62298 : latom = list_kl%elements(i_list_kl)%pair(2)
708 :
709 62298 : i_set_list_kl_start = list_kl%elements(i_list_kl)%set_bounds(1)
710 62298 : i_set_list_kl_stop = list_kl%elements(i_list_kl)%set_bounds(2)
711 62298 : kkind = list_kl%elements(i_list_kl)%kind_pair(1)
712 62298 : lkind = list_kl%elements(i_list_kl)%kind_pair(2)
713 249192 : rc = list_kl%elements(i_list_kl)%r1
714 249192 : rd = list_kl%elements(i_list_kl)%r2
715 62298 : rcd2 = list_kl%elements(i_list_kl)%dist2
716 :
717 62298 : pmax_atom = 0.0_dp
718 :
719 : screen_kind_ij = screen_coeffs_kind(jkind, ikind)%x(1)*rab2 + &
720 62298 : screen_coeffs_kind(jkind, ikind)%x(2)
721 : screen_kind_kl = screen_coeffs_kind(lkind, kkind)%x(1)*rcd2 + &
722 62298 : screen_coeffs_kind(lkind, kkind)%x(2)
723 :
724 : !!!!! Change the loop order
725 62298 : IF (max_val1 + screen_kind_kl + pmax_atom < log10_eps_schwarz) CYCLE
726 : !!!!!
727 62274 : IF (screen_kind_ij + screen_kind_kl + pmax_atom < log10_eps_schwarz) CYCLE
728 :
729 62274 : lc_max => basis_parameter(kkind)%lmax
730 62274 : lc_min => basis_parameter(kkind)%lmin
731 62274 : npgfc => basis_parameter(kkind)%npgf
732 62274 : zetc => basis_parameter(kkind)%zet
733 62274 : nsgfc => basis_parameter(kkind)%nsgf
734 62274 : sphi_c_ext => basis_parameter(kkind)%sphi_ext(:, :, :, :)
735 62274 : nsgfl_c => basis_parameter(kkind)%nsgfl
736 62274 : sphi_c_u1 = UBOUND(sphi_c_ext, 1)
737 62274 : sphi_c_u2 = UBOUND(sphi_c_ext, 2)
738 62274 : sphi_c_u3 = UBOUND(sphi_c_ext, 3)
739 :
740 62274 : ld_max => basis_parameter(lkind)%lmax
741 62274 : ld_min => basis_parameter(lkind)%lmin
742 62274 : npgfd => basis_parameter(lkind)%npgf
743 62274 : zetd => basis_parameter(lkind)%zet
744 62274 : nsgfd => basis_parameter(lkind)%nsgf
745 62274 : sphi_d_ext => basis_parameter(lkind)%sphi_ext(:, :, :, :)
746 62274 : nsgfl_d => basis_parameter(lkind)%nsgfl
747 62274 : sphi_d_u1 = UBOUND(sphi_d_ext, 1)
748 62274 : sphi_d_u2 = UBOUND(sphi_d_ext, 2)
749 62274 : sphi_d_u3 = UBOUND(sphi_d_ext, 3)
750 :
751 2761710 : DO i_set_list_kl = i_set_list_kl_start, i_set_list_kl_stop
752 2690130 : kset = set_list_kl(i_set_list_kl)%pair(1)
753 2690130 : lset = set_list_kl(i_set_list_kl)%pair(2)
754 :
755 2690130 : IF (katom == latom .AND. lset < kset) CYCLE
756 :
757 : max_val2_set = (screen_coeffs_set(lset, kset, lkind, kkind)%x(1)*rcd2 + &
758 2076726 : screen_coeffs_set(lset, kset, lkind, kkind)%x(2))
759 2076726 : max_val2 = max_val1 + max_val2_set
760 :
761 : !! Near field screening
762 2076726 : IF (max_val2 + pmax_atom < log10_eps_schwarz) CYCLE
763 2023538 : sphi_c_ext_set => sphi_c_ext(:, :, :, kset)
764 2023538 : sphi_d_ext_set => sphi_d_ext(:, :, :, lset)
765 : !! get max_vals if we screen on initial density
766 2023538 : pmax_entry = 0.0_dp
767 :
768 2023538 : log10_pmax = pmax_entry
769 2023538 : max_val2 = max_val2 + log10_pmax
770 : IF (max_val2 < log10_eps_schwarz) CYCLE
771 2023538 : pmax_entry = EXP(log10_pmax*ln_10)
772 :
773 2085836 : IF (case_index == 2) THEN
774 1011769 : IF (ALLOCATED(MNRS)) DEALLOCATE (MNRS)
775 6070614 : ALLOCATE (MNRS(nsgfd(lset), nsgfc(kset), nsgfb(jset), nsgfa(iset)))
776 :
777 82996187 : MNRS = 0.D+00
778 :
779 : max_contraction_val = max_contraction(iset, iatom)* &
780 : max_contraction(jset, jatom)* &
781 : max_contraction(kset, katom)* &
782 1011769 : max_contraction(lset, latom)*pmax_entry
783 1011769 : tmp_R_1 => radii_pgf(:, :, jset, iset, jkind, ikind)
784 1011769 : tmp_R_2 => radii_pgf(:, :, lset, kset, lkind, kkind)
785 1011769 : tmp_screen_pgf1 => screen_coeffs_pgf(:, :, jset, iset, jkind, ikind)
786 1011769 : tmp_screen_pgf2 => screen_coeffs_pgf(:, :, lset, kset, lkind, kkind)
787 :
788 : CALL coulomb4(private_lib, ra, rb, rc, rd, npgfa(iset), npgfb(jset), npgfc(kset), npgfd(lset), &
789 : la_min(iset), la_max(iset), lb_min(jset), lb_max(jset), &
790 : lc_min(kset), lc_max(kset), ld_min(lset), ld_max(lset), &
791 : nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
792 : sphi_a_u1, sphi_a_u2, sphi_a_u3, &
793 : sphi_b_u1, sphi_b_u2, sphi_b_u3, &
794 : sphi_c_u1, sphi_c_u2, sphi_c_u3, &
795 : sphi_d_u1, sphi_d_u2, sphi_d_u3, &
796 : zeta(1:npgfa(iset), iset), zetb(1:npgfb(jset), jset), &
797 : zetc(1:npgfc(kset), kset), zetd(1:npgfd(lset), lset), &
798 : primitive_integrals, &
799 : mp2_potential_parameter, &
800 : actual_x_data%neighbor_cells, screen_coeffs_set(jset, iset, jkind, ikind)%x, &
801 : screen_coeffs_set(lset, kset, lkind, kkind)%x, eps_schwarz, &
802 : max_contraction_val, cartesian_estimate, cell, neris_tmp, &
803 : log10_pmax, log10_eps_schwarz, &
804 : tmp_R_1, tmp_R_2, tmp_screen_pgf1, tmp_screen_pgf2, &
805 : pgf_list_ij, pgf_list_kl, pgf_product_list, &
806 : nsgfl_a(:, iset), nsgfl_b(:, jset), &
807 : nsgfl_c(:, kset), nsgfl_d(:, lset), &
808 : sphi_a_ext_set, &
809 : sphi_b_ext_set, &
810 : sphi_c_ext_set, &
811 : sphi_d_ext_set, &
812 : ee_work, ee_work2, ee_buffer1, ee_buffer2, ee_primitives_tmp, &
813 1011769 : nimages, do_periodic, p_work)
814 :
815 1011769 : nints = nsgfa(iset)*nsgfb(jset)*nsgfc(kset)*nsgfd(lset)
816 1011769 : neris_total = neris_total + nints
817 1011769 : nprim_ints = nprim_ints + neris_tmp
818 1011769 : IF (cartesian_estimate == 0.0_dp) cartesian_estimate = TINY(cartesian_estimate)
819 1011769 : estimate_to_store_int = EXPONENT(cartesian_estimate)
820 1011769 : estimate_to_store_int = MAX(estimate_to_store_int, -15_int_8)
821 1011769 : cartesian_estimate = SET_EXPONENT(1.0_dp, estimate_to_store_int + 1)
822 :
823 1011769 : IF (cartesian_estimate < eps_schwarz) CYCLE
824 :
825 : primitive_counter = 0
826 3501876 : DO llB = 1, nsgfd(lset)
827 10171505 : DO kkB = 1, nsgfc(kset)
828 28159746 : DO jjB = 1, nsgfb(jset)
829 80452823 : DO iiB = 1, nsgfa(iset)
830 54784876 : primitive_counter = primitive_counter + 1
831 73783194 : MNRS(llB, kkB, jjB, iiB) = primitive_integrals(primitive_counter)
832 : END DO
833 : END DO
834 : END DO
835 : END DO
836 :
837 : CALL transform_occupied_orbitals_first(dimen, iatom, jatom, katom, latom, &
838 : iset, jset, kset, lset, &
839 : nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
840 : i_batch_start, Ni_occupied, &
841 1010077 : MNRS, C_T, mp2_biel, BI1)
842 : ELSE
843 1011769 : task_counter_RS(global_counter, 4) = task_counter_RS(global_counter, 4) + 1
844 :
845 1011769 : cost_tmp = 0.0_dp
846 : cost_tmp = cost_model(nsgfd(lset), nsgfc(kset), nsgfb(jset), nsgfa(iset), &
847 : npgfd(lset), npgfc(kset), npgfb(jset), npgfa(iset), &
848 : max_val2/log10_eps_schwarz, &
849 1011769 : p1_energy, p2_energy, p3_energy)
850 1011769 : cost_RS(global_counter) = cost_RS(global_counter) + cost_tmp
851 : END IF
852 :
853 : END DO ! i_set_list_kl
854 : END DO ! i_list_kl
855 :
856 9392 : IF (case_index == 2) THEN
857 4653 : my_num_call_sec_transf = my_num_call_sec_transf + 1
858 4653 : IF (.NOT. alpha_beta_case) THEN
859 3993 : IF (.NOT. mp2_env%direct_canonical%big_send) THEN
860 : CALL transform_occupied_orbitals_second(dimen, iatom, jatom, iset, jset, &
861 : nsgfa(iset), nsgfb(jset), Ni_occupied, Nj_occupied, j_batch_start, &
862 : BI1, C_T, mp2_biel, para_env, elements_ij_proc, &
863 0 : multiple, BIb)
864 : ELSE
865 : CALL transform_occupied_orbitals_second_big( &
866 : dimen, iatom, jatom, iset, jset, &
867 : nsgfa(iset), nsgfb(jset), Ni_occupied, Nj_occupied, j_batch_start, &
868 3993 : ij_elem_max, BI1, C_T, mp2_biel, para_env, elements_ij_proc, BIb)
869 : END IF
870 : ELSE
871 660 : IF (.NOT. mp2_env%direct_canonical%big_send) THEN
872 : CALL transform_occupied_orbitals_second(dimen, iatom, jatom, iset, jset, &
873 : nsgfa(iset), nsgfb(jset), Ni_occupied, Nj_occupied, j_batch_start, &
874 : BI1, C_beta_T, mp2_biel, para_env, elements_ij_proc, &
875 0 : multiple, BIb)
876 : ELSE
877 : CALL transform_occupied_orbitals_second_big( &
878 : dimen, iatom, jatom, iset, jset, &
879 : nsgfa(iset), nsgfb(jset), Ni_occupied, Nj_occupied, j_batch_start, &
880 660 : ij_elem_max, BI1, C_beta_T, mp2_biel, para_env, elements_ij_proc, BIb)
881 : END IF
882 : END IF
883 : END IF
884 :
885 : END DO !i_list_ij
886 :
887 129 : IF (case_index == 1) THEN
888 43 : CALL para_env%sum(task_counter_RS)
889 43 : CALL para_env%sum(cost_RS)
890 86 : ALLOCATE (task_counter_RS_temp(total_num_RS_task, 4))
891 :
892 86 : ALLOCATE (cost_RS_temp(total_num_RS_task))
893 :
894 43 : step_size = 1
895 129 : ALLOCATE (same_size_kl_elements_counter((nsgf_max**2 + 1)/step_size + 1))
896 :
897 1372 : same_size_kl_elements_counter = 0
898 :
899 43 : same_size_kl_index = 0
900 43 : global_counter = 0
901 43 : DO iiB = nsgf_max**2 + 1, 0, -step_size
902 149802 : DO jjB = 1, total_num_RS_task
903 149802 : IF (task_counter_RS(jjB, 3) > iiB - step_size .AND. task_counter_RS(jjB, 3) <= iiB) THEN
904 4707 : global_counter = global_counter + 1
905 23535 : task_counter_RS_temp(global_counter, 1:4) = task_counter_RS(jjB, 1:4)
906 4707 : cost_RS_temp(global_counter) = cost_RS(jjB)
907 : END IF
908 : END DO
909 1329 : same_size_kl_index = same_size_kl_index + 1
910 1329 : same_size_kl_elements_counter(same_size_kl_index) = global_counter
911 : END DO
912 :
913 43 : DEALLOCATE (task_counter_RS)
914 43 : DEALLOCATE (cost_RS)
915 :
916 43 : i_start = 1
917 1372 : DO same_size_kl_index = 1, SIZE(same_size_kl_elements_counter)
918 6036 : DO iiB = i_start, same_size_kl_elements_counter(same_size_kl_index)
919 100828 : DO jjB = iiB + 1, same_size_kl_elements_counter(same_size_kl_index)
920 :
921 99499 : IF (cost_RS_temp(jjB) >= cost_RS_temp(iiB)) THEN
922 213860 : RS_counter_temp = task_counter_RS_temp(iiB, 1:4)
923 213860 : task_counter_RS_temp(iiB, 1:4) = task_counter_RS_temp(jjB, 1:4)
924 213860 : task_counter_RS_temp(jjB, 1:4) = RS_counter_temp
925 :
926 42772 : cost_tmp = cost_RS_temp(iiB)
927 42772 : cost_RS_temp(iiB) = cost_RS_temp(jjB)
928 42772 : cost_RS_temp(jjB) = cost_tmp
929 : END IF
930 : END DO
931 : END DO
932 1372 : i_start = same_size_kl_elements_counter(same_size_kl_index) + 1
933 : END DO
934 :
935 104 : proc_num_task = 0
936 4750 : DO counter_proc = 1, total_num_RS_task
937 4707 : proc_num = MOD(counter_proc, para_env%num_pe)
938 4750 : proc_num_task(proc_num) = proc_num_task(proc_num) + 1
939 : END DO
940 :
941 104 : max_num_call_sec_transf = MAXVAL(proc_num_task)
942 :
943 43 : DEALLOCATE (kl_list_proc)
944 129 : ALLOCATE (kl_list_proc(proc_num_task(para_env%mepos), 2))
945 :
946 9435 : kl_list_proc = 0
947 :
948 : elements_kl_proc = 0
949 4750 : DO counter_proc = 1, total_num_RS_task
950 4707 : proc_num = MOD(counter_proc, para_env%num_pe)
951 4750 : IF (proc_num == para_env%mepos) THEN
952 4653 : elements_kl_proc = elements_kl_proc + 1
953 4653 : kl_list_proc(elements_kl_proc, 1) = task_counter_RS_temp(counter_proc, 1)
954 4653 : kl_list_proc(elements_kl_proc, 2) = task_counter_RS_temp(counter_proc, 2)
955 : END IF
956 : END DO
957 :
958 43 : DEALLOCATE (task_counter_RS_temp)
959 43 : DEALLOCATE (cost_RS_temp)
960 : END IF
961 : END DO ! case_index
962 :
963 43 : size_parameter_send(1) = 1
964 43 : size_parameter_send(2) = 1
965 43 : size_parameter_send(3) = 0
966 43 : size_parameter_send(4) = 0
967 43 : size_parameter_send(5) = elements_ij_proc
968 :
969 43 : IF (mp2_env%direct_canonical%big_send) THEN
970 172 : ALLOCATE (zero_mat_big(dimen, 2, ij_elem_max))
971 :
972 : END IF
973 :
974 43 : DO iiB = my_num_call_sec_transf + 1, max_num_call_sec_transf
975 43 : DO index_proc_shift = 0, para_env%num_pe - 1
976 :
977 0 : proc_send = MODULO(para_env%mepos + index_proc_shift, para_env%num_pe)
978 0 : proc_receive = MODULO(para_env%mepos - index_proc_shift, para_env%num_pe)
979 :
980 0 : case_send_receive = (proc_send /= para_env%mepos)
981 :
982 0 : IF (case_send_receive) THEN
983 : ! the processor starts to send (and receive?)
984 :
985 0 : CALL para_env%sendrecv(size_parameter_send, proc_send, size_parameter_rec, proc_receive)
986 :
987 0 : Rsize_rec = size_parameter_rec(1)
988 0 : Ssize_rec = size_parameter_rec(2)
989 0 : R_offset_rec = size_parameter_rec(3)
990 0 : S_offset_rec = size_parameter_rec(4)
991 0 : elements_ij_proc_rec = size_parameter_rec(5)
992 0 : IF (.NOT. mp2_env%direct_canonical%big_send) THEN
993 0 : ALLOCATE (BIb_RS_mat_rec(dimen, Rsize_rec + Ssize_rec))
994 : ELSE
995 0 : ALLOCATE (BIb_RS_mat_rec_big(dimen, Rsize_rec + Ssize_rec, ij_elem_max))
996 : END IF
997 : ELSE
998 0 : elements_ij_proc_rec = elements_ij_proc
999 : END IF
1000 :
1001 0 : IF (.NOT. mp2_env%direct_canonical%big_send) THEN
1002 0 : index_ij_send = 0
1003 0 : index_ij_rec = 0
1004 0 : DO index_proc_ij = proc_send + 1, multiple, para_env%num_pe
1005 :
1006 0 : zero_mat = 0.D+00
1007 :
1008 0 : IF (case_send_receive) THEN
1009 :
1010 0 : CALL para_env%sendrecv(zero_mat, proc_send, BIb_RS_mat_rec, proc_receive)
1011 :
1012 0 : index_ij_rec = index_ij_rec + 1
1013 0 : IF (index_ij_rec <= elements_ij_proc .AND. elements_ij_proc > 0) THEN
1014 :
1015 : BIb(1:dimen, R_offset_rec + 1:R_offset_rec + Rsize_rec, index_ij_rec) = &
1016 : BIb(1:dimen, R_offset_rec + 1:R_offset_rec + Rsize_rec, index_ij_rec) + &
1017 0 : BIb_RS_mat_rec(1:dimen, 1:Rsize_rec)
1018 :
1019 : BIb(1:dimen, S_offset_rec + 1:S_offset_rec + Ssize_rec, index_ij_rec) = &
1020 : BIb(1:dimen, S_offset_rec + 1:S_offset_rec + Ssize_rec, index_ij_rec) + &
1021 0 : BIb_RS_mat_rec(1:dimen, Rsize_rec + 1:Rsize_rec + Ssize_rec)
1022 :
1023 : END IF
1024 : END IF
1025 :
1026 : END DO
1027 : ELSE
1028 0 : zero_mat_big = 0.D+00
1029 :
1030 0 : IF (case_send_receive) THEN
1031 :
1032 0 : CALL para_env%sendrecv(zero_mat_big, proc_send, BIb_RS_mat_rec_big, proc_receive)
1033 :
1034 : BIb(1:dimen, R_offset_rec + 1:R_offset_rec + Rsize_rec, 1:elements_ij_proc) = &
1035 : BIb(1:dimen, R_offset_rec + 1:R_offset_rec + Rsize_rec, 1:elements_ij_proc) + &
1036 0 : BIb_RS_mat_rec_big(1:dimen, 1:Rsize_rec, 1:elements_ij_proc)
1037 :
1038 : BIb(1:dimen, S_offset_rec + 1:S_offset_rec + Ssize_rec, 1:elements_ij_proc) = &
1039 : BIb(1:dimen, S_offset_rec + 1:S_offset_rec + Ssize_rec, 1:elements_ij_proc) + &
1040 0 : BIb_RS_mat_rec_big(1:dimen, Rsize_rec + 1:Rsize_rec + Ssize_rec, 1:elements_ij_proc)
1041 :
1042 : END IF
1043 : END IF
1044 :
1045 0 : IF (case_send_receive) THEN
1046 0 : IF (.NOT. mp2_env%direct_canonical%big_send) THEN
1047 0 : DEALLOCATE (BIb_RS_mat_rec)
1048 : ELSE
1049 0 : DEALLOCATE (BIb_RS_mat_rec_big)
1050 : END IF
1051 : END IF
1052 :
1053 : END DO
1054 : END DO
1055 :
1056 43 : IF (mp2_env%direct_canonical%big_send) THEN
1057 43 : DEALLOCATE (zero_mat_big)
1058 : END IF
1059 :
1060 43 : logger => cp_get_default_logger()
1061 :
1062 43 : DEALLOCATE (primitive_integrals)
1063 :
1064 43 : IF (.NOT. alpha_beta_case) THEN
1065 : CALL transform_virtual_orbitals_and_accumulate(dimen, occupied, dimen - occupied, i_batch_start, &
1066 : j_batch_start, BIb, C, Auto, elements_ij_proc, ij_list_proc, &
1067 33 : nspins, Emp2, Emp2_Cou, Emp2_ex)
1068 : ELSE
1069 : CALL transform_virtual_orbitals_and_accumulate_ABcase( &
1070 : dimen, occupied, occupied_beta, dimen - occupied, dimen - occupied_beta, &
1071 : i_batch_start, j_batch_start, &
1072 : BIb, C, C_beta, Auto, Auto_beta, &
1073 10 : elements_ij_proc, ij_list_proc, Emp2, Emp2_Cou)
1074 10 : DEALLOCATE (C_beta_T)
1075 : END IF
1076 :
1077 43 : IF (copy_integrals) THEN
1078 18 : IF (.NOT. alpha_beta_case) THEN
1079 72 : ALLOCATE (Integ_MP2(dimen - occupied, dimen - occupied, occupied, occupied))
1080 11976 : Integ_MP2 = 0.0_dp
1081 72 : DO i = 1, elements_ij_proc
1082 60 : iiB = ij_list_proc(i, 1)
1083 60 : jjB = ij_list_proc(i, 2)
1084 5976 : Integ_MP2(:, :, iiB + i_batch_start, jjB + j_batch_start) = BIb(1:dimen - occupied, 1:dimen - occupied, i)
1085 : END DO
1086 : ELSE
1087 36 : ALLOCATE (Integ_MP2(dimen - occupied, dimen - occupied_beta, occupied, occupied_beta))
1088 5346 : Integ_MP2 = 0.0_dp
1089 30 : DO i = 1, elements_ij_proc
1090 24 : iiB = ij_list_proc(i, 1)
1091 24 : jjB = ij_list_proc(i, 2)
1092 2670 : Integ_MP2(:, :, iiB + i_batch_start, jjB + j_batch_start) = BIb(1:dimen - occupied, 1:dimen - occupied_beta, i)
1093 : END DO
1094 : END IF
1095 : END IF
1096 43 : DEALLOCATE (BIb)
1097 :
1098 43 : DEALLOCATE (set_list_ij, set_list_kl)
1099 :
1100 1665 : DO i = 1, max_pgf**2
1101 1622 : DEALLOCATE (pgf_list_ij(i)%image_list)
1102 1665 : DEALLOCATE (pgf_list_kl(i)%image_list)
1103 : END DO
1104 :
1105 43 : DEALLOCATE (pgf_list_ij)
1106 43 : DEALLOCATE (pgf_list_kl)
1107 43 : DEALLOCATE (pgf_product_list)
1108 :
1109 43 : DEALLOCATE (max_contraction, kind_of)
1110 :
1111 43 : DEALLOCATE (ee_work, ee_work2, ee_buffer1, ee_buffer2, ee_primitives_tmp)
1112 :
1113 43 : DEALLOCATE (nimages)
1114 :
1115 43 : IF (mp2_env%potential_parameter%potential_type == do_potential_TShPSC) THEN
1116 2 : init_TShPSC_lmax = -1
1117 2 : CALL free_C0()
1118 : END IF
1119 :
1120 43 : CALL timestop(handle)
1121 :
1122 139 : END SUBROUTINE mp2_canonical_direct_single_batch
1123 :
1124 : ! **************************************************************************************************
1125 : !> \brief ...
1126 : !> \param dimen ...
1127 : !> \param latom ...
1128 : !> \param katom ...
1129 : !> \param jatom ...
1130 : !> \param iatom ...
1131 : !> \param lset ...
1132 : !> \param kset ...
1133 : !> \param jset ...
1134 : !> \param iset ...
1135 : !> \param Ssize ...
1136 : !> \param Rsize ...
1137 : !> \param Nsize ...
1138 : !> \param Msize ...
1139 : !> \param i_batch_start ...
1140 : !> \param Ni_occupied ...
1141 : !> \param MNRS ...
1142 : !> \param C_T ...
1143 : !> \param mp2_biel ...
1144 : !> \param BI1 ...
1145 : ! **************************************************************************************************
1146 1010077 : SUBROUTINE transform_occupied_orbitals_first(dimen, latom, katom, jatom, iatom, &
1147 : lset, kset, jset, iset, &
1148 : Ssize, Rsize, Nsize, Msize, &
1149 : i_batch_start, Ni_occupied, &
1150 1010077 : MNRS, C_T, mp2_biel, BI1)
1151 :
1152 : INTEGER, INTENT(IN) :: dimen, latom, katom, jatom, iatom, lset, &
1153 : kset, jset, iset, Ssize, Rsize, Nsize, &
1154 : Msize, i_batch_start, Ni_occupied
1155 : REAL(KIND=dp), &
1156 : DIMENSION(Msize, Nsize, Rsize, Ssize), &
1157 : INTENT(IN) :: MNRS
1158 : REAL(KIND=dp), DIMENSION(dimen, dimen), INTENT(IN) :: C_T
1159 : TYPE(mp2_biel_type), INTENT(IN) :: mp2_biel
1160 : REAL(KIND=dp), &
1161 : DIMENSION(dimen, Ni_occupied, Rsize, Ssize), &
1162 : INTENT(INOUT) :: BI1
1163 :
1164 : INTEGER :: i, i_global, m, M_global, M_offset, &
1165 : M_start, n, N_global, N_offset, r, &
1166 : R_offset, R_start, s, S_offset
1167 : REAL(KIND=dp) :: MNRS_element
1168 :
1169 1010077 : N_offset = mp2_biel%index_table(jatom, jset) - 1
1170 1010077 : M_offset = mp2_biel%index_table(iatom, iset) - 1
1171 1010077 : S_offset = mp2_biel%index_table(latom, lset) - 1
1172 1010077 : R_offset = mp2_biel%index_table(katom, kset) - 1
1173 :
1174 3549406 : DO S = 1, Ssize
1175 2539329 : R_start = 1
1176 2539329 : IF (katom == latom .AND. kset == lset) R_start = S
1177 9924964 : DO R = R_start, Rsize
1178 :
1179 : ! fast i don't know why
1180 25956088 : DO N = 1, Nsize
1181 17041201 : N_global = N + N_offset
1182 17041201 : M_start = 1
1183 17041201 : IF (iatom == jatom .AND. iset == jset) THEN
1184 1686183 : M = N
1185 1686183 : M_global = M + M_offset
1186 1686183 : MNRS_element = MNRS(M, N, R, S)
1187 7428033 : DO i = 1, Ni_occupied
1188 5741850 : i_global = i + i_batch_start
1189 7428033 : BI1(N_global, i, R, S) = BI1(N_global, i, R, S) + C_T(i_global, M_global)*MNRS_element
1190 : END DO
1191 1686183 : M_start = N + 1
1192 : END IF
1193 :
1194 71485095 : DO M = M_start, Msize
1195 48068336 : M_global = M + M_offset
1196 48068336 : MNRS_element = MNRS(M, N, R, S)
1197 247797157 : DO i = 1, Ni_occupied
1198 182687620 : i_global = i + i_batch_start
1199 182687620 : BI1(N_global, i, R, S) = BI1(N_global, i, R, S) + C_T(i_global, M_global)*MNRS_element
1200 230755956 : BI1(M_global, i, R, S) = BI1(M_global, i, R, S) + C_T(i_global, N_global)*MNRS_element
1201 : END DO
1202 : END DO
1203 : END DO
1204 :
1205 : END DO
1206 : END DO
1207 :
1208 1010077 : END SUBROUTINE transform_occupied_orbitals_first
1209 :
1210 : ! **************************************************************************************************
1211 : !> \brief ...
1212 : !> \param dimen ...
1213 : !> \param latom ...
1214 : !> \param katom ...
1215 : !> \param lset ...
1216 : !> \param kset ...
1217 : !> \param Ssize ...
1218 : !> \param Rsize ...
1219 : !> \param Ni_occupied ...
1220 : !> \param Nj_occupied ...
1221 : !> \param j_batch_start ...
1222 : !> \param BI1 ...
1223 : !> \param C_T ...
1224 : !> \param mp2_biel ...
1225 : !> \param para_env ...
1226 : !> \param elements_ij_proc ...
1227 : !> \param multiple ...
1228 : !> \param BIb ...
1229 : ! **************************************************************************************************
1230 0 : SUBROUTINE transform_occupied_orbitals_second(dimen, latom, katom, lset, kset, &
1231 : Ssize, Rsize, Ni_occupied, Nj_occupied, j_batch_start, &
1232 0 : BI1, C_T, mp2_biel, para_env, &
1233 : elements_ij_proc, &
1234 0 : multiple, BIb)
1235 :
1236 : INTEGER, INTENT(IN) :: dimen, latom, katom, lset, kset, Ssize, &
1237 : Rsize, Ni_occupied, Nj_occupied, &
1238 : j_batch_start
1239 : REAL(KIND=dp), &
1240 : DIMENSION(dimen, Ni_occupied, Rsize, Ssize), &
1241 : INTENT(IN) :: BI1
1242 : REAL(KIND=dp), DIMENSION(dimen, dimen), INTENT(IN) :: C_T
1243 : TYPE(mp2_biel_type), INTENT(IN) :: mp2_biel
1244 : TYPE(mp_para_env_type), INTENT(IN) :: para_env
1245 : INTEGER, INTENT(IN) :: elements_ij_proc, multiple
1246 : REAL(KIND=dp), &
1247 : DIMENSION(dimen, dimen, elements_ij_proc), &
1248 : INTENT(INOUT) :: BIb
1249 :
1250 : CHARACTER(LEN=*), PARAMETER :: routineN = 'transform_occupied_orbitals_second'
1251 :
1252 : INTEGER :: elements_ij_proc_rec, handle, i, index_ij_rec, index_ij_send, index_proc_ij, &
1253 : index_proc_shift, j, n, proc_receive, proc_send, r, R_global, R_offset, R_offset_rec, &
1254 : R_start, Rsize_rec, s, S_global, S_offset, S_offset_rec, Ssize_rec
1255 : INTEGER, DIMENSION(5) :: size_parameter_rec, size_parameter_send
1256 : LOGICAL :: case_send_receive
1257 : REAL(KIND=dp) :: C_T_R, C_T_S
1258 0 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :) :: BIb_RS_mat_rec
1259 0 : REAL(KIND=dp), DIMENSION(dimen, Rsize+Ssize) :: BIb_RS_mat
1260 :
1261 0 : CALL timeset(routineN, handle)
1262 :
1263 0 : S_offset = mp2_biel%index_table(latom, lset) - 1
1264 0 : R_offset = mp2_biel%index_table(katom, kset) - 1
1265 :
1266 0 : size_parameter_send(1) = Rsize
1267 0 : size_parameter_send(2) = Ssize
1268 0 : size_parameter_send(3) = R_offset
1269 0 : size_parameter_send(4) = S_offset
1270 0 : size_parameter_send(5) = elements_ij_proc
1271 :
1272 0 : DO index_proc_shift = 0, para_env%num_pe - 1
1273 :
1274 0 : proc_send = MODULO(para_env%mepos + index_proc_shift, para_env%num_pe)
1275 0 : proc_receive = MODULO(para_env%mepos - index_proc_shift, para_env%num_pe)
1276 :
1277 0 : case_send_receive = (proc_send /= para_env%mepos)
1278 :
1279 0 : IF (case_send_receive) THEN
1280 : ! the processor starts to send (and receive?)
1281 :
1282 0 : CALL para_env%sendrecv(size_parameter_send, proc_send, size_parameter_rec, proc_receive)
1283 :
1284 0 : Rsize_rec = size_parameter_rec(1)
1285 0 : Ssize_rec = size_parameter_rec(2)
1286 0 : R_offset_rec = size_parameter_rec(3)
1287 0 : S_offset_rec = size_parameter_rec(4)
1288 0 : elements_ij_proc_rec = size_parameter_rec(5)
1289 0 : ALLOCATE (BIb_RS_mat_rec(dimen, Rsize_rec + Ssize_rec))
1290 :
1291 : ELSE
1292 0 : elements_ij_proc_rec = elements_ij_proc
1293 : END IF
1294 :
1295 0 : index_ij_send = 0
1296 0 : index_ij_rec = 0
1297 0 : DO index_proc_ij = proc_send + 1, multiple, para_env%num_pe
1298 :
1299 0 : BIb_RS_mat = zero
1300 0 : IF (index_proc_ij <= Ni_occupied*Nj_occupied) THEN
1301 :
1302 0 : index_ij_send = index_ij_send + 1
1303 :
1304 0 : i = (index_proc_ij - 1)/Nj_occupied + 1
1305 0 : j = index_proc_ij - (i - 1)*Nj_occupied + j_batch_start
1306 :
1307 0 : DO S = 1, Ssize
1308 0 : S_global = S + S_offset
1309 0 : R_start = 1
1310 0 : IF (katom == latom .AND. kset == lset) R_start = S
1311 0 : DO R = R_start, Rsize
1312 0 : R_global = R + R_offset
1313 :
1314 0 : IF (R_global /= S_global) THEN
1315 0 : C_T_R = C_T(j, R_global)
1316 0 : C_T_S = C_T(j, S_global)
1317 0 : DO N = 1, dimen
1318 0 : BIb_RS_mat(N, R) = BIb_RS_mat(N, R) + C_T_S*BI1(N, i, R, S)
1319 : END DO
1320 0 : DO N = 1, dimen
1321 0 : BIb_RS_mat(N, Rsize + S) = BIb_RS_mat(N, Rsize + S) + C_T_R*BI1(N, i, R, S)
1322 : END DO
1323 : ELSE
1324 0 : C_T_S = C_T(j, S_global)
1325 0 : DO N = 1, dimen
1326 0 : BIb_RS_mat(N, R) = BIb_RS_mat(N, R) + C_T_S*BI1(N, i, R, S)
1327 : END DO
1328 : END IF
1329 :
1330 : END DO
1331 : END DO
1332 :
1333 : END IF
1334 :
1335 0 : IF (case_send_receive) THEN
1336 :
1337 0 : CALL para_env%sendrecv(BIb_RS_mat, proc_send, BIb_RS_mat_rec, proc_receive)
1338 :
1339 0 : index_ij_rec = index_ij_rec + 1
1340 0 : IF (index_ij_rec <= elements_ij_proc .AND. elements_ij_proc > 0) THEN
1341 :
1342 : BIb(1:dimen, R_offset_rec + 1:R_offset_rec + Rsize_rec, index_ij_rec) = &
1343 : BIb(1:dimen, R_offset_rec + 1:R_offset_rec + Rsize_rec, index_ij_rec) + &
1344 0 : BIb_RS_mat_rec(1:dimen, 1:Rsize_rec)
1345 :
1346 : BIb(1:dimen, S_offset_rec + 1:S_offset_rec + Ssize_rec, index_ij_rec) = &
1347 : BIb(1:dimen, S_offset_rec + 1:S_offset_rec + Ssize_rec, index_ij_rec) + &
1348 0 : BIb_RS_mat_rec(1:dimen, Rsize_rec + 1:Rsize_rec + Ssize_rec)
1349 :
1350 : END IF
1351 : ELSE
1352 : ! the processor is the sender and receiver itself
1353 0 : IF (index_ij_send <= elements_ij_proc .AND. elements_ij_proc > 0) THEN
1354 :
1355 : BIb(1:dimen, R_offset + 1:R_offset + Rsize, index_ij_send) = &
1356 0 : BIb(1:dimen, R_offset + 1:R_offset + Rsize, index_ij_send) + BIb_RS_mat(1:dimen, 1:Rsize)
1357 :
1358 : BIb(1:dimen, S_offset + 1:S_offset + Ssize, index_ij_send) = &
1359 0 : BIb(1:dimen, S_offset + 1:S_offset + Ssize, index_ij_send) + BIb_RS_mat(1:dimen, Rsize + 1:Rsize + Ssize)
1360 :
1361 : END IF
1362 : END IF
1363 :
1364 : END DO ! loop over the ij of the processor
1365 :
1366 0 : IF (case_send_receive) THEN
1367 0 : DEALLOCATE (BIb_RS_mat_rec)
1368 : END IF
1369 :
1370 : END DO ! loop over the processor starting from itself
1371 :
1372 0 : CALL timestop(handle)
1373 :
1374 0 : END SUBROUTINE transform_occupied_orbitals_second
1375 :
1376 : ! **************************************************************************************************
1377 : !> \brief ...
1378 : !> \param dimen ...
1379 : !> \param latom ...
1380 : !> \param katom ...
1381 : !> \param lset ...
1382 : !> \param kset ...
1383 : !> \param Ssize ...
1384 : !> \param Rsize ...
1385 : !> \param Ni_occupied ...
1386 : !> \param Nj_occupied ...
1387 : !> \param j_batch_start ...
1388 : !> \param ij_elem_max ...
1389 : !> \param BI1 ...
1390 : !> \param C_T ...
1391 : !> \param mp2_biel ...
1392 : !> \param para_env ...
1393 : !> \param elements_ij_proc ...
1394 : !> \param BIb ...
1395 : ! **************************************************************************************************
1396 4653 : SUBROUTINE transform_occupied_orbitals_second_big(dimen, latom, katom, lset, kset, &
1397 : Ssize, Rsize, Ni_occupied, Nj_occupied, j_batch_start, &
1398 4653 : ij_elem_max, BI1, C_T, mp2_biel, para_env, &
1399 4653 : elements_ij_proc, BIb)
1400 :
1401 : INTEGER, INTENT(IN) :: dimen, latom, katom, lset, kset, Ssize, &
1402 : Rsize, Ni_occupied, Nj_occupied, &
1403 : j_batch_start, ij_elem_max
1404 : REAL(KIND=dp), &
1405 : DIMENSION(dimen, Ni_occupied, Rsize, Ssize), &
1406 : INTENT(IN) :: BI1
1407 : REAL(KIND=dp), DIMENSION(dimen, dimen), INTENT(IN) :: C_T
1408 : TYPE(mp2_biel_type), INTENT(IN) :: mp2_biel
1409 : TYPE(mp_para_env_type), INTENT(IN) :: para_env
1410 : INTEGER, INTENT(IN) :: elements_ij_proc
1411 : REAL(KIND=dp), &
1412 : DIMENSION(dimen, dimen, elements_ij_proc), &
1413 : INTENT(INOUT) :: BIb
1414 :
1415 : CHARACTER(LEN=*), PARAMETER :: routineN = 'transform_occupied_orbitals_second_big'
1416 :
1417 : INTEGER :: elements_ij_proc_rec, handle, i, index_ij_rec, index_ij_send, index_proc_ij, &
1418 : index_proc_shift, j, n, proc_receive, proc_send, r, R_global, R_offset, R_offset_rec, &
1419 : R_start, Rsize_rec, s, S_global, S_offset, S_offset_rec, Ssize_rec
1420 : INTEGER, DIMENSION(5) :: size_parameter_rec, size_parameter_send
1421 : LOGICAL :: case_send_receive
1422 : REAL(KIND=dp) :: C_T_R, C_T_S
1423 4653 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :) :: BIb_RS_mat_rec
1424 : REAL(KIND=dp), &
1425 9306 : DIMENSION(dimen, Rsize+Ssize, ij_elem_max) :: BIb_RS_mat
1426 :
1427 4653 : CALL timeset(routineN, handle)
1428 :
1429 4653 : S_offset = mp2_biel%index_table(latom, lset) - 1
1430 4653 : R_offset = mp2_biel%index_table(katom, kset) - 1
1431 :
1432 4653 : size_parameter_send(1) = Rsize
1433 4653 : size_parameter_send(2) = Ssize
1434 4653 : size_parameter_send(3) = R_offset
1435 4653 : size_parameter_send(4) = S_offset
1436 4653 : size_parameter_send(5) = elements_ij_proc
1437 :
1438 9360 : DO index_proc_shift = 0, para_env%num_pe - 1
1439 :
1440 4707 : proc_send = MODULO(para_env%mepos + index_proc_shift, para_env%num_pe)
1441 4707 : proc_receive = MODULO(para_env%mepos - index_proc_shift, para_env%num_pe)
1442 :
1443 4707 : case_send_receive = (proc_send /= para_env%mepos)
1444 :
1445 4707 : IF (case_send_receive) THEN
1446 : ! the processor starts to send (and receive?)
1447 :
1448 54 : CALL para_env%sendrecv(size_parameter_send, proc_send, size_parameter_rec, proc_receive)
1449 :
1450 54 : Rsize_rec = size_parameter_rec(1)
1451 54 : Ssize_rec = size_parameter_rec(2)
1452 54 : R_offset_rec = size_parameter_rec(3)
1453 54 : S_offset_rec = size_parameter_rec(4)
1454 54 : elements_ij_proc_rec = size_parameter_rec(5)
1455 270 : ALLOCATE (BIb_RS_mat_rec(dimen, Rsize_rec + Ssize_rec, ij_elem_max))
1456 : ELSE
1457 4707 : elements_ij_proc_rec = elements_ij_proc
1458 : END IF
1459 :
1460 4707 : index_ij_send = 0
1461 4707 : index_ij_rec = 0
1462 30728964 : BIb_RS_mat = zero
1463 :
1464 81312 : DO index_proc_ij = proc_send + 1, Ni_occupied*Nj_occupied, para_env%num_pe
1465 :
1466 76605 : index_ij_send = index_ij_send + 1
1467 :
1468 76605 : i = (index_proc_ij - 1)/Nj_occupied + 1
1469 76605 : j = index_proc_ij - (i - 1)*Nj_occupied + j_batch_start
1470 :
1471 290238 : DO S = 1, Ssize
1472 208926 : S_global = S + S_offset
1473 208926 : R_start = 1
1474 208926 : IF (katom == latom .AND. kset == lset) R_start = S
1475 907160 : DO R = R_start, Rsize
1476 621629 : R_global = R + R_offset
1477 :
1478 830555 : IF (R_global /= S_global) THEN
1479 601864 : C_T_R = C_T(j, R_global)
1480 601864 : C_T_S = C_T(j, S_global)
1481 45769383 : DO N = 1, dimen
1482 45769383 : BIb_RS_mat(N, R, index_ij_send) = BIb_RS_mat(N, R, index_ij_send) + C_T_S*BI1(N, i, R, S)
1483 : END DO
1484 45769383 : DO N = 1, dimen
1485 45769383 : BIb_RS_mat(N, Rsize + S, index_ij_send) = BIb_RS_mat(N, Rsize + S, index_ij_send) + C_T_R*BI1(N, i, R, S)
1486 : END DO
1487 : ELSE
1488 19765 : C_T_S = C_T(j, S_global)
1489 1295658 : DO N = 1, dimen
1490 1295658 : BIb_RS_mat(N, R, index_ij_send) = BIb_RS_mat(N, R, index_ij_send) + C_T_S*BI1(N, i, R, S)
1491 : END DO
1492 : END IF
1493 :
1494 : END DO
1495 : END DO
1496 :
1497 : END DO
1498 :
1499 9360 : IF (case_send_receive) THEN
1500 :
1501 54 : CALL para_env%sendrecv(BIb_RS_mat, proc_send, BIb_RS_mat_rec, proc_receive)
1502 :
1503 : BIb(1:dimen, R_offset_rec + 1:R_offset_rec + Rsize_rec, 1:elements_ij_proc) = &
1504 : BIb(1:dimen, R_offset_rec + 1:R_offset_rec + Rsize_rec, 1:elements_ij_proc) + &
1505 16182 : BIb_RS_mat_rec(1:dimen, 1:Rsize_rec, 1:elements_ij_proc)
1506 :
1507 : BIb(1:dimen, S_offset_rec + 1:S_offset_rec + Ssize_rec, 1:elements_ij_proc) = &
1508 : BIb(1:dimen, S_offset_rec + 1:S_offset_rec + Ssize_rec, 1:elements_ij_proc) + &
1509 15006 : BIb_RS_mat_rec(1:dimen, Rsize_rec + 1:Rsize_rec + Ssize_rec, 1:elements_ij_proc)
1510 :
1511 54 : DEALLOCATE (BIb_RS_mat_rec)
1512 : ELSE
1513 : ! the processor is the sender and receiver itself
1514 : BIb(1:dimen, R_offset + 1:R_offset + Rsize, 1:elements_ij_proc) = &
1515 : BIb(1:dimen, R_offset + 1:R_offset + Rsize, 1:elements_ij_proc) + &
1516 16293273 : BIb_RS_mat(1:dimen, 1:Rsize, 1:elements_ij_proc)
1517 :
1518 : BIb(1:dimen, S_offset + 1:S_offset + Ssize, 1:elements_ij_proc) = &
1519 : BIb(1:dimen, S_offset + 1:S_offset + Ssize, 1:elements_ij_proc) + &
1520 14485815 : BIb_RS_mat(1:dimen, Rsize + 1:Rsize + Ssize, 1:elements_ij_proc)
1521 :
1522 : END IF
1523 :
1524 : END DO ! loop over the processor starting from itself
1525 :
1526 4653 : CALL timestop(handle)
1527 :
1528 4653 : END SUBROUTINE transform_occupied_orbitals_second_big
1529 :
1530 : ! **************************************************************************************************
1531 : !> \brief ...
1532 : !> \param dimen ...
1533 : !> \param occupied ...
1534 : !> \param virtual ...
1535 : !> \param i_batch_start ...
1536 : !> \param j_batch_start ...
1537 : !> \param BIb ...
1538 : !> \param C ...
1539 : !> \param Auto ...
1540 : !> \param elements_ij_proc ...
1541 : !> \param ij_list_proc ...
1542 : !> \param nspins ...
1543 : !> \param Emp2 ...
1544 : !> \param Emp2_Cou ...
1545 : !> \param Emp2_ex ...
1546 : ! **************************************************************************************************
1547 33 : SUBROUTINE transform_virtual_orbitals_and_accumulate(dimen, occupied, virtual, i_batch_start, &
1548 33 : j_batch_start, BIb, C, Auto, elements_ij_proc, &
1549 33 : ij_list_proc, nspins, Emp2, Emp2_Cou, Emp2_ex)
1550 :
1551 : INTEGER, INTENT(IN) :: dimen, occupied, virtual, i_batch_start, &
1552 : j_batch_start
1553 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :), &
1554 : INTENT(INOUT) :: BIb
1555 : REAL(KIND=dp), DIMENSION(dimen, dimen), INTENT(IN) :: C
1556 : REAL(KIND=dp), DIMENSION(dimen), INTENT(IN) :: Auto
1557 : INTEGER, INTENT(IN) :: elements_ij_proc
1558 : INTEGER, DIMENSION(elements_ij_proc, 2), &
1559 : INTENT(IN) :: ij_list_proc
1560 : INTEGER, INTENT(IN) :: nspins
1561 : REAL(KIND=dp), INTENT(INOUT) :: Emp2, Emp2_Cou, Emp2_ex
1562 :
1563 : CHARACTER(LEN=*), PARAMETER :: routineN = 'transform_virtual_orbitals_and_accumulate'
1564 : REAL(KIND=dp), PARAMETER :: zero = 0.0_dp
1565 :
1566 : INTEGER :: a, a_global, b, b_global, handle, i, &
1567 : i_global, index_ij, j, j_global
1568 : REAL(KIND=dp) :: iajb, ibja, parz, two
1569 33 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :) :: BIa
1570 :
1571 33 : CALL timeset(routineN, handle)
1572 :
1573 132 : ALLOCATE (BIa(dimen, virtual))
1574 :
1575 48871 : BIa = zero
1576 389 : DO index_ij = 1, elements_ij_proc
1577 :
1578 : CALL DGEMM('T', 'N', dimen, virtual, dimen, 1.0_dp, Bib(1, 1, index_ij), &
1579 356 : dimen, C(1, occupied + 1), dimen, 0.0_dp, Bia(1, 1), dimen)
1580 1100036 : Bib(1:dimen, 1:virtual, index_ij) = Bia(1:dimen, 1:virtual)
1581 :
1582 : END DO
1583 :
1584 33 : DEALLOCATE (BIa)
1585 132 : ALLOCATE (BIa(virtual, virtual))
1586 :
1587 43719 : BIa = zero
1588 389 : DO index_ij = 1, elements_ij_proc
1589 :
1590 : CALL DGEMM('T', 'N', virtual, virtual, dimen, 1.0_dp, Bib(1, 1, index_ij), dimen, C(1, occupied + 1), dimen, 0.0_dp, &
1591 356 : BIa(1, 1), virtual)
1592 980141 : BIb(1:virtual, 1:virtual, index_ij) = BIa(1:virtual, 1:virtual)
1593 :
1594 : END DO
1595 :
1596 33 : two = 2.0_dp/nspins
1597 389 : DO index_ij = 1, elements_ij_proc
1598 356 : i = ij_list_proc(index_ij, 1)
1599 356 : j = ij_list_proc(index_ij, 2)
1600 356 : i_global = i + i_batch_start
1601 356 : j_global = j + j_batch_start
1602 16600 : DO a = 1, virtual
1603 16211 : a_global = a + occupied
1604 980108 : DO b = 1, virtual
1605 963541 : b_global = b + occupied
1606 963541 : iajb = BIb(a, b, index_ij)
1607 963541 : ibja = BIb(b, a, index_ij)
1608 963541 : parz = iajb/(Auto(i_global) + Auto(j_global) - Auto(a_global) - Auto(b_global))
1609 : ! parz=parz*(two*iajb-ibja) !Full
1610 : ! parz=parz*(iajb) !Coulomb
1611 : ! parz=parz*(ibja) !Coulomb
1612 : ! Emp2=Emp2+parz/nspins
1613 963541 : Emp2_Cou = Emp2_Cou + parz*two*(iajb)/nspins
1614 963541 : Emp2_ex = Emp2_ex - parz*(ibja)/nspins
1615 979752 : Emp2 = Emp2 + parz*(two*iajb - ibja)/nspins
1616 : END DO
1617 : END DO
1618 : END DO
1619 :
1620 33 : DEALLOCATE (BIa)
1621 :
1622 33 : CALL timestop(handle)
1623 :
1624 33 : END SUBROUTINE transform_virtual_orbitals_and_accumulate
1625 :
1626 : ! **************************************************************************************************
1627 : !> \brief ...
1628 : !> \param dimen ...
1629 : !> \param occ_i ...
1630 : !> \param occ_j ...
1631 : !> \param virt_i ...
1632 : !> \param virt_j ...
1633 : !> \param i_batch_start ...
1634 : !> \param j_batch_start ...
1635 : !> \param BIb ...
1636 : !> \param C_i ...
1637 : !> \param C_j ...
1638 : !> \param Auto_i ...
1639 : !> \param Auto_j ...
1640 : !> \param elements_ij_proc ...
1641 : !> \param ij_list_proc ...
1642 : !> \param Emp2 ...
1643 : !> \param Emp2_Cou ...
1644 : ! **************************************************************************************************
1645 10 : SUBROUTINE transform_virtual_orbitals_and_accumulate_ABcase(dimen, occ_i, occ_j, virt_i, virt_j, i_batch_start, &
1646 10 : j_batch_start, BIb, C_i, C_j, Auto_i, Auto_j, elements_ij_proc, &
1647 10 : ij_list_proc, Emp2, Emp2_Cou)
1648 :
1649 : INTEGER, INTENT(IN) :: dimen, occ_i, occ_j, virt_i, virt_j, &
1650 : i_batch_start, j_batch_start
1651 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :), &
1652 : INTENT(INOUT) :: BIb
1653 : REAL(KIND=dp), DIMENSION(dimen, dimen), INTENT(IN) :: C_i, C_j
1654 : REAL(KIND=dp), DIMENSION(dimen), INTENT(IN) :: Auto_i, Auto_j
1655 : INTEGER, INTENT(IN) :: elements_ij_proc
1656 : INTEGER, DIMENSION(elements_ij_proc, 2), &
1657 : INTENT(IN) :: ij_list_proc
1658 : REAL(KIND=dp), INTENT(INOUT) :: Emp2, Emp2_Cou
1659 :
1660 : CHARACTER(LEN=*), PARAMETER :: routineN = 'transform_virtual_orbitals_and_accumulate_ABcase'
1661 : REAL(KIND=dp), PARAMETER :: two = 2.D+00, zero = 0.D+00
1662 :
1663 : INTEGER :: a, a_global, b, b_global, handle, i, &
1664 : i_global, index_ij, j, j_global, n, s
1665 : REAL(KIND=dp) :: iajb, parz
1666 10 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :) :: BIa
1667 :
1668 10 : CALL timeset(routineN, handle)
1669 :
1670 40 : ALLOCATE (BIa(dimen, virt_i))
1671 :
1672 56 : DO index_ij = 1, elements_ij_proc
1673 :
1674 1236 : DO a = 1, virt_i
1675 1190 : a_global = a + occ_i
1676 51930 : DO S = 1, dimen
1677 : parz = zero
1678 2449752 : DO N = 1, dimen
1679 2449752 : parz = parz + C_i(N, a_global)*BIb(N, S, index_ij)
1680 : END DO
1681 51884 : BIa(S, a) = parz
1682 : END DO
1683 : END DO
1684 :
1685 51940 : BIb(1:dimen, 1:virt_i, index_ij) = BIa
1686 :
1687 : END DO
1688 :
1689 10 : DEALLOCATE (BIa)
1690 40 : ALLOCATE (BIa(virt_i, virt_j))
1691 :
1692 56 : DO index_ij = 1, elements_ij_proc
1693 :
1694 1236 : DO a = 1, virt_i
1695 47824 : DO b = 1, virt_j
1696 46588 : b_global = b + occ_j
1697 46588 : parz = zero
1698 2257144 : DO S = 1, dimen
1699 2257144 : parz = parz + C_j(S, b_global)*BIb(S, a, index_ij)
1700 : END DO
1701 47778 : BIa(a, b) = parz
1702 : END DO
1703 : END DO
1704 :
1705 47904 : BIb(1:virt_i, 1:virt_j, index_ij) = BIa
1706 :
1707 : END DO
1708 :
1709 56 : DO index_ij = 1, elements_ij_proc
1710 46 : i = ij_list_proc(index_ij, 1)
1711 46 : j = ij_list_proc(index_ij, 2)
1712 46 : i_global = i + i_batch_start
1713 46 : j_global = j + j_batch_start
1714 1246 : DO a = 1, virt_i
1715 1190 : a_global = a + occ_i
1716 47824 : DO b = 1, virt_j
1717 46588 : b_global = b + occ_j
1718 46588 : iajb = BIb(a, b, index_ij)
1719 46588 : parz = iajb*iajb/(Auto_i(i_global) + Auto_j(j_global) - Auto_i(a_global) - Auto_j(b_global))
1720 46588 : Emp2_Cou = Emp2_Cou + parz/two
1721 47778 : Emp2 = Emp2 + parz/two
1722 : END DO
1723 : END DO
1724 : END DO
1725 :
1726 10 : DEALLOCATE (BIa)
1727 :
1728 10 : CALL timestop(handle)
1729 :
1730 10 : END SUBROUTINE transform_virtual_orbitals_and_accumulate_ABcase
1731 :
1732 : END MODULE mp2_direct_method
|