Line data Source code
1 : !--------------------------------------------------------------------------------------------------!
2 : ! CP2K: A general program to perform molecular dynamics simulations !
3 : ! Copyright 2000-2024 CP2K developers group <https://cp2k.org> !
4 : ! !
5 : ! SPDX-License-Identifier: GPL-2.0-or-later !
6 : !--------------------------------------------------------------------------------------------------!
7 :
8 : ! **************************************************************************************************
9 : !> \brief Routines to calculate gradients of RI-GPW-MP2 energy using pw
10 : !> \par History
11 : !> 10.2013 created [Mauro Del Ben]
12 : ! **************************************************************************************************
13 : MODULE mp2_ri_grad
14 : USE atomic_kind_types, ONLY: atomic_kind_type,&
15 : get_atomic_kind_set
16 : USE cell_types, ONLY: cell_type
17 : USE cp_blacs_env, ONLY: cp_blacs_env_type
18 : USE cp_control_types, ONLY: dft_control_type
19 : USE cp_dbcsr_api, ONLY: &
20 : dbcsr_add, dbcsr_copy, dbcsr_create, dbcsr_multiply, dbcsr_p_type, dbcsr_release, &
21 : dbcsr_set, dbcsr_transposed, dbcsr_type, dbcsr_type_no_symmetry, dbcsr_type_symmetric
22 : USE cp_dbcsr_operations, ONLY: copy_dbcsr_to_fm,&
23 : dbcsr_deallocate_matrix_set
24 : USE cp_eri_mme_interface, ONLY: cp_eri_mme_param
25 : USE cp_fm_struct, ONLY: cp_fm_struct_create,&
26 : cp_fm_struct_release,&
27 : cp_fm_struct_type
28 : USE cp_fm_types, ONLY: cp_fm_create,&
29 : cp_fm_get_info,&
30 : cp_fm_release,&
31 : cp_fm_set_all,&
32 : cp_fm_to_fm_submat,&
33 : cp_fm_type
34 : USE input_constants, ONLY: do_eri_gpw,&
35 : do_eri_mme,&
36 : ri_mp2_laplace,&
37 : ri_mp2_method_gpw,&
38 : ri_rpa_method_gpw
39 : USE kinds, ONLY: dp
40 : USE libint_2c_3c, ONLY: compare_potential_types
41 : USE message_passing, ONLY: mp_para_env_release,&
42 : mp_para_env_type,&
43 : mp_request_null,&
44 : mp_request_type,&
45 : mp_waitall
46 : USE mp2_eri, ONLY: mp2_eri_2c_integrate,&
47 : mp2_eri_3c_integrate,&
48 : mp2_eri_deallocate_forces,&
49 : mp2_eri_force
50 : USE mp2_eri_gpw, ONLY: cleanup_gpw,&
51 : integrate_potential_forces_2c,&
52 : integrate_potential_forces_3c_1c,&
53 : integrate_potential_forces_3c_2c,&
54 : prepare_gpw
55 : USE mp2_types, ONLY: integ_mat_buffer_type,&
56 : integ_mat_buffer_type_2D,&
57 : mp2_type
58 : USE parallel_gemm_api, ONLY: parallel_gemm
59 : USE particle_types, ONLY: particle_type
60 : USE pw_env_types, ONLY: pw_env_type
61 : USE pw_poisson_types, ONLY: pw_poisson_type
62 : USE pw_pool_types, ONLY: pw_pool_type
63 : USE pw_types, ONLY: pw_c1d_gs_type,&
64 : pw_r3d_rs_type
65 : USE qs_environment_types, ONLY: get_qs_env,&
66 : qs_environment_type
67 : USE qs_force_types, ONLY: allocate_qs_force,&
68 : qs_force_type,&
69 : zero_qs_force
70 : USE qs_kind_types, ONLY: qs_kind_type
71 : USE qs_ks_types, ONLY: qs_ks_env_type
72 : USE qs_neighbor_list_types, ONLY: neighbor_list_set_p_type
73 : USE task_list_types, ONLY: task_list_type
74 : USE util, ONLY: get_limit
75 : USE virial_types, ONLY: virial_type
76 : #include "./base/base_uses.f90"
77 :
78 : IMPLICIT NONE
79 :
80 : PRIVATE
81 :
82 : CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'mp2_ri_grad'
83 :
84 : PUBLIC :: calc_ri_mp2_nonsep
85 :
86 : CONTAINS
87 :
88 : ! **************************************************************************************************
89 : !> \brief Calculate the non-separable part of the gradients and update the
90 : !> Lagrangian
91 : !> \param qs_env ...
92 : !> \param mp2_env ...
93 : !> \param para_env ...
94 : !> \param para_env_sub ...
95 : !> \param cell ...
96 : !> \param particle_set ...
97 : !> \param atomic_kind_set ...
98 : !> \param qs_kind_set ...
99 : !> \param mo_coeff ...
100 : !> \param nmo ...
101 : !> \param homo ...
102 : !> \param dimen_RI ...
103 : !> \param Eigenval ...
104 : !> \param my_group_L_start ...
105 : !> \param my_group_L_end ...
106 : !> \param my_group_L_size ...
107 : !> \param sab_orb_sub ...
108 : !> \param mat_munu ...
109 : !> \param blacs_env_sub ...
110 : !> \author Mauro Del Ben
111 : ! **************************************************************************************************
112 264 : SUBROUTINE calc_ri_mp2_nonsep(qs_env, mp2_env, para_env, para_env_sub, cell, particle_set, &
113 264 : atomic_kind_set, qs_kind_set, mo_coeff, nmo, homo, dimen_RI, Eigenval, &
114 : my_group_L_start, my_group_L_end, my_group_L_size, sab_orb_sub, mat_munu, &
115 : blacs_env_sub)
116 : TYPE(qs_environment_type), POINTER :: qs_env
117 : TYPE(mp2_type) :: mp2_env
118 : TYPE(mp_para_env_type), POINTER :: para_env, para_env_sub
119 : TYPE(cell_type), POINTER :: cell
120 : TYPE(particle_type), DIMENSION(:), POINTER :: particle_set
121 : TYPE(atomic_kind_type), DIMENSION(:), POINTER :: atomic_kind_set
122 : TYPE(qs_kind_type), DIMENSION(:), POINTER :: qs_kind_set
123 : TYPE(cp_fm_type), DIMENSION(:), INTENT(IN) :: mo_coeff
124 : INTEGER, INTENT(IN) :: nmo
125 : INTEGER, DIMENSION(:), INTENT(IN) :: homo
126 : INTEGER, INTENT(IN) :: dimen_RI
127 : REAL(KIND=dp), DIMENSION(:, :), INTENT(IN) :: Eigenval
128 : INTEGER, INTENT(IN) :: my_group_L_start, my_group_L_end, &
129 : my_group_L_size
130 : TYPE(neighbor_list_set_p_type), DIMENSION(:), &
131 : POINTER :: sab_orb_sub
132 : TYPE(dbcsr_p_type), INTENT(INOUT) :: mat_munu
133 : TYPE(cp_blacs_env_type), POINTER :: blacs_env_sub
134 :
135 : CHARACTER(LEN=*), PARAMETER :: routineN = 'calc_ri_mp2_nonsep'
136 :
137 : INTEGER :: dimen, eri_method, handle, handle2, i, &
138 : ikind, ispin, itmp(2), L_counter, LLL, &
139 : my_P_end, my_P_size, my_P_start, nspins
140 264 : INTEGER, ALLOCATABLE, DIMENSION(:) :: atom_of_kind, kind_of, natom_of_kind, &
141 264 : virtual
142 : LOGICAL :: alpha_beta, use_virial
143 : REAL(KIND=dp) :: cutoff_old, eps_filter, factor, &
144 : factor_2c, relative_cutoff_old
145 264 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:) :: e_cutoff_old
146 264 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :) :: G_PQ_local, G_PQ_local_2
147 : REAL(KIND=dp), DIMENSION(3, 3) :: h_stress, pv_virial
148 264 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: I_tmp2
149 : TYPE(cp_eri_mme_param), POINTER :: eri_param
150 : TYPE(cp_fm_struct_type), POINTER :: fm_struct_tmp
151 264 : TYPE(cp_fm_type), ALLOCATABLE, DIMENSION(:) :: L1_mu_i, L2_nu_a
152 : TYPE(dbcsr_p_type) :: matrix_P_munu
153 : TYPE(dbcsr_p_type), ALLOCATABLE, DIMENSION(:) :: mo_coeff_o, mo_coeff_v
154 264 : TYPE(dbcsr_p_type), ALLOCATABLE, DIMENSION(:, :) :: G_P_ia
155 264 : TYPE(dbcsr_p_type), DIMENSION(:), POINTER :: mat_munu_local, matrix_P_munu_local
156 : TYPE(dbcsr_type) :: matrix_P_munu_nosym
157 264 : TYPE(dbcsr_type), ALLOCATABLE, DIMENSION(:) :: Lag_mu_i_1, Lag_nu_a_2, matrix_P_inu
158 : TYPE(dft_control_type), POINTER :: dft_control
159 264 : TYPE(mp2_eri_force), ALLOCATABLE, DIMENSION(:) :: force_2c, force_2c_RI, force_3c_aux, &
160 264 : force_3c_orb_mu, force_3c_orb_nu
161 1056 : TYPE(pw_c1d_gs_type) :: dvg(3), pot_g, rho_g, rho_g_copy
162 : TYPE(pw_env_type), POINTER :: pw_env_sub
163 : TYPE(pw_poisson_type), POINTER :: poisson_env
164 : TYPE(pw_pool_type), POINTER :: auxbas_pw_pool
165 : TYPE(pw_r3d_rs_type) :: psi_L, rho_r
166 264 : TYPE(qs_force_type), DIMENSION(:), POINTER :: force, mp2_force
167 : TYPE(qs_ks_env_type), POINTER :: ks_env
168 : TYPE(task_list_type), POINTER :: task_list_sub
169 : TYPE(virial_type), POINTER :: virial
170 :
171 264 : CALL timeset(routineN, handle)
172 :
173 264 : eri_method = mp2_env%eri_method
174 264 : eri_param => mp2_env%eri_mme_param
175 :
176 : ! Find out whether we have a closed or open shell
177 264 : nspins = SIZE(homo)
178 264 : alpha_beta = (nspins == 2)
179 :
180 264 : dimen = nmo
181 792 : ALLOCATE (virtual(nspins))
182 614 : virtual(:) = dimen - homo(:)
183 264 : eps_filter = mp2_env%mp2_gpw%eps_filter
184 29636 : ALLOCATE (mo_coeff_o(nspins), mo_coeff_v(nspins), G_P_ia(nspins, my_group_L_size))
185 614 : DO ispin = 1, nspins
186 350 : mo_coeff_o(ispin)%matrix => mp2_env%ri_grad%mo_coeff_o(ispin)%matrix
187 350 : mo_coeff_v(ispin)%matrix => mp2_env%ri_grad%mo_coeff_v(ispin)%matrix
188 16122 : DO LLL = 1, my_group_L_size
189 15858 : G_P_ia(ispin, LLL)%matrix => mp2_env%ri_grad%G_P_ia(LLL, ispin)%matrix
190 : END DO
191 : END DO
192 264 : DEALLOCATE (mp2_env%ri_grad%G_P_ia)
193 :
194 264 : itmp = get_limit(dimen_RI, para_env_sub%num_pe, para_env_sub%mepos)
195 264 : my_P_start = itmp(1)
196 264 : my_P_end = itmp(2)
197 264 : my_P_size = itmp(2) - itmp(1) + 1
198 :
199 1056 : ALLOCATE (G_PQ_local(dimen_RI, my_group_L_size))
200 997646 : G_PQ_local = 0.0_dp
201 944212 : G_PQ_local(my_P_start:my_P_end, :) = mp2_env%ri_grad%Gamma_PQ
202 264 : DEALLOCATE (mp2_env%ri_grad%Gamma_PQ)
203 944212 : G_PQ_local(my_P_start:my_P_end, :) = G_PQ_local(my_P_start:my_P_end, :)/REAL(nspins, dp)
204 264 : CALL para_env_sub%sum(G_PQ_local)
205 264 : IF (.NOT. compare_potential_types(mp2_env%ri_metric, mp2_env%potential_parameter)) THEN
206 36 : ALLOCATE (G_PQ_local_2(dimen_RI, my_group_L_size))
207 41844 : G_PQ_local_2 = 0.0_dp
208 41844 : G_PQ_local_2(my_P_start:my_P_end, :) = mp2_env%ri_grad%Gamma_PQ_2
209 12 : DEALLOCATE (mp2_env%ri_grad%Gamma_PQ_2)
210 41844 : G_PQ_local_2(my_P_start:my_P_end, :) = G_PQ_local_2(my_P_start:my_P_end, :)/REAL(nspins, dp)
211 12 : CALL para_env_sub%sum(G_PQ_local_2)
212 : END IF
213 :
214 : ! create matrix holding the back transformation (G_P_inu)
215 1142 : ALLOCATE (matrix_P_inu(nspins))
216 614 : DO ispin = 1, nspins
217 614 : CALL dbcsr_create(matrix_P_inu(ispin), template=mo_coeff_o(ispin)%matrix)
218 : END DO
219 :
220 : ! non symmetric matrix
221 : CALL dbcsr_create(matrix_P_munu_nosym, template=mat_munu%matrix, &
222 264 : matrix_type=dbcsr_type_no_symmetry)
223 :
224 : ! create Lagrangian matrices in mixed AO/MO formalism
225 878 : ALLOCATE (Lag_mu_i_1(nspins))
226 614 : DO ispin = 1, nspins
227 350 : CALL dbcsr_create(Lag_mu_i_1(ispin), template=mo_coeff_o(ispin)%matrix)
228 614 : CALL dbcsr_set(Lag_mu_i_1(ispin), 0.0_dp)
229 : END DO
230 :
231 878 : ALLOCATE (Lag_nu_a_2(nspins))
232 614 : DO ispin = 1, nspins
233 350 : CALL dbcsr_create(Lag_nu_a_2(ispin), template=mo_coeff_v(ispin)%matrix)
234 614 : CALL dbcsr_set(Lag_nu_a_2(ispin), 0.0_dp)
235 : END DO
236 :
237 : ! get forces
238 264 : NULLIFY (force, virial)
239 264 : CALL get_qs_env(qs_env=qs_env, force=force, virial=virial)
240 :
241 : ! check if we want to calculate the virial
242 264 : use_virial = virial%pv_availability .AND. (.NOT. virial%pv_numer)
243 :
244 264 : CALL get_atomic_kind_set(atomic_kind_set=atomic_kind_set, natom_of_kind=natom_of_kind)
245 264 : NULLIFY (mp2_force)
246 264 : CALL allocate_qs_force(mp2_force, natom_of_kind)
247 264 : DEALLOCATE (natom_of_kind)
248 264 : CALL zero_qs_force(mp2_force)
249 264 : mp2_env%ri_grad%mp2_force => mp2_force
250 :
251 264 : factor_2c = -4.0_dp
252 264 : IF (mp2_env%method == ri_rpa_method_gpw) THEN
253 20 : factor_2c = -1.0_dp
254 20 : IF (alpha_beta) factor_2c = -2.0_dp
255 : END IF
256 :
257 : ! prepare integral derivatives with mme method
258 264 : IF (eri_method .EQ. do_eri_mme) THEN
259 1184 : ALLOCATE (matrix_P_munu_local(my_group_L_size))
260 1158 : ALLOCATE (mat_munu_local(my_group_L_size))
261 26 : L_counter = 0
262 1132 : DO LLL = my_group_L_start, my_group_L_end
263 1106 : L_counter = L_counter + 1
264 1106 : ALLOCATE (mat_munu_local(L_counter)%matrix)
265 : CALL dbcsr_create(mat_munu_local(L_counter)%matrix, template=mat_munu%matrix, &
266 1106 : matrix_type=dbcsr_type_symmetric)
267 1106 : CALL dbcsr_copy(mat_munu_local(L_counter)%matrix, mat_munu%matrix)
268 1106 : CALL dbcsr_set(mat_munu_local(L_counter)%matrix, 0.0_dp)
269 :
270 : CALL G_P_transform_MO_to_AO(matrix_P_munu_local(L_counter)%matrix, matrix_P_munu_nosym, mat_munu%matrix, &
271 : G_P_ia(:, L_counter), matrix_P_inu, &
272 1132 : mo_coeff_v, mo_coeff_o, eps_filter)
273 : END DO
274 :
275 78 : ALLOCATE (I_tmp2(dimen_RI, my_group_L_size))
276 95790 : I_tmp2(:, :) = 0.0_dp
277 : CALL mp2_eri_2c_integrate(eri_param, mp2_env%potential_parameter, para_env_sub, qs_env, &
278 : basis_type_a="RI_AUX", basis_type_b="RI_AUX", &
279 : hab=I_tmp2, first_b=my_group_L_start, last_b=my_group_L_end, &
280 26 : eri_method=eri_method, pab=G_PQ_local, force_a=force_2c)
281 26 : IF (.NOT. compare_potential_types(mp2_env%potential_parameter, mp2_env%ri_metric)) THEN
282 0 : I_tmp2(:, :) = 0.0_dp
283 : CALL mp2_eri_2c_integrate(eri_param, mp2_env%ri_metric, para_env_sub, qs_env, &
284 : basis_type_a="RI_AUX", basis_type_b="RI_AUX", &
285 : hab=I_tmp2, first_b=my_group_L_start, last_b=my_group_L_end, &
286 0 : eri_method=eri_method, pab=G_PQ_local_2, force_a=force_2c_RI)
287 : END IF
288 26 : DEALLOCATE (I_tmp2)
289 :
290 : CALL mp2_eri_3c_integrate(eri_param, mp2_env%ri_metric, para_env_sub, qs_env, &
291 : first_c=my_group_L_start, last_c=my_group_L_end, mat_ab=mat_munu_local, &
292 : basis_type_a="ORB", basis_type_b="ORB", basis_type_c="RI_AUX", &
293 : sab_nl=sab_orb_sub, eri_method=eri_method, &
294 : pabc=matrix_P_munu_local, &
295 26 : force_a=force_3c_orb_mu, force_b=force_3c_orb_nu, force_c=force_3c_aux)
296 :
297 26 : L_counter = 0
298 1132 : DO LLL = my_group_L_start, my_group_L_end
299 1106 : L_counter = L_counter + 1
300 2432 : DO ispin = 1, nspins
301 : CALL dbcsr_multiply("N", "T", 1.0_dp, mo_coeff_v(ispin)%matrix, G_P_ia(ispin, L_counter)%matrix, &
302 2432 : 0.0_dp, matrix_P_inu(ispin), filter_eps=eps_filter)
303 : END DO
304 :
305 : ! The matrices of G_P_ia are deallocated here
306 : CALL update_lagrangian(mat_munu_local(L_counter)%matrix, matrix_P_inu, Lag_mu_i_1, &
307 : G_P_ia(:, L_counter), mo_coeff_o, Lag_nu_a_2, &
308 1132 : eps_filter)
309 : END DO
310 :
311 72 : DO ikind = 1, SIZE(force)
312 : mp2_force(ikind)%mp2_non_sep(:, :) = factor_2c*force_2c(ikind)%forces(:, :) + &
313 : force_3c_orb_mu(ikind)%forces(:, :) + &
314 : force_3c_orb_nu(ikind)%forces(:, :) + &
315 334 : force_3c_aux(ikind)%forces(:, :)
316 :
317 72 : IF (.NOT. compare_potential_types(mp2_env%potential_parameter, mp2_env%ri_metric)) THEN
318 0 : mp2_force(ikind)%mp2_non_sep(:, :) = mp2_force(ikind)%mp2_non_sep(:, :) + factor_2c*force_2c_RI(ikind)%forces
319 : END IF
320 : END DO
321 :
322 26 : CALL mp2_eri_deallocate_forces(force_2c)
323 26 : IF (.NOT. compare_potential_types(mp2_env%potential_parameter, mp2_env%ri_metric)) THEN
324 0 : CALL mp2_eri_deallocate_forces(force_2c_RI)
325 : END IF
326 26 : CALL mp2_eri_deallocate_forces(force_3c_aux)
327 26 : CALL mp2_eri_deallocate_forces(force_3c_orb_mu)
328 26 : CALL mp2_eri_deallocate_forces(force_3c_orb_nu)
329 26 : CALL dbcsr_deallocate_matrix_set(matrix_P_munu_local)
330 26 : CALL dbcsr_deallocate_matrix_set(mat_munu_local)
331 :
332 238 : ELSEIF (eri_method == do_eri_gpw) THEN
333 238 : CALL get_qs_env(qs_env, ks_env=ks_env)
334 :
335 238 : CALL get_atomic_kind_set(atomic_kind_set, kind_of=kind_of, atom_of_kind=atom_of_kind)
336 :
337 : ! Supporting stuff for GPW
338 : CALL prepare_gpw(qs_env, dft_control, e_cutoff_old, cutoff_old, relative_cutoff_old, para_env_sub, pw_env_sub, &
339 238 : auxbas_pw_pool, poisson_env, task_list_sub, rho_r, rho_g, pot_g, psi_L, sab_orb_sub)
340 :
341 : ! in case virial is required we need auxiliary pw
342 : ! for calculate the MP2-volume contribution to the virial
343 : ! (hartree potential derivatives)
344 238 : IF (use_virial) THEN
345 50 : CALL auxbas_pw_pool%create_pw(rho_g_copy)
346 200 : DO i = 1, 3
347 200 : CALL auxbas_pw_pool%create_pw(dvg(i))
348 : END DO
349 : END IF
350 :
351 : ! start main loop over auxiliary basis functions
352 238 : CALL timeset(routineN//"_loop", handle2)
353 :
354 238 : IF (use_virial) h_stress = 0.0_dp
355 :
356 238 : L_counter = 0
357 10712 : DO LLL = my_group_L_start, my_group_L_end
358 10474 : L_counter = L_counter + 1
359 :
360 : CALL G_P_transform_MO_to_AO(matrix_P_munu%matrix, matrix_P_munu_nosym, mat_munu%matrix, &
361 : G_P_ia(:, L_counter), matrix_P_inu, &
362 10474 : mo_coeff_v, mo_coeff_o, eps_filter)
363 :
364 : CALL integrate_potential_forces_2c(rho_r, LLL, rho_g, atomic_kind_set, &
365 : qs_kind_set, particle_set, cell, pw_env_sub, poisson_env, &
366 : pot_g, mp2_env%potential_parameter, use_virial, &
367 : rho_g_copy, dvg, kind_of, atom_of_kind, G_PQ_local(:, L_counter), &
368 10474 : mp2_force, h_stress, para_env_sub, dft_control, psi_L, factor_2c)
369 :
370 10474 : IF (.NOT. compare_potential_types(mp2_env%ri_metric, mp2_env%potential_parameter)) THEN
371 :
372 : CALL integrate_potential_forces_2c(rho_r, LLL, rho_g, atomic_kind_set, &
373 : qs_kind_set, particle_set, cell, pw_env_sub, poisson_env, &
374 : pot_g, mp2_env%ri_metric, use_virial, &
375 : rho_g_copy, dvg, kind_of, atom_of_kind, G_PQ_local_2(:, L_counter), &
376 498 : mp2_force, h_stress, para_env_sub, dft_control, psi_L, factor_2c)
377 : END IF
378 :
379 36382 : IF (use_virial) pv_virial = virial%pv_virial
380 : CALL integrate_potential_forces_3c_1c(mat_munu, rho_r, matrix_P_munu, qs_env, pw_env_sub, &
381 10474 : task_list_sub)
382 10474 : IF (use_virial) THEN
383 28067 : h_stress = h_stress + (virial%pv_virial - pv_virial)
384 28067 : virial%pv_virial = pv_virial
385 : END IF
386 :
387 : ! The matrices of G_P_ia are deallocated here
388 : CALL update_lagrangian(mat_munu%matrix, matrix_P_inu, Lag_mu_i_1, &
389 : G_P_ia(:, L_counter), mo_coeff_o, Lag_nu_a_2, &
390 10474 : eps_filter)
391 :
392 : CALL integrate_potential_forces_3c_2c(matrix_P_munu, rho_r, rho_g, task_list_sub, pw_env_sub, &
393 : mp2_env%ri_metric, &
394 : ks_env, poisson_env, pot_g, use_virial, rho_g_copy, dvg, &
395 : h_stress, para_env_sub, kind_of, atom_of_kind, &
396 10712 : qs_kind_set, particle_set, cell, LLL, mp2_force, dft_control)
397 : END DO
398 :
399 238 : CALL timestop(handle2)
400 :
401 238 : DEALLOCATE (kind_of)
402 238 : DEALLOCATE (atom_of_kind)
403 :
404 238 : IF (use_virial) THEN
405 50 : CALL auxbas_pw_pool%give_back_pw(rho_g_copy)
406 200 : DO i = 1, 3
407 200 : CALL auxbas_pw_pool%give_back_pw(dvg(i))
408 : END DO
409 : END IF
410 :
411 : CALL cleanup_gpw(qs_env, e_cutoff_old, cutoff_old, relative_cutoff_old, para_env_sub, pw_env_sub, &
412 238 : task_list_sub, auxbas_pw_pool, rho_r, rho_g, pot_g, psi_L)
413 :
414 238 : CALL dbcsr_release(matrix_P_munu%matrix)
415 238 : DEALLOCATE (matrix_P_munu%matrix)
416 :
417 : END IF
418 :
419 864 : IF (use_virial) mp2_env%ri_grad%mp2_virial = h_stress
420 :
421 264 : DEALLOCATE (G_PQ_local)
422 264 : IF (.NOT. compare_potential_types(mp2_env%ri_metric, mp2_env%potential_parameter)) DEALLOCATE (G_PQ_local_2)
423 :
424 264 : CALL dbcsr_release(matrix_P_munu_nosym)
425 :
426 614 : DO ispin = 1, nspins
427 614 : CALL dbcsr_release(matrix_P_inu(ispin))
428 : END DO
429 264 : DEALLOCATE (matrix_P_inu, G_P_ia)
430 :
431 : ! move the forces in the correct place
432 264 : IF (eri_method .EQ. do_eri_gpw) THEN
433 704 : DO ikind = 1, SIZE(mp2_force)
434 6338 : mp2_force(ikind)%mp2_non_sep(:, :) = force(ikind)%rho_elec(:, :)
435 3640 : force(ikind)%rho_elec(:, :) = 0.0_dp
436 : END DO
437 : END IF
438 :
439 : ! Now we move from the local matrices to the global ones
440 : ! defined over all MPI tasks
441 : ! Start with moving from the DBCSR to FM for the lagrangians
442 :
443 1756 : ALLOCATE (L1_mu_i(nspins), L2_nu_a(nspins))
444 614 : DO ispin = 1, nspins
445 : ! Now we move from the local matrices to the global ones
446 : ! defined over all MPI tasks
447 : ! Start with moving from the DBCSR to FM for the lagrangians
448 350 : NULLIFY (fm_struct_tmp)
449 : CALL cp_fm_struct_create(fm_struct_tmp, para_env=para_env_sub, context=blacs_env_sub, &
450 350 : nrow_global=dimen, ncol_global=homo(ispin))
451 350 : CALL cp_fm_create(L1_mu_i(ispin), fm_struct_tmp, name="Lag_mu_i")
452 350 : CALL cp_fm_struct_release(fm_struct_tmp)
453 350 : CALL cp_fm_set_all(L1_mu_i(ispin), 0.0_dp)
454 350 : CALL copy_dbcsr_to_fm(matrix=Lag_mu_i_1(ispin), fm=L1_mu_i(ispin))
455 :
456 : ! release Lag_mu_i_1
457 350 : CALL dbcsr_release(Lag_mu_i_1(ispin))
458 :
459 350 : NULLIFY (fm_struct_tmp)
460 : CALL cp_fm_struct_create(fm_struct_tmp, para_env=para_env_sub, context=blacs_env_sub, &
461 350 : nrow_global=dimen, ncol_global=virtual(ispin))
462 350 : CALL cp_fm_create(L2_nu_a(ispin), fm_struct_tmp, name="Lag_nu_a")
463 350 : CALL cp_fm_struct_release(fm_struct_tmp)
464 350 : CALL cp_fm_set_all(L2_nu_a(ispin), 0.0_dp)
465 350 : CALL copy_dbcsr_to_fm(matrix=Lag_nu_a_2(ispin), fm=L2_nu_a(ispin))
466 :
467 : ! release Lag_nu_a_2
468 614 : CALL dbcsr_release(Lag_nu_a_2(ispin))
469 : END DO
470 264 : DEALLOCATE (Lag_mu_i_1, Lag_nu_a_2)
471 :
472 : ! Set the factor to multiply P_ij (depends on the open or closed shell)
473 264 : factor = 1.0_dp
474 264 : IF (alpha_beta) factor = 0.50_dp
475 :
476 614 : DO ispin = 1, nspins
477 : CALL create_W_P(qs_env, mp2_env, mo_coeff(ispin), homo(ispin), virtual(ispin), dimen, para_env, &
478 : para_env_sub, Eigenval(:, ispin), L1_mu_i(ispin), L2_nu_a(ispin), &
479 614 : factor, ispin)
480 : END DO
481 264 : DEALLOCATE (L1_mu_i, L2_nu_a)
482 :
483 264 : CALL timestop(handle)
484 :
485 528 : END SUBROUTINE calc_ri_mp2_nonsep
486 :
487 : ! **************************************************************************************************
488 : !> \brief Transforms G_P_ia to G_P_munu
489 : !> \param G_P_munu The container for G_P_munu, will be allocated and created if not allocated on entry
490 : !> \param G_P_munu_nosym ...
491 : !> \param mat_munu ...
492 : !> \param G_P_ia ...
493 : !> \param G_P_inu ...
494 : !> \param mo_coeff_v ...
495 : !> \param mo_coeff_o ...
496 : !> \param eps_filter ...
497 : ! **************************************************************************************************
498 11580 : SUBROUTINE G_P_transform_MO_to_AO(G_P_munu, G_P_munu_nosym, mat_munu, G_P_ia, G_P_inu, &
499 11580 : mo_coeff_v, mo_coeff_o, eps_filter)
500 : TYPE(dbcsr_type), POINTER :: G_P_munu
501 : TYPE(dbcsr_type), INTENT(INOUT) :: G_P_munu_nosym, mat_munu
502 : TYPE(dbcsr_p_type), DIMENSION(:), INTENT(IN) :: G_P_ia
503 : TYPE(dbcsr_type), DIMENSION(:), INTENT(INOUT) :: G_P_inu
504 : TYPE(dbcsr_p_type), DIMENSION(:), INTENT(IN) :: mo_coeff_v, mo_coeff_o
505 : REAL(KIND=dp), INTENT(IN) :: eps_filter
506 :
507 : CHARACTER(LEN=*), PARAMETER :: routineN = 'G_P_transform_MO_to_AO'
508 :
509 : INTEGER :: handle
510 :
511 11580 : IF (.NOT. ASSOCIATED(G_P_munu)) THEN
512 1344 : ALLOCATE (G_P_munu)
513 : CALL dbcsr_create(G_P_munu, template=mat_munu, &
514 1344 : matrix_type=dbcsr_type_symmetric)
515 : END IF
516 :
517 11580 : CALL G_P_transform_alpha_beta(G_P_ia, G_P_inu, G_P_munu_nosym, mo_coeff_v, mo_coeff_o, eps_filter)
518 :
519 : ! symmetrize
520 11580 : CALL timeset(routineN//"_symmetrize", handle)
521 11580 : CALL dbcsr_set(G_P_munu, 0.0_dp)
522 11580 : CALL dbcsr_transposed(G_P_munu, G_P_munu_nosym)
523 : CALL dbcsr_add(G_P_munu, G_P_munu_nosym, &
524 11580 : alpha_scalar=2.0_dp, beta_scalar=2.0_dp)
525 : ! this is a trick to avoid that integrate_v_rspace starts to cry
526 11580 : CALL dbcsr_copy(mat_munu, G_P_munu, keep_sparsity=.TRUE.)
527 11580 : CALL dbcsr_copy(G_P_munu, mat_munu)
528 :
529 11580 : CALL timestop(handle)
530 :
531 11580 : END SUBROUTINE G_P_transform_MO_to_AO
532 :
533 : ! **************************************************************************************************
534 : !> \brief ...
535 : !> \param G_P_ia ...
536 : !> \param G_P_inu ...
537 : !> \param G_P_munu ...
538 : !> \param mo_coeff_v ...
539 : !> \param mo_coeff_o ...
540 : !> \param eps_filter ...
541 : ! **************************************************************************************************
542 11580 : SUBROUTINE G_P_transform_alpha_beta(G_P_ia, G_P_inu, G_P_munu, mo_coeff_v, mo_coeff_o, eps_filter)
543 : TYPE(dbcsr_p_type), DIMENSION(:), INTENT(IN) :: G_P_ia
544 : TYPE(dbcsr_type), DIMENSION(:), INTENT(INOUT) :: G_P_inu
545 : TYPE(dbcsr_type), INTENT(INOUT) :: G_P_munu
546 : TYPE(dbcsr_p_type), DIMENSION(:), INTENT(IN) :: mo_coeff_v, mo_coeff_o
547 : REAL(KIND=dp), INTENT(IN) :: eps_filter
548 :
549 : CHARACTER(LEN=*), PARAMETER :: routineN = 'G_P_transform_alpha_beta'
550 :
551 : INTEGER :: handle, ispin
552 : REAL(KIND=dp) :: factor
553 :
554 11580 : CALL timeset(routineN, handle)
555 :
556 11580 : factor = 1.0_dp/REAL(SIZE(G_P_ia), dp)
557 :
558 11580 : CALL dbcsr_set(G_P_munu, 0.0_dp)
559 :
560 27088 : DO ispin = 1, SIZE(G_P_ia)
561 : ! first back-transformation a->nu
562 : CALL dbcsr_multiply("N", "T", 1.0_dp, mo_coeff_v(ispin)%matrix, G_P_ia(ispin)%matrix, &
563 15508 : 0.0_dp, G_P_inu(ispin), filter_eps=eps_filter)
564 :
565 : ! second back-transformation i->mu
566 : CALL dbcsr_multiply("N", "T", factor, G_P_inu(ispin), mo_coeff_o(ispin)%matrix, &
567 27088 : 1.0_dp, G_P_munu, filter_eps=eps_filter)
568 : END DO
569 :
570 11580 : CALL timestop(handle)
571 :
572 11580 : END SUBROUTINE G_P_transform_alpha_beta
573 :
574 : ! **************************************************************************************************
575 : !> \brief ...
576 : !> \param mat_munu ...
577 : !> \param matrix_P_inu ...
578 : !> \param Lag_mu_i_1 ...
579 : !> \param G_P_ia ...
580 : !> \param mo_coeff_o ...
581 : !> \param Lag_nu_a_2 ...
582 : !> \param eps_filter ...
583 : ! **************************************************************************************************
584 11580 : SUBROUTINE update_lagrangian(mat_munu, matrix_P_inu, Lag_mu_i_1, &
585 23160 : G_P_ia, mo_coeff_o, Lag_nu_a_2, &
586 : eps_filter)
587 : TYPE(dbcsr_type), INTENT(IN) :: mat_munu
588 : TYPE(dbcsr_type), DIMENSION(:), INTENT(INOUT) :: matrix_P_inu, Lag_mu_i_1
589 : TYPE(dbcsr_p_type), DIMENSION(:), INTENT(INOUT) :: G_P_ia
590 : TYPE(dbcsr_p_type), DIMENSION(:), INTENT(IN) :: mo_coeff_o
591 : TYPE(dbcsr_type), DIMENSION(:), INTENT(INOUT) :: Lag_nu_a_2
592 : REAL(KIND=dp), INTENT(IN) :: eps_filter
593 :
594 : CHARACTER(LEN=*), PARAMETER :: routineN = 'update_lagrangian'
595 :
596 : INTEGER :: handle, ispin
597 :
598 : ! update lagrangian
599 11580 : CALL timeset(routineN, handle)
600 :
601 27088 : DO ispin = 1, SIZE(G_P_ia)
602 : ! first contract mat_munu with the half back transformed Gamma_i_nu
603 : ! in order to update Lag_mu_i_1
604 : CALL dbcsr_multiply("N", "N", 1.0_dp, mat_munu, matrix_P_inu(ispin), &
605 15508 : 1.0_dp, Lag_mu_i_1(ispin), filter_eps=eps_filter)
606 :
607 : ! transform first index of mat_munu and store the result into matrix_P_inu
608 15508 : CALL dbcsr_set(matrix_P_inu(ispin), 0.0_dp)
609 : CALL dbcsr_multiply("N", "N", 1.0_dp, mat_munu, mo_coeff_o(ispin)%matrix, &
610 15508 : 0.0_dp, matrix_P_inu(ispin), filter_eps=eps_filter)
611 :
612 : ! contract the transformend matrix_P_inu with the untransformend Gamma_i_a
613 : ! in order to update Lag_nu_a_2
614 : CALL dbcsr_multiply("N", "N", -1.0_dp, matrix_P_inu(ispin), G_P_ia(ispin)%matrix, &
615 15508 : 1.0_dp, Lag_nu_a_2(ispin), filter_eps=eps_filter)
616 :
617 : ! release the actual gamma_P_ia
618 15508 : CALL dbcsr_release(G_P_ia(ispin)%matrix)
619 27088 : DEALLOCATE (G_P_ia(ispin)%matrix)
620 : END DO
621 :
622 11580 : CALL timestop(handle)
623 :
624 11580 : END SUBROUTINE update_lagrangian
625 :
626 : ! **************************************************************************************************
627 : !> \brief ...
628 : !> \param qs_env ...
629 : !> \param mp2_env ...
630 : !> \param mo_coeff ...
631 : !> \param homo ...
632 : !> \param virtual ...
633 : !> \param dimen ...
634 : !> \param para_env ...
635 : !> \param para_env_sub ...
636 : !> \param Eigenval ...
637 : !> \param L1_mu_i ...
638 : !> \param L2_nu_a ...
639 : !> \param factor ...
640 : !> \param kspin ...
641 : ! **************************************************************************************************
642 350 : SUBROUTINE create_W_P(qs_env, mp2_env, mo_coeff, homo, virtual, dimen, para_env, para_env_sub, &
643 350 : Eigenval, L1_mu_i, L2_nu_a, factor, kspin)
644 : TYPE(qs_environment_type), POINTER :: qs_env
645 : TYPE(mp2_type) :: mp2_env
646 : TYPE(cp_fm_type), INTENT(IN) :: mo_coeff
647 : INTEGER, INTENT(IN) :: homo, virtual, dimen
648 : TYPE(mp_para_env_type), POINTER :: para_env, para_env_sub
649 : REAL(KIND=dp), DIMENSION(:), INTENT(IN) :: Eigenval
650 : TYPE(cp_fm_type), INTENT(INOUT) :: L1_mu_i, L2_nu_a
651 : REAL(KIND=dp), INTENT(IN) :: factor
652 : INTEGER, INTENT(IN) :: kspin
653 :
654 : CHARACTER(LEN=*), PARAMETER :: routineN = 'create_W_P'
655 :
656 : INTEGER :: color_exchange, dummy_proc, handle, handle2, handle3, i_global, i_local, iiB, &
657 : iii, iproc, itmp(2), j_global, j_local, jjB, max_col_size, max_row_size, &
658 : my_B_virtual_end, my_B_virtual_start, mypcol, myprow, ncol_local, ncol_local_1i, &
659 : ncol_local_2a, npcol, nprow, nrow_local, nrow_local_1i, nrow_local_2a, number_of_rec, &
660 : number_of_send, proc_receive, proc_receive_static, proc_send, proc_send_ex, &
661 : proc_send_static, proc_send_sub, proc_shift, rec_col_size, rec_counter, rec_row_size, &
662 : send_col_size, send_counter, send_pcol, send_prow, send_row_size, size_rec_buffer, &
663 : size_send_buffer
664 350 : INTEGER, ALLOCATABLE, DIMENSION(:) :: iii_vet, map_rec_size, map_send_size, &
665 350 : pos_info, pos_info_ex, proc_2_send_pos
666 350 : INTEGER, ALLOCATABLE, DIMENSION(:, :) :: grid_2_mepos, mepos_2_grid, my_col_indeces_info_1i, &
667 350 : my_col_indeces_info_2a, my_row_indeces_info_1i, my_row_indeces_info_2a, sizes, sizes_1i, &
668 350 : sizes_2a
669 350 : INTEGER, ALLOCATABLE, DIMENSION(:, :, :) :: col_indeces_info_1i, &
670 350 : col_indeces_info_2a, &
671 350 : row_indeces_info_1i, &
672 350 : row_indeces_info_2a
673 350 : INTEGER, DIMENSION(:), POINTER :: col_indices, col_indices_1i, &
674 350 : col_indices_2a, row_indices, &
675 350 : row_indices_1i, row_indices_2a
676 350 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :) :: ab_rec, ab_send, mat_rec, mat_send
677 : TYPE(cp_blacs_env_type), POINTER :: blacs_env
678 : TYPE(cp_fm_struct_type), POINTER :: fm_struct_tmp
679 : TYPE(cp_fm_type) :: fm_P_ij, L_mu_q
680 : TYPE(integ_mat_buffer_type), ALLOCATABLE, &
681 350 : DIMENSION(:) :: buffer_rec, buffer_send
682 : TYPE(integ_mat_buffer_type_2D), ALLOCATABLE, &
683 350 : DIMENSION(:) :: buffer_cyclic
684 : TYPE(mp_para_env_type), POINTER :: para_env_exchange
685 350 : TYPE(mp_request_type), ALLOCATABLE, DIMENSION(:) :: req_send
686 :
687 350 : CALL timeset(routineN, handle)
688 :
689 : ! create the globally distributed mixed lagrangian
690 350 : NULLIFY (blacs_env)
691 350 : CALL get_qs_env(qs_env, blacs_env=blacs_env)
692 :
693 350 : NULLIFY (fm_struct_tmp)
694 : CALL cp_fm_struct_create(fm_struct_tmp, para_env=para_env, context=blacs_env, &
695 350 : nrow_global=dimen, ncol_global=dimen)
696 350 : CALL cp_fm_create(L_mu_q, fm_struct_tmp, name="Lag_mu_q")
697 350 : CALL cp_fm_struct_release(fm_struct_tmp)
698 350 : CALL cp_fm_set_all(L_mu_q, 0.0_dp)
699 :
700 : ! create all information array
701 1050 : ALLOCATE (pos_info(0:para_env%num_pe - 1))
702 350 : CALL para_env%allgather(para_env_sub%mepos, pos_info)
703 :
704 : ! get matrix information for the global
705 : CALL cp_fm_get_info(matrix=L_mu_q, &
706 : nrow_local=nrow_local, &
707 : ncol_local=ncol_local, &
708 : row_indices=row_indices, &
709 350 : col_indices=col_indices)
710 350 : myprow = L_mu_q%matrix_struct%context%mepos(1)
711 350 : mypcol = L_mu_q%matrix_struct%context%mepos(2)
712 350 : nprow = L_mu_q%matrix_struct%context%num_pe(1)
713 350 : npcol = L_mu_q%matrix_struct%context%num_pe(2)
714 :
715 1400 : ALLOCATE (grid_2_mepos(0:nprow - 1, 0:npcol - 1))
716 1400 : grid_2_mepos = 0
717 350 : grid_2_mepos(myprow, mypcol) = para_env%mepos
718 350 : CALL para_env%sum(grid_2_mepos)
719 :
720 : ! get matrix information for L1_mu_i
721 : CALL cp_fm_get_info(matrix=L1_mu_i, &
722 : nrow_local=nrow_local_1i, &
723 : ncol_local=ncol_local_1i, &
724 : row_indices=row_indices_1i, &
725 350 : col_indices=col_indices_1i)
726 :
727 1050 : ALLOCATE (sizes_1i(2, 0:para_env_sub%num_pe - 1))
728 1050 : CALL para_env_sub%allgather([nrow_local_1i, ncol_local_1i], sizes_1i)
729 :
730 : ! get matrix information for L2_nu_a
731 : CALL cp_fm_get_info(matrix=L2_nu_a, &
732 : nrow_local=nrow_local_2a, &
733 : ncol_local=ncol_local_2a, &
734 : row_indices=row_indices_2a, &
735 350 : col_indices=col_indices_2a)
736 :
737 1050 : ALLOCATE (sizes_2a(2, 0:para_env_sub%num_pe - 1))
738 1050 : CALL para_env_sub%allgather([nrow_local_2a, ncol_local_2a], sizes_2a)
739 :
740 : ! Here we perform a ring communication scheme taking into account
741 : ! for the sub-group distribution of the source matrices.
742 : ! as a first step we need to redistribute the data within
743 : ! the subgroup.
744 : ! In order to do so we have to allocate the structure
745 : ! that will hold the local data involved in communication, this
746 : ! structure will be the same for processes in different subgroups
747 : ! sharing the same position in the subgroup.
748 : ! -1) create the exchange para_env
749 350 : color_exchange = para_env_sub%mepos
750 350 : ALLOCATE (para_env_exchange)
751 350 : CALL para_env_exchange%from_split(para_env, color_exchange)
752 1050 : ALLOCATE (pos_info_ex(0:para_env%num_pe - 1))
753 350 : CALL para_env%allgather(para_env_exchange%mepos, pos_info_ex)
754 1050 : ALLOCATE (sizes(2, 0:para_env_exchange%num_pe - 1))
755 1050 : CALL para_env_exchange%allgather([nrow_local, ncol_local], sizes)
756 :
757 : ! 0) store some info about indeces of the fm matrices (subgroup)
758 350 : CALL timeset(routineN//"_inx", handle2)
759 : ! matrix L1_mu_i
760 716 : max_row_size = MAXVAL(sizes_1i(1, :))
761 716 : max_col_size = MAXVAL(sizes_1i(2, :))
762 1400 : ALLOCATE (row_indeces_info_1i(2, max_row_size, 0:para_env_sub%num_pe - 1))
763 1400 : ALLOCATE (col_indeces_info_1i(2, max_col_size, 0:para_env_sub%num_pe - 1))
764 1050 : ALLOCATE (my_row_indeces_info_1i(2, max_row_size))
765 1050 : ALLOCATE (my_col_indeces_info_1i(2, max_col_size))
766 20558 : row_indeces_info_1i = 0
767 4742 : col_indeces_info_1i = 0
768 350 : dummy_proc = 0
769 : ! row
770 6762 : DO iiB = 1, nrow_local_1i
771 6412 : i_global = row_indices_1i(iiB)
772 6412 : send_prow = L_mu_q%matrix_struct%g2p_row(i_global)
773 6412 : i_local = L_mu_q%matrix_struct%g2l_row(i_global)
774 6412 : my_row_indeces_info_1i(1, iiB) = send_prow
775 6762 : my_row_indeces_info_1i(2, iiB) = i_local
776 : END DO
777 : ! col
778 1620 : DO jjB = 1, ncol_local_1i
779 1270 : j_global = col_indices_1i(jjB)
780 1270 : send_pcol = L_mu_q%matrix_struct%g2p_col(j_global)
781 1270 : j_local = L_mu_q%matrix_struct%g2l_col(j_global)
782 1270 : my_col_indeces_info_1i(1, jjB) = send_pcol
783 1620 : my_col_indeces_info_1i(2, jjB) = j_local
784 : END DO
785 350 : CALL para_env_sub%allgather(my_row_indeces_info_1i, row_indeces_info_1i)
786 350 : CALL para_env_sub%allgather(my_col_indeces_info_1i, col_indeces_info_1i)
787 350 : DEALLOCATE (my_row_indeces_info_1i, my_col_indeces_info_1i)
788 :
789 : ! matrix L2_nu_a
790 716 : max_row_size = MAXVAL(sizes_2a(1, :))
791 716 : max_col_size = MAXVAL(sizes_2a(2, :))
792 1400 : ALLOCATE (row_indeces_info_2a(2, max_row_size, 0:para_env_sub%num_pe - 1))
793 1400 : ALLOCATE (col_indeces_info_2a(2, max_col_size, 0:para_env_sub%num_pe - 1))
794 1050 : ALLOCATE (my_row_indeces_info_2a(2, max_row_size))
795 1050 : ALLOCATE (my_col_indeces_info_2a(2, max_col_size))
796 20558 : row_indeces_info_2a = 0
797 17636 : col_indeces_info_2a = 0
798 : ! row
799 6762 : DO iiB = 1, nrow_local_2a
800 6412 : i_global = row_indices_2a(iiB)
801 6412 : send_prow = L_mu_q%matrix_struct%g2p_row(i_global)
802 6412 : i_local = L_mu_q%matrix_struct%g2l_row(i_global)
803 6412 : my_row_indeces_info_2a(1, iiB) = send_prow
804 6762 : my_row_indeces_info_2a(2, iiB) = i_local
805 : END DO
806 : ! col
807 5682 : DO jjB = 1, ncol_local_2a
808 5332 : j_global = col_indices_2a(jjB) + homo
809 5332 : send_pcol = L_mu_q%matrix_struct%g2p_col(j_global)
810 5332 : j_local = L_mu_q%matrix_struct%g2l_col(j_global)
811 5332 : my_col_indeces_info_2a(1, jjB) = send_pcol
812 5682 : my_col_indeces_info_2a(2, jjB) = j_local
813 : END DO
814 350 : CALL para_env_sub%allgather(my_row_indeces_info_2a, row_indeces_info_2a)
815 350 : CALL para_env_sub%allgather(my_col_indeces_info_2a, col_indeces_info_2a)
816 350 : DEALLOCATE (my_row_indeces_info_2a, my_col_indeces_info_2a)
817 350 : CALL timestop(handle2)
818 :
819 : ! 1) define the map for sending data in the subgroup starting with L1_mu_i
820 350 : CALL timeset(routineN//"_subinfo", handle2)
821 1050 : ALLOCATE (map_send_size(0:para_env_sub%num_pe - 1))
822 716 : map_send_size = 0
823 1620 : DO jjB = 1, ncol_local_1i
824 1270 : send_pcol = col_indeces_info_1i(1, jjB, para_env_sub%mepos)
825 25460 : DO iiB = 1, nrow_local_1i
826 23840 : send_prow = row_indeces_info_1i(1, iiB, para_env_sub%mepos)
827 23840 : proc_send = grid_2_mepos(send_prow, send_pcol)
828 23840 : proc_send_sub = pos_info(proc_send)
829 25110 : map_send_size(proc_send_sub) = map_send_size(proc_send_sub) + 1
830 : END DO
831 : END DO
832 : ! and the same for L2_nu_a
833 5682 : DO jjB = 1, ncol_local_2a
834 5332 : send_pcol = col_indeces_info_2a(1, jjB, para_env_sub%mepos)
835 123674 : DO iiB = 1, nrow_local_2a
836 117992 : send_prow = row_indeces_info_2a(1, iiB, para_env_sub%mepos)
837 117992 : proc_send = grid_2_mepos(send_prow, send_pcol)
838 117992 : proc_send_sub = pos_info(proc_send)
839 123324 : map_send_size(proc_send_sub) = map_send_size(proc_send_sub) + 1
840 : END DO
841 : END DO
842 : ! and exchange data in order to create map_rec_size
843 1050 : ALLOCATE (map_rec_size(0:para_env_sub%num_pe - 1))
844 716 : map_rec_size = 0
845 350 : CALL para_env_sub%alltoall(map_send_size, map_rec_size, 1)
846 350 : CALL timestop(handle2)
847 :
848 : ! 2) reorder data in sending buffer
849 350 : CALL timeset(routineN//"_sub_Bsend", handle2)
850 : ! count the number of messages (include myself)
851 350 : number_of_send = 0
852 716 : DO proc_shift = 0, para_env_sub%num_pe - 1
853 366 : proc_send = MODULO(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
854 716 : IF (map_send_size(proc_send) > 0) THEN
855 350 : number_of_send = number_of_send + 1
856 : END IF
857 : END DO
858 : ! allocate the structure that will hold the messages to be sent
859 1400 : ALLOCATE (buffer_send(number_of_send))
860 350 : send_counter = 0
861 1050 : ALLOCATE (proc_2_send_pos(0:para_env_sub%num_pe - 1))
862 716 : proc_2_send_pos = 0
863 716 : DO proc_shift = 0, para_env_sub%num_pe - 1
864 366 : proc_send = MODULO(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
865 366 : size_send_buffer = map_send_size(proc_send)
866 716 : IF (map_send_size(proc_send) > 0) THEN
867 350 : send_counter = send_counter + 1
868 : ! allocate the sending buffer (msg)
869 1050 : ALLOCATE (buffer_send(send_counter)%msg(size_send_buffer))
870 142182 : buffer_send(send_counter)%msg = 0.0_dp
871 350 : buffer_send(send_counter)%proc = proc_send
872 350 : proc_2_send_pos(proc_send) = send_counter
873 : END IF
874 : END DO
875 : ! loop over the locally held data and fill the buffer_send
876 : ! for doing that we need an array that keep track if the
877 : ! sequential increase of the index for each message
878 1050 : ALLOCATE (iii_vet(number_of_send))
879 700 : iii_vet = 0
880 1620 : DO jjB = 1, ncol_local_1i
881 1270 : send_pcol = col_indeces_info_1i(1, jjB, para_env_sub%mepos)
882 25460 : DO iiB = 1, nrow_local_1i
883 23840 : send_prow = row_indeces_info_1i(1, iiB, para_env_sub%mepos)
884 23840 : proc_send = grid_2_mepos(send_prow, send_pcol)
885 23840 : proc_send_sub = pos_info(proc_send)
886 23840 : send_counter = proc_2_send_pos(proc_send_sub)
887 23840 : iii_vet(send_counter) = iii_vet(send_counter) + 1
888 23840 : iii = iii_vet(send_counter)
889 25110 : buffer_send(send_counter)%msg(iii) = L1_mu_i%local_data(iiB, jjB)
890 : END DO
891 : END DO
892 : ! release the local data of L1_mu_i
893 350 : DEALLOCATE (L1_mu_i%local_data)
894 : ! and the same for L2_nu_a
895 5682 : DO jjB = 1, ncol_local_2a
896 5332 : send_pcol = col_indeces_info_2a(1, jjB, para_env_sub%mepos)
897 123674 : DO iiB = 1, nrow_local_2a
898 117992 : send_prow = row_indeces_info_2a(1, iiB, para_env_sub%mepos)
899 117992 : proc_send = grid_2_mepos(send_prow, send_pcol)
900 117992 : proc_send_sub = pos_info(proc_send)
901 117992 : send_counter = proc_2_send_pos(proc_send_sub)
902 117992 : iii_vet(send_counter) = iii_vet(send_counter) + 1
903 117992 : iii = iii_vet(send_counter)
904 123324 : buffer_send(send_counter)%msg(iii) = L2_nu_a%local_data(iiB, jjB)
905 : END DO
906 : END DO
907 350 : DEALLOCATE (L2_nu_a%local_data)
908 350 : DEALLOCATE (proc_2_send_pos)
909 350 : DEALLOCATE (iii_vet)
910 350 : CALL timestop(handle2)
911 :
912 : ! 3) create the buffer for receive, post the message with irecv
913 : ! and send the messages non-blocking
914 350 : CALL timeset(routineN//"_sub_isendrecv", handle2)
915 : ! count the number of messages to be received
916 350 : number_of_rec = 0
917 716 : DO proc_shift = 0, para_env_sub%num_pe - 1
918 366 : proc_receive = MODULO(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
919 716 : IF (map_rec_size(proc_receive) > 0) THEN
920 350 : number_of_rec = number_of_rec + 1
921 : END IF
922 : END DO
923 1400 : ALLOCATE (buffer_rec(number_of_rec))
924 350 : rec_counter = 0
925 716 : DO proc_shift = 0, para_env_sub%num_pe - 1
926 366 : proc_receive = MODULO(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
927 366 : size_rec_buffer = map_rec_size(proc_receive)
928 716 : IF (map_rec_size(proc_receive) > 0) THEN
929 350 : rec_counter = rec_counter + 1
930 : ! prepare the buffer for receive
931 1050 : ALLOCATE (buffer_rec(rec_counter)%msg(size_rec_buffer))
932 142182 : buffer_rec(rec_counter)%msg = 0.0_dp
933 350 : buffer_rec(rec_counter)%proc = proc_receive
934 : ! post the message to be received (not need to send to myself)
935 350 : IF (proc_receive /= para_env_sub%mepos) THEN
936 : CALL para_env_sub%irecv(buffer_rec(rec_counter)%msg, proc_receive, &
937 0 : buffer_rec(rec_counter)%msg_req)
938 : END IF
939 : END IF
940 : END DO
941 : ! send messages
942 1400 : ALLOCATE (req_send(number_of_send))
943 700 : req_send = mp_request_null
944 350 : send_counter = 0
945 716 : DO proc_shift = 0, para_env_sub%num_pe - 1
946 366 : proc_send = MODULO(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
947 716 : IF (map_send_size(proc_send) > 0) THEN
948 350 : send_counter = send_counter + 1
949 350 : IF (proc_send == para_env_sub%mepos) THEN
950 142182 : buffer_rec(send_counter)%msg(:) = buffer_send(send_counter)%msg
951 : ELSE
952 : CALL para_env_sub%isend(buffer_send(send_counter)%msg, proc_send, &
953 0 : buffer_send(send_counter)%msg_req)
954 0 : req_send(send_counter) = buffer_send(send_counter)%msg_req
955 : END IF
956 : END IF
957 : END DO
958 350 : DEALLOCATE (map_send_size)
959 350 : CALL timestop(handle2)
960 :
961 : ! 4) (if memory is a problem we should move this part after point 5)
962 : ! Here we create the new buffer for cyclic(ring) communication and
963 : ! we fill it with the data received from the other member of the
964 : ! subgroup
965 350 : CALL timeset(routineN//"_Bcyclic", handle2)
966 : ! first allocata new structure
967 1734 : ALLOCATE (buffer_cyclic(0:para_env_exchange%num_pe - 1))
968 1034 : DO iproc = 0, para_env_exchange%num_pe - 1
969 684 : rec_row_size = sizes(1, iproc)
970 684 : rec_col_size = sizes(2, iproc)
971 2736 : ALLOCATE (buffer_cyclic(iproc)%msg(rec_row_size, rec_col_size))
972 155690 : buffer_cyclic(iproc)%msg = 0.0_dp
973 : END DO
974 : ! now collect data from other member of the subgroup and fill
975 : ! buffer_cyclic
976 350 : rec_counter = 0
977 716 : DO proc_shift = 0, para_env_sub%num_pe - 1
978 366 : proc_receive = MODULO(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
979 366 : size_rec_buffer = map_rec_size(proc_receive)
980 716 : IF (map_rec_size(proc_receive) > 0) THEN
981 350 : rec_counter = rec_counter + 1
982 :
983 : ! wait for the message
984 350 : IF (proc_receive /= para_env_sub%mepos) CALL buffer_rec(rec_counter)%msg_req%wait()
985 :
986 350 : CALL timeset(routineN//"_fill", handle3)
987 350 : iii = 0
988 1620 : DO jjB = 1, sizes_1i(2, proc_receive)
989 1270 : send_pcol = col_indeces_info_1i(1, jjB, proc_receive)
990 1270 : j_local = col_indeces_info_1i(2, jjB, proc_receive)
991 25460 : DO iiB = 1, sizes_1i(1, proc_receive)
992 23840 : send_prow = row_indeces_info_1i(1, iiB, proc_receive)
993 23840 : proc_send = grid_2_mepos(send_prow, send_pcol)
994 23840 : proc_send_sub = pos_info(proc_send)
995 23840 : IF (proc_send_sub /= para_env_sub%mepos) CYCLE
996 23840 : iii = iii + 1
997 23840 : i_local = row_indeces_info_1i(2, iiB, proc_receive)
998 23840 : proc_send_ex = pos_info_ex(proc_send)
999 25110 : buffer_cyclic(proc_send_ex)%msg(i_local, j_local) = buffer_rec(rec_counter)%msg(iii)
1000 : END DO
1001 : END DO
1002 : ! and the same for L2_nu_a
1003 5682 : DO jjB = 1, sizes_2a(2, proc_receive)
1004 5332 : send_pcol = col_indeces_info_2a(1, jjB, proc_receive)
1005 5332 : j_local = col_indeces_info_2a(2, jjB, proc_receive)
1006 123674 : DO iiB = 1, sizes_2a(1, proc_receive)
1007 117992 : send_prow = row_indeces_info_2a(1, iiB, proc_receive)
1008 117992 : proc_send = grid_2_mepos(send_prow, send_pcol)
1009 117992 : proc_send_sub = pos_info(proc_send)
1010 117992 : IF (proc_send_sub /= para_env_sub%mepos) CYCLE
1011 117992 : iii = iii + 1
1012 117992 : i_local = row_indeces_info_2a(2, iiB, proc_receive)
1013 117992 : proc_send_ex = pos_info_ex(proc_send)
1014 123324 : buffer_cyclic(proc_send_ex)%msg(i_local, j_local) = buffer_rec(rec_counter)%msg(iii)
1015 : END DO
1016 : END DO
1017 350 : CALL timestop(handle3)
1018 :
1019 : ! deallocate the received message
1020 350 : DEALLOCATE (buffer_rec(rec_counter)%msg)
1021 : END IF
1022 : END DO
1023 350 : DEALLOCATE (row_indeces_info_1i)
1024 350 : DEALLOCATE (col_indeces_info_1i)
1025 350 : DEALLOCATE (row_indeces_info_2a)
1026 350 : DEALLOCATE (col_indeces_info_2a)
1027 700 : DEALLOCATE (buffer_rec)
1028 350 : DEALLOCATE (map_rec_size)
1029 350 : CALL timestop(handle2)
1030 :
1031 : ! 5) Wait for all messeges to be sent in the subgroup
1032 350 : CALL timeset(routineN//"_sub_waitall", handle2)
1033 350 : CALL mp_waitall(req_send(:))
1034 700 : DO send_counter = 1, number_of_send
1035 700 : DEALLOCATE (buffer_send(send_counter)%msg)
1036 : END DO
1037 700 : DEALLOCATE (buffer_send)
1038 350 : DEALLOCATE (req_send)
1039 350 : CALL timestop(handle2)
1040 :
1041 : ! 6) Start with ring communication
1042 350 : CALL timeset(routineN//"_ring", handle2)
1043 350 : proc_send_static = MODULO(para_env_exchange%mepos + 1, para_env_exchange%num_pe)
1044 350 : proc_receive_static = MODULO(para_env_exchange%mepos - 1, para_env_exchange%num_pe)
1045 1034 : max_row_size = MAXVAL(sizes(1, :))
1046 1034 : max_col_size = MAXVAL(sizes(2, :))
1047 1400 : ALLOCATE (mat_send(max_row_size, max_col_size))
1048 1050 : ALLOCATE (mat_rec(max_row_size, max_col_size))
1049 81926 : mat_send = 0.0_dp
1050 80131 : mat_send(1:nrow_local, 1:ncol_local) = buffer_cyclic(para_env_exchange%mepos)%msg(:, :)
1051 350 : DEALLOCATE (buffer_cyclic(para_env_exchange%mepos)%msg)
1052 684 : DO proc_shift = 1, para_env_exchange%num_pe - 1
1053 334 : proc_receive = MODULO(para_env_exchange%mepos - proc_shift, para_env_exchange%num_pe)
1054 :
1055 334 : rec_row_size = sizes(1, proc_receive)
1056 334 : rec_col_size = sizes(2, proc_receive)
1057 :
1058 77004 : mat_rec = 0.0_dp
1059 : CALL para_env_exchange%sendrecv(mat_send, proc_send_static, &
1060 334 : mat_rec, proc_receive_static)
1061 :
1062 77004 : mat_send = 0.0_dp
1063 : mat_send(1:rec_row_size, 1:rec_col_size) = mat_rec(1:rec_row_size, 1:rec_col_size) + &
1064 75209 : buffer_cyclic(proc_receive)%msg(:, :)
1065 :
1066 684 : DEALLOCATE (buffer_cyclic(proc_receive)%msg)
1067 : END DO
1068 : ! and finally
1069 : CALL para_env_exchange%sendrecv(mat_send, proc_send_static, &
1070 350 : mat_rec, proc_receive_static)
1071 80131 : L_mu_q%local_data(1:nrow_local, 1:ncol_local) = mat_rec(1:nrow_local, 1:ncol_local)
1072 1384 : DEALLOCATE (buffer_cyclic)
1073 350 : DEALLOCATE (mat_send)
1074 350 : DEALLOCATE (mat_rec)
1075 350 : CALL timestop(handle2)
1076 :
1077 : ! release para_env_exchange
1078 350 : CALL mp_para_env_release(para_env_exchange)
1079 :
1080 350 : CALL cp_fm_release(L1_mu_i)
1081 350 : CALL cp_fm_release(L2_nu_a)
1082 350 : DEALLOCATE (pos_info_ex)
1083 350 : DEALLOCATE (grid_2_mepos)
1084 350 : DEALLOCATE (sizes)
1085 350 : DEALLOCATE (sizes_1i)
1086 350 : DEALLOCATE (sizes_2a)
1087 :
1088 : ! update the P_ij block of P_MP2 with the
1089 : ! non-singular ij pairs
1090 350 : CALL timeset(routineN//"_Pij", handle2)
1091 350 : NULLIFY (fm_struct_tmp)
1092 : CALL cp_fm_struct_create(fm_struct_tmp, para_env=para_env, context=blacs_env, &
1093 350 : nrow_global=homo, ncol_global=homo)
1094 350 : CALL cp_fm_create(fm_P_ij, fm_struct_tmp, name="fm_P_ij")
1095 350 : CALL cp_fm_struct_release(fm_struct_tmp)
1096 350 : CALL cp_fm_set_all(fm_P_ij, 0.0_dp)
1097 :
1098 : ! we have it, update P_ij local
1099 : CALL cp_fm_get_info(matrix=fm_P_ij, &
1100 : nrow_local=nrow_local, &
1101 : ncol_local=ncol_local, &
1102 : row_indices=row_indices, &
1103 350 : col_indices=col_indices)
1104 :
1105 350 : IF (.NOT. mp2_env%method == ri_rpa_method_gpw) THEN
1106 : CALL parallel_gemm('T', 'N', homo, homo, dimen, 1.0_dp, &
1107 : mo_coeff, L_mu_q, 0.0_dp, fm_P_ij, &
1108 : a_first_col=1, &
1109 : a_first_row=1, &
1110 : b_first_col=1, &
1111 : b_first_row=1, &
1112 : c_first_col=1, &
1113 326 : c_first_row=1)
1114 : CALL parallel_gemm('T', 'N', homo, homo, dimen, -2.0_dp, &
1115 : L_mu_q, mo_coeff, 2.0_dp, fm_P_ij, &
1116 : a_first_col=1, &
1117 : a_first_row=1, &
1118 : b_first_col=1, &
1119 : b_first_row=1, &
1120 : c_first_col=1, &
1121 326 : c_first_row=1)
1122 :
1123 1504 : DO jjB = 1, ncol_local
1124 1178 : j_global = col_indices(jjB)
1125 3761 : DO iiB = 1, nrow_local
1126 2257 : i_global = row_indices(iiB)
1127 : ! diagonal elements and nearly degenerate ij pairs already updated
1128 3435 : IF (ABS(Eigenval(j_global) - Eigenval(i_global)) < mp2_env%ri_grad%eps_canonical) THEN
1129 671 : fm_P_ij%local_data(iiB, jjB) = mp2_env%ri_grad%P_ij(kspin)%array(i_global, j_global)
1130 : ELSE
1131 : fm_P_ij%local_data(iiB, jjB) = &
1132 1586 : factor*fm_P_ij%local_data(iiB, jjB)/(Eigenval(j_global) - Eigenval(i_global))
1133 : END IF
1134 : END DO
1135 : END DO
1136 : ELSE
1137 116 : DO jjB = 1, ncol_local
1138 92 : j_global = col_indices(jjB)
1139 294 : DO iiB = 1, nrow_local
1140 178 : i_global = row_indices(iiB)
1141 270 : fm_P_ij%local_data(iiB, jjB) = mp2_env%ri_grad%P_ij(kspin)%array(i_global, j_global)
1142 : END DO
1143 : END DO
1144 : END IF
1145 : ! deallocate the local P_ij
1146 350 : DEALLOCATE (mp2_env%ri_grad%P_ij(kspin)%array)
1147 350 : CALL timestop(handle2)
1148 :
1149 : ! Now create and fill the P matrix (MO)
1150 : ! FOR NOW WE ASSUME P_ab AND P_ij ARE REPLICATED OVER EACH MPI RANK
1151 350 : IF (.NOT. ALLOCATED(mp2_env%ri_grad%P_mo)) THEN
1152 1142 : ALLOCATE (mp2_env%ri_grad%P_mo(SIZE(mp2_env%ri_grad%mo_coeff_o)))
1153 : END IF
1154 :
1155 350 : CALL timeset(routineN//"_PMO", handle2)
1156 350 : NULLIFY (fm_struct_tmp)
1157 : CALL cp_fm_struct_create(fm_struct_tmp, para_env=para_env, context=blacs_env, &
1158 350 : nrow_global=dimen, ncol_global=dimen)
1159 350 : CALL cp_fm_create(mp2_env%ri_grad%P_mo(kspin), fm_struct_tmp, name="P_MP2_MO")
1160 350 : CALL cp_fm_set_all(mp2_env%ri_grad%P_mo(kspin), 0.0_dp)
1161 :
1162 : ! start with the (easy) occ-occ block and locally held P_ab elements
1163 350 : itmp = get_limit(virtual, para_env_sub%num_pe, para_env_sub%mepos)
1164 350 : my_B_virtual_start = itmp(1)
1165 350 : my_B_virtual_end = itmp(2)
1166 :
1167 : ! Fill occ-occ block
1168 350 : CALL cp_fm_to_fm_submat(fm_P_ij, mp2_env%ri_grad%P_mo(kspin), homo, homo, 1, 1, 1, 1)
1169 350 : CALL cp_fm_release(fm_P_ij)
1170 :
1171 : CALL cp_fm_get_info(mp2_env%ri_grad%P_mo(kspin), &
1172 : nrow_local=nrow_local, &
1173 : ncol_local=ncol_local, &
1174 : row_indices=row_indices, &
1175 350 : col_indices=col_indices)
1176 :
1177 350 : IF (mp2_env%method == ri_mp2_laplace) THEN
1178 : CALL parallel_gemm('T', 'N', virtual, virtual, dimen, 1.0_dp, &
1179 : mo_coeff, L_mu_q, 0.0_dp, mp2_env%ri_grad%P_mo(kspin), &
1180 : a_first_col=homo + 1, &
1181 : a_first_row=1, &
1182 : b_first_col=homo + 1, &
1183 : b_first_row=1, &
1184 : c_first_col=homo + 1, &
1185 24 : c_first_row=homo + 1)
1186 : CALL parallel_gemm('T', 'N', virtual, virtual, dimen, -2.0_dp, &
1187 : L_mu_q, mo_coeff, 2.0_dp, mp2_env%ri_grad%P_mo(kspin), &
1188 : a_first_col=homo + 1, &
1189 : a_first_row=1, &
1190 : b_first_col=homo + 1, &
1191 : b_first_row=1, &
1192 : c_first_col=homo + 1, &
1193 24 : c_first_row=homo + 1)
1194 : END IF
1195 :
1196 350 : IF (mp2_env%method == ri_mp2_method_gpw .OR. mp2_env%method == ri_rpa_method_gpw) THEN
1197 : ! With MP2 and RPA, we have already calculated the density matrix elements
1198 6336 : DO jjB = 1, ncol_local
1199 6010 : j_global = col_indices(jjB)
1200 6010 : IF (j_global <= homo) CYCLE
1201 59745 : DO iiB = 1, nrow_local
1202 54587 : i_global = row_indices(iiB)
1203 60597 : IF (my_B_virtual_start <= i_global - homo .AND. i_global - homo <= my_B_virtual_end) THEN
1204 : mp2_env%ri_grad%P_mo(kspin)%local_data(iiB, jjB) = &
1205 45329 : mp2_env%ri_grad%P_ab(kspin)%array(i_global - homo - my_B_virtual_start + 1, j_global - homo)
1206 : END IF
1207 : END DO
1208 : END DO
1209 24 : ELSE IF (mp2_env%method == ri_mp2_laplace) THEN
1210 : ! With Laplace-SOS-MP2, we still have to calculate the matrix elements of the non-degenerate pairs
1211 616 : DO jjB = 1, ncol_local
1212 592 : j_global = col_indices(jjB)
1213 592 : IF (j_global <= homo) CYCLE
1214 6764 : DO iiB = 1, nrow_local
1215 6240 : i_global = row_indices(iiB)
1216 6832 : IF (ABS(Eigenval(i_global) - Eigenval(j_global)) < mp2_env%ri_grad%eps_canonical) THEN
1217 565 : IF (my_B_virtual_start <= i_global - homo .AND. i_global - homo <= my_B_virtual_end) THEN
1218 : mp2_env%ri_grad%P_mo(kspin)%local_data(iiB, jjB) = &
1219 552 : mp2_env%ri_grad%P_ab(kspin)%array(i_global - homo - my_B_virtual_start + 1, j_global - homo)
1220 : ELSE
1221 13 : mp2_env%ri_grad%P_mo(kspin)%local_data(iiB, jjB) = 0.0_dp
1222 : END IF
1223 : ELSE
1224 : mp2_env%ri_grad%P_mo(kspin)%local_data(iiB, jjB) = &
1225 : factor*mp2_env%ri_grad%P_mo(kspin)%local_data(iiB, jjB)/ &
1226 5675 : (Eigenval(i_global) - Eigenval(j_global))
1227 : END IF
1228 : END DO
1229 : END DO
1230 : ELSE
1231 0 : CPABORT("Calculation of virt-virt block of density matrix is dealt with elsewhere!")
1232 : END IF
1233 :
1234 : ! send around the sub_group the local data and check if we
1235 : ! have to update our block with external elements
1236 1050 : ALLOCATE (mepos_2_grid(2, 0:para_env_sub%num_pe - 1))
1237 1050 : CALL para_env_sub%allgather([myprow, mypcol], mepos_2_grid)
1238 :
1239 1050 : ALLOCATE (sizes(2, 0:para_env_sub%num_pe - 1))
1240 1050 : CALL para_env_sub%allgather([nrow_local, ncol_local], sizes)
1241 :
1242 1400 : ALLOCATE (ab_rec(nrow_local, ncol_local))
1243 366 : DO proc_shift = 1, para_env_sub%num_pe - 1
1244 16 : proc_send = MODULO(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
1245 16 : proc_receive = MODULO(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
1246 :
1247 16 : send_prow = mepos_2_grid(1, proc_send)
1248 16 : send_pcol = mepos_2_grid(2, proc_send)
1249 :
1250 16 : send_row_size = sizes(1, proc_send)
1251 16 : send_col_size = sizes(2, proc_send)
1252 :
1253 64 : ALLOCATE (ab_send(send_row_size, send_col_size))
1254 4922 : ab_send = 0.0_dp
1255 :
1256 : ! first loop over row since in this way we can cycle
1257 206 : DO iiB = 1, send_row_size
1258 190 : i_global = mp2_env%ri_grad%P_mo(kspin)%matrix_struct%l2g_row(iiB, send_prow)
1259 190 : IF (i_global <= homo) CYCLE
1260 154 : i_global = i_global - homo
1261 154 : IF (.NOT. (my_B_virtual_start <= i_global .AND. i_global <= my_B_virtual_end)) CYCLE
1262 493 : DO jjB = 1, send_col_size
1263 458 : j_global = mp2_env%ri_grad%P_mo(kspin)%matrix_struct%l2g_col(jjB, send_pcol)
1264 458 : IF (j_global <= homo) CYCLE
1265 367 : j_global = j_global - homo
1266 648 : ab_send(iiB, jjB) = mp2_env%ri_grad%P_ab(kspin)%array(i_global - my_B_virtual_start + 1, j_global)
1267 : END DO
1268 : END DO
1269 :
1270 4922 : ab_rec = 0.0_dp
1271 : CALL para_env_sub%sendrecv(ab_send, proc_send, &
1272 16 : ab_rec, proc_receive)
1273 : mp2_env%ri_grad%P_mo(kspin)%local_data(1:nrow_local, 1:ncol_local) = &
1274 : mp2_env%ri_grad%P_mo(kspin)%local_data(1:nrow_local, 1:ncol_local) + &
1275 4922 : ab_rec(1:nrow_local, 1:ncol_local)
1276 :
1277 366 : DEALLOCATE (ab_send)
1278 : END DO
1279 350 : DEALLOCATE (ab_rec)
1280 350 : DEALLOCATE (mepos_2_grid)
1281 350 : DEALLOCATE (sizes)
1282 :
1283 : ! deallocate the local P_ab
1284 350 : DEALLOCATE (mp2_env%ri_grad%P_ab(kspin)%array)
1285 350 : CALL timestop(handle2)
1286 :
1287 : ! create also W_MP2_MO
1288 350 : CALL timeset(routineN//"_WMO", handle2)
1289 350 : IF (.NOT. ALLOCATED(mp2_env%ri_grad%W_mo)) THEN
1290 1142 : ALLOCATE (mp2_env%ri_grad%W_mo(SIZE(mp2_env%ri_grad%mo_coeff_o)))
1291 : END IF
1292 :
1293 350 : CALL cp_fm_create(mp2_env%ri_grad%W_mo(kspin), fm_struct_tmp, name="W_MP2_MO")
1294 350 : CALL cp_fm_struct_release(fm_struct_tmp)
1295 :
1296 : ! all block
1297 : CALL parallel_gemm('T', 'N', dimen, dimen, dimen, 2.0_dp*factor, &
1298 : L_mu_q, mo_coeff, 0.0_dp, mp2_env%ri_grad%W_mo(kspin), &
1299 : a_first_col=1, &
1300 : a_first_row=1, &
1301 : b_first_col=1, &
1302 : b_first_row=1, &
1303 : c_first_col=1, &
1304 350 : c_first_row=1)
1305 :
1306 : ! occ-occ block
1307 : CALL parallel_gemm('T', 'N', homo, homo, dimen, -2.0_dp*factor, &
1308 : L_mu_q, mo_coeff, 0.0_dp, mp2_env%ri_grad%W_mo(kspin), &
1309 : a_first_col=1, &
1310 : a_first_row=1, &
1311 : b_first_col=1, &
1312 : b_first_row=1, &
1313 : c_first_col=1, &
1314 350 : c_first_row=1)
1315 :
1316 : ! occ-virt block
1317 : CALL parallel_gemm('T', 'N', homo, virtual, dimen, 2.0_dp*factor, &
1318 : mo_coeff, L_mu_q, 0.0_dp, mp2_env%ri_grad%W_mo(kspin), &
1319 : a_first_col=1, &
1320 : a_first_row=1, &
1321 : b_first_col=homo + 1, &
1322 : b_first_row=1, &
1323 : c_first_col=homo + 1, &
1324 350 : c_first_row=1)
1325 350 : CALL timestop(handle2)
1326 :
1327 : ! Calculate occ-virt block of the lagrangian in MO
1328 350 : CALL timeset(routineN//"_Ljb", handle2)
1329 350 : IF (.NOT. ALLOCATED(mp2_env%ri_grad%L_jb)) THEN
1330 1142 : ALLOCATE (mp2_env%ri_grad%L_jb(SIZE(mp2_env%ri_grad%mo_coeff_o)))
1331 : END IF
1332 :
1333 : CALL cp_fm_struct_create(fm_struct_tmp, para_env=para_env, context=blacs_env, &
1334 350 : nrow_global=homo, ncol_global=virtual)
1335 350 : CALL cp_fm_create(mp2_env%ri_grad%L_jb(kspin), fm_struct_tmp, name="fm_L_jb")
1336 350 : CALL cp_fm_struct_release(fm_struct_tmp)
1337 :
1338 : ! first Virtual
1339 : CALL parallel_gemm('T', 'N', homo, virtual, dimen, 2.0_dp*factor, &
1340 : L_mu_q, mo_coeff, 0.0_dp, mp2_env%ri_grad%L_jb(kspin), &
1341 : a_first_col=1, &
1342 : a_first_row=1, &
1343 : b_first_col=homo + 1, &
1344 : b_first_row=1, &
1345 : c_first_col=1, &
1346 350 : c_first_row=1)
1347 : ! then occupied
1348 : CALL parallel_gemm('T', 'N', homo, virtual, dimen, 2.0_dp*factor, &
1349 : mo_coeff, L_mu_q, 1.0_dp, mp2_env%ri_grad%L_jb(kspin), &
1350 : a_first_col=1, &
1351 : a_first_row=1, &
1352 : b_first_col=homo + 1, &
1353 : b_first_row=1, &
1354 : c_first_col=1, &
1355 350 : c_first_row=1)
1356 :
1357 : ! finally release L_mu_q
1358 350 : CALL cp_fm_release(L_mu_q)
1359 350 : CALL timestop(handle2)
1360 :
1361 : ! here we should be done next CPHF
1362 :
1363 350 : DEALLOCATE (pos_info)
1364 :
1365 350 : CALL timestop(handle)
1366 :
1367 7000 : END SUBROUTINE create_W_P
1368 :
1369 : END MODULE mp2_ri_grad
|