Line data Source code
1 : !--------------------------------------------------------------------------------------------------!
2 : ! CP2K: A general program to perform molecular dynamics simulations !
3 : ! Copyright 2000-2024 CP2K developers group <https://cp2k.org> !
4 : ! !
5 : ! SPDX-License-Identifier: GPL-2.0-or-later !
6 : !--------------------------------------------------------------------------------------------------!
7 :
8 : ! **************************************************************************************************
9 : !> \brief Routines to calculate MP2 energy using GPW method
10 : !> \par History
11 : !> 10.2011 created [Joost VandeVondele and Mauro Del Ben]
12 : ! **************************************************************************************************
13 : MODULE mp2_gpw_method
14 : USE atomic_kind_types, ONLY: atomic_kind_type
15 : USE cell_types, ONLY: cell_type
16 : USE cp_blacs_env, ONLY: cp_blacs_env_type
17 : USE cp_control_types, ONLY: dft_control_type
18 : USE cp_dbcsr_api, ONLY: &
19 : dbcsr_create, dbcsr_get_info, dbcsr_iterator_blocks_left, dbcsr_iterator_next_block, &
20 : dbcsr_iterator_start, dbcsr_iterator_stop, dbcsr_iterator_type, dbcsr_multiply, &
21 : dbcsr_p_type, dbcsr_release, dbcsr_set, dbcsr_type, dbcsr_type_no_symmetry
22 : USE cp_dbcsr_operations, ONLY: copy_dbcsr_to_fm,&
23 : cp_dbcsr_m_by_n_from_template
24 : USE cp_fm_struct, ONLY: cp_fm_struct_create,&
25 : cp_fm_struct_release,&
26 : cp_fm_struct_type
27 : USE cp_fm_types, ONLY: cp_fm_create,&
28 : cp_fm_get_info,&
29 : cp_fm_release,&
30 : cp_fm_type
31 : USE group_dist_types, ONLY: create_group_dist,&
32 : get_group_dist,&
33 : group_dist_d1_type,&
34 : release_group_dist
35 : USE kinds, ONLY: dp,&
36 : int_8
37 : USE machine, ONLY: m_memory
38 : USE message_passing, ONLY: mp_comm_type,&
39 : mp_para_env_type
40 : USE mp2_eri_gpw, ONLY: calc_potential_gpw,&
41 : cleanup_gpw,&
42 : prepare_gpw
43 : USE particle_types, ONLY: particle_type
44 : USE pw_env_types, ONLY: pw_env_type
45 : USE pw_methods, ONLY: pw_multiply,&
46 : pw_transfer,&
47 : pw_zero
48 : USE pw_poisson_types, ONLY: pw_poisson_type
49 : USE pw_pool_types, ONLY: pw_pool_type
50 : USE pw_types, ONLY: pw_c1d_gs_type,&
51 : pw_r3d_rs_type
52 : USE qs_collocate_density, ONLY: collocate_function
53 : USE qs_environment_types, ONLY: qs_environment_type
54 : USE qs_integrate_potential, ONLY: integrate_v_rspace
55 : USE qs_kind_types, ONLY: qs_kind_type
56 : USE qs_neighbor_list_types, ONLY: neighbor_list_set_p_type
57 : USE task_list_types, ONLY: task_list_type
58 : #include "./base/base_uses.f90"
59 :
60 : IMPLICIT NONE
61 :
62 : PRIVATE
63 :
64 : CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'mp2_gpw_method'
65 :
66 : PUBLIC :: mp2_gpw_compute
67 :
68 : CONTAINS
69 :
70 : ! **************************************************************************************************
71 : !> \brief ...
72 : !> \param Emp2 ...
73 : !> \param Emp2_Cou ...
74 : !> \param Emp2_EX ...
75 : !> \param qs_env ...
76 : !> \param para_env ...
77 : !> \param para_env_sub ...
78 : !> \param color_sub ...
79 : !> \param cell ...
80 : !> \param particle_set ...
81 : !> \param atomic_kind_set ...
82 : !> \param qs_kind_set ...
83 : !> \param Eigenval ...
84 : !> \param nmo ...
85 : !> \param homo ...
86 : !> \param mat_munu ...
87 : !> \param sab_orb_sub ...
88 : !> \param mo_coeff_o ...
89 : !> \param mo_coeff_v ...
90 : !> \param eps_filter ...
91 : !> \param unit_nr ...
92 : !> \param mp2_memory ...
93 : !> \param calc_ex ...
94 : !> \param blacs_env_sub ...
95 : !> \param Emp2_AB ...
96 : ! **************************************************************************************************
97 16 : SUBROUTINE mp2_gpw_compute(Emp2, Emp2_Cou, Emp2_EX, qs_env, para_env, para_env_sub, color_sub, &
98 16 : cell, particle_set, atomic_kind_set, qs_kind_set, Eigenval, nmo, homo, &
99 16 : mat_munu, sab_orb_sub, mo_coeff_o, mo_coeff_v, eps_filter, unit_nr, &
100 : mp2_memory, calc_ex, blacs_env_sub, Emp2_AB)
101 :
102 : REAL(KIND=dp), INTENT(OUT) :: Emp2, Emp2_Cou, Emp2_EX
103 : TYPE(qs_environment_type), POINTER :: qs_env
104 : TYPE(mp_para_env_type), POINTER :: para_env, para_env_sub
105 : INTEGER, INTENT(IN) :: color_sub
106 : TYPE(cell_type), POINTER :: cell
107 : TYPE(particle_type), DIMENSION(:), POINTER :: particle_set
108 : TYPE(atomic_kind_type), DIMENSION(:), POINTER :: atomic_kind_set
109 : TYPE(qs_kind_type), DIMENSION(:), POINTER :: qs_kind_set
110 : REAL(KIND=dp), DIMENSION(:, :), INTENT(IN) :: Eigenval
111 : INTEGER, INTENT(IN) :: nmo
112 : INTEGER, DIMENSION(:), INTENT(IN) :: homo
113 : TYPE(dbcsr_p_type), INTENT(INOUT) :: mat_munu
114 : TYPE(neighbor_list_set_p_type), DIMENSION(:), &
115 : POINTER :: sab_orb_sub
116 : TYPE(dbcsr_p_type), DIMENSION(:), INTENT(IN) :: mo_coeff_o, mo_coeff_v
117 : REAL(KIND=dp), INTENT(IN) :: eps_filter
118 : INTEGER, INTENT(IN) :: unit_nr
119 : REAL(KIND=dp), INTENT(IN) :: mp2_memory
120 : LOGICAL, INTENT(IN) :: calc_ex
121 : TYPE(cp_blacs_env_type), POINTER :: blacs_env_sub
122 : REAL(KIND=dp), INTENT(OUT), OPTIONAL :: Emp2_AB
123 :
124 : CHARACTER(LEN=*), PARAMETER :: routineN = 'mp2_gpw_compute'
125 :
126 : INTEGER :: a, a_group_counter, b, b_global, b_group_counter, blk, col, col_offset, col_size, &
127 : color_counter, EX_end, EX_end_send, EX_start, EX_start_send, group_counter, handle, &
128 : handle2, handle3, i, i_counter, i_group_counter, index_proc_shift, ispin, j, max_b_size, &
129 : max_batch_size_A, max_batch_size_I, max_row_col_local, my_A_batch_size, my_A_virtual_end, &
130 : my_A_virtual_start, my_B_size, my_B_virtual_end, my_B_virtual_start, my_I_batch_size, &
131 : my_I_occupied_end, my_I_occupied_start, my_q_position, ncol_local, nfullcols_total, &
132 : nfullrows_total, ngroup, nrow_local, nspins, p, p_best, proc_receive
133 : INTEGER :: proc_send, q, q_best, row, row_offset, row_size, size_EX, size_EX_send, &
134 : sub_sub_color, wfn_calc, wfn_calc_best
135 : INTEGER(KIND=int_8) :: mem
136 16 : INTEGER, ALLOCATABLE, DIMENSION(:) :: vector_B_sizes, &
137 16 : vector_batch_A_size_group, &
138 16 : vector_batch_I_size_group
139 16 : INTEGER, ALLOCATABLE, DIMENSION(:, :) :: color_array, local_col_row_info
140 16 : INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
141 32 : INTEGER, DIMENSION(SIZE(homo)) :: virtual
142 : LOGICAL :: do_alpha_beta
143 : REAL(KIND=dp) :: cutoff_old, mem_min, mem_real, mem_try, &
144 : relative_cutoff_old, wfn_size
145 16 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:) :: e_cutoff_old
146 16 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :) :: my_Cocc, my_Cvirt
147 16 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :) :: BIb_C, BIb_Ex, BIb_send
148 16 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: data_block
149 : TYPE(cp_fm_struct_type), POINTER :: fm_struct
150 : TYPE(cp_fm_type) :: fm_BIb_jb
151 : TYPE(dbcsr_iterator_type) :: iter
152 52 : TYPE(dbcsr_type), DIMENSION(SIZE(homo)) :: matrix_ia_jb, matrix_ia_jnu
153 : TYPE(dft_control_type), POINTER :: dft_control
154 16 : TYPE(group_dist_d1_type) :: gd_exchange
155 : TYPE(mp_comm_type) :: comm_exchange
156 : TYPE(pw_c1d_gs_type) :: pot_g, rho_g
157 : TYPE(pw_env_type), POINTER :: pw_env_sub
158 : TYPE(pw_poisson_type), POINTER :: poisson_env
159 : TYPE(pw_pool_type), POINTER :: auxbas_pw_pool
160 : TYPE(pw_r3d_rs_type) :: psi_a, rho_r
161 16 : TYPE(pw_r3d_rs_type), ALLOCATABLE, DIMENSION(:) :: psi_i
162 : TYPE(task_list_type), POINTER :: task_list_sub
163 :
164 16 : CALL timeset(routineN, handle)
165 :
166 16 : do_alpha_beta = .FALSE.
167 16 : IF (PRESENT(Emp2_AB)) do_alpha_beta = .TRUE.
168 :
169 16 : nspins = SIZE(homo)
170 34 : virtual = nmo - homo
171 :
172 34 : DO ispin = 1, nspins
173 : ! initialize and create the matrix (ia|jnu)
174 18 : CALL dbcsr_create(matrix_ia_jnu(ispin), template=mo_coeff_o(ispin)%matrix)
175 :
176 : ! Allocate Sparse matrices: (ia|jb)
177 : CALL cp_dbcsr_m_by_n_from_template(matrix_ia_jb(ispin), template=mo_coeff_o(ispin)%matrix, m=homo(ispin), n=nmo - homo(ispin), &
178 18 : sym=dbcsr_type_no_symmetry)
179 :
180 : ! set all to zero in such a way that the memory is actually allocated
181 18 : CALL dbcsr_set(matrix_ia_jnu(ispin), 0.0_dp)
182 34 : CALL dbcsr_set(matrix_ia_jb(ispin), 0.0_dp)
183 : END DO
184 16 : CALL dbcsr_set(mat_munu%matrix, 0.0_dp)
185 :
186 16 : IF (calc_ex) THEN
187 : ! create the analogous of matrix_ia_jb in fm type
188 16 : NULLIFY (fm_struct)
189 16 : CALL dbcsr_get_info(matrix_ia_jb(1), nfullrows_total=nfullrows_total, nfullcols_total=nfullcols_total)
190 : CALL cp_fm_struct_create(fm_struct, context=blacs_env_sub, nrow_global=nfullrows_total, &
191 16 : ncol_global=nfullcols_total, para_env=para_env_sub)
192 16 : CALL cp_fm_create(fm_BIb_jb, fm_struct, name="fm_BIb_jb")
193 :
194 16 : CALL copy_dbcsr_to_fm(matrix_ia_jb(1), fm_BIb_jb)
195 16 : CALL cp_fm_struct_release(fm_struct)
196 :
197 : CALL cp_fm_get_info(matrix=fm_BIb_jb, &
198 : nrow_local=nrow_local, &
199 : ncol_local=ncol_local, &
200 : row_indices=row_indices, &
201 16 : col_indices=col_indices)
202 :
203 16 : max_row_col_local = MAX(nrow_local, ncol_local)
204 16 : CALL para_env_sub%max(max_row_col_local)
205 :
206 64 : ALLOCATE (local_col_row_info(0:max_row_col_local, 2))
207 868 : local_col_row_info = 0
208 : ! 0,1 nrows
209 16 : local_col_row_info(0, 1) = nrow_local
210 78 : local_col_row_info(1:nrow_local, 1) = row_indices(1:nrow_local)
211 : ! 0,2 ncols
212 16 : local_col_row_info(0, 2) = ncol_local
213 458 : local_col_row_info(1:ncol_local, 2) = col_indices(1:ncol_local)
214 : END IF
215 :
216 : ! Get everything for GPW calculations
217 : CALL prepare_gpw(qs_env, dft_control, e_cutoff_old, cutoff_old, relative_cutoff_old, para_env_sub, pw_env_sub, &
218 16 : auxbas_pw_pool, poisson_env, task_list_sub, rho_r, rho_g, pot_g, psi_a, sab_orb_sub)
219 :
220 64 : wfn_size = REAL(SIZE(rho_r%array), KIND=dp)
221 16 : CALL para_env%max(wfn_size)
222 :
223 16 : ngroup = para_env%num_pe/para_env_sub%num_pe
224 :
225 : ! calculate the minimal memory required per MPI task (p=occupied division,q=virtual division)
226 16 : p_best = ngroup
227 16 : q_best = 1
228 16 : mem_min = HUGE(0)
229 48 : DO p = 1, ngroup
230 32 : q = ngroup/p
231 32 : IF (p*q .NE. ngroup) CYCLE
232 :
233 32 : CALL estimate_memory_usage(wfn_size, p, q, para_env_sub%num_pe, nmo, virtual(1), homo(1), calc_ex, mem_try)
234 :
235 48 : IF (mem_try <= mem_min) THEN
236 32 : mem_min = mem_try
237 32 : p_best = p
238 32 : q_best = q
239 : END IF
240 : END DO
241 24 : IF (unit_nr > 0) WRITE (unit_nr, '(T3,A,T69,F9.2,A3)') 'Minimum required memory per MPI process for MP2:', &
242 16 : mem_min, ' MB'
243 :
244 16 : CALL m_memory(mem)
245 16 : mem_real = (mem + 1024*1024 - 1)/(1024*1024)
246 16 : CALL para_env%min(mem_real)
247 :
248 16 : mem_real = mp2_memory - mem_real
249 16 : mem_real = MAX(mem_real, mem_min)
250 24 : IF (unit_nr > 0) WRITE (unit_nr, '(T3,A,T69,F9.2,A3)') 'Available memory per MPI process for MP2:', &
251 16 : mem_real, ' MB'
252 :
253 16 : wfn_calc_best = HUGE(wfn_calc_best)
254 48 : DO p = 1, ngroup
255 32 : q = ngroup/p
256 32 : IF (p*q .NE. ngroup) CYCLE
257 :
258 32 : CALL estimate_memory_usage(wfn_size, p, q, para_env_sub%num_pe, nmo, virtual(1), homo(1), calc_ex, mem_try)
259 :
260 32 : IF (mem_try > mem_real) CYCLE
261 26 : wfn_calc = ((homo(1) + p - 1)/p) + ((virtual(1) + q - 1)/q)
262 42 : IF (wfn_calc < wfn_calc_best) THEN
263 16 : wfn_calc_best = wfn_calc
264 16 : p_best = p
265 16 : q_best = q
266 : END IF
267 : END DO
268 :
269 16 : max_batch_size_I = (homo(1) + p_best - 1)/p_best
270 16 : max_batch_size_A = (virtual(1) + q_best - 1)/q_best
271 :
272 16 : IF (unit_nr > 0) THEN
273 : WRITE (UNIT=unit_nr, FMT="(T3,A,T77,i4)") &
274 8 : "MP2_GPW| max. batch size for the occupied states:", max_batch_size_I
275 : WRITE (UNIT=unit_nr, FMT="(T3,A,T77,i4)") &
276 8 : "MP2_GPW| max. batch size for the virtual states:", max_batch_size_A
277 : END IF
278 :
279 : CALL get_vector_batch(vector_batch_I_size_group, p_best, max_batch_size_I, homo(1))
280 : CALL get_vector_batch(vector_batch_A_size_group, q_best, max_batch_size_A, virtual(1))
281 :
282 : !XXXXXXXXXXXXX inverse group distribution
283 16 : group_counter = 0
284 16 : a_group_counter = 0
285 16 : my_A_virtual_start = 1
286 21 : DO j = 0, q_best - 1
287 21 : my_I_occupied_start = 1
288 21 : i_group_counter = 0
289 29 : DO i = 0, p_best - 1
290 24 : group_counter = group_counter + 1
291 24 : IF (color_sub == group_counter - 1) EXIT
292 8 : my_I_occupied_start = my_I_occupied_start + vector_batch_I_size_group(i)
293 29 : i_group_counter = i_group_counter + 1
294 : END DO
295 21 : my_q_position = j
296 21 : IF (color_sub == group_counter - 1) EXIT
297 5 : my_A_virtual_start = my_A_virtual_start + vector_batch_A_size_group(j)
298 21 : a_group_counter = a_group_counter + 1
299 : END DO
300 : !XXXXXXXXXXXXX inverse group distribution
301 :
302 16 : my_I_occupied_end = my_I_occupied_start + vector_batch_I_size_group(i_group_counter) - 1
303 16 : my_I_batch_size = vector_batch_I_size_group(i_group_counter)
304 16 : my_A_virtual_end = my_A_virtual_start + vector_batch_A_size_group(a_group_counter) - 1
305 16 : my_A_batch_size = vector_batch_A_size_group(a_group_counter)
306 :
307 16 : DEALLOCATE (vector_batch_I_size_group)
308 16 : DEALLOCATE (vector_batch_A_size_group)
309 :
310 : ! replicate on a local array on proc 0 the occupied and virtual wavevectior
311 : ! needed for the calculation of the WF's by calculate_wavefunction
312 : ! (external vector)
313 : CALL grep_occ_virt_wavefunc(para_env_sub, nmo, &
314 : my_I_occupied_start, my_I_occupied_end, my_I_batch_size, &
315 : my_A_virtual_start, my_A_virtual_end, my_A_batch_size, &
316 : mo_coeff_o(1)%matrix, mo_coeff_v(1)%matrix, my_Cocc, my_Cvirt)
317 :
318 : ! divide the b states in the sub_group in such a way to create
319 : ! b_start and b_end for each proc inside the sub_group
320 16 : max_b_size = (virtual(1) + para_env_sub%num_pe - 1)/para_env_sub%num_pe
321 : CALL get_vector_batch(vector_B_sizes, para_env_sub%num_pe, max_b_size, virtual(1))
322 :
323 : ! now give to each proc its b_start and b_end
324 16 : b_group_counter = 0
325 16 : my_B_virtual_start = 1
326 16 : DO j = 0, para_env_sub%num_pe - 1
327 16 : b_group_counter = b_group_counter + 1
328 16 : IF (b_group_counter - 1 == para_env_sub%mepos) EXIT
329 16 : my_B_virtual_start = my_B_virtual_start + vector_B_sizes(j)
330 : END DO
331 16 : my_B_virtual_end = my_B_virtual_start + vector_B_sizes(para_env_sub%mepos) - 1
332 16 : my_B_size = vector_B_sizes(para_env_sub%mepos)
333 :
334 16 : DEALLOCATE (vector_B_sizes)
335 :
336 : ! create an array containing a different "color" for each pair of
337 : ! A_start and B_start, communication will take place only among
338 : ! those proc that have the same A_start and B_start
339 64 : ALLOCATE (color_array(0:para_env_sub%num_pe - 1, 0:q_best - 1))
340 68 : color_array = 0
341 : color_counter = 0
342 42 : DO j = 0, q_best - 1
343 68 : DO i = 0, para_env_sub%num_pe - 1
344 26 : color_counter = color_counter + 1
345 52 : color_array(i, j) = color_counter
346 : END DO
347 : END DO
348 16 : sub_sub_color = color_array(para_env_sub%mepos, my_q_position)
349 :
350 16 : DEALLOCATE (color_array)
351 :
352 : ! now create a group that contains all the proc that have the same 2 virtual starting points
353 : ! in this way it is possible to sum the common integrals needed for the full MP2 energy
354 16 : CALL comm_exchange%from_split(para_env, sub_sub_color)
355 :
356 : ! create an array containing the information for communication
357 16 : CALL create_group_dist(gd_exchange, my_I_occupied_start, my_I_occupied_end, my_I_batch_size, comm_exchange)
358 :
359 98 : ALLOCATE (psi_i(my_I_occupied_start:my_I_occupied_end))
360 66 : DO i = my_I_occupied_start, my_I_occupied_end
361 50 : CALL auxbas_pw_pool%create_pw(psi_i(i))
362 : CALL collocate_function(my_Cocc(:, i - my_I_occupied_start + 1), &
363 : psi_i(i), rho_g, atomic_kind_set, qs_kind_set, cell, particle_set, &
364 66 : pw_env_sub, dft_control%qs_control%eps_rho_rspace)
365 : END DO
366 :
367 16 : Emp2 = 0.0_dp
368 16 : Emp2_Cou = 0.0_dp
369 16 : Emp2_EX = 0.0_dp
370 16 : IF (do_alpha_beta) Emp2_AB = 0.0_dp
371 16 : IF (calc_ex) THEN
372 80 : ALLOCATE (BIb_C(my_B_size, homo(1), my_I_batch_size))
373 : END IF
374 :
375 16 : CALL timeset(routineN//"_loop", handle2)
376 270 : DO a = homo(1) + my_A_virtual_start, homo(1) + my_A_virtual_end
377 :
378 92659 : IF (calc_ex) BIb_C = 0.0_dp
379 :
380 : ! psi_a
381 : CALL collocate_function(my_Cvirt(:, a - (homo(1) + my_A_virtual_start) + 1), &
382 : psi_a, rho_g, atomic_kind_set, qs_kind_set, cell, particle_set, &
383 254 : pw_env_sub, dft_control%qs_control%eps_rho_rspace)
384 254 : i_counter = 0
385 1017 : DO i = my_I_occupied_start, my_I_occupied_end
386 763 : i_counter = i_counter + 1
387 :
388 : ! potential
389 763 : CALL pw_zero(rho_r)
390 763 : CALL pw_multiply(rho_r, psi_i(i), psi_a)
391 763 : CALL pw_transfer(rho_r, rho_g)
392 763 : CALL calc_potential_gpw(rho_r, rho_g, poisson_env, pot_g, qs_env%mp2_env%potential_parameter)
393 :
394 : ! and finally (ia|munu)
395 763 : CALL timeset(routineN//"_int", handle3)
396 763 : CALL dbcsr_set(mat_munu%matrix, 0.0_dp)
397 : CALL integrate_v_rspace(rho_r, hmat=mat_munu, qs_env=qs_env, &
398 : calculate_forces=.FALSE., compute_tau=.FALSE., gapw=.FALSE., &
399 763 : pw_env_external=pw_env_sub, task_list_external=task_list_sub)
400 763 : CALL timestop(handle3)
401 :
402 : ! multiply and goooooooo ...
403 763 : CALL timeset(routineN//"_mult_o", handle3)
404 1622 : DO ispin = 1, nspins
405 : CALL dbcsr_multiply("N", "N", 1.0_dp, mat_munu%matrix, mo_coeff_o(ispin)%matrix, &
406 1622 : 0.0_dp, matrix_ia_jnu(ispin), filter_eps=eps_filter)
407 : END DO
408 763 : CALL timestop(handle3)
409 763 : CALL timeset(routineN//"_mult_v", handle3)
410 1622 : DO ispin = 1, nspins
411 : CALL dbcsr_multiply("T", "N", 1.0_dp, matrix_ia_jnu(ispin), mo_coeff_v(ispin)%matrix, &
412 1622 : 0.0_dp, matrix_ia_jb(ispin), filter_eps=eps_filter)
413 : END DO
414 763 : CALL timestop(handle3)
415 :
416 763 : CALL timeset(routineN//"_E_Cou", handle3)
417 763 : CALL dbcsr_iterator_start(iter, matrix_ia_jb(1))
418 1738 : DO WHILE (dbcsr_iterator_blocks_left(iter))
419 : CALL dbcsr_iterator_next_block(iter, row, col, data_block, blk, &
420 : row_size=row_size, col_size=col_size, &
421 975 : row_offset=row_offset, col_offset=col_offset)
422 24373 : DO b = 1, col_size
423 112275 : DO j = 1, row_size
424 : ! Compute the coulomb MP2 energy
425 : Emp2_Cou = Emp2_Cou - 2.0_dp*data_block(j, b)**2/ &
426 111300 : (Eigenval(a, 1) + Eigenval(homo(1) + col_offset + b - 1, 1) - Eigenval(i, 1) - Eigenval(row_offset + j - 1, 1))
427 : END DO
428 : END DO
429 : END DO
430 763 : CALL dbcsr_iterator_stop(iter)
431 763 : IF (do_alpha_beta) THEN
432 : ! Compute the coulomb only= SO = MP2 alpha-beta MP2 energy component
433 96 : CALL dbcsr_iterator_start(iter, matrix_ia_jb(2))
434 192 : DO WHILE (dbcsr_iterator_blocks_left(iter))
435 : CALL dbcsr_iterator_next_block(iter, row, col, data_block, blk, &
436 : row_size=row_size, col_size=col_size, &
437 96 : row_offset=row_offset, col_offset=col_offset)
438 2592 : DO b = 1, col_size
439 9696 : DO j = 1, row_size
440 : ! Compute the coulomb MP2 energy alpha beta case
441 : Emp2_AB = Emp2_AB - data_block(j, b)**2/ &
442 9600 : (Eigenval(a, 1) + Eigenval(homo(2) + col_offset + b - 1, 2) - Eigenval(i, 1) - Eigenval(row_offset + j - 1, 2))
443 : END DO
444 : END DO
445 : END DO
446 96 : CALL dbcsr_iterator_stop(iter)
447 : END IF
448 763 : CALL timestop(handle3)
449 :
450 : ! now collect my local data from all the other members of the group
451 : ! b_start, b_end
452 4069 : IF (calc_ex) THEN
453 763 : CALL timeset(routineN//"_E_Ex_1", handle3)
454 763 : CALL copy_dbcsr_to_fm(matrix_ia_jb(1), fm_BIb_jb)
455 : CALL grep_my_integrals(para_env_sub, fm_BIb_jb, BIb_C(1:my_B_size, 1:homo(1), i_counter), max_row_col_local, &
456 763 : local_col_row_info, my_B_virtual_end, my_B_virtual_start)
457 763 : CALL timestop(handle3)
458 : END IF
459 :
460 : END DO
461 :
462 270 : IF (calc_ex) THEN
463 :
464 254 : ASSOCIATE (mepos_in_EX_group => comm_exchange%mepos, size_of_exchange_group => comm_exchange%num_pe)
465 254 : CALL timeset(routineN//"_E_Ex_2", handle3)
466 : ! calculate the contribution to MP2 energy for my local data
467 1017 : DO i = 1, my_I_batch_size
468 3538 : DO j = my_I_occupied_start, my_I_occupied_end
469 83285 : DO b = 1, my_B_size
470 80001 : b_global = b - 1 + my_B_virtual_start
471 : Emp2_EX = Emp2_EX + BIb_C(b, j, i)*BIb_C(b, i + my_I_occupied_start - 1, j - my_I_occupied_start + 1) &
472 82522 : /(Eigenval(a, 1) + Eigenval(homo(1) + b_global, 1) - Eigenval(i + my_I_occupied_start - 1, 1) - Eigenval(j, 1))
473 : END DO
474 : END DO
475 : END DO
476 :
477 : ! start communicating and collecting exchange contributions from
478 : ! other processes in my exchange group
479 368 : DO index_proc_shift = 1, size_of_exchange_group - 1
480 114 : proc_send = MODULO(mepos_in_EX_group + index_proc_shift, size_of_exchange_group)
481 114 : proc_receive = MODULO(mepos_in_EX_group - index_proc_shift, size_of_exchange_group)
482 :
483 114 : CALL get_group_dist(gd_exchange, proc_receive, EX_start, EX_end, size_EX)
484 :
485 570 : ALLOCATE (BIb_EX(my_B_size, my_I_batch_size, size_EX))
486 9462 : BIb_EX = 0.0_dp
487 :
488 114 : CALL get_group_dist(gd_exchange, proc_send, EX_start_send, EX_end_send, size_EX_send)
489 :
490 570 : ALLOCATE (BIb_send(my_B_size, size_EX_send, my_I_batch_size))
491 : BIb_send(1:my_B_size, 1:size_EX_send, 1:my_I_batch_size) = &
492 9462 : BIb_C(1:my_B_size, EX_start_send:EX_end_send, 1:my_I_batch_size)
493 :
494 : ! send and receive the exchange array
495 114 : CALL comm_exchange%sendrecv(BIb_send, proc_send, BIb_EX, proc_receive)
496 :
497 342 : DO i = 1, my_I_batch_size
498 798 : DO j = 1, size_EX
499 9348 : DO b = 1, my_B_size
500 8664 : b_global = b - 1 + my_B_virtual_start
501 : Emp2_EX = Emp2_EX + BIb_C(b, j + EX_start - 1, i)*BIb_EX(b, i, j) &
502 : /(Eigenval(a, 1) + Eigenval(homo(1) + b_global, 1) - Eigenval(i + my_I_occupied_start - 1, 1) &
503 9120 : - Eigenval(j + EX_start - 1, 1))
504 : END DO
505 : END DO
506 : END DO
507 :
508 114 : DEALLOCATE (BIb_EX)
509 596 : DEALLOCATE (BIb_send)
510 :
511 : END DO
512 508 : CALL timestop(handle3)
513 : END ASSOCIATE
514 : END IF
515 :
516 : END DO
517 16 : CALL timestop(handle2)
518 :
519 16 : CALL para_env%sum(Emp2_Cou)
520 16 : CALL para_env%sum(Emp2_EX)
521 16 : Emp2 = Emp2_Cou + Emp2_EX
522 16 : IF (do_alpha_beta) CALL para_env%sum(Emp2_AB)
523 :
524 16 : DEALLOCATE (my_Cocc)
525 16 : DEALLOCATE (my_Cvirt)
526 :
527 16 : IF (calc_ex) THEN
528 16 : CALL cp_fm_release(fm_BIb_jb)
529 16 : DEALLOCATE (local_col_row_info)
530 16 : DEALLOCATE (BIb_C)
531 : END IF
532 16 : CALL release_group_dist(gd_exchange)
533 :
534 16 : CALL comm_exchange%free()
535 :
536 34 : DO ispin = 1, nspins
537 18 : CALL dbcsr_release(matrix_ia_jnu(ispin))
538 34 : CALL dbcsr_release(matrix_ia_jb(ispin))
539 : END DO
540 :
541 66 : DO i = my_I_occupied_start, my_I_occupied_end
542 66 : CALL psi_i(i)%release()
543 : END DO
544 16 : DEALLOCATE (psi_i)
545 :
546 : CALL cleanup_gpw(qs_env, e_cutoff_old, cutoff_old, relative_cutoff_old, para_env_sub, pw_env_sub, &
547 16 : task_list_sub, auxbas_pw_pool, rho_r, rho_g, pot_g, psi_a)
548 :
549 16 : CALL timestop(handle)
550 :
551 128 : END SUBROUTINE mp2_gpw_compute
552 :
553 : ! **************************************************************************************************
554 : !> \brief ...
555 : !> \param wfn_size ...
556 : !> \param p ...
557 : !> \param q ...
558 : !> \param num_w ...
559 : !> \param nmo ...
560 : !> \param virtual ...
561 : !> \param homo ...
562 : !> \param calc_ex ...
563 : !> \param mem_try ...
564 : ! **************************************************************************************************
565 64 : ELEMENTAL SUBROUTINE estimate_memory_usage(wfn_size, p, q, num_w, nmo, virtual, homo, calc_ex, mem_try)
566 : REAL(KIND=dp), INTENT(IN) :: wfn_size
567 : INTEGER, INTENT(IN) :: p, q, num_w, nmo, virtual, homo
568 : LOGICAL, INTENT(IN) :: calc_ex
569 : REAL(KIND=dp), INTENT(OUT) :: mem_try
570 :
571 : mem_try = 0.0_dp
572 : ! integrals
573 64 : mem_try = mem_try + virtual*REAL(homo, KIND=dp)**2/(p*num_w)
574 : ! array for the coefficient matrix and wave vectors
575 : mem_try = mem_try + REAL(homo, KIND=dp)*nmo/p + &
576 : REAL(virtual, KIND=dp)*nmo/q + &
577 64 : 2.0_dp*MAX(REAL(homo, KIND=dp)*nmo/p, REAL(virtual, KIND=dp)*nmo/q)
578 : ! temporary array for MO integrals and MO integrals to be exchanged
579 64 : IF (calc_ex) THEN
580 : mem_try = mem_try + 2.0_dp*MAX(virtual*REAL(homo, KIND=dp)*MIN(1, num_w - 1)/num_w, &
581 64 : virtual*REAL(homo, KIND=dp)**2/(p*p*num_w))
582 : ELSE
583 0 : mem_try = mem_try + 2.0_dp*virtual*REAL(homo, KIND=dp)
584 : END IF
585 : ! wfn
586 64 : mem_try = mem_try + ((homo + p - 1)/p)*wfn_size
587 : ! Mb
588 64 : mem_try = mem_try*8.0D+00/1024.0D+00**2
589 :
590 64 : END SUBROUTINE
591 :
592 : ! **************************************************************************************************
593 : !> \brief ...
594 : !> \param vector_batch_I_size_group ...
595 : !> \param p_best ...
596 : !> \param max_batch_size_I ...
597 : !> \param homo ...
598 : ! **************************************************************************************************
599 48 : PURE SUBROUTINE get_vector_batch(vector_batch_I_size_group, p_best, max_batch_size_I, homo)
600 : INTEGER, ALLOCATABLE, DIMENSION(:), INTENT(OUT) :: vector_batch_I_size_group
601 : INTEGER, INTENT(IN) :: p_best, max_batch_size_I, homo
602 :
603 : INTEGER :: i, one
604 :
605 144 : ALLOCATE (vector_batch_I_size_group(0:p_best - 1))
606 :
607 112 : vector_batch_I_size_group = max_batch_size_I
608 112 : IF (SUM(vector_batch_I_size_group) /= homo) THEN
609 24 : one = 1
610 24 : IF (SUM(vector_batch_I_size_group) > homo) one = -1
611 8 : i = -1
612 : DO
613 8 : i = i + 1
614 8 : vector_batch_I_size_group(i) = vector_batch_I_size_group(i) + one
615 24 : IF (SUM(vector_batch_I_size_group) == homo) EXIT
616 8 : IF (i == p_best - 1) i = -1
617 : END DO
618 : END IF
619 :
620 48 : END SUBROUTINE get_vector_batch
621 :
622 : ! **************************************************************************************************
623 : !> \brief ...
624 : !> \param para_env_sub ...
625 : !> \param fm_BIb_jb ...
626 : !> \param BIb_jb ...
627 : !> \param max_row_col_local ...
628 : !> \param local_col_row_info ...
629 : !> \param my_B_virtual_end ...
630 : !> \param my_B_virtual_start ...
631 : ! **************************************************************************************************
632 763 : SUBROUTINE grep_my_integrals(para_env_sub, fm_BIb_jb, BIb_jb, max_row_col_local, &
633 : local_col_row_info, &
634 : my_B_virtual_end, my_B_virtual_start)
635 : TYPE(mp_para_env_type), INTENT(IN) :: para_env_sub
636 : TYPE(cp_fm_type), INTENT(IN) :: fm_BIb_jb
637 : REAL(KIND=dp), DIMENSION(:, :), INTENT(OUT) :: BIb_jb
638 : INTEGER, INTENT(IN) :: max_row_col_local
639 : INTEGER, ALLOCATABLE, DIMENSION(:, :), INTENT(IN) :: local_col_row_info
640 : INTEGER, INTENT(IN) :: my_B_virtual_end, my_B_virtual_start
641 :
642 : INTEGER :: i_global, iiB, j_global, jjB, ncol_rec, &
643 : nrow_rec, proc_receive, proc_send, &
644 : proc_shift
645 : INTEGER, ALLOCATABLE, DIMENSION(:, :) :: rec_col_row_info
646 763 : INTEGER, DIMENSION(:), POINTER :: col_indices_rec, row_indices_rec
647 763 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: local_BI, rec_BI
648 :
649 3052 : ALLOCATE (rec_col_row_info(0:max_row_col_local, 2))
650 :
651 49085 : rec_col_row_info(:, :) = local_col_row_info
652 :
653 763 : nrow_rec = rec_col_row_info(0, 1)
654 763 : ncol_rec = rec_col_row_info(0, 2)
655 :
656 2289 : ALLOCATE (row_indices_rec(nrow_rec))
657 3740 : row_indices_rec = rec_col_row_info(1:nrow_rec, 1)
658 :
659 2289 : ALLOCATE (col_indices_rec(ncol_rec))
660 23398 : col_indices_rec = rec_col_row_info(1:ncol_rec, 2)
661 :
662 : ! accumulate data on BIb_jb buffer starting from myself
663 23398 : DO jjB = 1, ncol_rec
664 22635 : j_global = col_indices_rec(jjB)
665 23398 : IF (j_global >= my_B_virtual_start .AND. j_global <= my_B_virtual_end) THEN
666 111300 : DO iiB = 1, nrow_rec
667 88665 : i_global = row_indices_rec(iiB)
668 111300 : BIb_jb(j_global - my_B_virtual_start + 1, i_global) = fm_BIb_jb%local_data(iiB, jjB)
669 : END DO
670 : END IF
671 : END DO
672 :
673 763 : DEALLOCATE (row_indices_rec)
674 763 : DEALLOCATE (col_indices_rec)
675 :
676 763 : IF (para_env_sub%num_pe > 1) THEN
677 0 : ALLOCATE (local_BI(nrow_rec, ncol_rec))
678 0 : local_BI(1:nrow_rec, 1:ncol_rec) = fm_BIb_jb%local_data(1:nrow_rec, 1:ncol_rec)
679 :
680 0 : DO proc_shift = 1, para_env_sub%num_pe - 1
681 0 : proc_send = MODULO(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
682 0 : proc_receive = MODULO(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
683 :
684 : ! first exchange information on the local data
685 0 : rec_col_row_info = 0
686 0 : CALL para_env_sub%sendrecv(local_col_row_info, proc_send, rec_col_row_info, proc_receive)
687 0 : nrow_rec = rec_col_row_info(0, 1)
688 0 : ncol_rec = rec_col_row_info(0, 2)
689 :
690 0 : ALLOCATE (row_indices_rec(nrow_rec))
691 0 : row_indices_rec = rec_col_row_info(1:nrow_rec, 1)
692 :
693 0 : ALLOCATE (col_indices_rec(ncol_rec))
694 0 : col_indices_rec = rec_col_row_info(1:ncol_rec, 2)
695 :
696 0 : ALLOCATE (rec_BI(nrow_rec, ncol_rec))
697 0 : rec_BI = 0.0_dp
698 :
699 : ! then send and receive the real data
700 0 : CALL para_env_sub%sendrecv(local_BI, proc_send, rec_BI, proc_receive)
701 :
702 : ! accumulate the received data on BIb_jb buffer
703 0 : DO jjB = 1, ncol_rec
704 0 : j_global = col_indices_rec(jjB)
705 0 : IF (j_global >= my_B_virtual_start .AND. j_global <= my_B_virtual_end) THEN
706 0 : DO iiB = 1, nrow_rec
707 0 : i_global = row_indices_rec(iiB)
708 0 : BIb_jb(j_global - my_B_virtual_start + 1, i_global) = rec_BI(iiB, jjB)
709 : END DO
710 : END IF
711 : END DO
712 :
713 0 : DEALLOCATE (col_indices_rec)
714 0 : DEALLOCATE (row_indices_rec)
715 0 : DEALLOCATE (rec_BI)
716 : END DO
717 :
718 0 : DEALLOCATE (local_BI)
719 : END IF
720 :
721 763 : DEALLOCATE (rec_col_row_info)
722 :
723 763 : END SUBROUTINE grep_my_integrals
724 :
725 : ! **************************************************************************************************
726 : !> \brief ...
727 : !> \param para_env_sub ...
728 : !> \param dimen ...
729 : !> \param my_I_occupied_start ...
730 : !> \param my_I_occupied_end ...
731 : !> \param my_I_batch_size ...
732 : !> \param my_A_virtual_start ...
733 : !> \param my_A_virtual_end ...
734 : !> \param my_A_batch_size ...
735 : !> \param mo_coeff_o ...
736 : !> \param mo_coeff_v ...
737 : !> \param my_Cocc ...
738 : !> \param my_Cvirt ...
739 : ! **************************************************************************************************
740 16 : SUBROUTINE grep_occ_virt_wavefunc(para_env_sub, dimen, &
741 : my_I_occupied_start, my_I_occupied_end, my_I_batch_size, &
742 : my_A_virtual_start, my_A_virtual_end, my_A_batch_size, &
743 : mo_coeff_o, mo_coeff_v, my_Cocc, my_Cvirt)
744 :
745 : TYPE(mp_para_env_type), INTENT(IN) :: para_env_sub
746 : INTEGER, INTENT(IN) :: dimen, my_I_occupied_start, my_I_occupied_end, my_I_batch_size, &
747 : my_A_virtual_start, my_A_virtual_end, my_A_batch_size
748 : TYPE(dbcsr_type), POINTER :: mo_coeff_o, mo_coeff_v
749 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :), &
750 : INTENT(OUT) :: my_Cocc, my_Cvirt
751 :
752 : CHARACTER(LEN=*), PARAMETER :: routineN = 'grep_occ_virt_wavefunc'
753 :
754 : INTEGER :: blk, col, col_offset, col_size, handle, &
755 : i, i_global, j, j_global, row, &
756 : row_offset, row_size
757 16 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: data_block
758 : TYPE(dbcsr_iterator_type) :: iter
759 :
760 16 : CALL timeset(routineN, handle)
761 :
762 64 : ALLOCATE (my_Cocc(dimen, my_I_batch_size))
763 1558 : my_Cocc = 0.0_dp
764 :
765 64 : ALLOCATE (my_Cvirt(dimen, my_A_batch_size))
766 8159 : my_Cvirt = 0.0_dp
767 :
768 : ! accumulate data from mo_coeff_o into Cocc
769 16 : CALL dbcsr_iterator_start(iter, mo_coeff_o)
770 68 : DO WHILE (dbcsr_iterator_blocks_left(iter))
771 : CALL dbcsr_iterator_next_block(iter, row, col, data_block, blk, &
772 : row_size=row_size, col_size=col_size, &
773 52 : row_offset=row_offset, col_offset=col_offset)
774 268 : DO j = 1, col_size
775 200 : j_global = col_offset + j - 1
776 252 : IF (j_global >= my_I_occupied_start .AND. j_global <= my_I_occupied_end) THEN
777 1656 : DO i = 1, row_size
778 1492 : i_global = row_offset + i - 1
779 1656 : my_Cocc(i_global, j_global - my_I_occupied_start + 1) = data_block(i, j)
780 : END DO
781 : END IF
782 : END DO
783 : END DO
784 16 : CALL dbcsr_iterator_stop(iter)
785 :
786 16 : CALL para_env_sub%sum(my_Cocc)
787 :
788 : ! accumulate data from mo_coeff_o into Cocc
789 16 : CALL dbcsr_iterator_start(iter, mo_coeff_v)
790 74 : DO WHILE (dbcsr_iterator_blocks_left(iter))
791 : CALL dbcsr_iterator_next_block(iter, row, col, data_block, blk, &
792 : row_size=row_size, col_size=col_size, &
793 58 : row_offset=row_offset, col_offset=col_offset)
794 1354 : DO j = 1, col_size
795 1280 : j_global = col_offset + j - 1
796 1338 : IF (j_global >= my_A_virtual_start .AND. j_global <= my_A_virtual_end) THEN
797 8700 : DO i = 1, row_size
798 7889 : i_global = row_offset + i - 1
799 8700 : my_Cvirt(i_global, j_global - my_A_virtual_start + 1) = data_block(i, j)
800 : END DO
801 : END IF
802 : END DO
803 : END DO
804 16 : CALL dbcsr_iterator_stop(iter)
805 :
806 16 : CALL para_env_sub%sum(my_Cvirt)
807 :
808 16 : CALL timestop(handle)
809 :
810 48 : END SUBROUTINE grep_occ_virt_wavefunc
811 :
812 : END MODULE mp2_gpw_method
|