Line data Source code
1 : !--------------------------------------------------------------------------------------------------!
2 : ! CP2K: A general program to perform molecular dynamics simulations !
3 : ! Copyright 2000-2024 CP2K developers group <https://cp2k.org> !
4 : ! !
5 : ! SPDX-License-Identifier: GPL-2.0-or-later !
6 : !--------------------------------------------------------------------------------------------------!
7 :
8 : ! **************************************************************************************************
9 : !> \brief Routines for calculating RI-MP2 gradients
10 : !> \par History
11 : !> 10.2013 created [Mauro Del Ben]
12 : ! **************************************************************************************************
13 : MODULE mp2_ri_grad_util
14 : !
15 : USE cp_blacs_env, ONLY: cp_blacs_env_create,&
16 : cp_blacs_env_release,&
17 : cp_blacs_env_type
18 : USE cp_dbcsr_api, ONLY: dbcsr_p_type,&
19 : dbcsr_type,&
20 : dbcsr_type_no_symmetry
21 : USE cp_dbcsr_operations, ONLY: copy_fm_to_dbcsr,&
22 : cp_dbcsr_m_by_n_from_template
23 : USE cp_fm_basic_linalg, ONLY: cp_fm_geadd
24 : USE cp_fm_struct, ONLY: cp_fm_struct_create,&
25 : cp_fm_struct_get,&
26 : cp_fm_struct_release,&
27 : cp_fm_struct_type
28 : USE cp_fm_types, ONLY: cp_fm_create,&
29 : cp_fm_get_info,&
30 : cp_fm_release,&
31 : cp_fm_set_all,&
32 : cp_fm_to_fm,&
33 : cp_fm_type
34 : USE group_dist_types, ONLY: create_group_dist,&
35 : get_group_dist,&
36 : group_dist_d1_type,&
37 : release_group_dist
38 : USE kinds, ONLY: dp
39 : USE libint_2c_3c, ONLY: compare_potential_types
40 : USE message_passing, ONLY: mp_para_env_type,&
41 : mp_request_null,&
42 : mp_request_type,&
43 : mp_waitall
44 : USE mp2_types, ONLY: integ_mat_buffer_type,&
45 : mp2_type
46 : USE parallel_gemm_api, ONLY: parallel_gemm
47 : USE util, ONLY: get_limit
48 : #include "./base/base_uses.f90"
49 :
50 : IMPLICIT NONE
51 :
52 : PRIVATE
53 :
54 : CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'mp2_ri_grad_util'
55 :
56 : PUBLIC :: complete_gamma, array2fm, fm2array, prepare_redistribution, create_dbcsr_gamma
57 :
58 : TYPE index_map
59 : INTEGER, DIMENSION(:, :), ALLOCATABLE :: map
60 : END TYPE
61 :
62 : CONTAINS
63 :
64 : ! **************************************************************************************************
65 : !> \brief complete the calculation of the Gamma matrices
66 : !> \param mp2_env ...
67 : !> \param B_ia_Q ...
68 : !> \param dimen_RI ...
69 : !> \param homo ...
70 : !> \param virtual ...
71 : !> \param para_env ...
72 : !> \param para_env_sub ...
73 : !> \param ngroup ...
74 : !> \param my_group_L_size ...
75 : !> \param my_group_L_start ...
76 : !> \param my_group_L_end ...
77 : !> \param my_B_size ...
78 : !> \param my_B_virtual_start ...
79 : !> \param gd_array ...
80 : !> \param gd_B_virtual ...
81 : !> \param kspin ...
82 : !> \author Mauro Del Ben
83 : ! **************************************************************************************************
84 302 : SUBROUTINE complete_gamma(mp2_env, B_ia_Q, dimen_RI, homo, virtual, para_env, para_env_sub, ngroup, &
85 : my_group_L_size, my_group_L_start, my_group_L_end, &
86 : my_B_size, my_B_virtual_start, gd_array, gd_B_virtual, kspin)
87 :
88 : TYPE(mp2_type) :: mp2_env
89 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :), &
90 : INTENT(INOUT) :: B_ia_Q
91 : INTEGER, INTENT(IN) :: dimen_RI, homo, virtual
92 : TYPE(mp_para_env_type), INTENT(IN), POINTER :: para_env, para_env_sub
93 : INTEGER, INTENT(IN) :: ngroup, my_group_L_size, &
94 : my_group_L_start, my_group_L_end, &
95 : my_B_size, my_B_virtual_start
96 : TYPE(group_dist_d1_type), INTENT(IN) :: gd_array, gd_B_virtual
97 : INTEGER, INTENT(IN) :: kspin
98 :
99 : CHARACTER(LEN=*), PARAMETER :: routineN = 'complete_gamma'
100 :
101 : INTEGER :: dimen_ia, handle, i, ispin, kkB, my_ia_end, my_ia_size, my_ia_start, my_P_end, &
102 : my_P_size, my_P_start, nspins, pos_group, pos_sub
103 302 : INTEGER, ALLOCATABLE, DIMENSION(:) :: pos_info
104 302 : INTEGER, ALLOCATABLE, DIMENSION(:, :) :: group_grid_2_mepos, mepos_2_grid_group
105 302 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :) :: BIb_C_2D, Gamma_2D, Gamma_PQ
106 : TYPE(cp_blacs_env_type), POINTER :: blacs_env
107 : TYPE(cp_fm_struct_type), POINTER :: fm_struct_ia, fm_struct_RI
108 : TYPE(cp_fm_type) :: fm_Gamma, fm_Gamma_PQ, fm_Gamma_PQ_2, fm_Gamma_PQ_temp, &
109 : fm_Gamma_PQ_temp_2, fm_ia_P, fm_Y, operator_half, PQ_half
110 302 : TYPE(group_dist_d1_type) :: gd_array_new, gd_ia, gd_ia_new, gd_P, &
111 302 : gd_P_new
112 :
113 302 : CALL timeset(routineN, handle)
114 :
115 : ! reshape the data into a 2D array
116 : ! reorder the local data in such a way to help the next stage of matrix creation
117 : ! now the data inside the group are divided into a ia x K matrix
118 302 : dimen_ia = homo*virtual
119 302 : CALL create_group_dist(gd_ia, para_env_sub%num_pe, dimen_ia)
120 302 : CALL get_group_dist(gd_ia, para_env_sub%mepos, my_ia_start, my_ia_end, my_ia_size)
121 :
122 : CALL mat_3d_to_2d(B_ia_Q, BIb_C_2D, homo, my_B_size, virtual, my_B_virtual_start, &
123 : my_ia_start, my_ia_end, my_ia_size, my_group_L_size, para_env_sub, gd_B_virtual)
124 :
125 : CALL mat_3d_to_2d_gamma(mp2_env%ri_grad%Gamma_P_ia(kspin)%array, Gamma_2D, homo, my_B_size, virtual, my_B_virtual_start, &
126 302 : my_ia_start, my_ia_end, my_ia_size, my_group_L_size, para_env_sub, gd_B_virtual)
127 :
128 : ! create the processor map and size arrays
129 302 : CALL create_group_dist(gd_ia_new, para_env%num_pe)
130 302 : CALL create_group_dist(gd_array_new, para_env%num_pe)
131 302 : CALL create_group_dist(gd_P, para_env_sub%num_pe, dimen_RI)
132 302 : CALL create_group_dist(gd_P_new, para_env%num_pe)
133 :
134 302 : CALL get_group_dist(gd_P, para_env_sub%mepos, my_P_start, my_P_end, my_P_size)
135 :
136 : CALL prepare_redistribution(para_env, para_env_sub, ngroup, &
137 : group_grid_2_mepos, mepos_2_grid_group, pos_info=pos_info)
138 :
139 906 : DO i = 0, para_env%num_pe - 1
140 : ! calculate position of the group
141 604 : pos_group = i/para_env_sub%num_pe
142 : ! calculate position in the subgroup
143 604 : pos_sub = pos_info(i)
144 : ! 1 -> rows, 2 -> cols
145 604 : CALL get_group_dist(gd_ia, pos_sub, gd_ia_new, i)
146 604 : CALL get_group_dist(gd_array, pos_group, gd_array_new, i)
147 906 : CALL get_group_dist(gd_P, pos_sub, gd_P_new, i)
148 : END DO
149 :
150 : ! create the blacs env
151 302 : NULLIFY (blacs_env)
152 302 : CALL cp_blacs_env_create(blacs_env=blacs_env, para_env=para_env)
153 :
154 302 : NULLIFY (fm_struct_ia)
155 : CALL cp_fm_struct_create(fm_struct_ia, context=blacs_env, nrow_global=dimen_ia, &
156 302 : ncol_global=dimen_RI, para_env=para_env)
157 :
158 : ! create the fm matrix Gamma
159 : CALL array2fm(Gamma_2D, fm_struct_ia, my_ia_start, my_ia_end, &
160 : my_group_L_start, my_group_L_end, &
161 : gd_ia_new, gd_array_new, &
162 : group_grid_2_mepos, para_env_sub%num_pe, ngroup, &
163 302 : fm_Y)
164 : ! create the fm matrix B_ia_P
165 : CALL array2fm(BIb_C_2D, fm_struct_ia, my_ia_start, my_ia_end, &
166 : my_group_L_start, my_group_L_end, &
167 : gd_ia_new, gd_array_new, &
168 : group_grid_2_mepos, para_env_sub%num_pe, ngroup, &
169 302 : fm_ia_P)
170 :
171 302 : NULLIFY (fm_struct_RI)
172 : CALL cp_fm_struct_create(fm_struct_RI, context=blacs_env, nrow_global=dimen_RI, &
173 302 : ncol_global=dimen_RI, para_env=para_env)
174 :
175 : ! since we will need (P|Q)^(-1/2) in the future, make a copy
176 : CALL array2fm(mp2_env%ri_grad%PQ_half, fm_struct_RI, my_P_start, my_P_end, &
177 : my_group_L_start, my_group_L_end, &
178 : gd_P_new, gd_array_new, &
179 : group_grid_2_mepos, para_env_sub%num_pe, ngroup, &
180 302 : PQ_half, do_release_mat=.FALSE.)
181 :
182 302 : IF (.NOT. compare_potential_types(mp2_env%ri_metric, mp2_env%potential_parameter)) THEN
183 : CALL array2fm(mp2_env%ri_grad%operator_half, fm_struct_RI, my_P_start, my_P_end, &
184 : my_group_L_start, my_group_L_end, &
185 : gd_P_new, gd_array_new, &
186 : group_grid_2_mepos, para_env_sub%num_pe, ngroup, &
187 8 : operator_half, do_release_mat=.FALSE.)
188 : END IF
189 :
190 302 : CALL release_group_dist(gd_P_new)
191 302 : CALL release_group_dist(gd_ia_new)
192 302 : CALL release_group_dist(gd_array_new)
193 :
194 : ! complete the gamma matrix Gamma_ia^P = sum_Q (Y_ia^Q * (Q|P)^(-1/2))
195 302 : IF (compare_potential_types(mp2_env%ri_metric, mp2_env%potential_parameter)) THEN
196 294 : CALL cp_fm_create(fm_Gamma, fm_struct_ia, name="fm_Gamma")
197 294 : CALL cp_fm_struct_release(fm_struct_ia)
198 : ! perform the matrix multiplication
199 : CALL parallel_gemm(transa="N", transb="T", m=dimen_ia, n=dimen_RI, k=dimen_RI, alpha=1.0_dp, &
200 : matrix_a=fm_Y, matrix_b=PQ_half, beta=0.0_dp, &
201 294 : matrix_c=fm_Gamma)
202 : ! release the Y matrix
203 294 : CALL cp_fm_release(fm_Y)
204 :
205 : ! complete gamma small (fm_Gamma_PQ)
206 : ! create temp matrix
207 294 : CALL cp_fm_create(fm_Gamma_PQ_temp, fm_struct_RI, name="fm_Gamma_PQ_temp")
208 : CALL parallel_gemm(transa="T", transb="N", m=dimen_RI, n=dimen_RI, k=dimen_ia, alpha=1.0_dp, &
209 : matrix_a=fm_Gamma, matrix_b=fm_ia_P, beta=0.0_dp, &
210 294 : matrix_c=fm_Gamma_PQ_temp)
211 294 : CALL cp_fm_release(fm_ia_P)
212 : ! create fm_Gamma_PQ matrix
213 294 : CALL cp_fm_create(fm_Gamma_PQ, fm_struct_RI, name="fm_Gamma_PQ")
214 294 : CALL cp_fm_struct_release(fm_struct_RI)
215 : ! perform matrix multiplication
216 : CALL parallel_gemm(transa="N", transb="T", m=dimen_RI, n=dimen_RI, k=dimen_RI, alpha=1.0_dp, &
217 : matrix_a=fm_Gamma_PQ_temp, matrix_b=PQ_half, beta=0.0_dp, &
218 294 : matrix_c=fm_Gamma_PQ)
219 294 : CALL cp_fm_release(fm_Gamma_PQ_temp)
220 294 : CALL cp_fm_release(PQ_half)
221 :
222 : ELSE
223 : ! create temp matrix
224 8 : CALL cp_fm_create(fm_Gamma_PQ_temp, fm_struct_RI, name="fm_Gamma_PQ_temp")
225 : CALL parallel_gemm(transa="T", transb="N", m=dimen_RI, n=dimen_RI, k=dimen_ia, alpha=1.0_dp, &
226 : matrix_a=fm_Y, matrix_b=fm_ia_P, beta=0.0_dp, &
227 8 : matrix_c=fm_Gamma_PQ_temp)
228 8 : CALL cp_fm_release(fm_ia_P)
229 :
230 8 : CALL cp_fm_create(fm_Gamma, fm_struct_ia, name="fm_Gamma")
231 8 : CALL cp_fm_struct_release(fm_struct_ia)
232 : ! perform the matrix multiplication
233 : CALL parallel_gemm(transa="N", transb="T", m=dimen_ia, n=dimen_RI, k=dimen_RI, alpha=1.0_dp, &
234 : matrix_a=fm_Y, matrix_b=PQ_half, beta=0.0_dp, &
235 8 : matrix_c=fm_Gamma)
236 8 : CALL cp_fm_release(fm_Y)
237 :
238 8 : CALL cp_fm_create(fm_Gamma_PQ_temp_2, fm_struct_RI, name="fm_Gamma_PQ_temp_2")
239 : CALL parallel_gemm(transa="N", transb="T", m=dimen_RI, n=dimen_RI, k=dimen_RI, alpha=1.0_dp, &
240 : matrix_a=fm_Gamma_PQ_temp, matrix_b=operator_half, beta=0.0_dp, &
241 8 : matrix_c=fm_Gamma_PQ_temp_2)
242 :
243 8 : CALL cp_fm_create(fm_Gamma_PQ_2, fm_struct_RI, name="fm_Gamma_PQ_2")
244 : CALL parallel_gemm(transa="N", transb="N", m=dimen_RI, n=dimen_RI, k=dimen_RI, alpha=1.0_dp, &
245 : matrix_a=PQ_half, matrix_b=fm_Gamma_PQ_temp_2, beta=0.0_dp, &
246 8 : matrix_c=fm_Gamma_PQ_temp)
247 8 : CALL cp_fm_to_fm(fm_Gamma_PQ_temp, fm_Gamma_PQ_2)
248 8 : CALL cp_fm_geadd(1.0_dp, "T", fm_Gamma_PQ_temp, 1.0_dp, fm_Gamma_PQ_2)
249 8 : CALL cp_fm_release(fm_Gamma_PQ_temp)
250 8 : CALL cp_fm_release(PQ_half)
251 :
252 8 : CALL cp_fm_create(fm_Gamma_PQ, fm_struct_RI)
253 8 : CALL cp_fm_struct_release(fm_struct_RI)
254 : CALL parallel_gemm(transa="N", transb="N", m=dimen_RI, n=dimen_RI, k=dimen_RI, alpha=-1.0_dp, &
255 : matrix_a=operator_half, matrix_b=fm_Gamma_PQ_temp_2, beta=0.0_dp, &
256 8 : matrix_c=fm_Gamma_PQ)
257 8 : CALL cp_fm_release(operator_half)
258 8 : CALL cp_fm_release(fm_Gamma_PQ_temp_2)
259 : END IF
260 :
261 : ! now redistribute the data, in this case we go back
262 : ! to the original way the integrals were distributed
263 : CALL fm2array(Gamma_2D, my_ia_size, my_ia_start, my_ia_end, &
264 : my_group_L_size, my_group_L_start, my_group_L_end, &
265 : group_grid_2_mepos, mepos_2_grid_group, &
266 : para_env_sub%num_pe, ngroup, &
267 302 : fm_Gamma)
268 :
269 1208 : ALLOCATE (Gamma_PQ(my_P_size, my_group_L_size))
270 : CALL fm2array(Gamma_PQ, my_P_size, my_P_start, my_P_end, &
271 : my_group_L_size, my_group_L_start, my_group_L_end, &
272 : group_grid_2_mepos, mepos_2_grid_group, &
273 : para_env_sub%num_pe, ngroup, &
274 302 : fm_Gamma_PQ)
275 302 : IF (.NOT. ALLOCATED(mp2_env%ri_grad%Gamma_PQ)) THEN
276 224 : CALL MOVE_ALLOC(Gamma_PQ, mp2_env%ri_grad%Gamma_PQ)
277 : ELSE
278 313178 : mp2_env%ri_grad%Gamma_PQ(:, :) = mp2_env%ri_grad%Gamma_PQ + Gamma_PQ
279 78 : DEALLOCATE (Gamma_PQ)
280 : END IF
281 :
282 302 : IF (.NOT. compare_potential_types(mp2_env%ri_metric, mp2_env%potential_parameter)) THEN
283 24 : ALLOCATE (Gamma_PQ(my_P_size, my_group_L_size))
284 : CALL fm2array(Gamma_PQ, my_P_size, my_P_start, my_P_end, &
285 : my_group_L_size, my_group_L_start, my_group_L_end, &
286 : group_grid_2_mepos, mepos_2_grid_group, &
287 : para_env_sub%num_pe, ngroup, &
288 8 : fm_Gamma_PQ_2)
289 8 : IF (.NOT. ALLOCATED(mp2_env%ri_grad%Gamma_PQ_2)) THEN
290 8 : CALL MOVE_ALLOC(Gamma_PQ, mp2_env%ri_grad%Gamma_PQ_2)
291 : ELSE
292 0 : mp2_env%ri_grad%Gamma_PQ_2(:, :) = mp2_env%ri_grad%Gamma_PQ_2 + Gamma_PQ
293 0 : DEALLOCATE (Gamma_PQ)
294 : END IF
295 : END IF
296 :
297 : ! allocate G_P_ia (DBCSR)
298 302 : IF (.NOT. ALLOCATED(mp2_env%ri_grad%G_P_ia)) THEN
299 224 : nspins = SIZE(mp2_env%ri_grad%mo_coeff_o)
300 14270 : ALLOCATE (mp2_env%ri_grad%G_P_ia(my_group_L_size, nspins))
301 526 : DO ispin = 1, nspins
302 13598 : DO kkB = 1, my_group_L_size
303 13374 : NULLIFY (mp2_env%ri_grad%G_P_ia(kkB, ispin)%matrix)
304 : END DO
305 : END DO
306 : END IF
307 :
308 : ! create the Gamma_ia_P in DBCSR style
309 : CALL create_dbcsr_gamma(Gamma_2D, homo, virtual, dimen_ia, para_env_sub, &
310 : my_ia_start, my_ia_end, my_group_L_size, gd_ia, &
311 302 : mp2_env%ri_grad%G_P_ia(:, kspin), mp2_env%ri_grad%mo_coeff_o(1)%matrix)
312 :
313 302 : DEALLOCATE (pos_info)
314 302 : DEALLOCATE (group_grid_2_mepos, mepos_2_grid_group)
315 302 : CALL release_group_dist(gd_ia)
316 302 : CALL release_group_dist(gd_P)
317 :
318 : ! release blacs_env
319 302 : CALL cp_blacs_env_release(blacs_env)
320 :
321 302 : CALL timestop(handle)
322 :
323 2114 : END SUBROUTINE complete_gamma
324 :
325 : ! **************************************************************************************************
326 : !> \brief Redistribute a 3d matrix to a 2d matrix
327 : !> \param B_ia_Q ...
328 : !> \param BIb_C_2D ...
329 : !> \param homo ...
330 : !> \param my_B_size ...
331 : !> \param virtual ...
332 : !> \param my_B_virtual_start ...
333 : !> \param my_ia_start ...
334 : !> \param my_ia_end ...
335 : !> \param my_ia_size ...
336 : !> \param my_group_L_size ...
337 : !> \param para_env_sub ...
338 : !> \param gd_B_virtual ...
339 : ! **************************************************************************************************
340 302 : SUBROUTINE mat_3d_to_2d(B_ia_Q, BIb_C_2D, homo, my_B_size, virtual, my_B_virtual_start, &
341 : my_ia_start, my_ia_end, my_ia_size, my_group_L_size, para_env_sub, gd_B_virtual)
342 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :), &
343 : INTENT(INOUT) :: B_ia_Q
344 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :), &
345 : INTENT(OUT) :: BIb_C_2D
346 : INTEGER, INTENT(IN) :: homo, my_B_size, virtual, &
347 : my_B_virtual_start, my_ia_start, &
348 : my_ia_end, my_ia_size, my_group_L_size
349 : TYPE(mp_para_env_type), INTENT(IN) :: para_env_sub
350 : TYPE(group_dist_d1_type), INTENT(IN) :: gd_B_virtual
351 :
352 : CHARACTER(LEN=*), PARAMETER :: routineN = 'mat_3d_to_2d'
353 :
354 : INTEGER :: handle, ia_global, iiB, jjB, proc_receive, proc_send, proc_shift, rec_B_size, &
355 : rec_B_virtual_end, rec_B_virtual_start
356 302 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :) :: BIb_C_rec
357 :
358 302 : CALL timeset(routineN, handle)
359 :
360 1208 : ALLOCATE (BIb_C_2D(my_ia_size, my_group_L_size))
361 709054 : BIb_C_2D = 0.0_dp
362 :
363 1388 : DO iiB = 1, homo
364 17032 : DO jjB = 1, my_B_size
365 15644 : ia_global = (iiB - 1)*virtual + my_B_virtual_start + jjB - 1
366 16730 : IF (ia_global >= my_ia_start .AND. ia_global <= my_ia_end) THEN
367 693951 : BIb_C_2D(ia_global - my_ia_start + 1, :) = B_ia_Q(iiB, jjB, 1:my_group_L_size)
368 : END IF
369 : END DO
370 : END DO
371 :
372 310 : DO proc_shift = 1, para_env_sub%num_pe - 1
373 8 : proc_send = MODULO(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
374 8 : proc_receive = MODULO(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
375 :
376 8 : CALL get_group_dist(gd_B_virtual, proc_receive, rec_B_virtual_start, rec_B_virtual_end, rec_B_size)
377 :
378 40 : ALLOCATE (BIb_C_rec(homo, rec_B_size, my_group_L_size))
379 47130 : BIb_C_rec = 0.0_dp
380 :
381 : CALL para_env_sub%sendrecv(B_ia_Q, proc_send, &
382 8 : BIb_C_rec, proc_receive)
383 :
384 48 : DO iiB = 1, homo
385 438 : DO jjB = 1, rec_B_size
386 390 : ia_global = (iiB - 1)*virtual + rec_B_virtual_start + jjB - 1
387 430 : IF (ia_global >= my_ia_start .AND. ia_global <= my_ia_end) THEN
388 17373 : BIb_C_2D(ia_global - my_ia_start + 1, :) = BIb_C_rec(iiB, jjB, 1:my_group_L_size)
389 : END IF
390 : END DO
391 : END DO
392 :
393 318 : DEALLOCATE (BIb_C_rec)
394 : END DO
395 302 : DEALLOCATE (B_ia_Q)
396 :
397 302 : CALL timestop(handle)
398 604 : END SUBROUTINE mat_3d_to_2d
399 :
400 : ! **************************************************************************************************
401 : !> \brief Redistribute a 3d matrix to a 2d matrix, specialized for Gamma_P_ia to account for a different order of indices
402 : !> \param B_ia_Q ...
403 : !> \param BIb_C_2D ...
404 : !> \param homo ...
405 : !> \param my_B_size ...
406 : !> \param virtual ...
407 : !> \param my_B_virtual_start ...
408 : !> \param my_ia_start ...
409 : !> \param my_ia_end ...
410 : !> \param my_ia_size ...
411 : !> \param my_group_L_size ...
412 : !> \param para_env_sub ...
413 : !> \param gd_B_virtual ...
414 : ! **************************************************************************************************
415 302 : SUBROUTINE mat_3d_to_2d_gamma(B_ia_Q, BIb_C_2D, homo, my_B_size, virtual, my_B_virtual_start, &
416 : my_ia_start, my_ia_end, my_ia_size, my_group_L_size, para_env_sub, gd_B_virtual)
417 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :), &
418 : INTENT(INOUT) :: B_ia_Q
419 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :), &
420 : INTENT(OUT) :: BIb_C_2D
421 : INTEGER, INTENT(IN) :: homo, my_B_size, virtual, &
422 : my_B_virtual_start, my_ia_start, &
423 : my_ia_end, my_ia_size, my_group_L_size
424 : TYPE(mp_para_env_type), INTENT(IN) :: para_env_sub
425 : TYPE(group_dist_d1_type), INTENT(IN) :: gd_B_virtual
426 :
427 : CHARACTER(LEN=*), PARAMETER :: routineN = 'mat_3d_to_2d_gamma'
428 :
429 : INTEGER :: handle, ia_global, iiB, jjB, proc_receive, proc_send, proc_shift, rec_B_size, &
430 : rec_B_virtual_end, rec_B_virtual_start
431 302 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :) :: BIb_C_rec
432 :
433 302 : CALL timeset(routineN, handle)
434 :
435 1208 : ALLOCATE (BIb_C_2D(my_ia_size, my_group_L_size))
436 709054 : BIb_C_2D = 0.0_dp
437 :
438 1388 : DO iiB = 1, homo
439 17032 : DO jjB = 1, my_B_size
440 15644 : ia_global = (iiB - 1)*virtual + my_B_virtual_start + jjB - 1
441 16730 : IF (ia_global >= my_ia_start .AND. ia_global <= my_ia_end) THEN
442 693951 : BIb_C_2D(ia_global - my_ia_start + 1, :) = B_ia_Q(jjB, iiB, 1:my_group_L_size)
443 : END IF
444 : END DO
445 : END DO
446 :
447 310 : DO proc_shift = 1, para_env_sub%num_pe - 1
448 8 : proc_send = MODULO(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
449 8 : proc_receive = MODULO(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
450 :
451 8 : CALL get_group_dist(gd_B_virtual, proc_receive, rec_B_virtual_start, rec_B_virtual_end, rec_B_size)
452 :
453 40 : ALLOCATE (BIb_C_rec(rec_B_size, homo, my_group_L_size))
454 43544 : BIb_C_rec = 0.0_dp
455 :
456 : CALL para_env_sub%sendrecv(B_ia_Q, proc_send, &
457 8 : BIb_C_rec, proc_receive)
458 :
459 48 : DO iiB = 1, homo
460 438 : DO jjB = 1, rec_B_size
461 390 : ia_global = (iiB - 1)*virtual + rec_B_virtual_start + jjB - 1
462 430 : IF (ia_global >= my_ia_start .AND. ia_global <= my_ia_end) THEN
463 17373 : BIb_C_2D(ia_global - my_ia_start + 1, :) = BIb_C_rec(jjB, iiB, 1:my_group_L_size)
464 : END IF
465 : END DO
466 : END DO
467 :
468 318 : DEALLOCATE (BIb_C_rec)
469 : END DO
470 302 : DEALLOCATE (B_ia_Q)
471 :
472 302 : CALL timestop(handle)
473 604 : END SUBROUTINE mat_3d_to_2d_gamma
474 :
475 : ! **************************************************************************************************
476 : !> \brief redistribute local part of array to fm
477 : !> \param mat2D ...
478 : !> \param fm_struct ...
479 : !> \param my_start_row ...
480 : !> \param my_end_row ...
481 : !> \param my_start_col ...
482 : !> \param my_end_col ...
483 : !> \param gd_row ...
484 : !> \param gd_col ...
485 : !> \param group_grid_2_mepos ...
486 : !> \param ngroup_row ...
487 : !> \param ngroup_col ...
488 : !> \param fm_mat ...
489 : !> \param integ_group_size ...
490 : !> \param color_group ...
491 : !> \param do_release_mat whether to release the array (default: yes)
492 : ! **************************************************************************************************
493 1276 : SUBROUTINE array2fm(mat2D, fm_struct, my_start_row, my_end_row, &
494 : my_start_col, my_end_col, &
495 : gd_row, gd_col, &
496 : group_grid_2_mepos, ngroup_row, ngroup_col, &
497 : fm_mat, integ_group_size, color_group, do_release_mat)
498 :
499 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :), &
500 : INTENT(INOUT) :: mat2D
501 : TYPE(cp_fm_struct_type), POINTER :: fm_struct
502 : INTEGER, INTENT(IN) :: my_start_row, my_end_row, my_start_col, &
503 : my_end_col
504 : TYPE(group_dist_d1_type), INTENT(IN) :: gd_row, gd_col
505 : INTEGER, ALLOCATABLE, DIMENSION(:, :), INTENT(IN) :: group_grid_2_mepos
506 : INTEGER, INTENT(IN) :: ngroup_row, ngroup_col
507 : TYPE(cp_fm_type), INTENT(OUT) :: fm_mat
508 : INTEGER, INTENT(IN), OPTIONAL :: integ_group_size, color_group
509 : LOGICAL, INTENT(IN), OPTIONAL :: do_release_mat
510 :
511 : CHARACTER(LEN=*), PARAMETER :: routineN = 'array2fm'
512 :
513 : INTEGER :: dummy_proc, end_col_block, end_row_block, handle, handle2, i_global, i_local, &
514 : i_sub, iiB, iii, itmp(2), j_global, j_local, j_sub, jjB, my_num_col_blocks, &
515 : my_num_row_blocks, mypcol, myprow, ncol_local, npcol, nprow, nrow_local, num_cols, &
516 : num_rec_cols, num_rows, number_of_rec, number_of_send, proc_receive, proc_send, &
517 : proc_shift, rec_col_end, rec_col_size, rec_col_start, rec_counter, rec_pcol, rec_prow, &
518 : rec_row_end, rec_row_size, rec_row_start, ref_send_pcol, ref_send_prow, send_counter, &
519 : send_pcol, send_prow, size_rec_buffer, size_send_buffer, start_col_block, start_row_block
520 1276 : INTEGER, ALLOCATABLE, DIMENSION(:) :: iii_vet, index_col_rec, map_rec_size, &
521 1276 : map_send_size
522 1276 : INTEGER, ALLOCATABLE, DIMENSION(:, :) :: blocks_ranges_col, blocks_ranges_row, &
523 1276 : grid_2_mepos, grid_ref_2_send_pos, &
524 1276 : mepos_2_grid
525 1276 : INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
526 : LOGICAL :: convert_pos, my_do_release_mat
527 : REAL(KIND=dp) :: part_col, part_row
528 : TYPE(integ_mat_buffer_type), ALLOCATABLE, &
529 1276 : DIMENSION(:) :: buffer_rec, buffer_send
530 : TYPE(mp_para_env_type), POINTER :: para_env
531 1276 : TYPE(mp_request_type), ALLOCATABLE, DIMENSION(:) :: req_send
532 :
533 1276 : CALL timeset(routineN, handle)
534 :
535 1276 : my_do_release_mat = .TRUE.
536 1276 : IF (PRESENT(do_release_mat)) my_do_release_mat = do_release_mat
537 :
538 1276 : CALL cp_fm_struct_get(fm_struct, para_env=para_env, nrow_global=num_rows, ncol_global=num_cols)
539 :
540 : ! create the full matrix, (num_rows x num_cols)
541 1276 : CALL cp_fm_create(fm_mat, fm_struct, name="fm_mat")
542 1276 : CALL cp_fm_set_all(matrix=fm_mat, alpha=0.0_dp)
543 :
544 : ! start filling procedure
545 : ! fill the matrix
546 : CALL cp_fm_get_info(matrix=fm_mat, &
547 : nrow_local=nrow_local, &
548 : ncol_local=ncol_local, &
549 : row_indices=row_indices, &
550 1276 : col_indices=col_indices)
551 1276 : myprow = fm_mat%matrix_struct%context%mepos(1)
552 1276 : mypcol = fm_mat%matrix_struct%context%mepos(2)
553 1276 : nprow = fm_mat%matrix_struct%context%num_pe(1)
554 1276 : npcol = fm_mat%matrix_struct%context%num_pe(2)
555 :
556 : ! start the communication part
557 : ! 0) create array containing the processes position
558 : ! and supporting infos
559 1276 : CALL timeset(routineN//"_info", handle2)
560 5104 : ALLOCATE (grid_2_mepos(0:nprow - 1, 0:npcol - 1))
561 4960 : grid_2_mepos = 0
562 3828 : ALLOCATE (mepos_2_grid(2, 0:para_env%num_pe - 1))
563 1276 : grid_2_mepos(myprow, mypcol) = para_env%mepos
564 : ! sum infos
565 1276 : CALL para_env%sum(grid_2_mepos)
566 3828 : CALL para_env%allgather([myprow, mypcol], mepos_2_grid)
567 :
568 : ! 1) loop over my local data and define a map for the proc to send data
569 3828 : ALLOCATE (map_send_size(0:para_env%num_pe - 1))
570 3684 : map_send_size = 0
571 1276 : dummy_proc = 0
572 83246 : DO jjB = my_start_col, my_end_col
573 81970 : send_pcol = fm_mat%matrix_struct%g2p_col(jjB)
574 4439302 : DO iiB = my_start_row, my_end_row
575 4356056 : send_prow = fm_mat%matrix_struct%g2p_row(iiB)
576 4356056 : proc_send = grid_2_mepos(send_prow, send_pcol)
577 4438026 : map_send_size(proc_send) = map_send_size(proc_send) + 1
578 : END DO
579 : END DO
580 :
581 : ! 2) loop over my local data of fm_mat and define a map for the proc from which rec data
582 3828 : ALLOCATE (map_rec_size(0:para_env%num_pe - 1))
583 3684 : map_rec_size = 0
584 1276 : part_row = REAL(num_rows, KIND=dp)/REAL(ngroup_row, KIND=dp)
585 1276 : part_col = REAL(num_cols, KIND=dp)/REAL(ngroup_col, KIND=dp)
586 :
587 : ! In some cases we have to convert global positions to positions in a subgroup (RPA)
588 1276 : convert_pos = .FALSE.
589 1276 : IF (PRESENT(integ_group_size) .AND. PRESENT(color_group)) convert_pos = .TRUE.
590 :
591 50398 : DO jjB = 1, nrow_local
592 49122 : j_global = row_indices(jjB)
593 : ! check the group holding this element
594 : ! dirty way, if someone has a better idea ...
595 49122 : rec_prow = INT(REAL(j_global - 1, KIND=dp)/part_row)
596 49122 : rec_prow = MAX(0, rec_prow)
597 49122 : rec_prow = MIN(rec_prow, ngroup_row - 1)
598 : DO
599 49122 : itmp = get_limit(num_rows, ngroup_row, rec_prow)
600 49122 : IF (j_global >= itmp(1) .AND. j_global <= itmp(2)) EXIT
601 0 : IF (j_global < itmp(1)) rec_prow = rec_prow - 1
602 49122 : IF (j_global > itmp(2)) rec_prow = rec_prow + 1
603 : END DO
604 :
605 49122 : IF (convert_pos) THEN
606 : ! if the group is not in the same RPA group cycle
607 18188 : IF ((rec_prow/integ_group_size) .NE. color_group) CYCLE
608 : ! convert global position to position into the RPA group
609 12731 : rec_prow = MOD(rec_prow, integ_group_size)
610 : END IF
611 :
612 4400997 : DO iiB = 1, ncol_local
613 4356056 : i_global = col_indices(iiB)
614 : ! check the process in the group holding this element
615 4356056 : rec_pcol = INT(REAL(i_global - 1, KIND=dp)/part_col)
616 4356056 : rec_pcol = MAX(0, rec_pcol)
617 4356056 : rec_pcol = MIN(rec_pcol, ngroup_col - 1)
618 : DO
619 4356056 : itmp = get_limit(num_cols, ngroup_col, rec_pcol)
620 4356056 : IF (i_global >= itmp(1) .AND. i_global <= itmp(2)) EXIT
621 0 : IF (i_global < itmp(1)) rec_pcol = rec_pcol - 1
622 4356056 : IF (i_global > itmp(2)) rec_pcol = rec_pcol + 1
623 : END DO
624 :
625 4356056 : proc_receive = group_grid_2_mepos(rec_prow, rec_pcol)
626 :
627 4405178 : map_rec_size(proc_receive) = map_rec_size(proc_receive) + 1
628 :
629 : END DO ! i_global
630 : END DO ! j_global
631 :
632 : ! 3) check if the local data has to be stored in the new fm_mat
633 1276 : IF (map_rec_size(para_env%mepos) > 0) THEN
634 122402 : DO jjB = 1, ncol_local
635 121132 : j_global = col_indices(jjB)
636 122402 : IF (j_global >= my_start_col .AND. j_global <= my_end_col) THEN
637 3833977 : DO iiB = 1, nrow_local
638 3752007 : i_global = row_indices(iiB)
639 3833977 : IF (i_global >= my_start_row .AND. i_global <= my_end_row) THEN
640 2962308 : fm_mat%local_data(iiB, jjB) = mat2D(i_global - my_start_row + 1, j_global - my_start_col + 1)
641 : END IF
642 : END DO
643 : END IF
644 : END DO
645 : END IF
646 1276 : CALL timestop(handle2)
647 :
648 : ! 4) reorder data in the send_buffer
649 1276 : CALL timeset(routineN//"_buffer_send", handle2)
650 : ! count the number of messages to send
651 1276 : number_of_send = 0
652 2408 : DO proc_shift = 1, para_env%num_pe - 1
653 1132 : proc_send = MODULO(para_env%mepos + proc_shift, para_env%num_pe)
654 2408 : IF (map_send_size(proc_send) > 0) THEN
655 1006 : number_of_send = number_of_send + 1
656 : END IF
657 : END DO
658 : ! allocate the structure that will hold the messages to be sent
659 4564 : ALLOCATE (buffer_send(number_of_send))
660 : ! grid_ref_2_send_pos is an array (map) that given a pair
661 : ! (ref_send_prow,ref_send_pcol) returns
662 : ! the position in the buffer_send associated to that process
663 3828 : ALLOCATE (grid_ref_2_send_pos(0:nprow - 1, 0:npcol - 1))
664 4960 : grid_ref_2_send_pos = 0
665 : ! finalize the allocation of buffer_send with the actual size
666 : ! of each message (actual size is size_send_buffer)
667 1276 : send_counter = 0
668 2408 : DO proc_shift = 1, para_env%num_pe - 1
669 1132 : proc_send = MODULO(para_env%mepos + proc_shift, para_env%num_pe)
670 1132 : size_send_buffer = map_send_size(proc_send)
671 2408 : IF (map_send_size(proc_send) > 0) THEN
672 1006 : send_counter = send_counter + 1
673 : ! allocate the sending buffer (msg)
674 3018 : ALLOCATE (buffer_send(send_counter)%msg(size_send_buffer))
675 1394754 : buffer_send(send_counter)%msg = 0.0_dp
676 1006 : buffer_send(send_counter)%proc = proc_send
677 : ! get the pointer to prow, pcol of the process that has
678 : ! to receive this message
679 1006 : ref_send_prow = mepos_2_grid(1, proc_send)
680 1006 : ref_send_pcol = mepos_2_grid(2, proc_send)
681 : ! save the rank of the process that has to receive this message
682 1006 : grid_ref_2_send_pos(ref_send_prow, ref_send_pcol) = send_counter
683 : END IF
684 : END DO
685 :
686 : ! loop over the locally held data and fill the buffer_send
687 : ! for doing that we need an array that keep track if the
688 : ! sequential increase of the index for each message
689 3558 : ALLOCATE (iii_vet(number_of_send))
690 2282 : iii_vet = 0
691 74792 : DO iiB = my_start_row, my_end_row
692 73516 : send_prow = fm_mat%matrix_struct%g2p_row(iiB)
693 4430848 : DO jjB = my_start_col, my_end_col
694 4356056 : send_pcol = fm_mat%matrix_struct%g2p_col(jjB)
695 : ! we don't need to send to ourselves
696 4356056 : IF (grid_2_mepos(send_prow, send_pcol) == para_env%mepos) CYCLE
697 :
698 1393748 : send_counter = grid_ref_2_send_pos(send_prow, send_pcol)
699 1393748 : iii_vet(send_counter) = iii_vet(send_counter) + 1
700 1393748 : iii = iii_vet(send_counter)
701 4429572 : buffer_send(send_counter)%msg(iii) = mat2D(iiB - my_start_row + 1, jjB - my_start_col + 1)
702 : END DO
703 : END DO
704 :
705 1276 : DEALLOCATE (iii_vet)
706 1276 : DEALLOCATE (grid_ref_2_send_pos)
707 1276 : IF (my_do_release_mat) DEALLOCATE (mat2D)
708 1276 : CALL timestop(handle2)
709 :
710 : ! 5) similarly to what done for the buffer_send
711 : ! create the buffer for receive, post the message with irecv
712 : ! and send the messages non-blocking
713 1276 : CALL timeset(routineN//"_isendrecv", handle2)
714 : ! count the number of messages to be received
715 1276 : number_of_rec = 0
716 2408 : DO proc_shift = 1, para_env%num_pe - 1
717 1132 : proc_receive = MODULO(para_env%mepos - proc_shift, para_env%num_pe)
718 2408 : IF (map_rec_size(proc_receive) > 0) THEN
719 1006 : number_of_rec = number_of_rec + 1
720 : END IF
721 : END DO
722 :
723 4564 : ALLOCATE (buffer_rec(number_of_rec))
724 :
725 1276 : rec_counter = 0
726 2408 : DO proc_shift = 1, para_env%num_pe - 1
727 1132 : proc_receive = MODULO(para_env%mepos - proc_shift, para_env%num_pe)
728 1132 : size_rec_buffer = map_rec_size(proc_receive)
729 2408 : IF (map_rec_size(proc_receive) > 0) THEN
730 1006 : rec_counter = rec_counter + 1
731 : ! prepare the buffer for receive
732 3018 : ALLOCATE (buffer_rec(rec_counter)%msg(size_rec_buffer))
733 1394754 : buffer_rec(rec_counter)%msg = 0.0_dp
734 1006 : buffer_rec(rec_counter)%proc = proc_receive
735 : ! post the message to be received
736 : CALL para_env%irecv(buffer_rec(rec_counter)%msg, proc_receive, &
737 1006 : buffer_rec(rec_counter)%msg_req)
738 : END IF
739 : END DO
740 :
741 : ! send messages
742 4564 : ALLOCATE (req_send(number_of_send))
743 1276 : send_counter = 0
744 2408 : DO proc_shift = 1, para_env%num_pe - 1
745 1132 : proc_send = MODULO(para_env%mepos + proc_shift, para_env%num_pe)
746 2408 : IF (map_send_size(proc_send) > 0) THEN
747 1006 : send_counter = send_counter + 1
748 : CALL para_env%isend(buffer_send(send_counter)%msg, proc_send, &
749 1006 : buffer_send(send_counter)%msg_req)
750 1006 : req_send(send_counter) = buffer_send(send_counter)%msg_req
751 : END IF
752 : END DO
753 1276 : CALL timestop(handle2)
754 :
755 : ! 6) fill the fm_mat matrix with the messages received from the
756 : ! other processes
757 1276 : CALL timeset(routineN//"_fill", handle2)
758 : ! In order to perform this step efficiently first we have to know the
759 : ! ranges of the blocks over which a given process hold its local data.
760 : ! Start with the rows ...
761 1276 : my_num_row_blocks = 1
762 49122 : DO iiB = 1, nrow_local - 1
763 47846 : IF (ABS(row_indices(iiB + 1) - row_indices(iiB)) == 1) CYCLE
764 49122 : my_num_row_blocks = my_num_row_blocks + 1
765 : END DO
766 3828 : ALLOCATE (blocks_ranges_row(2, my_num_row_blocks))
767 5464 : blocks_ranges_row = 0
768 1276 : blocks_ranges_row(1, 1) = row_indices(1)
769 1276 : iii = 1
770 49122 : DO iiB = 1, nrow_local - 1
771 47846 : IF (ABS(row_indices(iiB + 1) - row_indices(iiB)) == 1) CYCLE
772 120 : iii = iii + 1
773 120 : blocks_ranges_row(2, iii - 1) = row_indices(iiB)
774 49122 : blocks_ranges_row(1, iii) = row_indices(iiB + 1)
775 : END DO
776 1276 : blocks_ranges_row(2, my_num_row_blocks) = row_indices(MAX(nrow_local, 1))
777 : ! ... and columns
778 1276 : my_num_col_blocks = 1
779 121138 : DO jjB = 1, ncol_local - 1
780 119862 : IF (ABS(col_indices(jjB + 1) - col_indices(jjB)) == 1) CYCLE
781 121138 : my_num_col_blocks = my_num_col_blocks + 1
782 : END DO
783 3828 : ALLOCATE (blocks_ranges_col(2, my_num_col_blocks))
784 5104 : blocks_ranges_col = 0
785 1276 : blocks_ranges_col(1, 1) = col_indices(1)
786 1276 : iii = 1
787 121138 : DO jjB = 1, ncol_local - 1
788 119862 : IF (ABS(col_indices(jjB + 1) - col_indices(jjB)) == 1) CYCLE
789 0 : iii = iii + 1
790 0 : blocks_ranges_col(2, iii - 1) = col_indices(jjB)
791 121138 : blocks_ranges_col(1, iii) = col_indices(jjB + 1)
792 : END DO
793 1276 : blocks_ranges_col(2, my_num_col_blocks) = col_indices(MAX(ncol_local, 1))
794 :
795 1276 : rec_counter = 0
796 2408 : DO proc_shift = 1, para_env%num_pe - 1
797 1132 : proc_receive = MODULO(para_env%mepos - proc_shift, para_env%num_pe)
798 1132 : size_rec_buffer = map_rec_size(proc_receive)
799 :
800 2408 : IF (map_rec_size(proc_receive) > 0) THEN
801 1006 : rec_counter = rec_counter + 1
802 :
803 1006 : CALL get_group_dist(gd_col, proc_receive, rec_col_start, rec_col_end, rec_col_size)
804 :
805 : ! precompute the number of received columns and relative index
806 1006 : num_rec_cols = 0
807 2012 : DO jjB = 1, my_num_col_blocks
808 1006 : start_col_block = MAX(blocks_ranges_col(1, jjB), rec_col_start)
809 1006 : end_col_block = MIN(blocks_ranges_col(2, jjB), rec_col_end)
810 46888 : DO j_sub = start_col_block, end_col_block
811 45882 : num_rec_cols = num_rec_cols + 1
812 : END DO
813 : END DO
814 3018 : ALLOCATE (index_col_rec(num_rec_cols))
815 45882 : index_col_rec = 0
816 1006 : iii = 0
817 2012 : DO jjB = 1, my_num_col_blocks
818 1006 : start_col_block = MAX(blocks_ranges_col(1, jjB), rec_col_start)
819 1006 : end_col_block = MIN(blocks_ranges_col(2, jjB), rec_col_end)
820 46888 : DO j_sub = start_col_block, end_col_block
821 44876 : iii = iii + 1
822 44876 : j_local = fm_mat%matrix_struct%g2l_col(j_sub)
823 45882 : index_col_rec(iii) = j_local
824 : END DO
825 : END DO
826 :
827 1006 : CALL get_group_dist(gd_row, proc_receive, rec_row_start, rec_row_end, rec_row_size)
828 :
829 : ! wait for the message
830 1006 : CALL buffer_rec(rec_counter)%msg_req%wait()
831 :
832 : ! fill the local data in fm_mat
833 1006 : iii = 0
834 2132 : DO iiB = 1, my_num_row_blocks
835 1126 : start_row_block = MAX(blocks_ranges_row(1, iiB), rec_row_start)
836 1126 : end_row_block = MIN(blocks_ranges_row(2, iiB), rec_row_end)
837 33379 : DO i_sub = start_row_block, end_row_block
838 31247 : i_local = fm_mat%matrix_struct%g2l_row(i_sub)
839 1426121 : DO jjB = 1, num_rec_cols
840 1393748 : iii = iii + 1
841 1393748 : j_local = index_col_rec(jjB)
842 1424995 : fm_mat%local_data(i_local, j_local) = buffer_rec(rec_counter)%msg(iii)
843 : END DO
844 : END DO
845 : END DO
846 :
847 1006 : DEALLOCATE (buffer_rec(rec_counter)%msg)
848 3018 : DEALLOCATE (index_col_rec)
849 : END IF
850 : END DO
851 2282 : DEALLOCATE (buffer_rec)
852 :
853 1276 : DEALLOCATE (blocks_ranges_row)
854 1276 : DEALLOCATE (blocks_ranges_col)
855 :
856 1276 : CALL timestop(handle2)
857 :
858 : ! 7) Finally wait for all messeges to be sent
859 1276 : CALL timeset(routineN//"_waitall", handle2)
860 1276 : CALL mp_waitall(req_send(:))
861 2282 : DO send_counter = 1, number_of_send
862 2282 : DEALLOCATE (buffer_send(send_counter)%msg)
863 : END DO
864 2282 : DEALLOCATE (buffer_send)
865 1276 : CALL timestop(handle2)
866 :
867 1276 : DEALLOCATE (map_send_size)
868 1276 : DEALLOCATE (map_rec_size)
869 1276 : DEALLOCATE (grid_2_mepos)
870 1276 : DEALLOCATE (mepos_2_grid)
871 :
872 1276 : CALL timestop(handle)
873 :
874 10208 : END SUBROUTINE array2fm
875 :
876 : ! **************************************************************************************************
877 : !> \brief redistribute fm to local part of array
878 : !> \param mat2D ...
879 : !> \param my_rows ...
880 : !> \param my_start_row ...
881 : !> \param my_end_row ...
882 : !> \param my_cols ...
883 : !> \param my_start_col ...
884 : !> \param my_end_col ...
885 : !> \param group_grid_2_mepos ...
886 : !> \param mepos_2_grid_group ...
887 : !> \param ngroup_row ...
888 : !> \param ngroup_col ...
889 : !> \param fm_mat ...
890 : ! **************************************************************************************************
891 706 : SUBROUTINE fm2array(mat2D, my_rows, my_start_row, my_end_row, &
892 : my_cols, my_start_col, my_end_col, &
893 : group_grid_2_mepos, mepos_2_grid_group, &
894 : ngroup_row, ngroup_col, &
895 : fm_mat)
896 :
897 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :), &
898 : INTENT(OUT) :: mat2D
899 : INTEGER, INTENT(IN) :: my_rows, my_start_row, my_end_row, &
900 : my_cols, my_start_col, my_end_col
901 : INTEGER, ALLOCATABLE, DIMENSION(:, :), INTENT(IN) :: group_grid_2_mepos, mepos_2_grid_group
902 : INTEGER, INTENT(IN) :: ngroup_row, ngroup_col
903 : TYPE(cp_fm_type), INTENT(INOUT) :: fm_mat
904 :
905 : CHARACTER(LEN=*), PARAMETER :: routineN = 'fm2array'
906 :
907 : INTEGER :: dummy_proc, handle, handle2, i_global, iiB, iii, itmp(2), j_global, jjB, mypcol, &
908 : myprow, ncol_local, npcol, nprow, nrow_local, num_cols, num_rec_rows, num_rows, &
909 : number_of_rec, number_of_send, proc_receive, proc_send, proc_shift, rec_col_size, &
910 : rec_counter, rec_pcol, rec_prow, rec_row_size, ref_send_pcol, ref_send_prow, &
911 : send_counter, send_pcol, send_prow, size_rec_buffer, size_send_buffer
912 706 : INTEGER, ALLOCATABLE, DIMENSION(:) :: iii_vet, index_row_rec, map_rec_size, &
913 706 : map_send_size
914 706 : INTEGER, ALLOCATABLE, DIMENSION(:, :) :: grid_2_mepos, grid_ref_2_send_pos, &
915 706 : mepos_2_grid, sizes
916 706 : INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
917 : REAL(KIND=dp) :: part_col, part_row
918 : TYPE(integ_mat_buffer_type), ALLOCATABLE, &
919 706 : DIMENSION(:) :: buffer_rec, buffer_send
920 : TYPE(mp_para_env_type), POINTER :: para_env
921 706 : TYPE(mp_request_type), ALLOCATABLE, DIMENSION(:) :: req_send
922 :
923 706 : CALL timeset(routineN, handle)
924 :
925 : ! allocate the array
926 2824 : ALLOCATE (mat2D(my_rows, my_cols))
927 2184441 : mat2D = 0.0_dp
928 :
929 : ! start procedure
930 : ! get information of from the full matrix
931 : CALL cp_fm_get_info(matrix=fm_mat, &
932 : nrow_local=nrow_local, &
933 : ncol_local=ncol_local, &
934 : row_indices=row_indices, &
935 : col_indices=col_indices, &
936 : nrow_global=num_rows, &
937 : ncol_global=num_cols, &
938 706 : para_env=para_env)
939 706 : myprow = fm_mat%matrix_struct%context%mepos(1)
940 706 : mypcol = fm_mat%matrix_struct%context%mepos(2)
941 706 : nprow = fm_mat%matrix_struct%context%num_pe(1)
942 706 : npcol = fm_mat%matrix_struct%context%num_pe(2)
943 :
944 : ! start the communication part
945 : ! 0) create array containing the processes position
946 : ! and supporting infos
947 706 : CALL timeset(routineN//"_info", handle2)
948 2824 : ALLOCATE (grid_2_mepos(0:nprow - 1, 0:npcol - 1))
949 2824 : grid_2_mepos = 0
950 2118 : ALLOCATE (mepos_2_grid(2, 0:para_env%num_pe - 1))
951 :
952 : ! fill the info array
953 706 : grid_2_mepos(myprow, mypcol) = para_env%mepos
954 :
955 : ! sum infos
956 706 : CALL para_env%sum(grid_2_mepos)
957 2118 : CALL para_env%allgather([myprow, mypcol], mepos_2_grid)
958 :
959 : ! 1) loop over my local data and define a map for the proc to send data
960 2118 : ALLOCATE (map_send_size(0:para_env%num_pe - 1))
961 2118 : map_send_size = 0
962 706 : part_row = REAL(num_rows, KIND=dp)/REAL(ngroup_row, KIND=dp)
963 706 : part_col = REAL(num_cols, KIND=dp)/REAL(ngroup_col, KIND=dp)
964 :
965 60252 : DO jjB = 1, ncol_local
966 59546 : j_global = col_indices(jjB)
967 : ! check the group holding this element
968 : ! dirty way, if someone has a better idea ...
969 59546 : send_pcol = INT(REAL(j_global - 1, KIND=dp)/part_col)
970 59546 : send_pcol = MAX(0, send_pcol)
971 59546 : send_pcol = MIN(send_pcol, ngroup_col - 1)
972 : DO
973 59546 : itmp = get_limit(num_cols, ngroup_col, send_pcol)
974 59546 : IF (j_global >= itmp(1) .AND. j_global <= itmp(2)) EXIT
975 0 : IF (j_global < itmp(1)) send_pcol = send_pcol - 1
976 59546 : IF (j_global > itmp(2)) send_pcol = send_pcol + 1
977 : END DO
978 :
979 2212778 : DO iiB = 1, nrow_local
980 2152526 : i_global = row_indices(iiB)
981 : ! check the process in the group holding this element
982 2152526 : send_prow = INT(REAL(i_global - 1, KIND=dp)/part_row)
983 2152526 : send_prow = MAX(0, send_prow)
984 2152526 : send_prow = MIN(send_prow, ngroup_row - 1)
985 : DO
986 2152526 : itmp = get_limit(num_rows, ngroup_row, send_prow)
987 2152526 : IF (i_global >= itmp(1) .AND. i_global <= itmp(2)) EXIT
988 0 : IF (i_global < itmp(1)) send_prow = send_prow - 1
989 2152526 : IF (i_global > itmp(2)) send_prow = send_prow + 1
990 : END DO
991 :
992 2152526 : proc_send = group_grid_2_mepos(send_prow, send_pcol)
993 :
994 2212072 : map_send_size(proc_send) = map_send_size(proc_send) + 1
995 :
996 : END DO ! i_global
997 : END DO ! j_global
998 :
999 : ! 2) loop over my local data of the array and define a map for the proc from which rec data
1000 2118 : ALLOCATE (map_rec_size(0:para_env%num_pe - 1))
1001 2118 : map_rec_size = 0
1002 706 : dummy_proc = 0
1003 31915 : DO jjB = my_start_col, my_end_col
1004 31209 : rec_pcol = fm_mat%matrix_struct%g2p_col(jjB)
1005 2184441 : DO iiB = my_start_row, my_end_row
1006 2152526 : rec_prow = fm_mat%matrix_struct%g2p_row(iiB)
1007 2152526 : proc_receive = grid_2_mepos(rec_prow, rec_pcol)
1008 2183735 : map_rec_size(proc_receive) = map_rec_size(proc_receive) + 1
1009 : END DO
1010 : END DO
1011 :
1012 : ! 3) check if the local data in the fm_mat has to be stored in the new array
1013 706 : IF (map_rec_size(para_env%mepos) > 0) THEN
1014 60252 : DO jjB = 1, ncol_local
1015 59546 : j_global = col_indices(jjB)
1016 60252 : IF (j_global >= my_start_col .AND. j_global <= my_end_col) THEN
1017 1172346 : DO iiB = 1, nrow_local
1018 1141137 : i_global = row_indices(iiB)
1019 1172346 : IF (i_global >= my_start_row .AND. i_global <= my_end_row) THEN
1020 1140257 : mat2D(i_global - my_start_row + 1, j_global - my_start_col + 1) = fm_mat%local_data(iiB, jjB)
1021 : END IF
1022 : END DO
1023 : END IF
1024 : END DO
1025 : END IF
1026 706 : CALL timestop(handle2)
1027 :
1028 : ! 4) reorder data in the send_buffer
1029 706 : CALL timeset(routineN//"_buffer_send", handle2)
1030 : ! count the number of messages to send
1031 706 : number_of_send = 0
1032 1412 : DO proc_shift = 1, para_env%num_pe - 1
1033 706 : proc_send = MODULO(para_env%mepos + proc_shift, para_env%num_pe)
1034 1412 : IF (map_send_size(proc_send) > 0) THEN
1035 676 : number_of_send = number_of_send + 1
1036 : END IF
1037 : END DO
1038 : ! allocate the structure that will hold the messages to be sent
1039 2764 : ALLOCATE (buffer_send(number_of_send))
1040 : ! grid_ref_2_send_pos is an array (map) that given a pair
1041 : ! (ref_send_prow,ref_send_pcol) returns
1042 : ! the position in the buffer_send associated to that process
1043 :
1044 2824 : ALLOCATE (grid_ref_2_send_pos(0:ngroup_row - 1, 0:ngroup_col - 1))
1045 3498 : grid_ref_2_send_pos = 0
1046 :
1047 : ! finalize the allocation of buffer_send with the actual size
1048 : ! of each message (actual size is size_send_buffer)
1049 706 : send_counter = 0
1050 1412 : DO proc_shift = 1, para_env%num_pe - 1
1051 706 : proc_send = MODULO(para_env%mepos + proc_shift, para_env%num_pe)
1052 706 : size_send_buffer = map_send_size(proc_send)
1053 1412 : IF (map_send_size(proc_send) > 0) THEN
1054 676 : send_counter = send_counter + 1
1055 : ! allocate the sending buffer (msg)
1056 2028 : ALLOCATE (buffer_send(send_counter)%msg(size_send_buffer))
1057 1012945 : buffer_send(send_counter)%msg = 0.0_dp
1058 676 : buffer_send(send_counter)%proc = proc_send
1059 : ! get the pointer to prow, pcol of the process that has
1060 : ! to receive this message
1061 676 : ref_send_prow = mepos_2_grid_group(1, proc_send)
1062 676 : ref_send_pcol = mepos_2_grid_group(2, proc_send)
1063 : ! save the rank of the process that has to receive this message
1064 676 : grid_ref_2_send_pos(ref_send_prow, ref_send_pcol) = send_counter
1065 : END IF
1066 : END DO
1067 :
1068 : ! loop over the locally held data and fill the buffer_send
1069 : ! for doing that we need an array that keep track if the
1070 : ! sequential increase of the index for each message
1071 2088 : ALLOCATE (iii_vet(number_of_send))
1072 1382 : iii_vet = 0
1073 60252 : DO jjB = 1, ncol_local
1074 59546 : j_global = col_indices(jjB)
1075 : ! check the group holding this element
1076 : ! dirty way, if someone has a better idea ...
1077 59546 : send_pcol = INT(REAL(j_global - 1, KIND=dp)/part_col)
1078 59546 : send_pcol = MAX(0, send_pcol)
1079 59546 : send_pcol = MIN(send_pcol, ngroup_col - 1)
1080 : DO
1081 59546 : itmp = get_limit(num_cols, ngroup_col, send_pcol)
1082 59546 : IF (j_global >= itmp(1) .AND. j_global <= itmp(2)) EXIT
1083 0 : IF (j_global < itmp(1)) send_pcol = send_pcol - 1
1084 59546 : IF (j_global > itmp(2)) send_pcol = send_pcol + 1
1085 : END DO
1086 :
1087 2212778 : DO iiB = 1, nrow_local
1088 2152526 : i_global = row_indices(iiB)
1089 : ! check the process in the group holding this element
1090 2152526 : send_prow = INT(REAL(i_global - 1, KIND=dp)/part_row)
1091 2152526 : send_prow = MAX(0, send_prow)
1092 2152526 : send_prow = MIN(send_prow, ngroup_row - 1)
1093 : DO
1094 2152526 : itmp = get_limit(num_rows, ngroup_row, send_prow)
1095 2152526 : IF (i_global >= itmp(1) .AND. i_global <= itmp(2)) EXIT
1096 0 : IF (i_global < itmp(1)) send_prow = send_prow - 1
1097 2152526 : IF (i_global > itmp(2)) send_prow = send_prow + 1
1098 : END DO
1099 : ! we don't need to send to ourselves
1100 2152526 : IF (group_grid_2_mepos(send_prow, send_pcol) == para_env%mepos) CYCLE
1101 :
1102 1012269 : send_counter = grid_ref_2_send_pos(send_prow, send_pcol)
1103 1012269 : iii_vet(send_counter) = iii_vet(send_counter) + 1
1104 1012269 : iii = iii_vet(send_counter)
1105 2212072 : buffer_send(send_counter)%msg(iii) = fm_mat%local_data(iiB, jjB)
1106 : END DO
1107 : END DO
1108 :
1109 706 : DEALLOCATE (iii_vet)
1110 706 : DEALLOCATE (grid_ref_2_send_pos)
1111 706 : CALL timestop(handle2)
1112 :
1113 : ! 5) similarly to what done for the buffer_send
1114 : ! create the buffer for receive, post the message with irecv
1115 : ! and send the messages with non-blocking
1116 706 : CALL timeset(routineN//"_isendrecv", handle2)
1117 : ! count the number of messages to be received
1118 706 : number_of_rec = 0
1119 1412 : DO proc_shift = 1, para_env%num_pe - 1
1120 706 : proc_receive = MODULO(para_env%mepos - proc_shift, para_env%num_pe)
1121 1412 : IF (map_rec_size(proc_receive) > 0) THEN
1122 676 : number_of_rec = number_of_rec + 1
1123 : END IF
1124 : END DO
1125 :
1126 2764 : ALLOCATE (buffer_rec(number_of_rec))
1127 :
1128 706 : rec_counter = 0
1129 1412 : DO proc_shift = 1, para_env%num_pe - 1
1130 706 : proc_receive = MODULO(para_env%mepos - proc_shift, para_env%num_pe)
1131 706 : size_rec_buffer = map_rec_size(proc_receive)
1132 1412 : IF (map_rec_size(proc_receive) > 0) THEN
1133 676 : rec_counter = rec_counter + 1
1134 : ! prepare the buffer for receive
1135 2028 : ALLOCATE (buffer_rec(rec_counter)%msg(size_rec_buffer))
1136 1012945 : buffer_rec(rec_counter)%msg = 0.0_dp
1137 676 : buffer_rec(rec_counter)%proc = proc_receive
1138 : ! post the message to be received
1139 : CALL para_env%irecv(buffer_rec(rec_counter)%msg, proc_receive, &
1140 676 : buffer_rec(rec_counter)%msg_req)
1141 : END IF
1142 : END DO
1143 :
1144 : ! send messages
1145 2764 : ALLOCATE (req_send(number_of_send))
1146 706 : send_counter = 0
1147 1412 : DO proc_shift = 1, para_env%num_pe - 1
1148 706 : proc_send = MODULO(para_env%mepos + proc_shift, para_env%num_pe)
1149 1412 : IF (map_send_size(proc_send) > 0) THEN
1150 676 : send_counter = send_counter + 1
1151 : CALL para_env%isend(buffer_send(send_counter)%msg, proc_send, &
1152 676 : buffer_send(send_counter)%msg_req)
1153 676 : req_send(send_counter) = buffer_send(send_counter)%msg_req
1154 : END IF
1155 : END DO
1156 706 : CALL timestop(handle2)
1157 :
1158 : ! 6) fill the fm_mat matrix with the messages received from the
1159 : ! other processes
1160 706 : CALL timeset(routineN//"_fill", handle2)
1161 : CALL cp_fm_get_info(matrix=fm_mat, &
1162 : nrow_local=nrow_local, &
1163 706 : ncol_local=ncol_local)
1164 2118 : ALLOCATE (sizes(2, 0:para_env%num_pe - 1))
1165 2118 : CALL para_env%allgather([nrow_local, ncol_local], sizes)
1166 2118 : iiB = MAXVAL(sizes(1, :))
1167 2118 : ALLOCATE (index_row_rec(iiB))
1168 25812 : index_row_rec = 0
1169 706 : rec_counter = 0
1170 1412 : DO proc_shift = 1, para_env%num_pe - 1
1171 706 : proc_receive = MODULO(para_env%mepos - proc_shift, para_env%num_pe)
1172 706 : size_rec_buffer = map_rec_size(proc_receive)
1173 :
1174 1412 : IF (map_rec_size(proc_receive) > 0) THEN
1175 676 : rec_counter = rec_counter + 1
1176 :
1177 676 : rec_col_size = sizes(2, proc_receive)
1178 676 : rec_row_size = sizes(1, proc_receive)
1179 :
1180 : ! precompute the indeces of the rows
1181 676 : num_rec_rows = 0
1182 24290 : DO iiB = 1, rec_row_size
1183 23614 : i_global = fm_mat%matrix_struct%l2g_row(iiB, mepos_2_grid(1, proc_receive))
1184 24290 : IF (i_global >= my_start_row .AND. i_global <= my_end_row) THEN
1185 23489 : num_rec_rows = num_rec_rows + 1
1186 23489 : index_row_rec(num_rec_rows) = i_global
1187 : END IF
1188 : END DO
1189 :
1190 676 : CALL buffer_rec(rec_counter)%msg_req%wait()
1191 :
1192 676 : iii = 0
1193 57570 : DO jjB = 1, rec_col_size
1194 56894 : j_global = fm_mat%matrix_struct%l2g_col(jjB, mepos_2_grid(2, proc_receive))
1195 57570 : IF (j_global >= my_start_col .AND. j_global <= my_end_col) THEN
1196 1040826 : DO iiB = 1, num_rec_rows
1197 1012269 : i_global = index_row_rec(iiB)
1198 1012269 : iii = iii + 1
1199 1040826 : mat2D(i_global - my_start_row + 1, j_global - my_start_col + 1) = buffer_rec(rec_counter)%msg(iii)
1200 : END DO
1201 : END IF
1202 : END DO
1203 :
1204 676 : DEALLOCATE (buffer_rec(rec_counter)%msg)
1205 : END IF
1206 : END DO
1207 1382 : DEALLOCATE (buffer_rec)
1208 706 : DEALLOCATE (index_row_rec)
1209 706 : CALL cp_fm_release(fm_mat)
1210 706 : CALL timestop(handle2)
1211 :
1212 : ! 7) Finally wait for all messeges to be sent
1213 706 : CALL timeset(routineN//"_waitall", handle2)
1214 706 : CALL mp_waitall(req_send(:))
1215 1382 : DO send_counter = 1, number_of_send
1216 1382 : DEALLOCATE (buffer_send(send_counter)%msg)
1217 : END DO
1218 1382 : DEALLOCATE (buffer_send)
1219 706 : CALL timestop(handle2)
1220 :
1221 706 : CALL timestop(handle)
1222 :
1223 4942 : END SUBROUTINE fm2array
1224 :
1225 : ! **************************************************************************************************
1226 : !> \brief redistribute 2D representation of 3d tensor to a set of dbcsr matrices
1227 : !> \param Gamma_2D ...
1228 : !> \param homo ...
1229 : !> \param virtual ...
1230 : !> \param dimen_ia ...
1231 : !> \param para_env_sub ...
1232 : !> \param my_ia_start ...
1233 : !> \param my_ia_end ...
1234 : !> \param my_group_L_size ...
1235 : !> \param gd_ia ...
1236 : !> \param dbcsr_Gamma ...
1237 : !> \param mo_coeff_o ...
1238 : ! **************************************************************************************************
1239 350 : SUBROUTINE create_dbcsr_gamma(Gamma_2D, homo, virtual, dimen_ia, para_env_sub, &
1240 : my_ia_start, my_ia_end, my_group_L_size, &
1241 350 : gd_ia, dbcsr_Gamma, mo_coeff_o)
1242 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :) :: Gamma_2D
1243 : INTEGER, INTENT(IN) :: homo, virtual, dimen_ia
1244 : TYPE(mp_para_env_type), INTENT(IN), POINTER :: para_env_sub
1245 : INTEGER, INTENT(IN) :: my_ia_start, my_ia_end, my_group_L_size
1246 : TYPE(group_dist_d1_type), INTENT(IN) :: gd_ia
1247 : TYPE(dbcsr_p_type), DIMENSION(:), INTENT(INOUT) :: dbcsr_Gamma
1248 : TYPE(dbcsr_type), INTENT(INOUT) :: mo_coeff_o
1249 :
1250 : CHARACTER(LEN=*), PARAMETER :: routineN = 'create_dbcsr_gamma'
1251 :
1252 : INTEGER :: dummy_proc, handle, i_global, i_local, iaia, iiB, iii, itmp(2), j_global, &
1253 : j_local, jjB, jjj, kkB, mypcol, myprow, ncol_local, npcol, nprow, nrow_local, &
1254 : number_of_rec, number_of_send, proc_receive, proc_send, proc_shift, rec_counter, &
1255 : rec_iaia_end, rec_iaia_size, rec_iaia_start, rec_pcol, rec_prow, ref_send_pcol, &
1256 : ref_send_prow, send_counter, send_pcol, send_prow, size_rec_buffer, size_send_buffer
1257 350 : INTEGER, ALLOCATABLE, DIMENSION(:) :: iii_vet, map_rec_size, map_send_size
1258 350 : INTEGER, ALLOCATABLE, DIMENSION(:, :) :: grid_2_mepos, grid_ref_2_send_pos, &
1259 350 : indeces_map_my, mepos_2_grid
1260 350 : INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
1261 : REAL(KIND=dp) :: part_ia
1262 : TYPE(cp_blacs_env_type), POINTER :: blacs_env
1263 : TYPE(cp_fm_struct_type), POINTER :: fm_struct
1264 : TYPE(cp_fm_type) :: fm_ia
1265 350 : TYPE(index_map), ALLOCATABLE, DIMENSION(:) :: indeces_rec
1266 : TYPE(integ_mat_buffer_type), ALLOCATABLE, &
1267 350 : DIMENSION(:) :: buffer_rec, buffer_send
1268 350 : TYPE(mp_request_type), ALLOCATABLE, DIMENSION(:) :: req_send
1269 :
1270 350 : CALL timeset(routineN, handle)
1271 :
1272 : ! create sub blacs env
1273 350 : NULLIFY (blacs_env)
1274 350 : CALL cp_blacs_env_create(blacs_env=blacs_env, para_env=para_env_sub)
1275 :
1276 : ! create the fm_ia buffer matrix
1277 350 : NULLIFY (fm_struct)
1278 : CALL cp_fm_struct_create(fm_struct, context=blacs_env, nrow_global=homo, &
1279 350 : ncol_global=virtual, para_env=para_env_sub)
1280 350 : CALL cp_fm_create(fm_ia, fm_struct, name="fm_ia")
1281 : ! release structure
1282 350 : CALL cp_fm_struct_release(fm_struct)
1283 : ! release blacs_env
1284 350 : CALL cp_blacs_env_release(blacs_env)
1285 :
1286 : ! get array information
1287 : CALL cp_fm_get_info(matrix=fm_ia, &
1288 : nrow_local=nrow_local, &
1289 : ncol_local=ncol_local, &
1290 : row_indices=row_indices, &
1291 350 : col_indices=col_indices)
1292 350 : myprow = fm_ia%matrix_struct%context%mepos(1)
1293 350 : mypcol = fm_ia%matrix_struct%context%mepos(2)
1294 350 : nprow = fm_ia%matrix_struct%context%num_pe(1)
1295 350 : npcol = fm_ia%matrix_struct%context%num_pe(2)
1296 :
1297 : ! 0) create array containing the processes position and supporting infos
1298 1400 : ALLOCATE (grid_2_mepos(0:nprow - 1, 0:npcol - 1))
1299 1066 : grid_2_mepos = 0
1300 1050 : ALLOCATE (mepos_2_grid(2, 0:para_env_sub%num_pe - 1))
1301 : ! fill the info array
1302 350 : grid_2_mepos(myprow, mypcol) = para_env_sub%mepos
1303 : ! sum infos
1304 350 : CALL para_env_sub%sum(grid_2_mepos)
1305 1050 : CALL para_env_sub%allgather([myprow, mypcol], mepos_2_grid)
1306 :
1307 : ! loop over local index range and define the sending map
1308 1050 : ALLOCATE (map_send_size(0:para_env_sub%num_pe - 1))
1309 716 : map_send_size = 0
1310 350 : dummy_proc = 0
1311 19490 : DO iaia = my_ia_start, my_ia_end
1312 19140 : i_global = (iaia - 1)/virtual + 1
1313 19140 : j_global = MOD(iaia - 1, virtual) + 1
1314 19140 : send_prow = fm_ia%matrix_struct%g2p_row(i_global)
1315 19140 : send_pcol = fm_ia%matrix_struct%g2p_col(j_global)
1316 19140 : proc_send = grid_2_mepos(send_prow, send_pcol)
1317 19490 : map_send_size(proc_send) = map_send_size(proc_send) + 1
1318 : END DO
1319 :
1320 : ! loop over local data of fm_ia and define the receiving map
1321 1050 : ALLOCATE (map_rec_size(0:para_env_sub%num_pe - 1))
1322 716 : map_rec_size = 0
1323 350 : part_ia = REAL(dimen_ia, KIND=dp)/REAL(para_env_sub%num_pe, KIND=dp)
1324 :
1325 1584 : DO iiB = 1, nrow_local
1326 1234 : i_global = row_indices(iiB)
1327 20724 : DO jjB = 1, ncol_local
1328 19140 : j_global = col_indices(jjB)
1329 19140 : iaia = (i_global - 1)*virtual + j_global
1330 19140 : proc_receive = INT(REAL(iaia - 1, KIND=dp)/part_ia)
1331 19140 : proc_receive = MAX(0, proc_receive)
1332 19140 : proc_receive = MIN(proc_receive, para_env_sub%num_pe - 1)
1333 : DO
1334 19140 : itmp = get_limit(dimen_ia, para_env_sub%num_pe, proc_receive)
1335 19140 : IF (iaia >= itmp(1) .AND. iaia <= itmp(2)) EXIT
1336 0 : IF (iaia < itmp(1)) proc_receive = proc_receive - 1
1337 19140 : IF (iaia > itmp(2)) proc_receive = proc_receive + 1
1338 : END DO
1339 20374 : map_rec_size(proc_receive) = map_rec_size(proc_receive) + 1
1340 : END DO
1341 : END DO
1342 :
1343 : ! allocate the buffer for sending data
1344 350 : number_of_send = 0
1345 366 : DO proc_shift = 1, para_env_sub%num_pe - 1
1346 16 : proc_send = MODULO(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
1347 366 : IF (map_send_size(proc_send) > 0) THEN
1348 2 : number_of_send = number_of_send + 1
1349 : END IF
1350 : END DO
1351 : ! allocate the structure that will hold the messages to be sent
1352 704 : ALLOCATE (buffer_send(number_of_send))
1353 : ! and the map from the grid of processess to the message position
1354 1050 : ALLOCATE (grid_ref_2_send_pos(0:nprow - 1, 0:npcol - 1))
1355 1066 : grid_ref_2_send_pos = 0
1356 : ! finally allocate each message
1357 350 : send_counter = 0
1358 366 : DO proc_shift = 1, para_env_sub%num_pe - 1
1359 16 : proc_send = MODULO(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
1360 16 : size_send_buffer = map_send_size(proc_send)
1361 366 : IF (map_send_size(proc_send) > 0) THEN
1362 2 : send_counter = send_counter + 1
1363 : ! allocate the sending buffer (msg)
1364 6 : ALLOCATE (buffer_send(send_counter)%msg(size_send_buffer))
1365 2 : buffer_send(send_counter)%proc = proc_send
1366 : ! get the pointer to prow, pcol of the process that has
1367 : ! to receive this message
1368 2 : ref_send_prow = mepos_2_grid(1, proc_send)
1369 2 : ref_send_pcol = mepos_2_grid(2, proc_send)
1370 : ! save the rank of the process that has to receive this message
1371 2 : grid_ref_2_send_pos(ref_send_prow, ref_send_pcol) = send_counter
1372 : END IF
1373 : END DO
1374 :
1375 : ! allocate the buffer for receiving data
1376 : number_of_rec = 0
1377 366 : DO proc_shift = 1, para_env_sub%num_pe - 1
1378 16 : proc_receive = MODULO(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
1379 366 : IF (map_rec_size(proc_receive) > 0) THEN
1380 2 : number_of_rec = number_of_rec + 1
1381 : END IF
1382 : END DO
1383 : ! allocate the structure that will hold the messages to be received
1384 : ! and relative indeces
1385 704 : ALLOCATE (buffer_rec(number_of_rec))
1386 704 : ALLOCATE (indeces_rec(number_of_rec))
1387 : ! finally allocate each message and fill the array of indeces
1388 350 : rec_counter = 0
1389 366 : DO proc_shift = 1, para_env_sub%num_pe - 1
1390 16 : proc_receive = MODULO(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
1391 16 : size_rec_buffer = map_rec_size(proc_receive)
1392 366 : IF (map_rec_size(proc_receive) > 0) THEN
1393 2 : rec_counter = rec_counter + 1
1394 : ! prepare the buffer for receive
1395 6 : ALLOCATE (buffer_rec(rec_counter)%msg(size_rec_buffer))
1396 2 : buffer_rec(rec_counter)%proc = proc_receive
1397 : ! create the indeces array
1398 6 : ALLOCATE (indeces_rec(rec_counter)%map(2, size_rec_buffer))
1399 59 : indeces_rec(rec_counter)%map = 0
1400 2 : CALL get_group_dist(gd_ia, proc_receive, rec_iaia_start, rec_iaia_end, rec_iaia_size)
1401 2 : iii = 0
1402 120 : DO iaia = rec_iaia_start, rec_iaia_end
1403 118 : i_global = (iaia - 1)/virtual + 1
1404 118 : j_global = MOD(iaia - 1, virtual) + 1
1405 118 : rec_prow = fm_ia%matrix_struct%g2p_row(i_global)
1406 118 : rec_pcol = fm_ia%matrix_struct%g2p_col(j_global)
1407 118 : IF (grid_2_mepos(rec_prow, rec_pcol) /= para_env_sub%mepos) CYCLE
1408 19 : iii = iii + 1
1409 19 : i_local = fm_ia%matrix_struct%g2l_row(i_global)
1410 19 : j_local = fm_ia%matrix_struct%g2l_col(j_global)
1411 19 : indeces_rec(rec_counter)%map(1, iii) = i_local
1412 120 : indeces_rec(rec_counter)%map(2, iii) = j_local
1413 : END DO
1414 : END IF
1415 : END DO
1416 : ! and create the index map for my local data
1417 350 : IF (map_rec_size(para_env_sub%mepos) > 0) THEN
1418 350 : size_rec_buffer = map_rec_size(para_env_sub%mepos)
1419 1050 : ALLOCATE (indeces_map_my(2, size_rec_buffer))
1420 57713 : indeces_map_my = 0
1421 : iii = 0
1422 19490 : DO iaia = my_ia_start, my_ia_end
1423 19140 : i_global = (iaia - 1)/virtual + 1
1424 19140 : j_global = MOD(iaia - 1, virtual) + 1
1425 19140 : rec_prow = fm_ia%matrix_struct%g2p_row(i_global)
1426 19140 : rec_pcol = fm_ia%matrix_struct%g2p_col(j_global)
1427 19140 : IF (grid_2_mepos(rec_prow, rec_pcol) /= para_env_sub%mepos) CYCLE
1428 19121 : iii = iii + 1
1429 19121 : i_local = fm_ia%matrix_struct%g2l_row(i_global)
1430 19121 : j_local = fm_ia%matrix_struct%g2l_col(j_global)
1431 19121 : indeces_map_my(1, iii) = i_local
1432 19490 : indeces_map_my(2, iii) = j_local
1433 : END DO
1434 : END IF
1435 :
1436 : ! auxiliary vector of indeces for the send buffer
1437 702 : ALLOCATE (iii_vet(number_of_send))
1438 : ! vector for the send requests
1439 704 : ALLOCATE (req_send(number_of_send))
1440 : ! loop over auxiliary basis function and redistribute into a fm
1441 : ! and then compy the fm into a dbcsr matrix
1442 15858 : DO kkB = 1, my_group_L_size
1443 : ! zero the matries of the buffers and post the messages to be received
1444 15508 : CALL cp_fm_set_all(matrix=fm_ia, alpha=0.0_dp)
1445 15508 : rec_counter = 0
1446 16944 : DO proc_shift = 1, para_env_sub%num_pe - 1
1447 1436 : proc_receive = MODULO(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
1448 16944 : IF (map_rec_size(proc_receive) > 0) THEN
1449 220 : rec_counter = rec_counter + 1
1450 2310 : buffer_rec(rec_counter)%msg = 0.0_dp
1451 : CALL para_env_sub%irecv(buffer_rec(rec_counter)%msg, proc_receive, &
1452 220 : buffer_rec(rec_counter)%msg_req)
1453 : END IF
1454 : END DO
1455 : ! fill the sending buffer and send the messages
1456 15728 : DO send_counter = 1, number_of_send
1457 17818 : buffer_send(send_counter)%msg = 0.0_dp
1458 : END DO
1459 15728 : iii_vet = 0
1460 : jjj = 0
1461 878464 : DO iaia = my_ia_start, my_ia_end
1462 862956 : i_global = (iaia - 1)/virtual + 1
1463 862956 : j_global = MOD(iaia - 1, virtual) + 1
1464 862956 : send_prow = fm_ia%matrix_struct%g2p_row(i_global)
1465 862956 : send_pcol = fm_ia%matrix_struct%g2p_col(j_global)
1466 862956 : proc_send = grid_2_mepos(send_prow, send_pcol)
1467 : ! we don't need to send to ourselves
1468 878464 : IF (grid_2_mepos(send_prow, send_pcol) == para_env_sub%mepos) THEN
1469 : ! filling fm_ia with local data
1470 860866 : jjj = jjj + 1
1471 860866 : i_local = indeces_map_my(1, jjj)
1472 860866 : j_local = indeces_map_my(2, jjj)
1473 860866 : fm_ia%local_data(i_local, j_local) = Gamma_2D(iaia - my_ia_start + 1, kkB)
1474 : ELSE
1475 2090 : send_counter = grid_ref_2_send_pos(send_prow, send_pcol)
1476 2090 : iii_vet(send_counter) = iii_vet(send_counter) + 1
1477 2090 : iii = iii_vet(send_counter)
1478 2090 : buffer_send(send_counter)%msg(iii) = Gamma_2D(iaia - my_ia_start + 1, kkB)
1479 : END IF
1480 : END DO
1481 15728 : req_send = mp_request_null
1482 15508 : send_counter = 0
1483 16944 : DO proc_shift = 1, para_env_sub%num_pe - 1
1484 1436 : proc_send = MODULO(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
1485 16944 : IF (map_send_size(proc_send) > 0) THEN
1486 220 : send_counter = send_counter + 1
1487 : CALL para_env_sub%isend(buffer_send(send_counter)%msg, proc_send, &
1488 220 : buffer_send(send_counter)%msg_req)
1489 220 : req_send(send_counter) = buffer_send(send_counter)%msg_req
1490 : END IF
1491 : END DO
1492 :
1493 : ! receive the massages and fill the fm_ia
1494 15508 : rec_counter = 0
1495 16944 : DO proc_shift = 1, para_env_sub%num_pe - 1
1496 1436 : proc_receive = MODULO(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
1497 1436 : size_rec_buffer = map_rec_size(proc_receive)
1498 16944 : IF (map_rec_size(proc_receive) > 0) THEN
1499 220 : rec_counter = rec_counter + 1
1500 : ! wait for the message
1501 220 : CALL buffer_rec(rec_counter)%msg_req%wait()
1502 2310 : DO iii = 1, size_rec_buffer
1503 2090 : i_local = indeces_rec(rec_counter)%map(1, iii)
1504 2090 : j_local = indeces_rec(rec_counter)%map(2, iii)
1505 2310 : fm_ia%local_data(i_local, j_local) = buffer_rec(rec_counter)%msg(iii)
1506 : END DO
1507 : END IF
1508 : END DO
1509 :
1510 : ! wait all
1511 15508 : CALL mp_waitall(req_send(:))
1512 :
1513 : ! now create the DBCSR matrix and copy fm_ia into it
1514 15508 : ALLOCATE (dbcsr_Gamma(kkB)%matrix)
1515 : CALL cp_dbcsr_m_by_n_from_template(dbcsr_Gamma(kkB)%matrix, &
1516 : template=mo_coeff_o, &
1517 15508 : m=homo, n=virtual, sym=dbcsr_type_no_symmetry)
1518 15858 : CALL copy_fm_to_dbcsr(fm_ia, dbcsr_Gamma(kkB)%matrix, keep_sparsity=.FALSE.)
1519 :
1520 : END DO
1521 :
1522 : ! deallocate stuff
1523 350 : DEALLOCATE (Gamma_2D)
1524 350 : DEALLOCATE (iii_vet)
1525 350 : DEALLOCATE (req_send)
1526 350 : IF (map_rec_size(para_env_sub%mepos) > 0) THEN
1527 350 : DEALLOCATE (indeces_map_my)
1528 : END IF
1529 352 : DO rec_counter = 1, number_of_rec
1530 2 : DEALLOCATE (indeces_rec(rec_counter)%map)
1531 352 : DEALLOCATE (buffer_rec(rec_counter)%msg)
1532 : END DO
1533 352 : DEALLOCATE (indeces_rec)
1534 352 : DEALLOCATE (buffer_rec)
1535 352 : DO send_counter = 1, number_of_send
1536 352 : DEALLOCATE (buffer_send(send_counter)%msg)
1537 : END DO
1538 352 : DEALLOCATE (buffer_send)
1539 350 : DEALLOCATE (map_send_size)
1540 350 : DEALLOCATE (map_rec_size)
1541 350 : DEALLOCATE (grid_2_mepos)
1542 350 : DEALLOCATE (mepos_2_grid)
1543 :
1544 : ! release buffer matrix
1545 350 : CALL cp_fm_release(fm_ia)
1546 :
1547 350 : CALL timestop(handle)
1548 :
1549 1400 : END SUBROUTINE create_dbcsr_gamma
1550 :
1551 : ! **************************************************************************************************
1552 : !> \brief prepare array for redistribution
1553 : !> \param para_env ...
1554 : !> \param para_env_sub ...
1555 : !> \param ngroup ...
1556 : !> \param group_grid_2_mepos ...
1557 : !> \param mepos_2_grid_group ...
1558 : !> \param pos_info ...
1559 : ! **************************************************************************************************
1560 344 : SUBROUTINE prepare_redistribution(para_env, para_env_sub, ngroup, &
1561 : group_grid_2_mepos, mepos_2_grid_group, &
1562 : pos_info)
1563 : TYPE(mp_para_env_type), INTENT(IN) :: para_env, para_env_sub
1564 : INTEGER, INTENT(IN) :: ngroup
1565 : INTEGER, ALLOCATABLE, DIMENSION(:, :), INTENT(OUT) :: group_grid_2_mepos, mepos_2_grid_group
1566 : INTEGER, ALLOCATABLE, DIMENSION(:), INTENT(OUT), &
1567 : OPTIONAL :: pos_info
1568 :
1569 : INTEGER :: i, pos_group, pos_sub
1570 344 : INTEGER, ALLOCATABLE, DIMENSION(:) :: my_pos_info
1571 :
1572 1032 : ALLOCATE (my_pos_info(0:para_env%num_pe - 1))
1573 344 : CALL para_env%allgather(para_env_sub%mepos, my_pos_info)
1574 :
1575 1376 : ALLOCATE (group_grid_2_mepos(0:para_env_sub%num_pe - 1, 0:ngroup - 1))
1576 1704 : group_grid_2_mepos = 0
1577 1032 : ALLOCATE (mepos_2_grid_group(2, 0:para_env%num_pe - 1))
1578 2408 : mepos_2_grid_group = 0
1579 :
1580 1032 : DO i = 0, para_env%num_pe - 1
1581 : ! calculate position of the group
1582 688 : pos_group = i/para_env_sub%num_pe
1583 : ! calculate position in the subgroup
1584 688 : pos_sub = my_pos_info(i)
1585 : ! fill the map from the grid of groups to process
1586 688 : group_grid_2_mepos(pos_sub, pos_group) = i
1587 : ! and the opposite, from the global pos to the grid pos
1588 688 : mepos_2_grid_group(1, i) = pos_sub
1589 1032 : mepos_2_grid_group(2, i) = pos_group
1590 : END DO
1591 :
1592 344 : IF (PRESENT(pos_info)) CALL move_alloc(my_pos_info, pos_info)
1593 :
1594 344 : END SUBROUTINE prepare_redistribution
1595 :
1596 0 : END MODULE mp2_ri_grad_util
|