Line data Source code
1 : !--------------------------------------------------------------------------------------------------!
2 : ! CP2K: A general program to perform molecular dynamics simulations !
3 : ! Copyright 2000-2024 CP2K developers group <https://cp2k.org> !
4 : ! !
5 : ! SPDX-License-Identifier: GPL-2.0-or-later !
6 : !--------------------------------------------------------------------------------------------------!
7 :
8 : ! **************************************************************************************************
9 : !> \brief Routines for data exchange between MPI processes
10 : !> \par History
11 : !> 04.2008 created [Manuel Guidon]
12 : !> \author Manuel Guidon
13 : ! **************************************************************************************************
14 : MODULE hfx_communication
15 : USE atomic_kind_types, ONLY: atomic_kind_type,&
16 : get_atomic_kind_set
17 : USE cp_control_types, ONLY: dft_control_type
18 : USE cp_dbcsr_api, ONLY: dbcsr_get_block_p,&
19 : dbcsr_iterator_blocks_left,&
20 : dbcsr_iterator_next_block,&
21 : dbcsr_iterator_start,&
22 : dbcsr_iterator_stop,&
23 : dbcsr_iterator_type,&
24 : dbcsr_p_type,&
25 : dbcsr_type
26 : USE hfx_types, ONLY: hfx_2D_map,&
27 : hfx_basis_type,&
28 : hfx_type
29 : USE kinds, ONLY: dp,&
30 : int_8
31 : USE message_passing, ONLY: mp_para_env_type,&
32 : mp_request_type,&
33 : mp_waitall
34 : USE particle_types, ONLY: particle_type
35 : USE qs_environment_types, ONLY: get_qs_env,&
36 : qs_environment_type
37 : #include "./base/base_uses.f90"
38 :
39 : IMPLICIT NONE
40 : PRIVATE
41 :
42 : PUBLIC :: get_full_density, &
43 : distribute_ks_matrix, &
44 : scale_and_add_fock_to_ks_matrix, &
45 : get_atomic_block_maps
46 : CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'hfx_communication'
47 :
48 : !***
49 :
50 : CONTAINS
51 :
52 : ! **************************************************************************************************
53 : !> \brief - Collects full density matrix from all CPUs
54 : !> \param para_env ...
55 : !> \param full_density The full Density matrix
56 : !> \param rho Distributed density
57 : !> \param number_of_p_entries Maximal buffer size
58 : !> \param block_offset ...
59 : !> \param kind_of ...
60 : !> \param basis_parameter ...
61 : !> \param get_max_vals_spin ...
62 : !> \param rho_beta ...
63 : !> \param antisymmetric ...
64 : !> \par History
65 : !> 11.2007 created [Manuel Guidon]
66 : !> \author Manuel Guidon
67 : !> \note
68 : !> - Communication with left/right node only
69 : !> added a mpi_sync before and after the ring of isendrecv. This *speed up* the
70 : !> communication, and might protect against idle neighbors flooding a busy node
71 : !> with messages [Joost]
72 : ! **************************************************************************************************
73 42971 : SUBROUTINE get_full_density(para_env, full_density, rho, number_of_p_entries, &
74 : block_offset, kind_of, basis_parameter, &
75 : get_max_vals_spin, rho_beta, antisymmetric)
76 :
77 : TYPE(mp_para_env_type), POINTER :: para_env
78 : REAL(dp), DIMENSION(:) :: full_density
79 : TYPE(dbcsr_type), POINTER :: rho
80 : INTEGER, INTENT(IN) :: number_of_p_entries
81 : INTEGER, DIMENSION(:), POINTER :: block_offset
82 : INTEGER :: kind_of(*)
83 : TYPE(hfx_basis_type), DIMENSION(:), POINTER :: basis_parameter
84 : LOGICAL, INTENT(IN) :: get_max_vals_spin
85 : TYPE(dbcsr_type), OPTIONAL, POINTER :: rho_beta
86 : LOGICAL, INTENT(IN) :: antisymmetric
87 :
88 : INTEGER :: blk, block_size, data_from, dest, i, iatom, icpu, ikind, iset, jatom, jkind, &
89 : jset, mepos, ncpu, nseta, nsetb, pa, pa1, pb, pb1, source, source_cpu
90 42971 : INTEGER, DIMENSION(:), POINTER :: nsgfa, nsgfb
91 : LOGICAL :: found
92 : REAL(dp) :: symmfac
93 42971 : REAL(dp), DIMENSION(:), POINTER :: recbuffer, sendbuffer, swapbuffer
94 42971 : REAL(dp), DIMENSION(:, :), POINTER :: sparse_block, sparse_block_beta
95 : TYPE(dbcsr_iterator_type) :: iter
96 128913 : TYPE(mp_request_type), DIMENSION(2) :: req
97 :
98 12707036 : full_density = 0.0_dp
99 128913 : ALLOCATE (sendbuffer(number_of_p_entries))
100 85942 : ALLOCATE (recbuffer(number_of_p_entries))
101 :
102 42971 : i = 1
103 42971 : CALL dbcsr_iterator_start(iter, rho, shared=.FALSE.)
104 176478 : DO WHILE (dbcsr_iterator_blocks_left(iter))
105 133507 : CALL dbcsr_iterator_next_block(iter, iatom, jatom, sparse_block, blk)
106 : ! the resulting vector will eb only the upper triangle.
107 : ! in case of antisymmetry take care to change signs if a lower block gets copied
108 133507 : symmfac = 1.0_dp
109 133507 : IF (antisymmetric .AND. iatom > jatom) symmfac = -1.0_dp
110 133507 : ikind = kind_of(iatom)
111 133507 : nseta = basis_parameter(ikind)%nset
112 133507 : nsgfa => basis_parameter(ikind)%nsgf
113 133507 : jkind = kind_of(jatom)
114 133507 : nsetb = basis_parameter(jkind)%nset
115 133507 : nsgfb => basis_parameter(jkind)%nsgf
116 176478 : IF (get_max_vals_spin) THEN
117 : CALL dbcsr_get_block_p(rho_beta, &
118 0 : row=iatom, col=jatom, BLOCK=sparse_block_beta, found=found)
119 0 : pa = 0
120 0 : DO iset = 1, nseta
121 : pb = 0
122 0 : DO jset = 1, nsetb
123 0 : DO pa1 = pa + 1, pa + nsgfa(iset)
124 0 : DO pb1 = pb + 1, pb + nsgfb(jset)
125 0 : sendbuffer(i) = MAX(ABS(sparse_block(pa1, pb1)), ABS(sparse_block_beta(pa1, pb1)))
126 0 : i = i + 1
127 : END DO
128 : END DO
129 0 : pb = pb + nsgfb(jset)
130 : END DO
131 0 : pa = pa + nsgfa(iset)
132 : END DO
133 : ELSE
134 : pa = 0
135 523306 : DO iset = 1, nseta
136 : pb = 0
137 1619473 : DO jset = 1, nsetb
138 4048259 : DO pa1 = pa + 1, pa + nsgfa(iset)
139 10370537 : DO pb1 = pb + 1, pb + nsgfb(jset)
140 6322278 : sendbuffer(i) = sparse_block(pa1, pb1)*symmfac
141 9140863 : i = i + 1
142 : END DO
143 : END DO
144 1619473 : pb = pb + nsgfb(jset)
145 : END DO
146 523306 : pa = pa + nsgfa(iset)
147 : END DO
148 : END IF
149 : END DO
150 42971 : CALL dbcsr_iterator_stop(iter)
151 :
152 : ! sync before/after a ring of isendrecv
153 42971 : CALL para_env%sync()
154 42971 : ncpu = para_env%num_pe
155 42971 : mepos = para_env%mepos
156 42971 : dest = MODULO(mepos + 1, ncpu)
157 42971 : source = MODULO(mepos - 1, ncpu)
158 128830 : DO icpu = 0, ncpu - 1
159 85859 : IF (icpu .NE. ncpu - 1) THEN
160 : CALL para_env%isendrecv(sendbuffer, dest, recbuffer, source, &
161 42888 : req(1), req(2), 13)
162 : END IF
163 85859 : data_from = MODULO(mepos - icpu, ncpu)
164 85859 : source_cpu = MODULO(data_from, ncpu) + 1
165 85859 : block_size = block_offset(source_cpu + 1) - block_offset(source_cpu)
166 12706953 : full_density(block_offset(source_cpu):block_offset(source_cpu) + block_size - 1) = sendbuffer(1:block_size)
167 :
168 85859 : IF (icpu .NE. ncpu - 1) THEN
169 42888 : CALL mp_waitall(req)
170 : END IF
171 85859 : swapbuffer => sendbuffer
172 85859 : sendbuffer => recbuffer
173 128830 : recbuffer => swapbuffer
174 : END DO
175 42971 : DEALLOCATE (sendbuffer, recbuffer)
176 : ! sync before/after a ring of isendrecv
177 42971 : CALL para_env%sync()
178 :
179 85942 : END SUBROUTINE get_full_density
180 :
181 : ! **************************************************************************************************
182 : !> \brief - Distributes the local full Kohn-Sham matrix to all CPUS
183 : !> \param para_env ...
184 : !> \param full_ks The full Kohn-Sham matrix
185 : !> \param ks_matrix Distributed Kohn-Sham matrix
186 : !> \param number_of_p_entries Maximal buffer size
187 : !> \param block_offset ...
188 : !> \param kind_of ...
189 : !> \param basis_parameter ...
190 : !> \param off_diag_fac ...
191 : !> \param diag_fac ...
192 : !> \par History
193 : !> 11.2007 created [Manuel Guidon]
194 : !> \author Manuel Guidon
195 : !> \note
196 : !> - Communication with left/right node only
197 : ! **************************************************************************************************
198 40205 : SUBROUTINE distribute_ks_matrix(para_env, full_ks, ks_matrix, number_of_p_entries, &
199 : block_offset, kind_of, basis_parameter, &
200 : off_diag_fac, diag_fac)
201 :
202 : TYPE(mp_para_env_type), POINTER :: para_env
203 : REAL(dp), DIMENSION(:) :: full_ks
204 : TYPE(dbcsr_type), POINTER :: ks_matrix
205 : INTEGER, INTENT(IN) :: number_of_p_entries
206 : INTEGER, DIMENSION(:), POINTER :: block_offset
207 : INTEGER :: kind_of(*)
208 : TYPE(hfx_basis_type), DIMENSION(:), POINTER :: basis_parameter
209 : REAL(dp), INTENT(IN), OPTIONAL :: off_diag_fac, diag_fac
210 :
211 : INTEGER :: blk, block_size, data_to, dest, dest_cpu, i, iatom, icpu, ikind, iset, jatom, &
212 : jkind, jset, mepos, ncpu, nseta, nsetb, pa, pa1, pb, pb1, source
213 40205 : INTEGER, DIMENSION(:), POINTER :: nsgfa, nsgfb
214 : REAL(dp) :: my_fac, myd_fac
215 40205 : REAL(dp), DIMENSION(:), POINTER :: recbuffer, sendbuffer, swapbuffer
216 40205 : REAL(dp), DIMENSION(:, :), POINTER :: sparse_block
217 : TYPE(dbcsr_iterator_type) :: iter
218 120615 : TYPE(mp_request_type), DIMENSION(2) :: req
219 :
220 40205 : my_fac = 1.0_dp; myd_fac = 1.0_dp
221 40205 : IF (PRESENT(off_diag_fac)) my_fac = off_diag_fac
222 40205 : IF (PRESENT(diag_fac)) myd_fac = diag_fac
223 :
224 120615 : ALLOCATE (sendbuffer(number_of_p_entries))
225 8081455 : sendbuffer = 0.0_dp
226 80410 : ALLOCATE (recbuffer(number_of_p_entries))
227 8081455 : recbuffer = 0.0_dp
228 :
229 40205 : ncpu = para_env%num_pe
230 40205 : mepos = para_env%mepos
231 40205 : dest = MODULO(mepos + 1, ncpu)
232 40205 : source = MODULO(mepos - 1, ncpu)
233 :
234 : ! sync before/after a ring of isendrecv
235 40205 : CALL para_env%sync()
236 80327 : DO icpu = 1, ncpu
237 80327 : i = 1
238 80327 : data_to = mepos - icpu
239 80327 : dest_cpu = MODULO(data_to, ncpu) + 1
240 80327 : block_size = block_offset(dest_cpu + 1) - block_offset(dest_cpu)
241 11945867 : sendbuffer(1:block_size) = sendbuffer(1:block_size) + full_ks(block_offset(dest_cpu):block_offset(dest_cpu) + block_size - 1)
242 80327 : IF (icpu .EQ. ncpu) EXIT
243 : CALL para_env%isendrecv(sendbuffer, dest, recbuffer, source, &
244 40122 : req(1), req(2), 13)
245 :
246 40122 : CALL mp_waitall(req)
247 40122 : swapbuffer => sendbuffer
248 40122 : sendbuffer => recbuffer
249 80327 : recbuffer => swapbuffer
250 : END DO
251 : ! sync before/after a ring of isendrecv
252 40205 : CALL para_env%sync()
253 :
254 40205 : i = 1
255 40205 : CALL dbcsr_iterator_start(iter, ks_matrix, shared=.FALSE.)
256 165627 : DO WHILE (dbcsr_iterator_blocks_left(iter))
257 125422 : CALL dbcsr_iterator_next_block(iter, iatom, jatom, sparse_block, blk)
258 :
259 125422 : ikind = kind_of(iatom)
260 125422 : nseta = basis_parameter(ikind)%nset
261 125422 : nsgfa => basis_parameter(ikind)%nsgf
262 125422 : jkind = kind_of(jatom)
263 125422 : nsetb = basis_parameter(jkind)%nset
264 125422 : nsgfb => basis_parameter(jkind)%nsgf
265 125422 : pa = 0
266 533041 : DO iset = 1, nseta
267 : pb = 0
268 1529771 : DO jset = 1, nsetb
269 3821230 : DO pa1 = pa + 1, pa + nsgfa(iset)
270 9769521 : DO pb1 = pb + 1, pb + nsgfb(jset)
271 5948291 : IF (iatom == jatom .AND. pa1 == pb1) THEN
272 374114 : sparse_block(pa1, pb1) = sendbuffer(i)*myd_fac + sparse_block(pa1, pb1)
273 : ELSE
274 5574177 : sparse_block(pa1, pb1) = sendbuffer(i)*my_fac + sparse_block(pa1, pb1)
275 : END IF
276 8607164 : i = i + 1
277 : END DO
278 : END DO
279 1529771 : pb = pb + nsgfb(jset)
280 : END DO
281 492836 : pa = pa + nsgfa(iset)
282 : END DO
283 : END DO
284 40205 : CALL dbcsr_iterator_stop(iter)
285 :
286 40205 : DEALLOCATE (sendbuffer, recbuffer)
287 :
288 80410 : END SUBROUTINE distribute_ks_matrix
289 :
290 : ! **************************************************************************************************
291 : !> \brief - Distributes the local full Kohn-Sham matrix to all CPUS. Is called in
292 : !> case of adiabatic rescaling. This is just a refactored version of
293 : !> distribute_ks_matrix
294 : !> \param para_env ...
295 : !> \param qs_env ...
296 : !> \param ks_matrix Distributed Kohn-Sham matrix
297 : !> \param irep ...
298 : !> \param scaling_factor ...
299 : !> \par History
300 : !> 11.2007 created [Manuel Guidon]
301 : !> \author Manuel Guidon
302 : !> \note
303 : !> - Communication with left/right node only
304 : ! **************************************************************************************************
305 112 : SUBROUTINE scale_and_add_fock_to_ks_matrix(para_env, qs_env, ks_matrix, irep, &
306 : scaling_factor)
307 :
308 : TYPE(mp_para_env_type), POINTER :: para_env
309 : TYPE(qs_environment_type), POINTER :: qs_env
310 : TYPE(dbcsr_p_type), DIMENSION(:, :), POINTER :: ks_matrix
311 : INTEGER, INTENT(IN) :: irep
312 : REAL(dp), INTENT(IN) :: scaling_factor
313 :
314 : INTEGER :: iatom, ikind, img, natom, nimages, nspins
315 112 : INTEGER, ALLOCATABLE, DIMENSION(:) :: kind_of, last_sgf_global
316 112 : REAL(dp), DIMENSION(:, :), POINTER :: full_ks
317 112 : TYPE(atomic_kind_type), DIMENSION(:), POINTER :: atomic_kind_set
318 : TYPE(dft_control_type), POINTER :: dft_control
319 : TYPE(hfx_basis_type), DIMENSION(:), POINTER :: basis_parameter
320 : TYPE(hfx_type), POINTER :: actual_x_data
321 112 : TYPE(particle_type), DIMENSION(:), POINTER :: particle_set
322 :
323 : !! All shared data is saved in i_thread = 1!
324 :
325 112 : NULLIFY (dft_control)
326 112 : actual_x_data => qs_env%x_data(irep, 1)
327 112 : basis_parameter => actual_x_data%basis_parameter
328 :
329 : CALL get_qs_env(qs_env=qs_env, &
330 : atomic_kind_set=atomic_kind_set, &
331 : particle_set=particle_set, &
332 112 : dft_control=dft_control)
333 :
334 112 : nspins = dft_control%nspins
335 112 : nimages = dft_control%nimages
336 112 : CPASSERT(nimages == 1)
337 :
338 112 : CALL get_atomic_kind_set(atomic_kind_set=atomic_kind_set, kind_of=kind_of)
339 :
340 112 : natom = SIZE(particle_set, 1)
341 336 : ALLOCATE (last_sgf_global(0:natom))
342 112 : last_sgf_global(0) = 0
343 224 : DO iatom = 1, natom
344 112 : ikind = kind_of(iatom)
345 224 : last_sgf_global(iatom) = last_sgf_global(iatom - 1) + basis_parameter(ikind)%nsgf_total
346 : END DO
347 112 : full_ks => actual_x_data%full_ks_alpha
348 112 : IF (scaling_factor /= 1.0_dp) THEN
349 22288 : full_ks = full_ks*scaling_factor
350 : END IF
351 224 : DO img = 1, nimages
352 : CALL distribute_ks_matrix(para_env, full_ks(:, img), ks_matrix(1, img)%matrix, actual_x_data%number_of_p_entries, &
353 : actual_x_data%block_offset, kind_of, basis_parameter, &
354 224 : off_diag_fac=0.5_dp)
355 : END DO
356 112 : DEALLOCATE (actual_x_data%full_ks_alpha)
357 :
358 112 : IF (nspins == 2) THEN
359 60 : full_ks => actual_x_data%full_ks_beta
360 60 : IF (scaling_factor /= 1.0_dp) THEN
361 11940 : full_ks = full_ks*scaling_factor
362 : END IF
363 120 : DO img = 1, nimages
364 : CALL distribute_ks_matrix(para_env, full_ks(:, img), ks_matrix(2, img)%matrix, actual_x_data%number_of_p_entries, &
365 : actual_x_data%block_offset, kind_of, basis_parameter, &
366 120 : off_diag_fac=0.5_dp)
367 : END DO
368 60 : DEALLOCATE (actual_x_data%full_ks_beta)
369 : END IF
370 :
371 112 : DEALLOCATE (last_sgf_global)
372 :
373 224 : END SUBROUTINE scale_and_add_fock_to_ks_matrix
374 :
375 : ! **************************************************************************************************
376 : !> \brief Given a 2d index pair, this function returns a 1d index pair for
377 : !> a symmetric upper triangle NxN matrix
378 : !> The compiler should inline this function, therefore it appears in
379 : !> several modules
380 : !> \param i 2d index
381 : !> \param j 2d index
382 : !> \param N matrix size
383 : !> \return ...
384 : !> \par History
385 : !> 03.2009 created [Manuel Guidon]
386 : !> \author Manuel Guidon
387 : ! **************************************************************************************************
388 0 : PURE FUNCTION get_1D_idx(i, j, N)
389 : INTEGER, INTENT(IN) :: i, j
390 : INTEGER(int_8), INTENT(IN) :: N
391 : INTEGER(int_8) :: get_1D_idx
392 :
393 : INTEGER(int_8) :: min_ij
394 :
395 0 : min_ij = MIN(i, j)
396 0 : get_1D_idx = min_ij*N + MAX(i, j) - (min_ij - 1)*min_ij/2 - N
397 :
398 0 : END FUNCTION get_1D_idx
399 :
400 : ! **************************************************************************************************
401 : !> \brief create a several maps array that reflects the ks matrix sparsity
402 : !> \param matrix ...
403 : !> \param basis_parameter ...
404 : !> \param kind_of ...
405 : !> \param is_assoc_atomic_block ...
406 : !> \param number_of_p_entries ...
407 : !> \param para_env ...
408 : !> \param atomic_block_offset ...
409 : !> \param set_offset ...
410 : !> \param block_offset ...
411 : !> \param map_atoms_to_cpus ...
412 : !> \param nkind ...
413 : !> \par History
414 : !> 11.2007 refactored [Joost VandeVondele]
415 : !> 07.2009 add new maps
416 : !> \author Manuel Guidon
417 : !> \notes
418 : !> is_assoc_atomic_block returns the mpi rank + 1 for associated blocks,
419 : !> zero for unassiated blocks
420 : ! **************************************************************************************************
421 2098 : SUBROUTINE get_atomic_block_maps(matrix, basis_parameter, kind_of, &
422 2098 : is_assoc_atomic_block, number_of_p_entries, &
423 : para_env, atomic_block_offset, set_offset, &
424 : block_offset, map_atoms_to_cpus, nkind)
425 :
426 : TYPE(dbcsr_type), POINTER :: matrix
427 : TYPE(hfx_basis_type), DIMENSION(:) :: basis_parameter
428 : INTEGER, DIMENSION(:) :: kind_of
429 : INTEGER, DIMENSION(:, :), INTENT(OUT) :: is_assoc_atomic_block
430 : INTEGER, INTENT(OUT) :: number_of_p_entries
431 : TYPE(mp_para_env_type), POINTER :: para_env
432 : INTEGER, DIMENSION(:, :), POINTER :: atomic_block_offset
433 : INTEGER, DIMENSION(:, :, :, :), POINTER :: set_offset
434 : INTEGER, DIMENSION(:), POINTER :: block_offset
435 : TYPE(hfx_2D_map), DIMENSION(:), POINTER :: map_atoms_to_cpus
436 : INTEGER :: nkind
437 :
438 : CHARACTER(LEN=*), PARAMETER :: routineN = 'get_atomic_block_maps'
439 :
440 : INTEGER :: blk, handle, iatom, ibuf, icpu, ikind, ilist, iset, itask, jatom, jkind, jset, &
441 : natom, ncpu, nseta, nsetb, number_of_p_blocks, offset, tmp(2)
442 2098 : INTEGER, ALLOCATABLE, DIMENSION(:) :: buffer_in, buffer_out, counter, rcount, &
443 2098 : rdispl
444 2098 : INTEGER, DIMENSION(:), POINTER :: iatom_list, jatom_list, nsgfa, nsgfb
445 2098 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: sparse_block
446 : TYPE(dbcsr_iterator_type) :: iter
447 :
448 2098 : CALL timeset(routineN, handle)
449 :
450 31606 : is_assoc_atomic_block = 0
451 2098 : number_of_p_entries = 0
452 2098 : number_of_p_blocks = 0
453 :
454 : !
455 : ! count number_of_p_blocks and number_of_p_entries
456 : !
457 2098 : CALL dbcsr_iterator_start(iter, matrix, shared=.FALSE.)
458 9432 : DO WHILE (dbcsr_iterator_blocks_left(iter))
459 7334 : CALL dbcsr_iterator_next_block(iter, iatom, jatom, sparse_block, blk)
460 7334 : ikind = kind_of(iatom)
461 7334 : jkind = kind_of(jatom)
462 7334 : number_of_p_blocks = number_of_p_blocks + 1
463 : number_of_p_entries = number_of_p_entries + &
464 7334 : basis_parameter(ikind)%nsgf_total*basis_parameter(jkind)%nsgf_total
465 : END DO
466 2098 : CALL dbcsr_iterator_stop(iter)
467 :
468 6294 : tmp = (/number_of_p_entries, number_of_p_blocks/)
469 2098 : CALL para_env%max(tmp)
470 2098 : number_of_p_entries = tmp(1)
471 2098 : number_of_p_blocks = tmp(2)
472 : !
473 : ! send this info around, so we can construct is_assoc_atomic_block
474 : ! pack all data in buffers and use allgatherv
475 : !
476 6294 : ALLOCATE (buffer_in(3*number_of_p_blocks))
477 6294 : ALLOCATE (buffer_out(3*number_of_p_blocks*para_env%num_pe))
478 8392 : ALLOCATE (rcount(para_env%num_pe), rdispl(para_env%num_pe))
479 :
480 30148 : buffer_in = 0
481 2098 : ibuf = 0
482 :
483 2098 : CALL dbcsr_iterator_start(iter, matrix, shared=.FALSE.)
484 9432 : DO WHILE (dbcsr_iterator_blocks_left(iter))
485 7334 : CALL dbcsr_iterator_next_block(iter, iatom, jatom, sparse_block, blk)
486 :
487 7334 : buffer_in(ibuf + 1) = iatom
488 7334 : buffer_in(ibuf + 2) = jatom
489 7334 : buffer_in(ibuf + 3) = para_env%mepos + 1
490 7334 : ibuf = ibuf + 3
491 : END DO
492 2098 : CALL dbcsr_iterator_stop(iter)
493 :
494 6292 : rcount = SIZE(buffer_in)
495 2098 : rdispl(1) = 0
496 4194 : DO icpu = 2, para_env%num_pe
497 4194 : rdispl(icpu) = rdispl(icpu - 1) + rcount(icpu - 1)
498 : END DO
499 2098 : CALL para_env%allgatherv(buffer_in, buffer_out, rcount, rdispl)
500 :
501 20786 : DO ibuf = 0, number_of_p_blocks*para_env%num_pe*3 - 3, 3
502 18688 : itask = buffer_out(ibuf + 3)
503 : ! buffer_out can be 0 if buffer_in contained less elements than the max number of atom pairs
504 : ! is_assoc_atomic_block is a map for atom pairs to a processor (assumes symmetry, i,j on the ame as j,i)
505 20786 : IF (itask .NE. 0) THEN
506 14656 : iatom = buffer_out(ibuf + 1)
507 14656 : jatom = buffer_out(ibuf + 2)
508 14656 : is_assoc_atomic_block(iatom, jatom) = itask
509 14656 : is_assoc_atomic_block(jatom, iatom) = itask
510 : END IF
511 : END DO
512 :
513 2098 : IF (ASSOCIATED(map_atoms_to_cpus)) THEN
514 2640 : DO icpu = 1, para_env%num_pe
515 1760 : DEALLOCATE (map_atoms_to_cpus(icpu)%iatom_list)
516 2640 : DEALLOCATE (map_atoms_to_cpus(icpu)%jatom_list)
517 : END DO
518 880 : DEALLOCATE (map_atoms_to_cpus)
519 : END IF
520 :
521 2098 : natom = SIZE(is_assoc_atomic_block, 1)
522 10488 : ALLOCATE (map_atoms_to_cpus(para_env%num_pe))
523 6294 : ALLOCATE (counter(para_env%num_pe))
524 6292 : counter = 0
525 :
526 8424 : DO iatom = 1, natom
527 23178 : DO jatom = iatom, natom
528 14754 : icpu = is_assoc_atomic_block(jatom, iatom)
529 21080 : IF (icpu > 0) counter(icpu) = counter(icpu) + 1
530 : END DO
531 : END DO
532 6292 : DO icpu = 1, para_env%num_pe
533 12526 : ALLOCATE (map_atoms_to_cpus(icpu)%iatom_list(counter(icpu)))
534 10430 : ALLOCATE (map_atoms_to_cpus(icpu)%jatom_list(counter(icpu)))
535 : END DO
536 6292 : counter = 0
537 8424 : DO iatom = 1, natom
538 23178 : DO jatom = iatom, natom
539 14754 : icpu = is_assoc_atomic_block(jatom, iatom)
540 21080 : IF (icpu > 0) THEN
541 14656 : counter(icpu) = counter(icpu) + 1
542 14656 : map_atoms_to_cpus(icpu)%jatom_list(counter(icpu)) = jatom
543 14656 : map_atoms_to_cpus(icpu)%iatom_list(counter(icpu)) = iatom
544 : END IF
545 : END DO
546 : END DO
547 :
548 2098 : DEALLOCATE (counter)
549 :
550 2098 : ncpu = para_env%num_pe
551 2098 : offset = 1
552 31606 : atomic_block_offset = 0
553 8390 : block_offset = 0
554 6292 : DO icpu = 1, ncpu
555 4194 : iatom_list => map_atoms_to_cpus(icpu)%iatom_list
556 4194 : jatom_list => map_atoms_to_cpus(icpu)%jatom_list
557 4194 : block_offset(icpu) = offset
558 20948 : DO ilist = 1, SIZE(iatom_list)
559 14656 : iatom = iatom_list(ilist)
560 14656 : ikind = kind_of(iatom)
561 14656 : jatom = jatom_list(ilist)
562 14656 : jkind = kind_of(jatom)
563 14656 : atomic_block_offset(iatom, jatom) = offset
564 14656 : atomic_block_offset(jatom, iatom) = offset
565 18850 : offset = offset + basis_parameter(ikind)%nsgf_total*basis_parameter(jkind)%nsgf_total
566 : END DO
567 : END DO
568 2098 : block_offset(ncpu + 1) = offset
569 147578 : set_offset = 0
570 5894 : DO ikind = 1, nkind
571 3796 : nseta = basis_parameter(ikind)%nset
572 3796 : nsgfa => basis_parameter(ikind)%nsgf
573 14286 : DO jkind = 1, nkind
574 8392 : nsetb = basis_parameter(jkind)%nset
575 8392 : nsgfb => basis_parameter(jkind)%nsgf
576 8392 : offset = 1
577 34356 : DO iset = 1, nseta
578 105698 : DO jset = 1, nsetb
579 75138 : set_offset(jset, iset, jkind, ikind) = offset
580 97306 : offset = offset + nsgfa(iset)*nsgfb(jset)
581 : END DO
582 : END DO
583 : END DO
584 : END DO
585 :
586 2098 : CALL timestop(handle)
587 :
588 4196 : END SUBROUTINE get_atomic_block_maps
589 :
590 : END MODULE hfx_communication
|