Line data Source code
1 : !--------------------------------------------------------------------------------------------------!
2 : ! CP2K: A general program to perform molecular dynamics simulations !
3 : ! Copyright 2000-2024 CP2K developers group <https://cp2k.org> !
4 : ! !
5 : ! SPDX-License-Identifier: GPL-2.0-or-later !
6 : !--------------------------------------------------------------------------------------------------!
7 :
8 : MODULE qs_fb_atomic_matrix_methods
9 :
10 : USE cp_dbcsr_api, ONLY: dbcsr_get_block_p,&
11 : dbcsr_get_info,&
12 : dbcsr_get_stored_coordinates,&
13 : dbcsr_type
14 : USE kinds, ONLY: dp,&
15 : int_8
16 : USE message_passing, ONLY: mp_para_env_type
17 : USE qs_fb_atomic_halo_types, ONLY: fb_atomic_halo_atom_global2halo,&
18 : fb_atomic_halo_get,&
19 : fb_atomic_halo_has_data,&
20 : fb_atomic_halo_list_get,&
21 : fb_atomic_halo_list_obj,&
22 : fb_atomic_halo_obj
23 : USE qs_fb_com_tasks_types, ONLY: &
24 : TASK_COST, TASK_DEST, TASK_N_RECORDS, TASK_PAIR, TASK_SRC, &
25 : fb_com_atom_pairs_calc_buffer_sizes, fb_com_atom_pairs_create, fb_com_atom_pairs_decode, &
26 : fb_com_atom_pairs_get, fb_com_atom_pairs_has_data, fb_com_atom_pairs_init, &
27 : fb_com_atom_pairs_nullify, fb_com_atom_pairs_obj, fb_com_atom_pairs_release, &
28 : fb_com_tasks_build_atom_pairs, fb_com_tasks_create, fb_com_tasks_decode_pair, &
29 : fb_com_tasks_encode_pair, fb_com_tasks_get, fb_com_tasks_nullify, fb_com_tasks_obj, &
30 : fb_com_tasks_release, fb_com_tasks_set, fb_com_tasks_transpose_dest_src
31 : USE qs_fb_matrix_data_types, ONLY: fb_matrix_data_get,&
32 : fb_matrix_data_has_data,&
33 : fb_matrix_data_obj
34 : #include "./base/base_uses.f90"
35 :
36 : IMPLICIT NONE
37 :
38 : PRIVATE
39 :
40 : CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'qs_fb_atomic_matrix_methods'
41 :
42 : PUBLIC :: fb_atmatrix_calc_size, &
43 : fb_atmatrix_construct, &
44 : fb_atmatrix_construct_2, &
45 : fb_atmatrix_generate_com_pairs_2
46 :
47 : CONTAINS
48 :
49 : ! **********************************************************************
50 : !> \brief Calculates the atomic matrix size from a given DBCSR matrix
51 : !> and atomic halo. It also calculates the first row (col) or the
52 : !> row (col) atomic blocks in the atomic matrix
53 : !> \param dbcsr_mat : pointer to the DBCSR matrix the atomic matrix is
54 : !> to be constructed from
55 : !> \param atomic_halo : the atomic halo used for defining the atomic
56 : !> matrix from the DBCSR matrix
57 : !> \param nrows : outputs total number of rows in the atomic matrix
58 : !> \param ncols : outputs total number of cols in the atomic matrix
59 : !> \param blk_row_start : first row in each atomic blk row in the
60 : !> atomic matrix
61 : !> \param blk_col_start : first col in each atomic blk col in the
62 : !> atomic matrix
63 : !> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
64 : ! **************************************************************************************************
65 1280 : SUBROUTINE fb_atmatrix_calc_size(dbcsr_mat, &
66 : atomic_halo, &
67 : nrows, &
68 : ncols, &
69 640 : blk_row_start, &
70 640 : blk_col_start)
71 : TYPE(dbcsr_type), POINTER :: dbcsr_mat
72 : TYPE(fb_atomic_halo_obj), INTENT(IN) :: atomic_halo
73 : INTEGER, INTENT(OUT) :: nrows, ncols
74 : INTEGER, DIMENSION(:), INTENT(OUT) :: blk_row_start, blk_col_start
75 :
76 : INTEGER :: ii, natoms_in_halo
77 640 : INTEGER, DIMENSION(:), POINTER :: col_block_size_data, halo_atoms, &
78 640 : row_block_size_data
79 : LOGICAL :: check_ok
80 :
81 640 : NULLIFY (halo_atoms, row_block_size_data, col_block_size_data)
82 :
83 640 : CALL dbcsr_get_info(dbcsr_mat, row_blk_size=row_block_size_data, col_blk_size=col_block_size_data)
84 : CALL fb_atomic_halo_get(atomic_halo=atomic_halo, &
85 : natoms=natoms_in_halo, &
86 640 : halo_atoms=halo_atoms)
87 640 : check_ok = SIZE(blk_row_start) .GE. (natoms_in_halo + 1)
88 640 : CPASSERT(check_ok)
89 640 : check_ok = SIZE(blk_col_start) .GE. (natoms_in_halo + 1)
90 640 : CPASSERT(check_ok)
91 6400 : blk_row_start = 0
92 6400 : blk_col_start = 0
93 640 : nrows = 0
94 640 : ncols = 0
95 5760 : DO ii = 1, natoms_in_halo
96 5120 : blk_row_start(ii) = nrows + 1
97 5120 : blk_col_start(ii) = ncols + 1
98 5120 : nrows = nrows + row_block_size_data(halo_atoms(ii))
99 5760 : ncols = ncols + col_block_size_data(halo_atoms(ii))
100 : END DO
101 640 : blk_row_start(natoms_in_halo + 1) = nrows + 1
102 640 : blk_col_start(natoms_in_halo + 1) = ncols + 1
103 640 : END SUBROUTINE fb_atmatrix_calc_size
104 :
105 : ! ****************************************************************************
106 : !> \brief Constructs atomic matrix for filter basis method from a given
107 : !> DBCSR matrix and a set of atomic send and recv pairs
108 : !> corresponding to the matrix blocks that needs to be included
109 : !> in the atomic matrix. This version is for when we do MPI
110 : !> communications at every step, for each atomic matrix.
111 : !> \param dbcsr_mat : the DBCSR matrix the atomic matrix is to be
112 : !> constructed from
113 : !> \param atomic_halo : the atomic halo conrresponding to this atomic
114 : !> matrix
115 : !> \param para_env : cp2k parallel environment
116 : !> \param atomic_matrix : the atomic matrix to be constructed, it should
117 : !> have already been allocated prior entering
118 : !> this subroutine
119 : !> \param blk_row_start : first row in each atomic blk row in the
120 : !> atomic matrix
121 : !> \param blk_col_start : first col in each atomic blk col in the
122 : !> atomic matrix
123 : !> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
124 : ! **************************************************************************************************
125 512 : SUBROUTINE fb_atmatrix_construct(dbcsr_mat, &
126 : atomic_halo, &
127 : para_env, &
128 512 : atomic_matrix, &
129 512 : blk_row_start, &
130 512 : blk_col_start)
131 : TYPE(dbcsr_type), POINTER :: dbcsr_mat
132 : TYPE(fb_atomic_halo_obj), INTENT(IN) :: atomic_halo
133 : TYPE(mp_para_env_type), POINTER :: para_env
134 : REAL(KIND=dp), DIMENSION(:, :), INTENT(OUT) :: atomic_matrix
135 : INTEGER, DIMENSION(:), INTENT(IN) :: blk_row_start, blk_col_start
136 :
137 : CHARACTER(LEN=*), PARAMETER :: routineN = 'fb_atmatrix_construct'
138 :
139 : INTEGER :: handle, iatom, iatom_in_halo, ii, ind, ipair, ipe, jatom, jatom_in_halo, jj, &
140 : ncols_blk, npairs_recv, npairs_send, nrows_blk, numprocs, pe, recv_encode, send_encode
141 512 : INTEGER(KIND=int_8), DIMENSION(:), POINTER :: pairs_recv, pairs_send
142 512 : INTEGER, ALLOCATABLE, DIMENSION(:) :: recv_disps, recv_pair_count, recv_pair_disps, &
143 512 : recv_sizes, send_disps, send_pair_count, send_pair_disps, send_sizes
144 512 : INTEGER, DIMENSION(:), POINTER :: col_block_size_data, row_block_size_data
145 : LOGICAL :: found
146 512 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:) :: recv_buf, send_buf
147 512 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: mat_block
148 : TYPE(fb_com_atom_pairs_obj) :: atom_pairs_recv, atom_pairs_send
149 :
150 512 : CALL timeset(routineN, handle)
151 :
152 512 : NULLIFY (pairs_send, pairs_recv, mat_block, &
153 512 : row_block_size_data, col_block_size_data)
154 512 : CALL fb_com_atom_pairs_nullify(atom_pairs_send)
155 512 : CALL fb_com_atom_pairs_nullify(atom_pairs_recv)
156 :
157 : ! initialise atomic matrix
158 512 : IF (SIZE(atomic_matrix, 1) > 0 .AND. SIZE(atomic_matrix, 2) > 0) THEN
159 5591552 : atomic_matrix = 0.0_dp
160 : END IF
161 :
162 : ! generate send and receive atomic pairs
163 512 : CALL fb_com_atom_pairs_create(atom_pairs_send)
164 512 : CALL fb_com_atom_pairs_create(atom_pairs_recv)
165 : CALL fb_atmatrix_generate_com_pairs(dbcsr_mat, &
166 : atomic_halo, &
167 : para_env, &
168 : atom_pairs_send, &
169 512 : atom_pairs_recv)
170 :
171 : ! get com pair informations
172 : CALL fb_com_atom_pairs_get(atom_pairs=atom_pairs_send, &
173 : pairs=pairs_send, &
174 : npairs=npairs_send, &
175 512 : natoms_encode=send_encode)
176 : CALL fb_com_atom_pairs_get(atom_pairs=atom_pairs_recv, &
177 : pairs=pairs_recv, &
178 : npairs=npairs_recv, &
179 512 : natoms_encode=recv_encode)
180 :
181 : ! get para_env info
182 512 : numprocs = para_env%num_pe
183 :
184 : ! get dbcsr row and col block sizes
185 512 : CALL dbcsr_get_info(dbcsr_mat, row_blk_size=row_block_size_data, col_blk_size=col_block_size_data)
186 :
187 : ! allocate temporary arrays for send
188 1536 : ALLOCATE (send_sizes(numprocs))
189 1024 : ALLOCATE (send_disps(numprocs))
190 1024 : ALLOCATE (send_pair_count(numprocs))
191 1024 : ALLOCATE (send_pair_disps(numprocs))
192 :
193 : ! setup send buffer sizes
194 : CALL fb_com_atom_pairs_calc_buffer_sizes(atom_pairs_send, &
195 : numprocs, &
196 : row_block_size_data, &
197 : col_block_size_data, &
198 : send_sizes, &
199 : send_disps, &
200 : send_pair_count, &
201 512 : send_pair_disps)
202 : ! allocate send buffer
203 2560 : ALLOCATE (send_buf(SUM(send_sizes)))
204 :
205 : ! allocate temporary arrays for recv
206 1024 : ALLOCATE (recv_sizes(numprocs))
207 1024 : ALLOCATE (recv_disps(numprocs))
208 1024 : ALLOCATE (recv_pair_count(numprocs))
209 1024 : ALLOCATE (recv_pair_disps(numprocs))
210 :
211 : ! setup recv buffer sizes
212 : CALL fb_com_atom_pairs_calc_buffer_sizes(atom_pairs_recv, &
213 : numprocs, &
214 : row_block_size_data, &
215 : col_block_size_data, &
216 : recv_sizes, &
217 : recv_disps, &
218 : recv_pair_count, &
219 512 : recv_pair_disps)
220 : ! allocate recv buffer
221 2560 : ALLOCATE (recv_buf(SUM(recv_sizes)))
222 : ! do packing
223 1536 : DO ipe = 1, numprocs
224 : ! need to reuse send_sizes as an accumulative displacement, so recalculate
225 1024 : send_sizes(ipe) = 0
226 19968 : DO ipair = 1, send_pair_count(ipe)
227 : CALL fb_com_atom_pairs_decode(pairs_send(send_pair_disps(ipe) + ipair), &
228 18432 : pe, iatom, jatom, send_encode)
229 18432 : nrows_blk = row_block_size_data(iatom)
230 18432 : ncols_blk = col_block_size_data(jatom)
231 : CALL dbcsr_get_block_p(matrix=dbcsr_mat, &
232 : row=iatom, col=jatom, block=mat_block, &
233 18432 : found=found)
234 19456 : IF (.NOT. found) THEN
235 0 : CPABORT("Matrix block not found")
236 : ELSE
237 : ! we have found the matrix block
238 258048 : DO jj = 1, ncols_blk
239 3373056 : DO ii = 1, nrows_blk
240 : ! column major format in blocks
241 3115008 : ind = send_disps(ipe) + send_sizes(ipe) + ii + (jj - 1)*nrows_blk
242 3354624 : send_buf(ind) = mat_block(ii, jj)
243 : END DO ! ii
244 : END DO ! jj
245 18432 : send_sizes(ipe) = send_sizes(ipe) + nrows_blk*ncols_blk
246 : END IF
247 : END DO ! ipair
248 : END DO ! ipe
249 :
250 : ! do communication
251 : CALL para_env%alltoall(send_buf, send_sizes, send_disps, &
252 512 : recv_buf, recv_sizes, recv_disps)
253 :
254 : ! cleanup temporary arrays no longer needed
255 512 : DEALLOCATE (send_buf)
256 512 : DEALLOCATE (send_sizes)
257 512 : DEALLOCATE (send_disps)
258 512 : DEALLOCATE (send_pair_count)
259 512 : DEALLOCATE (send_pair_disps)
260 :
261 : ! do unpacking
262 1536 : DO ipe = 1, numprocs
263 1024 : recv_sizes(ipe) = 0
264 19968 : DO ipair = 1, recv_pair_count(ipe)
265 : CALL fb_com_atom_pairs_decode(pairs_recv(recv_pair_disps(ipe) + ipair), &
266 18432 : pe, iatom, jatom, recv_encode)
267 : ! nrows_blk = last_row(iatom) - first_row(iatom) + 1
268 : ! ncols_blk = last_col(jatom) - first_col(jatom) + 1
269 18432 : nrows_blk = row_block_size_data(iatom)
270 18432 : ncols_blk = col_block_size_data(jatom)
271 : ! get the corresponding atom indices in halo
272 : ! the atoms from the recv_pairs should be in the atomic_halo, because
273 : ! the recv_pairs are the matrix blocks requested by the local proc for
274 : ! this particular atomic_halo
275 : CALL fb_atomic_halo_atom_global2halo(atomic_halo, &
276 : iatom, iatom_in_halo, &
277 18432 : found)
278 18432 : CPASSERT(found)
279 : CALL fb_atomic_halo_atom_global2halo(atomic_halo, &
280 : jatom, jatom_in_halo, &
281 18432 : found)
282 18432 : CPASSERT(found)
283 : ! put block into the full conventional matrix
284 258048 : DO jj = 1, ncols_blk
285 3373056 : DO ii = 1, nrows_blk
286 : ! column major format in blocks
287 3115008 : ind = recv_disps(ipe) + recv_sizes(ipe) + ii + (jj - 1)*nrows_blk
288 : atomic_matrix(blk_row_start(iatom_in_halo) + ii - 1, &
289 3354624 : blk_col_start(jatom_in_halo) + jj - 1) = recv_buf(ind)
290 :
291 : END DO ! ii
292 : END DO ! jj
293 56320 : recv_sizes(ipe) = recv_sizes(ipe) + nrows_blk*ncols_blk
294 : END DO ! ipair
295 : END DO ! ipe
296 :
297 : ! the constructed matrix is upper triangular, fill it up to full
298 53248 : DO ii = 2, SIZE(atomic_matrix, 1)
299 2795520 : DO jj = 1, ii - 1
300 2795008 : atomic_matrix(ii, jj) = atomic_matrix(jj, ii)
301 : END DO
302 : END DO
303 :
304 : ! cleanup rest of the temporary arrays
305 512 : DEALLOCATE (recv_buf)
306 512 : DEALLOCATE (recv_sizes)
307 512 : DEALLOCATE (recv_disps)
308 512 : DEALLOCATE (recv_pair_count)
309 512 : DEALLOCATE (recv_pair_disps)
310 512 : CALL fb_com_atom_pairs_release(atom_pairs_send)
311 512 : CALL fb_com_atom_pairs_release(atom_pairs_recv)
312 :
313 512 : CALL timestop(handle)
314 :
315 1024 : END SUBROUTINE fb_atmatrix_construct
316 :
317 : ! ****************************************************************************
318 : !> \brief Constructs atomic matrix for filter basis method from a given
319 : !> DBCSR matrix and a set of atomic send and recv pairs
320 : !> corresponding to the matrix blocks that needs to be included
321 : !> in the atomic matrix. This version is for when we do MPI
322 : !> communications collectively in one go at the beginning.
323 : !> \param matrix_storage : data storing the relevant DBCSR matrix blocks
324 : !> needed for constructing the atomic matrix
325 : !> \param atomic_halo : the atomic halo conrresponding to this atomic
326 : !> matrix
327 : !> \param atomic_matrix : the atomic matrix to be constructed, it should
328 : !> have already been allocated prior entering
329 : !> this subroutine
330 : !> \param blk_row_start : first row in each atomic blk row in the
331 : !> atomic matrix
332 : !> \param blk_col_start : first col in each atomic blk col in the
333 : !> atomic matrix
334 : !> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
335 : ! **************************************************************************************************
336 384 : SUBROUTINE fb_atmatrix_construct_2(matrix_storage, &
337 : atomic_halo, &
338 128 : atomic_matrix, &
339 256 : blk_row_start, &
340 128 : blk_col_start)
341 : TYPE(fb_matrix_data_obj), INTENT(IN) :: matrix_storage
342 : TYPE(fb_atomic_halo_obj), INTENT(IN) :: atomic_halo
343 : REAL(KIND=dp), DIMENSION(:, :), INTENT(OUT) :: atomic_matrix
344 : INTEGER, DIMENSION(:), INTENT(IN) :: blk_row_start, blk_col_start
345 :
346 : CHARACTER(LEN=*), PARAMETER :: routineN = 'fb_atmatrix_construct_2'
347 :
348 : INTEGER :: handle, iatom, iatom_global, icol, ii, &
349 : irow, jatom, jatom_global, jj, &
350 : natoms_in_halo
351 128 : INTEGER, DIMENSION(:), POINTER :: halo_atoms
352 : LOGICAL :: check_ok, found
353 128 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: blk_p
354 :
355 128 : CALL timeset(routineN, handle)
356 :
357 128 : check_ok = fb_matrix_data_has_data(matrix_storage)
358 128 : CPASSERT(check_ok)
359 128 : check_ok = fb_atomic_halo_has_data(atomic_halo)
360 128 : CPASSERT(check_ok)
361 :
362 128 : NULLIFY (halo_atoms, blk_p)
363 :
364 : ! initialise atomic matrix
365 128 : IF (SIZE(atomic_matrix, 1) > 0 .AND. SIZE(atomic_matrix, 2) > 0) THEN
366 1397888 : atomic_matrix = 0.0_dp
367 : END IF
368 :
369 : ! get atomic halo information
370 : CALL fb_atomic_halo_get(atomic_halo=atomic_halo, &
371 : natoms=natoms_in_halo, &
372 128 : halo_atoms=halo_atoms)
373 :
374 : ! construct atomic matrix using data from matrix_storage
375 1152 : DO iatom = 1, natoms_in_halo
376 1024 : iatom_global = halo_atoms(iatom)
377 9344 : DO jatom = 1, natoms_in_halo
378 8192 : jatom_global = halo_atoms(jatom)
379 : ! atomic matrices are symmetric, fill only the top
380 : ! triangular part
381 9216 : IF (jatom_global .GE. iatom_global) THEN
382 : CALL fb_matrix_data_get(matrix_storage, &
383 : iatom_global, &
384 : jatom_global, &
385 : blk_p, &
386 4608 : found)
387 : ! copy data to atomic_matrix if found
388 4608 : IF (found) THEN
389 64512 : DO jj = 1, SIZE(blk_p, 2)
390 59904 : icol = blk_col_start(jatom) + jj - 1
391 843264 : DO ii = 1, SIZE(blk_p, 1)
392 778752 : irow = blk_row_start(iatom) + ii - 1
393 838656 : atomic_matrix(irow, icol) = blk_p(ii, jj)
394 : END DO ! ii
395 : END DO ! jj
396 : END IF
397 : END IF
398 : END DO ! jatom
399 : END DO ! iatom
400 :
401 : ! the constructed matrix is upper triangular, fill it up to full
402 13312 : DO ii = 2, SIZE(atomic_matrix, 1)
403 698880 : DO jj = 1, ii - 1
404 698752 : atomic_matrix(ii, jj) = atomic_matrix(jj, ii)
405 : END DO
406 : END DO
407 :
408 128 : CALL timestop(handle)
409 :
410 128 : END SUBROUTINE fb_atmatrix_construct_2
411 :
412 : ! ****************************************************************************
413 : !> \brief generate list of blocks (atom pairs) of a DBCSR matrix to be
414 : !> sent and received in order to construct an atomic matrix
415 : !> corresponding to a given atomic halo. This version is for the case
416 : !> when we do MPI communications at each step, for each atomic matrix.
417 : !> \param dbcsr_mat : The DBCSR matrix the atom blocks come from
418 : !> \param atomic_halo : the atomic halo used to construct the atomic
419 : !> matrix
420 : !> \param para_env : cp2k parallel environment
421 : !> \param atom_pairs_send : list of atom blocks from local DBCSR matrix
422 : !> data to be sent
423 : !> \param atom_pairs_recv : list of atom blocks from remote DBCSR matrix
424 : !> data to be recveived
425 : !> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
426 : ! **************************************************************************************************
427 512 : SUBROUTINE fb_atmatrix_generate_com_pairs(dbcsr_mat, &
428 : atomic_halo, &
429 : para_env, &
430 : atom_pairs_send, &
431 : atom_pairs_recv)
432 : TYPE(dbcsr_type), POINTER :: dbcsr_mat
433 : TYPE(fb_atomic_halo_obj), INTENT(IN) :: atomic_halo
434 : TYPE(mp_para_env_type), POINTER :: para_env
435 : TYPE(fb_com_atom_pairs_obj), INTENT(INOUT) :: atom_pairs_send, atom_pairs_recv
436 :
437 : CHARACTER(LEN=*), PARAMETER :: routineN = 'fb_atmatrix_generate_com_pairs'
438 :
439 : INTEGER :: counter, handle, iatom, iatom_global, itask, jatom, jatom_global, natoms_in_halo, &
440 : nblkrows_total, nencode, ntasks_recv, ntasks_send, src
441 : INTEGER(KIND=int_8) :: pair
442 512 : INTEGER(KIND=int_8), DIMENSION(:, :), POINTER :: tasks_recv, tasks_send
443 512 : INTEGER, DIMENSION(:), POINTER :: halo_atoms
444 : LOGICAL :: found
445 512 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: mat_block
446 : TYPE(fb_com_tasks_obj) :: com_tasks_recv, com_tasks_send
447 :
448 512 : CALL timeset(routineN, handle)
449 :
450 512 : NULLIFY (halo_atoms, tasks_send, tasks_recv)
451 512 : CALL fb_com_tasks_nullify(com_tasks_send)
452 512 : CALL fb_com_tasks_nullify(com_tasks_recv)
453 :
454 : ! initialise atom_pairs_send and atom_pairs_receive
455 512 : IF (fb_com_atom_pairs_has_data(atom_pairs_send)) THEN
456 512 : CALL fb_com_atom_pairs_init(atom_pairs_send)
457 : ELSE
458 0 : CALL fb_com_atom_pairs_create(atom_pairs_send)
459 : END IF
460 512 : IF (fb_com_atom_pairs_has_data(atom_pairs_recv)) THEN
461 512 : CALL fb_com_atom_pairs_init(atom_pairs_recv)
462 : ELSE
463 0 : CALL fb_com_atom_pairs_create(atom_pairs_recv)
464 : END IF
465 :
466 : ! get atomic halo information
467 : CALL fb_atomic_halo_get(atomic_halo=atomic_halo, &
468 : natoms=natoms_in_halo, &
469 512 : halo_atoms=halo_atoms)
470 :
471 : ! get the total number of atoms, we can obtain this directly
472 : ! from the global block row dimension of the dbcsr matrix
473 : CALL dbcsr_get_info(matrix=dbcsr_mat, &
474 512 : nblkrows_total=nblkrows_total)
475 :
476 : ! generate recv task list (tasks_recv)
477 :
478 : ! a recv task corresponds to the copying or transferring of a
479 : ! matrix block in the part of the DBCSR matrix owned by the src
480 : ! proc to this proc in order to construct the atomic matrix
481 : ! corresponding to the given atomic halo. As an upper-bound, the
482 : ! number of matrix blocks required do not exceed natoms_in_halo**2
483 512 : ntasks_recv = natoms_in_halo*natoms_in_halo
484 :
485 1536 : ALLOCATE (tasks_recv(TASK_N_RECORDS, ntasks_recv))
486 :
487 : ! destination proc is always the local processor
488 : ASSOCIATE (dest => para_env%mepos)
489 : ! now that tasks_recv has been allocated, generate the tasks
490 512 : itask = 1
491 4608 : DO iatom = 1, natoms_in_halo
492 4096 : iatom_global = halo_atoms(iatom)
493 37376 : DO jatom = 1, natoms_in_halo
494 32768 : jatom_global = halo_atoms(jatom)
495 : ! atomic matrix is symmetric, and only upper triangular part
496 : ! is stored in DBCSR matrix
497 36864 : IF (jatom_global .GE. iatom_global) THEN
498 : ! find the source proc that supposed to own the block
499 : ! (iatom_global, jatom_global)
500 : CALL dbcsr_get_stored_coordinates(dbcsr_mat, &
501 : iatom_global, &
502 : jatom_global, &
503 18432 : processor=src)
504 : ! we must encode the global atom indices rather the halo
505 : ! atomic indices in each task, because halo atomic
506 : ! indices are local to each halo, and each processor is
507 : ! working on a different halo local to them. So one
508 : ! processor would not have the information about the halo
509 : ! on another processor, rendering the halo atomic indices
510 : ! rather useless outside the local processor.
511 18432 : tasks_recv(TASK_DEST, itask) = dest
512 18432 : tasks_recv(TASK_SRC, itask) = src
513 :
514 : CALL fb_com_tasks_encode_pair(tasks_recv(TASK_PAIR, itask), &
515 : iatom_global, jatom_global, &
516 18432 : nblkrows_total)
517 : ! calculation of cost not implemented at the moment
518 18432 : tasks_recv(TASK_COST, itask) = 0
519 18432 : itask = itask + 1
520 : END IF
521 : END DO ! jatom
522 : END DO ! iatom
523 : END ASSOCIATE
524 :
525 : ! get the actual number of tasks
526 512 : ntasks_recv = itask - 1
527 :
528 : ! create tasks
529 512 : CALL fb_com_tasks_create(com_tasks_recv)
530 512 : CALL fb_com_tasks_create(com_tasks_send)
531 :
532 : CALL fb_com_tasks_set(com_tasks=com_tasks_recv, &
533 : task_dim=TASK_N_RECORDS, &
534 : ntasks=ntasks_recv, &
535 : nencode=nblkrows_total, &
536 512 : tasks=tasks_recv)
537 :
538 : ! genearte the send task list (tasks_send) from the recv task list
539 : CALL fb_com_tasks_transpose_dest_src(com_tasks_recv, ">", com_tasks_send, &
540 512 : para_env)
541 :
542 : CALL fb_com_tasks_get(com_tasks=com_tasks_send, &
543 : ntasks=ntasks_send, &
544 : tasks=tasks_send, &
545 512 : nencode=nencode)
546 :
547 : ! because the atomic_halos and the neighbor_list_set used to
548 : ! generate the sparse structure of the DBCSR matrix do not
549 : ! necessarily have to coincide, we must check of the blocks in
550 : ! tasks_send (these should be local to the processor) do indeed
551 : ! exist in the DBCSR matrix, if not, then we need to prune these
552 : ! out of the task list
553 :
554 512 : counter = 0
555 18944 : DO itask = 1, ntasks_send
556 18432 : pair = tasks_send(TASK_PAIR, itask)
557 18432 : CALL fb_com_tasks_decode_pair(pair, iatom_global, jatom_global, nencode)
558 : ! check if block exists in DBCSR matrix
559 : CALL dbcsr_get_block_p(matrix=dbcsr_mat, &
560 : row=iatom_global, col=jatom_global, block=mat_block, &
561 18432 : found=found)
562 18944 : IF (found) THEN
563 18432 : counter = counter + 1
564 : ! we can do this here, because essencially we are inspecting
565 : ! the send tasks one by one, and then omit ones which the
566 : ! block is not found in the DBCSR matrix. itask is always
567 : ! .GE. counter
568 92160 : tasks_send(1:TASK_N_RECORDS, counter) = tasks_send(1:TASK_N_RECORDS, itask)
569 : END IF
570 : END DO
571 : ! the new send task list should have size counter. counter
572 : ! .LE. the old ntasks_send, thus the task list does not really
573 : ! need to be reallocated (as it is just a temporary array), and
574 : ! the useful data will cutoff at counter, and the rest of the
575 : ! array will just be garbage
576 512 : ntasks_send = counter
577 :
578 : ! tasks_send is set through the pointer already
579 : CALL fb_com_tasks_set(com_tasks=com_tasks_send, &
580 512 : ntasks=ntasks_send)
581 :
582 : ! now, re-distribute the new send tasks list to other processors
583 : ! to build the updated recv tasks list
584 : CALL fb_com_tasks_transpose_dest_src(com_tasks_recv, "<", com_tasks_send, &
585 512 : para_env)
586 :
587 : ! task lists are now complete, now construct the atom_pairs_send
588 : ! and atom_pairs_recv from the tasks lists
589 : CALL fb_com_tasks_build_atom_pairs(com_tasks=com_tasks_send, &
590 : atom_pairs=atom_pairs_send, &
591 : natoms_encode=nencode, &
592 512 : send_or_recv="send")
593 : CALL fb_com_tasks_build_atom_pairs(com_tasks=com_tasks_recv, &
594 : atom_pairs=atom_pairs_recv, &
595 : natoms_encode=nencode, &
596 512 : send_or_recv="recv")
597 :
598 : ! cleanup
599 512 : CALL fb_com_tasks_release(com_tasks_recv)
600 512 : CALL fb_com_tasks_release(com_tasks_send)
601 :
602 512 : CALL timestop(handle)
603 :
604 1536 : END SUBROUTINE fb_atmatrix_generate_com_pairs
605 :
606 : ! ****************************************************************************
607 : !> \brief generate list of blocks (atom pairs) of a DBCSR matrix to be
608 : !> sent and received in order to construct all local atomic matrices
609 : !> corresponding to the atomic halos. This version is for the case
610 : !> when we do MPI communications collectively in one go at the
611 : !> beginning.
612 : !> \param dbcsr_mat : The DBCSR matrix the atom blocks come from
613 : !> \param atomic_halos : the list of all atomic halos local to the process
614 : !> \param para_env : cp2k parallel environment
615 : !> \param atom_pairs_send : list of atom blocks from local DBCSR matrix
616 : !> data to be sent
617 : !> \param atom_pairs_recv : list of atom blocks from remote DBCSR matrix
618 : !> data to be recveived
619 : !> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
620 : ! **************************************************************************************************
621 32 : SUBROUTINE fb_atmatrix_generate_com_pairs_2(dbcsr_mat, &
622 : atomic_halos, &
623 : para_env, &
624 : atom_pairs_send, &
625 : atom_pairs_recv)
626 : TYPE(dbcsr_type), POINTER :: dbcsr_mat
627 : TYPE(fb_atomic_halo_list_obj), INTENT(IN) :: atomic_halos
628 : TYPE(mp_para_env_type), POINTER :: para_env
629 : TYPE(fb_com_atom_pairs_obj), INTENT(INOUT) :: atom_pairs_send, atom_pairs_recv
630 :
631 : CHARACTER(LEN=*), PARAMETER :: routineN = 'fb_atmatrix_generate_com_pairs_2'
632 :
633 : INTEGER :: counter, handle, iatom, iatom_global, ihalo, itask, jatom, jatom_global, &
634 : natoms_in_halo, nblkrows_total, nencode, nhalos, ntasks_recv, ntasks_send, src
635 : INTEGER(KIND=int_8) :: pair
636 32 : INTEGER(KIND=int_8), DIMENSION(:, :), POINTER :: tasks_recv, tasks_send
637 32 : INTEGER, DIMENSION(:), POINTER :: halo_atoms
638 : LOGICAL :: found
639 32 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: mat_block
640 32 : TYPE(fb_atomic_halo_obj), DIMENSION(:), POINTER :: halos
641 : TYPE(fb_com_tasks_obj) :: com_tasks_recv, com_tasks_send
642 :
643 32 : CALL timeset(routineN, handle)
644 :
645 32 : NULLIFY (halo_atoms, tasks_send, tasks_recv)
646 32 : CALL fb_com_tasks_nullify(com_tasks_send)
647 32 : CALL fb_com_tasks_nullify(com_tasks_recv)
648 :
649 : ! initialise atom_pairs_send and atom_pairs_receive
650 32 : IF (fb_com_atom_pairs_has_data(atom_pairs_send)) THEN
651 32 : CALL fb_com_atom_pairs_init(atom_pairs_send)
652 : ELSE
653 0 : CALL fb_com_atom_pairs_create(atom_pairs_send)
654 : END IF
655 32 : IF (fb_com_atom_pairs_has_data(atom_pairs_recv)) THEN
656 32 : CALL fb_com_atom_pairs_init(atom_pairs_recv)
657 : ELSE
658 0 : CALL fb_com_atom_pairs_create(atom_pairs_recv)
659 : END IF
660 :
661 : ! get atomic halo list information
662 : CALL fb_atomic_halo_list_get(atomic_halos=atomic_halos, &
663 : nhalos=nhalos, &
664 32 : halos=halos)
665 : ! get the total number of atoms, we can obtain this directly
666 : ! from the global block row dimension of the dbcsr matrix
667 : CALL dbcsr_get_info(matrix=dbcsr_mat, &
668 32 : nblkrows_total=nblkrows_total)
669 :
670 : ! estimate the maximum number of blocks to be received
671 32 : ntasks_recv = 0
672 160 : DO ihalo = 1, nhalos
673 : CALL fb_atomic_halo_get(atomic_halo=halos(ihalo), &
674 128 : natoms=natoms_in_halo)
675 160 : ntasks_recv = ntasks_recv + natoms_in_halo*natoms_in_halo
676 : END DO
677 96 : ALLOCATE (tasks_recv(TASK_N_RECORDS, ntasks_recv))
678 :
679 : ! now that tasks_recv has been allocated, generate the tasks
680 :
681 : ! destination proc is always the local process
682 : ASSOCIATE (dest => para_env%mepos)
683 32 : itask = 1
684 160 : DO ihalo = 1, nhalos
685 : CALL fb_atomic_halo_get(atomic_halo=halos(ihalo), &
686 : natoms=natoms_in_halo, &
687 128 : halo_atoms=halo_atoms)
688 1184 : DO iatom = 1, natoms_in_halo
689 1024 : iatom_global = halo_atoms(iatom)
690 9344 : DO jatom = 1, natoms_in_halo
691 8192 : jatom_global = halo_atoms(jatom)
692 : ! atomic matrices are always symmetric, treat it as such.
693 : ! so only deal with upper triangular parts
694 9216 : IF (jatom_global .GE. iatom_global) THEN
695 : ! find the source proc that supposed to own the block
696 : ! (iatom_global, jatom_global)
697 : CALL dbcsr_get_stored_coordinates(dbcsr_mat, &
698 : iatom_global, &
699 : jatom_global, &
700 4608 : processor=src)
701 : ! we must encode the global atom indices rather the halo
702 : ! atomic indices in each task, because halo atomic indices
703 : ! are local to each halo, and each processor is working on a
704 : ! different halo local to them. So one processor would not
705 : ! have the information about the halo on another processor,
706 : ! rendering the halo atomic indices rather useless outside
707 : ! the local processor.
708 4608 : tasks_recv(TASK_DEST, itask) = dest
709 4608 : tasks_recv(TASK_SRC, itask) = src
710 : CALL fb_com_tasks_encode_pair(tasks_recv(TASK_PAIR, itask), &
711 : iatom_global, jatom_global, &
712 4608 : nblkrows_total)
713 : ! calculation of cost not implemented at the moment
714 4608 : tasks_recv(TASK_COST, itask) = 0
715 4608 : itask = itask + 1
716 : END IF
717 : END DO ! jatom
718 : END DO ! iatom
719 : END DO ! ihalo
720 : END ASSOCIATE
721 :
722 : ! set the actual number of tasks obtained
723 32 : ntasks_recv = itask - 1
724 :
725 : ! create tasks
726 32 : CALL fb_com_tasks_create(com_tasks_recv)
727 32 : CALL fb_com_tasks_create(com_tasks_send)
728 :
729 : CALL fb_com_tasks_set(com_tasks=com_tasks_recv, &
730 : task_dim=TASK_N_RECORDS, &
731 : ntasks=ntasks_recv, &
732 : nencode=nblkrows_total, &
733 32 : tasks=tasks_recv)
734 :
735 : ! genearte the send task list (tasks_send) from the recv task list
736 : CALL fb_com_tasks_transpose_dest_src(com_tasks_recv, ">", com_tasks_send, &
737 32 : para_env)
738 :
739 : CALL fb_com_tasks_get(com_tasks=com_tasks_send, &
740 : ntasks=ntasks_send, &
741 : tasks=tasks_send, &
742 32 : nencode=nencode)
743 :
744 : ! because the atomic_halos and the neighbor_list_set used to
745 : ! generate the sparse structure of the DBCSR matrix do not
746 : ! necessarily have to coincide, we must check of the blocks in
747 : ! tasks_send (these should be local to the processor) do indeed
748 : ! exist in the DBCSR matrix, if not, then we need to prune these
749 : ! out of the task list
750 :
751 32 : counter = 0
752 4640 : DO itask = 1, ntasks_send
753 4608 : pair = tasks_send(TASK_PAIR, itask)
754 4608 : CALL fb_com_tasks_decode_pair(pair, iatom_global, jatom_global, nencode)
755 : ! check if block exists in DBCSR matrix
756 : CALL dbcsr_get_block_p(matrix=dbcsr_mat, row=iatom_global, &
757 : col=jatom_global, block=mat_block, &
758 4608 : found=found)
759 4640 : IF (found) THEN
760 4608 : counter = counter + 1
761 : ! we can do this here, because essencially we are inspecting
762 : ! the send tasks one by one, and then omit ones which the
763 : ! block is not found in the DBCSR matrix. itask is always
764 : ! .GE. counter
765 23040 : tasks_send(1:TASK_N_RECORDS, counter) = tasks_send(1:TASK_N_RECORDS, itask)
766 : END IF
767 : END DO
768 : ! the new send task list should have size counter. counter
769 : ! .LE. the old ntasks_send, thus the task list does not really
770 : ! need to be reallocated (as it is just a temporary array), and
771 : ! the useful data will cutoff at counter, and the rest of the
772 : ! array will just be garbage
773 32 : ntasks_send = counter
774 :
775 : ! tasks_send is set through the pointer already
776 : CALL fb_com_tasks_set(com_tasks=com_tasks_send, &
777 32 : ntasks=ntasks_send)
778 :
779 : ! now, re-distribute the new send tasks list to other processors
780 : ! to build the updated recv tasks list
781 : CALL fb_com_tasks_transpose_dest_src(com_tasks_recv, "<", com_tasks_send, &
782 32 : para_env)
783 :
784 : ! task lists are now complete, now construct the atom_pairs_send
785 : ! and atom_pairs_recv from the tasks lists
786 : CALL fb_com_tasks_build_atom_pairs(com_tasks=com_tasks_send, &
787 : atom_pairs=atom_pairs_send, &
788 : natoms_encode=nencode, &
789 32 : send_or_recv="send")
790 : CALL fb_com_tasks_build_atom_pairs(com_tasks=com_tasks_recv, &
791 : atom_pairs=atom_pairs_recv, &
792 : natoms_encode=nencode, &
793 32 : send_or_recv="recv")
794 :
795 : ! cleanup
796 32 : CALL fb_com_tasks_release(com_tasks_recv)
797 32 : CALL fb_com_tasks_release(com_tasks_send)
798 :
799 32 : CALL timestop(handle)
800 :
801 96 : END SUBROUTINE fb_atmatrix_generate_com_pairs_2
802 :
803 : END MODULE qs_fb_atomic_matrix_methods
|