Line data Source code
1 : !--------------------------------------------------------------------------------------------------!
2 : ! CP2K: A general program to perform molecular dynamics simulations !
3 : ! Copyright 2000-2024 CP2K developers group <https://cp2k.org> !
4 : ! !
5 : ! SPDX-License-Identifier: GPL-2.0-or-later !
6 : !--------------------------------------------------------------------------------------------------!
7 :
8 : MODULE qs_fb_filter_matrix_methods
9 :
10 : USE atomic_kind_types, ONLY: atomic_kind_type,&
11 : get_atomic_kind
12 : USE cp_dbcsr_api, ONLY: dbcsr_create,&
13 : dbcsr_distribution_type,&
14 : dbcsr_finalize,&
15 : dbcsr_get_info,&
16 : dbcsr_get_stored_coordinates,&
17 : dbcsr_put_block,&
18 : dbcsr_type,&
19 : dbcsr_type_no_symmetry
20 : USE fermi_utils, ONLY: Fermi,&
21 : FermiFixed
22 : USE kinds, ONLY: default_string_length,&
23 : dp,&
24 : int_8
25 : USE message_passing, ONLY: mp_para_env_type
26 : USE particle_types, ONLY: particle_type
27 : USE qs_fb_atomic_halo_types, ONLY: fb_atomic_halo_create,&
28 : fb_atomic_halo_get,&
29 : fb_atomic_halo_list_get,&
30 : fb_atomic_halo_list_obj,&
31 : fb_atomic_halo_nullify,&
32 : fb_atomic_halo_obj,&
33 : fb_atomic_halo_release,&
34 : fb_atomic_halo_set
35 : USE qs_fb_atomic_matrix_methods, ONLY: fb_atmatrix_calc_size,&
36 : fb_atmatrix_construct,&
37 : fb_atmatrix_construct_2,&
38 : fb_atmatrix_generate_com_pairs_2
39 : USE qs_fb_com_tasks_types, ONLY: &
40 : TASK_COST, TASK_DEST, TASK_N_RECORDS, TASK_PAIR, TASK_SRC, &
41 : fb_com_atom_pairs_calc_buffer_sizes, fb_com_atom_pairs_create, fb_com_atom_pairs_decode, &
42 : fb_com_atom_pairs_distribute_blks, fb_com_atom_pairs_gather_blks, fb_com_atom_pairs_get, &
43 : fb_com_atom_pairs_has_data, fb_com_atom_pairs_init, fb_com_atom_pairs_nullify, &
44 : fb_com_atom_pairs_obj, fb_com_atom_pairs_release, fb_com_tasks_build_atom_pairs, &
45 : fb_com_tasks_create, fb_com_tasks_encode_pair, fb_com_tasks_nullify, fb_com_tasks_obj, &
46 : fb_com_tasks_release, fb_com_tasks_set, fb_com_tasks_transpose_dest_src
47 : USE qs_fb_matrix_data_types, ONLY: fb_matrix_data_add,&
48 : fb_matrix_data_create,&
49 : fb_matrix_data_has_data,&
50 : fb_matrix_data_nullify,&
51 : fb_matrix_data_obj,&
52 : fb_matrix_data_release
53 : USE qs_fb_trial_fns_types, ONLY: fb_trial_fns_get,&
54 : fb_trial_fns_obj
55 : USE string_utilities, ONLY: compress,&
56 : uppercase
57 : #include "./base/base_uses.f90"
58 :
59 : IMPLICIT NONE
60 :
61 : PRIVATE
62 :
63 : CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'qs_fb_filter_matrix_methods'
64 :
65 : PUBLIC :: fb_fltrmat_build, &
66 : fb_fltrmat_build_2
67 :
68 : CONTAINS
69 :
70 : ! **************************************************************************************************
71 : !> \brief Build the filter matrix, with MPI communications happening at each
72 : !> step. Less efficient on communication, but more efficient on
73 : !> memory usage (compared to fb_fltrmat_build_2)
74 : !> \param H_mat : DBCSR system KS matrix
75 : !> \param S_mat : DBCSR system overlap matrix
76 : !> \param atomic_halos : list of all local atomic halos, each halo gives
77 : !> one atomic matrix and contributes to one blk
78 : !> col to the filter matrix
79 : !> \param trial_fns : the trial functions to be used to shrink the
80 : !> size of the new "filtered" basis
81 : !> \param para_env : cp2k parallel environment
82 : !> \param particle_set : set of all particles in the system
83 : !> \param fermi_level : the fermi level used for defining the filter
84 : !> function, which is a Fermi-Dirac distribution
85 : !> function
86 : !> \param filter_temp : the filter temperature used for defining the
87 : !> filter function
88 : !> \param name : name given to the filter matrix
89 : !> \param filter_mat : DBCSR format filter matrix
90 : !> \param tolerance : anything less than tolerance is treated as zero
91 : !> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
92 : ! **************************************************************************************************
93 64 : SUBROUTINE fb_fltrmat_build(H_mat, &
94 : S_mat, &
95 : atomic_halos, &
96 : trial_fns, &
97 : para_env, &
98 : particle_set, &
99 : fermi_level, &
100 : filter_temp, &
101 : name, &
102 : filter_mat, &
103 : tolerance)
104 : TYPE(dbcsr_type), POINTER :: H_mat, S_mat
105 : TYPE(fb_atomic_halo_list_obj), INTENT(IN) :: atomic_halos
106 : TYPE(fb_trial_fns_obj), INTENT(IN) :: trial_fns
107 : TYPE(mp_para_env_type), POINTER :: para_env
108 : TYPE(particle_type), DIMENSION(:), POINTER :: particle_set
109 : REAL(KIND=dp), INTENT(IN) :: fermi_level, filter_temp
110 : CHARACTER(LEN=*), INTENT(IN) :: name
111 : TYPE(dbcsr_type), POINTER :: filter_mat
112 : REAL(KIND=dp), INTENT(IN) :: tolerance
113 :
114 : CHARACTER(LEN=*), PARAMETER :: routineN = 'fb_fltrmat_build'
115 :
116 : CHARACTER(LEN=32) :: symmetry_string
117 : CHARACTER(LEN=default_string_length) :: name_string
118 : INTEGER :: handle, iblkcol, ihalo, ikind, &
119 : max_nhalos, nblkcols_total, nhalos
120 64 : INTEGER, DIMENSION(:), POINTER :: col_blk_size, dummy_halo_atoms, ntfns, &
121 64 : row_blk_size
122 : LOGICAL :: send_data_only
123 : TYPE(atomic_kind_type), POINTER :: atomic_kind
124 : TYPE(dbcsr_distribution_type) :: dbcsr_dist
125 : TYPE(fb_atomic_halo_obj) :: dummy_atomic_halo
126 64 : TYPE(fb_atomic_halo_obj), DIMENSION(:), POINTER :: halos
127 :
128 64 : CALL timeset(routineN, handle)
129 :
130 64 : NULLIFY (halos, atomic_kind, ntfns, dummy_halo_atoms, row_blk_size, col_blk_size)
131 64 : CALL fb_atomic_halo_nullify(dummy_atomic_halo)
132 :
133 : ! filter_mat must be of a dissassociated status (i.e. brand new)
134 64 : CPASSERT(.NOT. ASSOCIATED(filter_mat))
135 :
136 : ! get trial function information
137 : CALL fb_trial_fns_get(trial_fns=trial_fns, &
138 64 : nfunctions=ntfns)
139 :
140 : ! calculate the row_blk_size and col_blk_size arrays for
141 : ! constructing the filter matrix in DBCSR format
142 : ! row_blk_size for the filter matrix is the same as H or S
143 : CALL dbcsr_get_info(H_mat, &
144 : nblkcols_total=nblkcols_total, &
145 : row_blk_size=row_blk_size, &
146 64 : distribution=dbcsr_dist)
147 192 : ALLOCATE (col_blk_size(nblkcols_total))
148 576 : col_blk_size = 0
149 576 : DO iblkcol = 1, nblkcols_total
150 512 : atomic_kind => particle_set(iblkcol)%atomic_kind
151 : CALL get_atomic_kind(atomic_kind=atomic_kind, &
152 512 : kind_number=ikind)
153 576 : col_blk_size(iblkcol) = ntfns(ikind)
154 : END DO
155 : ! DO NOT deallocate cbs if gift=.TRUE. as col_blk_sizes will only point to cbs
156 64 : name_string = name
157 64 : CALL compress(name_string)
158 64 : CALL uppercase(name_string)
159 : ! the filter matrix is non-square and is always non-symmetric
160 64 : symmetry_string = dbcsr_type_no_symmetry
161 : ! create empty filter matrix
162 64 : ALLOCATE (filter_mat)
163 : CALL dbcsr_create(matrix=filter_mat, &
164 : name=name_string, &
165 : dist=dbcsr_dist, &
166 : matrix_type=symmetry_string, &
167 : row_blk_size=row_blk_size, &
168 : col_blk_size=col_blk_size, &
169 64 : nze=0)
170 64 : DEALLOCATE (col_blk_size)
171 :
172 : CALL fb_atomic_halo_list_get(atomic_halos=atomic_halos, &
173 : nhalos=nhalos, &
174 : max_nhalos=max_nhalos, &
175 64 : halos=halos)
176 :
177 : ! create dummy empty atomic halo
178 64 : CALL fb_atomic_halo_create(dummy_atomic_halo)
179 64 : ALLOCATE (dummy_halo_atoms(0))
180 : CALL fb_atomic_halo_set(atomic_halo=dummy_atomic_halo, &
181 : owner_atom=0, &
182 : owner_id_in_halo=0, &
183 : natoms=0, &
184 : halo_atoms=dummy_halo_atoms, &
185 : nelectrons=0, &
186 64 : sorted=.TRUE.)
187 :
188 64 : send_data_only = .FALSE.
189 :
190 320 : DO ihalo = 1, max_nhalos
191 256 : IF (ihalo > nhalos) THEN
192 : send_data_only = .TRUE.
193 : END IF
194 : ! construct the filter matrix block by block
195 320 : IF (send_data_only) THEN
196 : CALL fb_fltrmat_add_blkcol(H_mat, &
197 : S_mat, &
198 : dummy_atomic_halo, &
199 : trial_fns, &
200 : para_env, &
201 : particle_set, &
202 : fermi_level, &
203 : filter_temp, &
204 : filter_mat, &
205 0 : tolerance)
206 : ELSE
207 : CALL fb_fltrmat_add_blkcol(H_mat, &
208 : S_mat, &
209 : halos(ihalo), &
210 : trial_fns, &
211 : para_env, &
212 : particle_set, &
213 : fermi_level, &
214 : filter_temp, &
215 : filter_mat, &
216 256 : tolerance)
217 : END IF ! send_data_only
218 : END DO
219 :
220 : ! finalise the filter matrix
221 64 : CALL dbcsr_finalize(filter_mat)
222 :
223 : ! cleanup
224 64 : CALL fb_atomic_halo_release(dummy_atomic_halo)
225 :
226 64 : CALL timestop(handle)
227 :
228 192 : END SUBROUTINE fb_fltrmat_build
229 :
230 : ! **************************************************************************************************
231 : !> \brief Build the filter matrix, with MPI communications grouped together.
232 : !> More effcient on communication, less efficient on memory (compared
233 : !> to fb_fltrmat_build)
234 : !> \param H_mat : DBCSR system KS matrix
235 : !> \param S_mat : DBCSR system overlap matrix
236 : !> \param atomic_halos : list of all local atomic halos, each halo gives
237 : !> one atomic matrix and contributes to one blk
238 : !> col to the filter matrix
239 : !> \param trial_fns : the trial functions to be used to shrink the
240 : !> size of the new "filtered" basis
241 : !> \param para_env : cp2k parallel environment
242 : !> \param particle_set : set of all particles in the system
243 : !> \param fermi_level : the fermi level used for defining the filter
244 : !> function, which is a Fermi-Dirac distribution
245 : !> function
246 : !> \param filter_temp : the filter temperature used for defining the
247 : !> filter function
248 : !> \param name : name given to the filter matrix
249 : !> \param filter_mat : DBCSR format filter matrix
250 : !> \param tolerance : anything less than tolerance is treated as zero
251 : !> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
252 : ! **************************************************************************************************
253 16 : SUBROUTINE fb_fltrmat_build_2(H_mat, &
254 : S_mat, &
255 : atomic_halos, &
256 : trial_fns, &
257 : para_env, &
258 : particle_set, &
259 : fermi_level, &
260 : filter_temp, &
261 : name, &
262 : filter_mat, &
263 : tolerance)
264 : TYPE(dbcsr_type), POINTER :: H_mat, S_mat
265 : TYPE(fb_atomic_halo_list_obj), INTENT(IN) :: atomic_halos
266 : TYPE(fb_trial_fns_obj), INTENT(IN) :: trial_fns
267 : TYPE(mp_para_env_type), POINTER :: para_env
268 : TYPE(particle_type), DIMENSION(:), POINTER :: particle_set
269 : REAL(KIND=dp), INTENT(IN) :: fermi_level, filter_temp
270 : CHARACTER(LEN=*), INTENT(IN) :: name
271 : TYPE(dbcsr_type), POINTER :: filter_mat
272 : REAL(KIND=dp), INTENT(IN) :: tolerance
273 :
274 : CHARACTER(LEN=*), PARAMETER :: routineN = 'fb_fltrmat_build_2'
275 :
276 : CHARACTER(LEN=default_string_length) :: name_string
277 : INTEGER :: handle, iblkcol, ihalo, ikind, &
278 : natoms_global, natoms_in_halo, &
279 : nblkcols_total, nblks_recv, nhalos, &
280 : nmax
281 16 : INTEGER, DIMENSION(:), POINTER :: col_blk_size, ntfns, row_blk_size
282 : LOGICAL :: check_ok
283 : TYPE(atomic_kind_type), POINTER :: atomic_kind
284 : TYPE(dbcsr_distribution_type) :: dbcsr_dist
285 16 : TYPE(fb_atomic_halo_obj), DIMENSION(:), POINTER :: halos
286 : TYPE(fb_com_atom_pairs_obj) :: atmatrix_blks_recv, atmatrix_blks_send, &
287 : filter_mat_blks_recv, &
288 : filter_mat_blks_send
289 : TYPE(fb_matrix_data_obj) :: filter_mat_data, H_mat_data, S_mat_data
290 :
291 16 : CALL timeset(routineN, handle)
292 :
293 16 : NULLIFY (halos, atomic_kind, row_blk_size, col_blk_size, ntfns)
294 :
295 : ! filter_mat must be of a dissassociated status (i.e. brand new)
296 16 : check_ok = .NOT. ASSOCIATED(filter_mat)
297 16 : CPASSERT(check_ok)
298 :
299 : ! get total number of atoms
300 16 : natoms_global = SIZE(particle_set)
301 :
302 : ! get trial function information
303 : CALL fb_trial_fns_get(trial_fns=trial_fns, &
304 16 : nfunctions=ntfns)
305 :
306 : ! calculate the row_blk_size and col_blk_size arrays for
307 : ! constructing the filter matrix in DBCSR format
308 : ! row_blk_size for the filter matrix is the same as H or S
309 : CALL dbcsr_get_info(H_mat, &
310 : nblkcols_total=nblkcols_total, &
311 : row_blk_size=row_blk_size, &
312 16 : distribution=dbcsr_dist)
313 48 : ALLOCATE (col_blk_size(nblkcols_total))
314 144 : col_blk_size = 0
315 144 : DO iblkcol = 1, nblkcols_total
316 128 : atomic_kind => particle_set(iblkcol)%atomic_kind
317 : CALL get_atomic_kind(atomic_kind=atomic_kind, &
318 128 : kind_number=ikind)
319 144 : col_blk_size(iblkcol) = ntfns(ikind)
320 : END DO
321 : ! DO NOT deallocate cbs if gift=.TRUE. as col_blk_sizes will only point to cbs
322 16 : name_string = name
323 16 : CALL compress(name_string)
324 16 : CALL uppercase(name_string)
325 : ! create empty filter matrix (it is always non-symmetric as it is non-square)
326 16 : ALLOCATE (filter_mat)
327 : CALL dbcsr_create(matrix=filter_mat, &
328 : name=name_string, &
329 : dist=dbcsr_dist, &
330 : matrix_type=dbcsr_type_no_symmetry, &
331 : row_blk_size=row_blk_size, &
332 : col_blk_size=col_blk_size, &
333 16 : nze=0)
334 16 : DEALLOCATE (col_blk_size)
335 :
336 : ! get all the blocks required for constructing atomic matrics, and
337 : ! store it in a fb_matrix_data object
338 16 : CALL fb_matrix_data_nullify(H_mat_data)
339 16 : CALL fb_matrix_data_nullify(S_mat_data)
340 16 : CALL fb_com_atom_pairs_nullify(atmatrix_blks_send)
341 16 : CALL fb_com_atom_pairs_nullify(atmatrix_blks_recv)
342 16 : CALL fb_com_atom_pairs_create(atmatrix_blks_send)
343 16 : CALL fb_com_atom_pairs_create(atmatrix_blks_recv)
344 : ! H matrix
345 : CALL fb_atmatrix_generate_com_pairs_2(H_mat, &
346 : atomic_halos, &
347 : para_env, &
348 : atmatrix_blks_send, &
349 16 : atmatrix_blks_recv)
350 : CALL fb_com_atom_pairs_get(atom_pairs=atmatrix_blks_recv, &
351 16 : npairs=nblks_recv)
352 : CALL fb_matrix_data_create(H_mat_data, &
353 : nblks_recv, &
354 16 : natoms_global)
355 : CALL fb_com_atom_pairs_gather_blks(H_mat, &
356 : atmatrix_blks_send, &
357 : atmatrix_blks_recv, &
358 : para_env, &
359 16 : H_mat_data)
360 : ! S matrix
361 : CALL fb_atmatrix_generate_com_pairs_2(S_mat, &
362 : atomic_halos, &
363 : para_env, &
364 : atmatrix_blks_send, &
365 16 : atmatrix_blks_recv)
366 : CALL fb_com_atom_pairs_get(atom_pairs=atmatrix_blks_recv, &
367 16 : npairs=nblks_recv)
368 : CALL fb_matrix_data_create(S_mat_data, &
369 : nblks_recv, &
370 16 : natoms_global)
371 : CALL fb_com_atom_pairs_gather_blks(S_mat, &
372 : atmatrix_blks_send, &
373 : atmatrix_blks_recv, &
374 : para_env, &
375 16 : S_mat_data)
376 : ! cleanup
377 16 : CALL fb_com_atom_pairs_release(atmatrix_blks_send)
378 16 : CALL fb_com_atom_pairs_release(atmatrix_blks_recv)
379 :
380 : ! make filter matrix blocks one by one and store in an
381 : ! matrix_data_obj
382 16 : CALL fb_matrix_data_nullify(filter_mat_data)
383 : CALL fb_atomic_halo_list_get(atomic_halos=atomic_halos, &
384 : nhalos=nhalos, &
385 16 : halos=halos)
386 16 : nmax = 0
387 80 : DO ihalo = 1, nhalos
388 : CALL fb_atomic_halo_get(atomic_halo=halos(ihalo), &
389 64 : natoms=natoms_in_halo)
390 80 : nmax = nmax + natoms_in_halo
391 : END DO
392 : CALL fb_matrix_data_create(filter_mat_data, &
393 : nmax, &
394 16 : natoms_global)
395 80 : DO ihalo = 1, nhalos
396 : CALL fb_fltrmat_add_blkcol_2(H_mat, &
397 : S_mat, &
398 : H_mat_data, &
399 : S_mat_data, &
400 : halos(ihalo), &
401 : trial_fns, &
402 : particle_set, &
403 : fermi_level, &
404 : filter_temp, &
405 : filter_mat_data, &
406 80 : tolerance)
407 : END DO
408 : ! clean up
409 16 : CALL fb_matrix_data_release(H_mat_data)
410 16 : CALL fb_matrix_data_release(S_mat_data)
411 :
412 : ! distribute the relevant blocks from the matrix_data_obj to DBCSR
413 : ! filter matrix
414 16 : CALL fb_com_atom_pairs_nullify(filter_mat_blks_send)
415 16 : CALL fb_com_atom_pairs_nullify(filter_mat_blks_recv)
416 16 : CALL fb_com_atom_pairs_create(filter_mat_blks_send)
417 16 : CALL fb_com_atom_pairs_create(filter_mat_blks_recv)
418 : CALL fb_fltrmat_generate_com_pairs_2(filter_mat, &
419 : atomic_halos, &
420 : para_env, &
421 : filter_mat_blks_send, &
422 16 : filter_mat_blks_recv)
423 : CALL fb_com_atom_pairs_distribute_blks(filter_mat_data, &
424 : filter_mat_blks_send, &
425 : filter_mat_blks_recv, &
426 : para_env, &
427 16 : filter_mat)
428 : ! cleanup
429 16 : CALL fb_com_atom_pairs_release(filter_mat_blks_send)
430 16 : CALL fb_com_atom_pairs_release(filter_mat_blks_recv)
431 16 : CALL fb_matrix_data_release(filter_mat_data)
432 :
433 : ! finalise matrix
434 16 : CALL dbcsr_finalize(filter_mat)
435 :
436 16 : CALL timestop(handle)
437 :
438 96 : END SUBROUTINE fb_fltrmat_build_2
439 :
440 : ! **************************************************************************************************
441 : !> \brief Add a computed blocks in one column to the filter matrix. This
442 : !> version is used by fb_fltrmat_build, for the case where MPI
443 : !> communications are done at each step
444 : !> It does not finalise the filter matrix
445 : !> \param H_mat : DBCSR system KS matrix
446 : !> \param S_mat : DBCSR system overlap matrix
447 : !> \param atomic_halo : the halo that contributes to the blk
448 : !> col of the filter matrix
449 : !> \param trial_fns ...
450 : !> \param para_env : cp2k parallel environment
451 : !> \param particle_set : set of all particles in the system
452 : !> \param fermi_level : the fermi level used for defining the filter
453 : !> function, which is a Fermi-Dirac distribution
454 : !> function
455 : !> \param filter_temp : the filter temperature used for defining the
456 : !> filter function
457 : !> \param filter_mat : DBCSR format filter matrix
458 : !> \param tolerance : anything smaller than tolerance is treated as zero
459 : !> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
460 : ! **************************************************************************************************
461 256 : SUBROUTINE fb_fltrmat_add_blkcol(H_mat, &
462 : S_mat, &
463 : atomic_halo, &
464 : trial_fns, &
465 : para_env, &
466 : particle_set, &
467 : fermi_level, &
468 : filter_temp, &
469 : filter_mat, &
470 : tolerance)
471 : TYPE(dbcsr_type), POINTER :: H_mat, S_mat
472 : TYPE(fb_atomic_halo_obj), INTENT(IN) :: atomic_halo
473 : TYPE(fb_trial_fns_obj), INTENT(IN) :: trial_fns
474 : TYPE(mp_para_env_type), POINTER :: para_env
475 : TYPE(particle_type), DIMENSION(:), POINTER :: particle_set
476 : REAL(KIND=dp), INTENT(IN) :: fermi_level, filter_temp
477 : TYPE(dbcsr_type), POINTER :: filter_mat
478 : REAL(KIND=dp), INTENT(IN) :: tolerance
479 :
480 : CHARACTER(LEN=*), PARAMETER :: routineN = 'fb_fltrmat_add_blkcol'
481 :
482 : INTEGER :: handle, handle_mpi, iatom_global, iatom_in_halo, ind, ipair, ipe, itrial, &
483 : jatom_global, jatom_in_halo, jkind, natoms_global, natoms_in_halo, ncols_atmatrix, &
484 : ncols_blk, nrows_atmatrix, nrows_blk, numprocs, pe, recv_encode, send_encode, stat
485 256 : INTEGER(KIND=int_8), DIMENSION(:), POINTER :: pairs_recv, pairs_send
486 256 : INTEGER, ALLOCATABLE, DIMENSION(:) :: atomic_H_blk_col_start, atomic_H_blk_row_start, &
487 256 : atomic_S_blk_col_start, atomic_S_blk_row_start, col_block_size_data, ind_in_halo, &
488 256 : recv_disps, recv_pair_count, recv_pair_disps, recv_sizes, send_disps, send_pair_count, &
489 256 : send_pair_disps, send_sizes
490 256 : INTEGER, DIMENSION(:), POINTER :: halo_atoms, ntfns, row_block_size_data
491 256 : INTEGER, DIMENSION(:, :), POINTER :: tfns
492 256 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:) :: recv_buf, send_buf
493 256 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :) :: atomic_filter_mat, atomic_H, atomic_S
494 : TYPE(atomic_kind_type), POINTER :: atomic_kind
495 : TYPE(fb_com_atom_pairs_obj) :: com_pairs_recv, com_pairs_send
496 :
497 256 : CALL timeset(routineN, handle)
498 :
499 256 : NULLIFY (atomic_kind, halo_atoms, ntfns, pairs_send, pairs_recv, &
500 256 : row_block_size_data, tfns)
501 256 : CALL fb_com_atom_pairs_nullify(com_pairs_send)
502 256 : CALL fb_com_atom_pairs_nullify(com_pairs_recv)
503 :
504 : ! ----------------------------------------------------------------------
505 : ! Get communication buffers ready
506 : ! ----------------------------------------------------------------------
507 :
508 : ! generate send and recv atom pairs
509 256 : CALL fb_com_atom_pairs_create(com_pairs_send)
510 256 : CALL fb_com_atom_pairs_create(com_pairs_recv)
511 : CALL fb_fltrmat_generate_com_pairs(filter_mat, &
512 : atomic_halo, &
513 : para_env, &
514 : com_pairs_send, &
515 256 : com_pairs_recv)
516 : CALL fb_com_atom_pairs_get(atom_pairs=com_pairs_send, &
517 : natoms_encode=send_encode, &
518 256 : pairs=pairs_send)
519 : CALL fb_com_atom_pairs_get(atom_pairs=com_pairs_recv, &
520 : natoms_encode=recv_encode, &
521 256 : pairs=pairs_recv)
522 :
523 : ! get para_env info
524 256 : numprocs = para_env%num_pe
525 : ! me = para_env%mepos + 1 ! my process id, starting counting from 1
526 :
527 : ! obtain trail function information
528 : CALL fb_trial_fns_get(trial_fns=trial_fns, &
529 : nfunctions=ntfns, &
530 256 : functions=tfns)
531 :
532 : ! obtain row and col block size data for filter matrix
533 256 : CALL dbcsr_get_info(H_mat, row_blk_size=row_block_size_data)
534 256 : natoms_global = SIZE(particle_set)
535 768 : ALLOCATE (col_block_size_data(natoms_global))
536 2304 : DO jatom_global = 1, natoms_global
537 2048 : atomic_kind => particle_set(jatom_global)%atomic_kind
538 2048 : CALL get_atomic_kind(atomic_kind=atomic_kind, kind_number=jkind)
539 2304 : col_block_size_data(jatom_global) = ntfns(jkind)
540 : END DO
541 :
542 : ! allocate temporary arrays for send
543 768 : ALLOCATE (send_sizes(numprocs))
544 512 : ALLOCATE (send_disps(numprocs))
545 512 : ALLOCATE (send_pair_count(numprocs))
546 512 : ALLOCATE (send_pair_disps(numprocs))
547 : ! setup send buffer sizes
548 : CALL fb_com_atom_pairs_calc_buffer_sizes(com_pairs_send, &
549 : numprocs, &
550 : row_block_size_data, &
551 : col_block_size_data, &
552 : send_sizes, &
553 : send_disps, &
554 : send_pair_count, &
555 256 : send_pair_disps)
556 : ! allocate send buffer
557 1280 : ALLOCATE (send_buf(SUM(send_sizes)))
558 :
559 : ! allocate temporary array for recv
560 512 : ALLOCATE (recv_sizes(numprocs))
561 512 : ALLOCATE (recv_disps(numprocs))
562 512 : ALLOCATE (recv_pair_count(numprocs))
563 512 : ALLOCATE (recv_pair_disps(numprocs))
564 : ! setup recv buffer sizes
565 : CALL fb_com_atom_pairs_calc_buffer_sizes(com_pairs_recv, &
566 : numprocs, &
567 : row_block_size_data, &
568 : col_block_size_data, &
569 : recv_sizes, &
570 : recv_disps, &
571 : recv_pair_count, &
572 256 : recv_pair_disps)
573 : ! allocate recv buffer
574 1280 : ALLOCATE (recv_buf(SUM(recv_sizes)))
575 :
576 : ! ----------------------------------------------------------------------
577 : ! Construct atomic filter matrix for this atomic_halo
578 : ! ----------------------------------------------------------------------
579 :
580 : CALL fb_atomic_halo_get(atomic_halo=atomic_halo, &
581 : natoms=natoms_in_halo, &
582 256 : halo_atoms=halo_atoms)
583 :
584 : ! construct atomic matrix for H for atomic_halo
585 : ALLOCATE (atomic_H_blk_row_start(natoms_in_halo + 1), &
586 : atomic_H_blk_col_start(natoms_in_halo + 1), &
587 1024 : STAT=stat)
588 256 : CPASSERT(stat == 0)
589 : CALL fb_atmatrix_calc_size(H_mat, &
590 : atomic_halo, &
591 : nrows_atmatrix, &
592 : ncols_atmatrix, &
593 : atomic_H_blk_row_start, &
594 256 : atomic_H_blk_col_start)
595 :
596 1024 : ALLOCATE (atomic_H(nrows_atmatrix, ncols_atmatrix))
597 : CALL fb_atmatrix_construct(H_mat, &
598 : atomic_halo, &
599 : para_env, &
600 : atomic_H, &
601 : atomic_H_blk_row_start, &
602 256 : atomic_H_blk_col_start)
603 :
604 : ! construct atomic matrix for S for atomic_halo
605 : ALLOCATE (atomic_S_blk_row_start(natoms_in_halo + 1), &
606 : atomic_S_blk_col_start(natoms_in_halo + 1), &
607 1024 : STAT=stat)
608 256 : CPASSERT(stat == 0)
609 : CALL fb_atmatrix_calc_size(S_mat, &
610 : atomic_halo, &
611 : nrows_atmatrix, &
612 : ncols_atmatrix, &
613 : atomic_S_blk_row_start, &
614 256 : atomic_S_blk_col_start)
615 1024 : ALLOCATE (atomic_S(nrows_atmatrix, ncols_atmatrix))
616 : CALL fb_atmatrix_construct(S_mat, &
617 : atomic_halo, &
618 : para_env, &
619 : atomic_S, &
620 : atomic_S_blk_row_start, &
621 256 : atomic_S_blk_col_start)
622 :
623 : ! construct the atomic filter matrix
624 768 : ALLOCATE (atomic_filter_mat(nrows_atmatrix, ncols_atmatrix))
625 : ! calculate atomic filter matrix only if it is non-zero sized
626 256 : IF (nrows_atmatrix > 0 .AND. ncols_atmatrix > 0) THEN
627 : CALL fb_fltrmat_build_atomic_fltrmat(atomic_H, &
628 : atomic_S, &
629 : fermi_level, &
630 : filter_temp, &
631 : atomic_filter_mat, &
632 256 : tolerance)
633 : END IF
634 :
635 : ! ----------------------------------------------------------------------
636 : ! Construct filter matrix blocks and add to the correct locations
637 : ! in send_buffer
638 : ! ----------------------------------------------------------------------
639 :
640 : ! preconstruct iatom_global to iatom_in_halo map
641 512 : ALLOCATE (ind_in_halo(natoms_global))
642 2304 : ind_in_halo = 0
643 2304 : DO iatom_in_halo = 1, natoms_in_halo
644 2048 : iatom_global = halo_atoms(iatom_in_halo)
645 2304 : ind_in_halo(iatom_global) = iatom_in_halo
646 : END DO
647 :
648 : ! initialise send buffer
649 106752 : IF (SIZE(send_buf) > 0) send_buf = 0.0_dp
650 : ! assign values
651 768 : DO ipe = 1, numprocs
652 512 : send_sizes(ipe) = 0
653 2816 : DO ipair = 1, send_pair_count(ipe)
654 : CALL fb_com_atom_pairs_decode(pairs_send(send_pair_disps(ipe) + ipair), &
655 : pe, iatom_global, jatom_global, &
656 2048 : send_encode)
657 2048 : iatom_in_halo = ind_in_halo(iatom_global)
658 2048 : CPASSERT(iatom_in_halo > 0)
659 2048 : jatom_in_halo = ind_in_halo(jatom_global)
660 2048 : CPASSERT(jatom_in_halo > 0)
661 2048 : atomic_kind => particle_set(jatom_global)%atomic_kind
662 : CALL get_atomic_kind(atomic_kind=atomic_kind, &
663 2048 : kind_number=jkind)
664 2048 : nrows_blk = row_block_size_data(iatom_global)
665 2048 : ncols_blk = ntfns(jkind)
666 :
667 : ! do it column-wise one trial function at a time
668 10240 : DO itrial = 1, ntfns(jkind)
669 8192 : ind = send_disps(ipe) + send_sizes(ipe) + (itrial - 1)*nrows_blk
670 : CALL dgemv("N", &
671 : nrows_blk, &
672 : ncols_atmatrix, &
673 : 1.0_dp, &
674 : atomic_filter_mat( &
675 : atomic_H_blk_row_start(iatom_in_halo): &
676 : atomic_H_blk_row_start(iatom_in_halo + 1) - 1, &
677 : 1:ncols_atmatrix &
678 : ), &
679 : nrows_blk, &
680 : atomic_S( &
681 : 1:nrows_atmatrix, &
682 : atomic_S_blk_col_start(jatom_in_halo) + &
683 : tfns(itrial, jkind) - 1 &
684 : ), &
685 : 1, &
686 : 0.0_dp, &
687 : send_buf(ind + 1:ind + nrows_blk), &
688 23865344 : 1)
689 : END DO ! itrial
690 6656 : send_sizes(ipe) = send_sizes(ipe) + nrows_blk*ncols_blk
691 : END DO ! ipair
692 : END DO ! ipe
693 :
694 256 : DEALLOCATE (atomic_H)
695 256 : DEALLOCATE (atomic_H_blk_row_start)
696 256 : DEALLOCATE (atomic_S)
697 256 : DEALLOCATE (atomic_S_blk_row_start)
698 256 : DEALLOCATE (atomic_filter_mat)
699 256 : DEALLOCATE (ind_in_halo)
700 :
701 : ! ----------------------------------------------------------------------
702 : ! Do communication
703 : ! ----------------------------------------------------------------------
704 :
705 256 : CALL timeset("fb_fltrmat_add_blkcol_mpi", handle_mpi)
706 :
707 : CALL para_env%alltoall(send_buf, send_sizes, send_disps, &
708 256 : recv_buf, recv_sizes, recv_disps)
709 :
710 256 : CALL timestop(handle_mpi)
711 :
712 256 : DEALLOCATE (send_buf)
713 256 : DEALLOCATE (send_sizes)
714 256 : DEALLOCATE (send_disps)
715 256 : DEALLOCATE (send_pair_count)
716 256 : DEALLOCATE (send_pair_disps)
717 :
718 : ! ----------------------------------------------------------------------
719 : ! Unpack the recv buffer and add the blocks to correct parts of
720 : ! the DBCSR filter matrix
721 : ! ----------------------------------------------------------------------
722 :
723 768 : DO ipe = 1, numprocs
724 512 : recv_sizes(ipe) = 0
725 2816 : DO ipair = 1, recv_pair_count(ipe)
726 : CALL fb_com_atom_pairs_decode(pairs_recv(recv_pair_disps(ipe) + ipair), &
727 : pe, iatom_global, jatom_global, &
728 2048 : recv_encode)
729 2048 : nrows_blk = row_block_size_data(iatom_global)
730 2048 : ncols_blk = col_block_size_data(jatom_global)
731 2048 : ind = recv_disps(ipe) + recv_sizes(ipe)
732 : CALL dbcsr_put_block(filter_mat, &
733 : iatom_global, jatom_global, &
734 2048 : recv_buf((ind + 1):(ind + nrows_blk*ncols_blk)))
735 4608 : recv_sizes(ipe) = recv_sizes(ipe) + nrows_blk*ncols_blk
736 : END DO ! ipair
737 : END DO ! ipe
738 :
739 : ! cleanup rest of the temporary arrays
740 256 : DEALLOCATE (recv_buf)
741 256 : DEALLOCATE (recv_sizes)
742 256 : DEALLOCATE (recv_pair_count)
743 256 : DEALLOCATE (recv_pair_disps)
744 :
745 256 : CALL fb_com_atom_pairs_release(com_pairs_send)
746 256 : CALL fb_com_atom_pairs_release(com_pairs_recv)
747 :
748 : ! cannot finalise the matrix until all blocks has been added
749 :
750 256 : CALL timestop(handle)
751 :
752 1792 : END SUBROUTINE fb_fltrmat_add_blkcol
753 :
754 : ! **************************************************************************************************
755 : !> \brief Computed blocks in one filter matrix column. This version is used by
756 : !> fb_fltrmat_build_2, where MPI communication is done collectively
757 : !> \param H_mat : DBCSR system KS matrix
758 : !> \param S_mat : DBCSR system overlap matrix
759 : !> \param H_mat_data : local storage of the relevant H_mat matrix blocks
760 : !> \param S_mat_data : local storage of the relevant S_mat matrix blocks
761 : !> \param atomic_halo : the halo that contributes to the blk
762 : !> col of the filter matrix
763 : !> \param trial_fns : trial functions data
764 : !> \param particle_set : set of all particles in the system
765 : !> \param fermi_level : the fermi level used for defining the filter
766 : !> function, which is a Fermi-Dirac distribution
767 : !> function
768 : !> \param filter_temp : the filter temperature used for defining the
769 : !> filter function
770 : !> \param filter_mat_data : local storage for the the computed filter matrix
771 : !> blocks
772 : !> \param tolerance : anything less than this is regarded as zero
773 : !> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
774 : ! **************************************************************************************************
775 64 : SUBROUTINE fb_fltrmat_add_blkcol_2(H_mat, &
776 : S_mat, &
777 : H_mat_data, &
778 : S_mat_data, &
779 : atomic_halo, &
780 : trial_fns, &
781 : particle_set, &
782 : fermi_level, &
783 : filter_temp, &
784 : filter_mat_data, &
785 : tolerance)
786 : TYPE(dbcsr_type), POINTER :: H_mat, S_mat
787 : TYPE(fb_matrix_data_obj), INTENT(IN) :: H_mat_data, S_mat_data
788 : TYPE(fb_atomic_halo_obj), INTENT(IN) :: atomic_halo
789 : TYPE(fb_trial_fns_obj), INTENT(IN) :: trial_fns
790 : TYPE(particle_type), DIMENSION(:), POINTER :: particle_set
791 : REAL(KIND=dp), INTENT(IN) :: fermi_level, filter_temp
792 : TYPE(fb_matrix_data_obj), INTENT(INOUT) :: filter_mat_data
793 : REAL(KIND=dp), INTENT(IN) :: tolerance
794 :
795 : CHARACTER(LEN=*), PARAMETER :: routineN = 'fb_fltrmat_add_blkcol_2'
796 :
797 : INTEGER :: handle, iatom_global, iatom_in_halo, itrial, jatom_global, jatom_in_halo, jkind, &
798 : natoms_global, natoms_in_halo, ncols_atmatrix, ncols_blk, ncols_blk_max, nrows_atmatrix, &
799 : nrows_blk, nrows_blk_max, stat
800 64 : INTEGER, ALLOCATABLE, DIMENSION(:) :: atomic_H_blk_col_start, atomic_H_blk_row_start, &
801 64 : atomic_S_blk_col_start, atomic_S_blk_row_start, col_block_size_data
802 64 : INTEGER, DIMENSION(:), POINTER :: halo_atoms, ntfns, row_block_size_data
803 64 : INTEGER, DIMENSION(:, :), POINTER :: tfns
804 : LOGICAL :: check_ok
805 64 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :) :: atomic_filter_mat, atomic_H, atomic_S, &
806 64 : mat_blk
807 : TYPE(atomic_kind_type), POINTER :: atomic_kind
808 :
809 64 : CALL timeset(routineN, handle)
810 :
811 64 : NULLIFY (atomic_kind, halo_atoms, ntfns, row_block_size_data, tfns)
812 :
813 64 : check_ok = fb_matrix_data_has_data(H_mat_data)
814 64 : CPASSERT(check_ok)
815 64 : check_ok = fb_matrix_data_has_data(S_mat_data)
816 64 : CPASSERT(check_ok)
817 :
818 : ! obtain trial function information
819 : CALL fb_trial_fns_get(trial_fns=trial_fns, &
820 : nfunctions=ntfns, &
821 64 : functions=tfns)
822 :
823 : ! obtain row and col block size data for filter matrix
824 64 : CALL dbcsr_get_info(H_mat, row_blk_size=row_block_size_data)
825 64 : natoms_global = SIZE(particle_set)
826 192 : ALLOCATE (col_block_size_data(natoms_global))
827 576 : DO jatom_global = 1, natoms_global
828 512 : atomic_kind => particle_set(jatom_global)%atomic_kind
829 512 : CALL get_atomic_kind(atomic_kind=atomic_kind, kind_number=jkind)
830 576 : col_block_size_data(jatom_global) = ntfns(jkind)
831 : END DO
832 :
833 : ! ----------------------------------------------------------------------
834 : ! Construct atomic filter matrix for this atomic_halo
835 : ! ----------------------------------------------------------------------
836 :
837 : CALL fb_atomic_halo_get(atomic_halo=atomic_halo, &
838 : natoms=natoms_in_halo, &
839 64 : halo_atoms=halo_atoms)
840 :
841 : ! construct atomic matrix for H for atomic_halo
842 : ALLOCATE (atomic_H_blk_row_start(natoms_in_halo + 1), &
843 : atomic_H_blk_col_start(natoms_in_halo + 1), &
844 256 : STAT=stat)
845 64 : CPASSERT(stat == 0)
846 : CALL fb_atmatrix_calc_size(H_mat, &
847 : atomic_halo, &
848 : nrows_atmatrix, &
849 : ncols_atmatrix, &
850 : atomic_H_blk_row_start, &
851 64 : atomic_H_blk_col_start)
852 256 : ALLOCATE (atomic_H(nrows_atmatrix, ncols_atmatrix))
853 : CALL fb_atmatrix_construct_2(H_mat_data, &
854 : atomic_halo, &
855 : atomic_H, &
856 : atomic_H_blk_row_start, &
857 64 : atomic_H_blk_col_start)
858 :
859 : ! construct atomic matrix for S for atomic_halo
860 : ALLOCATE (atomic_S_blk_row_start(natoms_in_halo + 1), &
861 : atomic_S_blk_col_start(natoms_in_halo + 1), &
862 256 : STAT=stat)
863 64 : CPASSERT(stat == 0)
864 : CALL fb_atmatrix_calc_size(S_mat, &
865 : atomic_halo, &
866 : nrows_atmatrix, &
867 : ncols_atmatrix, &
868 : atomic_S_blk_row_start, &
869 64 : atomic_S_blk_col_start)
870 256 : ALLOCATE (atomic_S(nrows_atmatrix, ncols_atmatrix))
871 : CALL fb_atmatrix_construct_2(S_mat_data, &
872 : atomic_halo, &
873 : atomic_S, &
874 : atomic_S_blk_row_start, &
875 64 : atomic_S_blk_col_start)
876 :
877 : ! construct the atomic filter matrix
878 192 : ALLOCATE (atomic_filter_mat(nrows_atmatrix, ncols_atmatrix))
879 : ! calculate atomic filter matrix only if it is non-zero sized
880 64 : IF (nrows_atmatrix > 0 .AND. ncols_atmatrix > 0) THEN
881 : CALL fb_fltrmat_build_atomic_fltrmat(atomic_H, &
882 : atomic_S, &
883 : fermi_level, &
884 : filter_temp, &
885 : atomic_filter_mat, &
886 64 : tolerance)
887 : END IF
888 :
889 : ! ----------------------------------------------------------------------
890 : ! Construct filter matrix block and add to filter_mat_data
891 : ! ----------------------------------------------------------------------
892 :
893 : CALL fb_atomic_halo_get(atomic_halo=atomic_halo, &
894 : owner_atom=jatom_global, &
895 64 : owner_id_in_halo=jatom_in_halo)
896 576 : nrows_blk_max = MAXVAL(row_block_size_data)
897 128 : ncols_blk_max = MAXVAL(ntfns)
898 256 : ALLOCATE (mat_blk(nrows_blk_max, ncols_blk_max))
899 3648 : mat_blk(:, :) = 0.0_dp
900 576 : DO iatom_in_halo = 1, natoms_in_halo
901 512 : iatom_global = halo_atoms(iatom_in_halo)
902 512 : atomic_kind => particle_set(jatom_global)%atomic_kind
903 : CALL get_atomic_kind(atomic_kind=atomic_kind, &
904 512 : kind_number=jkind)
905 512 : nrows_blk = row_block_size_data(iatom_global)
906 512 : ncols_blk = ntfns(jkind)
907 :
908 : ! ALLOCATE(mat_blk(nrows_blk,ncols_blk) STAT=stat)
909 : ! CPPostcondition(stat==0, cp_failure_level, routineP,failure)
910 :
911 : ! do it column-wise one trial function at a time
912 2560 : DO itrial = 1, ntfns(jkind)
913 : CALL dgemv("N", &
914 : nrows_blk, &
915 : ncols_atmatrix, &
916 : 1.0_dp, &
917 : atomic_filter_mat( &
918 : atomic_H_blk_row_start(iatom_in_halo): &
919 : atomic_H_blk_row_start(iatom_in_halo + 1) - 1, &
920 : 1:ncols_atmatrix &
921 : ), &
922 : nrows_blk, &
923 : atomic_S( &
924 : 1:nrows_atmatrix, &
925 : atomic_S_blk_col_start(jatom_in_halo) + &
926 : tfns(itrial, jkind) - 1 &
927 : ), &
928 : 1, &
929 : 0.0_dp, &
930 : mat_blk( &
931 : 1:nrows_blk, &
932 : itrial), &
933 5966336 : 1)
934 : END DO ! itrial
935 : CALL fb_matrix_data_add(filter_mat_data, &
936 : iatom_global, &
937 : jatom_global, &
938 1088 : mat_blk(1:nrows_blk, 1:ncols_blk))
939 : ! DEALLOCATE(mat_blk, STAT=stat)
940 : ! CPPostcondition(stat==0, cp_failure_level, routineP,failure)
941 : END DO ! iatom_in_halo
942 64 : DEALLOCATE (mat_blk)
943 :
944 : ! clean up
945 64 : DEALLOCATE (atomic_H)
946 64 : DEALLOCATE (atomic_H_blk_row_start)
947 64 : DEALLOCATE (atomic_S)
948 64 : DEALLOCATE (atomic_S_blk_row_start)
949 64 : DEALLOCATE (atomic_filter_mat)
950 :
951 64 : CALL timestop(handle)
952 :
953 384 : END SUBROUTINE fb_fltrmat_add_blkcol_2
954 :
955 : ! **************************************************************************************************
956 : !> \brief generate the list of blocks (atom pairs) to be sent and received
957 : !> in order to construct the filter matrix for each atomic halo.
958 : !> This version is for use with fb_fltrmat_build, where MPI
959 : !> communications are done at each step
960 : !> \param filter_mat : DBCSR formatted filter matrix
961 : !> \param atomic_halo : the halo that contributes to a blk
962 : !> col of the filter matrix
963 : !> \param para_env : cp2k parallel environment
964 : !> \param atom_pairs_send : list of blocks to be sent
965 : !> \param atom_pairs_recv : list of blocks to be received
966 : !> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
967 : ! **************************************************************************************************
968 256 : SUBROUTINE fb_fltrmat_generate_com_pairs(filter_mat, &
969 : atomic_halo, &
970 : para_env, &
971 : atom_pairs_send, &
972 : atom_pairs_recv)
973 : TYPE(dbcsr_type), POINTER :: filter_mat
974 : TYPE(fb_atomic_halo_obj), INTENT(IN) :: atomic_halo
975 : TYPE(mp_para_env_type), POINTER :: para_env
976 : TYPE(fb_com_atom_pairs_obj), INTENT(INOUT) :: atom_pairs_send, atom_pairs_recv
977 :
978 : CHARACTER(LEN=*), PARAMETER :: routineN = 'fb_fltrmat_generate_com_pairs'
979 :
980 : INTEGER :: dest, handle, iatom_global, &
981 : iatom_in_halo, itask, jatom_global, &
982 : natoms_in_halo, nblkrows_total, &
983 : ntasks_send
984 256 : INTEGER(KIND=int_8), DIMENSION(:, :), POINTER :: tasks_send
985 256 : INTEGER, DIMENSION(:), POINTER :: halo_atoms
986 : TYPE(fb_com_tasks_obj) :: com_tasks_recv, com_tasks_send
987 :
988 256 : CALL timeset(routineN, handle)
989 :
990 256 : NULLIFY (tasks_send)
991 256 : CALL fb_com_tasks_nullify(com_tasks_send)
992 256 : CALL fb_com_tasks_nullify(com_tasks_recv)
993 :
994 : ! initialise atom_pairs_send and atom_pairs_recv
995 256 : IF (fb_com_atom_pairs_has_data(atom_pairs_send)) THEN
996 256 : CALL fb_com_atom_pairs_init(atom_pairs_send)
997 : ELSE
998 0 : CALL fb_com_atom_pairs_create(atom_pairs_send)
999 : END IF
1000 256 : IF (fb_com_atom_pairs_has_data(atom_pairs_recv)) THEN
1001 256 : CALL fb_com_atom_pairs_init(atom_pairs_recv)
1002 : ELSE
1003 0 : CALL fb_com_atom_pairs_create(atom_pairs_recv)
1004 : END IF
1005 :
1006 : ! The total number of filter matrix blocks each processor is going
1007 : ! to construct equals to the total number of halo atoms in all of
1008 : ! the atomic halos local to the processor. The number of send
1009 : ! tasks will not exceed this. We do one halo (col) at a time, and
1010 : ! each call of this subroutine will only work on one filter matrix
1011 : ! col corresponding to atomic_halo.
1012 :
1013 : ! The col atom block index for each filter matrix block are the
1014 : ! owner atom of each halo. The row atom block index for each
1015 : ! filter matrix block corresponding to each col are the halo atoms
1016 : ! of the corresponding halos. Filter matrix is non-symmetric: it
1017 : ! is non-square, because the blocks themselves are non-square
1018 :
1019 : CALL fb_atomic_halo_get(atomic_halo=atomic_halo, &
1020 : owner_atom=jatom_global, &
1021 : natoms=natoms_in_halo, &
1022 256 : halo_atoms=halo_atoms)
1023 256 : ntasks_send = natoms_in_halo
1024 :
1025 : ! allocate send tasks
1026 768 : ALLOCATE (tasks_send(TASK_N_RECORDS, ntasks_send))
1027 :
1028 : ! Get the total number of atoms, this can be obtained from the
1029 : ! total number of block rows in the DBCSR filter matrix. We
1030 : ! assumes that before calling this subroutine, the filter_mat has
1031 : ! already been created and initialised: i.e. using
1032 : ! dbcsr_create_new. Even if the matrix is at the moment empty,
1033 : ! the attribute nblkrows_total is already assigned from the dbcsr
1034 : ! distribution data
1035 : CALL dbcsr_get_info(filter_mat, &
1036 256 : nblkrows_total=nblkrows_total)
1037 :
1038 : ! source is always the local processor
1039 : ASSOCIATE (src => para_env%mepos)
1040 : ! construct send tasks
1041 256 : itask = 1
1042 2304 : DO iatom_in_halo = 1, natoms_in_halo
1043 2048 : iatom_global = halo_atoms(iatom_in_halo)
1044 : ! find where the constructed block of filter matrix belongs to
1045 : CALL dbcsr_get_stored_coordinates(filter_mat, &
1046 : iatom_global, &
1047 : jatom_global, &
1048 2048 : processor=dest)
1049 : ! create the send tasks
1050 2048 : tasks_send(TASK_DEST, itask) = dest
1051 2048 : tasks_send(TASK_SRC, itask) = src
1052 : CALL fb_com_tasks_encode_pair(tasks_send(TASK_PAIR, itask), &
1053 : iatom_global, jatom_global, &
1054 2048 : nblkrows_total)
1055 : ! calculation of cost not implemented at the moment
1056 2048 : tasks_send(TASK_COST, itask) = 0
1057 4352 : itask = itask + 1
1058 : END DO ! iatom_in_halo
1059 : END ASSOCIATE
1060 :
1061 256 : CALL fb_com_tasks_create(com_tasks_recv)
1062 256 : CALL fb_com_tasks_create(com_tasks_send)
1063 :
1064 : CALL fb_com_tasks_set(com_tasks=com_tasks_send, &
1065 : task_dim=TASK_N_RECORDS, &
1066 : ntasks=ntasks_send, &
1067 : nencode=nblkrows_total, &
1068 256 : tasks=tasks_send)
1069 :
1070 : ! generate the recv task list (tasks_recv) from the send task list
1071 : CALL fb_com_tasks_transpose_dest_src(com_tasks_recv, "<", com_tasks_send, &
1072 256 : para_env)
1073 :
1074 : ! task lists are now complete, now construct the atom_pairs_send
1075 : ! and atom_pairs_recv from the tasks lists
1076 : CALL fb_com_tasks_build_atom_pairs(com_tasks=com_tasks_send, &
1077 : atom_pairs=atom_pairs_send, &
1078 : natoms_encode=nblkrows_total, &
1079 256 : send_or_recv="send")
1080 : CALL fb_com_tasks_build_atom_pairs(com_tasks=com_tasks_recv, &
1081 : atom_pairs=atom_pairs_recv, &
1082 : natoms_encode=nblkrows_total, &
1083 256 : send_or_recv="recv")
1084 :
1085 : ! cleanup
1086 256 : CALL fb_com_tasks_release(com_tasks_recv)
1087 256 : CALL fb_com_tasks_release(com_tasks_send)
1088 :
1089 256 : CALL timestop(handle)
1090 :
1091 768 : END SUBROUTINE fb_fltrmat_generate_com_pairs
1092 :
1093 : ! **************************************************************************************************
1094 : !> \brief generate the list of blocks (atom pairs) to be sent and received
1095 : !> in order to construct the filter matrix for each atomic halo.
1096 : !> This version is for use with fb_fltrmat_build_2, where MPI
1097 : !> communications are done collectively.
1098 : !> \param filter_mat : DBCSR formatted filter matrix
1099 : !> \param atomic_halos : set of all local atomic halos contributing to the
1100 : !> filter matrix
1101 : !> \param para_env : cp2k parallel environment
1102 : !> \param atom_pairs_send : list of blocks to be sent
1103 : !> \param atom_pairs_recv : list of blocks to be received
1104 : !> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
1105 : ! **************************************************************************************************
1106 16 : SUBROUTINE fb_fltrmat_generate_com_pairs_2(filter_mat, &
1107 : atomic_halos, &
1108 : para_env, &
1109 : atom_pairs_send, &
1110 : atom_pairs_recv)
1111 : TYPE(dbcsr_type), POINTER :: filter_mat
1112 : TYPE(fb_atomic_halo_list_obj), INTENT(IN) :: atomic_halos
1113 : TYPE(mp_para_env_type), POINTER :: para_env
1114 : TYPE(fb_com_atom_pairs_obj), INTENT(INOUT) :: atom_pairs_send, atom_pairs_recv
1115 :
1116 : CHARACTER(LEN=*), PARAMETER :: routineN = 'fb_fltrmat_generate_com_pairs_2'
1117 :
1118 : INTEGER :: dest, handle, iatom_global, iatom_in_halo, iatom_stored, ihalo, itask, &
1119 : jatom_global, jatom_stored, natoms_in_halo, nblkrows_total, nhalos, ntasks_send
1120 16 : INTEGER(KIND=int_8), DIMENSION(:, :), POINTER :: tasks_send
1121 16 : INTEGER, DIMENSION(:), POINTER :: halo_atoms
1122 : LOGICAL :: transpose
1123 16 : TYPE(fb_atomic_halo_obj), DIMENSION(:), POINTER :: halos
1124 : TYPE(fb_com_tasks_obj) :: com_tasks_recv, com_tasks_send
1125 :
1126 16 : CALL timeset(routineN, handle)
1127 :
1128 16 : NULLIFY (tasks_send)
1129 16 : CALL fb_com_tasks_nullify(com_tasks_send)
1130 16 : CALL fb_com_tasks_nullify(com_tasks_recv)
1131 :
1132 : ! initialise atom_pairs_send and atom_pairs_recv
1133 16 : IF (fb_com_atom_pairs_has_data(atom_pairs_send)) THEN
1134 16 : CALL fb_com_atom_pairs_init(atom_pairs_send)
1135 : ELSE
1136 0 : CALL fb_com_atom_pairs_create(atom_pairs_send)
1137 : END IF
1138 16 : IF (fb_com_atom_pairs_has_data(atom_pairs_recv)) THEN
1139 16 : CALL fb_com_atom_pairs_init(atom_pairs_recv)
1140 : ELSE
1141 0 : CALL fb_com_atom_pairs_create(atom_pairs_recv)
1142 : END IF
1143 :
1144 : ! The col atom block index for each filter matrix block are the
1145 : ! owner atom of each halo. The row atom block index for each
1146 : ! filter matrix block corresponding to each col are the halo atoms
1147 : ! of the corresponding halos. Filter matrix is non-symmetric: it
1148 : ! is non-square, because the blocks themselves are non-square
1149 :
1150 : CALL fb_atomic_halo_list_get(atomic_halos=atomic_halos, &
1151 : nhalos=nhalos, &
1152 16 : halos=halos)
1153 :
1154 : ! estimate the maximum number of blocks (i.e. atom paris) to send
1155 16 : ntasks_send = 0
1156 80 : DO ihalo = 1, nhalos
1157 : CALL fb_atomic_halo_get(atomic_halo=halos(ihalo), &
1158 64 : natoms=natoms_in_halo)
1159 80 : ntasks_send = ntasks_send + natoms_in_halo
1160 : END DO ! ihalo
1161 :
1162 : ! allocate send tasks
1163 48 : ALLOCATE (tasks_send(TASK_N_RECORDS, ntasks_send))
1164 :
1165 : ! Get the total number of atoms. This can be obtained from the
1166 : ! total number of block rows in the DBCSR filter matrix. We
1167 : ! assumes that before calling this subroutine, the filter_mat has
1168 : ! already been created and initialised: i.e. using
1169 : ! dbcsr_create_new. Even if the matrix is at the moment empty,
1170 : ! the attribute nblkrows_total is already assigned from the dbcsr
1171 : ! distribution data
1172 : CALL dbcsr_get_info(filter_mat, &
1173 16 : nblkrows_total=nblkrows_total)
1174 :
1175 : ! source is always the local processor
1176 : ASSOCIATE (src => para_env%mepos)
1177 : ! construct send tasks
1178 16 : itask = 1
1179 80 : DO ihalo = 1, nhalos
1180 : CALL fb_atomic_halo_get(atomic_halo=halos(ihalo), &
1181 : owner_atom=jatom_global, &
1182 : natoms=natoms_in_halo, &
1183 64 : halo_atoms=halo_atoms)
1184 592 : DO iatom_in_halo = 1, natoms_in_halo
1185 512 : iatom_global = halo_atoms(iatom_in_halo)
1186 512 : iatom_stored = iatom_global
1187 512 : jatom_stored = jatom_global
1188 512 : transpose = .FALSE.
1189 : ! find where the constructed block of filter matrix belongs to
1190 : CALL dbcsr_get_stored_coordinates(filter_mat, &
1191 : iatom_stored, &
1192 : jatom_stored, &
1193 512 : processor=dest)
1194 : ! create the send tasks
1195 512 : tasks_send(TASK_DEST, itask) = dest
1196 512 : tasks_send(TASK_SRC, itask) = src
1197 : CALL fb_com_tasks_encode_pair(tasks_send(TASK_PAIR, itask), &
1198 : iatom_global, jatom_global, &
1199 512 : nblkrows_total)
1200 : ! calculation of cost not implemented at the moment
1201 512 : tasks_send(TASK_COST, itask) = 0
1202 1088 : itask = itask + 1
1203 : END DO ! iatom_in_halo
1204 : END DO ! ihalo
1205 : END ASSOCIATE
1206 :
1207 : ! get the actual number of tasks
1208 16 : ntasks_send = itask - 1
1209 :
1210 16 : CALL fb_com_tasks_create(com_tasks_send)
1211 : CALL fb_com_tasks_set(com_tasks=com_tasks_send, &
1212 : task_dim=TASK_N_RECORDS, &
1213 : ntasks=ntasks_send, &
1214 : nencode=nblkrows_total, &
1215 16 : tasks=tasks_send)
1216 :
1217 : ! generate the recv task list (tasks_recv) from the send task list
1218 16 : CALL fb_com_tasks_create(com_tasks_recv)
1219 : CALL fb_com_tasks_transpose_dest_src(com_tasks_recv, "<", com_tasks_send, &
1220 16 : para_env)
1221 :
1222 : ! task lists are now complete, now construct the atom_pairs_send
1223 : ! and atom_pairs_recv from the tasks lists
1224 : CALL fb_com_tasks_build_atom_pairs(com_tasks=com_tasks_send, &
1225 : atom_pairs=atom_pairs_send, &
1226 : natoms_encode=nblkrows_total, &
1227 16 : send_or_recv="send")
1228 : CALL fb_com_tasks_build_atom_pairs(com_tasks=com_tasks_recv, &
1229 : atom_pairs=atom_pairs_recv, &
1230 : natoms_encode=nblkrows_total, &
1231 16 : send_or_recv="recv")
1232 :
1233 : ! cleanup
1234 16 : CALL fb_com_tasks_release(com_tasks_recv)
1235 16 : CALL fb_com_tasks_release(com_tasks_send)
1236 :
1237 16 : CALL timestop(handle)
1238 :
1239 48 : END SUBROUTINE fb_fltrmat_generate_com_pairs_2
1240 :
1241 : ! **************************************************************************************************
1242 : !> \brief Build the atomic filter matrix for each atomic halo
1243 : !> \param atomic_H : atomic KS matrix
1244 : !> \param atomic_S : atomic overlap matrix
1245 : !> \param fermi_level : fermi level used to construct the Fermi-Dirac
1246 : !> filter function
1247 : !> \param filter_temp : temperature used to construct the Fermi-Dirac
1248 : !> filter function
1249 : !> \param atomic_filter_mat : the atomic filter matrix
1250 : !> \param tolerance : anything smaller than tolerance is treated as zero
1251 : !> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
1252 : ! **************************************************************************************************
1253 960 : SUBROUTINE fb_fltrmat_build_atomic_fltrmat(atomic_H, &
1254 320 : atomic_S, &
1255 : fermi_level, &
1256 : filter_temp, &
1257 320 : atomic_filter_mat, &
1258 : tolerance)
1259 : REAL(KIND=dp), DIMENSION(:, :), INTENT(IN) :: atomic_H, atomic_S
1260 : REAL(KIND=dp), INTENT(IN) :: fermi_level, filter_temp
1261 : REAL(KIND=dp), DIMENSION(:, :), INTENT(OUT) :: atomic_filter_mat
1262 : REAL(KIND=dp), INTENT(IN) :: tolerance
1263 :
1264 : CHARACTER(LEN=*), PARAMETER :: routineN = 'fb_fltrmat_build_atomic_fltrmat'
1265 :
1266 : INTEGER :: handle, handle_dgemm, handle_dsygv, ii, &
1267 : info, jj, mat_dim, work_array_size
1268 : LOGICAL :: check_ok
1269 320 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:) :: eigenvalues, filter_function, work
1270 320 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :) :: atomic_S_copy, eigenvectors, &
1271 320 : filtered_eigenvectors
1272 :
1273 320 : CALL timeset(routineN, handle)
1274 :
1275 : ! This subroutine assumes atomic_filter_mat is not zero size, in
1276 : ! other words, it really has to be constructed, instead of just
1277 : ! being a dummy
1278 :
1279 : check_ok = SIZE(atomic_filter_mat, 1) > 0 .AND. &
1280 320 : SIZE(atomic_filter_mat, 2) > 0
1281 0 : CPASSERT(check_ok)
1282 :
1283 : ! initialise
1284 3494720 : atomic_filter_mat = 0.0_dp
1285 320 : mat_dim = SIZE(atomic_H, 1)
1286 :
1287 : ! diagonalise using LAPACK
1288 960 : ALLOCATE (eigenvalues(mat_dim))
1289 : ! get optimal work array size
1290 320 : ALLOCATE (work(1))
1291 : ! dsygv will overwrite part of atomic_H and atomic_S, thus need to copy them
1292 1280 : ALLOCATE (atomic_S_copy(SIZE(atomic_S, 1), SIZE(atomic_S, 2)))
1293 3494720 : atomic_S_copy(:, :) = atomic_S(:, :)
1294 1280 : ALLOCATE (eigenvectors(SIZE(atomic_H, 1), SIZE(atomic_H, 2)))
1295 3494720 : eigenvectors(:, :) = atomic_H(:, :)
1296 :
1297 320 : CALL timeset("fb_atomic_filter_dsygv", handle_dsygv)
1298 :
1299 320 : info = 0
1300 : CALL dsygv(1, &
1301 : 'V', &
1302 : 'U', &
1303 : mat_dim, &
1304 : eigenvectors, &
1305 : mat_dim, &
1306 : atomic_S_copy, &
1307 : mat_dim, &
1308 : eigenvalues, &
1309 : work, &
1310 : -1, &
1311 320 : info)
1312 320 : work_array_size = NINT(work(1))
1313 : ! now allocate work array
1314 320 : DEALLOCATE (work)
1315 960 : ALLOCATE (work(work_array_size))
1316 1131840 : work = 0.0_dp
1317 : ! do calculation
1318 3494720 : atomic_S_copy(:, :) = atomic_S(:, :)
1319 3494720 : eigenvectors(:, :) = atomic_H(:, :)
1320 320 : info = 0
1321 : CALL dsygv(1, &
1322 : 'V', &
1323 : 'U', &
1324 : mat_dim, &
1325 : eigenvectors, &
1326 : mat_dim, &
1327 : atomic_S_copy, &
1328 : mat_dim, &
1329 : eigenvalues, &
1330 : work, &
1331 : work_array_size, &
1332 320 : info)
1333 : ! check if diagonalisation is successful
1334 320 : IF (info .NE. 0) THEN
1335 0 : WRITE (*, *) "DSYGV ERROR MESSAGE: ", info
1336 0 : CPABORT("DSYGV failed")
1337 : END IF
1338 :
1339 320 : CALL timestop(handle_dsygv)
1340 :
1341 320 : DEALLOCATE (work)
1342 320 : DEALLOCATE (atomic_S_copy)
1343 :
1344 : ! first get the filter function
1345 960 : ALLOCATE (filter_function(mat_dim))
1346 33600 : filter_function = 0.0_dp
1347 : CALL fb_fltrmat_fermi_dirac_mu(filter_function, &
1348 : eigenvalues, &
1349 : filter_temp, &
1350 320 : fermi_level)
1351 320 : DEALLOCATE (eigenvalues)
1352 :
1353 : ! atomic_H has the eigenvectors, construct the version of it
1354 : ! filtered through the filter function
1355 1280 : ALLOCATE (filtered_eigenvectors(mat_dim, mat_dim))
1356 33600 : DO jj = 1, mat_dim
1357 3494720 : DO ii = 1, mat_dim
1358 : filtered_eigenvectors(ii, jj) = &
1359 3494400 : filter_function(jj)*eigenvectors(ii, jj)
1360 : END DO ! ii
1361 : END DO ! jj
1362 :
1363 320 : DEALLOCATE (filter_function)
1364 :
1365 320 : CALL timeset("fb_atomic_filter_dgemm", handle_dgemm)
1366 :
1367 : ! construct atomic filter matrix
1368 : CALL dgemm("N", &
1369 : "T", &
1370 : mat_dim, &
1371 : mat_dim, &
1372 : mat_dim, &
1373 : 1.0_dp, &
1374 : filtered_eigenvectors, &
1375 : mat_dim, &
1376 : eigenvectors, &
1377 : mat_dim, &
1378 : 0.0_dp, &
1379 : atomic_filter_mat, &
1380 320 : mat_dim)
1381 :
1382 320 : CALL timestop(handle_dgemm)
1383 :
1384 : ! remove small negative terms due to numerical error, the filter
1385 : ! matrix must not be negative definite
1386 33600 : DO jj = 1, SIZE(atomic_filter_mat, 2)
1387 3494720 : DO ii = 1, SIZE(atomic_filter_mat, 1)
1388 3494400 : IF (ABS(atomic_filter_mat(ii, jj)) < tolerance) THEN
1389 50656 : atomic_filter_mat(ii, jj) = 0.0_dp
1390 : END IF
1391 : END DO
1392 : END DO
1393 :
1394 320 : DEALLOCATE (filtered_eigenvectors)
1395 320 : DEALLOCATE (eigenvectors)
1396 :
1397 320 : CALL timestop(handle)
1398 :
1399 960 : END SUBROUTINE fb_fltrmat_build_atomic_fltrmat
1400 :
1401 : ! **************************************************************************************************
1402 : !> \brief get values of Fermi-Dirac distribution based on a given fermi
1403 : !> level at a given set of energy eigenvalues
1404 : !> \param f : the Fermi-Dirac distribution function values
1405 : !> \param eigenvals : set of energy eigenvalues
1406 : !> \param T : temperature
1407 : !> \param mu : the fermi level
1408 : !> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
1409 : ! **************************************************************************************************
1410 320 : SUBROUTINE fb_fltrmat_fermi_dirac_mu(f, eigenvals, T, mu)
1411 : REAL(KIND=dp), DIMENSION(:), INTENT(OUT) :: f
1412 : REAL(KIND=dp), DIMENSION(:), INTENT(IN) :: eigenvals
1413 : REAL(KIND=dp), INTENT(IN) :: T, mu
1414 :
1415 : REAL(KIND=dp) :: kTS, ne
1416 :
1417 : ! we want fermi function max at 1, so maxocc = 1 here
1418 :
1419 320 : CALL Fermi(f, ne, kTS, eigenvals, mu, T, 1.0_dp)
1420 320 : END SUBROUTINE fb_fltrmat_fermi_dirac_mu
1421 :
1422 : ! **************************************************************************************************
1423 : !> \brief get values of Fermi-Dirac distribution based on a given electron
1424 : !> number at a given set of energy eigenvales
1425 : !> \param f : the Fermi-Dirac distribution function values
1426 : !> \param eigenvals : set of energy eigenvalues
1427 : !> \param T : temperature
1428 : !> \param ne : number of electrons
1429 : !> \param maxocc : maximum occupancy per orbital
1430 : !> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
1431 : ! **************************************************************************************************
1432 0 : SUBROUTINE fb_fltrmat_fermi_dirac_ne(f, eigenvals, T, ne, maxocc)
1433 : REAL(KIND=dp), DIMENSION(:), INTENT(OUT) :: f
1434 : REAL(KIND=dp), DIMENSION(:), INTENT(IN) :: eigenvals
1435 : REAL(KIND=dp), INTENT(IN) :: T, ne, maxocc
1436 :
1437 : REAL(KIND=dp) :: kTS, mu
1438 :
1439 : ! mu is the calculated fermi level
1440 : ! kTS is the calculated entropic contribution to the energy i.e. -TS
1441 : ! kTS = kT*[f ln f + (1-f) ln (1-f)]
1442 :
1443 0 : CALL FermiFixed(f, mu, kTS, eigenvals, ne, T, maxocc)
1444 0 : END SUBROUTINE fb_fltrmat_fermi_dirac_ne
1445 :
1446 : END MODULE qs_fb_filter_matrix_methods
|