Line data Source code
1 : !--------------------------------------------------------------------------------------------------!
2 : ! CP2K: A general program to perform molecular dynamics simulations !
3 : ! Copyright 2000-2024 CP2K developers group <https://cp2k.org> !
4 : ! !
5 : ! SPDX-License-Identifier: GPL-2.0-or-later !
6 : !--------------------------------------------------------------------------------------------------!
7 :
8 : ! **************************************************************************************************
9 : !> \brief Routines that link DBCSR and CP2K concepts together
10 : !> \author Ole Schuett
11 : !> \par History
12 : !> 01.2014 created
13 : ! **************************************************************************************************
14 : MODULE cp_dbcsr_cp2k_link
15 : USE ao_util, ONLY: exp_radius
16 : USE atomic_kind_types, ONLY: atomic_kind_type,&
17 : get_atomic_kind_set
18 : USE basis_set_types, ONLY: gto_basis_set_p_type,&
19 : gto_basis_set_type
20 : USE bibliography, ONLY: Borstnik2014,&
21 : Heinecke2016,&
22 : Schuett2016,&
23 : cite_reference
24 : USE cp_control_types, ONLY: dft_control_type
25 : USE cp_dbcsr_api, ONLY: &
26 : dbcsr_finalize, dbcsr_get_block_p, dbcsr_get_default_config, dbcsr_get_matrix_type, &
27 : dbcsr_has_symmetry, dbcsr_reserve_blocks, dbcsr_set, dbcsr_set_config, dbcsr_type, &
28 : dbcsr_type_no_symmetry
29 : USE cp_dbcsr_operations, ONLY: max_elements_per_block
30 : USE input_keyword_types, ONLY: keyword_create,&
31 : keyword_release,&
32 : keyword_type
33 : USE input_section_types, ONLY: section_add_keyword,&
34 : section_add_subsection,&
35 : section_create,&
36 : section_release,&
37 : section_type,&
38 : section_vals_get_subs_vals,&
39 : section_vals_type,&
40 : section_vals_val_get
41 : USE kinds, ONLY: dp,&
42 : real_8
43 : USE orbital_pointers, ONLY: nso
44 : USE qs_integral_utils, ONLY: basis_set_list_setup
45 : USE qs_kind_types, ONLY: qs_kind_type
46 : USE qs_ks_types, ONLY: get_ks_env,&
47 : qs_ks_env_type
48 : USE qs_neighbor_list_types, ONLY: get_iterator_info,&
49 : get_neighbor_list_set_p,&
50 : neighbor_list_iterate,&
51 : neighbor_list_iterator_create,&
52 : neighbor_list_iterator_p_type,&
53 : neighbor_list_iterator_release,&
54 : neighbor_list_set_p_type
55 : USE string_utilities, ONLY: s2a
56 : #include "./base/base_uses.f90"
57 :
58 : IMPLICIT NONE
59 :
60 : CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'cp_dbcsr_cp2k_link'
61 :
62 : PUBLIC :: create_dbcsr_section
63 : PUBLIC :: cp_dbcsr_config
64 : PUBLIC :: cp_dbcsr_alloc_block_from_nbl
65 : PUBLIC :: cp_dbcsr_to_csr_screening
66 :
67 : PRIVATE
68 :
69 : ! Possible drivers to use for matrix multiplications
70 : INTEGER, PARAMETER :: mm_driver_auto = 0
71 : INTEGER, PARAMETER :: mm_driver_matmul = 1
72 : INTEGER, PARAMETER :: mm_driver_blas = 2
73 : INTEGER, PARAMETER :: mm_driver_smm = 3
74 : INTEGER, PARAMETER :: mm_driver_xsmm = 4
75 :
76 : CHARACTER(len=*), PARAMETER :: mm_name_auto = "AUTO", &
77 : mm_name_blas = "BLAS", &
78 : mm_name_matmul = "MATMUL", &
79 : mm_name_smm = "SMM", &
80 : mm_name_xsmm = "XSMM"
81 : CONTAINS
82 :
83 : ! **************************************************************************************************
84 : !> \brief Creates the dbcsr section for configuring DBCSR
85 : !> \param section ...
86 : !> \date 2011-04-05
87 : !> \author Urban Borstnik
88 : ! **************************************************************************************************
89 269762 : SUBROUTINE create_dbcsr_section(section)
90 : TYPE(section_type), POINTER :: section
91 :
92 : INTEGER :: idefault
93 : LOGICAL :: ldefault
94 : REAL(KIND=dp) :: rdefault
95 : TYPE(keyword_type), POINTER :: keyword
96 : TYPE(section_type), POINTER :: subsection
97 :
98 14198 : CPASSERT(.NOT. ASSOCIATED(section))
99 : CALL section_create(section, __LOCATION__, name="DBCSR", &
100 : description="Configuration options for the DBCSR library.", &
101 : n_keywords=1, n_subsections=0, repeats=.FALSE., &
102 42594 : citations=(/Borstnik2014, Schuett2016/))
103 :
104 14198 : NULLIFY (keyword)
105 :
106 : CALL keyword_create(keyword, __LOCATION__, name="mm_stack_size", &
107 : description="Size of multiplication parameter stack." &
108 : //" A negative value leaves the decision up to DBCSR.", &
109 : usage="mm_stack_size 1000", &
110 14198 : default_i_val=-1)
111 14198 : CALL section_add_keyword(section, keyword)
112 14198 : CALL keyword_release(keyword)
113 :
114 : CALL keyword_create(keyword, __LOCATION__, name="mm_driver", &
115 : description="Select which backend to use preferably "// &
116 : "for matrix block multiplications on the host.", &
117 : usage="mm_driver blas", &
118 : default_i_val=mm_driver_auto, &
119 : enum_c_vals=s2a("AUTO", "BLAS", "MATMUL", "SMM", "XSMM"), &
120 : enum_i_vals=(/mm_driver_auto, mm_driver_blas, mm_driver_matmul, mm_driver_smm, &
121 : mm_driver_xsmm/), &
122 : enum_desc=s2a("Choose automatically the best available driver", &
123 : "BLAS (requires the BLAS library at link time)", &
124 : "Fortran MATMUL", &
125 : "Library optimised for Small Matrix Multiplies "// &
126 : "(requires the SMM library at link time)", &
127 : "LIBXSMM"), &
128 28396 : citations=(/Heinecke2016/))
129 14198 : CALL section_add_keyword(section, keyword)
130 14198 : CALL keyword_release(keyword)
131 :
132 14198 : CALL dbcsr_get_default_config(avg_elements_images=idefault)
133 : CALL keyword_create(keyword, __LOCATION__, name="avg_elements_images", &
134 : description="Average number of elements (dense limit)" &
135 : //" for each image, which also corresponds to" &
136 : //" the average number of elements exchanged between MPI processes" &
137 : //" during the operations." &
138 : //" A negative or zero value means unlimited.", &
139 : usage="avg_elements_images 10000", &
140 14198 : default_i_val=idefault)
141 14198 : CALL section_add_keyword(section, keyword)
142 14198 : CALL keyword_release(keyword)
143 :
144 14198 : CALL dbcsr_get_default_config(num_mult_images=idefault)
145 : CALL keyword_create(keyword, __LOCATION__, name="num_mult_images", &
146 : description="Multiplicative factor for number of virtual images.", &
147 : usage="num_mult_images 2", &
148 14198 : default_i_val=idefault)
149 14198 : CALL section_add_keyword(section, keyword)
150 14198 : CALL keyword_release(keyword)
151 :
152 14198 : CALL dbcsr_get_default_config(use_mpi_allocator=ldefault)
153 : CALL keyword_create(keyword, __LOCATION__, name="use_mpi_allocator", &
154 : description="Use MPI allocator" &
155 : //" to allocate buffers used in MPI communications.", &
156 : usage="use_mpi_allocator T", &
157 14198 : default_l_val=ldefault)
158 14198 : CALL section_add_keyword(section, keyword)
159 14198 : CALL keyword_release(keyword)
160 :
161 14198 : CALL dbcsr_get_default_config(use_mpi_rma=ldefault)
162 : CALL keyword_create(keyword, __LOCATION__, name="use_mpi_rma", &
163 : description="Use RMA for MPI communications" &
164 : //" for each image, which also corresponds to" &
165 : //" the number of elements exchanged between MPI processes" &
166 : //" during the operations.", &
167 : usage="use_mpi_rma F", &
168 14198 : default_l_val=ldefault)
169 14198 : CALL section_add_keyword(section, keyword)
170 14198 : CALL keyword_release(keyword)
171 :
172 14198 : CALL dbcsr_get_default_config(num_layers_3D=idefault)
173 : CALL keyword_create(keyword, __LOCATION__, name="num_layers_3D", &
174 : description="Number of layers for the 3D multplication algorithm.", &
175 : usage="num_layers_3D 1", &
176 14198 : default_i_val=idefault)
177 14198 : CALL section_add_keyword(section, keyword)
178 14198 : CALL keyword_release(keyword)
179 :
180 14198 : CALL dbcsr_get_default_config(nstacks=idefault)
181 : CALL keyword_create(keyword, __LOCATION__, name="n_size_mnk_stacks", &
182 : description="Number of stacks to use for distinct atomic sizes" &
183 : //" (e.g., 2 for a system of mostly waters). ", &
184 : usage="n_size_mnk_stacks 2", &
185 14198 : default_i_val=idefault)
186 14198 : CALL section_add_keyword(section, keyword)
187 14198 : CALL keyword_release(keyword)
188 :
189 14198 : CALL dbcsr_get_default_config(use_comm_thread=ldefault)
190 : CALL keyword_create(keyword, __LOCATION__, name="use_comm_thread", &
191 : description="During multiplication, use a thread to periodically poll" &
192 : //" MPI to progress outstanding message completions. This is" &
193 : //" beneficial on systems without a DMA-capable network adapter" &
194 : //" e.g. Cray XE6.", &
195 : usage="use_comm_thread T", &
196 14198 : default_l_val=ldefault)
197 14198 : CALL section_add_keyword(section, keyword)
198 14198 : CALL keyword_release(keyword)
199 :
200 : CALL keyword_create(keyword, __LOCATION__, name="MAX_ELEMENTS_PER_BLOCK", &
201 : description="Default block size for turning dense matrices in blocked ones", &
202 : usage="MAX_ELEMENTS_PER_BLOCK 32", &
203 14198 : default_i_val=max_elements_per_block)
204 14198 : CALL section_add_keyword(section, keyword)
205 14198 : CALL keyword_release(keyword)
206 :
207 : CALL keyword_create(keyword, __LOCATION__, name="comm_thread_load", &
208 : description="If a communications thread is used, specify how much " &
209 : //"multiplication workload (%) the thread should perform in " &
210 : //"addition to communication tasks. " &
211 : //"A negative value leaves the decision up to DBCSR.", &
212 : usage="comm_thread_load 50", &
213 14198 : default_i_val=-1)
214 14198 : CALL section_add_keyword(section, keyword)
215 14198 : CALL keyword_release(keyword)
216 :
217 14198 : CALL dbcsr_get_default_config(multrec_limit=idefault)
218 : CALL keyword_create(keyword, __LOCATION__, name="multrec_limit", &
219 : description="Recursion limit of cache oblivious multrec algorithm.", &
220 14198 : default_i_val=idefault)
221 14198 : CALL section_add_keyword(section, keyword)
222 14198 : CALL keyword_release(keyword)
223 :
224 14198 : CALL dbcsr_get_default_config(use_mempools_cpu=ldefault)
225 : CALL keyword_create(keyword, __LOCATION__, name="use_mempools_cpu", &
226 : description="Enable memory pools on the CPU.", &
227 14198 : default_l_val=ldefault)
228 14198 : CALL section_add_keyword(section, keyword)
229 14198 : CALL keyword_release(keyword)
230 :
231 14198 : NULLIFY (subsection)
232 : CALL section_create(subsection, __LOCATION__, name="TENSOR", &
233 : description="Configuration options for Tensors.", &
234 14198 : n_keywords=1, n_subsections=0, repeats=.FALSE.)
235 :
236 14198 : CALL dbcsr_get_default_config(tas_split_factor=rdefault)
237 : CALL keyword_create(keyword, __LOCATION__, name="TAS_SPLIT_FACTOR", &
238 : description="Parameter for hybrid DBCSR-TAS matrix-matrix multiplication algorithm: "// &
239 : "a TAS matrix is split into s submatrices with s = N_max/(N_min*f) with f "// &
240 : "given by this parameter and N_max/N_min the max/min occupancies of the matrices "// &
241 : "involved in a multiplication. A large value makes the multiplication Cannon-based "// &
242 : "(s=1) and a small value (> 0) makes the multiplication based on TAS algorithm "// &
243 : "(s=number of MPI ranks)", &
244 14198 : default_r_val=rdefault)
245 14198 : CALL section_add_keyword(subsection, keyword)
246 14198 : CALL keyword_release(keyword)
247 :
248 14198 : CALL section_add_subsection(section, subsection)
249 14198 : CALL section_release(subsection)
250 :
251 : !---------------------------------------------------------------------------
252 14198 : NULLIFY (subsection)
253 : CALL section_create(subsection, __LOCATION__, name="ACC", &
254 : description="Configuration options for the ACC-Driver.", &
255 14198 : n_keywords=1, n_subsections=0, repeats=.FALSE.)
256 :
257 14198 : CALL dbcsr_get_default_config(accdrv_thread_buffers=idefault)
258 : CALL keyword_create(keyword, __LOCATION__, name="thread_buffers", &
259 : description="Number of transfer-buffers associated with each thread and corresponding stream.", &
260 14198 : default_i_val=idefault)
261 14198 : CALL section_add_keyword(subsection, keyword)
262 14198 : CALL keyword_release(keyword)
263 :
264 14198 : CALL dbcsr_get_default_config(accdrv_avoid_after_busy=ldefault)
265 : CALL keyword_create(keyword, __LOCATION__, name="avoid_after_busy", &
266 : description="If enabled, stacks are not processed by the acc-driver " &
267 : //"after it has signaled congestion during a round of flushing. " &
268 : //"For the next round of flusing the driver is used again.", &
269 14198 : default_l_val=ldefault)
270 14198 : CALL section_add_keyword(subsection, keyword)
271 14198 : CALL keyword_release(keyword)
272 :
273 14198 : CALL dbcsr_get_default_config(accdrv_min_flop_process=idefault)
274 : CALL keyword_create(keyword, __LOCATION__, name="min_flop_process", &
275 : description="Only process stacks with more than the given number of " &
276 : //"floating-point operations per stack-entry (2*m*n*k).", &
277 14198 : default_i_val=idefault)
278 14198 : CALL section_add_keyword(subsection, keyword)
279 14198 : CALL keyword_release(keyword)
280 :
281 14198 : CALL dbcsr_get_default_config(accdrv_stack_sort=ldefault)
282 : CALL keyword_create(keyword, __LOCATION__, name="stack_sort", &
283 : description="Sort multiplication stacks according to C-access.", &
284 14198 : default_l_val=ldefault)
285 14198 : CALL section_add_keyword(subsection, keyword)
286 14198 : CALL keyword_release(keyword)
287 :
288 14198 : CALL dbcsr_get_default_config(accdrv_min_flop_sort=idefault)
289 : CALL keyword_create(keyword, __LOCATION__, name="min_flop_sort", &
290 : description="Only sort stacks with more than the given number of " &
291 : //"floating-point operations per stack-entry (2*m*n*k). " &
292 : //"Alternatively, the stacks are roughly ordered through a " &
293 : //"binning-scheme by Peter Messmer. (Depends on ACC%STACK_SORT)", &
294 14198 : default_i_val=idefault)
295 14198 : CALL section_add_keyword(subsection, keyword)
296 14198 : CALL keyword_release(keyword)
297 :
298 14198 : CALL dbcsr_get_default_config(accdrv_do_inhomogenous=ldefault)
299 : CALL keyword_create(keyword, __LOCATION__, name="process_inhomogenous", &
300 : description="If enabled, inhomogenous stacks are also processed by the acc driver.", &
301 14198 : default_l_val=ldefault)
302 14198 : CALL section_add_keyword(subsection, keyword)
303 14198 : CALL keyword_release(keyword)
304 :
305 14198 : CALL dbcsr_get_default_config(accdrv_binning_nbins=idefault)
306 : CALL keyword_create(keyword, __LOCATION__, name="binning_nbins", &
307 : description="Number of bins used when ordering " &
308 : //"the stacks with the binning-scheme.", &
309 14198 : default_i_val=idefault)
310 14198 : CALL section_add_keyword(subsection, keyword)
311 14198 : CALL keyword_release(keyword)
312 :
313 14198 : CALL dbcsr_get_default_config(accdrv_binning_binsize=idefault)
314 : CALL keyword_create(keyword, __LOCATION__, name="binning_binsize", &
315 : description="Size of bins used when ordering " &
316 : //"the stacks with the binning-scheme.", &
317 14198 : default_i_val=idefault)
318 14198 : CALL section_add_keyword(subsection, keyword)
319 14198 : CALL keyword_release(keyword)
320 :
321 14198 : CALL section_add_subsection(section, subsection)
322 14198 : CALL section_release(subsection)
323 :
324 14198 : END SUBROUTINE create_dbcsr_section
325 :
326 : ! **************************************************************************************************
327 : !> \brief Configures options for DBCSR
328 : !> \param root_section ...
329 : ! **************************************************************************************************
330 198904 : SUBROUTINE cp_dbcsr_config(root_section)
331 : TYPE(section_vals_type), POINTER :: root_section
332 :
333 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_dbcsr_config'
334 :
335 : INTEGER :: handle, ival
336 : LOGICAL :: lval
337 : REAL(kind=real_8) :: rval
338 : TYPE(section_vals_type), POINTER :: dbcsr_section
339 :
340 8648 : CALL timeset(routineN, handle)
341 :
342 8648 : CALL cite_reference(Borstnik2014)
343 8648 : CALL cite_reference(Schuett2016)
344 :
345 8648 : dbcsr_section => section_vals_get_subs_vals(root_section, "GLOBAL%DBCSR")
346 :
347 8648 : CALL section_vals_val_get(dbcsr_section, "mm_stack_size", i_val=ival)
348 8648 : CALL dbcsr_set_config(mm_stack_size=ival)
349 :
350 8648 : CALL section_vals_val_get(dbcsr_section, "MAX_ELEMENTS_PER_BLOCK", i_val=max_elements_per_block)
351 :
352 8648 : CALL section_vals_val_get(dbcsr_section, "avg_elements_images", i_val=ival)
353 8648 : CALL dbcsr_set_config(avg_elements_images=ival)
354 :
355 8648 : CALL section_vals_val_get(dbcsr_section, "num_mult_images", i_val=ival)
356 8648 : CALL dbcsr_set_config(num_mult_images=ival)
357 :
358 8648 : CALL section_vals_val_get(dbcsr_section, "n_size_mnk_stacks", i_val=ival)
359 8648 : CALL dbcsr_set_config(nstacks=ival)
360 :
361 8648 : CALL section_vals_val_get(dbcsr_section, "use_mpi_allocator", l_val=lval)
362 8648 : CALL dbcsr_set_config(use_mpi_allocator=lval)
363 :
364 8648 : CALL section_vals_val_get(dbcsr_section, "use_mpi_rma", l_val=lval)
365 8648 : CALL dbcsr_set_config(use_mpi_rma=lval)
366 :
367 8648 : CALL section_vals_val_get(dbcsr_section, "num_layers_3D", i_val=ival)
368 8648 : CALL dbcsr_set_config(num_layers_3D=ival)
369 :
370 8648 : CALL section_vals_val_get(dbcsr_section, "use_comm_thread", l_val=lval)
371 8648 : CALL dbcsr_set_config(use_comm_thread=lval)
372 :
373 8648 : CALL section_vals_val_get(dbcsr_section, "comm_thread_load", i_val=ival)
374 8648 : CALL dbcsr_set_config(comm_thread_load=ival)
375 :
376 8648 : CALL section_vals_val_get(dbcsr_section, "multrec_limit", i_val=ival)
377 8648 : CALL dbcsr_set_config(multrec_limit=ival)
378 :
379 8648 : CALL section_vals_val_get(dbcsr_section, "use_mempools_cpu", l_val=lval)
380 8648 : CALL dbcsr_set_config(use_mempools_cpu=lval)
381 :
382 8648 : CALL section_vals_val_get(dbcsr_section, "TENSOR%tas_split_factor", r_val=rval)
383 8648 : CALL dbcsr_set_config(tas_split_factor=rval)
384 :
385 8648 : CALL section_vals_val_get(dbcsr_section, "ACC%thread_buffers", i_val=ival)
386 8648 : CALL dbcsr_set_config(accdrv_thread_buffers=ival)
387 :
388 8648 : CALL section_vals_val_get(dbcsr_section, "ACC%min_flop_process", i_val=ival)
389 8648 : CALL dbcsr_set_config(accdrv_min_flop_process=ival)
390 :
391 8648 : CALL section_vals_val_get(dbcsr_section, "ACC%stack_sort", l_val=lval)
392 8648 : CALL dbcsr_set_config(accdrv_stack_sort=lval)
393 :
394 8648 : CALL section_vals_val_get(dbcsr_section, "ACC%min_flop_sort", i_val=ival)
395 8648 : CALL dbcsr_set_config(accdrv_min_flop_sort=ival)
396 :
397 8648 : CALL section_vals_val_get(dbcsr_section, "ACC%process_inhomogenous", l_val=lval)
398 8648 : CALL dbcsr_set_config(accdrv_do_inhomogenous=lval)
399 :
400 8648 : CALL section_vals_val_get(dbcsr_section, "ACC%avoid_after_busy", l_val=lval)
401 8648 : CALL dbcsr_set_config(accdrv_avoid_after_busy=lval)
402 :
403 8648 : CALL section_vals_val_get(dbcsr_section, "ACC%binning_nbins", i_val=ival)
404 8648 : CALL dbcsr_set_config(accdrv_binning_nbins=ival)
405 :
406 8648 : CALL section_vals_val_get(dbcsr_section, "ACC%binning_binsize", i_val=ival)
407 8648 : CALL dbcsr_set_config(accdrv_binning_binsize=ival)
408 :
409 8648 : CALL section_vals_val_get(dbcsr_section, "mm_driver", i_val=ival)
410 8646 : SELECT CASE (ival)
411 : CASE (mm_driver_auto)
412 8646 : CALL dbcsr_set_config(mm_driver="AUTO")
413 : #if defined(__LIBXSMM)
414 8646 : CALL cite_reference(Heinecke2016)
415 : #endif
416 : CASE (mm_driver_blas)
417 2 : CALL dbcsr_set_config(mm_driver="BLAS")
418 : CASE (mm_driver_matmul)
419 0 : CALL dbcsr_set_config(mm_driver="MATMUL")
420 : CASE (mm_driver_smm)
421 0 : CALL dbcsr_set_config(mm_driver="SMM")
422 : CASE (mm_driver_xsmm)
423 0 : CALL dbcsr_set_config(mm_driver="XSMM")
424 0 : CALL cite_reference(Heinecke2016)
425 : CASE DEFAULT
426 8648 : CPABORT("Unknown mm_driver")
427 : END SELECT
428 :
429 8648 : CALL timestop(handle)
430 8648 : END SUBROUTINE cp_dbcsr_config
431 :
432 : ! **************************************************************************************************
433 : !> \brief allocate the blocks of a dbcsr based on the neighbor list
434 : !> \param matrix the matrix
435 : !> \param sab_orb the corresponding neighbor list
436 : !> \param desymmetrize Allocate all block of a non-symmetric matrix from a symmetric list
437 : !> \par History
438 : !> 11.2009 created vw
439 : !> 01.2014 moved here from cp_dbcsr_operations (Ole Schuett)
440 : !> \author vw
441 : !> \note
442 : ! **************************************************************************************************
443 :
444 867284 : SUBROUTINE cp_dbcsr_alloc_block_from_nbl(matrix, sab_orb, desymmetrize)
445 :
446 : TYPE(dbcsr_type) :: matrix
447 : TYPE(neighbor_list_set_p_type), DIMENSION(:), &
448 : POINTER :: sab_orb
449 : LOGICAL, INTENT(IN), OPTIONAL :: desymmetrize
450 :
451 : CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_dbcsr_alloc_block_from_nbl'
452 :
453 : CHARACTER(LEN=1) :: symmetry
454 : INTEGER :: blk_cnt, handle, iatom, icol, inode, &
455 : irow, jatom, last_jatom, nadd
456 867284 : INTEGER, ALLOCATABLE, DIMENSION(:) :: cols, rows, tmp
457 : LOGICAL :: alloc_full, is_symmetric, new_atom_b
458 : TYPE(neighbor_list_iterator_p_type), &
459 867284 : DIMENSION(:), POINTER :: nl_iterator
460 :
461 867284 : CALL timeset(routineN, handle)
462 :
463 867284 : symmetry = dbcsr_get_matrix_type(matrix)
464 :
465 867284 : CPASSERT(ASSOCIATED(sab_orb))
466 :
467 867284 : CALL get_neighbor_list_set_p(neighbor_list_sets=sab_orb, symmetric=is_symmetric)
468 867284 : alloc_full = .FALSE.
469 867284 : IF (PRESENT(desymmetrize)) THEN
470 96 : IF (desymmetrize .AND. (symmetry == dbcsr_type_no_symmetry)) THEN
471 96 : IF (is_symmetric) alloc_full = .TRUE.
472 : END IF
473 : END IF
474 :
475 867284 : CALL dbcsr_finalize(matrix)
476 867284 : ALLOCATE (rows(3), cols(3))
477 867284 : blk_cnt = 0
478 867284 : nadd = 1
479 867284 : IF (alloc_full) nadd = 2
480 :
481 867284 : CALL neighbor_list_iterator_create(nl_iterator, sab_orb)
482 773672348 : DO WHILE (neighbor_list_iterate(nl_iterator) == 0)
483 772805064 : CALL get_iterator_info(nl_iterator, iatom=iatom, jatom=jatom, inode=inode)
484 772805064 : IF (inode == 1) last_jatom = 0
485 772805064 : IF (jatom /= last_jatom) THEN
486 17077250 : new_atom_b = .TRUE.
487 17077250 : last_jatom = jatom
488 : ELSE
489 773672348 : new_atom_b = .FALSE.
490 : CYCLE
491 : END IF
492 17077250 : IF (blk_cnt + nadd .GT. SIZE(rows)) THEN
493 3131922 : ALLOCATE (tmp(blk_cnt + nadd))
494 17489855 : tmp(1:blk_cnt) = rows(1:blk_cnt)
495 1043974 : DEALLOCATE (rows)
496 3131922 : ALLOCATE (rows((blk_cnt + nadd)*2))
497 17489855 : rows(1:blk_cnt) = tmp(1:blk_cnt)
498 17489855 : tmp(1:blk_cnt) = cols(1:blk_cnt)
499 1043974 : DEALLOCATE (cols)
500 2087948 : ALLOCATE (cols((blk_cnt + nadd)*2))
501 17489855 : cols(1:blk_cnt) = tmp(1:blk_cnt)
502 1043974 : DEALLOCATE (tmp)
503 : END IF
504 17944534 : IF (alloc_full) THEN
505 1368 : blk_cnt = blk_cnt + 1
506 1368 : rows(blk_cnt) = iatom
507 1368 : cols(blk_cnt) = jatom
508 1368 : IF (iatom /= jatom) THEN
509 1035 : blk_cnt = blk_cnt + 1
510 1035 : rows(blk_cnt) = jatom
511 1035 : cols(blk_cnt) = iatom
512 : END IF
513 : ELSE
514 17075882 : blk_cnt = blk_cnt + 1
515 17075882 : IF (symmetry == dbcsr_type_no_symmetry) THEN
516 83635 : rows(blk_cnt) = iatom
517 83635 : cols(blk_cnt) = jatom
518 : ELSE
519 16992247 : IF (iatom <= jatom) THEN
520 : irow = iatom
521 : icol = jatom
522 : ELSE
523 7820535 : irow = jatom
524 7820535 : icol = iatom
525 : END IF
526 16992247 : rows(blk_cnt) = irow
527 16992247 : cols(blk_cnt) = icol
528 : END IF
529 : END IF
530 :
531 : END DO
532 867284 : CALL neighbor_list_iterator_release(nl_iterator)
533 :
534 : !
535 867284 : CALL dbcsr_reserve_blocks(matrix, rows(1:blk_cnt), cols(1:blk_cnt))
536 867284 : DEALLOCATE (rows)
537 867284 : DEALLOCATE (cols)
538 867284 : CALL dbcsr_finalize(matrix)
539 :
540 867284 : CALL timestop(handle)
541 :
542 2601852 : END SUBROUTINE cp_dbcsr_alloc_block_from_nbl
543 :
544 : ! **************************************************************************************************
545 : !> \brief Apply distance screening to refine sparsity pattern of matrices in CSR
546 : !> format (using eps_pgf_orb). Currently this is used for the external
547 : !> library PEXSI.
548 : !> \param ks_env ...
549 : !> \param[in, out] csr_sparsity DBCSR matrix defining CSR sparsity pattern.
550 : !> This matrix must be initialized and allocated
551 : !> with exactly the same DBCSR sparsity pattern as
552 : !> the DBCSR matrix that is used to create the CSR
553 : !> matrix. It must have symmetric DBCSR format and
554 : !> must not be filtered.
555 : !> \par History
556 : !> 02.2015 created [Patrick Seewald]
557 : !> \author Patrick Seewald
558 : ! **************************************************************************************************
559 14 : SUBROUTINE cp_dbcsr_to_csr_screening(ks_env, csr_sparsity)
560 : TYPE(qs_ks_env_type), POINTER :: ks_env
561 : TYPE(dbcsr_type), INTENT(INOUT) :: csr_sparsity
562 :
563 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_dbcsr_to_csr_screening'
564 :
565 : INTEGER :: atom_a, atom_b, handle, iatom, icol, ikind, ipgf, irow, iset, isgf, ishell, &
566 : jatom, jkind, jpgf, jset, jsgf, jshell, nkind, nset_a, nset_b
567 14 : INTEGER, ALLOCATABLE, DIMENSION(:) :: atom_of_kind
568 14 : INTEGER, DIMENSION(:), POINTER :: npgf_a, npgf_b, nshell_a, nshell_b
569 14 : INTEGER, DIMENSION(:, :), POINTER :: l_a, l_b
570 : LOGICAL :: do_symmetric, found
571 : REAL(KIND=dp) :: dab, eps_pgf_orb, r_a, r_b
572 : REAL(KIND=dp), DIMENSION(3) :: rab
573 14 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: rpgfa, rpgfb, zet_a, zet_b
574 14 : REAL(KIND=dp), DIMENSION(:, :, :), POINTER :: gcc_a, gcc_b
575 14 : REAL(KIND=real_8), DIMENSION(:, :), POINTER :: screen_blk
576 14 : TYPE(atomic_kind_type), DIMENSION(:), POINTER :: atomic_kind_set
577 : TYPE(dft_control_type), POINTER :: dft_control
578 14 : TYPE(gto_basis_set_p_type), DIMENSION(:), POINTER :: basis_set_list_a, basis_set_list_b
579 : TYPE(gto_basis_set_type), POINTER :: basis_set_a, basis_set_b
580 : TYPE(neighbor_list_iterator_p_type), &
581 14 : DIMENSION(:), POINTER :: nl_iterator
582 : TYPE(neighbor_list_set_p_type), DIMENSION(:), &
583 14 : POINTER :: neighbour_list
584 14 : TYPE(qs_kind_type), DIMENSION(:), POINTER :: qs_kind_set
585 :
586 14 : NULLIFY (screen_blk, atomic_kind_set, basis_set_list_a, &
587 14 : basis_set_list_b, basis_set_a, basis_set_b, nl_iterator, &
588 14 : qs_kind_set, dft_control)
589 :
590 14 : CALL timeset(routineN, handle)
591 :
592 14 : CPASSERT(dbcsr_has_symmetry(csr_sparsity))
593 :
594 : CALL get_ks_env(ks_env, &
595 : sab_orb=neighbour_list, &
596 : atomic_kind_set=atomic_kind_set, &
597 : qs_kind_set=qs_kind_set, &
598 14 : dft_control=dft_control)
599 :
600 14 : eps_pgf_orb = dft_control%qs_control%eps_pgf_orb
601 :
602 14 : nkind = SIZE(qs_kind_set)
603 14 : CALL get_atomic_kind_set(atomic_kind_set, atom_of_kind=atom_of_kind)
604 14 : CPASSERT(SIZE(neighbour_list) > 0)
605 14 : CALL get_neighbor_list_set_p(neighbor_list_sets=neighbour_list, symmetric=do_symmetric)
606 14 : CPASSERT(do_symmetric)
607 104 : ALLOCATE (basis_set_list_a(nkind), basis_set_list_b(nkind))
608 14 : CALL basis_set_list_setup(basis_set_list_a, "ORB", qs_kind_set)
609 14 : CALL basis_set_list_setup(basis_set_list_b, "ORB", qs_kind_set)
610 :
611 : ! csr_sparsity can obtain values 0 (if zero element) or 1 (if non-zero element)
612 14 : CALL dbcsr_set(csr_sparsity, 0.0_dp)
613 :
614 14 : CALL neighbor_list_iterator_create(nl_iterator, neighbour_list)
615 :
616 : ! Iterate over interacting pairs of atoms corresponding to non-zero
617 : ! DBCSR blocks
618 5398 : DO WHILE (neighbor_list_iterate(nl_iterator) == 0)
619 : CALL get_iterator_info(nl_iterator, &
620 : ikind=ikind, jkind=jkind, &
621 : iatom=iatom, jatom=jatom, &
622 5384 : r=rab)
623 :
624 5384 : basis_set_a => basis_set_list_a(ikind)%gto_basis_set
625 5384 : IF (.NOT. ASSOCIATED(basis_set_a)) CYCLE
626 5384 : basis_set_b => basis_set_list_b(jkind)%gto_basis_set
627 5384 : IF (.NOT. ASSOCIATED(basis_set_b)) CYCLE
628 :
629 5384 : atom_a = atom_of_kind(iatom)
630 5384 : atom_b = atom_of_kind(jatom)
631 :
632 5384 : nset_a = basis_set_a%nset
633 5384 : nset_b = basis_set_b%nset
634 5384 : npgf_a => basis_set_a%npgf
635 5384 : npgf_b => basis_set_b%npgf
636 5384 : nshell_a => basis_set_a%nshell
637 5384 : nshell_b => basis_set_b%nshell
638 :
639 5384 : l_a => basis_set_a%l
640 5384 : l_b => basis_set_b%l
641 5384 : gcc_a => basis_set_a%gcc
642 5384 : gcc_b => basis_set_b%gcc
643 5384 : zet_a => basis_set_a%zet
644 5384 : zet_b => basis_set_b%zet
645 :
646 5384 : rpgfa => basis_set_a%pgf_radius
647 5384 : rpgfb => basis_set_b%pgf_radius
648 :
649 5384 : IF (iatom <= jatom) THEN
650 3028 : irow = iatom
651 3028 : icol = jatom
652 : ELSE
653 2356 : irow = jatom
654 2356 : icol = iatom
655 : END IF
656 :
657 : CALL dbcsr_get_block_p(matrix=csr_sparsity, row=irow, col=icol, &
658 5384 : block=screen_blk, found=found)
659 :
660 5384 : CPASSERT(found)
661 :
662 : ! Distance between atoms a and b
663 5384 : dab = SQRT(rab(1)**2 + rab(2)**2 + rab(3)**2)
664 :
665 : ! iterate over pairs of primitive GTOs i,j, get their radii r_i, r_j according
666 : ! to eps_pgf_orb. Define all matrix elements as non-zero to which a
667 : ! contribution from two Gaussians i,j exists with r_i + r_j >= dab.
668 :
669 5384 : isgf = 0
670 16166 : DO iset = 1, nset_a
671 42352 : DO ishell = 1, nshell_a(iset)
672 : jsgf = 0
673 78600 : DO jset = 1, nset_b
674 207002 : DO jshell = 1, nshell_b(jset)
675 432252 : gto_loop: DO ipgf = 1, npgf_a(iset)
676 1619644 : DO jpgf = 1, npgf_b(jset)
677 1573342 : IF (rpgfa(ipgf, iset) + rpgfb(jpgf, jset) .GE. dab) THEN
678 : ! more selective screening with radius calculated for each primitive GTO
679 : r_a = exp_radius(l_a(ishell, iset), &
680 : zet_a(ipgf, iset), &
681 : eps_pgf_orb, &
682 166054 : gcc_a(ipgf, ishell, iset))
683 : r_b = exp_radius(l_b(jshell, jset), &
684 : zet_b(jpgf, jset), &
685 : eps_pgf_orb, &
686 166054 : gcc_b(jpgf, jshell, jset))
687 166054 : IF (r_a + r_b .GE. dab) THEN
688 82100 : IF (irow .EQ. iatom) THEN
689 : screen_blk(isgf + 1:isgf + nso(l_a(ishell, iset)), &
690 410266 : jsgf + 1:jsgf + nso(l_b(jshell, jset))) = 1.0_dp
691 : ELSE
692 : screen_blk(jsgf + 1:jsgf + nso(l_b(jshell, jset)), &
693 318200 : isgf + 1:isgf + nso(l_a(ishell, iset))) = 1.0_dp
694 : END IF
695 : EXIT gto_loop
696 : END IF
697 : END IF
698 : END DO
699 : END DO gto_loop
700 180802 : jsgf = jsgf + nso(l_b(jshell, jset))
701 : END DO
702 : END DO
703 36968 : isgf = isgf + nso(l_a(ishell, iset))
704 : END DO
705 : END DO
706 : END DO
707 :
708 14 : CALL neighbor_list_iterator_release(nl_iterator)
709 14 : DEALLOCATE (basis_set_list_a, basis_set_list_b)
710 :
711 14 : CALL timestop(handle)
712 42 : END SUBROUTINE cp_dbcsr_to_csr_screening
713 :
714 : END MODULE cp_dbcsr_cp2k_link
|