LCOV - code coverage report
Current view: top level - src - cp_dbcsr_cp2k_link.F (source / functions) Hit Total Coverage
Test: CP2K Regtests (git:2fce0f8) Lines: 289 293 98.6 %
Date: 2024-12-21 06:28:57 Functions: 4 4 100.0 %

          Line data    Source code
       1             : !--------------------------------------------------------------------------------------------------!
       2             : !   CP2K: A general program to perform molecular dynamics simulations                              !
       3             : !   Copyright 2000-2024 CP2K developers group <https://cp2k.org>                                   !
       4             : !                                                                                                  !
       5             : !   SPDX-License-Identifier: GPL-2.0-or-later                                                      !
       6             : !--------------------------------------------------------------------------------------------------!
       7             : 
       8             : ! **************************************************************************************************
       9             : !> \brief  Routines that link DBCSR and CP2K concepts together
      10             : !> \author Ole Schuett
      11             : !> \par History
      12             : !>         01.2014 created
      13             : ! **************************************************************************************************
      14             : MODULE cp_dbcsr_cp2k_link
      15             :    USE ao_util,                         ONLY: exp_radius
      16             :    USE atomic_kind_types,               ONLY: atomic_kind_type,&
      17             :                                               get_atomic_kind_set
      18             :    USE basis_set_types,                 ONLY: gto_basis_set_p_type,&
      19             :                                               gto_basis_set_type
      20             :    USE bibliography,                    ONLY: Borstnik2014,&
      21             :                                               Heinecke2016,&
      22             :                                               Schuett2016,&
      23             :                                               cite_reference
      24             :    USE cp_control_types,                ONLY: dft_control_type
      25             :    USE cp_dbcsr_api,                    ONLY: &
      26             :         dbcsr_finalize, dbcsr_get_block_p, dbcsr_get_default_config, dbcsr_get_matrix_type, &
      27             :         dbcsr_has_symmetry, dbcsr_reserve_blocks, dbcsr_set, dbcsr_set_config, dbcsr_type, &
      28             :         dbcsr_type_no_symmetry
      29             :    USE cp_dbcsr_operations,             ONLY: max_elements_per_block
      30             :    USE input_keyword_types,             ONLY: keyword_create,&
      31             :                                               keyword_release,&
      32             :                                               keyword_type
      33             :    USE input_section_types,             ONLY: section_add_keyword,&
      34             :                                               section_add_subsection,&
      35             :                                               section_create,&
      36             :                                               section_release,&
      37             :                                               section_type,&
      38             :                                               section_vals_get_subs_vals,&
      39             :                                               section_vals_type,&
      40             :                                               section_vals_val_get
      41             :    USE kinds,                           ONLY: dp,&
      42             :                                               real_8
      43             :    USE orbital_pointers,                ONLY: nso
      44             :    USE qs_integral_utils,               ONLY: basis_set_list_setup
      45             :    USE qs_kind_types,                   ONLY: qs_kind_type
      46             :    USE qs_ks_types,                     ONLY: get_ks_env,&
      47             :                                               qs_ks_env_type
      48             :    USE qs_neighbor_list_types,          ONLY: get_iterator_info,&
      49             :                                               get_neighbor_list_set_p,&
      50             :                                               neighbor_list_iterate,&
      51             :                                               neighbor_list_iterator_create,&
      52             :                                               neighbor_list_iterator_p_type,&
      53             :                                               neighbor_list_iterator_release,&
      54             :                                               neighbor_list_set_p_type
      55             :    USE string_utilities,                ONLY: s2a
      56             : #include "./base/base_uses.f90"
      57             : 
      58             :    IMPLICIT NONE
      59             : 
      60             :    CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'cp_dbcsr_cp2k_link'
      61             : 
      62             :    PUBLIC :: create_dbcsr_section
      63             :    PUBLIC :: cp_dbcsr_config
      64             :    PUBLIC :: cp_dbcsr_alloc_block_from_nbl
      65             :    PUBLIC :: cp_dbcsr_to_csr_screening
      66             : 
      67             :    PRIVATE
      68             : 
      69             :    ! Possible drivers to use for matrix multiplications
      70             :    INTEGER, PARAMETER :: mm_driver_auto = 0
      71             :    INTEGER, PARAMETER :: mm_driver_matmul = 1
      72             :    INTEGER, PARAMETER :: mm_driver_blas = 2
      73             :    INTEGER, PARAMETER :: mm_driver_smm = 3
      74             :    INTEGER, PARAMETER :: mm_driver_xsmm = 4
      75             : 
      76             :    CHARACTER(len=*), PARAMETER :: mm_name_auto = "AUTO", &
      77             :                                   mm_name_blas = "BLAS", &
      78             :                                   mm_name_matmul = "MATMUL", &
      79             :                                   mm_name_smm = "SMM", &
      80             :                                   mm_name_xsmm = "XSMM"
      81             : CONTAINS
      82             : 
      83             : ! **************************************************************************************************
      84             : !> \brief   Creates the dbcsr section for configuring DBCSR
      85             : !> \param section ...
      86             : !> \date    2011-04-05
      87             : !> \author  Urban Borstnik
      88             : ! **************************************************************************************************
      89      288116 :    SUBROUTINE create_dbcsr_section(section)
      90             :       TYPE(section_type), POINTER                        :: section
      91             : 
      92             :       INTEGER                                            :: idefault
      93             :       LOGICAL                                            :: ldefault
      94             :       REAL(KIND=dp)                                      :: rdefault
      95             :       TYPE(keyword_type), POINTER                        :: keyword
      96             :       TYPE(section_type), POINTER                        :: subsection
      97             : 
      98       15164 :       CPASSERT(.NOT. ASSOCIATED(section))
      99             :       CALL section_create(section, __LOCATION__, name="DBCSR", &
     100             :                           description="Configuration options for the DBCSR library.", &
     101             :                           n_keywords=1, n_subsections=0, repeats=.FALSE., &
     102       45492 :                           citations=(/Borstnik2014, Schuett2016/))
     103             : 
     104       15164 :       NULLIFY (keyword)
     105             : 
     106             :       CALL keyword_create(keyword, __LOCATION__, name="mm_stack_size", &
     107             :                           description="Size of multiplication parameter stack." &
     108             :                           //" A negative value leaves the decision up to DBCSR.", &
     109             :                           usage="mm_stack_size 1000", &
     110       15164 :                           default_i_val=-1)
     111       15164 :       CALL section_add_keyword(section, keyword)
     112       15164 :       CALL keyword_release(keyword)
     113             : 
     114             :       CALL keyword_create(keyword, __LOCATION__, name="mm_driver", &
     115             :                           description="Select which backend to use preferably "// &
     116             :                           "for matrix block multiplications on the host.", &
     117             :                           usage="mm_driver blas", &
     118             :                           default_i_val=mm_driver_auto, &
     119             :                           enum_c_vals=s2a("AUTO", "BLAS", "MATMUL", "SMM", "XSMM"), &
     120             :                           enum_i_vals=(/mm_driver_auto, mm_driver_blas, mm_driver_matmul, mm_driver_smm, &
     121             :                                         mm_driver_xsmm/), &
     122             :                           enum_desc=s2a("Choose automatically the best available driver", &
     123             :                                         "BLAS (requires the BLAS library at link time)", &
     124             :                                         "Fortran MATMUL", &
     125             :                                         "Library optimised for Small Matrix Multiplies "// &
     126             :                                         "(requires the SMM library at link time)", &
     127             :                                         "LIBXSMM"), &
     128       30328 :                           citations=(/Heinecke2016/))
     129       15164 :       CALL section_add_keyword(section, keyword)
     130       15164 :       CALL keyword_release(keyword)
     131             : 
     132       15164 :       CALL dbcsr_get_default_config(avg_elements_images=idefault)
     133             :       CALL keyword_create(keyword, __LOCATION__, name="avg_elements_images", &
     134             :                           description="Average number of elements (dense limit)" &
     135             :                           //" for each image, which also corresponds to" &
     136             :                           //" the average number of elements exchanged between MPI processes" &
     137             :                           //" during the operations." &
     138             :                           //" A negative or zero value means unlimited.", &
     139             :                           usage="avg_elements_images 10000", &
     140       15164 :                           default_i_val=idefault)
     141       15164 :       CALL section_add_keyword(section, keyword)
     142       15164 :       CALL keyword_release(keyword)
     143             : 
     144       15164 :       CALL dbcsr_get_default_config(num_mult_images=idefault)
     145             :       CALL keyword_create(keyword, __LOCATION__, name="num_mult_images", &
     146             :                           description="Multiplicative factor for number of virtual images.", &
     147             :                           usage="num_mult_images 2", &
     148       15164 :                           default_i_val=idefault)
     149       15164 :       CALL section_add_keyword(section, keyword)
     150       15164 :       CALL keyword_release(keyword)
     151             : 
     152       15164 :       CALL dbcsr_get_default_config(use_mpi_allocator=ldefault)
     153             :       CALL keyword_create(keyword, __LOCATION__, name="use_mpi_allocator", &
     154             :                           description="Use MPI allocator" &
     155             :                           //" to allocate buffers used in MPI communications.", &
     156             :                           usage="use_mpi_allocator T", &
     157       15164 :                           default_l_val=ldefault)
     158       15164 :       CALL section_add_keyword(section, keyword)
     159       15164 :       CALL keyword_release(keyword)
     160             : 
     161       15164 :       CALL dbcsr_get_default_config(use_mpi_rma=ldefault)
     162             :       CALL keyword_create(keyword, __LOCATION__, name="use_mpi_rma", &
     163             :                           description="Use RMA for MPI communications" &
     164             :                           //" for each image, which also corresponds to" &
     165             :                           //" the number of elements exchanged between MPI processes" &
     166             :                           //" during the operations.", &
     167             :                           usage="use_mpi_rma F", &
     168       15164 :                           default_l_val=ldefault)
     169       15164 :       CALL section_add_keyword(section, keyword)
     170       15164 :       CALL keyword_release(keyword)
     171             : 
     172       15164 :       CALL dbcsr_get_default_config(num_layers_3D=idefault)
     173             :       CALL keyword_create(keyword, __LOCATION__, name="num_layers_3D", &
     174             :                           description="Number of layers for the 3D multplication algorithm.", &
     175             :                           usage="num_layers_3D 1", &
     176       15164 :                           default_i_val=idefault)
     177       15164 :       CALL section_add_keyword(section, keyword)
     178       15164 :       CALL keyword_release(keyword)
     179             : 
     180       15164 :       CALL dbcsr_get_default_config(nstacks=idefault)
     181             :       CALL keyword_create(keyword, __LOCATION__, name="n_size_mnk_stacks", &
     182             :                           description="Number of stacks to use for distinct atomic sizes" &
     183             :                           //" (e.g., 2 for a system of mostly waters). ", &
     184             :                           usage="n_size_mnk_stacks 2", &
     185       15164 :                           default_i_val=idefault)
     186       15164 :       CALL section_add_keyword(section, keyword)
     187       15164 :       CALL keyword_release(keyword)
     188             : 
     189       15164 :       CALL dbcsr_get_default_config(use_comm_thread=ldefault)
     190             :       CALL keyword_create(keyword, __LOCATION__, name="use_comm_thread", &
     191             :                           description="During multiplication, use a thread to periodically poll" &
     192             :                           //" MPI to progress outstanding message completions.  This is" &
     193             :                           //" beneficial on systems without a DMA-capable network adapter" &
     194             :                           //" e.g. Cray XE6.", &
     195             :                           usage="use_comm_thread T", &
     196       15164 :                           default_l_val=ldefault)
     197       15164 :       CALL section_add_keyword(section, keyword)
     198       15164 :       CALL keyword_release(keyword)
     199             : 
     200             :       CALL keyword_create(keyword, __LOCATION__, name="MAX_ELEMENTS_PER_BLOCK", &
     201             :                           description="Default block size for turning dense matrices in blocked ones", &
     202             :                           usage="MAX_ELEMENTS_PER_BLOCK 32", &
     203       15164 :                           default_i_val=max_elements_per_block)
     204       15164 :       CALL section_add_keyword(section, keyword)
     205       15164 :       CALL keyword_release(keyword)
     206             : 
     207             :       CALL keyword_create(keyword, __LOCATION__, name="comm_thread_load", &
     208             :                           description="If a communications thread is used, specify how much " &
     209             :                           //"multiplication workload (%) the thread should perform in " &
     210             :                           //"addition to communication tasks. " &
     211             :                           //"A negative value leaves the decision up to DBCSR.", &
     212             :                           usage="comm_thread_load 50", &
     213       15164 :                           default_i_val=-1)
     214       15164 :       CALL section_add_keyword(section, keyword)
     215       15164 :       CALL keyword_release(keyword)
     216             : 
     217       15164 :       CALL dbcsr_get_default_config(multrec_limit=idefault)
     218             :       CALL keyword_create(keyword, __LOCATION__, name="multrec_limit", &
     219             :                           description="Recursion limit of cache oblivious multrec algorithm.", &
     220       15164 :                           default_i_val=idefault)
     221       15164 :       CALL section_add_keyword(section, keyword)
     222       15164 :       CALL keyword_release(keyword)
     223             : 
     224       15164 :       CALL dbcsr_get_default_config(use_mempools_cpu=ldefault)
     225             :       CALL keyword_create(keyword, __LOCATION__, name="use_mempools_cpu", &
     226             :                           description="Enable memory pools on the CPU.", &
     227       15164 :                           default_l_val=ldefault)
     228       15164 :       CALL section_add_keyword(section, keyword)
     229       15164 :       CALL keyword_release(keyword)
     230             : 
     231       15164 :       NULLIFY (subsection)
     232             :       CALL section_create(subsection, __LOCATION__, name="TENSOR", &
     233             :                           description="Configuration options for Tensors.", &
     234       15164 :                           n_keywords=1, n_subsections=0, repeats=.FALSE.)
     235             : 
     236       15164 :       CALL dbcsr_get_default_config(tas_split_factor=rdefault)
     237             :       CALL keyword_create(keyword, __LOCATION__, name="TAS_SPLIT_FACTOR", &
     238             :                           description="Parameter for hybrid DBCSR-TAS matrix-matrix multiplication algorithm: "// &
     239             :                           "a TAS matrix is split into s submatrices with s = N_max/(N_min*f) with f "// &
     240             :                           "given by this parameter and N_max/N_min the max/min occupancies of the matrices "// &
     241             :                           "involved in a multiplication. A large value makes the multiplication Cannon-based "// &
     242             :                           "(s=1) and a small value (> 0) makes the multiplication based on TAS algorithm "// &
     243             :                           "(s=number of MPI ranks)", &
     244       15164 :                           default_r_val=rdefault)
     245       15164 :       CALL section_add_keyword(subsection, keyword)
     246       15164 :       CALL keyword_release(keyword)
     247             : 
     248       15164 :       CALL section_add_subsection(section, subsection)
     249       15164 :       CALL section_release(subsection)
     250             : 
     251             :       !---------------------------------------------------------------------------
     252       15164 :       NULLIFY (subsection)
     253             :       CALL section_create(subsection, __LOCATION__, name="ACC", &
     254             :                           description="Configuration options for the ACC-Driver.", &
     255       15164 :                           n_keywords=1, n_subsections=0, repeats=.FALSE.)
     256             : 
     257       15164 :       CALL dbcsr_get_default_config(accdrv_thread_buffers=idefault)
     258             :       CALL keyword_create(keyword, __LOCATION__, name="thread_buffers", &
     259             :                           description="Number of transfer-buffers associated with each thread and corresponding stream.", &
     260       15164 :                           default_i_val=idefault)
     261       15164 :       CALL section_add_keyword(subsection, keyword)
     262       15164 :       CALL keyword_release(keyword)
     263             : 
     264       15164 :       CALL dbcsr_get_default_config(accdrv_avoid_after_busy=ldefault)
     265             :       CALL keyword_create(keyword, __LOCATION__, name="avoid_after_busy", &
     266             :                           description="If enabled, stacks are not processed by the acc-driver " &
     267             :                           //"after it has signaled congestion during a round of flushing. " &
     268             :                           //"For the next round of flusing the driver is used again.", &
     269       15164 :                           default_l_val=ldefault)
     270       15164 :       CALL section_add_keyword(subsection, keyword)
     271       15164 :       CALL keyword_release(keyword)
     272             : 
     273       15164 :       CALL dbcsr_get_default_config(accdrv_min_flop_process=idefault)
     274             :       CALL keyword_create(keyword, __LOCATION__, name="min_flop_process", &
     275             :                           description="Only process stacks with more than the given number of " &
     276             :                           //"floating-point operations per stack-entry (2*m*n*k).", &
     277       15164 :                           default_i_val=idefault)
     278       15164 :       CALL section_add_keyword(subsection, keyword)
     279       15164 :       CALL keyword_release(keyword)
     280             : 
     281       15164 :       CALL dbcsr_get_default_config(accdrv_stack_sort=ldefault)
     282             :       CALL keyword_create(keyword, __LOCATION__, name="stack_sort", &
     283             :                           description="Sort multiplication stacks according to C-access.", &
     284       15164 :                           default_l_val=ldefault)
     285       15164 :       CALL section_add_keyword(subsection, keyword)
     286       15164 :       CALL keyword_release(keyword)
     287             : 
     288       15164 :       CALL dbcsr_get_default_config(accdrv_min_flop_sort=idefault)
     289             :       CALL keyword_create(keyword, __LOCATION__, name="min_flop_sort", &
     290             :                           description="Only sort stacks with more than the given number of " &
     291             :                           //"floating-point operations per stack-entry (2*m*n*k). " &
     292             :                           //"Alternatively, the stacks are roughly ordered through a " &
     293             :                           //"binning-scheme by Peter Messmer. (Depends on ACC%STACK_SORT)", &
     294       15164 :                           default_i_val=idefault)
     295       15164 :       CALL section_add_keyword(subsection, keyword)
     296       15164 :       CALL keyword_release(keyword)
     297             : 
     298       15164 :       CALL dbcsr_get_default_config(accdrv_do_inhomogenous=ldefault)
     299             :       CALL keyword_create(keyword, __LOCATION__, name="process_inhomogenous", &
     300             :                           description="If enabled, inhomogenous stacks are also processed by the acc driver.", &
     301       15164 :                           default_l_val=ldefault)
     302       15164 :       CALL section_add_keyword(subsection, keyword)
     303       15164 :       CALL keyword_release(keyword)
     304             : 
     305       15164 :       CALL dbcsr_get_default_config(accdrv_binning_nbins=idefault)
     306             :       CALL keyword_create(keyword, __LOCATION__, name="binning_nbins", &
     307             :                           description="Number of bins used when ordering " &
     308             :                           //"the stacks with the binning-scheme.", &
     309       15164 :                           default_i_val=idefault)
     310       15164 :       CALL section_add_keyword(subsection, keyword)
     311       15164 :       CALL keyword_release(keyword)
     312             : 
     313       15164 :       CALL dbcsr_get_default_config(accdrv_binning_binsize=idefault)
     314             :       CALL keyword_create(keyword, __LOCATION__, name="binning_binsize", &
     315             :                           description="Size of bins used when ordering " &
     316             :                           //"the stacks with the binning-scheme.", &
     317       15164 :                           default_i_val=idefault)
     318       15164 :       CALL section_add_keyword(subsection, keyword)
     319       15164 :       CALL keyword_release(keyword)
     320             : 
     321       15164 :       CALL section_add_subsection(section, subsection)
     322       15164 :       CALL section_release(subsection)
     323             : 
     324       15164 :    END SUBROUTINE create_dbcsr_section
     325             : 
     326             : ! **************************************************************************************************
     327             : !> \brief Configures options for DBCSR
     328             : !> \param root_section ...
     329             : ! **************************************************************************************************
     330      213716 :    SUBROUTINE cp_dbcsr_config(root_section)
     331             :       TYPE(section_vals_type), POINTER                   :: root_section
     332             : 
     333             :       CHARACTER(len=*), PARAMETER                        :: routineN = 'cp_dbcsr_config'
     334             : 
     335             :       INTEGER                                            :: handle, ival
     336             :       LOGICAL                                            :: lval
     337             :       REAL(kind=real_8)                                  :: rval
     338             :       TYPE(section_vals_type), POINTER                   :: dbcsr_section
     339             : 
     340        9292 :       CALL timeset(routineN, handle)
     341             : 
     342        9292 :       CALL cite_reference(Borstnik2014)
     343        9292 :       CALL cite_reference(Schuett2016)
     344             : 
     345        9292 :       dbcsr_section => section_vals_get_subs_vals(root_section, "GLOBAL%DBCSR")
     346             : 
     347        9292 :       CALL section_vals_val_get(dbcsr_section, "mm_stack_size", i_val=ival)
     348        9292 :       CALL dbcsr_set_config(mm_stack_size=ival)
     349             : 
     350        9292 :       CALL section_vals_val_get(dbcsr_section, "MAX_ELEMENTS_PER_BLOCK", i_val=max_elements_per_block)
     351             : 
     352        9292 :       CALL section_vals_val_get(dbcsr_section, "avg_elements_images", i_val=ival)
     353        9292 :       CALL dbcsr_set_config(avg_elements_images=ival)
     354             : 
     355        9292 :       CALL section_vals_val_get(dbcsr_section, "num_mult_images", i_val=ival)
     356        9292 :       CALL dbcsr_set_config(num_mult_images=ival)
     357             : 
     358        9292 :       CALL section_vals_val_get(dbcsr_section, "n_size_mnk_stacks", i_val=ival)
     359        9292 :       CALL dbcsr_set_config(nstacks=ival)
     360             : 
     361        9292 :       CALL section_vals_val_get(dbcsr_section, "use_mpi_allocator", l_val=lval)
     362        9292 :       CALL dbcsr_set_config(use_mpi_allocator=lval)
     363             : 
     364        9292 :       CALL section_vals_val_get(dbcsr_section, "use_mpi_rma", l_val=lval)
     365        9292 :       CALL dbcsr_set_config(use_mpi_rma=lval)
     366             : 
     367        9292 :       CALL section_vals_val_get(dbcsr_section, "num_layers_3D", i_val=ival)
     368        9292 :       CALL dbcsr_set_config(num_layers_3D=ival)
     369             : 
     370        9292 :       CALL section_vals_val_get(dbcsr_section, "use_comm_thread", l_val=lval)
     371        9292 :       CALL dbcsr_set_config(use_comm_thread=lval)
     372             : 
     373        9292 :       CALL section_vals_val_get(dbcsr_section, "comm_thread_load", i_val=ival)
     374        9292 :       CALL dbcsr_set_config(comm_thread_load=ival)
     375             : 
     376        9292 :       CALL section_vals_val_get(dbcsr_section, "multrec_limit", i_val=ival)
     377        9292 :       CALL dbcsr_set_config(multrec_limit=ival)
     378             : 
     379        9292 :       CALL section_vals_val_get(dbcsr_section, "use_mempools_cpu", l_val=lval)
     380        9292 :       CALL dbcsr_set_config(use_mempools_cpu=lval)
     381             : 
     382        9292 :       CALL section_vals_val_get(dbcsr_section, "TENSOR%tas_split_factor", r_val=rval)
     383        9292 :       CALL dbcsr_set_config(tas_split_factor=rval)
     384             : 
     385        9292 :       CALL section_vals_val_get(dbcsr_section, "ACC%thread_buffers", i_val=ival)
     386        9292 :       CALL dbcsr_set_config(accdrv_thread_buffers=ival)
     387             : 
     388        9292 :       CALL section_vals_val_get(dbcsr_section, "ACC%min_flop_process", i_val=ival)
     389        9292 :       CALL dbcsr_set_config(accdrv_min_flop_process=ival)
     390             : 
     391        9292 :       CALL section_vals_val_get(dbcsr_section, "ACC%stack_sort", l_val=lval)
     392        9292 :       CALL dbcsr_set_config(accdrv_stack_sort=lval)
     393             : 
     394        9292 :       CALL section_vals_val_get(dbcsr_section, "ACC%min_flop_sort", i_val=ival)
     395        9292 :       CALL dbcsr_set_config(accdrv_min_flop_sort=ival)
     396             : 
     397        9292 :       CALL section_vals_val_get(dbcsr_section, "ACC%process_inhomogenous", l_val=lval)
     398        9292 :       CALL dbcsr_set_config(accdrv_do_inhomogenous=lval)
     399             : 
     400        9292 :       CALL section_vals_val_get(dbcsr_section, "ACC%avoid_after_busy", l_val=lval)
     401        9292 :       CALL dbcsr_set_config(accdrv_avoid_after_busy=lval)
     402             : 
     403        9292 :       CALL section_vals_val_get(dbcsr_section, "ACC%binning_nbins", i_val=ival)
     404        9292 :       CALL dbcsr_set_config(accdrv_binning_nbins=ival)
     405             : 
     406        9292 :       CALL section_vals_val_get(dbcsr_section, "ACC%binning_binsize", i_val=ival)
     407        9292 :       CALL dbcsr_set_config(accdrv_binning_binsize=ival)
     408             : 
     409        9292 :       CALL section_vals_val_get(dbcsr_section, "mm_driver", i_val=ival)
     410        9290 :       SELECT CASE (ival)
     411             :       CASE (mm_driver_auto)
     412        9290 :          CALL dbcsr_set_config(mm_driver="AUTO")
     413             : #if defined(__LIBXSMM)
     414        9290 :          CALL cite_reference(Heinecke2016)
     415             : #endif
     416             :       CASE (mm_driver_blas)
     417           2 :          CALL dbcsr_set_config(mm_driver="BLAS")
     418             :       CASE (mm_driver_matmul)
     419           0 :          CALL dbcsr_set_config(mm_driver="MATMUL")
     420             :       CASE (mm_driver_smm)
     421           0 :          CALL dbcsr_set_config(mm_driver="SMM")
     422             :       CASE (mm_driver_xsmm)
     423           0 :          CALL dbcsr_set_config(mm_driver="XSMM")
     424           0 :          CALL cite_reference(Heinecke2016)
     425             :       CASE DEFAULT
     426        9292 :          CPABORT("Unknown mm_driver")
     427             :       END SELECT
     428             : 
     429        9292 :       CALL timestop(handle)
     430        9292 :    END SUBROUTINE cp_dbcsr_config
     431             : 
     432             : ! **************************************************************************************************
     433             : !> \brief allocate the blocks of a dbcsr based on the neighbor list
     434             : !> \param matrix        the matrix
     435             : !> \param sab_orb       the corresponding neighbor list
     436             : !> \param desymmetrize  Allocate all block of a non-symmetric matrix from a symmetric list
     437             : !> \par History
     438             : !>      11.2009 created vw
     439             : !>      01.2014 moved here from cp_dbcsr_operations (Ole Schuett)
     440             : !> \author vw
     441             : !> \note
     442             : ! **************************************************************************************************
     443             : 
     444      875458 :    SUBROUTINE cp_dbcsr_alloc_block_from_nbl(matrix, sab_orb, desymmetrize)
     445             : 
     446             :       TYPE(dbcsr_type)                                   :: matrix
     447             :       TYPE(neighbor_list_set_p_type), DIMENSION(:), &
     448             :          POINTER                                         :: sab_orb
     449             :       LOGICAL, INTENT(IN), OPTIONAL                      :: desymmetrize
     450             : 
     451             :       CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_dbcsr_alloc_block_from_nbl'
     452             : 
     453             :       CHARACTER(LEN=1)                                   :: symmetry
     454             :       INTEGER                                            :: blk_cnt, handle, iatom, icol, inode, &
     455             :                                                             irow, jatom, last_jatom, nadd
     456      875458 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: cols, rows, tmp
     457             :       LOGICAL                                            :: alloc_full, is_symmetric, new_atom_b
     458             :       TYPE(neighbor_list_iterator_p_type), &
     459      875458 :          DIMENSION(:), POINTER                           :: nl_iterator
     460             : 
     461      875458 :       CALL timeset(routineN, handle)
     462             : 
     463      875458 :       symmetry = dbcsr_get_matrix_type(matrix)
     464             : 
     465      875458 :       CPASSERT(ASSOCIATED(sab_orb))
     466             : 
     467      875458 :       CALL get_neighbor_list_set_p(neighbor_list_sets=sab_orb, symmetric=is_symmetric)
     468      875458 :       alloc_full = .FALSE.
     469      875458 :       IF (PRESENT(desymmetrize)) THEN
     470          96 :          IF (desymmetrize .AND. (symmetry == dbcsr_type_no_symmetry)) THEN
     471          96 :             IF (is_symmetric) alloc_full = .TRUE.
     472             :          END IF
     473             :       END IF
     474             : 
     475      875458 :       CALL dbcsr_finalize(matrix)
     476      875458 :       ALLOCATE (rows(3), cols(3))
     477      875458 :       blk_cnt = 0
     478      875458 :       nadd = 1
     479      875458 :       IF (alloc_full) nadd = 2
     480             : 
     481      875458 :       CALL neighbor_list_iterator_create(nl_iterator, sab_orb)
     482   785429221 :       DO WHILE (neighbor_list_iterate(nl_iterator) == 0)
     483   784553763 :          CALL get_iterator_info(nl_iterator, iatom=iatom, jatom=jatom, inode=inode)
     484   784553763 :          IF (inode == 1) last_jatom = 0
     485   784553763 :          IF (jatom /= last_jatom) THEN
     486    17184389 :             new_atom_b = .TRUE.
     487    17184389 :             last_jatom = jatom
     488             :          ELSE
     489   785429221 :             new_atom_b = .FALSE.
     490             :             CYCLE
     491             :          END IF
     492    17184389 :          IF (blk_cnt + nadd .GT. SIZE(rows)) THEN
     493     3168264 :             ALLOCATE (tmp(blk_cnt + nadd))
     494    17572236 :             tmp(1:blk_cnt) = rows(1:blk_cnt)
     495     1056088 :             DEALLOCATE (rows)
     496     3168264 :             ALLOCATE (rows((blk_cnt + nadd)*2))
     497    17572236 :             rows(1:blk_cnt) = tmp(1:blk_cnt)
     498    17572236 :             tmp(1:blk_cnt) = cols(1:blk_cnt)
     499     1056088 :             DEALLOCATE (cols)
     500     2112176 :             ALLOCATE (cols((blk_cnt + nadd)*2))
     501    17572236 :             cols(1:blk_cnt) = tmp(1:blk_cnt)
     502     1056088 :             DEALLOCATE (tmp)
     503             :          END IF
     504    18059847 :          IF (alloc_full) THEN
     505        1368 :             blk_cnt = blk_cnt + 1
     506        1368 :             rows(blk_cnt) = iatom
     507        1368 :             cols(blk_cnt) = jatom
     508        1368 :             IF (iatom /= jatom) THEN
     509        1035 :                blk_cnt = blk_cnt + 1
     510        1035 :                rows(blk_cnt) = jatom
     511        1035 :                cols(blk_cnt) = iatom
     512             :             END IF
     513             :          ELSE
     514    17183021 :             blk_cnt = blk_cnt + 1
     515    17183021 :             IF (symmetry == dbcsr_type_no_symmetry) THEN
     516       83653 :                rows(blk_cnt) = iatom
     517       83653 :                cols(blk_cnt) = jatom
     518             :             ELSE
     519    17099368 :                IF (iatom <= jatom) THEN
     520             :                   irow = iatom
     521             :                   icol = jatom
     522             :                ELSE
     523     7867501 :                   irow = jatom
     524     7867501 :                   icol = iatom
     525             :                END IF
     526    17099368 :                rows(blk_cnt) = irow
     527    17099368 :                cols(blk_cnt) = icol
     528             :             END IF
     529             :          END IF
     530             : 
     531             :       END DO
     532      875458 :       CALL neighbor_list_iterator_release(nl_iterator)
     533             : 
     534             :       !
     535      875458 :       CALL dbcsr_reserve_blocks(matrix, rows(1:blk_cnt), cols(1:blk_cnt))
     536      875458 :       DEALLOCATE (rows)
     537      875458 :       DEALLOCATE (cols)
     538      875458 :       CALL dbcsr_finalize(matrix)
     539             : 
     540      875458 :       CALL timestop(handle)
     541             : 
     542     2626374 :    END SUBROUTINE cp_dbcsr_alloc_block_from_nbl
     543             : 
     544             : ! **************************************************************************************************
     545             : !> \brief Apply distance screening to refine sparsity pattern of matrices in CSR
     546             : !>        format (using eps_pgf_orb). Currently this is used for the external
     547             : !>        library PEXSI.
     548             : !> \param ks_env ...
     549             : !> \param[in, out] csr_sparsity DBCSR matrix defining CSR sparsity pattern.
     550             : !>                              This matrix must be initialized and allocated
     551             : !>                              with exactly the same DBCSR sparsity pattern as
     552             : !>                              the DBCSR matrix that is used to create the CSR
     553             : !>                              matrix. It must have symmetric DBCSR format and
     554             : !>                              must not be filtered.
     555             : !> \par History
     556             : !>      02.2015 created [Patrick Seewald]
     557             : !> \author Patrick Seewald
     558             : ! **************************************************************************************************
     559          14 :    SUBROUTINE cp_dbcsr_to_csr_screening(ks_env, csr_sparsity)
     560             :       TYPE(qs_ks_env_type), POINTER                      :: ks_env
     561             :       TYPE(dbcsr_type), INTENT(INOUT)                    :: csr_sparsity
     562             : 
     563             :       CHARACTER(len=*), PARAMETER :: routineN = 'cp_dbcsr_to_csr_screening'
     564             : 
     565             :       INTEGER :: atom_a, atom_b, handle, iatom, icol, ikind, ipgf, irow, iset, isgf, ishell, &
     566             :          jatom, jkind, jpgf, jset, jsgf, jshell, nkind, nset_a, nset_b
     567          14 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: atom_of_kind
     568          14 :       INTEGER, DIMENSION(:), POINTER                     :: npgf_a, npgf_b, nshell_a, nshell_b
     569          14 :       INTEGER, DIMENSION(:, :), POINTER                  :: l_a, l_b
     570             :       LOGICAL                                            :: do_symmetric, found
     571             :       REAL(KIND=dp)                                      :: dab, eps_pgf_orb, r_a, r_b
     572             :       REAL(KIND=dp), DIMENSION(3)                        :: rab
     573          14 :       REAL(KIND=dp), DIMENSION(:, :), POINTER            :: rpgfa, rpgfb, zet_a, zet_b
     574          14 :       REAL(KIND=dp), DIMENSION(:, :, :), POINTER         :: gcc_a, gcc_b
     575          14 :       REAL(KIND=real_8), DIMENSION(:, :), POINTER        :: screen_blk
     576          14 :       TYPE(atomic_kind_type), DIMENSION(:), POINTER      :: atomic_kind_set
     577             :       TYPE(dft_control_type), POINTER                    :: dft_control
     578          14 :       TYPE(gto_basis_set_p_type), DIMENSION(:), POINTER  :: basis_set_list_a, basis_set_list_b
     579             :       TYPE(gto_basis_set_type), POINTER                  :: basis_set_a, basis_set_b
     580             :       TYPE(neighbor_list_iterator_p_type), &
     581          14 :          DIMENSION(:), POINTER                           :: nl_iterator
     582             :       TYPE(neighbor_list_set_p_type), DIMENSION(:), &
     583          14 :          POINTER                                         :: neighbour_list
     584          14 :       TYPE(qs_kind_type), DIMENSION(:), POINTER          :: qs_kind_set
     585             : 
     586          14 :       NULLIFY (screen_blk, atomic_kind_set, basis_set_list_a, &
     587          14 :                basis_set_list_b, basis_set_a, basis_set_b, nl_iterator, &
     588          14 :                qs_kind_set, dft_control)
     589             : 
     590          14 :       CALL timeset(routineN, handle)
     591             : 
     592          14 :       CPASSERT(dbcsr_has_symmetry(csr_sparsity))
     593             : 
     594             :       CALL get_ks_env(ks_env, &
     595             :                       sab_orb=neighbour_list, &
     596             :                       atomic_kind_set=atomic_kind_set, &
     597             :                       qs_kind_set=qs_kind_set, &
     598          14 :                       dft_control=dft_control)
     599             : 
     600          14 :       eps_pgf_orb = dft_control%qs_control%eps_pgf_orb
     601             : 
     602          14 :       nkind = SIZE(qs_kind_set)
     603          14 :       CALL get_atomic_kind_set(atomic_kind_set, atom_of_kind=atom_of_kind)
     604          14 :       CPASSERT(SIZE(neighbour_list) > 0)
     605          14 :       CALL get_neighbor_list_set_p(neighbor_list_sets=neighbour_list, symmetric=do_symmetric)
     606          14 :       CPASSERT(do_symmetric)
     607         104 :       ALLOCATE (basis_set_list_a(nkind), basis_set_list_b(nkind))
     608          14 :       CALL basis_set_list_setup(basis_set_list_a, "ORB", qs_kind_set)
     609          14 :       CALL basis_set_list_setup(basis_set_list_b, "ORB", qs_kind_set)
     610             : 
     611             :       ! csr_sparsity can obtain values 0 (if zero element) or 1 (if non-zero element)
     612          14 :       CALL dbcsr_set(csr_sparsity, 0.0_dp)
     613             : 
     614          14 :       CALL neighbor_list_iterator_create(nl_iterator, neighbour_list)
     615             : 
     616             :       ! Iterate over interacting pairs of atoms corresponding to non-zero
     617             :       ! DBCSR blocks
     618        5398 :       DO WHILE (neighbor_list_iterate(nl_iterator) == 0)
     619             :          CALL get_iterator_info(nl_iterator, &
     620             :                                 ikind=ikind, jkind=jkind, &
     621             :                                 iatom=iatom, jatom=jatom, &
     622        5384 :                                 r=rab)
     623             : 
     624        5384 :          basis_set_a => basis_set_list_a(ikind)%gto_basis_set
     625        5384 :          IF (.NOT. ASSOCIATED(basis_set_a)) CYCLE
     626        5384 :          basis_set_b => basis_set_list_b(jkind)%gto_basis_set
     627        5384 :          IF (.NOT. ASSOCIATED(basis_set_b)) CYCLE
     628             : 
     629        5384 :          atom_a = atom_of_kind(iatom)
     630        5384 :          atom_b = atom_of_kind(jatom)
     631             : 
     632        5384 :          nset_a = basis_set_a%nset
     633        5384 :          nset_b = basis_set_b%nset
     634        5384 :          npgf_a => basis_set_a%npgf
     635        5384 :          npgf_b => basis_set_b%npgf
     636        5384 :          nshell_a => basis_set_a%nshell
     637        5384 :          nshell_b => basis_set_b%nshell
     638             : 
     639        5384 :          l_a => basis_set_a%l
     640        5384 :          l_b => basis_set_b%l
     641        5384 :          gcc_a => basis_set_a%gcc
     642        5384 :          gcc_b => basis_set_b%gcc
     643        5384 :          zet_a => basis_set_a%zet
     644        5384 :          zet_b => basis_set_b%zet
     645             : 
     646        5384 :          rpgfa => basis_set_a%pgf_radius
     647        5384 :          rpgfb => basis_set_b%pgf_radius
     648             : 
     649        5384 :          IF (iatom <= jatom) THEN
     650        3028 :             irow = iatom
     651        3028 :             icol = jatom
     652             :          ELSE
     653        2356 :             irow = jatom
     654        2356 :             icol = iatom
     655             :          END IF
     656             : 
     657             :          CALL dbcsr_get_block_p(matrix=csr_sparsity, row=irow, col=icol, &
     658        5384 :                                 block=screen_blk, found=found)
     659             : 
     660        5384 :          CPASSERT(found)
     661             : 
     662             :          ! Distance between atoms a and b
     663        5384 :          dab = SQRT(rab(1)**2 + rab(2)**2 + rab(3)**2)
     664             : 
     665             :          ! iterate over pairs of primitive GTOs i,j, get their radii r_i, r_j according
     666             :          ! to eps_pgf_orb. Define all matrix elements as non-zero to which a
     667             :          ! contribution from two Gaussians i,j exists with r_i + r_j >= dab.
     668             : 
     669        5384 :          isgf = 0
     670       16166 :          DO iset = 1, nset_a
     671       42352 :             DO ishell = 1, nshell_a(iset)
     672             :                jsgf = 0
     673       78600 :                DO jset = 1, nset_b
     674      207002 :                   DO jshell = 1, nshell_b(jset)
     675      432252 :                      gto_loop: DO ipgf = 1, npgf_a(iset)
     676     1619644 :                         DO jpgf = 1, npgf_b(jset)
     677     1573342 :                            IF (rpgfa(ipgf, iset) + rpgfb(jpgf, jset) .GE. dab) THEN
     678             :                               ! more selective screening with radius calculated for each primitive GTO
     679             :                               r_a = exp_radius(l_a(ishell, iset), &
     680             :                                                zet_a(ipgf, iset), &
     681             :                                                eps_pgf_orb, &
     682      166054 :                                                gcc_a(ipgf, ishell, iset))
     683             :                               r_b = exp_radius(l_b(jshell, jset), &
     684             :                                                zet_b(jpgf, jset), &
     685             :                                                eps_pgf_orb, &
     686      166054 :                                                gcc_b(jpgf, jshell, jset))
     687      166054 :                               IF (r_a + r_b .GE. dab) THEN
     688       82100 :                                  IF (irow .EQ. iatom) THEN
     689             :                                     screen_blk(isgf + 1:isgf + nso(l_a(ishell, iset)), &
     690      410266 :                                                jsgf + 1:jsgf + nso(l_b(jshell, jset))) = 1.0_dp
     691             :                                  ELSE
     692             :                                     screen_blk(jsgf + 1:jsgf + nso(l_b(jshell, jset)), &
     693      318200 :                                                isgf + 1:isgf + nso(l_a(ishell, iset))) = 1.0_dp
     694             :                                  END IF
     695             :                                  EXIT gto_loop
     696             :                               END IF
     697             :                            END IF
     698             :                         END DO
     699             :                      END DO gto_loop
     700      180802 :                      jsgf = jsgf + nso(l_b(jshell, jset))
     701             :                   END DO
     702             :                END DO
     703       36968 :                isgf = isgf + nso(l_a(ishell, iset))
     704             :             END DO
     705             :          END DO
     706             :       END DO
     707             : 
     708          14 :       CALL neighbor_list_iterator_release(nl_iterator)
     709          14 :       DEALLOCATE (basis_set_list_a, basis_set_list_b)
     710             : 
     711          14 :       CALL timestop(handle)
     712          42 :    END SUBROUTINE cp_dbcsr_to_csr_screening
     713             : 
     714             : END MODULE cp_dbcsr_cp2k_link

Generated by: LCOV version 1.15