LCOV - code coverage report
Current view: top level - src - qs_fb_filter_matrix_methods.F (source / functions) Hit Total Coverage
Test: CP2K Regtests (git:4dc10b3) Lines: 394 405 97.3 %
Date: 2024-11-21 06:45:46 Functions: 8 9 88.9 %

          Line data    Source code
       1             : !--------------------------------------------------------------------------------------------------!
       2             : !   CP2K: A general program to perform molecular dynamics simulations                              !
       3             : !   Copyright 2000-2024 CP2K developers group <https://cp2k.org>                                   !
       4             : !                                                                                                  !
       5             : !   SPDX-License-Identifier: GPL-2.0-or-later                                                      !
       6             : !--------------------------------------------------------------------------------------------------!
       7             : 
       8             : MODULE qs_fb_filter_matrix_methods
       9             : 
      10             :    USE atomic_kind_types,               ONLY: atomic_kind_type,&
      11             :                                               get_atomic_kind
      12             :    USE cp_dbcsr_api,                    ONLY: dbcsr_create,&
      13             :                                               dbcsr_distribution_type,&
      14             :                                               dbcsr_finalize,&
      15             :                                               dbcsr_get_info,&
      16             :                                               dbcsr_get_stored_coordinates,&
      17             :                                               dbcsr_put_block,&
      18             :                                               dbcsr_type,&
      19             :                                               dbcsr_type_no_symmetry
      20             :    USE fermi_utils,                     ONLY: Fermi,&
      21             :                                               FermiFixed
      22             :    USE kinds,                           ONLY: default_string_length,&
      23             :                                               dp,&
      24             :                                               int_8
      25             :    USE message_passing,                 ONLY: mp_para_env_type
      26             :    USE particle_types,                  ONLY: particle_type
      27             :    USE qs_fb_atomic_halo_types,         ONLY: fb_atomic_halo_create,&
      28             :                                               fb_atomic_halo_get,&
      29             :                                               fb_atomic_halo_list_get,&
      30             :                                               fb_atomic_halo_list_obj,&
      31             :                                               fb_atomic_halo_nullify,&
      32             :                                               fb_atomic_halo_obj,&
      33             :                                               fb_atomic_halo_release,&
      34             :                                               fb_atomic_halo_set
      35             :    USE qs_fb_atomic_matrix_methods,     ONLY: fb_atmatrix_calc_size,&
      36             :                                               fb_atmatrix_construct,&
      37             :                                               fb_atmatrix_construct_2,&
      38             :                                               fb_atmatrix_generate_com_pairs_2
      39             :    USE qs_fb_com_tasks_types,           ONLY: &
      40             :         TASK_COST, TASK_DEST, TASK_N_RECORDS, TASK_PAIR, TASK_SRC, &
      41             :         fb_com_atom_pairs_calc_buffer_sizes, fb_com_atom_pairs_create, fb_com_atom_pairs_decode, &
      42             :         fb_com_atom_pairs_distribute_blks, fb_com_atom_pairs_gather_blks, fb_com_atom_pairs_get, &
      43             :         fb_com_atom_pairs_has_data, fb_com_atom_pairs_init, fb_com_atom_pairs_nullify, &
      44             :         fb_com_atom_pairs_obj, fb_com_atom_pairs_release, fb_com_tasks_build_atom_pairs, &
      45             :         fb_com_tasks_create, fb_com_tasks_encode_pair, fb_com_tasks_nullify, fb_com_tasks_obj, &
      46             :         fb_com_tasks_release, fb_com_tasks_set, fb_com_tasks_transpose_dest_src
      47             :    USE qs_fb_matrix_data_types,         ONLY: fb_matrix_data_add,&
      48             :                                               fb_matrix_data_create,&
      49             :                                               fb_matrix_data_has_data,&
      50             :                                               fb_matrix_data_nullify,&
      51             :                                               fb_matrix_data_obj,&
      52             :                                               fb_matrix_data_release
      53             :    USE qs_fb_trial_fns_types,           ONLY: fb_trial_fns_get,&
      54             :                                               fb_trial_fns_obj
      55             :    USE string_utilities,                ONLY: compress,&
      56             :                                               uppercase
      57             : #include "./base/base_uses.f90"
      58             : 
      59             :    IMPLICIT NONE
      60             : 
      61             :    PRIVATE
      62             : 
      63             :    CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'qs_fb_filter_matrix_methods'
      64             : 
      65             :    PUBLIC :: fb_fltrmat_build, &
      66             :              fb_fltrmat_build_2
      67             : 
      68             : CONTAINS
      69             : 
      70             : ! **************************************************************************************************
      71             : !> \brief Build the filter matrix, with MPI communications happening at each
      72             : !>        step. Less efficient on communication, but more efficient on
      73             : !>        memory usage (compared to fb_fltrmat_build_2)
      74             : !> \param H_mat : DBCSR system KS matrix
      75             : !> \param S_mat : DBCSR system overlap matrix
      76             : !> \param atomic_halos : list of all local atomic halos, each halo gives
      77             : !>                       one atomic matrix and contributes to one blk
      78             : !>                       col to the filter matrix
      79             : !> \param trial_fns : the trial functions to be used to shrink the
      80             : !>                     size of the new "filtered" basis
      81             : !> \param para_env : cp2k parallel environment
      82             : !> \param particle_set : set of all particles in the system
      83             : !> \param fermi_level : the fermi level used for defining the filter
      84             : !>                      function, which is a Fermi-Dirac distribution
      85             : !>                      function
      86             : !> \param filter_temp : the filter temperature used for defining the
      87             : !>                      filter function
      88             : !> \param name        : name given to the filter matrix
      89             : !> \param filter_mat  : DBCSR format filter matrix
      90             : !> \param tolerance   : anything less than tolerance is treated as zero
      91             : !> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
      92             : ! **************************************************************************************************
      93          64 :    SUBROUTINE fb_fltrmat_build(H_mat, &
      94             :                                S_mat, &
      95             :                                atomic_halos, &
      96             :                                trial_fns, &
      97             :                                para_env, &
      98             :                                particle_set, &
      99             :                                fermi_level, &
     100             :                                filter_temp, &
     101             :                                name, &
     102             :                                filter_mat, &
     103             :                                tolerance)
     104             :       TYPE(dbcsr_type), POINTER                          :: H_mat, S_mat
     105             :       TYPE(fb_atomic_halo_list_obj), INTENT(IN)          :: atomic_halos
     106             :       TYPE(fb_trial_fns_obj), INTENT(IN)                 :: trial_fns
     107             :       TYPE(mp_para_env_type), POINTER                    :: para_env
     108             :       TYPE(particle_type), DIMENSION(:), POINTER         :: particle_set
     109             :       REAL(KIND=dp), INTENT(IN)                          :: fermi_level, filter_temp
     110             :       CHARACTER(LEN=*), INTENT(IN)                       :: name
     111             :       TYPE(dbcsr_type), POINTER                          :: filter_mat
     112             :       REAL(KIND=dp), INTENT(IN)                          :: tolerance
     113             : 
     114             :       CHARACTER(LEN=*), PARAMETER                        :: routineN = 'fb_fltrmat_build'
     115             : 
     116             :       CHARACTER(LEN=32)                                  :: symmetry_string
     117             :       CHARACTER(LEN=default_string_length)               :: name_string
     118             :       INTEGER                                            :: handle, iblkcol, ihalo, ikind, &
     119             :                                                             max_nhalos, nblkcols_total, nhalos
     120          64 :       INTEGER, DIMENSION(:), POINTER                     :: col_blk_size, dummy_halo_atoms, ntfns, &
     121          64 :                                                             row_blk_size
     122             :       LOGICAL                                            :: send_data_only
     123             :       TYPE(atomic_kind_type), POINTER                    :: atomic_kind
     124             :       TYPE(dbcsr_distribution_type)                      :: dbcsr_dist
     125             :       TYPE(fb_atomic_halo_obj)                           :: dummy_atomic_halo
     126          64 :       TYPE(fb_atomic_halo_obj), DIMENSION(:), POINTER    :: halos
     127             : 
     128          64 :       CALL timeset(routineN, handle)
     129             : 
     130          64 :       NULLIFY (halos, atomic_kind, ntfns, dummy_halo_atoms, row_blk_size, col_blk_size)
     131          64 :       CALL fb_atomic_halo_nullify(dummy_atomic_halo)
     132             : 
     133             :       ! filter_mat must be of a dissassociated status (i.e. brand new)
     134          64 :       CPASSERT(.NOT. ASSOCIATED(filter_mat))
     135             : 
     136             :       ! get trial function information
     137             :       CALL fb_trial_fns_get(trial_fns=trial_fns, &
     138          64 :                             nfunctions=ntfns)
     139             : 
     140             :       ! calculate the row_blk_size and col_blk_size arrays for
     141             :       ! constructing the filter matrix in DBCSR format
     142             :       ! row_blk_size for the filter matrix is the same as H or S
     143             :       CALL dbcsr_get_info(H_mat, &
     144             :                           nblkcols_total=nblkcols_total, &
     145             :                           row_blk_size=row_blk_size, &
     146          64 :                           distribution=dbcsr_dist)
     147         192 :       ALLOCATE (col_blk_size(nblkcols_total))
     148         576 :       col_blk_size = 0
     149         576 :       DO iblkcol = 1, nblkcols_total
     150         512 :          atomic_kind => particle_set(iblkcol)%atomic_kind
     151             :          CALL get_atomic_kind(atomic_kind=atomic_kind, &
     152         512 :                               kind_number=ikind)
     153         576 :          col_blk_size(iblkcol) = ntfns(ikind)
     154             :       END DO
     155             :       ! DO NOT deallocate cbs if gift=.TRUE. as col_blk_sizes will only point to cbs
     156          64 :       name_string = name
     157          64 :       CALL compress(name_string)
     158          64 :       CALL uppercase(name_string)
     159             :       ! the filter matrix is non-square and is always non-symmetric
     160          64 :       symmetry_string = dbcsr_type_no_symmetry
     161             :       ! create empty filter matrix
     162          64 :       ALLOCATE (filter_mat)
     163             :       CALL dbcsr_create(matrix=filter_mat, &
     164             :                         name=name_string, &
     165             :                         dist=dbcsr_dist, &
     166             :                         matrix_type=symmetry_string, &
     167             :                         row_blk_size=row_blk_size, &
     168             :                         col_blk_size=col_blk_size, &
     169          64 :                         nze=0)
     170          64 :       DEALLOCATE (col_blk_size)
     171             : 
     172             :       CALL fb_atomic_halo_list_get(atomic_halos=atomic_halos, &
     173             :                                    nhalos=nhalos, &
     174             :                                    max_nhalos=max_nhalos, &
     175          64 :                                    halos=halos)
     176             : 
     177             :       ! create dummy empty atomic halo
     178          64 :       CALL fb_atomic_halo_create(dummy_atomic_halo)
     179          64 :       ALLOCATE (dummy_halo_atoms(0))
     180             :       CALL fb_atomic_halo_set(atomic_halo=dummy_atomic_halo, &
     181             :                               owner_atom=0, &
     182             :                               owner_id_in_halo=0, &
     183             :                               natoms=0, &
     184             :                               halo_atoms=dummy_halo_atoms, &
     185             :                               nelectrons=0, &
     186          64 :                               sorted=.TRUE.)
     187             : 
     188          64 :       send_data_only = .FALSE.
     189             : 
     190         320 :       DO ihalo = 1, max_nhalos
     191         256 :          IF (ihalo > nhalos) THEN
     192             :             send_data_only = .TRUE.
     193             :          END IF
     194             :          ! construct the filter matrix block by block
     195         320 :          IF (send_data_only) THEN
     196             :             CALL fb_fltrmat_add_blkcol(H_mat, &
     197             :                                        S_mat, &
     198             :                                        dummy_atomic_halo, &
     199             :                                        trial_fns, &
     200             :                                        para_env, &
     201             :                                        particle_set, &
     202             :                                        fermi_level, &
     203             :                                        filter_temp, &
     204             :                                        filter_mat, &
     205           0 :                                        tolerance)
     206             :          ELSE
     207             :             CALL fb_fltrmat_add_blkcol(H_mat, &
     208             :                                        S_mat, &
     209             :                                        halos(ihalo), &
     210             :                                        trial_fns, &
     211             :                                        para_env, &
     212             :                                        particle_set, &
     213             :                                        fermi_level, &
     214             :                                        filter_temp, &
     215             :                                        filter_mat, &
     216         256 :                                        tolerance)
     217             :          END IF ! send_data_only
     218             :       END DO
     219             : 
     220             :       ! finalise the filter matrix
     221          64 :       CALL dbcsr_finalize(filter_mat)
     222             : 
     223             :       ! cleanup
     224          64 :       CALL fb_atomic_halo_release(dummy_atomic_halo)
     225             : 
     226          64 :       CALL timestop(handle)
     227             : 
     228         192 :    END SUBROUTINE fb_fltrmat_build
     229             : 
     230             : ! **************************************************************************************************
     231             : !> \brief Build the filter matrix, with MPI communications grouped together.
     232             : !>        More effcient on communication, less efficient on memory (compared
     233             : !>        to fb_fltrmat_build)
     234             : !> \param H_mat : DBCSR system KS matrix
     235             : !> \param S_mat : DBCSR system overlap matrix
     236             : !> \param atomic_halos : list of all local atomic halos, each halo gives
     237             : !>                       one atomic matrix and contributes to one blk
     238             : !>                       col to the filter matrix
     239             : !> \param trial_fns : the trial functions to be used to shrink the
     240             : !>                     size of the new "filtered" basis
     241             : !> \param para_env : cp2k parallel environment
     242             : !> \param particle_set : set of all particles in the system
     243             : !> \param fermi_level : the fermi level used for defining the filter
     244             : !>                      function, which is a Fermi-Dirac distribution
     245             : !>                      function
     246             : !> \param filter_temp : the filter temperature used for defining the
     247             : !>                      filter function
     248             : !> \param name        : name given to the filter matrix
     249             : !> \param filter_mat  : DBCSR format filter matrix
     250             : !> \param tolerance   : anything less than tolerance is treated as zero
     251             : !> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
     252             : ! **************************************************************************************************
     253          16 :    SUBROUTINE fb_fltrmat_build_2(H_mat, &
     254             :                                  S_mat, &
     255             :                                  atomic_halos, &
     256             :                                  trial_fns, &
     257             :                                  para_env, &
     258             :                                  particle_set, &
     259             :                                  fermi_level, &
     260             :                                  filter_temp, &
     261             :                                  name, &
     262             :                                  filter_mat, &
     263             :                                  tolerance)
     264             :       TYPE(dbcsr_type), POINTER                          :: H_mat, S_mat
     265             :       TYPE(fb_atomic_halo_list_obj), INTENT(IN)          :: atomic_halos
     266             :       TYPE(fb_trial_fns_obj), INTENT(IN)                 :: trial_fns
     267             :       TYPE(mp_para_env_type), POINTER                    :: para_env
     268             :       TYPE(particle_type), DIMENSION(:), POINTER         :: particle_set
     269             :       REAL(KIND=dp), INTENT(IN)                          :: fermi_level, filter_temp
     270             :       CHARACTER(LEN=*), INTENT(IN)                       :: name
     271             :       TYPE(dbcsr_type), POINTER                          :: filter_mat
     272             :       REAL(KIND=dp), INTENT(IN)                          :: tolerance
     273             : 
     274             :       CHARACTER(LEN=*), PARAMETER :: routineN = 'fb_fltrmat_build_2'
     275             : 
     276             :       CHARACTER(LEN=default_string_length)               :: name_string
     277             :       INTEGER                                            :: handle, iblkcol, ihalo, ikind, &
     278             :                                                             natoms_global, natoms_in_halo, &
     279             :                                                             nblkcols_total, nblks_recv, nhalos, &
     280             :                                                             nmax
     281          16 :       INTEGER, DIMENSION(:), POINTER                     :: col_blk_size, ntfns, row_blk_size
     282             :       LOGICAL                                            :: check_ok
     283             :       TYPE(atomic_kind_type), POINTER                    :: atomic_kind
     284             :       TYPE(dbcsr_distribution_type)                      :: dbcsr_dist
     285          16 :       TYPE(fb_atomic_halo_obj), DIMENSION(:), POINTER    :: halos
     286             :       TYPE(fb_com_atom_pairs_obj)                        :: atmatrix_blks_recv, atmatrix_blks_send, &
     287             :                                                             filter_mat_blks_recv, &
     288             :                                                             filter_mat_blks_send
     289             :       TYPE(fb_matrix_data_obj)                           :: filter_mat_data, H_mat_data, S_mat_data
     290             : 
     291          16 :       CALL timeset(routineN, handle)
     292             : 
     293          16 :       NULLIFY (halos, atomic_kind, row_blk_size, col_blk_size, ntfns)
     294             : 
     295             :       ! filter_mat must be of a dissassociated status (i.e. brand new)
     296          16 :       check_ok = .NOT. ASSOCIATED(filter_mat)
     297          16 :       CPASSERT(check_ok)
     298             : 
     299             :       ! get total number of atoms
     300          16 :       natoms_global = SIZE(particle_set)
     301             : 
     302             :       ! get trial function information
     303             :       CALL fb_trial_fns_get(trial_fns=trial_fns, &
     304          16 :                             nfunctions=ntfns)
     305             : 
     306             :       ! calculate the row_blk_size and col_blk_size arrays for
     307             :       ! constructing the filter matrix in DBCSR format
     308             :       ! row_blk_size for the filter matrix is the same as H or S
     309             :       CALL dbcsr_get_info(H_mat, &
     310             :                           nblkcols_total=nblkcols_total, &
     311             :                           row_blk_size=row_blk_size, &
     312          16 :                           distribution=dbcsr_dist)
     313          48 :       ALLOCATE (col_blk_size(nblkcols_total))
     314         144 :       col_blk_size = 0
     315         144 :       DO iblkcol = 1, nblkcols_total
     316         128 :          atomic_kind => particle_set(iblkcol)%atomic_kind
     317             :          CALL get_atomic_kind(atomic_kind=atomic_kind, &
     318         128 :                               kind_number=ikind)
     319         144 :          col_blk_size(iblkcol) = ntfns(ikind)
     320             :       END DO
     321             :       ! DO NOT deallocate cbs if gift=.TRUE. as col_blk_sizes will only point to cbs
     322          16 :       name_string = name
     323          16 :       CALL compress(name_string)
     324          16 :       CALL uppercase(name_string)
     325             :       ! create empty filter matrix (it is always non-symmetric as it is non-square)
     326          16 :       ALLOCATE (filter_mat)
     327             :       CALL dbcsr_create(matrix=filter_mat, &
     328             :                         name=name_string, &
     329             :                         dist=dbcsr_dist, &
     330             :                         matrix_type=dbcsr_type_no_symmetry, &
     331             :                         row_blk_size=row_blk_size, &
     332             :                         col_blk_size=col_blk_size, &
     333          16 :                         nze=0)
     334          16 :       DEALLOCATE (col_blk_size)
     335             : 
     336             :       ! get all the blocks required for constructing atomic matrics, and
     337             :       ! store it in a fb_matrix_data object
     338          16 :       CALL fb_matrix_data_nullify(H_mat_data)
     339          16 :       CALL fb_matrix_data_nullify(S_mat_data)
     340          16 :       CALL fb_com_atom_pairs_nullify(atmatrix_blks_send)
     341          16 :       CALL fb_com_atom_pairs_nullify(atmatrix_blks_recv)
     342          16 :       CALL fb_com_atom_pairs_create(atmatrix_blks_send)
     343          16 :       CALL fb_com_atom_pairs_create(atmatrix_blks_recv)
     344             :       ! H matrix
     345             :       CALL fb_atmatrix_generate_com_pairs_2(H_mat, &
     346             :                                             atomic_halos, &
     347             :                                             para_env, &
     348             :                                             atmatrix_blks_send, &
     349          16 :                                             atmatrix_blks_recv)
     350             :       CALL fb_com_atom_pairs_get(atom_pairs=atmatrix_blks_recv, &
     351          16 :                                  npairs=nblks_recv)
     352             :       CALL fb_matrix_data_create(H_mat_data, &
     353             :                                  nblks_recv, &
     354          16 :                                  natoms_global)
     355             :       CALL fb_com_atom_pairs_gather_blks(H_mat, &
     356             :                                          atmatrix_blks_send, &
     357             :                                          atmatrix_blks_recv, &
     358             :                                          para_env, &
     359          16 :                                          H_mat_data)
     360             :       ! S matrix
     361             :       CALL fb_atmatrix_generate_com_pairs_2(S_mat, &
     362             :                                             atomic_halos, &
     363             :                                             para_env, &
     364             :                                             atmatrix_blks_send, &
     365          16 :                                             atmatrix_blks_recv)
     366             :       CALL fb_com_atom_pairs_get(atom_pairs=atmatrix_blks_recv, &
     367          16 :                                  npairs=nblks_recv)
     368             :       CALL fb_matrix_data_create(S_mat_data, &
     369             :                                  nblks_recv, &
     370          16 :                                  natoms_global)
     371             :       CALL fb_com_atom_pairs_gather_blks(S_mat, &
     372             :                                          atmatrix_blks_send, &
     373             :                                          atmatrix_blks_recv, &
     374             :                                          para_env, &
     375          16 :                                          S_mat_data)
     376             :       ! cleanup
     377          16 :       CALL fb_com_atom_pairs_release(atmatrix_blks_send)
     378          16 :       CALL fb_com_atom_pairs_release(atmatrix_blks_recv)
     379             : 
     380             :       ! make filter matrix blocks one by one and store in an
     381             :       ! matrix_data_obj
     382          16 :       CALL fb_matrix_data_nullify(filter_mat_data)
     383             :       CALL fb_atomic_halo_list_get(atomic_halos=atomic_halos, &
     384             :                                    nhalos=nhalos, &
     385          16 :                                    halos=halos)
     386          16 :       nmax = 0
     387          80 :       DO ihalo = 1, nhalos
     388             :          CALL fb_atomic_halo_get(atomic_halo=halos(ihalo), &
     389          64 :                                  natoms=natoms_in_halo)
     390          80 :          nmax = nmax + natoms_in_halo
     391             :       END DO
     392             :       CALL fb_matrix_data_create(filter_mat_data, &
     393             :                                  nmax, &
     394          16 :                                  natoms_global)
     395          80 :       DO ihalo = 1, nhalos
     396             :          CALL fb_fltrmat_add_blkcol_2(H_mat, &
     397             :                                       S_mat, &
     398             :                                       H_mat_data, &
     399             :                                       S_mat_data, &
     400             :                                       halos(ihalo), &
     401             :                                       trial_fns, &
     402             :                                       particle_set, &
     403             :                                       fermi_level, &
     404             :                                       filter_temp, &
     405             :                                       filter_mat_data, &
     406          80 :                                       tolerance)
     407             :       END DO
     408             :       ! clean up
     409          16 :       CALL fb_matrix_data_release(H_mat_data)
     410          16 :       CALL fb_matrix_data_release(S_mat_data)
     411             : 
     412             :       ! distribute the relevant blocks from the matrix_data_obj to DBCSR
     413             :       ! filter matrix
     414          16 :       CALL fb_com_atom_pairs_nullify(filter_mat_blks_send)
     415          16 :       CALL fb_com_atom_pairs_nullify(filter_mat_blks_recv)
     416          16 :       CALL fb_com_atom_pairs_create(filter_mat_blks_send)
     417          16 :       CALL fb_com_atom_pairs_create(filter_mat_blks_recv)
     418             :       CALL fb_fltrmat_generate_com_pairs_2(filter_mat, &
     419             :                                            atomic_halos, &
     420             :                                            para_env, &
     421             :                                            filter_mat_blks_send, &
     422          16 :                                            filter_mat_blks_recv)
     423             :       CALL fb_com_atom_pairs_distribute_blks(filter_mat_data, &
     424             :                                              filter_mat_blks_send, &
     425             :                                              filter_mat_blks_recv, &
     426             :                                              para_env, &
     427          16 :                                              filter_mat)
     428             :       ! cleanup
     429          16 :       CALL fb_com_atom_pairs_release(filter_mat_blks_send)
     430          16 :       CALL fb_com_atom_pairs_release(filter_mat_blks_recv)
     431          16 :       CALL fb_matrix_data_release(filter_mat_data)
     432             : 
     433             :       ! finalise matrix
     434          16 :       CALL dbcsr_finalize(filter_mat)
     435             : 
     436          16 :       CALL timestop(handle)
     437             : 
     438          96 :    END SUBROUTINE fb_fltrmat_build_2
     439             : 
     440             : ! **************************************************************************************************
     441             : !> \brief Add a computed blocks in one column to the filter matrix. This
     442             : !>        version is used by fb_fltrmat_build, for the case where MPI
     443             : !>        communications are done at each step
     444             : !>        It does not finalise the filter matrix
     445             : !> \param H_mat : DBCSR system KS matrix
     446             : !> \param S_mat : DBCSR system overlap matrix
     447             : !> \param atomic_halo :  the halo that contributes to the blk
     448             : !>                       col of the filter matrix
     449             : !> \param trial_fns ...
     450             : !> \param para_env : cp2k parallel environment
     451             : !> \param particle_set : set of all particles in the system
     452             : !> \param fermi_level : the fermi level used for defining the filter
     453             : !>                      function, which is a Fermi-Dirac distribution
     454             : !>                      function
     455             : !> \param filter_temp : the filter temperature used for defining the
     456             : !>                      filter function
     457             : !> \param filter_mat  : DBCSR format filter matrix
     458             : !> \param tolerance   : anything smaller than tolerance is treated as zero
     459             : !> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
     460             : ! **************************************************************************************************
     461         256 :    SUBROUTINE fb_fltrmat_add_blkcol(H_mat, &
     462             :                                     S_mat, &
     463             :                                     atomic_halo, &
     464             :                                     trial_fns, &
     465             :                                     para_env, &
     466             :                                     particle_set, &
     467             :                                     fermi_level, &
     468             :                                     filter_temp, &
     469             :                                     filter_mat, &
     470             :                                     tolerance)
     471             :       TYPE(dbcsr_type), POINTER                          :: H_mat, S_mat
     472             :       TYPE(fb_atomic_halo_obj), INTENT(IN)               :: atomic_halo
     473             :       TYPE(fb_trial_fns_obj), INTENT(IN)                 :: trial_fns
     474             :       TYPE(mp_para_env_type), POINTER                    :: para_env
     475             :       TYPE(particle_type), DIMENSION(:), POINTER         :: particle_set
     476             :       REAL(KIND=dp), INTENT(IN)                          :: fermi_level, filter_temp
     477             :       TYPE(dbcsr_type), POINTER                          :: filter_mat
     478             :       REAL(KIND=dp), INTENT(IN)                          :: tolerance
     479             : 
     480             :       CHARACTER(LEN=*), PARAMETER :: routineN = 'fb_fltrmat_add_blkcol'
     481             : 
     482             :       INTEGER :: handle, handle_mpi, iatom_global, iatom_in_halo, ind, ipair, ipe, itrial, &
     483             :          jatom_global, jatom_in_halo, jkind, natoms_global, natoms_in_halo, ncols_atmatrix, &
     484             :          ncols_blk, nrows_atmatrix, nrows_blk, numprocs, pe, recv_encode, send_encode, stat
     485         256 :       INTEGER(KIND=int_8), DIMENSION(:), POINTER         :: pairs_recv, pairs_send
     486         256 :       INTEGER, ALLOCATABLE, DIMENSION(:) :: atomic_H_blk_col_start, atomic_H_blk_row_start, &
     487         256 :          atomic_S_blk_col_start, atomic_S_blk_row_start, col_block_size_data, ind_in_halo, &
     488         256 :          recv_disps, recv_pair_count, recv_pair_disps, recv_sizes, send_disps, send_pair_count, &
     489         256 :          send_pair_disps, send_sizes
     490         256 :       INTEGER, DIMENSION(:), POINTER                     :: halo_atoms, ntfns, row_block_size_data
     491         256 :       INTEGER, DIMENSION(:, :), POINTER                  :: tfns
     492         256 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: recv_buf, send_buf
     493         256 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :)        :: atomic_filter_mat, atomic_H, atomic_S
     494             :       TYPE(atomic_kind_type), POINTER                    :: atomic_kind
     495             :       TYPE(fb_com_atom_pairs_obj)                        :: com_pairs_recv, com_pairs_send
     496             : 
     497         256 :       CALL timeset(routineN, handle)
     498             : 
     499         256 :       NULLIFY (atomic_kind, halo_atoms, ntfns, pairs_send, pairs_recv, &
     500         256 :                row_block_size_data, tfns)
     501         256 :       CALL fb_com_atom_pairs_nullify(com_pairs_send)
     502         256 :       CALL fb_com_atom_pairs_nullify(com_pairs_recv)
     503             : 
     504             :       ! ----------------------------------------------------------------------
     505             :       ! Get communication buffers ready
     506             :       ! ----------------------------------------------------------------------
     507             : 
     508             :       ! generate send and recv atom pairs
     509         256 :       CALL fb_com_atom_pairs_create(com_pairs_send)
     510         256 :       CALL fb_com_atom_pairs_create(com_pairs_recv)
     511             :       CALL fb_fltrmat_generate_com_pairs(filter_mat, &
     512             :                                          atomic_halo, &
     513             :                                          para_env, &
     514             :                                          com_pairs_send, &
     515         256 :                                          com_pairs_recv)
     516             :       CALL fb_com_atom_pairs_get(atom_pairs=com_pairs_send, &
     517             :                                  natoms_encode=send_encode, &
     518         256 :                                  pairs=pairs_send)
     519             :       CALL fb_com_atom_pairs_get(atom_pairs=com_pairs_recv, &
     520             :                                  natoms_encode=recv_encode, &
     521         256 :                                  pairs=pairs_recv)
     522             : 
     523             :       ! get para_env info
     524         256 :       numprocs = para_env%num_pe
     525             :       ! me = para_env%mepos + 1   ! my process id, starting counting from 1
     526             : 
     527             :       ! obtain trail function information
     528             :       CALL fb_trial_fns_get(trial_fns=trial_fns, &
     529             :                             nfunctions=ntfns, &
     530         256 :                             functions=tfns)
     531             : 
     532             :       ! obtain row and col block size data for filter matrix
     533         256 :       CALL dbcsr_get_info(H_mat, row_blk_size=row_block_size_data)
     534         256 :       natoms_global = SIZE(particle_set)
     535         768 :       ALLOCATE (col_block_size_data(natoms_global))
     536        2304 :       DO jatom_global = 1, natoms_global
     537        2048 :          atomic_kind => particle_set(jatom_global)%atomic_kind
     538        2048 :          CALL get_atomic_kind(atomic_kind=atomic_kind, kind_number=jkind)
     539        2304 :          col_block_size_data(jatom_global) = ntfns(jkind)
     540             :       END DO
     541             : 
     542             :       ! allocate temporary arrays for send
     543         768 :       ALLOCATE (send_sizes(numprocs))
     544         512 :       ALLOCATE (send_disps(numprocs))
     545         512 :       ALLOCATE (send_pair_count(numprocs))
     546         512 :       ALLOCATE (send_pair_disps(numprocs))
     547             :       ! setup send buffer sizes
     548             :       CALL fb_com_atom_pairs_calc_buffer_sizes(com_pairs_send, &
     549             :                                                numprocs, &
     550             :                                                row_block_size_data, &
     551             :                                                col_block_size_data, &
     552             :                                                send_sizes, &
     553             :                                                send_disps, &
     554             :                                                send_pair_count, &
     555         256 :                                                send_pair_disps)
     556             :       ! allocate send buffer
     557        1280 :       ALLOCATE (send_buf(SUM(send_sizes)))
     558             : 
     559             :       ! allocate temporary array for recv
     560         512 :       ALLOCATE (recv_sizes(numprocs))
     561         512 :       ALLOCATE (recv_disps(numprocs))
     562         512 :       ALLOCATE (recv_pair_count(numprocs))
     563         512 :       ALLOCATE (recv_pair_disps(numprocs))
     564             :       ! setup recv buffer sizes
     565             :       CALL fb_com_atom_pairs_calc_buffer_sizes(com_pairs_recv, &
     566             :                                                numprocs, &
     567             :                                                row_block_size_data, &
     568             :                                                col_block_size_data, &
     569             :                                                recv_sizes, &
     570             :                                                recv_disps, &
     571             :                                                recv_pair_count, &
     572         256 :                                                recv_pair_disps)
     573             :       ! allocate recv buffer
     574        1280 :       ALLOCATE (recv_buf(SUM(recv_sizes)))
     575             : 
     576             :       ! ----------------------------------------------------------------------
     577             :       ! Construct atomic filter matrix for this atomic_halo
     578             :       ! ----------------------------------------------------------------------
     579             : 
     580             :       CALL fb_atomic_halo_get(atomic_halo=atomic_halo, &
     581             :                               natoms=natoms_in_halo, &
     582         256 :                               halo_atoms=halo_atoms)
     583             : 
     584             :       ! construct atomic matrix for H for atomic_halo
     585             :       ALLOCATE (atomic_H_blk_row_start(natoms_in_halo + 1), &
     586             :                 atomic_H_blk_col_start(natoms_in_halo + 1), &
     587        1024 :                 STAT=stat)
     588         256 :       CPASSERT(stat == 0)
     589             :       CALL fb_atmatrix_calc_size(H_mat, &
     590             :                                  atomic_halo, &
     591             :                                  nrows_atmatrix, &
     592             :                                  ncols_atmatrix, &
     593             :                                  atomic_H_blk_row_start, &
     594         256 :                                  atomic_H_blk_col_start)
     595             : 
     596        1024 :       ALLOCATE (atomic_H(nrows_atmatrix, ncols_atmatrix))
     597             :       CALL fb_atmatrix_construct(H_mat, &
     598             :                                  atomic_halo, &
     599             :                                  para_env, &
     600             :                                  atomic_H, &
     601             :                                  atomic_H_blk_row_start, &
     602         256 :                                  atomic_H_blk_col_start)
     603             : 
     604             :       ! construct atomic matrix for S for atomic_halo
     605             :       ALLOCATE (atomic_S_blk_row_start(natoms_in_halo + 1), &
     606             :                 atomic_S_blk_col_start(natoms_in_halo + 1), &
     607        1024 :                 STAT=stat)
     608         256 :       CPASSERT(stat == 0)
     609             :       CALL fb_atmatrix_calc_size(S_mat, &
     610             :                                  atomic_halo, &
     611             :                                  nrows_atmatrix, &
     612             :                                  ncols_atmatrix, &
     613             :                                  atomic_S_blk_row_start, &
     614         256 :                                  atomic_S_blk_col_start)
     615        1024 :       ALLOCATE (atomic_S(nrows_atmatrix, ncols_atmatrix))
     616             :       CALL fb_atmatrix_construct(S_mat, &
     617             :                                  atomic_halo, &
     618             :                                  para_env, &
     619             :                                  atomic_S, &
     620             :                                  atomic_S_blk_row_start, &
     621         256 :                                  atomic_S_blk_col_start)
     622             : 
     623             :       ! construct the atomic filter matrix
     624         768 :       ALLOCATE (atomic_filter_mat(nrows_atmatrix, ncols_atmatrix))
     625             :       ! calculate atomic filter matrix only if it is non-zero sized
     626         256 :       IF (nrows_atmatrix > 0 .AND. ncols_atmatrix > 0) THEN
     627             :          CALL fb_fltrmat_build_atomic_fltrmat(atomic_H, &
     628             :                                               atomic_S, &
     629             :                                               fermi_level, &
     630             :                                               filter_temp, &
     631             :                                               atomic_filter_mat, &
     632         256 :                                               tolerance)
     633             :       END IF
     634             : 
     635             :       ! ----------------------------------------------------------------------
     636             :       ! Construct filter matrix blocks and add to the correct locations
     637             :       ! in send_buffer
     638             :       ! ----------------------------------------------------------------------
     639             : 
     640             :       ! preconstruct iatom_global to iatom_in_halo map
     641         512 :       ALLOCATE (ind_in_halo(natoms_global))
     642        2304 :       ind_in_halo = 0
     643        2304 :       DO iatom_in_halo = 1, natoms_in_halo
     644        2048 :          iatom_global = halo_atoms(iatom_in_halo)
     645        2304 :          ind_in_halo(iatom_global) = iatom_in_halo
     646             :       END DO
     647             : 
     648             :       ! initialise send buffer
     649      106752 :       IF (SIZE(send_buf) > 0) send_buf = 0.0_dp
     650             :       ! assign values
     651         768 :       DO ipe = 1, numprocs
     652         512 :          send_sizes(ipe) = 0
     653        2816 :          DO ipair = 1, send_pair_count(ipe)
     654             :             CALL fb_com_atom_pairs_decode(pairs_send(send_pair_disps(ipe) + ipair), &
     655             :                                           pe, iatom_global, jatom_global, &
     656        2048 :                                           send_encode)
     657        2048 :             iatom_in_halo = ind_in_halo(iatom_global)
     658        2048 :             CPASSERT(iatom_in_halo > 0)
     659        2048 :             jatom_in_halo = ind_in_halo(jatom_global)
     660        2048 :             CPASSERT(jatom_in_halo > 0)
     661        2048 :             atomic_kind => particle_set(jatom_global)%atomic_kind
     662             :             CALL get_atomic_kind(atomic_kind=atomic_kind, &
     663        2048 :                                  kind_number=jkind)
     664        2048 :             nrows_blk = row_block_size_data(iatom_global)
     665        2048 :             ncols_blk = ntfns(jkind)
     666             : 
     667             :             ! do it column-wise one trial function at a time
     668       10240 :             DO itrial = 1, ntfns(jkind)
     669        8192 :                ind = send_disps(ipe) + send_sizes(ipe) + (itrial - 1)*nrows_blk
     670             :                CALL dgemv("N", &
     671             :                           nrows_blk, &
     672             :                           ncols_atmatrix, &
     673             :                           1.0_dp, &
     674             :                           atomic_filter_mat( &
     675             :                           atomic_H_blk_row_start(iatom_in_halo): &
     676             :                           atomic_H_blk_row_start(iatom_in_halo + 1) - 1, &
     677             :                           1:ncols_atmatrix &
     678             :                           ), &
     679             :                           nrows_blk, &
     680             :                           atomic_S( &
     681             :                           1:nrows_atmatrix, &
     682             :                           atomic_S_blk_col_start(jatom_in_halo) + &
     683             :                           tfns(itrial, jkind) - 1 &
     684             :                           ), &
     685             :                           1, &
     686             :                           0.0_dp, &
     687             :                           send_buf(ind + 1:ind + nrows_blk), &
     688    23865344 :                           1)
     689             :             END DO ! itrial
     690        6656 :             send_sizes(ipe) = send_sizes(ipe) + nrows_blk*ncols_blk
     691             :          END DO ! ipair
     692             :       END DO ! ipe
     693             : 
     694         256 :       DEALLOCATE (atomic_H)
     695         256 :       DEALLOCATE (atomic_H_blk_row_start)
     696         256 :       DEALLOCATE (atomic_S)
     697         256 :       DEALLOCATE (atomic_S_blk_row_start)
     698         256 :       DEALLOCATE (atomic_filter_mat)
     699         256 :       DEALLOCATE (ind_in_halo)
     700             : 
     701             :       ! ----------------------------------------------------------------------
     702             :       ! Do communication
     703             :       ! ----------------------------------------------------------------------
     704             : 
     705         256 :       CALL timeset("fb_fltrmat_add_blkcol_mpi", handle_mpi)
     706             : 
     707             :       CALL para_env%alltoall(send_buf, send_sizes, send_disps, &
     708         256 :                              recv_buf, recv_sizes, recv_disps)
     709             : 
     710         256 :       CALL timestop(handle_mpi)
     711             : 
     712         256 :       DEALLOCATE (send_buf)
     713         256 :       DEALLOCATE (send_sizes)
     714         256 :       DEALLOCATE (send_disps)
     715         256 :       DEALLOCATE (send_pair_count)
     716         256 :       DEALLOCATE (send_pair_disps)
     717             : 
     718             :       ! ----------------------------------------------------------------------
     719             :       ! Unpack the recv buffer and add the blocks to correct parts of
     720             :       ! the DBCSR filter matrix
     721             :       ! ----------------------------------------------------------------------
     722             : 
     723         768 :       DO ipe = 1, numprocs
     724         512 :          recv_sizes(ipe) = 0
     725        2816 :          DO ipair = 1, recv_pair_count(ipe)
     726             :             CALL fb_com_atom_pairs_decode(pairs_recv(recv_pair_disps(ipe) + ipair), &
     727             :                                           pe, iatom_global, jatom_global, &
     728        2048 :                                           recv_encode)
     729        2048 :             nrows_blk = row_block_size_data(iatom_global)
     730        2048 :             ncols_blk = col_block_size_data(jatom_global)
     731        2048 :             ind = recv_disps(ipe) + recv_sizes(ipe)
     732             :             CALL dbcsr_put_block(filter_mat, &
     733             :                                  iatom_global, jatom_global, &
     734        2048 :                                  recv_buf((ind + 1):(ind + nrows_blk*ncols_blk)))
     735        4608 :             recv_sizes(ipe) = recv_sizes(ipe) + nrows_blk*ncols_blk
     736             :          END DO ! ipair
     737             :       END DO ! ipe
     738             : 
     739             :       ! cleanup rest of the temporary arrays
     740         256 :       DEALLOCATE (recv_buf)
     741         256 :       DEALLOCATE (recv_sizes)
     742         256 :       DEALLOCATE (recv_pair_count)
     743         256 :       DEALLOCATE (recv_pair_disps)
     744             : 
     745         256 :       CALL fb_com_atom_pairs_release(com_pairs_send)
     746         256 :       CALL fb_com_atom_pairs_release(com_pairs_recv)
     747             : 
     748             :       ! cannot finalise the matrix until all blocks has been added
     749             : 
     750         256 :       CALL timestop(handle)
     751             : 
     752        1792 :    END SUBROUTINE fb_fltrmat_add_blkcol
     753             : 
     754             : ! **************************************************************************************************
     755             : !> \brief Computed blocks in one filter matrix column. This version is used by
     756             : !>        fb_fltrmat_build_2, where MPI communication is done collectively
     757             : !> \param H_mat : DBCSR system KS matrix
     758             : !> \param S_mat : DBCSR system overlap matrix
     759             : !> \param H_mat_data  :  local storage of the relevant H_mat matrix blocks
     760             : !> \param S_mat_data  :  local storage of the relevant S_mat matrix blocks
     761             : !> \param atomic_halo :  the halo that contributes to the blk
     762             : !>                       col of the filter matrix
     763             : !> \param trial_fns   :  trial functions data
     764             : !> \param particle_set : set of all particles in the system
     765             : !> \param fermi_level : the fermi level used for defining the filter
     766             : !>                      function, which is a Fermi-Dirac distribution
     767             : !>                      function
     768             : !> \param filter_temp : the filter temperature used for defining the
     769             : !>                      filter function
     770             : !> \param filter_mat_data : local storage for the the computed filter matrix
     771             : !>                          blocks
     772             : !> \param tolerance : anything less than this is regarded as zero
     773             : !> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
     774             : ! **************************************************************************************************
     775          64 :    SUBROUTINE fb_fltrmat_add_blkcol_2(H_mat, &
     776             :                                       S_mat, &
     777             :                                       H_mat_data, &
     778             :                                       S_mat_data, &
     779             :                                       atomic_halo, &
     780             :                                       trial_fns, &
     781             :                                       particle_set, &
     782             :                                       fermi_level, &
     783             :                                       filter_temp, &
     784             :                                       filter_mat_data, &
     785             :                                       tolerance)
     786             :       TYPE(dbcsr_type), POINTER                          :: H_mat, S_mat
     787             :       TYPE(fb_matrix_data_obj), INTENT(IN)               :: H_mat_data, S_mat_data
     788             :       TYPE(fb_atomic_halo_obj), INTENT(IN)               :: atomic_halo
     789             :       TYPE(fb_trial_fns_obj), INTENT(IN)                 :: trial_fns
     790             :       TYPE(particle_type), DIMENSION(:), POINTER         :: particle_set
     791             :       REAL(KIND=dp), INTENT(IN)                          :: fermi_level, filter_temp
     792             :       TYPE(fb_matrix_data_obj), INTENT(INOUT)            :: filter_mat_data
     793             :       REAL(KIND=dp), INTENT(IN)                          :: tolerance
     794             : 
     795             :       CHARACTER(LEN=*), PARAMETER :: routineN = 'fb_fltrmat_add_blkcol_2'
     796             : 
     797             :       INTEGER :: handle, iatom_global, iatom_in_halo, itrial, jatom_global, jatom_in_halo, jkind, &
     798             :          natoms_global, natoms_in_halo, ncols_atmatrix, ncols_blk, ncols_blk_max, nrows_atmatrix, &
     799             :          nrows_blk, nrows_blk_max, stat
     800          64 :       INTEGER, ALLOCATABLE, DIMENSION(:) :: atomic_H_blk_col_start, atomic_H_blk_row_start, &
     801          64 :          atomic_S_blk_col_start, atomic_S_blk_row_start, col_block_size_data
     802          64 :       INTEGER, DIMENSION(:), POINTER                     :: halo_atoms, ntfns, row_block_size_data
     803          64 :       INTEGER, DIMENSION(:, :), POINTER                  :: tfns
     804             :       LOGICAL                                            :: check_ok
     805          64 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :)        :: atomic_filter_mat, atomic_H, atomic_S, &
     806          64 :                                                             mat_blk
     807             :       TYPE(atomic_kind_type), POINTER                    :: atomic_kind
     808             : 
     809          64 :       CALL timeset(routineN, handle)
     810             : 
     811          64 :       NULLIFY (atomic_kind, halo_atoms, ntfns, row_block_size_data, tfns)
     812             : 
     813          64 :       check_ok = fb_matrix_data_has_data(H_mat_data)
     814          64 :       CPASSERT(check_ok)
     815          64 :       check_ok = fb_matrix_data_has_data(S_mat_data)
     816          64 :       CPASSERT(check_ok)
     817             : 
     818             :       ! obtain trial function information
     819             :       CALL fb_trial_fns_get(trial_fns=trial_fns, &
     820             :                             nfunctions=ntfns, &
     821          64 :                             functions=tfns)
     822             : 
     823             :       ! obtain row and col block size data for filter matrix
     824          64 :       CALL dbcsr_get_info(H_mat, row_blk_size=row_block_size_data)
     825          64 :       natoms_global = SIZE(particle_set)
     826         192 :       ALLOCATE (col_block_size_data(natoms_global))
     827         576 :       DO jatom_global = 1, natoms_global
     828         512 :          atomic_kind => particle_set(jatom_global)%atomic_kind
     829         512 :          CALL get_atomic_kind(atomic_kind=atomic_kind, kind_number=jkind)
     830         576 :          col_block_size_data(jatom_global) = ntfns(jkind)
     831             :       END DO
     832             : 
     833             :       ! ----------------------------------------------------------------------
     834             :       ! Construct atomic filter matrix for this atomic_halo
     835             :       ! ----------------------------------------------------------------------
     836             : 
     837             :       CALL fb_atomic_halo_get(atomic_halo=atomic_halo, &
     838             :                               natoms=natoms_in_halo, &
     839          64 :                               halo_atoms=halo_atoms)
     840             : 
     841             :       ! construct atomic matrix for H for atomic_halo
     842             :       ALLOCATE (atomic_H_blk_row_start(natoms_in_halo + 1), &
     843             :                 atomic_H_blk_col_start(natoms_in_halo + 1), &
     844         256 :                 STAT=stat)
     845          64 :       CPASSERT(stat == 0)
     846             :       CALL fb_atmatrix_calc_size(H_mat, &
     847             :                                  atomic_halo, &
     848             :                                  nrows_atmatrix, &
     849             :                                  ncols_atmatrix, &
     850             :                                  atomic_H_blk_row_start, &
     851          64 :                                  atomic_H_blk_col_start)
     852         256 :       ALLOCATE (atomic_H(nrows_atmatrix, ncols_atmatrix))
     853             :       CALL fb_atmatrix_construct_2(H_mat_data, &
     854             :                                    atomic_halo, &
     855             :                                    atomic_H, &
     856             :                                    atomic_H_blk_row_start, &
     857          64 :                                    atomic_H_blk_col_start)
     858             : 
     859             :       ! construct atomic matrix for S for atomic_halo
     860             :       ALLOCATE (atomic_S_blk_row_start(natoms_in_halo + 1), &
     861             :                 atomic_S_blk_col_start(natoms_in_halo + 1), &
     862         256 :                 STAT=stat)
     863          64 :       CPASSERT(stat == 0)
     864             :       CALL fb_atmatrix_calc_size(S_mat, &
     865             :                                  atomic_halo, &
     866             :                                  nrows_atmatrix, &
     867             :                                  ncols_atmatrix, &
     868             :                                  atomic_S_blk_row_start, &
     869          64 :                                  atomic_S_blk_col_start)
     870         256 :       ALLOCATE (atomic_S(nrows_atmatrix, ncols_atmatrix))
     871             :       CALL fb_atmatrix_construct_2(S_mat_data, &
     872             :                                    atomic_halo, &
     873             :                                    atomic_S, &
     874             :                                    atomic_S_blk_row_start, &
     875          64 :                                    atomic_S_blk_col_start)
     876             : 
     877             :       ! construct the atomic filter matrix
     878         192 :       ALLOCATE (atomic_filter_mat(nrows_atmatrix, ncols_atmatrix))
     879             :       ! calculate atomic filter matrix only if it is non-zero sized
     880          64 :       IF (nrows_atmatrix > 0 .AND. ncols_atmatrix > 0) THEN
     881             :          CALL fb_fltrmat_build_atomic_fltrmat(atomic_H, &
     882             :                                               atomic_S, &
     883             :                                               fermi_level, &
     884             :                                               filter_temp, &
     885             :                                               atomic_filter_mat, &
     886          64 :                                               tolerance)
     887             :       END IF
     888             : 
     889             :       ! ----------------------------------------------------------------------
     890             :       ! Construct filter matrix block and add to filter_mat_data
     891             :       ! ----------------------------------------------------------------------
     892             : 
     893             :       CALL fb_atomic_halo_get(atomic_halo=atomic_halo, &
     894             :                               owner_atom=jatom_global, &
     895          64 :                               owner_id_in_halo=jatom_in_halo)
     896         576 :       nrows_blk_max = MAXVAL(row_block_size_data)
     897         128 :       ncols_blk_max = MAXVAL(ntfns)
     898         256 :       ALLOCATE (mat_blk(nrows_blk_max, ncols_blk_max))
     899        3648 :       mat_blk(:, :) = 0.0_dp
     900         576 :       DO iatom_in_halo = 1, natoms_in_halo
     901         512 :          iatom_global = halo_atoms(iatom_in_halo)
     902         512 :          atomic_kind => particle_set(jatom_global)%atomic_kind
     903             :          CALL get_atomic_kind(atomic_kind=atomic_kind, &
     904         512 :                               kind_number=jkind)
     905         512 :          nrows_blk = row_block_size_data(iatom_global)
     906         512 :          ncols_blk = ntfns(jkind)
     907             : 
     908             :          ! ALLOCATE(mat_blk(nrows_blk,ncols_blk) STAT=stat)
     909             :          ! CPPostcondition(stat==0, cp_failure_level, routineP,failure)
     910             : 
     911             :          ! do it column-wise one trial function at a time
     912        2560 :          DO itrial = 1, ntfns(jkind)
     913             :             CALL dgemv("N", &
     914             :                        nrows_blk, &
     915             :                        ncols_atmatrix, &
     916             :                        1.0_dp, &
     917             :                        atomic_filter_mat( &
     918             :                        atomic_H_blk_row_start(iatom_in_halo): &
     919             :                        atomic_H_blk_row_start(iatom_in_halo + 1) - 1, &
     920             :                        1:ncols_atmatrix &
     921             :                        ), &
     922             :                        nrows_blk, &
     923             :                        atomic_S( &
     924             :                        1:nrows_atmatrix, &
     925             :                        atomic_S_blk_col_start(jatom_in_halo) + &
     926             :                        tfns(itrial, jkind) - 1 &
     927             :                        ), &
     928             :                        1, &
     929             :                        0.0_dp, &
     930             :                        mat_blk( &
     931             :                        1:nrows_blk, &
     932             :                        itrial), &
     933     5966336 :                        1)
     934             :          END DO ! itrial
     935             :          CALL fb_matrix_data_add(filter_mat_data, &
     936             :                                  iatom_global, &
     937             :                                  jatom_global, &
     938        1088 :                                  mat_blk(1:nrows_blk, 1:ncols_blk))
     939             :          ! DEALLOCATE(mat_blk, STAT=stat)
     940             :          ! CPPostcondition(stat==0, cp_failure_level, routineP,failure)
     941             :       END DO ! iatom_in_halo
     942          64 :       DEALLOCATE (mat_blk)
     943             : 
     944             :       ! clean up
     945          64 :       DEALLOCATE (atomic_H)
     946          64 :       DEALLOCATE (atomic_H_blk_row_start)
     947          64 :       DEALLOCATE (atomic_S)
     948          64 :       DEALLOCATE (atomic_S_blk_row_start)
     949          64 :       DEALLOCATE (atomic_filter_mat)
     950             : 
     951          64 :       CALL timestop(handle)
     952             : 
     953         384 :    END SUBROUTINE fb_fltrmat_add_blkcol_2
     954             : 
     955             : ! **************************************************************************************************
     956             : !> \brief generate the list of blocks (atom pairs) to be sent and received
     957             : !>        in order to construct the filter matrix for each atomic halo.
     958             : !>        This version is for use with fb_fltrmat_build, where MPI
     959             : !>        communications are done at each step
     960             : !> \param filter_mat : DBCSR formatted filter matrix
     961             : !> \param atomic_halo :  the halo that contributes to a blk
     962             : !>                       col of the filter matrix
     963             : !> \param para_env : cp2k parallel environment
     964             : !> \param atom_pairs_send : list of blocks to be sent
     965             : !> \param atom_pairs_recv : list of blocks to be received
     966             : !> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
     967             : ! **************************************************************************************************
     968         256 :    SUBROUTINE fb_fltrmat_generate_com_pairs(filter_mat, &
     969             :                                             atomic_halo, &
     970             :                                             para_env, &
     971             :                                             atom_pairs_send, &
     972             :                                             atom_pairs_recv)
     973             :       TYPE(dbcsr_type), POINTER                          :: filter_mat
     974             :       TYPE(fb_atomic_halo_obj), INTENT(IN)               :: atomic_halo
     975             :       TYPE(mp_para_env_type), POINTER                    :: para_env
     976             :       TYPE(fb_com_atom_pairs_obj), INTENT(INOUT)         :: atom_pairs_send, atom_pairs_recv
     977             : 
     978             :       CHARACTER(LEN=*), PARAMETER :: routineN = 'fb_fltrmat_generate_com_pairs'
     979             : 
     980             :       INTEGER                                            :: dest, handle, iatom_global, &
     981             :                                                             iatom_in_halo, itask, jatom_global, &
     982             :                                                             natoms_in_halo, nblkrows_total, &
     983             :                                                             ntasks_send
     984         256 :       INTEGER(KIND=int_8), DIMENSION(:, :), POINTER      :: tasks_send
     985         256 :       INTEGER, DIMENSION(:), POINTER                     :: halo_atoms
     986             :       TYPE(fb_com_tasks_obj)                             :: com_tasks_recv, com_tasks_send
     987             : 
     988         256 :       CALL timeset(routineN, handle)
     989             : 
     990         256 :       NULLIFY (tasks_send)
     991         256 :       CALL fb_com_tasks_nullify(com_tasks_send)
     992         256 :       CALL fb_com_tasks_nullify(com_tasks_recv)
     993             : 
     994             :       ! initialise atom_pairs_send and atom_pairs_recv
     995         256 :       IF (fb_com_atom_pairs_has_data(atom_pairs_send)) THEN
     996         256 :          CALL fb_com_atom_pairs_init(atom_pairs_send)
     997             :       ELSE
     998           0 :          CALL fb_com_atom_pairs_create(atom_pairs_send)
     999             :       END IF
    1000         256 :       IF (fb_com_atom_pairs_has_data(atom_pairs_recv)) THEN
    1001         256 :          CALL fb_com_atom_pairs_init(atom_pairs_recv)
    1002             :       ELSE
    1003           0 :          CALL fb_com_atom_pairs_create(atom_pairs_recv)
    1004             :       END IF
    1005             : 
    1006             :       ! The total number of filter matrix blocks each processor is going
    1007             :       ! to construct equals to the total number of halo atoms in all of
    1008             :       ! the atomic halos local to the processor. The number of send
    1009             :       ! tasks will not exceed this. We do one halo (col) at a time, and
    1010             :       ! each call of this subroutine will only work on one filter matrix
    1011             :       ! col corresponding to atomic_halo.
    1012             : 
    1013             :       ! The col atom block index for each filter matrix block are the
    1014             :       ! owner atom of each halo. The row atom block index for each
    1015             :       ! filter matrix block corresponding to each col are the halo atoms
    1016             :       ! of the corresponding halos. Filter matrix is non-symmetric: it
    1017             :       ! is non-square, because the blocks themselves are non-square
    1018             : 
    1019             :       CALL fb_atomic_halo_get(atomic_halo=atomic_halo, &
    1020             :                               owner_atom=jatom_global, &
    1021             :                               natoms=natoms_in_halo, &
    1022         256 :                               halo_atoms=halo_atoms)
    1023         256 :       ntasks_send = natoms_in_halo
    1024             : 
    1025             :       ! allocate send tasks
    1026         768 :       ALLOCATE (tasks_send(TASK_N_RECORDS, ntasks_send))
    1027             : 
    1028             :       ! Get the total number of atoms, this can be obtained from the
    1029             :       ! total number of block rows in the DBCSR filter matrix.  We
    1030             :       ! assumes that before calling this subroutine, the filter_mat has
    1031             :       ! already been created and initialised: i.e. using
    1032             :       ! dbcsr_create_new. Even if the matrix is at the moment empty,
    1033             :       ! the attribute nblkrows_total is already assigned from the dbcsr
    1034             :       ! distribution data
    1035             :       CALL dbcsr_get_info(filter_mat, &
    1036         256 :                           nblkrows_total=nblkrows_total)
    1037             : 
    1038             :       ! source is always the local processor
    1039             :       ASSOCIATE (src => para_env%mepos)
    1040             :          ! construct send tasks
    1041         256 :          itask = 1
    1042        2304 :          DO iatom_in_halo = 1, natoms_in_halo
    1043        2048 :             iatom_global = halo_atoms(iatom_in_halo)
    1044             :             ! find where the constructed block of filter matrix belongs to
    1045             :             CALL dbcsr_get_stored_coordinates(filter_mat, &
    1046             :                                               iatom_global, &
    1047             :                                               jatom_global, &
    1048        2048 :                                               processor=dest)
    1049             :             ! create the send tasks
    1050        2048 :             tasks_send(TASK_DEST, itask) = dest
    1051        2048 :             tasks_send(TASK_SRC, itask) = src
    1052             :             CALL fb_com_tasks_encode_pair(tasks_send(TASK_PAIR, itask), &
    1053             :                                           iatom_global, jatom_global, &
    1054        2048 :                                           nblkrows_total)
    1055             :             ! calculation of cost not implemented at the moment
    1056        2048 :             tasks_send(TASK_COST, itask) = 0
    1057        4352 :             itask = itask + 1
    1058             :          END DO ! iatom_in_halo
    1059             :       END ASSOCIATE
    1060             : 
    1061         256 :       CALL fb_com_tasks_create(com_tasks_recv)
    1062         256 :       CALL fb_com_tasks_create(com_tasks_send)
    1063             : 
    1064             :       CALL fb_com_tasks_set(com_tasks=com_tasks_send, &
    1065             :                             task_dim=TASK_N_RECORDS, &
    1066             :                             ntasks=ntasks_send, &
    1067             :                             nencode=nblkrows_total, &
    1068         256 :                             tasks=tasks_send)
    1069             : 
    1070             :       ! generate the recv task list (tasks_recv) from the send task list
    1071             :       CALL fb_com_tasks_transpose_dest_src(com_tasks_recv, "<", com_tasks_send, &
    1072         256 :                                            para_env)
    1073             : 
    1074             :       ! task lists are now complete, now construct the atom_pairs_send
    1075             :       ! and atom_pairs_recv from the tasks lists
    1076             :       CALL fb_com_tasks_build_atom_pairs(com_tasks=com_tasks_send, &
    1077             :                                          atom_pairs=atom_pairs_send, &
    1078             :                                          natoms_encode=nblkrows_total, &
    1079         256 :                                          send_or_recv="send")
    1080             :       CALL fb_com_tasks_build_atom_pairs(com_tasks=com_tasks_recv, &
    1081             :                                          atom_pairs=atom_pairs_recv, &
    1082             :                                          natoms_encode=nblkrows_total, &
    1083         256 :                                          send_or_recv="recv")
    1084             : 
    1085             :       ! cleanup
    1086         256 :       CALL fb_com_tasks_release(com_tasks_recv)
    1087         256 :       CALL fb_com_tasks_release(com_tasks_send)
    1088             : 
    1089         256 :       CALL timestop(handle)
    1090             : 
    1091         768 :    END SUBROUTINE fb_fltrmat_generate_com_pairs
    1092             : 
    1093             : ! **************************************************************************************************
    1094             : !> \brief generate the list of blocks (atom pairs) to be sent and received
    1095             : !>        in order to construct the filter matrix for each atomic halo.
    1096             : !>        This version is for use with fb_fltrmat_build_2, where MPI
    1097             : !>        communications are done collectively.
    1098             : !> \param filter_mat  : DBCSR formatted filter matrix
    1099             : !> \param atomic_halos : set of all local atomic halos contributing to the
    1100             : !>                       filter matrix
    1101             : !> \param para_env : cp2k parallel environment
    1102             : !> \param atom_pairs_send : list of blocks to be sent
    1103             : !> \param atom_pairs_recv : list of blocks to be received
    1104             : !> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
    1105             : ! **************************************************************************************************
    1106          16 :    SUBROUTINE fb_fltrmat_generate_com_pairs_2(filter_mat, &
    1107             :                                               atomic_halos, &
    1108             :                                               para_env, &
    1109             :                                               atom_pairs_send, &
    1110             :                                               atom_pairs_recv)
    1111             :       TYPE(dbcsr_type), POINTER                          :: filter_mat
    1112             :       TYPE(fb_atomic_halo_list_obj), INTENT(IN)          :: atomic_halos
    1113             :       TYPE(mp_para_env_type), POINTER                    :: para_env
    1114             :       TYPE(fb_com_atom_pairs_obj), INTENT(INOUT)         :: atom_pairs_send, atom_pairs_recv
    1115             : 
    1116             :       CHARACTER(LEN=*), PARAMETER :: routineN = 'fb_fltrmat_generate_com_pairs_2'
    1117             : 
    1118             :       INTEGER :: dest, handle, iatom_global, iatom_in_halo, iatom_stored, ihalo, itask, &
    1119             :          jatom_global, jatom_stored, natoms_in_halo, nblkrows_total, nhalos, ntasks_send
    1120          16 :       INTEGER(KIND=int_8), DIMENSION(:, :), POINTER      :: tasks_send
    1121          16 :       INTEGER, DIMENSION(:), POINTER                     :: halo_atoms
    1122             :       LOGICAL                                            :: transpose
    1123          16 :       TYPE(fb_atomic_halo_obj), DIMENSION(:), POINTER    :: halos
    1124             :       TYPE(fb_com_tasks_obj)                             :: com_tasks_recv, com_tasks_send
    1125             : 
    1126          16 :       CALL timeset(routineN, handle)
    1127             : 
    1128          16 :       NULLIFY (tasks_send)
    1129          16 :       CALL fb_com_tasks_nullify(com_tasks_send)
    1130          16 :       CALL fb_com_tasks_nullify(com_tasks_recv)
    1131             : 
    1132             :       ! initialise atom_pairs_send and atom_pairs_recv
    1133          16 :       IF (fb_com_atom_pairs_has_data(atom_pairs_send)) THEN
    1134          16 :          CALL fb_com_atom_pairs_init(atom_pairs_send)
    1135             :       ELSE
    1136           0 :          CALL fb_com_atom_pairs_create(atom_pairs_send)
    1137             :       END IF
    1138          16 :       IF (fb_com_atom_pairs_has_data(atom_pairs_recv)) THEN
    1139          16 :          CALL fb_com_atom_pairs_init(atom_pairs_recv)
    1140             :       ELSE
    1141           0 :          CALL fb_com_atom_pairs_create(atom_pairs_recv)
    1142             :       END IF
    1143             : 
    1144             :       ! The col atom block index for each filter matrix block are the
    1145             :       ! owner atom of each halo. The row atom block index for each
    1146             :       ! filter matrix block corresponding to each col are the halo atoms
    1147             :       ! of the corresponding halos. Filter matrix is non-symmetric: it
    1148             :       ! is non-square, because the blocks themselves are non-square
    1149             : 
    1150             :       CALL fb_atomic_halo_list_get(atomic_halos=atomic_halos, &
    1151             :                                    nhalos=nhalos, &
    1152          16 :                                    halos=halos)
    1153             : 
    1154             :       ! estimate the maximum number of blocks (i.e. atom paris) to send
    1155          16 :       ntasks_send = 0
    1156          80 :       DO ihalo = 1, nhalos
    1157             :          CALL fb_atomic_halo_get(atomic_halo=halos(ihalo), &
    1158          64 :                                  natoms=natoms_in_halo)
    1159          80 :          ntasks_send = ntasks_send + natoms_in_halo
    1160             :       END DO ! ihalo
    1161             : 
    1162             :       ! allocate send tasks
    1163          48 :       ALLOCATE (tasks_send(TASK_N_RECORDS, ntasks_send))
    1164             : 
    1165             :       ! Get the total number of atoms. This can be obtained from the
    1166             :       ! total number of block rows in the DBCSR filter matrix.  We
    1167             :       ! assumes that before calling this subroutine, the filter_mat has
    1168             :       ! already been created and initialised: i.e. using
    1169             :       ! dbcsr_create_new. Even if the matrix is at the moment empty,
    1170             :       ! the attribute nblkrows_total is already assigned from the dbcsr
    1171             :       ! distribution data
    1172             :       CALL dbcsr_get_info(filter_mat, &
    1173          16 :                           nblkrows_total=nblkrows_total)
    1174             : 
    1175             :       ! source is always the local processor
    1176             :       ASSOCIATE (src => para_env%mepos)
    1177             :          ! construct send tasks
    1178          16 :          itask = 1
    1179          80 :          DO ihalo = 1, nhalos
    1180             :             CALL fb_atomic_halo_get(atomic_halo=halos(ihalo), &
    1181             :                                     owner_atom=jatom_global, &
    1182             :                                     natoms=natoms_in_halo, &
    1183          64 :                                     halo_atoms=halo_atoms)
    1184         592 :             DO iatom_in_halo = 1, natoms_in_halo
    1185         512 :                iatom_global = halo_atoms(iatom_in_halo)
    1186         512 :                iatom_stored = iatom_global
    1187         512 :                jatom_stored = jatom_global
    1188         512 :                transpose = .FALSE.
    1189             :                ! find where the constructed block of filter matrix belongs to
    1190             :                CALL dbcsr_get_stored_coordinates(filter_mat, &
    1191             :                                                  iatom_stored, &
    1192             :                                                  jatom_stored, &
    1193         512 :                                                  processor=dest)
    1194             :                ! create the send tasks
    1195         512 :                tasks_send(TASK_DEST, itask) = dest
    1196         512 :                tasks_send(TASK_SRC, itask) = src
    1197             :                CALL fb_com_tasks_encode_pair(tasks_send(TASK_PAIR, itask), &
    1198             :                                              iatom_global, jatom_global, &
    1199         512 :                                              nblkrows_total)
    1200             :                ! calculation of cost not implemented at the moment
    1201         512 :                tasks_send(TASK_COST, itask) = 0
    1202        1088 :                itask = itask + 1
    1203             :             END DO ! iatom_in_halo
    1204             :          END DO ! ihalo
    1205             :       END ASSOCIATE
    1206             : 
    1207             :       ! get the actual number of tasks
    1208          16 :       ntasks_send = itask - 1
    1209             : 
    1210          16 :       CALL fb_com_tasks_create(com_tasks_send)
    1211             :       CALL fb_com_tasks_set(com_tasks=com_tasks_send, &
    1212             :                             task_dim=TASK_N_RECORDS, &
    1213             :                             ntasks=ntasks_send, &
    1214             :                             nencode=nblkrows_total, &
    1215          16 :                             tasks=tasks_send)
    1216             : 
    1217             :       ! generate the recv task list (tasks_recv) from the send task list
    1218          16 :       CALL fb_com_tasks_create(com_tasks_recv)
    1219             :       CALL fb_com_tasks_transpose_dest_src(com_tasks_recv, "<", com_tasks_send, &
    1220          16 :                                            para_env)
    1221             : 
    1222             :       ! task lists are now complete, now construct the atom_pairs_send
    1223             :       ! and atom_pairs_recv from the tasks lists
    1224             :       CALL fb_com_tasks_build_atom_pairs(com_tasks=com_tasks_send, &
    1225             :                                          atom_pairs=atom_pairs_send, &
    1226             :                                          natoms_encode=nblkrows_total, &
    1227          16 :                                          send_or_recv="send")
    1228             :       CALL fb_com_tasks_build_atom_pairs(com_tasks=com_tasks_recv, &
    1229             :                                          atom_pairs=atom_pairs_recv, &
    1230             :                                          natoms_encode=nblkrows_total, &
    1231          16 :                                          send_or_recv="recv")
    1232             : 
    1233             :       ! cleanup
    1234          16 :       CALL fb_com_tasks_release(com_tasks_recv)
    1235          16 :       CALL fb_com_tasks_release(com_tasks_send)
    1236             : 
    1237          16 :       CALL timestop(handle)
    1238             : 
    1239          48 :    END SUBROUTINE fb_fltrmat_generate_com_pairs_2
    1240             : 
    1241             : ! **************************************************************************************************
    1242             : !> \brief Build the atomic filter matrix for each atomic halo
    1243             : !> \param atomic_H : atomic KS matrix
    1244             : !> \param atomic_S : atomic overlap matrix
    1245             : !> \param fermi_level : fermi level used to construct the Fermi-Dirac
    1246             : !>                      filter function
    1247             : !> \param filter_temp : temperature used to construct the Fermi-Dirac
    1248             : !>                      filter function
    1249             : !> \param atomic_filter_mat : the atomic filter matrix
    1250             : !> \param tolerance : anything smaller than tolerance is treated as zero
    1251             : !> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
    1252             : ! **************************************************************************************************
    1253         960 :    SUBROUTINE fb_fltrmat_build_atomic_fltrmat(atomic_H, &
    1254         320 :                                               atomic_S, &
    1255             :                                               fermi_level, &
    1256             :                                               filter_temp, &
    1257         320 :                                               atomic_filter_mat, &
    1258             :                                               tolerance)
    1259             :       REAL(KIND=dp), DIMENSION(:, :), INTENT(IN)         :: atomic_H, atomic_S
    1260             :       REAL(KIND=dp), INTENT(IN)                          :: fermi_level, filter_temp
    1261             :       REAL(KIND=dp), DIMENSION(:, :), INTENT(OUT)        :: atomic_filter_mat
    1262             :       REAL(KIND=dp), INTENT(IN)                          :: tolerance
    1263             : 
    1264             :       CHARACTER(LEN=*), PARAMETER :: routineN = 'fb_fltrmat_build_atomic_fltrmat'
    1265             : 
    1266             :       INTEGER                                            :: handle, handle_dgemm, handle_dsygv, ii, &
    1267             :                                                             info, jj, mat_dim, work_array_size
    1268             :       LOGICAL                                            :: check_ok
    1269         320 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: eigenvalues, filter_function, work
    1270         320 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :)        :: atomic_S_copy, eigenvectors, &
    1271         320 :                                                             filtered_eigenvectors
    1272             : 
    1273         320 :       CALL timeset(routineN, handle)
    1274             : 
    1275             :       ! This subroutine assumes atomic_filter_mat is not zero size, in
    1276             :       ! other words, it really has to be constructed, instead of just
    1277             :       ! being a dummy
    1278             : 
    1279             :       check_ok = SIZE(atomic_filter_mat, 1) > 0 .AND. &
    1280         320 :                  SIZE(atomic_filter_mat, 2) > 0
    1281           0 :       CPASSERT(check_ok)
    1282             : 
    1283             :       ! initialise
    1284     3494720 :       atomic_filter_mat = 0.0_dp
    1285         320 :       mat_dim = SIZE(atomic_H, 1)
    1286             : 
    1287             :       ! diagonalise using LAPACK
    1288         960 :       ALLOCATE (eigenvalues(mat_dim))
    1289             :       ! get optimal work array size
    1290         320 :       ALLOCATE (work(1))
    1291             :       ! dsygv will overwrite part of atomic_H and atomic_S, thus need to copy them
    1292        1280 :       ALLOCATE (atomic_S_copy(SIZE(atomic_S, 1), SIZE(atomic_S, 2)))
    1293     3494720 :       atomic_S_copy(:, :) = atomic_S(:, :)
    1294        1280 :       ALLOCATE (eigenvectors(SIZE(atomic_H, 1), SIZE(atomic_H, 2)))
    1295     3494720 :       eigenvectors(:, :) = atomic_H(:, :)
    1296             : 
    1297         320 :       CALL timeset("fb_atomic_filter_dsygv", handle_dsygv)
    1298             : 
    1299         320 :       info = 0
    1300             :       CALL dsygv(1, &
    1301             :                  'V', &
    1302             :                  'U', &
    1303             :                  mat_dim, &
    1304             :                  eigenvectors, &
    1305             :                  mat_dim, &
    1306             :                  atomic_S_copy, &
    1307             :                  mat_dim, &
    1308             :                  eigenvalues, &
    1309             :                  work, &
    1310             :                  -1, &
    1311         320 :                  info)
    1312         320 :       work_array_size = NINT(work(1))
    1313             :       ! now allocate work array
    1314         320 :       DEALLOCATE (work)
    1315         960 :       ALLOCATE (work(work_array_size))
    1316     1131840 :       work = 0.0_dp
    1317             :       ! do calculation
    1318     3494720 :       atomic_S_copy(:, :) = atomic_S(:, :)
    1319     3494720 :       eigenvectors(:, :) = atomic_H(:, :)
    1320         320 :       info = 0
    1321             :       CALL dsygv(1, &
    1322             :                  'V', &
    1323             :                  'U', &
    1324             :                  mat_dim, &
    1325             :                  eigenvectors, &
    1326             :                  mat_dim, &
    1327             :                  atomic_S_copy, &
    1328             :                  mat_dim, &
    1329             :                  eigenvalues, &
    1330             :                  work, &
    1331             :                  work_array_size, &
    1332         320 :                  info)
    1333             :       ! check if diagonalisation is successful
    1334         320 :       IF (info .NE. 0) THEN
    1335           0 :          WRITE (*, *) "DSYGV ERROR MESSAGE: ", info
    1336           0 :          CPABORT("DSYGV failed")
    1337             :       END IF
    1338             : 
    1339         320 :       CALL timestop(handle_dsygv)
    1340             : 
    1341         320 :       DEALLOCATE (work)
    1342         320 :       DEALLOCATE (atomic_S_copy)
    1343             : 
    1344             :       ! first get the filter function
    1345         960 :       ALLOCATE (filter_function(mat_dim))
    1346       33600 :       filter_function = 0.0_dp
    1347             :       CALL fb_fltrmat_fermi_dirac_mu(filter_function, &
    1348             :                                      eigenvalues, &
    1349             :                                      filter_temp, &
    1350         320 :                                      fermi_level)
    1351         320 :       DEALLOCATE (eigenvalues)
    1352             : 
    1353             :       ! atomic_H has the eigenvectors, construct the version of it
    1354             :       ! filtered through the filter function
    1355        1280 :       ALLOCATE (filtered_eigenvectors(mat_dim, mat_dim))
    1356       33600 :       DO jj = 1, mat_dim
    1357     3494720 :          DO ii = 1, mat_dim
    1358             :             filtered_eigenvectors(ii, jj) = &
    1359     3494400 :                filter_function(jj)*eigenvectors(ii, jj)
    1360             :          END DO ! ii
    1361             :       END DO ! jj
    1362             : 
    1363         320 :       DEALLOCATE (filter_function)
    1364             : 
    1365         320 :       CALL timeset("fb_atomic_filter_dgemm", handle_dgemm)
    1366             : 
    1367             :       ! construct atomic filter matrix
    1368             :       CALL dgemm("N", &
    1369             :                  "T", &
    1370             :                  mat_dim, &
    1371             :                  mat_dim, &
    1372             :                  mat_dim, &
    1373             :                  1.0_dp, &
    1374             :                  filtered_eigenvectors, &
    1375             :                  mat_dim, &
    1376             :                  eigenvectors, &
    1377             :                  mat_dim, &
    1378             :                  0.0_dp, &
    1379             :                  atomic_filter_mat, &
    1380         320 :                  mat_dim)
    1381             : 
    1382         320 :       CALL timestop(handle_dgemm)
    1383             : 
    1384             :       ! remove small negative terms due to numerical error, the filter
    1385             :       ! matrix must not be negative definite
    1386       33600 :       DO jj = 1, SIZE(atomic_filter_mat, 2)
    1387     3494720 :          DO ii = 1, SIZE(atomic_filter_mat, 1)
    1388     3494400 :             IF (ABS(atomic_filter_mat(ii, jj)) < tolerance) THEN
    1389       50656 :                atomic_filter_mat(ii, jj) = 0.0_dp
    1390             :             END IF
    1391             :          END DO
    1392             :       END DO
    1393             : 
    1394         320 :       DEALLOCATE (filtered_eigenvectors)
    1395         320 :       DEALLOCATE (eigenvectors)
    1396             : 
    1397         320 :       CALL timestop(handle)
    1398             : 
    1399         960 :    END SUBROUTINE fb_fltrmat_build_atomic_fltrmat
    1400             : 
    1401             : ! **************************************************************************************************
    1402             : !> \brief get values of Fermi-Dirac distribution based on a given fermi
    1403             : !>        level at a given set of energy eigenvalues
    1404             : !> \param f : the Fermi-Dirac distribution function values
    1405             : !> \param eigenvals : set of energy eigenvalues
    1406             : !> \param T : temperature
    1407             : !> \param mu : the fermi level
    1408             : !> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
    1409             : ! **************************************************************************************************
    1410         320 :    SUBROUTINE fb_fltrmat_fermi_dirac_mu(f, eigenvals, T, mu)
    1411             :       REAL(KIND=dp), DIMENSION(:), INTENT(OUT)           :: f
    1412             :       REAL(KIND=dp), DIMENSION(:), INTENT(IN)            :: eigenvals
    1413             :       REAL(KIND=dp), INTENT(IN)                          :: T, mu
    1414             : 
    1415             :       REAL(KIND=dp)                                      :: kTS, ne
    1416             : 
    1417             : ! we want fermi function max at 1, so maxocc = 1 here
    1418             : 
    1419         320 :       CALL Fermi(f, ne, kTS, eigenvals, mu, T, 1.0_dp)
    1420         320 :    END SUBROUTINE fb_fltrmat_fermi_dirac_mu
    1421             : 
    1422             : ! **************************************************************************************************
    1423             : !> \brief get values of Fermi-Dirac distribution based on a given electron
    1424             : !>        number at a given set of energy eigenvales
    1425             : !> \param f : the Fermi-Dirac distribution function values
    1426             : !> \param eigenvals : set of energy eigenvalues
    1427             : !> \param T : temperature
    1428             : !> \param ne : number of electrons
    1429             : !> \param maxocc : maximum occupancy per orbital
    1430             : !> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
    1431             : ! **************************************************************************************************
    1432           0 :    SUBROUTINE fb_fltrmat_fermi_dirac_ne(f, eigenvals, T, ne, maxocc)
    1433             :       REAL(KIND=dp), DIMENSION(:), INTENT(OUT)           :: f
    1434             :       REAL(KIND=dp), DIMENSION(:), INTENT(IN)            :: eigenvals
    1435             :       REAL(KIND=dp), INTENT(IN)                          :: T, ne, maxocc
    1436             : 
    1437             :       REAL(KIND=dp)                                      :: kTS, mu
    1438             : 
    1439             : ! mu is the calculated fermi level
    1440             : ! kTS is the calculated entropic contribution to the energy i.e. -TS
    1441             : ! kTS = kT*[f ln f + (1-f) ln (1-f)]
    1442             : 
    1443           0 :       CALL FermiFixed(f, mu, kTS, eigenvals, ne, T, maxocc)
    1444           0 :    END SUBROUTINE fb_fltrmat_fermi_dirac_ne
    1445             : 
    1446             : END MODULE qs_fb_filter_matrix_methods

Generated by: LCOV version 1.15