LCOV - code coverage report
Current view: top level - src/arnoldi - arnoldi_methods.F (source / functions) Hit Total Coverage
Test: CP2K Regtests (git:4dc10b3) Lines: 325 329 98.8 %
Date: 2024-11-21 06:45:46 Functions: 21 34 61.8 %

          Line data    Source code
       1             : !--------------------------------------------------------------------------------------------------!
       2             : !   CP2K: A general program to perform molecular dynamics simulations                              !
       3             : !   Copyright 2000-2024 CP2K developers group <https://cp2k.org>                                   !
       4             : !                                                                                                  !
       5             : !   SPDX-License-Identifier: GPL-2.0-or-later                                                      !
       6             : !--------------------------------------------------------------------------------------------------!
       7             : 
       8             : ! **************************************************************************************************
       9             : !> \brief methods for arnoldi iteration
      10             : !> \par History
      11             : !>       2014.09 created [Florian Schiffmann]
      12             : !> \author Florian Schiffmann
      13             : ! **************************************************************************************************
      14             : 
      15             : #:include 'arnoldi.fypp'
      16             : MODULE arnoldi_methods
      17             :    USE arnoldi_geev, ONLY: arnoldi_general_local_diag, &
      18             :                            arnoldi_symm_local_diag, &
      19             :                            arnoldi_tridiag_local_diag
      20             :    USE arnoldi_types, ONLY: &
      21             :       arnoldi_control_type, arnoldi_data_c_type, arnoldi_data_d_type, arnoldi_data_s_type, &
      22             :       arnoldi_data_type, arnoldi_data_z_type, get_control, get_data_c, get_data_d, get_data_s, &
      23             :       get_data_z, has_d_cmplx, has_d_real, m_x_v_vectors_type
      24             :    USE cp_dbcsr_api, ONLY: &
      25             :       dbcsr_add, dbcsr_copy, dbcsr_get_data_p, dbcsr_get_info, dbcsr_iterator_blocks_left, &
      26             :       dbcsr_iterator_next_block, dbcsr_iterator_start, dbcsr_iterator_stop, dbcsr_iterator_type, &
      27             :       dbcsr_p_type, dbcsr_scale, dbcsr_type
      28             :    USE dbcsr_vector, ONLY: dbcsr_matrix_colvec_multiply
      29             :    USE kinds, ONLY: real_8
      30             :    USE message_passing, ONLY: mp_comm_type
      31             : #include "../base/base_uses.f90"
      32             : 
      33             :    IMPLICIT NONE
      34             : 
      35             :    PRIVATE
      36             : 
      37             :    CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'arnoldi_methods'
      38             : 
      39             :    PUBLIC :: arnoldi_init, build_subspace, compute_evals, arnoldi_iram, &
      40             :              gev_arnoldi_init, gev_build_subspace, gev_update_data
      41             : 
      42             :    INTERFACE convert_matrix
      43             :       MODULE PROCEDURE convert_matrix_z_to_d
      44             :       MODULE PROCEDURE convert_matrix_d_to_z
      45             :       MODULE PROCEDURE convert_matrix_z_to_z
      46             :    END INTERFACE
      47             : 
      48             : CONTAINS
      49             : 
      50             : ! **************************************************************************************************
      51             : !> \brief Interface for the routine calcualting the implicit restarts
      52             : !>        Currently all based on lapack
      53             : !> \param arnoldi_data ...
      54             : ! **************************************************************************************************
      55         250 :    SUBROUTINE arnoldi_iram(arnoldi_data)
      56             :       TYPE(arnoldi_data_type)                            :: arnoldi_data
      57             : 
      58             :       CHARACTER(LEN=*), PARAMETER :: routineN = 'arnoldi_iram'
      59             : 
      60             :       INTEGER                                            :: handle
      61             : 
      62         250 :       CALL timeset(routineN, handle)
      63             : 
      64         250 :       IF (has_d_real(arnoldi_data)) CALL arnoldi_iram_d(arnoldi_data)
      65         250 :       IF (has_d_cmplx(arnoldi_data)) CALL arnoldi_iram_z(arnoldi_data)
      66             : 
      67         250 :       CALL timestop(handle)
      68             : 
      69         250 :    END SUBROUTINE arnoldi_iram
      70             : 
      71             : ! **************************************************************************************************
      72             : !> \brief Interface to compute the eigenvalues of a nonsymmetric matrix
      73             : !>        This is only the serial version
      74             : !> \param arnoldi_data ...
      75             : ! **************************************************************************************************
      76      129914 :    SUBROUTINE compute_evals(arnoldi_data)
      77             :       TYPE(arnoldi_data_type)                            :: arnoldi_data
      78             : 
      79             :       CHARACTER(LEN=*), PARAMETER :: routineN = 'compute_evals'
      80             : 
      81             :       INTEGER                                            :: handle
      82             : 
      83      129914 :       CALL timeset(routineN, handle)
      84             : 
      85      129914 :       IF (has_d_real(arnoldi_data)) CALL compute_evals_d(arnoldi_data)
      86      129914 :       IF (has_d_cmplx(arnoldi_data)) CALL compute_evals_z(arnoldi_data)
      87             : 
      88      129914 :       CALL timestop(handle)
      89             : 
      90      129914 :    END SUBROUTINE compute_evals
      91             : 
      92             : ! **************************************************************************************************
      93             : !> \brief Interface for the initialization of the arnoldi subspace creation
      94             : !>        currently it can only setup a random vector but can be improved to
      95             : !>        various types of restarts easily
      96             : !> \param matrix pointer to the matrices as described in main interface
      97             : !> \param vectors work vectors for the matrix vector multiplications
      98             : !> \param arnoldi_data all data concerning the subspace
      99             : ! **************************************************************************************************
     100      125059 :    SUBROUTINE arnoldi_init(matrix, vectors, arnoldi_data)
     101             :       TYPE(dbcsr_p_type), DIMENSION(:)                   :: matrix
     102             :       TYPE(m_x_v_vectors_type)                           :: vectors
     103             :       TYPE(arnoldi_data_type)                            :: arnoldi_data
     104             : 
     105             :       CHARACTER(LEN=*), PARAMETER :: routineN = 'arnoldi_init'
     106             : 
     107             :       INTEGER                                            :: handle
     108             : 
     109      125059 :       CALL timeset(routineN, handle)
     110             : 
     111      125059 :       IF (has_d_real(arnoldi_data)) CALL arnoldi_init_d(matrix, vectors, arnoldi_data)
     112      125059 :       IF (has_d_cmplx(arnoldi_data)) CALL arnoldi_init_z(matrix, vectors, arnoldi_data)
     113             : 
     114      125059 :       CALL timestop(handle)
     115             : 
     116      125059 :    END SUBROUTINE arnoldi_init
     117             : 
     118             : ! **************************************************************************************************
     119             : !> \brief Interface for the initialization of the arnoldi subspace creation
     120             : !>        for the generalized eigenvalue problem
     121             : !> \param matrix pointer to the matrices as described in main interface
     122             : !> \param matrix_arnoldi ...
     123             : !> \param vectors work vectors for the matrix vector multiplications
     124             : !> \param arnoldi_data all data concerning the subspace
     125             : ! **************************************************************************************************
     126        3728 :    SUBROUTINE gev_arnoldi_init(matrix, matrix_arnoldi, vectors, arnoldi_data)
     127             :       TYPE(dbcsr_p_type), DIMENSION(:)                   :: matrix, matrix_arnoldi
     128             :       TYPE(m_x_v_vectors_type)                           :: vectors
     129             :       TYPE(arnoldi_data_type)                            :: arnoldi_data
     130             : 
     131             :       CHARACTER(LEN=*), PARAMETER :: routineN = 'gev_arnoldi_init'
     132             : 
     133             :       INTEGER                                            :: handle
     134             : 
     135        3728 :       CALL timeset(routineN, handle)
     136             : 
     137        3728 :       IF (has_d_real(arnoldi_data)) CALL gev_arnoldi_init_d(matrix, matrix_arnoldi, vectors, arnoldi_data)
     138        3728 :       IF (has_d_cmplx(arnoldi_data)) CALL gev_arnoldi_init_z(matrix, matrix_arnoldi, vectors, arnoldi_data)
     139             : 
     140        3728 :       CALL timestop(handle)
     141             : 
     142        3728 :    END SUBROUTINE gev_arnoldi_init
     143             : 
     144             : ! **************************************************************************************************
     145             : !> \brief here the iterations are performed and the krylov space is constructed
     146             : !> \param matrix see above
     147             : !> \param vectors see above
     148             : !> \param arnoldi_data see above
     149             : ! **************************************************************************************************
     150      125309 :    SUBROUTINE build_subspace(matrix, vectors, arnoldi_data)
     151             :       TYPE(dbcsr_p_type), DIMENSION(:)                   :: matrix
     152             :       TYPE(m_x_v_vectors_type)                           :: vectors
     153             :       TYPE(arnoldi_data_type)                            :: arnoldi_data
     154             : 
     155             :       CHARACTER(LEN=*), PARAMETER :: routineN = 'build_subspace'
     156             : 
     157             :       INTEGER                                            :: handle
     158             : 
     159      125309 :       CALL timeset(routineN, handle)
     160             : 
     161      125309 :       IF (has_d_real(arnoldi_data)) CALL build_subspace_d(matrix, vectors, arnoldi_data)
     162      125309 :       IF (has_d_cmplx(arnoldi_data)) CALL build_subspace_z(matrix, vectors, arnoldi_data)
     163             : 
     164      125309 :       CALL timestop(handle)
     165             : 
     166      125309 :    END SUBROUTINE build_subspace
     167             : 
     168             : ! **************************************************************************************************
     169             : !> \brief here the iterations are performed and the krylov space for the generalized
     170             : !>        eigenvalue problem is created
     171             : !> \param matrix see above
     172             : !> \param vectors see above
     173             : !> \param arnoldi_data see above
     174             : ! **************************************************************************************************
     175        4605 :    SUBROUTINE gev_build_subspace(matrix, vectors, arnoldi_data)
     176             :       TYPE(dbcsr_p_type), DIMENSION(:)                   :: matrix
     177             :       TYPE(m_x_v_vectors_type)                           :: vectors
     178             :       TYPE(arnoldi_data_type)                            :: arnoldi_data
     179             : 
     180             :       CHARACTER(LEN=*), PARAMETER :: routineN = 'gev_build_subspace'
     181             : 
     182             :       INTEGER                                            :: handle
     183             : 
     184        4605 :       CALL timeset(routineN, handle)
     185             : 
     186        4605 :       IF (has_d_real(arnoldi_data)) CALL gev_build_subspace_d(matrix, vectors, arnoldi_data)
     187        4605 :       IF (has_d_cmplx(arnoldi_data)) CALL gev_build_subspace_z(matrix, vectors, arnoldi_data)
     188             : 
     189        4605 :       CALL timestop(handle)
     190             : 
     191        4605 :    END SUBROUTINE gev_build_subspace
     192             : 
     193             : ! **************************************************************************************************
     194             : !> \brief in the generalized eigenvalue the matrix depende on the projection
     195             : !>        therefore the outer loop has to build a new set of matrices for the
     196             : !>        inner loop
     197             : !> \param matrix see above
     198             : !> \param matrix_arnoldi ...
     199             : !> \param vectors ...
     200             : !> \param arnoldi_data see above
     201             : ! **************************************************************************************************
     202        4605 :    SUBROUTINE gev_update_data(matrix, matrix_arnoldi, vectors, arnoldi_data)
     203             :       TYPE(dbcsr_p_type), DIMENSION(:)                   :: matrix, matrix_arnoldi
     204             :       TYPE(m_x_v_vectors_type)                           :: vectors
     205             :       TYPE(arnoldi_data_type)                            :: arnoldi_data
     206             : 
     207             :       CHARACTER(LEN=*), PARAMETER :: routineN = 'gev_update_data'
     208             : 
     209             :       INTEGER                                            :: handle
     210             : 
     211        4605 :       CALL timeset(routineN, handle)
     212             : 
     213        4605 :       IF (has_d_real(arnoldi_data)) CALL gev_update_data_d(matrix, matrix_arnoldi, vectors, arnoldi_data)
     214        4605 :       IF (has_d_cmplx(arnoldi_data)) CALL gev_update_data_z(matrix, matrix_arnoldi, vectors, arnoldi_data)
     215             : 
     216        4605 :       CALL timestop(handle)
     217             : 
     218        4605 :    END SUBROUTINE gev_update_data
     219             : 
     220             : ! **************************************************************************************************
     221             : !> \brief ...
     222             : !> \param m_out ...
     223             : !> \param m_in ...
     224             : ! **************************************************************************************************
     225         500 :    SUBROUTINE convert_matrix_z_to_d(m_out, m_in)
     226             :       REAL(real_8), DIMENSION(:, :)                      :: m_out
     227             :       COMPLEX(real_8), DIMENSION(:, :)                   :: m_in
     228             : 
     229      451828 :       m_out(:, :) = REAL(m_in(:, :), KIND=real_8)
     230         500 :    END SUBROUTINE convert_matrix_z_to_d
     231             : 
     232             : ! **************************************************************************************************
     233             : !> \brief ...
     234             : !> \param m_out ...
     235             : !> \param m_in ...
     236             : ! **************************************************************************************************
     237         250 :    SUBROUTINE convert_matrix_d_to_z(m_out, m_in)
     238             :       COMPLEX(real_8), DIMENSION(:, :)                   :: m_out
     239             :       REAL(real_8), DIMENSION(:, :)                      :: m_in
     240             : 
     241      225914 :       m_out(:, :) = CMPLX(m_in, 0.0, KIND=real_8)
     242         250 :    END SUBROUTINE convert_matrix_d_to_z
     243             : 
     244             : ! I kno that one is stupid but like this it simplifies the template
     245             : ! **************************************************************************************************
     246             : !> \brief ...
     247             : !> \param m_out ...
     248             : !> \param m_in ...
     249             : ! **************************************************************************************************
     250           0 :    SUBROUTINE convert_matrix_z_to_z(m_out, m_in)
     251             :       COMPLEX(real_8), DIMENSION(:, :)                   :: m_out, m_in
     252             : 
     253           0 :       m_out(:, :) = m_in
     254           0 :    END SUBROUTINE convert_matrix_z_to_z
     255             : 
     256             :    #:for nametype1, type_prec, type_nametype1, nametype_zero, nametype_one, nametype_negone, czero, cone, ctype, rnorm_to_norm, val_to_type in inst_params_2
     257             : ! **************************************************************************************************
     258             : !> \brief Call the correct eigensolver, in the arnoldi method only the right
     259             : !>        eigenvectors are used. Lefts are created here but dumped immediately
     260             : !> \param arnoldi_data ...
     261             : ! **************************************************************************************************
     262      129914 :       SUBROUTINE compute_evals_${nametype1}$ (arnoldi_data)
     263             :          TYPE(arnoldi_data_type)                 :: arnoldi_data
     264             : 
     265             :          COMPLEX(${type_prec}$), DIMENSION(:, :), ALLOCATABLE   :: levec
     266             :          TYPE(arnoldi_data_${nametype1}$_type), POINTER            :: ar_data
     267             :          INTEGER                                  :: ndim
     268             :          TYPE(arnoldi_control_type), POINTER           :: control
     269             : 
     270      129914 :          ar_data => get_data_${nametype1}$ (arnoldi_data)
     271      129914 :          control => get_control(arnoldi_data)
     272      129914 :          ndim = control%current_step
     273      519656 :          ALLOCATE (levec(ndim, ndim))
     274             : 
     275             : ! Needs antoher interface as the calls to real and complex geev differ (sucks!)
     276             : ! only perform the diagonalization on processors which hold data
     277      129914 :          IF (control%generalized_ev) THEN
     278             :             CALL arnoldi_symm_local_diag('V', ar_data%Hessenberg(1:ndim, 1:ndim), ndim, &
     279        4605 :                                          ar_data%evals(1:ndim), ar_data%revec(1:ndim, 1:ndim))
     280             :          ELSE
     281      125309 :             IF (control%symmetric) THEN
     282             :                CALL arnoldi_tridiag_local_diag('N', 'V', ar_data%Hessenberg(1:ndim, 1:ndim), ndim, &
     283        8650 :                                                ar_data%evals(1:ndim), ar_data%revec(1:ndim, 1:ndim), levec)
     284             :             ELSE
     285             :                CALL arnoldi_general_local_diag('N', 'V', ar_data%Hessenberg(1:ndim, 1:ndim), ndim, &
     286      116659 :                                                ar_data%evals(1:ndim), ar_data%revec(1:ndim, 1:ndim), levec)
     287             :             END IF
     288             :          END IF
     289             : 
     290      129914 :          DEALLOCATE (levec)
     291             : 
     292      129914 :       END SUBROUTINE compute_evals_${nametype1}$
     293             : 
     294             : ! **************************************************************************************************
     295             : !> \brief Initialization of the arnoldi vector. Here a random vector is used,
     296             : !>        might be generalized in the future
     297             : !> \param matrix ...
     298             : !> \param vectors ...
     299             : !> \param arnoldi_data ...
     300             : ! **************************************************************************************************
     301      125059 :       SUBROUTINE arnoldi_init_${nametype1}$ (matrix, vectors, arnoldi_data)
     302             :          TYPE(dbcsr_p_type), DIMENSION(:)     :: matrix
     303             :          TYPE(m_x_v_vectors_type)                :: vectors
     304             :          TYPE(arnoldi_data_type)                 :: arnoldi_data
     305             : 
     306             :          INTEGER                                  :: i, iseed(4), row_size, col_size, &
     307             :                                                      nrow_local, ncol_local, &
     308             :                                                      row, col
     309             :          REAL(${type_prec}$)                        :: rnorm
     310             :          TYPE(dbcsr_iterator_type)                     :: iter
     311             :          ${type_nametype1}$                         :: norm
     312             :          ${type_nametype1}$, DIMENSION(:), ALLOCATABLE :: &
     313             :             v_vec, w_vec
     314      125059 :          ${type_nametype1}$, DIMENSION(:), POINTER          :: data_vec
     315             :          LOGICAL                                  :: local_comp
     316             :          TYPE(arnoldi_data_${nametype1}$_type), POINTER            :: ar_data
     317             :          TYPE(arnoldi_control_type), POINTER           :: control
     318             :          TYPE(mp_comm_type)                       :: pcol_group
     319             : 
     320      250118 :          control => get_control(arnoldi_data)
     321      125059 :          pcol_group = control%pcol_group
     322      125059 :          local_comp = control%local_comp
     323             : 
     324      125059 :          ar_data => get_data_${nametype1}$ (arnoldi_data)
     325             : 
     326             :          ! create a local data copy to store the vectors and make Gram Schmidt a bit simpler
     327      125059 :          CALL dbcsr_get_info(matrix=vectors%input_vec, nfullrows_local=nrow_local, nfullcols_local=ncol_local)
     328      331516 :          ALLOCATE (v_vec(nrow_local))
     329      206457 :          ALLOCATE (w_vec(nrow_local))
     330     1542391 :          v_vec = ${nametype_zero}$; w_vec = ${nametype_zero}$
     331   135044179 :          ar_data%Hessenberg = ${nametype_zero}$
     332             : 
     333      125059 :          IF (control%has_initial_vector) THEN
     334             :             ! after calling the set routine the initial vector is stored in f_vec
     335        2735 :             CALL transfer_local_array_to_dbcsr_${nametype1}$ (vectors%input_vec, ar_data%f_vec, nrow_local, control%local_comp)
     336             :          ELSE
     337             :             ! Setup the initial normalized random vector (sufficient if it only happens on proc_col 0)
     338      122324 :             CALL dbcsr_iterator_start(iter, vectors%input_vec)
     339      226894 :             DO WHILE (dbcsr_iterator_blocks_left(iter))
     340      104570 :                CALL dbcsr_iterator_next_block(iter, row, col, data_vec, row_size=row_size, col_size=col_size)
     341      104570 :                iseed(1) = 2; iseed(2) = MOD(row, 4095); iseed(3) = MOD(col, 4095); iseed(4) = 11
     342      226894 :                CALL ${nametype1}$larnv(2, iseed, row_size*col_size, data_vec)
     343             :             END DO
     344      122324 :             CALL dbcsr_iterator_stop(iter)
     345             :          END IF
     346             : 
     347      125059 :          CALL transfer_dbcsr_to_local_array_${nametype1}$ (vectors%input_vec, v_vec, nrow_local, control%local_comp)
     348             : 
     349             :          ! compute the vector norm of the random vectorm, get it real valued as well (rnorm)
     350      125059 :          CALL compute_norms_${nametype1}$ (v_vec, norm, rnorm, control%pcol_group)
     351             : 
     352      125059 :          IF (rnorm == 0) rnorm = 1 ! catch case where this rank has no actual data
     353      125059 :          CALL dbcsr_scale(vectors%input_vec, REAL(1.0, ${type_prec}$)/rnorm)
     354             : 
     355             :          ! Everything prepared, initialize the Arnoldi iteration
     356      125059 :          CALL transfer_dbcsr_to_local_array_${nametype1}$ (vectors%input_vec, v_vec, nrow_local, control%local_comp)
     357             : 
     358             :          ! This permits to compute the subspace of a matrix which is a product of multiple matrices
     359      250118 :          DO i = 1, SIZE(matrix)
     360             :             CALL dbcsr_matrix_colvec_multiply(matrix(i)%matrix, vectors%input_vec, vectors%result_vec, ${nametype_one}$, &
     361      125059 :                                               ${nametype_zero}$, vectors%rep_row_vec, vectors%rep_col_vec)
     362      250118 :             CALL dbcsr_copy(vectors%input_vec, vectors%result_vec)
     363             :          END DO
     364             : 
     365      125059 :          CALL transfer_dbcsr_to_local_array_${nametype1}$ (vectors%result_vec, w_vec, nrow_local, control%local_comp)
     366             : 
     367             :          ! Put the projection into the Hessenberg matrix, and make the vectors orthonormal
     368      833725 :          ar_data%Hessenberg(1, 1) = DOT_PRODUCT(v_vec, w_vec)
     369      125059 :          CALL pcol_group%sum(ar_data%Hessenberg(1, 1))
     370      833725 :          ar_data%f_vec = w_vec - v_vec*ar_data%Hessenberg(1, 1)
     371             : 
     372      833725 :          ar_data%local_history(:, 1) = v_vec(:)
     373             : 
     374             :          ! We did the first step in here so we should set the current step for the subspace generation accordingly
     375      125059 :          control%current_step = 1
     376             : 
     377      125059 :          DEALLOCATE (v_vec, w_vec)
     378             : 
     379      250118 :       END SUBROUTINE arnoldi_init_${nametype1}$
     380             : 
     381             : ! **************************************************************************************************
     382             : !> \brief Alogorithm for the implicit restarts in the arnoldi method
     383             : !>        this is an early implementation which scales subspace size^4
     384             : !>        by replacing the lapack calls with direct math the
     385             : !>        QR and  gemms can be made linear and a N^2 sacling will be acchieved
     386             : !>        however this already sets the framework but should be used with care
     387             : !> \param arnoldi_data ...
     388             : ! **************************************************************************************************
     389         250 :       SUBROUTINE arnoldi_iram_${nametype1}$ (arnoldi_data)
     390             :          TYPE(arnoldi_data_type)                 :: arnoldi_data
     391             : 
     392             :          TYPE(arnoldi_data_${nametype1}$_type), POINTER            :: ar_data
     393             :          TYPE(arnoldi_control_type), POINTER                     :: control
     394         250 :          COMPLEX(${type_prec}$), DIMENSION(:, :), ALLOCATABLE  :: tmp_mat, safe_mat, Q, tmp_mat1
     395         250 :          COMPLEX(${type_prec}$), DIMENSION(:), ALLOCATABLE    :: work, tau, work_measure
     396             :          INTEGER                                   :: msize, lwork, i, j, info, nwant
     397             :          ${type_nametype1}$                          :: beta, sigma
     398             :          ${type_nametype1}$, DIMENSION(:, :), ALLOCATABLE :: Qdata
     399             : 
     400             :          ! This is just a terribly inefficient implementation but I hope it is correct and might serve as a reference
     401         250 :          ar_data => get_data_${nametype1}$ (arnoldi_data)
     402         250 :          control => get_control(arnoldi_data)
     403         250 :          msize = control%current_step
     404         250 :          nwant = control%nval_out
     405        1500 :          ALLOCATE (tmp_mat(msize, msize)); ALLOCATE (safe_mat(msize, msize))
     406        1250 :          ALLOCATE (Q(msize, msize)); ALLOCATE (tmp_mat1(msize, msize))
     407         250 :          ALLOCATE (work_measure(1))
     408        1500 :          ALLOCATE (tau(msize)); ALLOCATE (Qdata(msize, msize))
     409             :          !make Q identity
     410      225914 :          Q = ${czero}$
     411        6778 :          DO i = 1, msize
     412        6778 :             Q(i, i) = ${cone}$
     413             :          END DO
     414             : 
     415             :          ! Looks a bit odd, but safe_mat will contain the result in the end, while tmpmat gets violated by lapack
     416         250 :          CALL convert_matrix(tmp_mat, ar_data%Hessenberg(1:msize, 1:msize))
     417      225914 :          safe_mat(:, :) = tmp_mat(:, :)
     418             : 
     419        6778 :          DO i = 1, msize
     420             :             ! A bit a strange check but in the end we only want to shift the unwanted evals
     421      212858 :             IF (ANY(control%selected_ind == i)) CYCLE
     422             :             ! Here we shift the matrix by subtracting unwanted evals from the diagonal
     423      212108 :             DO j = 1, msize
     424      212108 :                tmp_mat(j, j) = tmp_mat(j, j) - ar_data%evals(i)
     425             :             END DO
     426             :             ! Now we repair the damage by QR factorizing
     427        6028 :             lwork = -1
     428        6028 :             CALL ${ctype}$geqrf(msize, msize, tmp_mat, msize, tau, work_measure, lwork, info)
     429        6028 :             lwork = INT(work_measure(1))
     430        6028 :             IF (ALLOCATED(work)) THEN
     431        5778 :                IF (SIZE(work) .LT. lwork) THEN
     432           0 :                   DEALLOCATE (work)
     433             :                END IF
     434             :             END IF
     435        6528 :             IF (.NOT. ALLOCATED(work)) ALLOCATE (work(lwork))
     436        6028 :             CALL ${ctype}$geqrf(msize, msize, tmp_mat, msize, tau, work, lwork, info)
     437             :             ! Ask Lapack to reconstruct Q from its own way of storing data (tmpmat will contain Q)
     438        6028 :             CALL ${ctype}$ungqr(msize, msize, msize, tmp_mat, msize, tau, work, lwork, info)
     439             :             ! update Q=Q*Q_current
     440     9112716 :             tmp_mat1(:, :) = Q(:, :)
     441             :             CALL ${ctype}$gemm('N', 'N', msize, msize, msize, ${cone}$, tmp_mat1, msize, tmp_mat, msize, ${czero}$, &
     442        6028 :                                Q, msize)
     443             :             ! Update H=(Q*)HQ
     444             :             CALL ${ctype}$gemm('C', 'N', msize, msize, msize, ${cone}$, tmp_mat, msize, safe_mat, msize, ${czero}$, &
     445        6028 :                                tmp_mat1, msize)
     446             :             CALL ${ctype}$gemm('N', 'N', msize, msize, msize, ${cone}$, tmp_mat1, msize, tmp_mat, msize, ${czero}$, &
     447        6028 :                                safe_mat, msize)
     448             : 
     449             :             ! this one is crucial for numerics not to accumulate noise in the subdiagonals
     450      212108 :             DO j = 1, msize
     451     4359320 :                safe_mat(j + 2:msize, j) = ${czero}$
     452             :             END DO
     453     9113466 :             tmp_mat(:, :) = safe_mat(:, :)
     454             :          END DO
     455             : 
     456             :          ! Now we can compute our restart quantities
     457      232442 :          ar_data%Hessenberg = ${nametype_zero}$
     458         250 :          CALL convert_matrix(ar_data%Hessenberg(1:msize, 1:msize), safe_mat)
     459         250 :          CALL convert_matrix(Qdata, Q)
     460             : 
     461         250 :          beta = ar_data%Hessenberg(nwant + 1, nwant); sigma = Qdata(msize, nwant)
     462             : 
     463             :          !update the residuum and the basis vectors
     464         250 :          IF (control%local_comp) THEN
     465      443564 :             ar_data%f_vec = MATMUL(ar_data%local_history(:, 1:msize), Qdata(1:msize, nwant + 1))*beta + ar_data%f_vec(:)*sigma
     466     1267924 :             ar_data%local_history(:, 1:nwant) = MATMUL(ar_data%local_history(:, 1:msize), Qdata(1:msize, 1:nwant))
     467             :          END IF
     468             :          ! Set the current step to nwant so the subspace build knows where to start
     469         250 :          control%current_step = nwant
     470             : 
     471         250 :          DEALLOCATE (tmp_mat, safe_mat, Q, Qdata, tmp_mat1, work, tau, work_measure)
     472             : 
     473         250 :       END SUBROUTINE arnoldi_iram_${nametype1}$
     474             : 
     475             : ! **************************************************************************************************
     476             : !> \brief Here we create the Krylov subspace and fill the Hessenberg matrix
     477             : !>        convergence check is only performed on subspace convergence
     478             : !>        Gram Schidt is used to orthonogonalize.
     479             : !>        If this is numericall not sufficient a Daniel, Gragg, Kaufman and Steward
     480             : !>        correction is performed
     481             : !> \param matrix ...
     482             : !> \param vectors ...
     483             : !> \param arnoldi_data ...
     484             : ! **************************************************************************************************
     485      125309 :       SUBROUTINE build_subspace_${nametype1}$ (matrix, vectors, arnoldi_data)
     486             :          TYPE(dbcsr_p_type), DIMENSION(:)     :: matrix
     487             :          TYPE(m_x_v_vectors_type), TARGET        :: vectors
     488             :          TYPE(arnoldi_data_type)                 :: arnoldi_data
     489             : 
     490             :          INTEGER                                  :: i, j, ncol_local, nrow_local
     491             :          REAL(${type_prec}$)                        :: rnorm
     492             :          TYPE(arnoldi_control_type), POINTER           :: control
     493             :          TYPE(arnoldi_data_${nametype1}$_type), POINTER  :: ar_data
     494             :          ${type_nametype1}$                         :: norm
     495             :          ${type_nametype1}$, ALLOCATABLE, DIMENSION(:)      :: h_vec, s_vec, v_vec, w_vec
     496             :          TYPE(dbcsr_type), POINTER                 :: input_vec, result_vec, swap_vec
     497             : 
     498      125309 :          ar_data => get_data_${nametype1}$ (arnoldi_data)
     499      125309 :          control => get_control(arnoldi_data)
     500      125309 :          control%converged = .FALSE.
     501             : 
     502             :          ! create the vectors required during the iterations
     503      125309 :          CALL dbcsr_get_info(matrix=vectors%input_vec, nfullrows_local=nrow_local, nfullcols_local=ncol_local)
     504      495562 :          ALLOCATE (v_vec(nrow_local)); ALLOCATE (w_vec(nrow_local))
     505     1699084 :          v_vec = ${nametype_zero}$; w_vec = ${nametype_zero}$
     506      626545 :          ALLOCATE (s_vec(control%max_iter)); ALLOCATE (h_vec(control%max_iter))
     507             : 
     508      593008 :          DO j = control%current_step, control%max_iter - 1
     509             : 
     510             :             ! compute the vector norm of the residuum, get it real valued as well (rnorm)
     511      589990 :             CALL compute_norms_${nametype1}$ (ar_data%f_vec, norm, rnorm, control%pcol_group)
     512             : 
     513             :             ! check convergence and inform everybody about it, a bit annoying to talk to everybody because of that
     514      589990 :             IF (control%myproc == 0) control%converged = rnorm .LT. REAL(control%threshold, ${type_prec}$)
     515      589990 :             CALL control%mp_group%bcast(control%converged, 0)
     516      589990 :             IF (control%converged) EXIT
     517             : 
     518             :             ! transfer normalized residdum to history and its norm to the Hessenberg matrix
     519      467699 :             IF (rnorm == 0) rnorm = 1 ! catch case where this rank has no actual data
     520    12750653 :             v_vec(:) = ar_data%f_vec(:)/rnorm; ar_data%local_history(:, j + 1) = v_vec(:); ar_data%Hessenberg(j + 1, j) = norm
     521             : 
     522      467699 :             input_vec => vectors%input_vec
     523      467699 :             result_vec => vectors%result_vec
     524      467699 :             CALL transfer_local_array_to_dbcsr_${nametype1}$ (input_vec, v_vec, nrow_local, control%local_comp)
     525             : 
     526             :             ! This permits to compute the subspace of a matrix which is a product of two matrices
     527      935398 :             DO i = 1, SIZE(matrix)
     528             :                CALL dbcsr_matrix_colvec_multiply(matrix(i)%matrix, input_vec, result_vec, ${nametype_one}$, &
     529      467699 :                                                  ${nametype_zero}$, vectors%rep_row_vec, vectors%rep_col_vec)
     530      467699 :                swap_vec => input_vec
     531      467699 :                input_vec => result_vec
     532      935398 :                result_vec => swap_vec
     533             :             END DO
     534             : 
     535      467699 :             CALL transfer_dbcsr_to_local_array_${nametype1}$ (input_vec, w_vec, nrow_local, control%local_comp)
     536             : 
     537             :             ! Let's do the orthonormalization, to get the new f_vec. First try the Gram Schmidt scheme
     538             :             CALL Gram_Schmidt_ortho_${nametype1}$ (h_vec, ar_data%f_vec, s_vec, w_vec, nrow_local, j + 1, &
     539      467699 :                                                ar_data%local_history, ar_data%local_history, control%local_comp, control%pcol_group)
     540             : 
     541             :             ! A bit more expensive but simply always top up with a DGKS correction, otherwise numerics
     542             :             ! can become a problem later on, there is probably a good check whether it's necessary, but we don't perform it
     543             :             CALL DGKS_ortho_${nametype1}$ (h_vec, ar_data%f_vec, s_vec, nrow_local, j + 1, ar_data%local_history, &
     544      467699 :                                            ar_data%local_history, control%local_comp, control%pcol_group)
     545             :             ! Finally we can put the projections into our Hessenberg matrix
     546     4440024 :             ar_data%Hessenberg(1:j + 1, j + 1) = h_vec(1:j + 1)
     547      593008 :             control%current_step = j + 1
     548             :          END DO
     549             : 
     550             :          ! compute the vector norm of the final residuum and put it in to Hessenberg
     551      125309 :          CALL compute_norms_${nametype1}$ (ar_data%f_vec, norm, rnorm, control%pcol_group)
     552      125309 :          ar_data%Hessenberg(control%current_step + 1, control%current_step) = norm
     553             : 
     554             :          ! broadcast the Hessenberg matrix so we don't need to care later on
     555   270427933 :          CALL control%mp_group%bcast(ar_data%Hessenberg, 0)
     556             : 
     557      125309 :          DEALLOCATE (v_vec, w_vec, h_vec, s_vec)
     558             : 
     559      125309 :       END SUBROUTINE build_subspace_${nametype1}$
     560             : 
     561             : ! **************************************************************************************************
     562             : !> \brief Helper routine to transfer the all data of a dbcsr matrix to a local array
     563             : !> \param vec ...
     564             : !> \param array ...
     565             : !> \param n ...
     566             : !> \param is_local ...
     567             : ! **************************************************************************************************
     568     1016516 :       SUBROUTINE transfer_dbcsr_to_local_array_${nametype1}$ (vec, array, n, is_local)
     569             :          TYPE(dbcsr_type)                          :: vec
     570             :          ${type_nametype1}$, DIMENSION(:)           :: array
     571             :          INTEGER                                  :: n
     572             :          LOGICAL                                  :: is_local
     573     1016516 :          ${type_nametype1}$, DIMENSION(:), POINTER          :: data_vec
     574             : 
     575     2033032 :          data_vec => dbcsr_get_data_p(vec, select_data_type=${nametype_zero}$)
     576    13532709 :          IF (is_local) array(1:n) = data_vec(1:n)
     577             : 
     578     1016516 :       END SUBROUTINE transfer_dbcsr_to_local_array_${nametype1}$
     579             : 
     580             : ! **************************************************************************************************
     581             : !> \brief The inverse routine transferring data back from an array to a dbcsr
     582             : !> \param vec ...
     583             : !> \param array ...
     584             : !> \param n ...
     585             : !> \param is_local ...
     586             : ! **************************************************************************************************
     587      634941 :       SUBROUTINE transfer_local_array_to_dbcsr_${nametype1}$ (vec, array, n, is_local)
     588             :          TYPE(dbcsr_type)                          :: vec
     589             :          ${type_nametype1}$, DIMENSION(:)           :: array
     590             :          INTEGER                                  :: n
     591             :          LOGICAL                                  :: is_local
     592      634941 :          ${type_nametype1}$, DIMENSION(:), POINTER          :: data_vec
     593             : 
     594     1269882 :          data_vec => dbcsr_get_data_p(vec, select_data_type=${nametype_zero}$)
     595    10911307 :          IF (is_local) data_vec(1:n) = array(1:n)
     596             : 
     597      634941 :       END SUBROUTINE transfer_local_array_to_dbcsr_${nametype1}$
     598             : 
     599             : ! **************************************************************************************************
     600             : !> \brief Gram-Schmidt in matrix vector form
     601             : !> \param h_vec ...
     602             : !> \param f_vec ...
     603             : !> \param s_vec ...
     604             : !> \param w_vec ...
     605             : !> \param nrow_local ...
     606             : !> \param j ...
     607             : !> \param local_history ...
     608             : !> \param reorth_mat ...
     609             : !> \param local_comp ...
     610             : !> \param pcol_group ...
     611             : ! **************************************************************************************************
     612      544322 :       SUBROUTINE Gram_Schmidt_ortho_${nametype1}$ (h_vec, f_vec, s_vec, w_vec, nrow_local, &
     613      544322 :                                                    j, local_history, reorth_mat, local_comp, pcol_group)
     614             :          ${type_nametype1}$, DIMENSION(:)      :: h_vec, s_vec, f_vec, w_vec
     615             :          ${type_nametype1}$, DIMENSION(:, :)    :: local_history, reorth_mat
     616             :          INTEGER                                          :: nrow_local, j
     617             :          TYPE(mp_comm_type), INTENT(IN)                   :: pcol_group
     618             :          LOGICAL                                          :: local_comp
     619             : 
     620             :          CHARACTER(LEN=*), PARAMETER :: routineN = 'Gram_Schmidt_ortho_${nametype1}$'
     621             :          INTEGER                                  :: handle
     622             : 
     623      544322 :          CALL timeset(routineN, handle)
     624             : 
     625             :          ! Let's do the orthonormalization, first try the Gram Schmidt scheme
     626    42880751 :          h_vec = ${nametype_zero}$; f_vec = ${nametype_zero}$; s_vec = ${nametype_zero}$
     627      544322 :          IF (local_comp) CALL ${nametype1}$gemv('T', nrow_local, j, ${nametype_one}$, local_history, &
     628      460645 :                                                 nrow_local, w_vec, 1, ${nametype_zero}$, h_vec, 1)
     629     9913372 :          CALL pcol_group%sum(h_vec(1:j))
     630             : 
     631      544322 :          IF (local_comp) CALL ${nametype1}$gemv('N', nrow_local, j, ${nametype_one}$, reorth_mat, &
     632      460645 :                                                 nrow_local, h_vec, 1, ${nametype_zero}$, f_vec, 1)
     633     8609339 :          f_vec(:) = w_vec(:) - f_vec(:)
     634             : 
     635      544322 :          CALL timestop(handle)
     636             : 
     637      544322 :       END SUBROUTINE Gram_Schmidt_ortho_${nametype1}$
     638             : 
     639             : ! **************************************************************************************************
     640             : !> \brief Compute the  Daniel, Gragg, Kaufman and Steward correction
     641             : !> \param h_vec ...
     642             : !> \param f_vec ...
     643             : !> \param s_vec ...
     644             : !> \param nrow_local ...
     645             : !> \param j ...
     646             : !> \param local_history ...
     647             : !> \param reorth_mat ...
     648             : !> \param local_comp ...
     649             : !> \param pcol_group ...
     650             : ! **************************************************************************************************
     651      544322 :       SUBROUTINE DGKS_ortho_${nametype1}$ (h_vec, f_vec, s_vec, nrow_local, j, &
     652     1088644 :                                            local_history, reorth_mat, local_comp, pcol_group)
     653             :          ${type_nametype1}$, DIMENSION(:)      :: h_vec, s_vec, f_vec
     654             :          ${type_nametype1}$, DIMENSION(:, :)    :: local_history, reorth_mat
     655             :          INTEGER                                          :: nrow_local, j
     656             :          TYPE(mp_comm_type), INTENT(IN)           :: pcol_group
     657             : 
     658             :          CHARACTER(LEN=*), PARAMETER :: routineN = 'DGKS_ortho_${nametype1}$'
     659             :          LOGICAL                                          :: local_comp
     660             :          INTEGER                                  :: handle
     661             : 
     662      544322 :          CALL timeset(routineN, handle)
     663             : 
     664      544322 :          IF (local_comp) CALL ${nametype1}$gemv('T', nrow_local, j, ${nametype_one}$, local_history, &
     665      460645 :                                                 nrow_local, f_vec, 1, ${nametype_zero}$, s_vec, 1)
     666     9913372 :          CALL pcol_group%sum(s_vec(1:j))
     667      544322 :          IF (local_comp) CALL ${nametype1}$gemv('N', nrow_local, j, ${nametype_negone}$, reorth_mat, &
     668      460645 :                                                 nrow_local, s_vec, 1, ${nametype_one}$, f_vec, 1)
     669     5228847 :          h_vec(1:j) = h_vec(1:j) + s_vec(1:j)
     670             : 
     671      544322 :          CALL timestop(handle)
     672             : 
     673      544322 :       END SUBROUTINE DGKS_ortho_${nametype1}$
     674             : 
     675             : ! **************************************************************************************************
     676             : !> \brief Compute the norm of a vector distributed along proc_col
     677             : !>        as local arrays. Always return the real part next to the complex rep.
     678             : !> \param vec ...
     679             : !> \param norm ...
     680             : !> \param rnorm ...
     681             : !> \param pcol_group ...
     682             : ! **************************************************************************************************
     683      853296 :       SUBROUTINE compute_norms_${nametype1}$ (vec, norm, rnorm, pcol_group)
     684             :          ${type_nametype1}$, DIMENSION(:)           :: vec
     685             :          REAL(${type_prec}$)                        :: rnorm
     686             :          ${type_nametype1}$                         :: norm
     687             :          TYPE(mp_comm_type), INTENT(IN)             :: pcol_group
     688             : 
     689             :          ! the norm is computed along the processor column
     690     9307725 :          norm = DOT_PRODUCT(vec, vec)
     691      853296 :          CALL pcol_group%sum(norm)
     692      853296 :          rnorm = SQRT(REAL(norm, ${type_prec}$))
     693      853296 :          ${rnorm_to_norm}$
     694             : 
     695      853296 :       END SUBROUTINE compute_norms_${nametype1}$
     696             : 
     697             : ! **************************************************************************************************
     698             : !> \brief Computes the initial guess for the solution of the generalized eigenvalue
     699             : !>        using the arnoldi method
     700             : !> \param matrix ...
     701             : !> \param matrix_arnoldi ...
     702             : !> \param vectors ...
     703             : !> \param arnoldi_data ...
     704             : ! **************************************************************************************************
     705             : 
     706        3728 :       SUBROUTINE gev_arnoldi_init_${nametype1}$ (matrix, matrix_arnoldi, vectors, arnoldi_data)
     707             :          TYPE(dbcsr_p_type), DIMENSION(:)     :: matrix
     708             :          TYPE(dbcsr_p_type), DIMENSION(:)     :: matrix_arnoldi
     709             :          TYPE(m_x_v_vectors_type)                :: vectors
     710             :          TYPE(arnoldi_data_type)                 :: arnoldi_data
     711             : 
     712             :          INTEGER                                  :: iseed(4), row_size, col_size, &
     713             :                                                      nrow_local, ncol_local, &
     714             :                                                      row, col
     715             :          REAL(${type_prec}$)                        :: rnorm
     716             :          TYPE(dbcsr_iterator_type)                     :: iter
     717             :          ${type_nametype1}$                         :: norm, denom
     718             :          ${type_nametype1}$, DIMENSION(:), ALLOCATABLE :: &
     719             :             v_vec, w_vec
     720        3728 :          ${type_nametype1}$, DIMENSION(:), POINTER          :: data_vec
     721             :          LOGICAL                                  :: local_comp
     722             :          TYPE(arnoldi_data_${nametype1}$_type), POINTER            :: ar_data
     723             :          TYPE(arnoldi_control_type), POINTER           :: control
     724             :          TYPE(mp_comm_type)                         :: pcol_group
     725             : 
     726        7456 :          control => get_control(arnoldi_data)
     727        3728 :          pcol_group = control%pcol_group
     728        3728 :          local_comp = control%local_comp
     729             : 
     730        3728 :          ar_data => get_data_${nametype1}$ (arnoldi_data)
     731             : 
     732             :          ! create a local data copy to store the vectors and make Gram Schmidt a bit simpler
     733        3728 :          CALL dbcsr_get_info(matrix=vectors%input_vec, nfullrows_local=nrow_local, nfullcols_local=ncol_local)
     734       11156 :          ALLOCATE (v_vec(nrow_local))
     735        7428 :          ALLOCATE (w_vec(nrow_local))
     736      132348 :          v_vec = ${nametype_zero}$; w_vec = ${nametype_zero}$
     737     1644048 :          ar_data%Hessenberg = ${nametype_zero}$
     738             : 
     739        3728 :          IF (control%has_initial_vector) THEN
     740             :             ! after calling the set routine the initial vector is stored in f_vec
     741        2051 :             CALL transfer_local_array_to_dbcsr_${nametype1}$ (vectors%input_vec, ar_data%f_vec, nrow_local, control%local_comp)
     742             :          ELSE
     743             :             ! Setup the initial normalized random vector (sufficient if it only happens on proc_col 0)
     744        1677 :             CALL dbcsr_iterator_start(iter, vectors%input_vec)
     745        5095 :             DO WHILE (dbcsr_iterator_blocks_left(iter))
     746        3418 :                CALL dbcsr_iterator_next_block(iter, row, col, data_vec, row_size=row_size, col_size=col_size)
     747        3418 :                iseed(1) = 2; iseed(2) = MOD(row, 4095); iseed(3) = MOD(col, 4095); iseed(4) = 11
     748        5095 :                CALL ${nametype1}$larnv(2, iseed, row_size*col_size, data_vec)
     749             :             END DO
     750        1677 :             CALL dbcsr_iterator_stop(iter)
     751             :          END IF
     752             : 
     753        3728 :          CALL transfer_dbcsr_to_local_array_${nametype1}$ (vectors%input_vec, v_vec, nrow_local, control%local_comp)
     754             : 
     755             :          ! compute the vector norm of the reandom vectorm, get it real valued as well (rnorm)
     756        3728 :          CALL compute_norms_${nametype1}$ (v_vec, norm, rnorm, control%pcol_group)
     757             : 
     758        3728 :          IF (rnorm == 0) rnorm = 1 ! catch case where this rank has no actual data
     759        3728 :          CALL dbcsr_scale(vectors%input_vec, REAL(1.0, ${type_prec}$)/rnorm)
     760             : 
     761             :          CALL dbcsr_matrix_colvec_multiply(matrix(1)%matrix, vectors%input_vec, vectors%result_vec, ${nametype_one}$, &
     762        3728 :                                            ${nametype_zero}$, vectors%rep_row_vec, vectors%rep_col_vec)
     763             : 
     764        3728 :          CALL transfer_dbcsr_to_local_array_${nametype1}$ (vectors%result_vec, w_vec, nrow_local, control%local_comp)
     765             : 
     766             :          ar_data%rho_scale = ${nametype_zero}$
     767       68038 :          ar_data%rho_scale = DOT_PRODUCT(v_vec, w_vec)
     768        3728 :          CALL pcol_group%sum(ar_data%rho_scale)
     769             : 
     770             :          CALL dbcsr_matrix_colvec_multiply(matrix(2)%matrix, vectors%input_vec, vectors%result_vec, ${nametype_one}$, &
     771        3728 :                                            ${nametype_zero}$, vectors%rep_row_vec, vectors%rep_col_vec)
     772             : 
     773        3728 :          CALL transfer_dbcsr_to_local_array_${nametype1}$ (vectors%result_vec, w_vec, nrow_local, control%local_comp)
     774             : 
     775             :          denom = ${nametype_zero}$
     776       68038 :          denom = DOT_PRODUCT(v_vec, w_vec)
     777        3728 :          CALL pcol_group%sum(denom)
     778        3728 :          IF (control%myproc == 0) ar_data%rho_scale = ar_data%rho_scale/denom
     779        3728 :          CALL control%mp_group%bcast(ar_data%rho_scale, 0)
     780             : 
     781             :          ! if the maximum ev is requested we need to optimize with -A-rho*B
     782        3728 :          CALL dbcsr_copy(matrix_arnoldi(1)%matrix, matrix(1)%matrix)
     783        3728 :          CALL dbcsr_add(matrix_arnoldi(1)%matrix, matrix(2)%matrix, ${nametype_one}$, -ar_data%rho_scale)
     784             : 
     785       68038 :          ar_data%x_vec = v_vec
     786             : 
     787       11184 :       END SUBROUTINE gev_arnoldi_init_${nametype1}$
     788             : 
     789             : ! **************************************************************************************************
     790             : !> \brief builds the basis rothogonal wrt. the metric.
     791             : !>        The structure looks similar to normal arnoldi but norms, vectors and
     792             : !>        matrix_vector products are very differently defined. Therefore it is
     793             : !>        cleaner to put it in a separate subroutine to avoid confusion
     794             : !> \param matrix ...
     795             : !> \param vectors ...
     796             : !> \param arnoldi_data ...
     797             : ! **************************************************************************************************
     798        4605 :       SUBROUTINE gev_build_subspace_${nametype1}$ (matrix, vectors, arnoldi_data)
     799             :          TYPE(dbcsr_p_type), DIMENSION(:)     :: matrix
     800             :          TYPE(m_x_v_vectors_type)                :: vectors
     801             :          TYPE(arnoldi_data_type)                 :: arnoldi_data
     802             : 
     803             :          INTEGER                                  :: j, ncol_local, nrow_local
     804             :          TYPE(arnoldi_control_type), POINTER           :: control
     805             :          TYPE(arnoldi_data_${nametype1}$_type), POINTER            :: ar_data
     806             :          ${type_nametype1}$                         :: norm
     807             :          ${type_nametype1}$, ALLOCATABLE, DIMENSION(:)      :: h_vec, s_vec, v_vec, w_vec
     808        4605 :          ${type_nametype1}$, ALLOCATABLE, DIMENSION(:, :)    :: Zmat, CZmat, BZmat
     809             :          TYPE(mp_comm_type)                           :: pcol_group
     810             : 
     811        9210 :          ar_data => get_data_${nametype1}$ (arnoldi_data)
     812        4605 :          control => get_control(arnoldi_data)
     813        4605 :          control%converged = .FALSE.
     814        4605 :          pcol_group = control%pcol_group
     815             : 
     816             :          ! create the vectors required during the iterations
     817        4605 :          CALL dbcsr_get_info(matrix=vectors%input_vec, nfullrows_local=nrow_local, nfullcols_local=ncol_local)
     818       18362 :          ALLOCATE (v_vec(nrow_local)); ALLOCATE (w_vec(nrow_local))
     819      213313 :          v_vec = ${nametype_zero}$; w_vec = ${nametype_zero}$
     820       18420 :          ALLOCATE (s_vec(control%max_iter)); ALLOCATE (h_vec(control%max_iter))
     821       27572 :          ALLOCATE (Zmat(nrow_local, control%max_iter)); ALLOCATE (CZmat(nrow_local, control%max_iter))
     822       13786 :          ALLOCATE (BZmat(nrow_local, control%max_iter))
     823             : 
     824        4605 :          CALL transfer_local_array_to_dbcsr_${nametype1}$ (vectors%input_vec, ar_data%x_vec, nrow_local, control%local_comp)
     825             :          CALL dbcsr_matrix_colvec_multiply(matrix(2)%matrix, vectors%input_vec, vectors%result_vec, ${nametype_one}$, &
     826        4605 :                                            ${nametype_zero}$, vectors%rep_row_vec, vectors%rep_col_vec)
     827        4605 :          CALL transfer_dbcsr_to_local_array_${nametype1}$ (vectors%result_vec, BZmat(:, 1), nrow_local, control%local_comp)
     828             : 
     829             :          norm = ${nametype_zero}$
     830      108959 :          norm = DOT_PRODUCT(ar_data%x_vec, BZmat(:, 1))
     831        4605 :          CALL pcol_group%sum(norm)
     832        4605 :          IF (control%local_comp) THEN
     833      213284 :             Zmat(:, 1) = ar_data%x_vec/SQRT(norm); BZmat(:, 1) = BZmat(:, 1)/SQRT(norm)
     834             :          END IF
     835             : 
     836       76623 :          DO j = 1, control%max_iter
     837       76623 :             control%current_step = j
     838       76623 :             CALL transfer_local_array_to_dbcsr_${nametype1}$ (vectors%input_vec, Zmat(:, j), nrow_local, control%local_comp)
     839             :             CALL dbcsr_matrix_colvec_multiply(matrix(1)%matrix, vectors%input_vec, vectors%result_vec, ${nametype_one}$, &
     840       76623 :                                               ${nametype_zero}$, vectors%rep_row_vec, vectors%rep_col_vec)
     841       76623 :             CALL transfer_dbcsr_to_local_array_${nametype1}$ (vectors%result_vec, CZmat(:, j), nrow_local, control%local_comp)
     842     2000163 :             w_vec(:) = CZmat(:, j)
     843             : 
     844             :             ! Let's do the orthonormalization, to get the new f_vec. First try the Gram Schmidt scheme
     845             :             CALL Gram_Schmidt_ortho_${nametype1}$ (h_vec, ar_data%f_vec, s_vec, w_vec, nrow_local, j, &
     846       76623 :                                                    BZmat, Zmat, control%local_comp, control%pcol_group)
     847             : 
     848             :             ! A bit more expensive but simpliy always top up with a DGKS correction, otherwise numerics
     849             :             ! can becom a problem later on, there is probably a good check, but we don't perform it
     850             :             CALL DGKS_ortho_${nametype1}$ (h_vec, ar_data%f_vec, s_vec, nrow_local, j, BZmat, &
     851       76623 :                                            Zmat, control%local_comp, control%pcol_group)
     852             : 
     853       76623 :             CALL transfer_local_array_to_dbcsr_${nametype1}$ (vectors%input_vec, ar_data%f_vec, nrow_local, control%local_comp)
     854             :             CALL dbcsr_matrix_colvec_multiply(matrix(2)%matrix, vectors%input_vec, vectors%result_vec, ${nametype_one}$, &
     855       76623 :                                               ${nametype_zero}$, vectors%rep_row_vec, vectors%rep_col_vec)
     856       76623 :             CALL transfer_dbcsr_to_local_array_${nametype1}$ (vectors%result_vec, v_vec, nrow_local, control%local_comp)
     857             :             norm = ${nametype_zero}$
     858     2000163 :             norm = DOT_PRODUCT(ar_data%f_vec, v_vec)
     859       76623 :             CALL pcol_group%sum(norm)
     860             : 
     861       76623 :             IF (control%myproc == 0) control%converged = REAL(norm, ${type_prec}$) .LT. EPSILON(REAL(1.0, ${type_prec}$))
     862       76623 :             CALL control%mp_group%bcast(control%converged, 0)
     863       76623 :             IF (control%converged) EXIT
     864       74555 :             IF (j == control%max_iter - 1) EXIT
     865             : 
     866       76623 :             IF (control%local_comp) THEN
     867     3710054 :                Zmat(:, j + 1) = ar_data%f_vec/SQRT(norm); BZmat(:, j + 1) = v_vec(:)/SQRT(norm)
     868             :             END IF
     869             :          END DO
     870             : 
     871             : ! getting a bit more complicated here as the final matrix is again a product which has to be computed with the
     872             : ! distributed vectors, therefore a sum along the first proc_col is necessary. As we want that matrix everywhere,
     873             : ! we set it to zero before and compute the distributed product only on the first col and then sum over the full grid
     874     2030805 :          ar_data%Hessenberg = ${nametype_zero}$
     875        4605 :          IF (control%local_comp) THEN
     876             :             ar_data%Hessenberg(1:control%current_step, 1:control%current_step) = &
     877    24431267 :                MATMUL(TRANSPOSE(CZmat(:, 1:control%current_step)), Zmat(:, 1:control%current_step))
     878             :          END IF
     879     4057005 :          CALL control%mp_group%sum(ar_data%Hessenberg)
     880             : 
     881     2183785 :          ar_data%local_history = Zmat
     882             :          ! broadcast the Hessenberg matrix so we don't need to care later on
     883             : 
     884        4605 :          DEALLOCATE (v_vec); DEALLOCATE (w_vec); DEALLOCATE (s_vec); DEALLOCATE (h_vec); DEALLOCATE (CZmat); 
     885        4605 :          DEALLOCATE (Zmat); DEALLOCATE (BZmat)
     886             : 
     887        9210 :       END SUBROUTINE gev_build_subspace_${nametype1}$
     888             : 
     889             : ! **************************************************************************************************
     890             : !> \brief Updates all data after an inner loop of the generalized ev arnoldi.
     891             : !>        Updates rho and C=A-rho*B accordingly.
     892             : !>        As an update scheme is used for he ev, the output ev has to be replaced
     893             : !>        with the updated one.
     894             : !>        Furthermore a convergence check is performed. The mv product could
     895             : !>        be skiiped by making clever use of the precomputed data, However,
     896             : !>        it is most likely not worth the effort.
     897             : !> \param matrix ...
     898             : !> \param matrix_arnoldi ...
     899             : !> \param vectors ...
     900             : !> \param arnoldi_data ...
     901             : ! **************************************************************************************************
     902        4605 :       SUBROUTINE gev_update_data_${nametype1}$ (matrix, matrix_arnoldi, vectors, arnoldi_data)
     903             :          TYPE(dbcsr_p_type), DIMENSION(:)     :: matrix
     904             :          TYPE(dbcsr_p_type), DIMENSION(:)     :: matrix_arnoldi
     905             :          TYPE(m_x_v_vectors_type)                :: vectors
     906             :          TYPE(arnoldi_data_type)                 :: arnoldi_data
     907             : 
     908             :          TYPE(arnoldi_control_type), POINTER           :: control
     909             :          TYPE(arnoldi_data_${nametype1}$_type), POINTER            :: ar_data
     910             :          INTEGER                                  :: ncol_local, nrow_local, ind, i
     911        4605 :          ${type_nametype1}$, ALLOCATABLE, DIMENSION(:)      :: v_vec
     912             :          REAL(${type_prec}$)                        :: rnorm
     913             :          ${type_nametype1}$                         :: norm
     914             :          COMPLEX(${type_prec}$)                     :: val
     915             : 
     916        4605 :          control => get_control(arnoldi_data)
     917             : 
     918        4605 :          ar_data => get_data_${nametype1}$ (arnoldi_data)
     919             : 
     920             : ! compute the new shift, hack around the problem templating the conversion
     921        4605 :          val = ar_data%evals(control%selected_ind(1))
     922        4605 :          ar_data%rho_scale = ar_data%rho_scale + ${val_to_type}$
     923             : ! compute the new eigenvector / initial guess for the next arnoldi loop
     924      108959 :          ar_data%x_vec = ${nametype_zero}$
     925       81228 :          DO i = 1, control%current_step
     926       76623 :             val = ar_data%revec(i, control%selected_ind(1))
     927     2004768 :             ar_data%x_vec(:) = ar_data%x_vec(:) + ar_data%local_history(:, i)*${val_to_type}$
     928             :          END DO
     929             : !      ar_data%x_vec(:)=MATMUL(ar_data%local_history(:,1:control%current_step),&
     930             : !                        ar_data%revec(1:control%current_step,control%selected_ind(1)))
     931             : 
     932             : ! update the C-matrix (A-rho*B), if the maximum value is requested we have to use -A-rho*B
     933        4605 :          CALL dbcsr_copy(matrix_arnoldi(1)%matrix, matrix(1)%matrix)
     934        4605 :          CALL dbcsr_add(matrix_arnoldi(1)%matrix, matrix(2)%matrix, ${nametype_one}$, -ar_data%rho_scale)
     935             : 
     936             : ! compute convergence and set the correct eigenvalue and eigenvector
     937        4605 :          CALL dbcsr_get_info(matrix=vectors%input_vec, nfullrows_local=nrow_local, nfullcols_local=ncol_local)
     938        4605 :          IF (ncol_local > 0) THEN
     939       13786 :             ALLOCATE (v_vec(nrow_local))
     940        4605 :             CALL compute_norms_${nametype1}$ (ar_data%x_vec, norm, rnorm, control%pcol_group)
     941      108959 :             v_vec(:) = ar_data%x_vec(:)/rnorm
     942             :          END IF
     943        4605 :          CALL transfer_local_array_to_dbcsr_${nametype1}$ (vectors%input_vec, v_vec, nrow_local, control%local_comp)
     944             :          CALL dbcsr_matrix_colvec_multiply(matrix_arnoldi(1)%matrix, vectors%input_vec, vectors%result_vec, ${nametype_one}$, &
     945        4605 :                                            ${nametype_zero}$, vectors%rep_row_vec, vectors%rep_col_vec)
     946        4605 :          CALL transfer_dbcsr_to_local_array_${nametype1}$ (vectors%result_vec, v_vec, nrow_local, control%local_comp)
     947        4605 :          IF (ncol_local > 0) THEN
     948        4605 :             CALL compute_norms_${nametype1}$ (v_vec, norm, rnorm, control%pcol_group)
     949             :             ! check convergence
     950        4605 :             control%converged = rnorm .LT. control%threshold
     951        4605 :             DEALLOCATE (v_vec)
     952             :          END IF
     953             :          ! and broadcast the real eigenvalue
     954        4605 :          CALL control%mp_group%bcast(control%converged, 0)
     955        4605 :          ind = control%selected_ind(1)
     956        4605 :          CALL control%mp_group%bcast(ar_data%rho_scale, 0)
     957             : 
     958             : ! Again the maximum value request is done on -A therefore the eigenvalue needs the opposite sign
     959        4605 :          ar_data%evals(ind) = ar_data%rho_scale
     960             : 
     961        4605 :       END SUBROUTINE gev_update_data_${nametype1}$
     962             :    #:endfor
     963             : 
     964         250 : END MODULE arnoldi_methods

Generated by: LCOV version 1.15