LCOV - CP2K Regtests (git:15a58fb) - src/arnoldi/arnoldi

LCOV - code coverage report

Current view:	top level - src/arnoldi - arnoldi_methods.F (source / functions)		Hit	Total	Coverage
Test:	CP2K Regtests (git:15a58fb)	Lines:	292	293	99.7 %
Date:	2025-02-18 08:24:35	Functions:	12	12	100.0 %

          Line data    Source code

       1             : !--------------------------------------------------------------------------------------------------!
       2             : !   CP2K: A general program to perform molecular dynamics simulations                              !
       3             : !   Copyright 2000-2025 CP2K developers group <https://cp2k.org>                                   !
       4             : !                                                                                                  !
       5             : !   SPDX-License-Identifier: GPL-2.0-or-later                                                      !
       6             : !--------------------------------------------------------------------------------------------------!
       7             : 
       8             : ! **************************************************************************************************
       9             : !> \brief methods for arnoldi iteration
      10             : !> \par History
      11             : !>       2014.09 created [Florian Schiffmann]
      12             : !>       2023.12 Removed support for single-precision [Ole Schuett]
      13             : !>       2024.12 Removed support for complex input matrices [Ole Schuett]
      14             : !> \author Florian Schiffmann
      15             : ! **************************************************************************************************
      16             : MODULE arnoldi_methods
      17             :    USE arnoldi_geev,                    ONLY: arnoldi_general_local_diag,&
      18             :                                               arnoldi_symm_local_diag,&
      19             :                                               arnoldi_tridiag_local_diag
      20             :    USE arnoldi_types,                   ONLY: arnoldi_control_type,&
      21             :                                               arnoldi_data_type,&
      22             :                                               arnoldi_env_type,&
      23             :                                               get_control,&
      24             :                                               get_data,&
      25             :                                               m_x_v_vectors_type
      26             :    USE arnoldi_vector,                  ONLY: dbcsr_matrix_colvec_multiply
      27             :    USE cp_dbcsr_api,                    ONLY: &
      28             :         dbcsr_add, dbcsr_copy, dbcsr_get_data_p, dbcsr_get_info, dbcsr_iterator_blocks_left, &
      29             :         dbcsr_iterator_next_block, dbcsr_iterator_start, dbcsr_iterator_stop, dbcsr_iterator_type, &
      30             :         dbcsr_p_type, dbcsr_scale, dbcsr_type
      31             :    USE kinds,                           ONLY: dp
      32             :    USE message_passing,                 ONLY: mp_comm_type
      33             : #include "../base/base_uses.f90"
      34             : 
      35             :    IMPLICIT NONE
      36             : 
      37             :    PRIVATE
      38             : 
      39             :    CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'arnoldi_methods'
      40             : 
      41             :    PUBLIC :: arnoldi_init, build_subspace, compute_evals, arnoldi_iram, &
      42             :              gev_arnoldi_init, gev_build_subspace, gev_update_data
      43             : 
      44             : CONTAINS
      45             : 
      46             : ! **************************************************************************************************
      47             : !> \brief Alogorithm for the implicit restarts in the arnoldi method
      48             : !>        this is an early implementation which scales subspace size^4
      49             : !>        by replacing the lapack calls with direct math the
      50             : !>        QR and  gemms can be made linear and a N^2 sacling will be acchieved
      51             : !>        however this already sets the framework but should be used with care.
      52             : !>        Currently all based on lapack.
      53             : !> \param arnoldi_env ...
      54             : ! **************************************************************************************************
      55         250 :    SUBROUTINE arnoldi_iram(arnoldi_env)
      56             :       TYPE(arnoldi_env_type)                             :: arnoldi_env
      57             : 
      58             :       CHARACTER(LEN=*), PARAMETER                        :: routineN = 'arnoldi_iram'
      59             : 
      60         250 :       COMPLEX(dp), ALLOCATABLE, DIMENSION(:)             :: tau, work, work_measure
      61         250 :       COMPLEX(dp), ALLOCATABLE, DIMENSION(:, :)          :: Q, safe_mat, tmp_mat, tmp_mat1
      62             :       INTEGER                                            :: handle, i, info, j, lwork, msize, nwant
      63             :       REAL(kind=dp)                                      :: beta, sigma
      64         250 :       REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :)        :: Qdata
      65             :       TYPE(arnoldi_control_type), POINTER                :: control
      66             :       TYPE(arnoldi_data_type), POINTER                   :: ar_data
      67             : 
      68         250 :       CALL timeset(routineN, handle)
      69             : 
      70             :       ! This is just a terribly inefficient implementation but I hope it is correct and might serve as a reference
      71         250 :       ar_data => get_data(arnoldi_env)
      72         250 :       control => get_control(arnoldi_env)
      73         250 :       msize = control%current_step
      74         250 :       nwant = control%nval_out
      75        1500 :       ALLOCATE (tmp_mat(msize, msize)); ALLOCATE (safe_mat(msize, msize))
      76        1250 :       ALLOCATE (Q(msize, msize)); ALLOCATE (tmp_mat1(msize, msize))
      77         250 :       ALLOCATE (work_measure(1))
      78        1500 :       ALLOCATE (tau(msize)); ALLOCATE (Qdata(msize, msize))
      79             :       !make Q identity
      80      225914 :       Q = CMPLX(0.0, 0.0, dp)
      81        6778 :       DO i = 1, msize
      82        6778 :          Q(i, i) = CMPLX(1.0, 0.0, dp)
      83             :       END DO
      84             : 
      85             :       ! Looks a bit odd, but safe_mat will contain the result in the end, while tmpmat gets violated by lapack
      86      225914 :       tmp_mat(:, :) = CMPLX(ar_data%Hessenberg(1:msize, 1:msize), 0.0, KIND=dp)
      87      225914 :       safe_mat(:, :) = tmp_mat(:, :)
      88             : 
      89        6778 :       DO i = 1, msize
      90             :          ! A bit a strange check but in the end we only want to shift the unwanted evals
      91      212858 :          IF (ANY(control%selected_ind == i)) CYCLE
      92             :          ! Here we shift the matrix by subtracting unwanted evals from the diagonal
      93      212108 :          DO j = 1, msize
      94      212108 :             tmp_mat(j, j) = tmp_mat(j, j) - ar_data%evals(i)
      95             :          END DO
      96             :          ! Now we repair the damage by QR factorizing
      97        6028 :          lwork = -1
      98        6028 :          CALL zgeqrf(msize, msize, tmp_mat, msize, tau, work_measure, lwork, info)
      99        6028 :          lwork = INT(work_measure(1))
     100        6028 :          IF (ALLOCATED(work)) THEN
     101        5778 :             IF (SIZE(work) .LT. lwork) THEN
     102           0 :                DEALLOCATE (work)
     103             :             END IF
     104             :          END IF
     105        6528 :          IF (.NOT. ALLOCATED(work)) ALLOCATE (work(lwork))
     106        6028 :          CALL zgeqrf(msize, msize, tmp_mat, msize, tau, work, lwork, info)
     107             :          ! Ask Lapack to reconstruct Q from its own way of storing data (tmpmat will contain Q)
     108        6028 :          CALL zungqr(msize, msize, msize, tmp_mat, msize, tau, work, lwork, info)
     109             :          ! update Q=Q*Q_current
     110     9112716 :          tmp_mat1(:, :) = Q(:, :)
     111             :          CALL zgemm('N', 'N', msize, msize, msize, CMPLX(1.0, 0.0, dp), tmp_mat1, &
     112        6028 :                     msize, tmp_mat, msize, CMPLX(0.0, 0.0, dp), Q, msize)
     113             :          ! Update H=(Q*)HQ
     114             :          CALL zgemm('C', 'N', msize, msize, msize, CMPLX(1.0, 0.0, dp), tmp_mat, &
     115        6028 :                     msize, safe_mat, msize, CMPLX(0.0, 0.0, dp), tmp_mat1, msize)
     116             :          CALL zgemm('N', 'N', msize, msize, msize, CMPLX(1.0, 0.0, dp), tmp_mat1, &
     117        6028 :                     msize, tmp_mat, msize, CMPLX(0.0, 0.0, dp), safe_mat, msize)
     118             : 
     119             :          ! this one is crucial for numerics not to accumulate noise in the subdiagonals
     120      212108 :          DO j = 1, msize
     121     4359320 :             safe_mat(j + 2:msize, j) = CMPLX(0.0, 0.0, dp)
     122             :          END DO
     123     9113466 :          tmp_mat(:, :) = safe_mat(:, :)
     124             :       END DO
     125             : 
     126             :       ! Now we can compute our restart quantities
     127      232442 :       ar_data%Hessenberg = 0.0_dp
     128      225914 :       ar_data%Hessenberg(1:msize, 1:msize) = REAL(safe_mat, KIND=dp)
     129      225914 :       Qdata(:, :) = REAL(Q(:, :), KIND=dp)
     130             : 
     131         250 :       beta = ar_data%Hessenberg(nwant + 1, nwant); sigma = Qdata(msize, nwant)
     132             : 
     133             :       !update the residuum and the basis vectors
     134         250 :       IF (control%local_comp) THEN
     135      443564 :          ar_data%f_vec = MATMUL(ar_data%local_history(:, 1:msize), Qdata(1:msize, nwant + 1))*beta + ar_data%f_vec(:)*sigma
     136     1267924 :          ar_data%local_history(:, 1:nwant) = MATMUL(ar_data%local_history(:, 1:msize), Qdata(1:msize, 1:nwant))
     137             :       END IF
     138             :       ! Set the current step to nwant so the subspace build knows where to start
     139         250 :       control%current_step = nwant
     140             : 
     141         250 :       DEALLOCATE (tmp_mat, safe_mat, Q, Qdata, tmp_mat1, work, tau, work_measure)
     142         250 :       CALL timestop(handle)
     143             : 
     144         250 :    END SUBROUTINE arnoldi_iram
     145             : 
     146             : ! **************************************************************************************************
     147             : !> \brief Call the correct eigensolver, in the arnoldi method only the right
     148             : !>        eigenvectors are used. Lefts are created here but dumped immediately
     149             : !>        This is only the serial version
     150             : !> \param arnoldi_env ...
     151             : ! **************************************************************************************************
     152      129834 :    SUBROUTINE compute_evals(arnoldi_env)
     153             :       TYPE(arnoldi_env_type)                             :: arnoldi_env
     154             : 
     155             :       CHARACTER(LEN=*), PARAMETER                        :: routineN = 'compute_evals'
     156             : 
     157             :       COMPLEX(dp), ALLOCATABLE, DIMENSION(:, :)          :: levec
     158             :       INTEGER                                            :: handle, ndim
     159             :       TYPE(arnoldi_control_type), POINTER                :: control
     160             :       TYPE(arnoldi_data_type), POINTER                   :: ar_data
     161             : 
     162      129834 :       CALL timeset(routineN, handle)
     163             : 
     164      129834 :       ar_data => get_data(arnoldi_env)
     165      129834 :       control => get_control(arnoldi_env)
     166      129834 :       ndim = control%current_step
     167      519336 :       ALLOCATE (levec(ndim, ndim))
     168             : 
     169             :       ! Needs antoher interface as the calls to real and complex geev differ (sucks!)
     170             :       ! only perform the diagonalization on processors which hold data
     171      129834 :       IF (control%generalized_ev) THEN
     172             :          CALL arnoldi_symm_local_diag('V', ar_data%Hessenberg(1:ndim, 1:ndim), ndim, &
     173        4469 :                                       ar_data%evals(1:ndim), ar_data%revec(1:ndim, 1:ndim))
     174             :       ELSE
     175      125365 :          IF (control%symmetric) THEN
     176             :             CALL arnoldi_tridiag_local_diag('N', 'V', ar_data%Hessenberg(1:ndim, 1:ndim), ndim, &
     177        8656 :                                             ar_data%evals(1:ndim), ar_data%revec(1:ndim, 1:ndim), levec)
     178             :          ELSE
     179             :             CALL arnoldi_general_local_diag('N', 'V', ar_data%Hessenberg(1:ndim, 1:ndim), ndim, &
     180      116709 :                                             ar_data%evals(1:ndim), ar_data%revec(1:ndim, 1:ndim), levec)
     181             :          END IF
     182             :       END IF
     183             : 
     184      129834 :       DEALLOCATE (levec)
     185      129834 :       CALL timestop(handle)
     186             : 
     187      129834 :    END SUBROUTINE compute_evals
     188             : 
     189             : ! **************************************************************************************************
     190             : !> \brief Interface for the initialization of the arnoldi subspace creation
     191             : !>        currently it can only setup a random vector but can be improved to
     192             : !>        various types of restarts easily
     193             : !> \param matrix pointer to the matrices as described in main interface
     194             : !> \param vectors work vectors for the matrix vector multiplications
     195             : !> \param arnoldi_env all data concerning the subspace
     196             : ! **************************************************************************************************
     197      125115 :    SUBROUTINE arnoldi_init(matrix, vectors, arnoldi_env)
     198             :       TYPE(dbcsr_p_type), DIMENSION(:)                   :: matrix
     199             :       TYPE(m_x_v_vectors_type)                           :: vectors
     200             :       TYPE(arnoldi_env_type)                             :: arnoldi_env
     201             : 
     202             :       CHARACTER(LEN=*), PARAMETER                        :: routineN = 'arnoldi_init'
     203             : 
     204             :       INTEGER                                            :: col, col_size, handle, i, iseed(4), &
     205             :                                                             ncol_local, nrow_local, row, row_size
     206             :       LOGICAL                                            :: local_comp
     207             :       REAL(dp)                                           :: rnorm
     208             :       REAL(kind=dp)                                      :: norm
     209             :       REAL(kind=dp), ALLOCATABLE, DIMENSION(:)           :: v_vec, w_vec
     210      125115 :       REAL(kind=dp), DIMENSION(:, :), POINTER            :: data_block
     211             :       TYPE(arnoldi_control_type), POINTER                :: control
     212             :       TYPE(arnoldi_data_type), POINTER                   :: ar_data
     213             :       TYPE(dbcsr_iterator_type)                          :: iter
     214             :       TYPE(mp_comm_type)                                 :: pcol_group
     215             : 
     216      125115 :       CALL timeset(routineN, handle)
     217             : 
     218      125115 :       control => get_control(arnoldi_env)
     219      125115 :       pcol_group = control%pcol_group
     220      125115 :       local_comp = control%local_comp
     221             : 
     222      125115 :       ar_data => get_data(arnoldi_env)
     223             : 
     224             :       ! create a local data copy to store the vectors and make Gram Schmidt a bit simpler
     225      125115 :       CALL dbcsr_get_info(matrix=vectors%input_vec, nfullrows_local=nrow_local, nfullcols_local=ncol_local)
     226      331675 :       ALLOCATE (v_vec(nrow_local))
     227      288005 :       ALLOCATE (w_vec(nrow_local))
     228     1668992 :       v_vec = 0.0_dp; w_vec = 0.0_dp
     229   135101275 :       ar_data%Hessenberg = 0.0_dp
     230             : 
     231      125115 :       IF (control%has_initial_vector) THEN
     232             :          ! after calling the set routine the initial vector is stored in f_vec
     233        2735 :          CALL transfer_local_array_to_dbcsr(vectors%input_vec, ar_data%f_vec, nrow_local, control%local_comp)
     234             :       ELSE
     235             :          ! Setup the initial normalized random vector (sufficient if it only happens on proc_col 0)
     236      122380 :          CALL dbcsr_iterator_start(iter, vectors%input_vec)
     237      227121 :          DO WHILE (dbcsr_iterator_blocks_left(iter))
     238      104741 :             CALL dbcsr_iterator_next_block(iter, row, col, data_block, row_size=row_size, col_size=col_size)
     239      104741 :             iseed(1) = 2; iseed(2) = MOD(row, 4095); iseed(3) = MOD(col, 4095); iseed(4) = 11
     240      227121 :             CALL dlarnv(2, iseed, row_size*col_size, data_block)
     241             :          END DO
     242      122380 :          CALL dbcsr_iterator_stop(iter)
     243             :       END IF
     244             : 
     245      125115 :       CALL transfer_dbcsr_to_local_array(vectors%input_vec, v_vec, nrow_local, control%local_comp)
     246             : 
     247             :       ! compute the vector norm of the random vectorm, get it real valued as well (rnorm)
     248      125115 :       CALL compute_norms(v_vec, norm, rnorm, control%pcol_group)
     249             : 
     250      125115 :       IF (rnorm == 0) rnorm = 1 ! catch case where this rank has no actual data
     251      125115 :       CALL dbcsr_scale(vectors%input_vec, REAL(1.0, dp)/rnorm)
     252             : 
     253             :       ! Everything prepared, initialize the Arnoldi iteration
     254      125115 :       CALL transfer_dbcsr_to_local_array(vectors%input_vec, v_vec, nrow_local, control%local_comp)
     255             : 
     256             :       ! This permits to compute the subspace of a matrix which is a product of multiple matrices
     257      250230 :       DO i = 1, SIZE(matrix)
     258             :          CALL dbcsr_matrix_colvec_multiply(matrix(i)%matrix, vectors%input_vec, vectors%result_vec, 1.0_dp, &
     259      125115 :                                            0.0_dp, vectors%rep_row_vec, vectors%rep_col_vec)
     260      250230 :          CALL dbcsr_copy(vectors%input_vec, vectors%result_vec)
     261             :       END DO
     262             : 
     263      125115 :       CALL transfer_dbcsr_to_local_array(vectors%result_vec, w_vec, nrow_local, control%local_comp)
     264             : 
     265             :       ! Put the projection into the Hessenberg matrix, and make the vectors orthonormal
     266      834496 :       ar_data%Hessenberg(1, 1) = DOT_PRODUCT(v_vec, w_vec)
     267      125115 :       CALL pcol_group%sum(ar_data%Hessenberg(1, 1))
     268      834496 :       ar_data%f_vec = w_vec - v_vec*ar_data%Hessenberg(1, 1)
     269             : 
     270      834496 :       ar_data%local_history(:, 1) = v_vec(:)
     271             : 
     272             :       ! We did the first step in here so we should set the current step for the subspace generation accordingly
     273      125115 :       control%current_step = 1
     274             : 
     275      125115 :       DEALLOCATE (v_vec, w_vec)
     276      125115 :       CALL timestop(handle)
     277             : 
     278      250230 :    END SUBROUTINE arnoldi_init
     279             : 
     280             : ! **************************************************************************************************
     281             : !> \brief Computes the initial guess for the solution of the generalized eigenvalue
     282             : !>        using the arnoldi method
     283             : !> \param matrix pointer to the matrices as described in main interface
     284             : !> \param matrix_arnoldi ...
     285             : !> \param vectors work vectors for the matrix vector multiplications
     286             : !> \param arnoldi_env all data concerning the subspace
     287             : ! **************************************************************************************************
     288        3640 :    SUBROUTINE gev_arnoldi_init(matrix, matrix_arnoldi, vectors, arnoldi_env)
     289             :       TYPE(dbcsr_p_type), DIMENSION(:)                   :: matrix, matrix_arnoldi
     290             :       TYPE(m_x_v_vectors_type)                           :: vectors
     291             :       TYPE(arnoldi_env_type)                             :: arnoldi_env
     292             : 
     293             :       CHARACTER(LEN=*), PARAMETER                        :: routineN = 'gev_arnoldi_init'
     294             : 
     295             :       INTEGER                                            :: col, col_size, handle, iseed(4), &
     296             :                                                             ncol_local, nrow_local, row, row_size
     297             :       LOGICAL                                            :: local_comp
     298             :       REAL(dp)                                           :: rnorm
     299             :       REAL(kind=dp)                                      :: denom, norm
     300             :       REAL(kind=dp), ALLOCATABLE, DIMENSION(:)           :: v_vec, w_vec
     301        3640 :       REAL(kind=dp), DIMENSION(:, :), POINTER            :: data_block
     302             :       TYPE(arnoldi_control_type), POINTER                :: control
     303             :       TYPE(arnoldi_data_type), POINTER                   :: ar_data
     304             :       TYPE(dbcsr_iterator_type)                          :: iter
     305             :       TYPE(mp_comm_type)                                 :: pcol_group
     306             : 
     307        3640 :       CALL timeset(routineN, handle)
     308             : 
     309        3640 :       control => get_control(arnoldi_env)
     310        3640 :       pcol_group = control%pcol_group
     311        3640 :       local_comp = control%local_comp
     312             : 
     313        3640 :       ar_data => get_data(arnoldi_env)
     314             : 
     315             :       ! create a local data copy to store the vectors and make Gram Schmidt a bit simpler
     316        3640 :       CALL dbcsr_get_info(matrix=vectors%input_vec, nfullrows_local=nrow_local, nfullcols_local=ncol_local)
     317       10892 :       ALLOCATE (v_vec(nrow_local))
     318        7252 :       ALLOCATE (w_vec(nrow_local))
     319      129286 :       v_vec = 0.0_dp; w_vec = 0.0_dp
     320     1605240 :       ar_data%Hessenberg = 0.0_dp
     321             : 
     322        3640 :       IF (control%has_initial_vector) THEN
     323             :          ! after calling the set routine the initial vector is stored in f_vec
     324        1971 :          CALL transfer_local_array_to_dbcsr(vectors%input_vec, ar_data%f_vec, nrow_local, control%local_comp)
     325             :       ELSE
     326             :          ! Setup the initial normalized random vector (sufficient if it only happens on proc_col 0)
     327        1669 :          CALL dbcsr_iterator_start(iter, vectors%input_vec)
     328        5050 :          DO WHILE (dbcsr_iterator_blocks_left(iter))
     329        3381 :             CALL dbcsr_iterator_next_block(iter, row, col, data_block, row_size=row_size, col_size=col_size)
     330        3381 :             iseed(1) = 2; iseed(2) = MOD(row, 4095); iseed(3) = MOD(col, 4095); iseed(4) = 11
     331        5050 :             CALL dlarnv(2, iseed, row_size*col_size, data_block)
     332             :          END DO
     333        1669 :          CALL dbcsr_iterator_stop(iter)
     334             :       END IF
     335             : 
     336        3640 :       CALL transfer_dbcsr_to_local_array(vectors%input_vec, v_vec, nrow_local, control%local_comp)
     337             : 
     338             :       ! compute the vector norm of the reandom vectorm, get it real valued as well (rnorm)
     339        3640 :       CALL compute_norms(v_vec, norm, rnorm, control%pcol_group)
     340             : 
     341        3640 :       IF (rnorm == 0) rnorm = 1 ! catch case where this rank has no actual data
     342        3640 :       CALL dbcsr_scale(vectors%input_vec, REAL(1.0, dp)/rnorm)
     343             : 
     344             :       CALL dbcsr_matrix_colvec_multiply(matrix(1)%matrix, vectors%input_vec, vectors%result_vec, 1.0_dp, &
     345        3640 :                                         0.0_dp, vectors%rep_row_vec, vectors%rep_col_vec)
     346             : 
     347        3640 :       CALL transfer_dbcsr_to_local_array(vectors%result_vec, w_vec, nrow_local, control%local_comp)
     348             : 
     349             :       ar_data%rho_scale = 0.0_dp
     350       64643 :       ar_data%rho_scale = DOT_PRODUCT(v_vec, w_vec)
     351        3640 :       CALL pcol_group%sum(ar_data%rho_scale)
     352             : 
     353             :       CALL dbcsr_matrix_colvec_multiply(matrix(2)%matrix, vectors%input_vec, vectors%result_vec, 1.0_dp, &
     354        3640 :                                         0.0_dp, vectors%rep_row_vec, vectors%rep_col_vec)
     355             : 
     356        3640 :       CALL transfer_dbcsr_to_local_array(vectors%result_vec, w_vec, nrow_local, control%local_comp)
     357             : 
     358             :       denom = 0.0_dp
     359       64643 :       denom = DOT_PRODUCT(v_vec, w_vec)
     360        3640 :       CALL pcol_group%sum(denom)
     361        3640 :       IF (control%myproc == 0) ar_data%rho_scale = ar_data%rho_scale/denom
     362        3640 :       CALL control%mp_group%bcast(ar_data%rho_scale, 0)
     363             : 
     364             :       ! if the maximum ev is requested we need to optimize with -A-rho*B
     365        3640 :       CALL dbcsr_copy(matrix_arnoldi(1)%matrix, matrix(1)%matrix)
     366        3640 :       CALL dbcsr_add(matrix_arnoldi(1)%matrix, matrix(2)%matrix, 1.0_dp, -ar_data%rho_scale)
     367             : 
     368       64643 :       ar_data%x_vec = v_vec
     369             : 
     370        3640 :       CALL timestop(handle)
     371             : 
     372       10920 :    END SUBROUTINE gev_arnoldi_init
     373             : 
     374             : ! **************************************************************************************************
     375             : !> \brief Here we create the Krylov subspace and fill the Hessenberg matrix
     376             : !>        convergence check is only performed on subspace convergence
     377             : !>        Gram Schidt is used to orthonogonalize.
     378             : !>        If this is numericall not sufficient a Daniel, Gragg, Kaufman and Steward
     379             : !>        correction is performed
     380             : !> \param matrix ...
     381             : !> \param vectors ...
     382             : !> \param arnoldi_env ...
     383             : ! **************************************************************************************************
     384      125365 :    SUBROUTINE build_subspace(matrix, vectors, arnoldi_env)
     385             :       TYPE(dbcsr_p_type), DIMENSION(:)                   :: matrix
     386             :       TYPE(m_x_v_vectors_type), TARGET                   :: vectors
     387             :       TYPE(arnoldi_env_type)                             :: arnoldi_env
     388             : 
     389             :       CHARACTER(LEN=*), PARAMETER                        :: routineN = 'build_subspace'
     390             : 
     391             :       INTEGER                                            :: handle, i, j, ncol_local, nrow_local
     392             :       REAL(dp)                                           :: rnorm
     393             :       REAL(kind=dp)                                      :: norm
     394             :       REAL(kind=dp), ALLOCATABLE, DIMENSION(:)           :: h_vec, s_vec, v_vec, w_vec
     395             :       TYPE(arnoldi_control_type), POINTER                :: control
     396             :       TYPE(arnoldi_data_type), POINTER                   :: ar_data
     397             :       TYPE(dbcsr_type), POINTER                          :: input_vec, result_vec, swap_vec
     398             : 
     399      125365 :       CALL timeset(routineN, handle)
     400             : 
     401      125365 :       ar_data => get_data(arnoldi_env)
     402      125365 :       control => get_control(arnoldi_env)
     403      125365 :       control%converged = .FALSE.
     404             : 
     405             :       ! create the vectors required during the iterations
     406      125365 :       CALL dbcsr_get_info(matrix=vectors%input_vec, nfullrows_local=nrow_local, nfullcols_local=ncol_local)
     407      495815 :       ALLOCATE (v_vec(nrow_local)); ALLOCATE (w_vec(nrow_local))
     408     1700626 :       v_vec = 0.0_dp; w_vec = 0.0_dp
     409      626825 :       ALLOCATE (s_vec(control%max_iter)); ALLOCATE (h_vec(control%max_iter))
     410             : 
     411      594460 :       DO j = control%current_step, control%max_iter - 1
     412             : 
     413             :          ! compute the vector norm of the residuum, get it real valued as well (rnorm)
     414      591442 :          CALL compute_norms(ar_data%f_vec, norm, rnorm, control%pcol_group)
     415             : 
     416             :          ! check convergence and inform everybody about it, a bit annoying to talk to everybody because of that
     417      591442 :          IF (control%myproc == 0) control%converged = rnorm .LT. REAL(control%threshold, dp)
     418      591442 :          CALL control%mp_group%bcast(control%converged, 0)
     419      591442 :          IF (control%converged) EXIT
     420             : 
     421             :          ! transfer normalized residdum to history and its norm to the Hessenberg matrix
     422      469095 :          IF (rnorm == 0) rnorm = 1 ! catch case where this rank has no actual data
     423    12785265 :          v_vec(:) = ar_data%f_vec(:)/rnorm; ar_data%local_history(:, j + 1) = v_vec(:); ar_data%Hessenberg(j + 1, j) = norm
     424             : 
     425      469095 :          input_vec => vectors%input_vec
     426      469095 :          result_vec => vectors%result_vec
     427      469095 :          CALL transfer_local_array_to_dbcsr(input_vec, v_vec, nrow_local, control%local_comp)
     428             : 
     429             :          ! This permits to compute the subspace of a matrix which is a product of two matrices
     430      938190 :          DO i = 1, SIZE(matrix)
     431             :             CALL dbcsr_matrix_colvec_multiply(matrix(i)%matrix, input_vec, result_vec, 1.0_dp, &
     432      469095 :                                               0.0_dp, vectors%rep_row_vec, vectors%rep_col_vec)
     433      469095 :             swap_vec => input_vec
     434      469095 :             input_vec => result_vec
     435      938190 :             result_vec => swap_vec
     436             :          END DO
     437             : 
     438      469095 :          CALL transfer_dbcsr_to_local_array(input_vec, w_vec, nrow_local, control%local_comp)
     439             : 
     440             :          ! Let's do the orthonormalization, to get the new f_vec. First try the Gram Schmidt scheme
     441             :          CALL Gram_Schmidt_ortho(h_vec, ar_data%f_vec, s_vec, w_vec, nrow_local, j + 1, &
     442      469095 :                                  ar_data%local_history, ar_data%local_history, control%local_comp, control%pcol_group)
     443             : 
     444             :          ! A bit more expensive but simply always top up with a DGKS correction, otherwise numerics
     445             :          ! can become a problem later on, there is probably a good check whether it's necessary, but we don't perform it
     446             :          CALL DGKS_ortho(h_vec, ar_data%f_vec, s_vec, nrow_local, j + 1, ar_data%local_history, &
     447      469095 :                          ar_data%local_history, control%local_comp, control%pcol_group)
     448             :          ! Finally we can put the projections into our Hessenberg matrix
     449     4459540 :          ar_data%Hessenberg(1:j + 1, j + 1) = h_vec(1:j + 1)
     450      594460 :          control%current_step = j + 1
     451             :       END DO
     452             : 
     453             :       ! compute the vector norm of the final residuum and put it in to Hessenberg
     454      125365 :       CALL compute_norms(ar_data%f_vec, norm, rnorm, control%pcol_group)
     455      125365 :       ar_data%Hessenberg(control%current_step + 1, control%current_step) = norm
     456             : 
     457             :       ! broadcast the Hessenberg matrix so we don't need to care later on
     458   270542069 :       CALL control%mp_group%bcast(ar_data%Hessenberg, 0)
     459             : 
     460      125365 :       DEALLOCATE (v_vec, w_vec, h_vec, s_vec)
     461      125365 :       CALL timestop(handle)
     462             : 
     463      250730 :    END SUBROUTINE build_subspace
     464             : 
     465             : ! **************************************************************************************************
     466             : !> \brief builds the basis rothogonal wrt. the metric.
     467             : !>        The structure looks similar to normal arnoldi but norms, vectors and
     468             : !>        matrix_vector products are very differently defined. Therefore it is
     469             : !>        cleaner to put it in a separate subroutine to avoid confusion
     470             : !> \param matrix ...
     471             : !> \param vectors ...
     472             : !> \param arnoldi_env ...
     473             : ! **************************************************************************************************
     474        4469 :    SUBROUTINE gev_build_subspace(matrix, vectors, arnoldi_env)
     475             :       TYPE(dbcsr_p_type), DIMENSION(:)                   :: matrix
     476             :       TYPE(m_x_v_vectors_type)                           :: vectors
     477             :       TYPE(arnoldi_env_type)                             :: arnoldi_env
     478             : 
     479             :       CHARACTER(LEN=*), PARAMETER :: routineN = 'gev_build_subspace'
     480             : 
     481             :       INTEGER                                            :: handle, j, ncol_local, nrow_local
     482             :       REAL(kind=dp)                                      :: norm
     483             :       REAL(kind=dp), ALLOCATABLE, DIMENSION(:)           :: h_vec, s_vec, v_vec, w_vec
     484        4469 :       REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :)        :: BZmat, CZmat, Zmat
     485             :       TYPE(arnoldi_control_type), POINTER                :: control
     486             :       TYPE(arnoldi_data_type), POINTER                   :: ar_data
     487             :       TYPE(mp_comm_type)                                 :: pcol_group
     488             : 
     489        4469 :       CALL timeset(routineN, handle)
     490             : 
     491        4469 :       ar_data => get_data(arnoldi_env)
     492        4469 :       control => get_control(arnoldi_env)
     493        4469 :       control%converged = .FALSE.
     494        4469 :       pcol_group = control%pcol_group
     495             : 
     496             :       ! create the vectors required during the iterations
     497        4469 :       CALL dbcsr_get_info(matrix=vectors%input_vec, nfullrows_local=nrow_local, nfullcols_local=ncol_local)
     498       17818 :       ALLOCATE (v_vec(nrow_local)); ALLOCATE (w_vec(nrow_local))
     499      201879 :       v_vec = 0.0_dp; w_vec = 0.0_dp
     500       17876 :       ALLOCATE (s_vec(control%max_iter)); ALLOCATE (h_vec(control%max_iter))
     501       26756 :       ALLOCATE (Zmat(nrow_local, control%max_iter)); ALLOCATE (CZmat(nrow_local, control%max_iter))
     502       13378 :       ALLOCATE (BZmat(nrow_local, control%max_iter))
     503             : 
     504        4469 :       CALL transfer_local_array_to_dbcsr(vectors%input_vec, ar_data%x_vec, nrow_local, control%local_comp)
     505             :       CALL dbcsr_matrix_colvec_multiply(matrix(2)%matrix, vectors%input_vec, vectors%result_vec, 1.0_dp, &
     506        4469 :                                         0.0_dp, vectors%rep_row_vec, vectors%rep_col_vec)
     507        4469 :       CALL transfer_dbcsr_to_local_array(vectors%result_vec, BZmat(:, 1), nrow_local, control%local_comp)
     508             : 
     509             :       norm = 0.0_dp
     510      103174 :       norm = DOT_PRODUCT(ar_data%x_vec, BZmat(:, 1))
     511        4469 :       CALL pcol_group%sum(norm)
     512        4469 :       IF (control%local_comp) THEN
     513      201850 :          Zmat(:, 1) = ar_data%x_vec/SQRT(norm); BZmat(:, 1) = BZmat(:, 1)/SQRT(norm)
     514             :       END IF
     515             : 
     516       74035 :       DO j = 1, control%max_iter
     517       74035 :          control%current_step = j
     518       74035 :          CALL transfer_local_array_to_dbcsr(vectors%input_vec, Zmat(:, j), nrow_local, control%local_comp)
     519             :          CALL dbcsr_matrix_colvec_multiply(matrix(1)%matrix, vectors%input_vec, vectors%result_vec, 1.0_dp, &
     520       74035 :                                            0.0_dp, vectors%rep_row_vec, vectors%rep_col_vec)
     521       74035 :          CALL transfer_dbcsr_to_local_array(vectors%result_vec, CZmat(:, j), nrow_local, control%local_comp)
     522     1890210 :          w_vec(:) = CZmat(:, j)
     523             : 
     524             :          ! Let's do the orthonormalization, to get the new f_vec. First try the Gram Schmidt scheme
     525             :          CALL Gram_Schmidt_ortho(h_vec, ar_data%f_vec, s_vec, w_vec, nrow_local, j, &
     526       74035 :                                  BZmat, Zmat, control%local_comp, control%pcol_group)
     527             : 
     528             :          ! A bit more expensive but simpliy always top up with a DGKS correction, otherwise numerics
     529             :          ! can becom a problem later on, there is probably a good check, but we don't perform it
     530             :          CALL DGKS_ortho(h_vec, ar_data%f_vec, s_vec, nrow_local, j, BZmat, &
     531       74035 :                          Zmat, control%local_comp, control%pcol_group)
     532             : 
     533       74035 :          CALL transfer_local_array_to_dbcsr(vectors%input_vec, ar_data%f_vec, nrow_local, control%local_comp)
     534             :          CALL dbcsr_matrix_colvec_multiply(matrix(2)%matrix, vectors%input_vec, vectors%result_vec, 1.0_dp, &
     535       74035 :                                            0.0_dp, vectors%rep_row_vec, vectors%rep_col_vec)
     536       74035 :          CALL transfer_dbcsr_to_local_array(vectors%result_vec, v_vec, nrow_local, control%local_comp)
     537             :          norm = 0.0_dp
     538     1890210 :          norm = DOT_PRODUCT(ar_data%f_vec, v_vec)
     539       74035 :          CALL pcol_group%sum(norm)
     540             : 
     541       74035 :          IF (control%myproc == 0) control%converged = REAL(norm, dp) .LT. EPSILON(REAL(1.0, dp))
     542       74035 :          CALL control%mp_group%bcast(control%converged, 0)
     543       74035 :          IF (control%converged) EXIT
     544       71965 :          IF (j == control%max_iter - 1) EXIT
     545             : 
     546       74035 :          IF (control%local_comp) THEN
     547     3504170 :             Zmat(:, j + 1) = ar_data%f_vec/SQRT(norm); BZmat(:, j + 1) = v_vec(:)/SQRT(norm)
     548             :          END IF
     549             :       END DO
     550             : 
     551             :       ! getting a bit more complicated here as the final matrix is again a product which has to be computed with the
     552             :       ! distributed vectors, therefore a sum along the first proc_col is necessary. As we want that matrix everywhere,
     553             :       ! we set it to zero before and compute the distributed product only on the first col and then sum over the full grid
     554     1970829 :       ar_data%Hessenberg = 0.0_dp
     555        4469 :       IF (control%local_comp) THEN
     556             :          ar_data%Hessenberg(1:control%current_step, 1:control%current_step) = &
     557    22286962 :             MATMUL(TRANSPOSE(CZmat(:, 1:control%current_step)), Zmat(:, 1:control%current_step))
     558             :       END IF
     559     3937189 :       CALL control%mp_group%sum(ar_data%Hessenberg)
     560             : 
     561     2067949 :       ar_data%local_history = Zmat
     562             :       ! broadcast the Hessenberg matrix so we don't need to care later on
     563             : 
     564        4469 :       DEALLOCATE (v_vec); DEALLOCATE (w_vec); DEALLOCATE (s_vec); DEALLOCATE (h_vec); DEALLOCATE (CZmat); 
     565        4469 :       DEALLOCATE (Zmat); DEALLOCATE (BZmat)
     566             : 
     567        4469 :       CALL timestop(handle)
     568             : 
     569        8938 :    END SUBROUTINE gev_build_subspace
     570             : 
     571             : ! **************************************************************************************************
     572             : !> \brief Updates all data after an inner loop of the generalized ev arnoldi.
     573             : !>        Updates rho and C=A-rho*B accordingly.
     574             : !>        As an update scheme is used for he ev, the output ev has to be replaced
     575             : !>        with the updated one.
     576             : !>        Furthermore a convergence check is performed. The mv product could
     577             : !>        be skiiped by making clever use of the precomputed data, However,
     578             : !>        it is most likely not worth the effort.
     579             : !> \param matrix ...
     580             : !> \param matrix_arnoldi ...
     581             : !> \param vectors ...
     582             : !> \param arnoldi_env ...
     583             : ! **************************************************************************************************
     584        4469 :    SUBROUTINE gev_update_data(matrix, matrix_arnoldi, vectors, arnoldi_env)
     585             :       TYPE(dbcsr_p_type), DIMENSION(:)                   :: matrix, matrix_arnoldi
     586             :       TYPE(m_x_v_vectors_type)                           :: vectors
     587             :       TYPE(arnoldi_env_type)                             :: arnoldi_env
     588             : 
     589             :       CHARACTER(LEN=*), PARAMETER                        :: routineN = 'gev_update_data'
     590             : 
     591             :       COMPLEX(dp)                                        :: val
     592             :       INTEGER                                            :: handle, i, ind, ncol_local, nrow_local
     593             :       REAL(dp)                                           :: rnorm
     594             :       REAL(kind=dp)                                      :: norm
     595        4469 :       REAL(kind=dp), ALLOCATABLE, DIMENSION(:)           :: v_vec
     596             :       TYPE(arnoldi_control_type), POINTER                :: control
     597             :       TYPE(arnoldi_data_type), POINTER                   :: ar_data
     598             : 
     599        4469 :       CALL timeset(routineN, handle)
     600             : 
     601        4469 :       control => get_control(arnoldi_env)
     602             : 
     603        4469 :       ar_data => get_data(arnoldi_env)
     604             : 
     605             :       ! compute the new shift, hack around the problem templating the conversion
     606        4469 :       val = ar_data%evals(control%selected_ind(1))
     607        4469 :       ar_data%rho_scale = ar_data%rho_scale + REAL(val, dp)
     608             :       ! compute the new eigenvector / initial guess for the next arnoldi loop
     609      103174 :       ar_data%x_vec = 0.0_dp
     610       78504 :       DO i = 1, control%current_step
     611       74035 :          val = ar_data%revec(i, control%selected_ind(1))
     612     1894679 :          ar_data%x_vec(:) = ar_data%x_vec(:) + ar_data%local_history(:, i)*REAL(val, dp)
     613             :       END DO
     614             :       ! ar_data%x_vec(:)=MATMUL(ar_data%local_history(:,1:control%current_step),&
     615             :       !                   ar_data%revec(1:control%current_step,control%selected_ind(1)))
     616             : 
     617             :       ! update the C-matrix (A-rho*B), if the maximum value is requested we have to use -A-rho*B
     618        4469 :       CALL dbcsr_copy(matrix_arnoldi(1)%matrix, matrix(1)%matrix)
     619        4469 :       CALL dbcsr_add(matrix_arnoldi(1)%matrix, matrix(2)%matrix, 1.0_dp, -ar_data%rho_scale)
     620             : 
     621             :       ! compute convergence and set the correct eigenvalue and eigenvector
     622        4469 :       CALL dbcsr_get_info(matrix=vectors%input_vec, nfullrows_local=nrow_local, nfullcols_local=ncol_local)
     623        4469 :       IF (ncol_local > 0) THEN
     624       13378 :          ALLOCATE (v_vec(nrow_local))
     625        4469 :          CALL compute_norms(ar_data%x_vec, norm, rnorm, control%pcol_group)
     626      103174 :          v_vec(:) = ar_data%x_vec(:)/rnorm
     627             :       END IF
     628        4469 :       CALL transfer_local_array_to_dbcsr(vectors%input_vec, v_vec, nrow_local, control%local_comp)
     629             :       CALL dbcsr_matrix_colvec_multiply(matrix_arnoldi(1)%matrix, vectors%input_vec, vectors%result_vec, 1.0_dp, &
     630        4469 :                                         0.0_dp, vectors%rep_row_vec, vectors%rep_col_vec)
     631        4469 :       CALL transfer_dbcsr_to_local_array(vectors%result_vec, v_vec, nrow_local, control%local_comp)
     632        4469 :       IF (ncol_local > 0) THEN
     633        4469 :          CALL compute_norms(v_vec, norm, rnorm, control%pcol_group)
     634             :          ! check convergence
     635        4469 :          control%converged = rnorm .LT. control%threshold
     636        4469 :          DEALLOCATE (v_vec)
     637             :       END IF
     638             :       ! and broadcast the real eigenvalue
     639        4469 :       CALL control%mp_group%bcast(control%converged, 0)
     640        4469 :       ind = control%selected_ind(1)
     641        4469 :       CALL control%mp_group%bcast(ar_data%rho_scale, 0)
     642             : 
     643             :       ! Again the maximum value request is done on -A therefore the eigenvalue needs the opposite sign
     644        4469 :       ar_data%evals(ind) = ar_data%rho_scale
     645             : 
     646        4469 :       CALL timestop(handle)
     647             : 
     648        4469 :    END SUBROUTINE gev_update_data
     649             : 
     650             : ! **************************************************************************************************
     651             : !> \brief Helper routine to transfer the all data of a dbcsr matrix to a local array
     652             : !> \param vec ...
     653             : !> \param array ...
     654             : !> \param n ...
     655             : !> \param is_local ...
     656             : ! **************************************************************************************************
     657     1012368 :    SUBROUTINE transfer_dbcsr_to_local_array(vec, array, n, is_local)
     658             :       TYPE(dbcsr_type)                                   :: vec
     659             :       REAL(kind=dp), DIMENSION(:)                        :: array
     660             :       INTEGER                                            :: n
     661             :       LOGICAL                                            :: is_local
     662             : 
     663     1012368 :       REAL(kind=dp), DIMENSION(:), POINTER               :: data_vec
     664             : 
     665     2024736 :       data_vec => dbcsr_get_data_p(vec, select_data_type=0.0_dp)
     666    13311365 :       IF (is_local) array(1:n) = data_vec(1:n)
     667             : 
     668     1012368 :    END SUBROUTINE transfer_dbcsr_to_local_array
     669             : 
     670             : ! **************************************************************************************************
     671             : !> \brief The inverse routine transferring data back from an array to a dbcsr
     672             : !> \param vec ...
     673             : !> \param array ...
     674             : !> \param n ...
     675             : !> \param is_local ...
     676             : ! **************************************************************************************************
     677      630809 :    SUBROUTINE transfer_local_array_to_dbcsr(vec, array, n, is_local)
     678             :       TYPE(dbcsr_type)                                   :: vec
     679             :       REAL(kind=dp), DIMENSION(:)                        :: array
     680             :       INTEGER                                            :: n
     681             :       LOGICAL                                            :: is_local
     682             : 
     683      630809 :       REAL(kind=dp), DIMENSION(:), POINTER               :: data_vec
     684             : 
     685     1261618 :       data_vec => dbcsr_get_data_p(vec, select_data_type=0.0_dp)
     686    10694823 :       IF (is_local) data_vec(1:n) = array(1:n)
     687             : 
     688      630809 :    END SUBROUTINE transfer_local_array_to_dbcsr
     689             : 
     690             : ! **************************************************************************************************
     691             : !> \brief Gram-Schmidt in matrix vector form
     692             : !> \param h_vec ...
     693             : !> \param f_vec ...
     694             : !> \param s_vec ...
     695             : !> \param w_vec ...
     696             : !> \param nrow_local ...
     697             : !> \param j ...
     698             : !> \param local_history ...
     699             : !> \param reorth_mat ...
     700             : !> \param local_comp ...
     701             : !> \param pcol_group ...
     702             : ! **************************************************************************************************
     703      543130 :    SUBROUTINE Gram_Schmidt_ortho(h_vec, f_vec, s_vec, w_vec, nrow_local, &
     704      543130 :                                  j, local_history, reorth_mat, local_comp, pcol_group)
     705             :       REAL(kind=dp), DIMENSION(:)                        :: h_vec, f_vec, s_vec, w_vec
     706             :       INTEGER                                            :: nrow_local, j
     707             :       REAL(kind=dp), DIMENSION(:, :)                     :: local_history, reorth_mat
     708             :       LOGICAL                                            :: local_comp
     709             :       TYPE(mp_comm_type), INTENT(IN)                     :: pcol_group
     710             : 
     711             :       CHARACTER(LEN=*), PARAMETER :: routineN = 'Gram_Schmidt_ortho'
     712             : 
     713             :       INTEGER                                            :: handle
     714             : 
     715      543130 :       CALL timeset(routineN, handle)
     716             : 
     717             :       ! Let's do the orthonormalization, first try the Gram Schmidt scheme
     718    42771810 :       h_vec = 0.0_dp; f_vec = 0.0_dp; s_vec = 0.0_dp
     719      543130 :       IF (local_comp) CALL dgemv('T', nrow_local, j, 1.0_dp, local_history, &
     720      459444 :                                  nrow_local, w_vec, 1, 0.0_dp, h_vec, 1)
     721     9896592 :       CALL pcol_group%sum(h_vec(1:j))
     722             : 
     723      543130 :       IF (local_comp) CALL dgemv('N', nrow_local, j, 1.0_dp, reorth_mat, &
     724      459444 :                                  nrow_local, h_vec, 1, 0.0_dp, f_vec, 1)
     725     8517390 :       f_vec(:) = w_vec(:) - f_vec(:)
     726             : 
     727      543130 :       CALL timestop(handle)
     728             : 
     729      543130 :    END SUBROUTINE Gram_Schmidt_ortho
     730             : 
     731             : ! **************************************************************************************************
     732             : !> \brief Compute the  Daniel, Gragg, Kaufman and Steward correction
     733             : !> \param h_vec ...
     734             : !> \param f_vec ...
     735             : !> \param s_vec ...
     736             : !> \param nrow_local ...
     737             : !> \param j ...
     738             : !> \param local_history ...
     739             : !> \param reorth_mat ...
     740             : !> \param local_comp ...
     741             : !> \param pcol_group ...
     742             : ! **************************************************************************************************
     743      543130 :    SUBROUTINE DGKS_ortho(h_vec, f_vec, s_vec, nrow_local, j, &
     744     1086260 :                          local_history, reorth_mat, local_comp, pcol_group)
     745             :       REAL(kind=dp), DIMENSION(:)                        :: h_vec, f_vec, s_vec
     746             :       INTEGER                                            :: nrow_local, j
     747             :       REAL(kind=dp), DIMENSION(:, :)                     :: local_history, reorth_mat
     748             :       LOGICAL                                            :: local_comp
     749             :       TYPE(mp_comm_type), INTENT(IN)                     :: pcol_group
     750             : 
     751             :       CHARACTER(LEN=*), PARAMETER                        :: routineN = 'DGKS_ortho'
     752             : 
     753             :       INTEGER                                            :: handle
     754             : 
     755      543130 :       CALL timeset(routineN, handle)
     756             : 
     757      543130 :       IF (local_comp) CALL dgemv('T', nrow_local, j, 1.0_dp, local_history, &
     758      459444 :                                  nrow_local, f_vec, 1, 0.0_dp, s_vec, 1)
     759     9896592 :       CALL pcol_group%sum(s_vec(1:j))
     760      543130 :       IF (local_comp) CALL dgemv('N', nrow_local, j, -1.0_dp, reorth_mat, &
     761      459444 :                                  nrow_local, s_vec, 1, 1.0_dp, f_vec, 1)
     762     5219861 :       h_vec(1:j) = h_vec(1:j) + s_vec(1:j)
     763             : 
     764      543130 :       CALL timestop(handle)
     765             : 
     766      543130 :    END SUBROUTINE DGKS_ortho
     767             : 
     768             : ! **************************************************************************************************
     769             : !> \brief Compute the norm of a vector distributed along proc_col
     770             : !>        as local arrays. Always return the real part next to the complex rep.
     771             : !> \param vec ...
     772             : !> \param norm ...
     773             : !> \param rnorm ...
     774             : !> \param pcol_group ...
     775             : ! **************************************************************************************************
     776      854500 :    SUBROUTINE compute_norms(vec, norm, rnorm, pcol_group)
     777             :       REAL(kind=dp), DIMENSION(:)                        :: vec
     778             :       REAL(kind=dp)                                      :: norm
     779             :       REAL(dp)                                           :: rnorm
     780             :       TYPE(mp_comm_type), INTENT(IN)                     :: pcol_group
     781             : 
     782             :       ! the norm is computed along the processor column
     783     9313077 :       norm = DOT_PRODUCT(vec, vec)
     784      854500 :       CALL pcol_group%sum(norm)
     785      854500 :       rnorm = SQRT(REAL(norm, dp))
     786      854500 :       norm = rnorm
     787             : 
     788      854500 :    END SUBROUTINE compute_norms
     789             : 
     790         250 : END MODULE arnoldi_methods

Generated by: LCOV version 1.15