LCOV - code coverage report
Current view: top level - src/fm - cp_fm_diag.F (source / functions) Hit Total Coverage
Test: CP2K Regtests (git:4c33f95) Lines: 344 414 83.1 %
Date: 2025-01-30 06:53:08 Functions: 12 12 100.0 %

          Line data    Source code
       1             : !--------------------------------------------------------------------------------------------------!
       2             : !   CP2K: A general program to perform molecular dynamics simulations                              !
       3             : !   Copyright 2000-2025 CP2K developers group <https://cp2k.org>                                   !
       4             : !                                                                                                  !
       5             : !   SPDX-License-Identifier: GPL-2.0-or-later                                                      !
       6             : !--------------------------------------------------------------------------------------------------!
       7             : 
       8             : ! **************************************************************************************************
       9             : !> \brief used for collecting some of the diagonalization schemes available for
      10             : !>      cp_fm_type. cp_fm_power also moved here as it is very related
      11             : !> \note
      12             : !>      first version : most routines imported
      13             : !> \par History
      14             : !>      - unused Jacobi routines removed, cosmetics (05.04.06,MK)
      15             : !> \author Joost VandeVondele (2003-08)
      16             : ! **************************************************************************************************
      17             : MODULE cp_fm_diag
      18             : 
      19             :    USE cp_blacs_types, ONLY: cp_blacs_type
      20             :    USE cp_blacs_env, ONLY: cp_blacs_env_type
      21             :    USE cp_fm_basic_linalg, ONLY: cp_fm_column_scale, &
      22             :                                  cp_fm_gemm, &
      23             :                                  cp_fm_scale, &
      24             :                                  cp_fm_syrk, &
      25             :                                  cp_fm_triangular_invert, &
      26             :                                  cp_fm_triangular_multiply, &
      27             :                                  cp_fm_uplo_to_full
      28             :    USE cp_fm_cholesky, ONLY: cp_fm_cholesky_decompose
      29             :    USE cp_fm_diag_utils, ONLY: cp_fm_redistribute_end, &
      30             :                                cp_fm_redistribute_start
      31             :    USE cp_fm_elpa, ONLY: cp_fm_diag_elpa, &
      32             :                          finalize_elpa_library, &
      33             :                          initialize_elpa_library, &
      34             :                          set_elpa_kernel, &
      35             :                          set_elpa_print, &
      36             :                          set_elpa_qr
      37             :    USE cp_fm_cusolver_api, ONLY: cp_fm_diag_cusolver, &
      38             :                                  cp_fm_general_cusolver
      39             : #if defined(__DLAF)
      40             :    USE cp_fm_dlaf_api, ONLY: cp_fm_diag_dlaf, cp_fm_diag_gen_dlaf
      41             : #endif
      42             :    USE cp_fm_types, ONLY: cp_fm_get_info, &
      43             :                           cp_fm_set_element, &
      44             :                           cp_fm_to_fm, &
      45             :                           cp_fm_type, &
      46             :                           cp_fm_create, &
      47             :                           cp_fm_get_info, &
      48             :                           cp_fm_release, &
      49             :                           cp_fm_set_all, &
      50             :                           cp_fm_to_fm, &
      51             :                           cp_fm_to_fm_submat, &
      52             :                           cp_fm_type
      53             :    USE cp_fm_struct, ONLY: cp_fm_struct_equivalent, &
      54             :                            cp_fm_struct_create, &
      55             :                            cp_fm_struct_release, &
      56             :                            cp_fm_struct_type
      57             :    USE cp_log_handling, ONLY: cp_logger_get_default_unit_nr, &
      58             :                               cp_get_default_logger, &
      59             :                               cp_logger_get_default_io_unit, &
      60             :                               cp_logger_type, &
      61             :                               cp_to_string
      62             :    USE cp_log_handling, ONLY: cp_get_default_logger, &
      63             :                               cp_logger_get_default_unit_nr, &
      64             :                               cp_logger_get_unit_nr, &
      65             :                               cp_logger_type
      66             :    USE kinds, ONLY: default_string_length, &
      67             :                     dp
      68             :    USE machine, ONLY: default_output_unit, &
      69             :                       m_memory
      70             : #if defined (__parallel)
      71             :    USE message_passing, ONLY: mp_comm_type
      72             : #endif
      73             : #if defined (__HAS_IEEE_EXCEPTIONS)
      74             :    USE ieee_exceptions, ONLY: ieee_get_halting_mode, &
      75             :                               ieee_set_halting_mode, &
      76             :                               IEEE_ALL
      77             : #endif
      78             : #include "../base/base_uses.f90"
      79             : 
      80             :    IMPLICIT NONE
      81             : 
      82             :    PRIVATE
      83             : 
      84             :    CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'cp_fm_diag'
      85             : 
      86             :    REAL(KIND=dp), PARAMETER, PUBLIC :: eps_check_diag_default = 5.0E-14_dp
      87             : 
      88             :    ! The following saved variables are diagonalization global
      89             :    ! Stores the default library for diagonalization
      90             :    INTEGER, SAVE, PUBLIC   :: diag_type = 0
      91             :    ! Minimum number of eigenvectors for the use of the ELPA eigensolver.
      92             :    ! The ScaLAPACK eigensolver is used as fallback for all smaller cases.
      93             :    INTEGER, SAVE       :: elpa_neigvec_min = 0
      94             : #if defined(__DLAF)
      95             :    ! Minimum number of eigenvectors for the use of the DLAF eigensolver.
      96             :    ! The ScaLAPACK eigensolver is used as fallback for all smaller cases.
      97             :    INTEGER, SAVE, PUBLIC    :: dlaf_neigvec_min = 0
      98             : #endif
      99             :    ! Threshold value for the orthonormality check of the eigenvectors obtained
     100             :    ! after a diagonalization. A negative value disables the check.
     101             :    REAL(KIND=dp), SAVE :: eps_check_diag = -1.0_dp
     102             : 
     103             :    ! Constants for the diag_type above
     104             :    INTEGER, PARAMETER, PUBLIC  :: FM_DIAG_TYPE_SCALAPACK = 101, &
     105             :                                   FM_DIAG_TYPE_ELPA = 102, &
     106             :                                   FM_DIAG_TYPE_CUSOLVER = 103, &
     107             :                                   FM_DIAG_TYPE_DLAF = 104
     108             : #if defined(__CUSOLVERMP)
     109             :    INTEGER, PARAMETER, PUBLIC :: FM_DIAG_TYPE_DEFAULT = FM_DIAG_TYPE_CUSOLVER
     110             : #elif defined(__ELPA)
     111             :    INTEGER, PARAMETER, PUBLIC :: FM_DIAG_TYPE_DEFAULT = FM_DIAG_TYPE_ELPA
     112             : #elif defined(__DLAF)
     113             :    INTEGER, PARAMETER, PUBLIC :: FM_DIAG_TYPE_DEFAULT = FM_DIAG_TYPE_DLAF
     114             : #else
     115             :    INTEGER, PARAMETER, PUBLIC :: FM_DIAG_TYPE_DEFAULT = FM_DIAG_TYPE_SCALAPACK
     116             : #endif
     117             : 
     118             :    ! Public subroutines
     119             :    PUBLIC :: choose_eigv_solver, &
     120             :              cp_fm_block_jacobi, &
     121             :              cp_fm_power, &
     122             :              cp_fm_syevd, &
     123             :              cp_fm_syevx, &
     124             :              cp_fm_svd, &
     125             :              cp_fm_geeig, &
     126             :              cp_fm_geeig_canon, &
     127             :              diag_init, &
     128             :              diag_finalize
     129             : 
     130             : CONTAINS
     131             : 
     132             : ! **************************************************************************************************
     133             : !> \brief Setup the diagonalization library to be used
     134             : !> \param diag_lib diag_library flag from GLOBAL section in input
     135             : !> \param fallback_applied .TRUE. if support for the requested library was not compiled-in and fallback
     136             : !>                         to ScaLAPACK was applied, .FALSE. otherwise.
     137             : !> \param elpa_kernel integer that determines which ELPA kernel to use for diagonalization
     138             : !> \param elpa_neigvec_min_input ...
     139             : !> \param elpa_qr logical that determines if ELPA should try to use QR to accelerate the
     140             : !>                diagonalization procedure of suitably sized matrices
     141             : !> \param elpa_print logical that determines if information about the ELPA diagonalization should
     142             : !>                   be printed
     143             : !> \param elpa_qr_unsafe logical that enables potentially unsafe ELPA options
     144             : !> \param dlaf_neigvec_min_input ...
     145             : !> \param eps_check_diag_input ...
     146             : !> \par History
     147             : !>      - Add support for DLA-Future (05.09.2023, RMeli)
     148             : !> \author  MI 11.2013
     149             : ! **************************************************************************************************
     150        9801 :    SUBROUTINE diag_init(diag_lib, fallback_applied, elpa_kernel, elpa_neigvec_min_input, elpa_qr, &
     151             :                         elpa_print, elpa_qr_unsafe, dlaf_neigvec_min_input, eps_check_diag_input)
     152             :       CHARACTER(LEN=*), INTENT(IN)                       :: diag_lib
     153             :       LOGICAL, INTENT(OUT)                               :: fallback_applied
     154             :       INTEGER, INTENT(IN)                                :: elpa_kernel, elpa_neigvec_min_input
     155             :       LOGICAL, INTENT(IN)                                :: elpa_qr, elpa_print, elpa_qr_unsafe
     156             :       INTEGER, INTENT(IN)                                :: dlaf_neigvec_min_input
     157             :       REAL(KIND=dp), INTENT(IN)                          :: eps_check_diag_input
     158             : 
     159             :       LOGICAL, SAVE                                      :: initialized = .FALSE.
     160             : 
     161        9801 :       fallback_applied = .FALSE.
     162             : 
     163        9801 :       IF (diag_lib == "ScaLAPACK") THEN
     164         190 :          diag_type = FM_DIAG_TYPE_SCALAPACK
     165        9611 :       ELSE IF (diag_lib == "ELPA") THEN
     166             : #if defined (__ELPA)
     167             :          ! ELPA is requested and available
     168        9611 :          diag_type = FM_DIAG_TYPE_ELPA
     169             : #else
     170             :          ! ELPA library requested but not linked, switch back to SL
     171             :          diag_type = FM_DIAG_TYPE_SCALAPACK
     172             :          fallback_applied = .TRUE.
     173             : #endif
     174           0 :       ELSE IF (diag_lib == "cuSOLVER") THEN
     175           0 :          diag_type = FM_DIAG_TYPE_CUSOLVER
     176           0 :       ELSE IF (diag_lib == "DLAF") THEN
     177             : #if defined (__DLAF)
     178             :          diag_type = FM_DIAG_TYPE_DLAF
     179             : #else
     180           0 :          CPABORT("ERROR in diag_init: CP2K was not compiled with DLA-Future support")
     181             : #endif
     182             :       ELSE
     183           0 :          CPABORT("ERROR in diag_init: Initialization of unknown diagonalization library requested")
     184             :       END IF
     185             : 
     186             :       ! Initialization of requested diagonalization library
     187        9801 :       IF (.NOT. initialized .AND. diag_type == FM_DIAG_TYPE_ELPA) THEN
     188        9014 :          CALL initialize_elpa_library()
     189        9014 :          CALL set_elpa_kernel(elpa_kernel)
     190        9014 :          CALL set_elpa_qr(elpa_qr, elpa_qr_unsafe)
     191        9014 :          CALL set_elpa_print(elpa_print)
     192        9014 :          initialized = .TRUE.
     193             :       END IF
     194             : 
     195        9801 :       elpa_neigvec_min = elpa_neigvec_min_input
     196             : #if defined(__DLAF)
     197             :       dlaf_neigvec_min = dlaf_neigvec_min_input
     198             : #else
     199             :       MARK_USED(dlaf_neigvec_min_input)
     200             : #endif
     201        9801 :       eps_check_diag = eps_check_diag_input
     202             : 
     203        9801 :    END SUBROUTINE diag_init
     204             : 
     205             : ! **************************************************************************************************
     206             : !> \brief Finalize the diagonalization library
     207             : ! **************************************************************************************************
     208        9591 :    SUBROUTINE diag_finalize()
     209             : #if defined (__ELPA)
     210        9591 :       IF (diag_type == FM_DIAG_TYPE_ELPA) &
     211        9401 :          CALL finalize_elpa_library()
     212             : #endif
     213        9591 :    END SUBROUTINE diag_finalize
     214             : 
     215             : ! **************************************************************************************************
     216             : !> \brief   Choose the Eigensolver depending on which library is available
     217             : !>          ELPA seems to be unstable for small systems
     218             : !> \param matrix ...
     219             : !> \param eigenvectors ...
     220             : !> \param eigenvalues ...
     221             : !> \param info ...
     222             : !> \par     info If present returns error code and prevents program stops.
     223             : !>               Works currently only for cp_fm_syevd with ScaLAPACK.
     224             : !>               Other solvers will end the program regardless of PRESENT(info).
     225             : !> \par History
     226             : !>      - Do not use ELPA for small matrices and use instead ScaLAPACK as fallback (10.05.2021, MK)
     227             : ! **************************************************************************************************
     228      164735 :    SUBROUTINE choose_eigv_solver(matrix, eigenvectors, eigenvalues, info)
     229             : 
     230             :       TYPE(cp_fm_type), INTENT(IN)                       :: matrix, eigenvectors
     231             :       REAL(KIND=dp), DIMENSION(:), INTENT(OUT)           :: eigenvalues
     232             :       INTEGER, INTENT(OUT), OPTIONAL                     :: info
     233             : 
     234             :       CHARACTER(LEN=*), PARAMETER :: routineN = 'choose_eigv_solver'
     235             : 
     236             :       ! Sample peak memory
     237      164735 :       CALL m_memory()
     238             : 
     239      164735 :       IF (PRESENT(info)) info = 0 ! Default for solvers that do not return an info.
     240             : 
     241      164735 :       IF (diag_type == FM_DIAG_TYPE_SCALAPACK) THEN
     242        4160 :          CALL cp_fm_syevd(matrix, eigenvectors, eigenvalues, info)
     243             : 
     244      160575 :       ELSE IF (diag_type == FM_DIAG_TYPE_ELPA) THEN
     245      160575 :          IF (matrix%matrix_struct%nrow_global < elpa_neigvec_min) THEN
     246             :             ! We don't trust ELPA with very small matrices.
     247       99775 :             CALL cp_fm_syevd(matrix, eigenvectors, eigenvalues, info)
     248             :          ELSE
     249       60800 :             CALL cp_fm_diag_elpa(matrix, eigenvectors, eigenvalues)
     250             :          END IF
     251             : 
     252           0 :       ELSE IF (diag_type == FM_DIAG_TYPE_CUSOLVER) THEN
     253           0 :          IF (matrix%matrix_struct%nrow_global < 64) THEN
     254             :             ! We don't trust cuSolver with very small matrices.
     255           0 :             CALL cp_fm_syevd(matrix, eigenvectors, eigenvalues, info)
     256             :          ELSE
     257           0 :             CALL cp_fm_diag_cusolver(matrix, eigenvectors, eigenvalues)
     258             :          END IF
     259             : 
     260             : #if defined(__DLAF)
     261             :       ELSE IF (diag_type == FM_DIAG_TYPE_DLAF) THEN
     262             :          IF (matrix%matrix_struct%nrow_global < dlaf_neigvec_min) THEN
     263             :             ! Use ScaLAPACK for small matrices
     264             :             CALL cp_fm_syevd(matrix, eigenvectors, eigenvalues, info)
     265             :          ELSE
     266             :             CALL cp_fm_diag_dlaf(matrix, eigenvectors, eigenvalues)
     267             :          END IF
     268             : #endif
     269             : 
     270             :       ELSE
     271           0 :          CPABORT("ERROR in "//routineN//": Invalid diagonalization type requested")
     272             :       END IF
     273             : 
     274      164735 :       CALL check_diag(matrix, eigenvectors, nvec=SIZE(eigenvalues))
     275             : 
     276      164735 :    END SUBROUTINE choose_eigv_solver
     277             : 
     278             : ! **************************************************************************************************
     279             : !> \brief   Check result of diagonalization, i.e. the orthonormality of the eigenvectors
     280             : !> \param matrix Work matrix
     281             : !> \param eigenvectors Eigenvectors to be checked
     282             : !> \param nvec ...
     283             : ! **************************************************************************************************
     284      271268 :    SUBROUTINE check_diag(matrix, eigenvectors, nvec)
     285             : 
     286             :       TYPE(cp_fm_type), INTENT(IN)                    :: matrix, eigenvectors
     287             :       INTEGER, INTENT(IN)                                :: nvec
     288             : 
     289             :       CHARACTER(LEN=*), PARAMETER                        :: routineN = 'check_diag'
     290             : 
     291             :       CHARACTER(LEN=default_string_length)               :: diag_type_name
     292             :       REAL(KIND=dp)                                      :: eps_abort, eps_warning
     293             :       INTEGER                                            :: handle, i, j, ncol, nrow, output_unit
     294             :       LOGICAL                                            :: check_eigenvectors
     295             : #if defined(__parallel)
     296             :       TYPE(cp_blacs_env_type), POINTER                   :: context
     297             :       INTEGER                                            :: il, jl, ipcol, iprow, &
     298             :                                                             mypcol, myprow, npcol, nprow
     299             :       INTEGER, DIMENSION(9)                              :: desca
     300             : #endif
     301             : 
     302      271268 :       CALL timeset(routineN, handle)
     303             : 
     304      271268 :       IF (diag_type == FM_DIAG_TYPE_SCALAPACK) THEN
     305        8320 :          diag_type_name = "SYEVD"
     306      262948 :       ELSE IF (diag_type == FM_DIAG_TYPE_ELPA) THEN
     307      262948 :          diag_type_name = "ELPA"
     308           0 :       ELSE IF (diag_type == FM_DIAG_TYPE_CUSOLVER) THEN
     309           0 :          diag_type_name = "CUSOLVER"
     310           0 :       ELSE IF (diag_type == FM_DIAG_TYPE_DLAF) THEN
     311           0 :          diag_type_name = "DLAF"
     312             :       ELSE
     313           0 :          CPABORT("Unknown diag_type")
     314             :       END IF
     315             : 
     316      271268 :       output_unit = default_output_unit
     317             : 
     318      271268 :       eps_warning = eps_check_diag_default
     319             : #if defined(__CHECK_DIAG)
     320             :       check_eigenvectors = .TRUE.
     321             :       IF (eps_check_diag >= 0.0_dp) THEN
     322             :          eps_warning = eps_check_diag
     323             :       END IF
     324             : #else
     325      271268 :       IF (eps_check_diag >= 0.0_dp) THEN
     326         242 :          check_eigenvectors = .TRUE.
     327         242 :          eps_warning = eps_check_diag
     328             :       ELSE
     329             :          check_eigenvectors = .FALSE.
     330             :       END IF
     331             : #endif
     332      271268 :       eps_abort = 10.0_dp*eps_warning
     333             : 
     334      271268 :       IF (check_eigenvectors) THEN
     335             : #if defined(__parallel)
     336         242 :          nrow = eigenvectors%matrix_struct%nrow_global
     337         242 :          ncol = MIN(eigenvectors%matrix_struct%ncol_global, nvec)
     338         242 :          CALL cp_fm_gemm("T", "N", ncol, ncol, nrow, 1.0_dp, eigenvectors, eigenvectors, 0.0_dp, matrix)
     339         242 :          context => matrix%matrix_struct%context
     340         242 :          myprow = context%mepos(1)
     341         242 :          mypcol = context%mepos(2)
     342         242 :          nprow = context%num_pe(1)
     343         242 :          npcol = context%num_pe(2)
     344        2420 :          desca(:) = matrix%matrix_struct%descriptor(:)
     345        2792 :          DO i = 1, ncol
     346       41810 :             DO j = 1, ncol
     347       39018 :                CALL infog2l(i, j, desca, nprow, npcol, myprow, mypcol, il, jl, iprow, ipcol)
     348       41568 :                IF ((iprow == myprow) .AND. (ipcol == mypcol)) THEN
     349       19509 :                   IF (i == j) THEN
     350        1275 :                      IF (ABS(matrix%local_data(il, jl) - 1.0_dp) > eps_warning) THEN
     351             :                         WRITE (UNIT=output_unit, FMT="(/,T2,A,/,T2,A,I0,A,I0,A,F0.15,/,T2,A,ES10.3)") &
     352           0 :                            "The eigenvectors returned by "//TRIM(diag_type_name)//" are not orthonormal", &
     353           0 :                            "Matrix element (", i, ", ", j, ") = ", matrix%local_data(il, jl), &
     354           0 :                            "The deviation from the expected value 1 is", ABS(matrix%local_data(il, jl) - 1.0_dp)
     355           0 :                         IF (ABS(matrix%local_data(il, jl) - 1.0_dp) > eps_abort) THEN
     356           0 :                            CPABORT("ERROR in "//routineN//": Check of matrix diagonalization failed")
     357             :                         ELSE
     358           0 :                            CPWARN("Check of matrix diagonalization failed in routine "//routineN)
     359             :                         END IF
     360             :                      END IF
     361             :                   ELSE
     362       18234 :                      IF (ABS(matrix%local_data(il, jl)) > eps_warning) THEN
     363             :                         WRITE (UNIT=output_unit, FMT="(/,T2,A,/,T2,A,I0,A,I0,A,F0.15,/,T2,A,ES10.3)") &
     364           0 :                            "The eigenvectors returned by "//TRIM(diag_type_name)//" are not orthonormal", &
     365           0 :                            "Matrix element (", i, ", ", j, ") = ", matrix%local_data(il, jl), &
     366           0 :                            "The deviation from the expected value 0 is", ABS(matrix%local_data(il, jl))
     367           0 :                         IF (ABS(matrix%local_data(il, jl)) > eps_abort) THEN
     368           0 :                            CPABORT("ERROR in "//routineN//": Check of matrix diagonalization failed")
     369             :                         ELSE
     370           0 :                            CPWARN("Check of matrix diagonalization failed in routine "//routineN)
     371             :                         END IF
     372             :                      END IF
     373             :                   END IF
     374             :                END IF
     375             :             END DO
     376             :          END DO
     377             : #else
     378             :          nrow = SIZE(eigenvectors%local_data, 1)
     379             :          ncol = MIN(SIZE(eigenvectors%local_data, 2), nvec)
     380             :          CALL dgemm("T", "N", ncol, ncol, nrow, 1.0_dp, &
     381             :                     eigenvectors%local_data(1, 1), nrow, &
     382             :                     eigenvectors%local_data(1, 1), nrow, &
     383             :                     0.0_dp, matrix%local_data(1, 1), nrow)
     384             :          DO i = 1, ncol
     385             :             DO j = 1, ncol
     386             :                IF (i == j) THEN
     387             :                   IF (ABS(matrix%local_data(i, j) - 1.0_dp) > eps_warning) THEN
     388             :                      WRITE (UNIT=output_unit, FMT="(/,T2,A,/,T2,A,I0,A,I0,A,F0.15,/,T2,A,ES10.3)") &
     389             :                         "The eigenvectors returned by "//TRIM(diag_type_name)//" are not orthonormal", &
     390             :                         "Matrix element (", i, ", ", j, ") = ", matrix%local_data(i, j), &
     391             :                         "The deviation from the expected value 1 is", ABS(matrix%local_data(i, j) - 1.0_dp)
     392             :                      IF (ABS(matrix%local_data(i, j) - 1.0_dp) > eps_abort) THEN
     393             :                         CPABORT("ERROR in "//routineN//": Check of matrix diagonalization failed")
     394             :                      ELSE
     395             :                         CPWARN("Check of matrix diagonalization failed in routine "//routineN)
     396             :                      END IF
     397             :                   END IF
     398             :                ELSE
     399             :                   IF (ABS(matrix%local_data(i, j)) > eps_warning) THEN
     400             :                      WRITE (UNIT=output_unit, FMT="(/,T2,A,/,T2,A,I0,A,I0,A,F0.15,/,T2,A,ES10.3)") &
     401             :                         "The eigenvectors returned by "//TRIM(diag_type_name)//" are not orthonormal", &
     402             :                         "Matrix element (", i, ", ", j, ") = ", matrix%local_data(i, j), &
     403             :                         "The deviation from the expected value 0 is", ABS(matrix%local_data(i, j))
     404             :                      IF (ABS(matrix%local_data(i, j)) > eps_abort) THEN
     405             :                         CPABORT("ERROR in "//routineN//": Check of matrix diagonalization failed")
     406             :                      ELSE
     407             :                         CPWARN("Check of matrix diagonalization failed in routine "//routineN)
     408             :                      END IF
     409             :                   END IF
     410             :                END IF
     411             :             END DO
     412             :          END DO
     413             : #endif
     414             :       END IF
     415             : 
     416      271268 :       CALL timestop(handle)
     417             : 
     418      271268 :    END SUBROUTINE check_diag
     419             : 
     420             : ! **************************************************************************************************
     421             : !> \brief   Computes all eigenvalues and vectors of a real symmetric matrix
     422             : !>          significantly faster than syevx, scales also much better.
     423             : !>          Needs workspace to allocate all the eigenvectors
     424             : !> \param matrix ...
     425             : !> \param eigenvectors ...
     426             : !> \param eigenvalues ...
     427             : !> \param info ...
     428             : !> \par     matrix is supposed to be in upper triangular form, and overwritten by this routine
     429             : !> \par     info If present returns error code and prevents program stops.
     430             : !>               Works currently only for scalapack.
     431             : !>               Other solvers will end the program regardless of PRESENT(info).
     432             : ! **************************************************************************************************
     433      106493 :    SUBROUTINE cp_fm_syevd(matrix, eigenvectors, eigenvalues, info)
     434             : 
     435             :       TYPE(cp_fm_type), INTENT(IN)             :: matrix, eigenvectors
     436             :       REAL(KIND=dp), DIMENSION(:), INTENT(OUT) :: eigenvalues
     437             :       INTEGER, INTENT(OUT), OPTIONAL           :: info
     438             : 
     439             :       CHARACTER(LEN=*), PARAMETER              :: routineN = 'cp_fm_syevd'
     440             : 
     441             :       INTEGER                                  :: handle, myinfo, n, nmo
     442             :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:) :: eig
     443             : #if defined(__parallel)
     444             :       TYPE(cp_fm_type)                         :: eigenvectors_new, matrix_new
     445             : #else
     446             :       CHARACTER(LEN=2*default_string_length)   :: message
     447             :       INTEGER                                  :: liwork, lwork, nl
     448             :       INTEGER, DIMENSION(:), POINTER           :: iwork
     449             :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: m
     450             :       REAL(KIND=dp), DIMENSION(:), POINTER     :: work
     451             : #endif
     452             : 
     453      106493 :       CALL timeset(routineN, handle)
     454             : 
     455      106493 :       myinfo = 0
     456             : 
     457      106493 :       n = matrix%matrix_struct%nrow_global
     458      319479 :       ALLOCATE (eig(n))
     459             : 
     460             : #if defined(__parallel)
     461             :       ! Determine if the input matrix needs to be redistributed before diagonalization.
     462             :       ! Heuristics are used to determine the optimal number of CPUs for diagonalization.
     463             :       ! The redistributed matrix is stored in matrix_new, which is just a pointer
     464             :       ! to the original matrix if no redistribution is required
     465      106493 :       CALL cp_fm_redistribute_start(matrix, eigenvectors, matrix_new, eigenvectors_new)
     466             : 
     467             :       ! Call scalapack on CPUs that hold the new matrix
     468      106493 :       IF (ASSOCIATED(matrix_new%matrix_struct)) THEN
     469       53910 :          IF (PRESENT(info)) THEN
     470        1107 :             CALL cp_fm_syevd_base(matrix_new, eigenvectors_new, eig, myinfo)
     471             :          ELSE
     472       52803 :             CALL cp_fm_syevd_base(matrix_new, eigenvectors_new, eig)
     473             :          END IF
     474             :       END IF
     475             :       ! Redistribute results and clean up
     476      106493 :       CALL cp_fm_redistribute_end(matrix, eigenvectors, eig, matrix_new, eigenvectors_new)
     477             : #else
     478             :       ! Retrieve the optimal work array sizes first
     479             :       lwork = -1
     480             :       liwork = -1
     481             :       m => matrix%local_data
     482             :       eig(:) = 0.0_dp
     483             : 
     484             :       ALLOCATE (work(1))
     485             :       work(:) = 0.0_dp
     486             :       ALLOCATE (iwork(1))
     487             :       iwork(:) = 0
     488             :       nl = SIZE(m, 1)
     489             : 
     490             :       CALL dsyevd('V', 'U', n, m(1, 1), nl, eig(1), work(1), lwork, iwork(1), liwork, myinfo)
     491             : 
     492             :       IF (myinfo /= 0) THEN
     493             :          WRITE (message, "(A,I0,A)") "ERROR in DSYEVD: Work space query failed (INFO = ", myinfo, ")"
     494             :          IF (PRESENT(info)) THEN
     495             :             CPWARN(TRIM(message))
     496             :          ELSE
     497             :             CPABORT(TRIM(message))
     498             :          END IF
     499             :       END IF
     500             : 
     501             :       ! Reallocate work arrays and perform diagonalisation
     502             :       lwork = INT(work(1))
     503             :       DEALLOCATE (work)
     504             :       ALLOCATE (work(lwork))
     505             :       work(:) = 0.0_dp
     506             : 
     507             :       liwork = iwork(1)
     508             :       DEALLOCATE (iwork)
     509             :       ALLOCATE (iwork(liwork))
     510             :       iwork(:) = 0
     511             : 
     512             :       CALL dsyevd('V', 'U', n, m(1, 1), nl, eig(1), work(1), lwork, iwork(1), liwork, myinfo)
     513             : 
     514             :       IF (myinfo /= 0) THEN
     515             :          WRITE (message, "(A,I0,A)") "ERROR in DSYEVD: Matrix diagonalization failed (INFO = ", myinfo, ")"
     516             :          IF (PRESENT(info)) THEN
     517             :             CPWARN(TRIM(message))
     518             :          ELSE
     519             :             CPABORT(TRIM(message))
     520             :          END IF
     521             :       END IF
     522             : 
     523             :       CALL cp_fm_to_fm(matrix, eigenvectors)
     524             : 
     525             :       DEALLOCATE (iwork)
     526             :       DEALLOCATE (work)
     527             : #endif
     528             : 
     529      106493 :       IF (PRESENT(info)) info = myinfo
     530             : 
     531      106493 :       nmo = SIZE(eigenvalues, 1)
     532      106493 :       IF (nmo > n) THEN
     533        4860 :          eigenvalues(1:n) = eig(1:n)
     534             :       ELSE
     535      743842 :          eigenvalues(1:nmo) = eig(1:nmo)
     536             :       END IF
     537             : 
     538      106493 :       DEALLOCATE (eig)
     539             : 
     540      106493 :       CALL check_diag(matrix, eigenvectors, n)
     541             : 
     542      106493 :       CALL timestop(handle)
     543             : 
     544      212986 :    END SUBROUTINE cp_fm_syevd
     545             : 
     546             : ! **************************************************************************************************
     547             : !> \brief ...
     548             : !> \param matrix ...
     549             : !> \param eigenvectors ...
     550             : !> \param eigenvalues ...
     551             : !> \param info ...
     552             : ! **************************************************************************************************
     553       53910 :    SUBROUTINE cp_fm_syevd_base(matrix, eigenvectors, eigenvalues, info)
     554             : 
     555             :       TYPE(cp_fm_type), INTENT(IN)          :: matrix, eigenvectors
     556             :       REAL(KIND=dp), DIMENSION(:), INTENT(OUT) :: eigenvalues
     557             :       INTEGER, INTENT(OUT), OPTIONAL           :: info
     558             : 
     559             :       CHARACTER(LEN=*), PARAMETER              :: routineN = 'cp_fm_syevd_base'
     560             : 
     561             :       CHARACTER(LEN=2*default_string_length)   :: message
     562             :       INTEGER                                  :: handle, myinfo
     563             : #if defined(__parallel)
     564             :       TYPE(cp_blacs_env_type), POINTER         :: context
     565             :       INTEGER                                  :: liwork, lwork, &
     566             :                                                   mypcol, myprow, n
     567             :       INTEGER, DIMENSION(9)                    :: descm, descv
     568       53910 :       INTEGER, DIMENSION(:), POINTER           :: iwork
     569       53910 :       REAL(KIND=dp), DIMENSION(:), POINTER     :: work
     570       53910 :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: m, v
     571             : #if defined (__HAS_IEEE_EXCEPTIONS)
     572             :       LOGICAL, DIMENSION(5)                    :: halt
     573             : #endif
     574             : #endif
     575             : 
     576       53910 :       CALL timeset(routineN, handle)
     577             : 
     578       53910 :       myinfo = 0
     579             : 
     580             : #if defined(__parallel)
     581             : 
     582       53910 :       n = matrix%matrix_struct%nrow_global
     583       53910 :       m => matrix%local_data
     584       53910 :       context => matrix%matrix_struct%context
     585       53910 :       myprow = context%mepos(1)
     586       53910 :       mypcol = context%mepos(2)
     587      539100 :       descm(:) = matrix%matrix_struct%descriptor(:)
     588             : 
     589       53910 :       v => eigenvectors%local_data
     590      539100 :       descv(:) = eigenvectors%matrix_struct%descriptor(:)
     591             : 
     592       53910 :       liwork = 7*n + 8*context%num_pe(2) + 2
     593      161730 :       ALLOCATE (iwork(liwork))
     594             : 
     595             :       ! Work space query
     596       53910 :       lwork = -1
     597       53910 :       ALLOCATE (work(1))
     598             : 
     599             :       CALL pdsyevd('V', 'U', n, m(1, 1), 1, 1, descm, eigenvalues(1), v(1, 1), 1, 1, descv, &
     600       53910 :                    work(1), lwork, iwork(1), liwork, myinfo)
     601             : 
     602       53910 :       IF (matrix%matrix_struct%para_env%is_source() .AND. (myinfo /= 0)) THEN
     603           0 :          WRITE (message, "(A,I0,A)") "ERROR in PDSYEVD: Work space query failed (INFO = ", myinfo, ")"
     604           0 :          IF (PRESENT(info)) THEN
     605           0 :             CPWARN(TRIM(message))
     606             :          ELSE
     607           0 :             CPABORT(TRIM(message))
     608             :          END IF
     609             :       END IF
     610             : 
     611             :       ! look here for a PDORMTR warning :-)
     612             :       ! this routine seems to need more workspace than reported
     613             :       ! (? lapack bug ...)
     614             :       ! arbitrary additional memory  ... we give 100000 more words
     615             :       ! (it seems to depend on the block size used)
     616             : 
     617       53910 :       lwork = NINT(work(1) + 100000)
     618             :       ! lwork = NINT(work(1))
     619       53910 :       DEALLOCATE (work)
     620      161730 :       ALLOCATE (work(lwork))
     621             : 
     622             :       ! Scalapack takes advantage of IEEE754 exceptions for speedup.
     623             :       ! Therefore, we disable floating point traps temporarily.
     624             : #if defined (__HAS_IEEE_EXCEPTIONS)
     625             :       CALL ieee_get_halting_mode(IEEE_ALL, halt)
     626             :       CALL ieee_set_halting_mode(IEEE_ALL, .FALSE.)
     627             : #endif
     628             : 
     629             :       CALL pdsyevd('V', 'U', n, m(1, 1), 1, 1, descm, eigenvalues(1), v(1, 1), 1, 1, descv, &
     630       53910 :                    work(1), lwork, iwork(1), liwork, myinfo)
     631             : 
     632             : #if defined (__HAS_IEEE_EXCEPTIONS)
     633             :       CALL ieee_set_halting_mode(IEEE_ALL, halt)
     634             : #endif
     635       53910 :       IF (matrix%matrix_struct%para_env%is_source() .AND. (myinfo /= 0)) THEN
     636           0 :          WRITE (message, "(A,I0,A)") "ERROR in PDSYEVD: Matrix diagonalization failed (INFO = ", myinfo, ")"
     637           0 :          IF (PRESENT(info)) THEN
     638           0 :             CPWARN(TRIM(message))
     639             :          ELSE
     640           0 :             CPABORT(TRIM(message))
     641             :          END IF
     642             :       END IF
     643             : 
     644       53910 :       IF (PRESENT(info)) info = myinfo
     645             : 
     646       53910 :       DEALLOCATE (work)
     647       53910 :       DEALLOCATE (iwork)
     648             : #else
     649             :       MARK_USED(matrix)
     650             :       MARK_USED(eigenvectors)
     651             :       MARK_USED(eigenvalues)
     652             :       myinfo = -1
     653             :       IF (PRESENT(info)) info = myinfo
     654             :       message = "ERROR in "//TRIM(routineN)// &
     655             :                 ": Matrix diagonalization using PDSYEVD requested without ScaLAPACK support"
     656             :       CPABORT(TRIM(message))
     657             : #endif
     658             : 
     659       53910 :       CALL timestop(handle)
     660             : 
     661       53910 :    END SUBROUTINE cp_fm_syevd_base
     662             : 
     663             : ! **************************************************************************************************
     664             : !> \brief   compute eigenvalues and optionally eigenvectors of a real symmetric matrix using scalapack.
     665             : !>          If eigenvectors are required this routine will replicate a full matrix on each CPU...
     666             : !>          if more than a handful of vectors are needed, use cp_fm_syevd instead
     667             : !> \param matrix ...
     668             : !> \param eigenvectors ...
     669             : !> \param eigenvalues ...
     670             : !> \param neig ...
     671             : !> \param work_syevx ...
     672             : !> \par     matrix is supposed to be in upper triangular form, and overwritten by this routine
     673             : !>          neig   is the number of vectors needed (default all)
     674             : !>          work_syevx evec calculation only, is the fraction of the working buffer allowed (1.0 use full buffer)
     675             : !>                     reducing this saves time, but might cause the routine to fail
     676             : ! **************************************************************************************************
     677          40 :    SUBROUTINE cp_fm_syevx(matrix, eigenvectors, eigenvalues, neig, work_syevx)
     678             : 
     679             :       ! Diagonalise the symmetric n by n matrix using the LAPACK library.
     680             : 
     681             :       TYPE(cp_fm_type), INTENT(IN)                 :: matrix
     682             :       TYPE(cp_fm_type), OPTIONAL, INTENT(IN)       :: eigenvectors
     683             :       REAL(KIND=dp), OPTIONAL, INTENT(IN)          :: work_syevx
     684             :       INTEGER, INTENT(IN), OPTIONAL                :: neig
     685             :       REAL(KIND=dp), DIMENSION(:), INTENT(OUT)     :: eigenvalues
     686             : 
     687             :       CHARACTER(LEN=*), PARAMETER                  :: routineN = "cp_fm_syevx"
     688             : 
     689             : #if defined(__parallel)
     690             :       REAL(KIND=dp), PARAMETER                     :: orfac = -1.0_dp
     691             : #endif
     692             :       REAL(KIND=dp), PARAMETER                     :: vl = 0.0_dp, &
     693             :                                                       vu = 0.0_dp
     694             : 
     695             :       TYPE(cp_blacs_env_type), POINTER             :: context
     696             :       TYPE(cp_logger_type), POINTER                :: logger
     697             :       CHARACTER(LEN=1)                             :: job_type
     698             :       REAL(KIND=dp)                                :: abstol, work_syevx_local
     699             :       INTEGER                                      :: handle, info, &
     700             :                                                       liwork, lwork, m, mypcol, &
     701             :                                                       myprow, n, nb, npcol, &
     702             :                                                       nprow, output_unit, &
     703             :                                                       neig_local
     704             :       LOGICAL                                      :: ionode, needs_evecs
     705          40 :       INTEGER, DIMENSION(:), ALLOCATABLE           :: ifail, iwork
     706          40 :       REAL(KIND=dp), DIMENSION(:), ALLOCATABLE     :: w, work
     707          40 :       REAL(KIND=dp), DIMENSION(:, :), POINTER      :: a, z
     708             : 
     709             :       REAL(KIND=dp), EXTERNAL                      :: dlamch
     710             : 
     711             : #if defined(__parallel)
     712             :       INTEGER                                      :: nn, np0, npe, nq0, nz
     713             :       INTEGER, DIMENSION(9)                        :: desca, descz
     714          40 :       INTEGER, DIMENSION(:), ALLOCATABLE           :: iclustr
     715          40 :       REAL(KIND=dp), DIMENSION(:), ALLOCATABLE     :: gap
     716             :       INTEGER, EXTERNAL                            :: iceil, numroc
     717             : #if defined (__HAS_IEEE_EXCEPTIONS)
     718             :       LOGICAL, DIMENSION(5)                        :: halt
     719             : #endif
     720             : #else
     721             :       INTEGER                                      :: nla, nlz
     722             :       INTEGER, EXTERNAL                            :: ilaenv
     723             : #endif
     724             : 
     725             :       ! by default all
     726          40 :       n = matrix%matrix_struct%nrow_global
     727          40 :       neig_local = n
     728          40 :       IF (PRESENT(neig)) neig_local = neig
     729          40 :       IF (neig_local == 0) RETURN
     730             : 
     731          40 :       CALL timeset(routineN, handle)
     732             : 
     733          40 :       needs_evecs = PRESENT(eigenvectors)
     734             : 
     735          40 :       logger => cp_get_default_logger()
     736          40 :       ionode = logger%para_env%is_source()
     737          40 :       n = matrix%matrix_struct%nrow_global
     738             : 
     739             :       ! by default allocate all needed space
     740          40 :       work_syevx_local = 1.0_dp
     741          40 :       IF (PRESENT(work_syevx)) work_syevx_local = work_syevx
     742             : 
     743             :       ! set scalapack job type
     744          40 :       IF (needs_evecs) THEN
     745          40 :          job_type = "V"
     746             :       ELSE
     747           0 :          job_type = "N"
     748             :       END IF
     749             : 
     750             :       ! target the most accurate calculation of the eigenvalues
     751          40 :       abstol = 2.0_dp*dlamch("S")
     752             : 
     753          40 :       context => matrix%matrix_struct%context
     754          40 :       myprow = context%mepos(1)
     755          40 :       mypcol = context%mepos(2)
     756          40 :       nprow = context%num_pe(1)
     757          40 :       npcol = context%num_pe(2)
     758             : 
     759         120 :       ALLOCATE (w(n))
     760         560 :       eigenvalues(:) = 0.0_dp
     761             : #if defined(__parallel)
     762             : 
     763          40 :       IF (matrix%matrix_struct%nrow_block /= matrix%matrix_struct%ncol_block) THEN
     764           0 :          CPABORT("ERROR in "//routineN//": Invalid blocksize (no square blocks) found")
     765             :       END IF
     766             : 
     767          40 :       a => matrix%local_data
     768         400 :       desca(:) = matrix%matrix_struct%descriptor(:)
     769             : 
     770          40 :       IF (needs_evecs) THEN
     771          40 :          z => eigenvectors%local_data
     772         400 :          descz(:) = eigenvectors%matrix_struct%descriptor(:)
     773             :       ELSE
     774             :          ! z will not be referenced
     775           0 :          z => matrix%local_data
     776           0 :          descz = desca
     777             :       END IF
     778             : 
     779             :       ! Get the optimal work storage size
     780             : 
     781          40 :       npe = nprow*npcol
     782          40 :       nb = matrix%matrix_struct%nrow_block
     783          40 :       nn = MAX(n, nb, 2)
     784          40 :       np0 = numroc(nn, nb, 0, 0, nprow)
     785          40 :       nq0 = MAX(numroc(nn, nb, 0, 0, npcol), nb)
     786             : 
     787          40 :       IF (needs_evecs) THEN
     788             :          lwork = 5*n + MAX(5*nn, np0*nq0) + iceil(neig_local, npe)*nn + 2*nb*nb + &
     789          40 :                  INT(work_syevx_local*REAL((neig_local - 1)*n, dp)) !!!! allocates a full matrix on every CPU !!!!!
     790             :       ELSE
     791           0 :          lwork = 5*n + MAX(5*nn, nb*(np0 + 1))
     792             :       END IF
     793          40 :       liwork = 6*MAX(N, npe + 1, 4)
     794             : 
     795         120 :       ALLOCATE (gap(npe))
     796         120 :       gap = 0.0_dp
     797         120 :       ALLOCATE (iclustr(2*npe))
     798         200 :       iclustr = 0
     799         120 :       ALLOCATE (ifail(n))
     800         560 :       ifail = 0
     801         120 :       ALLOCATE (iwork(liwork))
     802         120 :       ALLOCATE (work(lwork))
     803             : 
     804             :       ! Scalapack takes advantage of IEEE754 exceptions for speedup.
     805             :       ! Therefore, we disable floating point traps temporarily.
     806             : #if defined (__HAS_IEEE_EXCEPTIONS)
     807             :       CALL ieee_get_halting_mode(IEEE_ALL, halt)
     808             :       CALL ieee_set_halting_mode(IEEE_ALL, .FALSE.)
     809             : #endif
     810             : 
     811             :       CALL pdsyevx(job_type, "I", "U", n, a(1, 1), 1, 1, desca, vl, vu, 1, neig_local, abstol, m, nz, w(1), orfac, &
     812          40 :                    z(1, 1), 1, 1, descz, work(1), lwork, iwork(1), liwork, ifail(1), iclustr(1), gap, info)
     813             : 
     814             : #if defined (__HAS_IEEE_EXCEPTIONS)
     815             :       CALL ieee_set_halting_mode(IEEE_ALL, halt)
     816             : #endif
     817             : 
     818             :       ! Error handling
     819             : 
     820          40 :       IF (info /= 0) THEN
     821           0 :          IF (ionode) THEN
     822           0 :             output_unit = cp_logger_get_unit_nr(logger, local=.FALSE.)
     823             :             WRITE (unit=output_unit, FMT="(/,(T3,A,T12,1X,I10))") &
     824           0 :                "info    = ", info, &
     825           0 :                "lwork   = ", lwork, &
     826           0 :                "liwork  = ", liwork, &
     827           0 :                "nz      = ", nz
     828           0 :             IF (info > 0) THEN
     829             :                WRITE (unit=output_unit, FMT="(/,T3,A,(T12,6(1X,I10)))") &
     830           0 :                   "ifail   = ", ifail
     831             :                WRITE (unit=output_unit, FMT="(/,T3,A,(T12,6(1X,I10)))") &
     832           0 :                   "iclustr = ", iclustr
     833             :                WRITE (unit=output_unit, FMT="(/,T3,A,(T12,6(1X,E10.3)))") &
     834           0 :                   "gap     = ", gap
     835             :             END IF
     836             :          END IF
     837           0 :          CPABORT("ERROR in PDSYEVX (ScaLAPACK)")
     838             :       END IF
     839             : 
     840             :       ! Release work storage
     841             : 
     842          40 :       DEALLOCATE (gap)
     843          40 :       DEALLOCATE (iclustr)
     844          40 :       DEALLOCATE (ifail)
     845          40 :       DEALLOCATE (iwork)
     846          40 :       DEALLOCATE (work)
     847             : 
     848             : #else
     849             : 
     850             :       a => matrix%local_data
     851             :       IF (needs_evecs) THEN
     852             :          z => eigenvectors%local_data
     853             :       ELSE
     854             :          ! z will not be referenced
     855             :          z => matrix%local_data
     856             :       END IF
     857             : 
     858             :       ! Get the optimal work storage size
     859             : 
     860             :       nb = MAX(ilaenv(1, "DSYTRD", "U", n, -1, -1, -1), &
     861             :                ilaenv(1, "DORMTR", "U", n, -1, -1, -1))
     862             : 
     863             :       lwork = MAX((nb + 3)*n, 8*n) + n ! sun bug fix
     864             :       liwork = 5*n
     865             : 
     866             :       ALLOCATE (ifail(n))
     867             :       ifail = 0
     868             :       ALLOCATE (iwork(liwork))
     869             :       ALLOCATE (work(lwork))
     870             :       info = 0
     871             :       nla = SIZE(a, 1)
     872             :       nlz = SIZE(z, 1)
     873             : 
     874             :       CALL dsyevx(job_type, "I", "U", n, a(1, 1), nla, vl, vu, 1, neig_local, &
     875             :                   abstol, m, w, z(1, 1), nlz, work(1), lwork, iwork(1), ifail(1), info)
     876             : 
     877             :       ! Error handling
     878             : 
     879             :       IF (info /= 0) THEN
     880             :          output_unit = cp_logger_get_unit_nr(logger, local=.FALSE.)
     881             :          WRITE (unit=output_unit, FMT="(/,(T3,A,T12,1X,I10))") &
     882             :             "info    = ", info
     883             :          IF (info > 0) THEN
     884             :             WRITE (unit=output_unit, FMT="(/,T3,A,(T12,6(1X,I10)))") &
     885             :                "ifail   = ", ifail
     886             :          END IF
     887             :          CPABORT("Error in DSYEVX (ScaLAPACK)")
     888             :       END IF
     889             : 
     890             :       ! Release work storage
     891             : 
     892             :       DEALLOCATE (ifail)
     893             :       DEALLOCATE (iwork)
     894             :       DEALLOCATE (work)
     895             : 
     896             : #endif
     897         400 :       eigenvalues(1:neig_local) = w(1:neig_local)
     898          40 :       DEALLOCATE (w)
     899             : 
     900          40 :       IF (needs_evecs) CALL check_diag(matrix, eigenvectors, neig_local)
     901             : 
     902          40 :       CALL timestop(handle)
     903             : 
     904         120 :    END SUBROUTINE cp_fm_syevx
     905             : 
     906             : ! **************************************************************************************************
     907             : !> \brief decomposes a quadratic matrix into its singular value decomposition
     908             : !> \param matrix_a ...
     909             : !> \param matrix_eigvl ...
     910             : !> \param matrix_eigvr_t ...
     911             : !> \param eigval ...
     912             : !> \param info ...
     913             : !> \author Maximilian Graml
     914             : ! **************************************************************************************************
     915         100 :    SUBROUTINE cp_fm_svd(matrix_a, matrix_eigvl, matrix_eigvr_t, eigval, info)
     916             : 
     917             :       TYPE(cp_fm_type), INTENT(IN)             :: matrix_a
     918             :       TYPE(cp_fm_type), INTENT(INOUT)          :: matrix_eigvl, matrix_eigvr_t
     919             :       REAL(KIND=dp), DIMENSION(:), POINTER, &
     920             :          INTENT(INOUT)                         :: eigval
     921             :       INTEGER, INTENT(OUT), OPTIONAL           :: info
     922             : 
     923             :       CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_svd'
     924             : 
     925             :       CHARACTER(LEN=2*default_string_length)   :: message
     926             :       INTEGER                                  :: handle, n, m, myinfo, lwork
     927         100 :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: a
     928             :       TYPE(cp_fm_type)                         :: matrix_lu
     929         100 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:) :: work
     930             : 
     931             : #if defined(__parallel)
     932             :       INTEGER, DIMENSION(9)                    :: desca, descu, descvt
     933             : #endif
     934         100 :       CALL timeset(routineN, handle)
     935             : 
     936             :       CALL cp_fm_create(matrix=matrix_lu, &
     937             :                         matrix_struct=matrix_a%matrix_struct, &
     938         100 :                         name="A_lu"//TRIM(ADJUSTL(cp_to_string(1)))//"MATRIX")
     939         100 :       CALL cp_fm_to_fm(matrix_a, matrix_lu)
     940         100 :       a => matrix_lu%local_data
     941         100 :       m = matrix_lu%matrix_struct%nrow_global
     942         100 :       n = matrix_lu%matrix_struct%ncol_global
     943             :       ! Assert that incoming matrix is quadratic
     944         100 :       CPASSERT(m == n)
     945             : 
     946             :       ! Prepare for workspace queries
     947         100 :       myinfo = 0
     948         100 :       lwork = -1
     949         100 :       ALLOCATE (work(1))
     950         200 :       work(:) = 0.0_dp
     951             : #if defined(__parallel)
     952             :       ! To do: That might need a redistribution check as in cp_fm_syevd
     953        1000 :       desca(:) = matrix_lu%matrix_struct%descriptor(:)
     954        1000 :       descu(:) = matrix_eigvl%matrix_struct%descriptor(:)
     955        1000 :       descvt(:) = matrix_eigvr_t%matrix_struct%descriptor(:)
     956             : 
     957             :       ! Workspace query
     958             :       CALL pdgesvd('V', 'V', m, m, matrix_lu%local_data, 1, 1, desca, eigval, matrix_eigvl%local_data, &
     959         100 :                    1, 1, descu, matrix_eigvr_t%local_data, 1, 1, descvt, work, lwork, myinfo)
     960             : 
     961         100 :       IF (matrix_lu%matrix_struct%para_env%is_source() .AND. (myinfo /= 0)) THEN
     962           0 :          WRITE (message, "(A,I0,A)") "ERROR in PDGESVD: Work space query failed (INFO = ", myinfo, ")"
     963           0 :          IF (PRESENT(info)) THEN
     964           0 :             CPWARN(TRIM(message))
     965             :          ELSE
     966           0 :             CPABORT(TRIM(message))
     967             :          END IF
     968             :       END IF
     969             : 
     970         100 :       lwork = INT(work(1))
     971         100 :       DEALLOCATE (work)
     972         300 :       ALLOCATE (work(lwork))
     973             :       ! SVD
     974             :       CALL pdgesvd('V', 'V', m, m, matrix_lu%local_data, 1, 1, desca, eigval, matrix_eigvl%local_data, &
     975         100 :                    1, 1, descu, matrix_eigvr_t%local_data, 1, 1, descvt, work, lwork, myinfo)
     976             : 
     977         100 :       IF (matrix_lu%matrix_struct%para_env%is_source() .AND. (myinfo /= 0)) THEN
     978           0 :          WRITE (message, "(A,I0,A)") "ERROR in PDGESVD: Matrix diagonalization failed (INFO = ", myinfo, ")"
     979           0 :          IF (PRESENT(info)) THEN
     980           0 :             CPWARN(TRIM(message))
     981             :          ELSE
     982           0 :             CPABORT(TRIM(message))
     983             :          END IF
     984             :       END IF
     985             : #else
     986             :       ! Workspace query
     987             :       CALL dgesvd('S', 'S', m, m, matrix_lu%local_data, m, eigval, matrix_eigvl%local_data, &
     988             :                   m, matrix_eigvr_t%local_data, m, work, lwork, myinfo)
     989             : 
     990             :       IF (myinfo /= 0) THEN
     991             :          WRITE (message, "(A,I0,A)") "ERROR in DGESVD: Work space query failed (INFO = ", myinfo, ")"
     992             :          IF (PRESENT(info)) THEN
     993             :             CPWARN(TRIM(message))
     994             :          ELSE
     995             :             CPABORT(TRIM(message))
     996             :          END IF
     997             :       END IF
     998             : 
     999             :       ! SVD
    1000             :       lwork = INT(work(1))
    1001             :       DEALLOCATE (work)
    1002             :       ALLOCATE (work(lwork))
    1003             :       work(:) = 0.0_dp
    1004             : 
    1005             :       CALL dgesvd('S', 'S', m, m, matrix_lu%local_data, m, eigval, matrix_eigvl%local_data, &
    1006             :                   m, matrix_eigvr_t%local_data, m, work, lwork, myinfo)
    1007             : 
    1008             :       IF (myinfo /= 0) THEN
    1009             :          WRITE (message, "(A,I0,A)") "ERROR in DGESVD: Matrix diagonalization failed (INFO = ", myinfo, ")"
    1010             :          IF (PRESENT(info)) THEN
    1011             :             CPWARN(TRIM(message))
    1012             :          ELSE
    1013             :             CPABORT(TRIM(message))
    1014             :          END IF
    1015             :       END IF
    1016             : 
    1017             : #endif
    1018             :       ! Release intermediary matrices
    1019         100 :       DEALLOCATE (work)
    1020         100 :       CALL cp_fm_release(matrix_lu)
    1021             : 
    1022         100 :       IF (PRESENT(info)) info = myinfo
    1023             : 
    1024         100 :       CALL timestop(handle)
    1025         100 :    END SUBROUTINE cp_fm_svd
    1026             : 
    1027             : ! **************************************************************************************************
    1028             : !> \brief ...
    1029             : !> \param matrix ...
    1030             : !> \param work ...
    1031             : !> \param exponent ...
    1032             : !> \param threshold ...
    1033             : !> \param n_dependent ...
    1034             : !> \param verbose ...
    1035             : !> \param eigvals ...
    1036             : ! **************************************************************************************************
    1037        1280 :    SUBROUTINE cp_fm_power(matrix, work, exponent, threshold, n_dependent, verbose, eigvals)
    1038             : 
    1039             :       ! Raise the real symmetric n by n matrix to the power given by
    1040             :       ! the exponent. All eigenvectors with a corresponding eigenvalue lower
    1041             :       ! than threshold are quenched. result in matrix
    1042             : 
    1043             :       ! - Creation (29.03.1999, Matthias Krack)
    1044             :       ! - Parallelised using BLACS and ScaLAPACK (06.06.2001,MK)
    1045             : 
    1046             :       TYPE(cp_fm_type), INTENT(IN)               :: matrix, work
    1047             :       REAL(KIND=dp), INTENT(IN)                  :: exponent, threshold
    1048             :       INTEGER, INTENT(OUT)                       :: n_dependent
    1049             :       LOGICAL, INTENT(IN), OPTIONAL              :: verbose
    1050             :       REAL(KIND=dp), DIMENSION(2), INTENT(OUT), &
    1051             :          OPTIONAL                                :: eigvals
    1052             : 
    1053             :       CHARACTER(LEN=*), PARAMETER                :: routineN = 'cp_fm_power'
    1054             : 
    1055             :       INTEGER                                    :: handle, icol_global, &
    1056             :                                                     mypcol, myprow, &
    1057             :                                                     ncol_global, npcol, nprow, &
    1058             :                                                     nrow_global
    1059             :       LOGICAL                                    :: my_verbose
    1060             :       REAL(KIND=dp)                              :: condition_number, f, p
    1061             :       REAL(KIND=dp), DIMENSION(:), ALLOCATABLE   :: eigenvalues
    1062        1280 :       REAL(KIND=dp), DIMENSION(:, :), POINTER    :: eigenvectors
    1063             :       TYPE(cp_blacs_env_type), POINTER           :: context
    1064             : 
    1065             : #if defined(__parallel)
    1066             :       INTEGER           :: icol_local, ipcol, iprow, irow_global, &
    1067             :                            irow_local
    1068             : #endif
    1069             : 
    1070        1280 :       CALL timeset(routineN, handle)
    1071             : 
    1072        1280 :       my_verbose = .FALSE.
    1073        1280 :       IF (PRESENT(verbose)) my_verbose = verbose
    1074             : 
    1075        1280 :       context => matrix%matrix_struct%context
    1076        1280 :       myprow = context%mepos(1)
    1077        1280 :       mypcol = context%mepos(2)
    1078        1280 :       nprow = context%num_pe(1)
    1079        1280 :       npcol = context%num_pe(2)
    1080        1280 :       n_dependent = 0
    1081        1280 :       p = 0.5_dp*exponent
    1082             : 
    1083        1280 :       nrow_global = matrix%matrix_struct%nrow_global
    1084        1280 :       ncol_global = matrix%matrix_struct%ncol_global
    1085             : 
    1086        3840 :       ALLOCATE (eigenvalues(ncol_global))
    1087             : 
    1088       31193 :       eigenvalues(:) = 0.0_dp
    1089             : 
    1090             :       ! Compute the eigenvectors and eigenvalues
    1091             : 
    1092        1280 :       CALL choose_eigv_solver(matrix, work, eigenvalues)
    1093             : 
    1094        1280 :       IF (PRESENT(eigvals)) THEN
    1095         354 :          eigvals(1) = eigenvalues(1)
    1096         354 :          eigvals(2) = eigenvalues(ncol_global)
    1097             :       END IF
    1098             : 
    1099             : #if defined(__parallel)
    1100        1280 :       eigenvectors => work%local_data
    1101             : 
    1102             :       ! Build matrix**exponent with eigenvector quenching
    1103             : 
    1104       31193 :       DO icol_global = 1, ncol_global
    1105             : 
    1106       31193 :          IF (eigenvalues(icol_global) < threshold) THEN
    1107             : 
    1108          38 :             n_dependent = n_dependent + 1
    1109             : 
    1110          38 :             ipcol = work%matrix_struct%g2p_col(icol_global)
    1111             : 
    1112          38 :             IF (mypcol == ipcol) THEN
    1113          38 :                icol_local = work%matrix_struct%g2l_col(icol_global)
    1114        1422 :                DO irow_global = 1, nrow_global
    1115        1384 :                   iprow = work%matrix_struct%g2p_row(irow_global)
    1116        1422 :                   IF (myprow == iprow) THEN
    1117         692 :                      irow_local = work%matrix_struct%g2l_row(irow_global)
    1118         692 :                      eigenvectors(irow_local, icol_local) = 0.0_dp
    1119             :                   END IF
    1120             :                END DO
    1121             :             END IF
    1122             : 
    1123             :          ELSE
    1124             : 
    1125       29875 :             f = eigenvalues(icol_global)**p
    1126             : 
    1127       29875 :             ipcol = work%matrix_struct%g2p_col(icol_global)
    1128             : 
    1129       29875 :             IF (mypcol == ipcol) THEN
    1130       29875 :                icol_local = work%matrix_struct%g2l_col(icol_global)
    1131     1908080 :                DO irow_global = 1, nrow_global
    1132     1878205 :                   iprow = work%matrix_struct%g2p_row(irow_global)
    1133     1908080 :                   IF (myprow == iprow) THEN
    1134      983177 :                      irow_local = work%matrix_struct%g2l_row(irow_global)
    1135             :                      eigenvectors(irow_local, icol_local) = &
    1136      983177 :                         f*eigenvectors(irow_local, icol_local)
    1137             :                   END IF
    1138             :                END DO
    1139             :             END IF
    1140             : 
    1141             :          END IF
    1142             : 
    1143             :       END DO
    1144             : 
    1145             : #else
    1146             : 
    1147             :       eigenvectors => work%local_data
    1148             : 
    1149             :       ! Build matrix**exponent with eigenvector quenching
    1150             : 
    1151             :       DO icol_global = 1, ncol_global
    1152             : 
    1153             :          IF (eigenvalues(icol_global) < threshold) THEN
    1154             : 
    1155             :             n_dependent = n_dependent + 1
    1156             :             eigenvectors(1:nrow_global, icol_global) = 0.0_dp
    1157             : 
    1158             :          ELSE
    1159             : 
    1160             :             f = eigenvalues(icol_global)**p
    1161             :             eigenvectors(1:nrow_global, icol_global) = &
    1162             :                f*eigenvectors(1:nrow_global, icol_global)
    1163             : 
    1164             :          END IF
    1165             : 
    1166             :       END DO
    1167             : 
    1168             : #endif
    1169        1280 :       CALL cp_fm_syrk("U", "N", ncol_global, 1.0_dp, work, 1, 1, 0.0_dp, matrix)
    1170        1280 :       CALL cp_fm_uplo_to_full(matrix, work)
    1171             : 
    1172             :       ! Print some warnings/notes
    1173        1280 :       IF (matrix%matrix_struct%para_env%is_source() .AND. my_verbose) THEN
    1174           0 :          condition_number = ABS(eigenvalues(ncol_global)/eigenvalues(1))
    1175             :          WRITE (UNIT=cp_logger_get_default_unit_nr(), FMT="(/,(T2,A,ES15.6))") &
    1176           0 :             "CP_FM_POWER: smallest eigenvalue:", eigenvalues(1), &
    1177           0 :             "CP_FM_POWER: largest eigenvalue: ", eigenvalues(ncol_global), &
    1178           0 :             "CP_FM_POWER: condition number:   ", condition_number
    1179           0 :          IF (eigenvalues(1) <= 0.0_dp) THEN
    1180             :             WRITE (UNIT=cp_logger_get_default_unit_nr(), FMT="(/,T2,A)") &
    1181           0 :                "WARNING: matrix has a negative eigenvalue, tighten EPS_DEFAULT"
    1182             :          END IF
    1183           0 :          IF (condition_number > 1.0E12_dp) THEN
    1184             :             WRITE (UNIT=cp_logger_get_default_unit_nr(), FMT="(/,T2,A)") &
    1185           0 :                "WARNING: high condition number => possibly ill-conditioned matrix"
    1186             :          END IF
    1187             :       END IF
    1188             : 
    1189        1280 :       DEALLOCATE (eigenvalues)
    1190             : 
    1191        1280 :       CALL timestop(handle)
    1192             : 
    1193        1280 :    END SUBROUTINE cp_fm_power
    1194             : 
    1195             : ! **************************************************************************************************
    1196             : !> \brief ...
    1197             : !> \param matrix ...
    1198             : !> \param eigenvectors ...
    1199             : !> \param eigval ...
    1200             : !> \param thresh ...
    1201             : !> \param start_sec_block ...
    1202             : ! **************************************************************************************************
    1203          18 :    SUBROUTINE cp_fm_block_jacobi(matrix, eigenvectors, eigval, thresh, &
    1204             :                                  start_sec_block)
    1205             : 
    1206             :       ! Calculates block diagonalization of a full symmetric matrix
    1207             :       ! It has its origin in cp_fm_syevx. This routine rotates only elements
    1208             :       ! which are larger than a threshold values "thresh".
    1209             :       ! start_sec_block is the start of the second block.
    1210             :       ! IT DOES ONLY ONE SWEEP!
    1211             : 
    1212             :       ! - Creation (07.10.2002, Martin Fengler)
    1213             :       ! - Cosmetics (05.04.06, MK)
    1214             : 
    1215             :       TYPE(cp_fm_type), INTENT(IN)              :: eigenvectors, matrix
    1216             :       REAL(KIND=dp), DIMENSION(:), INTENT(IN)   :: eigval
    1217             :       INTEGER, INTENT(IN)                       :: start_sec_block
    1218             :       REAL(KIND=dp), INTENT(IN)                 :: thresh
    1219             : 
    1220             :       CHARACTER(len=*), PARAMETER               :: routineN = 'cp_fm_block_jacobi'
    1221             : 
    1222             :       INTEGER :: handle
    1223             :       REAL(KIND=dp), DIMENSION(:, :), POINTER   :: a, ev
    1224             : 
    1225             :       REAL(KIND=dp) :: tan_theta, tau, c, s
    1226             :       INTEGER  :: q, p, N
    1227          18 :       REAL(KIND=dp), DIMENSION(:), ALLOCATABLE  :: c_ip
    1228             : 
    1229             : #if defined(__parallel)
    1230             :       TYPE(cp_blacs_env_type), POINTER :: context
    1231             : 
    1232             :       INTEGER :: myprow, mypcol, nprow, npcol, block_dim_row, block_dim_col, &
    1233             :                  info, ev_row_block_size, iam, mynumrows, mype, npe, &
    1234             :                  q_loc
    1235          18 :       REAL(KIND=dp), DIMENSION(:, :), ALLOCATABLE  :: a_loc, ev_loc
    1236             :       INTEGER, DIMENSION(9)                        :: desca, descz, desc_a_block, &
    1237             :                                                       desc_ev_loc
    1238             :       TYPE(mp_comm_type):: allgrp
    1239             :       TYPE(cp_blacs_type) :: ictxt_loc
    1240             :       INTEGER, EXTERNAL :: numroc
    1241             : #endif
    1242             : 
    1243             :       ! -------------------------------------------------------------------------
    1244             : 
    1245          18 :       CALL timeset(routineN, handle)
    1246             : 
    1247             : #if defined(__parallel)
    1248          18 :       context => matrix%matrix_struct%context
    1249          18 :       allgrp = matrix%matrix_struct%para_env
    1250             : 
    1251          18 :       myprow = context%mepos(1)
    1252          18 :       mypcol = context%mepos(2)
    1253          18 :       nprow = context%num_pe(1)
    1254          18 :       npcol = context%num_pe(2)
    1255             : 
    1256          18 :       N = matrix%matrix_struct%nrow_global
    1257             : 
    1258          18 :       A => matrix%local_data
    1259         180 :       desca(:) = matrix%matrix_struct%descriptor(:)
    1260          18 :       EV => eigenvectors%local_data
    1261         180 :       descz(:) = eigenvectors%matrix_struct%descriptor(:)
    1262             : 
    1263             :       ! Copy the block to be rotated to the master process firstly and broadcast to all processes
    1264             :       ! start_sec_block defines where the second block starts!
    1265             :       ! Block will be processed together with the OO block
    1266             : 
    1267          18 :       block_dim_row = start_sec_block - 1
    1268          18 :       block_dim_col = N - block_dim_row
    1269          72 :       ALLOCATE (A_loc(block_dim_row, block_dim_col))
    1270             : 
    1271          18 :       mype = matrix%matrix_struct%para_env%mepos
    1272          18 :       npe = matrix%matrix_struct%para_env%num_pe
    1273             :       ! Get a new context
    1274          18 :       CALL ictxt_loc%gridinit(matrix%matrix_struct%para_env, 'Row-major', NPROW*NPCOL, 1)
    1275             : 
    1276             :       CALL descinit(desc_a_block, block_dim_row, block_dim_col, block_dim_row, &
    1277          18 :                     block_dim_col, 0, 0, ictxt_loc%get_handle(), block_dim_row, info)
    1278             : 
    1279             :       CALL pdgemr2d(block_dim_row, block_dim_col, A, 1, start_sec_block, desca, &
    1280          18 :                     A_loc, 1, 1, desc_a_block, context%get_handle())
    1281             :       ! Only the master (root) process received data yet
    1282          18 :       CALL allgrp%bcast(A_loc, 0)
    1283             : 
    1284             :       ! Since each process owns now the upper block, the eigenvectors can be re-sorted in such a way that
    1285             :       ! each process has a NN*1 grid, i.e. the process owns a bunch of rows which can be modified locally
    1286             : 
    1287             :       ! Initialize distribution of the eigenvectors
    1288          18 :       iam = mype
    1289          18 :       ev_row_block_size = n/(nprow*npcol)
    1290          18 :       mynumrows = NUMROC(N, ev_row_block_size, iam, 0, NPROW*NPCOL)
    1291             : 
    1292         108 :       ALLOCATE (EV_loc(mynumrows, N), c_ip(mynumrows))
    1293             : 
    1294             :       CALL descinit(desc_ev_loc, N, N, ev_row_block_size, N, 0, 0, ictxt_loc%get_handle(), &
    1295          18 :                     mynumrows, info)
    1296             : 
    1297          18 :       CALL pdgemr2d(N, N, EV, 1, 1, descz, EV_loc, 1, 1, desc_ev_loc, context%get_handle())
    1298             : 
    1299             :       ! Start block diagonalization of matrix
    1300             : 
    1301          18 :       q_loc = 0
    1302             : 
    1303        1170 :       DO q = start_sec_block, N
    1304        1152 :          q_loc = q_loc + 1
    1305      148626 :          DO p = 1, (start_sec_block - 1)
    1306             : 
    1307      148608 :             IF (ABS(A_loc(p, q_loc)) > thresh) THEN
    1308             : 
    1309      117566 :                tau = (eigval(q) - eigval(p))/(2.0_dp*A_loc(p, q_loc))
    1310             : 
    1311      117566 :                tan_theta = SIGN(1.0_dp, tau)/(ABS(tau) + SQRT(1.0_dp + tau*tau))
    1312             : 
    1313             :                ! Cos(theta)
    1314      117566 :                c = 1.0_dp/SQRT(1.0_dp + tan_theta*tan_theta)
    1315      117566 :                s = tan_theta*c
    1316             : 
    1317             :                ! Calculate eigenvectors: Q*J
    1318             :                ! c_ip = c*EV_loc(:,p) - s*EV_loc(:,q)
    1319             :                ! c_iq = s*EV_loc(:,p) + c*EV_loc(:,q)
    1320             :                ! EV(:,p) = c_ip
    1321             :                ! EV(:,q) = c_iq
    1322      117566 :                CALL dcopy(mynumrows, EV_loc(1, p), 1, c_ip(1), 1)
    1323      117566 :                CALL dscal(mynumrows, c, EV_loc(1, p), 1)
    1324      117566 :                CALL daxpy(mynumrows, -s, EV_loc(1, q), 1, EV_loc(1, p), 1)
    1325      117566 :                CALL dscal(mynumrows, c, EV_loc(1, q), 1)
    1326      117566 :                CALL daxpy(mynumrows, s, c_ip(1), 1, EV_loc(1, q), 1)
    1327             : 
    1328             :             END IF
    1329             : 
    1330             :          END DO
    1331             :       END DO
    1332             : 
    1333             :       ! Copy eigenvectors back to the original distribution
    1334          18 :       CALL pdgemr2d(N, N, EV_loc, 1, 1, desc_ev_loc, EV, 1, 1, descz, context%get_handle())
    1335             : 
    1336             :       ! Release work storage
    1337          18 :       DEALLOCATE (A_loc, EV_loc, c_ip)
    1338             : 
    1339          18 :       CALL ictxt_loc%gridexit()
    1340             : 
    1341             : #else
    1342             : 
    1343             :       N = matrix%matrix_struct%nrow_global
    1344             : 
    1345             :       ALLOCATE (c_ip(N)) ! Local eigenvalue vector
    1346             : 
    1347             :       A => matrix%local_data ! Contains the Matrix to be worked on
    1348             :       EV => eigenvectors%local_data ! Contains the eigenvectors up to blocksize, rest is garbage
    1349             : 
    1350             :       ! Start matrix diagonalization
    1351             : 
    1352             :       tan_theta = 0.0_dp
    1353             :       tau = 0.0_dp
    1354             : 
    1355             :       DO q = start_sec_block, N
    1356             :          DO p = 1, (start_sec_block - 1)
    1357             : 
    1358             :             IF (ABS(A(p, q)) > thresh) THEN
    1359             : 
    1360             :                tau = (eigval(q) - eigval(p))/(2.0_dp*A(p, q))
    1361             : 
    1362             :                tan_theta = SIGN(1.0_dp, tau)/(ABS(tau) + SQRT(1.0_dp + tau*tau))
    1363             : 
    1364             :                ! Cos(theta)
    1365             :                c = 1.0_dp/SQRT(1.0_dp + tan_theta*tan_theta)
    1366             :                s = tan_theta*c
    1367             : 
    1368             :                ! Calculate eigenvectors: Q*J
    1369             :                ! c_ip = c*EV(:,p) - s*EV(:,q)
    1370             :                ! c_iq = s*EV(:,p) + c*EV(:,q)
    1371             :                ! EV(:,p) = c_ip
    1372             :                ! EV(:,q) = c_iq
    1373             :                CALL dcopy(N, EV(1, p), 1, c_ip(1), 1)
    1374             :                CALL dscal(N, c, EV(1, p), 1)
    1375             :                CALL daxpy(N, -s, EV(1, q), 1, EV(1, p), 1)
    1376             :                CALL dscal(N, c, EV(1, q), 1)
    1377             :                CALL daxpy(N, s, c_ip(1), 1, EV(1, q), 1)
    1378             : 
    1379             :             END IF
    1380             : 
    1381             :          END DO
    1382             :       END DO
    1383             : 
    1384             :       ! Release work storage
    1385             : 
    1386             :       DEALLOCATE (c_ip)
    1387             : 
    1388             : #endif
    1389             : 
    1390          18 :       CALL timestop(handle)
    1391             : 
    1392          90 :    END SUBROUTINE cp_fm_block_jacobi
    1393             : 
    1394             : ! **************************************************************************************************
    1395             : !> \brief General Eigenvalue Problem  AX = BXE
    1396             : !>        Single option version: Cholesky decomposition of B
    1397             : !> \param amatrix ...
    1398             : !> \param bmatrix ...
    1399             : !> \param eigenvectors ...
    1400             : !> \param eigenvalues ...
    1401             : !> \param work ...
    1402             : ! **************************************************************************************************
    1403         268 :    SUBROUTINE cp_fm_geeig(amatrix, bmatrix, eigenvectors, eigenvalues, work)
    1404             : 
    1405             :       TYPE(cp_fm_type), INTENT(IN)                       :: amatrix, bmatrix, eigenvectors
    1406             :       REAL(KIND=dp), DIMENSION(:)                        :: eigenvalues
    1407             :       TYPE(cp_fm_type), INTENT(IN)                       :: work
    1408             : 
    1409             :       CHARACTER(len=*), PARAMETER                        :: routineN = 'cp_fm_geeig'
    1410             : 
    1411             :       INTEGER                                            :: handle, nao, nmo
    1412             : 
    1413         268 :       CALL timeset(routineN, handle)
    1414             : 
    1415         268 :       CALL cp_fm_get_info(amatrix, nrow_global=nao)
    1416         268 :       nmo = SIZE(eigenvalues)
    1417             : 
    1418         268 :       IF (diag_type == FM_DIAG_TYPE_CUSOLVER .AND. nao >= 64) THEN
    1419             :          ! Use cuSolverMP generalized eigenvalue solver for large matrices
    1420           0 :          CALL cp_fm_general_cusolver(amatrix, bmatrix, eigenvectors, eigenvalues)
    1421             : #if defined(__DLAF)
    1422             :       ELSE IF (diag_type == FM_DIAG_TYPE_DLAF .AND. amatrix%matrix_struct%nrow_global >= dlaf_neigvec_min) THEN
    1423             :          ! Use DLA-Future generalized eigenvalue solver for large matrices
    1424             :          CALL cp_fm_diag_gen_dlaf(amatrix, bmatrix, eigenvectors, eigenvalues)
    1425             : #endif
    1426             :       ELSE
    1427             :          ! Cholesky decompose S=U(T)U
    1428         268 :          CALL cp_fm_cholesky_decompose(bmatrix)
    1429             :          ! Invert to get U^(-1)
    1430         268 :          CALL cp_fm_triangular_invert(bmatrix)
    1431             :          ! Reduce to get U^(-T) * H * U^(-1)
    1432         268 :          CALL cp_fm_triangular_multiply(bmatrix, amatrix, side="R")
    1433         268 :          CALL cp_fm_triangular_multiply(bmatrix, amatrix, transpose_tr=.TRUE.)
    1434             :          ! Diagonalize
    1435             :          CALL choose_eigv_solver(matrix=amatrix, eigenvectors=work, &
    1436         268 :                                  eigenvalues=eigenvalues)
    1437             :          ! Restore vectors C = U^(-1) * C*
    1438         268 :          CALL cp_fm_triangular_multiply(bmatrix, work)
    1439         268 :          CALL cp_fm_to_fm(work, eigenvectors, nmo)
    1440             :       END IF
    1441             : 
    1442         268 :       CALL timestop(handle)
    1443             : 
    1444         268 :    END SUBROUTINE cp_fm_geeig
    1445             : 
    1446             : ! **************************************************************************************************
    1447             : !> \brief General Eigenvalue Problem  AX = BXE
    1448             : !>        Use canonical diagonalization : U*s**(-1/2)
    1449             : !> \param amatrix ...
    1450             : !> \param bmatrix ...
    1451             : !> \param eigenvectors ...
    1452             : !> \param eigenvalues ...
    1453             : !> \param work ...
    1454             : !> \param epseig ...
    1455             : ! **************************************************************************************************
    1456          76 :    SUBROUTINE cp_fm_geeig_canon(amatrix, bmatrix, eigenvectors, eigenvalues, work, epseig)
    1457             : 
    1458             :       TYPE(cp_fm_type), INTENT(IN)                       :: amatrix, bmatrix, eigenvectors
    1459             :       REAL(KIND=dp), DIMENSION(:), INTENT(OUT)           :: eigenvalues
    1460             :       TYPE(cp_fm_type), INTENT(IN)                       :: work
    1461             :       REAL(KIND=dp), INTENT(IN)                          :: epseig
    1462             : 
    1463             :       CHARACTER(len=*), PARAMETER                        :: routineN = 'cp_fm_geeig_canon'
    1464             : 
    1465             :       INTEGER                                            :: handle, i, icol, irow, nao, nc, ncol, &
    1466             :                                                             nmo, nx
    1467             :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: evals
    1468             : 
    1469          76 :       CALL timeset(routineN, handle)
    1470             : 
    1471             :       ! Test sizees
    1472          76 :       CALL cp_fm_get_info(amatrix, nrow_global=nao)
    1473          76 :       nmo = SIZE(eigenvalues)
    1474         228 :       ALLOCATE (evals(nao))
    1475             : 
    1476             :       ! Diagonalize -S matrix, this way the NULL space is at the end of the spectrum
    1477          76 :       CALL cp_fm_scale(-1.0_dp, bmatrix)
    1478          76 :       CALL choose_eigv_solver(matrix=bmatrix, eigenvectors=work, eigenvalues=evals)
    1479        4820 :       evals(:) = -evals(:)
    1480          76 :       nc = nao
    1481        4528 :       DO i = 1, nao
    1482        4528 :          IF (evals(i) < epseig) THEN
    1483          40 :             nc = i - 1
    1484          40 :             EXIT
    1485             :          END IF
    1486             :       END DO
    1487          76 :       CPASSERT(nc /= 0)
    1488             : 
    1489          76 :       IF (nc /= nao) THEN
    1490          40 :          IF (nc < nmo) THEN
    1491             :             ! Copy NULL space definition to last vectors of eigenvectors (if needed)
    1492           0 :             ncol = nmo - nc
    1493           0 :             CALL cp_fm_to_fm(work, eigenvectors, ncol, nc + 1, nc + 1)
    1494             :          END IF
    1495             :          ! Set NULL space in eigenvector matrix of S to zero
    1496         332 :          DO icol = nc + 1, nao
    1497       36172 :             DO irow = 1, nao
    1498       36132 :                CALL cp_fm_set_element(work, irow, icol, 0.0_dp)
    1499             :             END DO
    1500             :          END DO
    1501             :          ! Set small eigenvalues to a dummy save value
    1502         332 :          evals(nc + 1:nao) = 1.0_dp
    1503             :       END IF
    1504             :       ! Calculate U*s**(-1/2)
    1505        4820 :       evals(:) = 1.0_dp/SQRT(evals(:))
    1506          76 :       CALL cp_fm_column_scale(work, evals)
    1507             :       ! Reduce to get U^(-T) * H * U^(-1)
    1508          76 :       CALL cp_fm_gemm("T", "N", nao, nao, nao, 1.0_dp, work, amatrix, 0.0_dp, bmatrix)
    1509          76 :       CALL cp_fm_gemm("N", "N", nao, nao, nao, 1.0_dp, bmatrix, work, 0.0_dp, amatrix)
    1510          76 :       IF (nc /= nao) THEN
    1511             :          ! set diagonal values to save large value
    1512         332 :          DO icol = nc + 1, nao
    1513         332 :             CALL cp_fm_set_element(amatrix, icol, icol, 10000.0_dp)
    1514             :          END DO
    1515             :       END IF
    1516             :       ! Diagonalize
    1517          76 :       CALL choose_eigv_solver(matrix=amatrix, eigenvectors=bmatrix, eigenvalues=eigenvalues)
    1518          76 :       nx = MIN(nc, nmo)
    1519             :       ! Restore vectors C = U^(-1) * C*
    1520          76 :       CALL cp_fm_gemm("N", "N", nao, nx, nc, 1.0_dp, work, bmatrix, 0.0_dp, eigenvectors)
    1521             : 
    1522          76 :       DEALLOCATE (evals)
    1523             : 
    1524          76 :       CALL timestop(handle)
    1525             : 
    1526          76 :    END SUBROUTINE cp_fm_geeig_canon
    1527             : 
    1528             : END MODULE cp_fm_diag

Generated by: LCOV version 1.15