LCOV - code coverage report
Current view: top level - src/fm - cp_fm_elpa.F (source / functions) Hit Total Coverage
Test: CP2K Regtests (git:262480d) Lines: 134 156 85.9 %
Date: 2024-11-22 07:00:40 Functions: 7 7 100.0 %

          Line data    Source code
       1             : !--------------------------------------------------------------------------------------------------!
       2             : !   CP2K: A general program to perform molecular dynamics simulations                              !
       3             : !   Copyright 2000-2024 CP2K developers group <https://cp2k.org>                                   !
       4             : !                                                                                                  !
       5             : !   SPDX-License-Identifier: GPL-2.0-or-later                                                      !
       6             : !--------------------------------------------------------------------------------------------------!
       7             : 
       8             : ! **************************************************************************************************
       9             : !> \brief Wrapper for ELPA
      10             : !> \author Ole Schuett
      11             : ! **************************************************************************************************
      12             : MODULE cp_fm_elpa
      13             :    USE cp_log_handling, ONLY: cp_to_string
      14             :    USE machine, ONLY: m_cpuid, &
      15             :                       MACHINE_X86, &
      16             :                       MACHINE_CPU_GENERIC, &
      17             :                       MACHINE_X86_SSE4, &
      18             :                       MACHINE_X86_AVX, &
      19             :                       MACHINE_X86_AVX2
      20             :    USE cp_blacs_env, ONLY: cp_blacs_env_type
      21             :    USE cp_fm_basic_linalg, ONLY: cp_fm_upper_to_full
      22             :    USE cp_fm_diag_utils, ONLY: cp_fm_redistribute_start, &
      23             :                                cp_fm_redistribute_end, &
      24             :                                cp_fm_redistribute_info
      25             :    USE cp_fm_struct, ONLY: cp_fm_struct_get
      26             :    USE cp_fm_types, ONLY: cp_fm_type, &
      27             :                           cp_fm_to_fm, &
      28             :                           cp_fm_release, &
      29             :                           cp_fm_create, &
      30             :                           cp_fm_write_info
      31             :    USE cp_log_handling, ONLY: cp_get_default_logger, &
      32             :                               cp_logger_get_default_io_unit, &
      33             :                               cp_logger_type
      34             :    USE kinds, ONLY: default_string_length, &
      35             :                     dp
      36             :    USE message_passing, ONLY: mp_comm_type
      37             :    USE OMP_LIB, ONLY: omp_get_max_threads
      38             : 
      39             : #include "../base/base_uses.f90"
      40             : 
      41             : #if defined (__ELPA)
      42             :    USE elpa_constants, ONLY: ELPA_2STAGE_REAL_INVALID, &
      43             :                              ELPA_2STAGE_REAL_DEFAULT, &
      44             :                              ELPA_2STAGE_REAL_GENERIC, &
      45             :                              ELPA_2STAGE_REAL_GENERIC_SIMPLE, &
      46             :                              ELPA_2STAGE_REAL_BGP, &
      47             :                              ELPA_2STAGE_REAL_BGQ, &
      48             :                              ELPA_2STAGE_REAL_SSE_ASSEMBLY, &
      49             :                              ELPA_2STAGE_REAL_SSE_BLOCK2, &
      50             :                              ELPA_2STAGE_REAL_SSE_BLOCK4, &
      51             :                              ELPA_2STAGE_REAL_SSE_BLOCK6, &
      52             :                              ELPA_2STAGE_REAL_AVX_BLOCK2, &
      53             :                              ELPA_2STAGE_REAL_AVX_BLOCK4, &
      54             :                              ELPA_2STAGE_REAL_AVX_BLOCK6, &
      55             :                              ELPA_2STAGE_REAL_AVX2_BLOCK2, &
      56             :                              ELPA_2STAGE_REAL_AVX2_BLOCK4, &
      57             :                              ELPA_2STAGE_REAL_AVX2_BLOCK6, &
      58             :                              ELPA_2STAGE_REAL_AVX512_BLOCK2, &
      59             :                              ELPA_2STAGE_REAL_AVX512_BLOCK4, &
      60             :                              ELPA_2STAGE_REAL_AVX512_BLOCK6, &
      61             :                              ELPA_2STAGE_REAL_NVIDIA_GPU, &
      62             :                              ELPA_2STAGE_REAL_AMD_GPU, &
      63             :                              ELPA_2STAGE_REAL_INTEL_GPU_SYCL
      64             : 
      65             :    USE elpa, ONLY: elpa_t, elpa_solver_2stage, &
      66             :                    elpa_init, elpa_uninit, &
      67             :                    elpa_allocate, elpa_deallocate, elpa_ok
      68             : #endif
      69             : 
      70             :    IMPLICIT NONE
      71             : 
      72             :    PRIVATE
      73             : 
      74             :    CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'cp_fm_elpa'
      75             : 
      76             : #if defined(__ELPA)
      77             :    INTEGER, DIMENSION(21), PARAMETER :: elpa_kernel_ids = [ &
      78             :                                         ELPA_2STAGE_REAL_INVALID, & ! auto
      79             :                                         ELPA_2STAGE_REAL_GENERIC, &
      80             :                                         ELPA_2STAGE_REAL_GENERIC_SIMPLE, &
      81             :                                         ELPA_2STAGE_REAL_BGP, &
      82             :                                         ELPA_2STAGE_REAL_BGQ, &
      83             :                                         ELPA_2STAGE_REAL_SSE_ASSEMBLY, &
      84             :                                         ELPA_2STAGE_REAL_SSE_BLOCK2, &
      85             :                                         ELPA_2STAGE_REAL_SSE_BLOCK4, &
      86             :                                         ELPA_2STAGE_REAL_SSE_BLOCK6, &
      87             :                                         ELPA_2STAGE_REAL_AVX_BLOCK2, &
      88             :                                         ELPA_2STAGE_REAL_AVX_BLOCK4, &
      89             :                                         ELPA_2STAGE_REAL_AVX_BLOCK6, &
      90             :                                         ELPA_2STAGE_REAL_AVX2_BLOCK2, &
      91             :                                         ELPA_2STAGE_REAL_AVX2_BLOCK4, &
      92             :                                         ELPA_2STAGE_REAL_AVX2_BLOCK6, &
      93             :                                         ELPA_2STAGE_REAL_AVX512_BLOCK2, &
      94             :                                         ELPA_2STAGE_REAL_AVX512_BLOCK4, &
      95             :                                         ELPA_2STAGE_REAL_AVX512_BLOCK6, &
      96             :                                         ELPA_2STAGE_REAL_NVIDIA_GPU, &
      97             :                                         ELPA_2STAGE_REAL_AMD_GPU, &
      98             :                                         ELPA_2STAGE_REAL_INTEL_GPU_SYCL]
      99             : 
     100             :    CHARACTER(len=14), DIMENSION(SIZE(elpa_kernel_ids)), PARAMETER :: &
     101             :       elpa_kernel_names = [CHARACTER(len=14) :: &
     102             :                            "AUTO", &
     103             :                            "GENERIC", &
     104             :                            "GENERIC_SIMPLE", &
     105             :                            "BGP", &
     106             :                            "BGQ", &
     107             :                            "SSE", &
     108             :                            "SSE_BLOCK2", &
     109             :                            "SSE_BLOCK4", &
     110             :                            "SSE_BLOCK6", &
     111             :                            "AVX_BLOCK2", &
     112             :                            "AVX_BLOCK4", &
     113             :                            "AVX_BLOCK6", &
     114             :                            "AVX2_BLOCK2", &
     115             :                            "AVX2_BLOCK4", &
     116             :                            "AVX2_BLOCK6", &
     117             :                            "AVX512_BLOCK2", &
     118             :                            "AVX512_BLOCK4", &
     119             :                            "AVX512_BLOCK6", &
     120             :                            "NVIDIA_GPU", &
     121             :                            "AMD_GPU", &
     122             :                            "INTEL_GPU"]
     123             : 
     124             :    CHARACTER(len=44), DIMENSION(SIZE(elpa_kernel_ids)), PARAMETER :: &
     125             :       elpa_kernel_descriptions = [CHARACTER(len=44) :: &
     126             :                                   "Automatically selected kernel", &
     127             :                                   "Generic kernel", &
     128             :                                   "Simplified generic kernel", &
     129             :                                   "Kernel optimized for IBM BGP", &
     130             :                                   "Kernel optimized for IBM BGQ", &
     131             :                                   "Kernel optimized for x86_64/SSE", &
     132             :                                   "Kernel optimized for x86_64/SSE (block=2)", &
     133             :                                   "Kernel optimized for x86_64/SSE (block=4)", &
     134             :                                   "Kernel optimized for x86_64/SSE (block=6)", &
     135             :                                   "Kernel optimized for Intel AVX (block=2)", &
     136             :                                   "Kernel optimized for Intel AVX (block=4)", &
     137             :                                   "Kernel optimized for Intel AVX (block=6)", &
     138             :                                   "Kernel optimized for Intel AVX2 (block=2)", &
     139             :                                   "Kernel optimized for Intel AVX2 (block=4)", &
     140             :                                   "Kernel optimized for Intel AVX2 (block=6)", &
     141             :                                   "Kernel optimized for Intel AVX-512 (block=2)", &
     142             :                                   "Kernel optimized for Intel AVX-512 (block=4)", &
     143             :                                   "Kernel optimized for Intel AVX-512 (block=6)", &
     144             :                                   "Kernel targeting Nvidia GPUs", &
     145             :                                   "Kernel targeting AMD GPUs", &
     146             :                                   "Kernel targeting Intel GPUs"]
     147             : 
     148             : #else
     149             :    INTEGER, DIMENSION(1), PARAMETER :: elpa_kernel_ids = [-1]
     150             :    CHARACTER(len=14), DIMENSION(1), PARAMETER :: elpa_kernel_names = ["AUTO"]
     151             :    CHARACTER(len=44), DIMENSION(1), PARAMETER :: elpa_kernel_descriptions = ["Automatically selected kernel"]
     152             : #endif
     153             : 
     154             : #if defined(__ELPA)
     155             :    INTEGER, SAVE :: elpa_kernel = elpa_kernel_ids(1) ! auto
     156             : #endif
     157             :    LOGICAL, SAVE :: elpa_qr = .FALSE., &
     158             :                     elpa_qr_unsafe = .FALSE., &
     159             :                     elpa_should_print = .FALSE.
     160             : 
     161             :    PUBLIC :: cp_fm_diag_elpa, &
     162             :              set_elpa_kernel, &
     163             :              set_elpa_qr, &
     164             :              set_elpa_print, &
     165             :              elpa_kernel_ids, &
     166             :              elpa_kernel_names, &
     167             :              elpa_kernel_descriptions, &
     168             :              initialize_elpa_library, &
     169             :              finalize_elpa_library
     170             : 
     171             : CONTAINS
     172             : 
     173             : ! **************************************************************************************************
     174             : !> \brief Initialize the ELPA library
     175             : ! **************************************************************************************************
     176        8939 :    SUBROUTINE initialize_elpa_library()
     177             : #if defined(__ELPA)
     178        8939 :       IF (elpa_init(20180525) /= elpa_ok) &
     179           0 :          CPABORT("The linked ELPA library does not support the required API version")
     180             : #else
     181             :       CPABORT("Initialization of ELPA library requested but not enabled during build")
     182             : #endif
     183        8939 :    END SUBROUTINE
     184             : 
     185             : ! **************************************************************************************************
     186             : !> \brief Finalize the ELPA library
     187             : ! **************************************************************************************************
     188        8729 :    SUBROUTINE finalize_elpa_library()
     189             : #if defined(__ELPA)
     190        8729 :       CALL elpa_uninit()
     191             : #else
     192             :       CPABORT("Finalization of ELPA library requested but not enabled during build")
     193             : #endif
     194        8729 :    END SUBROUTINE
     195             : 
     196             : ! **************************************************************************************************
     197             : !> \brief Sets the active ELPA kernel.
     198             : !> \param requested_kernel one of the elpa_kernel_ids
     199             : ! **************************************************************************************************
     200        8939 :    SUBROUTINE set_elpa_kernel(requested_kernel)
     201             :       INTEGER, INTENT(IN)                                :: requested_kernel
     202             : 
     203             : #if defined (__ELPA)
     204             :       INTEGER                                            :: cpuid
     205             : 
     206        8939 :       elpa_kernel = requested_kernel
     207             : 
     208             :       ! Resolve AUTO kernel.
     209        8939 :       IF (elpa_kernel == ELPA_2STAGE_REAL_INVALID) THEN
     210        8933 :          cpuid = m_cpuid()
     211        8933 :          IF ((MACHINE_CPU_GENERIC .LT. cpuid) .AND. (cpuid .LE. MACHINE_X86)) THEN
     212           0 :             SELECT CASE (cpuid)
     213             :             CASE (MACHINE_X86_SSE4)
     214           0 :                elpa_kernel = ELPA_2STAGE_REAL_SSE_BLOCK4
     215             :             CASE (MACHINE_X86_AVX)
     216           0 :                elpa_kernel = ELPA_2STAGE_REAL_AVX_BLOCK4
     217             :             CASE (MACHINE_X86_AVX2)
     218        8933 :                elpa_kernel = ELPA_2STAGE_REAL_AVX2_BLOCK4
     219             :             CASE DEFAULT
     220        8933 :                elpa_kernel = ELPA_2STAGE_REAL_AVX512_BLOCK4
     221             :             END SELECT
     222             :          END IF
     223             : 
     224             :          ! Prefer GPU kernel if available.
     225             : #if defined (__ELPA_NVIDIA_GPU)
     226             :          elpa_kernel = ELPA_2STAGE_REAL_NVIDIA_GPU
     227             : #endif
     228             : #if defined (__ELPA_AMD_GPU)
     229             :          elpa_kernel = ELPA_2STAGE_REAL_AMD_GPU
     230             : #endif
     231             : #if defined (__ELPA_INTEL_GPU)
     232             :          elpa_kernel = ELPA_2STAGE_REAL_INTEL_GPU_SYCL
     233             : #endif
     234             : 
     235             :          ! If we could not find a suitable kernel then use ELPA_2STAGE_REAL_DEFAULT.
     236        8933 :          IF (elpa_kernel == ELPA_2STAGE_REAL_INVALID) THEN
     237           0 :             elpa_kernel = ELPA_2STAGE_REAL_DEFAULT
     238             :          END IF
     239             :       END IF
     240             : #else
     241             :       MARK_USED(requested_kernel)
     242             : #endif
     243        8939 :    END SUBROUTINE set_elpa_kernel
     244             : 
     245             : ! **************************************************************************************************
     246             : !> \brief Sets flags that determines if ELPA should try to use QR during diagonalization
     247             : !>        If use_qr = .TRUE., the QR step is performed only if the size of the input matrix is
     248             : !>        suitable. Check cp_fm_diag_elpa for further details.
     249             : !> \param use_qr the logical flag
     250             : !> \param use_qr_unsafe logical which determines if block size checks should be bypassed for some
     251             : !>                      ELPA versions, potentially leading to incorrect eigenvalues
     252             : ! **************************************************************************************************
     253        8939 :    SUBROUTINE set_elpa_qr(use_qr, use_qr_unsafe)
     254             :       LOGICAL, INTENT(IN)                                :: use_qr, use_qr_unsafe
     255             : 
     256        8939 :       elpa_qr = use_qr
     257        8939 :       elpa_qr_unsafe = use_qr_unsafe
     258        8939 :    END SUBROUTINE set_elpa_qr
     259             : 
     260             : ! **************************************************************************************************
     261             : !> \brief Sets a flag that determines if additional information about the ELPA diagonalization
     262             : !>        should be printed when the diagonalization routine is called.
     263             : !> \param flag the logical flag
     264             : ! **************************************************************************************************
     265        8939 :    SUBROUTINE set_elpa_print(flag)
     266             :       LOGICAL, INTENT(IN)                                :: flag
     267             : 
     268        8939 :       elpa_should_print = flag
     269        8939 :    END SUBROUTINE set_elpa_print
     270             : 
     271             : ! **************************************************************************************************
     272             : !> \brief Driver routine to diagonalize a FM matrix with the ELPA library.
     273             : !> \param matrix the matrix that is diagonalized
     274             : !> \param eigenvectors eigenvectors of the input matrix
     275             : !> \param eigenvalues eigenvalues of the input matrix
     276             : ! **************************************************************************************************
     277       59878 :    SUBROUTINE cp_fm_diag_elpa(matrix, eigenvectors, eigenvalues)
     278             :       TYPE(cp_fm_type), INTENT(IN)          :: matrix, eigenvectors
     279             :       REAL(KIND=dp), DIMENSION(:), INTENT(OUT) :: eigenvalues
     280             : 
     281             : #if defined(__ELPA)
     282             :       CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_diag_elpa'
     283             : 
     284             :       INTEGER                                  :: handle
     285             :       TYPE(cp_fm_type)                         :: eigenvectors_new, matrix_new
     286             :       TYPE(cp_fm_redistribute_info)            :: rdinfo
     287             : 
     288       59878 :       CALL timeset(routineN, handle)
     289             : 
     290             :       ! Determine if the input matrix needs to be redistributed before diagonalization.
     291             :       ! Heuristics are used to determine the optimal number of CPUs for diagonalization.
     292             :       ! The redistributed matrix is stored in matrix_new, which is just a pointer
     293             :       ! to the original matrix if no redistribution is required.
     294             :       ! With ELPA, we have to make sure that all processor columns have nonzero width
     295             :       CALL cp_fm_redistribute_start(matrix, eigenvectors, matrix_new, eigenvectors_new, &
     296       59878 :                                     caller_is_elpa=.TRUE., redist_info=rdinfo)
     297             : 
     298             :       ! Call ELPA on CPUs that hold the new matrix
     299       59878 :       IF (ASSOCIATED(matrix_new%matrix_struct)) &
     300       35129 :          CALL cp_fm_diag_elpa_base(matrix_new, eigenvectors_new, eigenvalues, rdinfo)
     301             : 
     302             :       ! Redistribute results and clean up
     303       59878 :       CALL cp_fm_redistribute_end(matrix, eigenvectors, eigenvalues, matrix_new, eigenvectors_new)
     304             : 
     305       59878 :       CALL timestop(handle)
     306             : #else
     307             :       MARK_USED(matrix)
     308             :       MARK_USED(eigenvectors)
     309             :       MARK_USED(eigenvalues)
     310             : 
     311             :       CPABORT("CP2K compiled without the ELPA library.")
     312             : #endif
     313       59878 :    END SUBROUTINE cp_fm_diag_elpa
     314             : 
     315             : #if defined(__ELPA)
     316             : ! **************************************************************************************************
     317             : !> \brief Actual routine that calls ELPA to diagonalize a FM matrix.
     318             : !> \param matrix the matrix that is diagonalized
     319             : !> \param eigenvectors eigenvectors of the input matrix
     320             : !> \param eigenvalues eigenvalues of the input matrix
     321             : !> \param rdinfo ...
     322             : ! **************************************************************************************************
     323       35129 :    SUBROUTINE cp_fm_diag_elpa_base(matrix, eigenvectors, eigenvalues, rdinfo)
     324             : 
     325             :       TYPE(cp_fm_type), INTENT(IN)                       :: matrix, eigenvectors
     326             :       REAL(KIND=dp), DIMENSION(:), INTENT(OUT)           :: eigenvalues
     327             :       TYPE(cp_fm_redistribute_info), INTENT(IN)          :: rdinfo
     328             : 
     329             :       CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_diag_elpa_base'
     330             : 
     331             :       INTEGER                                            :: handle
     332             : 
     333             :       CLASS(elpa_t), POINTER                   :: elpa_obj
     334             :       CHARACTER(len=default_string_length)     :: kernel_name
     335             :       TYPE(mp_comm_type) :: group
     336             :       INTEGER                                  :: i, &
     337             :                                                   mypcol, myprow, n, &
     338             :                                                   n_rows, n_cols, &
     339             :                                                   nblk, neig, io_unit, &
     340             :                                                   success
     341             :       LOGICAL                                  :: use_qr, check_eigenvalues
     342       35129 :       REAL(KIND=dp), DIMENSION(:), ALLOCATABLE :: eval, eval_noqr
     343             :       TYPE(cp_blacs_env_type), POINTER         :: context
     344             :       TYPE(cp_fm_type)                         :: matrix_noqr, eigenvectors_noqr
     345             :       TYPE(cp_logger_type), POINTER            :: logger
     346             :       REAL(KIND=dp), PARAMETER                 :: th = 1.0E-14_dp
     347       35129 :       INTEGER, DIMENSION(:), POINTER           :: ncol_locals
     348             : 
     349       35129 :       CALL timeset(routineN, handle)
     350       35129 :       NULLIFY (logger)
     351       35129 :       NULLIFY (ncol_locals)
     352             : 
     353       35129 :       check_eigenvalues = .FALSE.
     354             : 
     355       35129 :       logger => cp_get_default_logger()
     356       35129 :       io_unit = cp_logger_get_default_io_unit(logger)
     357             : 
     358       35129 :       n = matrix%matrix_struct%nrow_global
     359       35129 :       context => matrix%matrix_struct%context
     360       35129 :       group = matrix%matrix_struct%para_env
     361             : 
     362       35129 :       myprow = context%mepos(1)
     363       35129 :       mypcol = context%mepos(2)
     364             : 
     365             :       ! elpa needs the full matrix
     366       35129 :       CALL cp_fm_upper_to_full(matrix, eigenvectors)
     367             : 
     368             :       CALL cp_fm_struct_get(matrix%matrix_struct, &
     369             :                             local_leading_dimension=n_rows, &
     370             :                             ncol_local=n_cols, &
     371             :                             nrow_block=nblk, &
     372       35129 :                             ncol_locals=ncol_locals)
     373             : 
     374             :       ! ELPA will fail in 'solve_tridi', with no useful error message, fail earlier
     375       70258 :       IF (io_unit > 0 .AND. ANY(ncol_locals == 0)) THEN
     376           0 :          CALL rdinfo%write(io_unit)
     377           0 :          CALL cp_fm_write_info(matrix, io_unit)
     378           0 :          CPABORT("ELPA [pre-fail]: Problem contains processor column with zero width.")
     379             :       END IF
     380             : 
     381       35129 :       neig = SIZE(eigenvalues, 1)
     382             :       ! Decide if matrix is suitable for ELPA to use QR
     383             :       ! The definition of what is considered a suitable matrix depends on the ELPA version
     384             :       ! The relevant ELPA files to check are
     385             :       !     - Proper matrix order:  src/elpa2/elpa2_template.F90
     386             :       !     - Proper block size:    test/Fortran/test.F90
     387             :       ! Note that the names of these files might change in different ELPA versions
     388             :       ! Matrix order must be even
     389       35129 :       use_qr = elpa_qr .AND. (MODULO(n, 2) .EQ. 0)
     390             :       ! Matrix order and block size must be greater than or equal to 64
     391       35129 :       IF (.NOT. elpa_qr_unsafe) &
     392       35125 :          use_qr = use_qr .AND. (n .GE. 64) .AND. (nblk .GE. 64)
     393             : 
     394             :       ! Check if eigenvalues computed with ELPA_QR_UNSAFE should be verified
     395           8 :       IF (use_qr .AND. elpa_qr_unsafe .AND. elpa_should_print) &
     396           2 :          check_eigenvalues = .TRUE.
     397             : 
     398       35129 :       CALL matrix%matrix_struct%para_env%bcast(check_eigenvalues)
     399             : 
     400       35129 :       IF (check_eigenvalues) THEN
     401             :          ! Allocate and initialize needed temporaries to compute eigenvalues without ELPA QR
     402          12 :          ALLOCATE (eval_noqr(n))
     403           4 :          CALL cp_fm_create(matrix=matrix_noqr, matrix_struct=matrix%matrix_struct)
     404           4 :          CALL cp_fm_to_fm(matrix, matrix_noqr)
     405           4 :          CALL cp_fm_create(matrix=eigenvectors_noqr, matrix_struct=eigenvectors%matrix_struct)
     406           8 :          CALL cp_fm_upper_to_full(matrix_noqr, eigenvectors_noqr)
     407             :       END IF
     408             : 
     409       35129 :       IF (io_unit > 0 .AND. elpa_should_print) THEN
     410             :          WRITE (UNIT=io_unit, FMT="(/,T2,A)") &
     411         119 :             "ELPA| Matrix diagonalization information"
     412             : 
     413             :          ! Find name for given kernel id.
     414             :          ! In case ELPA_2STAGE_REAL_DEFAULT was used it might not be in our elpa_kernel_ids list.
     415         119 :          kernel_name = "id: "//TRIM(ADJUSTL(cp_to_string(elpa_kernel)))
     416        2618 :          DO i = 1, SIZE(elpa_kernel_ids)
     417        2618 :             IF (elpa_kernel_ids(i) == elpa_kernel) THEN
     418         119 :                kernel_name = elpa_kernel_names(i)
     419             :             END IF
     420             :          END DO
     421             : 
     422             :          WRITE (UNIT=io_unit, FMT="(T2,A,T71,I10)") &
     423         119 :             "ELPA| Matrix order (NA) ", n, &
     424         119 :             "ELPA| Matrix block size (NBLK) ", nblk, &
     425         119 :             "ELPA| Number of eigenvectors (NEV) ", neig, &
     426         119 :             "ELPA| Local rows (LOCAL_NROWS) ", n_rows, &
     427         238 :             "ELPA| Local columns (LOCAL_NCOLS) ", n_cols
     428             :          WRITE (UNIT=io_unit, FMT="(T2,A,T61,A20)") &
     429         119 :             "ELPA| Kernel ", ADJUSTR(TRIM(kernel_name))
     430         119 :          IF (elpa_qr) THEN
     431             :             WRITE (UNIT=io_unit, FMT="(T2,A,T78,A3)") &
     432           4 :                "ELPA| QR step requested ", "YES"
     433             :          ELSE
     434             :             WRITE (UNIT=io_unit, FMT="(T2,A,T79,A2)") &
     435         115 :                "ELPA| QR step requested ", "NO"
     436             :          END IF
     437             : 
     438         119 :          IF (elpa_qr) THEN
     439           4 :             IF (elpa_qr_unsafe) THEN
     440             :                WRITE (UNIT=io_unit, FMT="(T2,A,T78,A3)") &
     441           2 :                   "ELPA| Use potentially unsafe QR ", "YES"
     442             :             ELSE
     443             :                WRITE (UNIT=io_unit, FMT="(T2,A,T79,A2)") &
     444           2 :                   "ELPA| Use potentially unsafe QR ", "NO"
     445             :             END IF
     446           4 :             IF (use_qr) THEN
     447             :                WRITE (UNIT=io_unit, FMT="(T2,A,T78,A3)") &
     448           4 :                   "ELPA| Matrix is suitable for QR ", "YES"
     449             :             ELSE
     450             :                WRITE (UNIT=io_unit, FMT="(T2,A,T79,A2)") &
     451           0 :                   "ELPA| Matrix is suitable for QR ", "NO"
     452             :             END IF
     453           4 :             IF (.NOT. use_qr) THEN
     454           0 :                IF (MODULO(n, 2) /= 0) THEN
     455             :                   WRITE (UNIT=io_unit, FMT="(T2,A)") &
     456           0 :                      "ELPA| Matrix order is NOT even"
     457             :                END IF
     458           0 :                IF ((nblk < 64) .AND. (.NOT. elpa_qr_unsafe)) THEN
     459             :                   WRITE (UNIT=io_unit, FMT="(T2,A)") &
     460           0 :                      "ELPA| Matrix block size is NOT 64 or greater"
     461             :                END IF
     462             :             ELSE
     463           4 :                IF ((nblk < 64) .AND. elpa_qr_unsafe) THEN
     464             :                   WRITE (UNIT=io_unit, FMT="(T2,A)") &
     465           2 :                      "ELPA| Matrix block size check was bypassed"
     466             :                END IF
     467             :             END IF
     468             :          END IF
     469             :       END IF
     470             : 
     471             :       ! the full eigenvalues vector is needed
     472      105387 :       ALLOCATE (eval(n))
     473             : 
     474       35129 :       elpa_obj => elpa_allocate()
     475             : 
     476       35129 :       CALL elpa_obj%set("na", n, success)
     477       35129 :       CPASSERT(success == elpa_ok)
     478             : 
     479       35129 :       CALL elpa_obj%set("nev", neig, success)
     480       35129 :       CPASSERT(success == elpa_ok)
     481             : 
     482       35129 :       CALL elpa_obj%set("local_nrows", n_rows, success)
     483       35129 :       CPASSERT(success == elpa_ok)
     484             : 
     485       35129 :       CALL elpa_obj%set("local_ncols", n_cols, success)
     486       35129 :       CPASSERT(success == elpa_ok)
     487             : 
     488       35129 :       CALL elpa_obj%set("nblk", nblk, success)
     489       35129 :       CPASSERT(success == elpa_ok)
     490             : 
     491       35129 :       CALL elpa_obj%set("mpi_comm_parent", group%get_handle(), success)
     492       35129 :       CPASSERT(success == elpa_ok)
     493             : 
     494       35129 :       CALL elpa_obj%set("process_row", myprow, success)
     495       35129 :       CPASSERT(success == elpa_ok)
     496             : 
     497       35129 :       CALL elpa_obj%set("process_col", mypcol, success)
     498       35129 :       CPASSERT(success == elpa_ok)
     499             : 
     500       35129 :       success = elpa_obj%setup()
     501       35129 :       CPASSERT(success == elpa_ok)
     502             : 
     503       35129 :       CALL elpa_obj%set("solver", elpa_solver_2stage, success)
     504       35129 :       CPASSERT(success == elpa_ok)
     505             : 
     506             :       ! enabling the GPU must happen before setting the kernel
     507       35129 :       IF (elpa_kernel == ELPA_2STAGE_REAL_NVIDIA_GPU) THEN
     508           0 :          CALL elpa_obj%set("nvidia-gpu", 1, success)
     509           0 :          CPASSERT(success == elpa_ok)
     510             :       END IF
     511       35129 :       IF (elpa_kernel == ELPA_2STAGE_REAL_AMD_GPU) THEN
     512           0 :          CALL elpa_obj%set("amd-gpu", 1, success)
     513           0 :          CPASSERT(success == elpa_ok)
     514             :       END IF
     515       35129 :       IF (elpa_kernel == ELPA_2STAGE_REAL_INTEL_GPU_SYCL) THEN
     516           0 :          CALL elpa_obj%set("intel-gpu", 1, success)
     517           0 :          CPASSERT(success == elpa_ok)
     518             :       END IF
     519             : 
     520       35129 :       CALL elpa_obj%set("real_kernel", elpa_kernel, success)
     521       35129 :       CPWARN_IF(success /= elpa_ok, "Setting real_kernel for ELPA failed")
     522             : 
     523       35129 :       IF (use_qr) THEN
     524           8 :          CALL elpa_obj%set("qr", 1, success)
     525           8 :          CPASSERT(success == elpa_ok)
     526             :       END IF
     527             : 
     528             :       ! Set number of threads only when ELPA was built with OpenMP support.
     529       35129 :       IF (elpa_obj%can_set("omp_threads", omp_get_max_threads()) == ELPA_OK) THEN
     530       35129 :          CALL elpa_obj%set("omp_threads", omp_get_max_threads(), success)
     531       35129 :          CPASSERT(success == elpa_ok)
     532             :       END IF
     533             : 
     534       35129 :       CALL elpa_obj%eigenvectors(matrix%local_data, eval, eigenvectors%local_data, success)
     535       35129 :       IF (success /= elpa_ok) &
     536           0 :          CPABORT("ELPA failed to diagonalize a matrix")
     537             : 
     538       35129 :       IF (check_eigenvalues) THEN
     539             :          ! run again without QR
     540           4 :          CALL elpa_obj%set("qr", 0, success)
     541           4 :          CPASSERT(success == elpa_ok)
     542             : 
     543           4 :          CALL elpa_obj%eigenvectors(matrix_noqr%local_data, eval_noqr, eigenvectors_noqr%local_data, success)
     544           4 :          IF (success /= elpa_ok) &
     545           0 :             CPABORT("ELPA failed to diagonalize a matrix even without QR decomposition")
     546             : 
     547         340 :          IF (ANY(ABS(eval(1:neig) - eval_noqr(1:neig)) .GT. th)) &
     548           0 :             CPABORT("Eigenvalues calculated with QR decomp. in ELPA are wrong. Disable ELPA_QR_UNSAFE.")
     549             : 
     550           4 :          DEALLOCATE (eval_noqr)
     551           4 :          CALL cp_fm_release(matrix_noqr)
     552           4 :          CALL cp_fm_release(eigenvectors_noqr)
     553             :       END IF
     554             : 
     555       35129 :       CALL elpa_deallocate(elpa_obj, success)
     556       35129 :       CPASSERT(success == elpa_ok)
     557             : 
     558     1207551 :       eigenvalues(1:neig) = eval(1:neig)
     559       35129 :       DEALLOCATE (eval)
     560             : 
     561       35129 :       CALL timestop(handle)
     562             : 
     563      140516 :    END SUBROUTINE cp_fm_diag_elpa_base
     564             : #endif
     565             : 
     566             : END MODULE cp_fm_elpa

Generated by: LCOV version 1.15