LCOV - CP2K Regtests (git:15a58fb) - src/fm/cp_fm_basic

LCOV - code coverage report

Current view:	top level - src/fm - cp_fm_basic_linalg.F (source / functions)		Hit	Total	Coverage
Test:	CP2K Regtests (git:15a58fb)	Lines:	624	858	72.7 %
Date:	2025-02-18 08:24:35	Functions:	30	43	69.8 %

          Line data    Source code

       1             : !--------------------------------------------------------------------------------------------------!
       2             : !   CP2K: A general program to perform molecular dynamics simulations                              !
       3             : !   Copyright 2000-2025 CP2K developers group <https://cp2k.org>                                   !
       4             : !                                                                                                  !
       5             : !   SPDX-License-Identifier: GPL-2.0-or-later                                                      !
       6             : !--------------------------------------------------------------------------------------------------!
       7             : 
       8             : ! **************************************************************************************************
       9             : !> \brief basic linear algebra operations for full matrices
      10             : !> \par History
      11             : !>      08.2002 split out of qs_blacs [fawzi]
      12             : !> \author Fawzi Mohamed
      13             : ! **************************************************************************************************
      14             : MODULE cp_fm_basic_linalg
      15             :    USE cp_blacs_env, ONLY: cp_blacs_env_type
      16             :    USE cp_fm_struct, ONLY: cp_fm_struct_equivalent
      17             :    USE cp_fm_types, ONLY: &
      18             :       cp_fm_create, cp_fm_get_diag, cp_fm_get_info, cp_fm_get_submatrix, cp_fm_p_type, &
      19             :       cp_fm_release, cp_fm_set_all, cp_fm_set_element, cp_fm_set_submatrix, cp_fm_to_fm, &
      20             :       cp_fm_type
      21             :    USE cp_log_handling, ONLY: cp_logger_get_default_unit_nr, &
      22             :                               cp_to_string
      23             :    USE kahan_sum, ONLY: accurate_dot_product, &
      24             :                         accurate_sum
      25             :    USE kinds, ONLY: dp, &
      26             :                     int_8, &
      27             :                     sp
      28             :    USE machine, ONLY: m_memory
      29             :    USE mathlib, ONLY: get_pseudo_inverse_svd, &
      30             :                       invert_matrix
      31             :    USE message_passing, ONLY: mp_comm_type
      32             : #include "../base/base_uses.f90"
      33             : 
      34             :    IMPLICIT NONE
      35             :    PRIVATE
      36             : 
      37             :    LOGICAL, PRIVATE, PARAMETER :: debug_this_module = .TRUE.
      38             :    CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'cp_fm_basic_linalg'
      39             : 
      40             :    PUBLIC :: cp_fm_scale, & ! scale a matrix
      41             :              cp_fm_scale_and_add, & ! scale and add two matrices
      42             :              cp_fm_geadd, & ! general addition
      43             :              cp_fm_column_scale, & ! scale columns of a matrix
      44             :              cp_fm_row_scale, & ! scale rows of a matrix
      45             :              cp_fm_trace, & ! trace of the transpose(A)*B
      46             :              cp_fm_contracted_trace, & ! sum_{i,...,k} Tr [A(i,...,k)^T * B(i,...,k)]
      47             :              cp_fm_norm, & ! different norms of A
      48             :              cp_fm_schur_product, & ! schur product
      49             :              cp_fm_transpose, & ! transpose a matrix
      50             :              cp_fm_uplo_to_full, & ! symmetrise a triangular matrix
      51             :              cp_fm_syrk, & ! rank k update
      52             :              cp_fm_triangular_multiply, & ! triangular matrix multiply / solve
      53             :              cp_fm_symm, & ! multiply a symmetric with a non-symmetric matrix
      54             :              cp_fm_gemm, & ! multiply two matrices
      55             :              cp_complex_fm_gemm, & ! multiply two complex matrices, represented by non_complex fm matrices
      56             :              cp_fm_invert, & ! computes the inverse and determinant
      57             :              cp_fm_frobenius_norm, & ! frobenius norm
      58             :              cp_fm_triangular_invert, & ! compute the reciprocal of a triangular matrix
      59             :              cp_fm_qr_factorization, & ! compute the QR factorization of a rectangular matrix
      60             :              cp_fm_solve, & ! solves the equation  A*B=C A and C are input
      61             :              cp_fm_pdgeqpf, & ! compute a QR factorization with column pivoting of a M-by-N distributed matrix
      62             :              cp_fm_pdorgqr, & ! generates an M-by-N as first N columns of a product of K elementary reflectors
      63             :              cp_fm_potrf, & ! Cholesky decomposition
      64             :              cp_fm_potri, & ! Invert triangular matrix
      65             :              cp_fm_rot_rows, & ! rotates two rows
      66             :              cp_fm_rot_cols, & ! rotates two columns
      67             :              cp_fm_cholesky_restore, & ! apply Cholesky decomposition
      68             :              cp_fm_Gram_Schmidt_orthonorm, & ! Gram-Schmidt orthonormalization of columns of a full matrix, &
      69             :              cp_fm_det, & ! determinant of a real matrix with correct sign
      70             :              cp_fm_matvec ! matrix-vector multiplication (vector replicated)
      71             : 
      72             :    REAL(KIND=dp), EXTERNAL :: dlange, pdlange, pdlatra
      73             :    REAL(KIND=sp), EXTERNAL :: slange, pslange, pslatra
      74             : 
      75             :    INTERFACE cp_fm_trace
      76             :       MODULE PROCEDURE cp_fm_trace_a0b0t0
      77             :       MODULE PROCEDURE cp_fm_trace_a1b0t1_a
      78             :       MODULE PROCEDURE cp_fm_trace_a1b0t1_p
      79             :       MODULE PROCEDURE cp_fm_trace_a1b1t1_aa
      80             :       MODULE PROCEDURE cp_fm_trace_a1b1t1_ap
      81             :       MODULE PROCEDURE cp_fm_trace_a1b1t1_pa
      82             :       MODULE PROCEDURE cp_fm_trace_a1b1t1_pp
      83             :    END INTERFACE cp_fm_trace
      84             : 
      85             :    INTERFACE cp_fm_contracted_trace
      86             :       MODULE PROCEDURE cp_fm_contracted_trace_a2b2t2_aa
      87             :       MODULE PROCEDURE cp_fm_contracted_trace_a2b2t2_ap
      88             :       MODULE PROCEDURE cp_fm_contracted_trace_a2b2t2_pa
      89             :       MODULE PROCEDURE cp_fm_contracted_trace_a2b2t2_pp
      90             :    END INTERFACE cp_fm_contracted_trace
      91             : CONTAINS
      92             : 
      93             : ! **************************************************************************************************
      94             : !> \brief Computes the determinant (with a correct sign even in parallel environment!) of a real square matrix
      95             : !> \author A. Sinyavskiy (andrey.sinyavskiy@chem.uzh.ch)
      96             : ! **************************************************************************************************
      97           0 :    SUBROUTINE cp_fm_det(matrix_a, det_a)
      98             : 
      99             :       TYPE(cp_fm_type), INTENT(IN)             :: matrix_a
     100             :       REAL(KIND=dp), INTENT(OUT)               :: det_a
     101             :       REAL(KIND=dp)                            :: determinant
     102             :       TYPE(cp_fm_type)                         :: matrix_lu
     103             :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: a
     104             :       INTEGER                                  :: n, i, info, P
     105           0 :       INTEGER, ALLOCATABLE, DIMENSION(:)       :: ipivot
     106             :       REAL(KIND=dp), DIMENSION(:), POINTER     :: diag
     107             : 
     108             : #if defined(__parallel)
     109             :       INTEGER                                  :: myprow, nprow, npcol, nrow_local, nrow_block, irow_local
     110             :       INTEGER, DIMENSION(9)                    :: desca
     111             : #endif
     112             : 
     113             :       CALL cp_fm_create(matrix=matrix_lu, &
     114             :                         matrix_struct=matrix_a%matrix_struct, &
     115           0 :                         name="A_lu"//TRIM(ADJUSTL(cp_to_string(1)))//"MATRIX")
     116           0 :       CALL cp_fm_to_fm(matrix_a, matrix_lu)
     117             : 
     118           0 :       a => matrix_lu%local_data
     119           0 :       n = matrix_lu%matrix_struct%nrow_global
     120           0 :       ALLOCATE (ipivot(n))
     121           0 :       ipivot(:) = 0
     122           0 :       P = 0
     123           0 :       ALLOCATE (diag(n))
     124           0 :       diag(:) = 0.0_dp
     125             : #if defined(__parallel)
     126             :       ! Use LU decomposition
     127           0 :       desca(:) = matrix_lu%matrix_struct%descriptor(:)
     128           0 :       CALL pdgetrf(n, n, a, 1, 1, desca, ipivot, info)
     129           0 :       CALL cp_fm_get_diag(matrix_lu, diag)
     130           0 :       determinant = PRODUCT(diag)
     131           0 :       myprow = matrix_lu%matrix_struct%context%mepos(1)
     132           0 :       nprow = matrix_lu%matrix_struct%context%num_pe(1)
     133           0 :       npcol = matrix_lu%matrix_struct%context%num_pe(2)
     134           0 :       nrow_local = matrix_lu%matrix_struct%nrow_locals(myprow)
     135           0 :       nrow_block = matrix_lu%matrix_struct%nrow_block
     136           0 :       DO irow_local = 1, nrow_local
     137           0 :          i = matrix_lu%matrix_struct%row_indices(irow_local)
     138           0 :          IF (ipivot(irow_local) /= i) P = P + 1
     139             :       END DO
     140           0 :       CALL matrix_lu%matrix_struct%para_env%sum(P)
     141             :       ! very important fix
     142           0 :       P = P/npcol
     143             : #else
     144             :       CALL dgetrf(n, n, a, n, ipivot, info)
     145             :       CALL cp_fm_get_diag(matrix_lu, diag)
     146             :       determinant = PRODUCT(diag)
     147             :       DO i = 1, n
     148             :          IF (ipivot(i) /= i) P = P + 1
     149             :       END DO
     150             : #endif
     151           0 :       DEALLOCATE (ipivot)
     152           0 :       DEALLOCATE (diag)
     153           0 :       CALL cp_fm_release(matrix_lu)
     154           0 :       det_a = determinant*(-2*MOD(P, 2) + 1.0_dp)
     155           0 :    END SUBROUTINE cp_fm_det
     156             : 
     157             : ! **************************************************************************************************
     158             : !> \brief calc A <- alpha*A + beta*B
     159             : !>      optimized for alpha == 1.0 (just add beta*B) and beta == 0.0 (just
     160             : !>      scale A)
     161             : !> \param alpha ...
     162             : !> \param matrix_a ...
     163             : !> \param beta ...
     164             : !> \param matrix_b ...
     165             : ! **************************************************************************************************
     166     1064712 :    SUBROUTINE cp_fm_scale_and_add(alpha, matrix_a, beta, matrix_b)
     167             : 
     168             :       REAL(KIND=dp), INTENT(IN)                          :: alpha
     169             :       TYPE(cp_fm_type), INTENT(IN)                       :: matrix_a
     170             :       REAL(KIND=dp), INTENT(IN), OPTIONAL                :: beta
     171             :       TYPE(cp_fm_type), INTENT(IN), OPTIONAL             :: matrix_b
     172             : 
     173             :       CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_scale_and_add'
     174             : 
     175             :       INTEGER                                            :: handle, size_a, size_b
     176             :       REAL(KIND=dp)                                      :: my_beta
     177     1064712 :       REAL(KIND=dp), DIMENSION(:, :), POINTER            :: a, b
     178     1064712 :       REAL(KIND=sp), DIMENSION(:, :), POINTER            :: a_sp, b_sp
     179             : 
     180     1064712 :       CALL timeset(routineN, handle)
     181             : 
     182     1064712 :       my_beta = 0.0_dp
     183     1064712 :       IF (PRESENT(matrix_b)) my_beta = 1.0_dp
     184     1064712 :       IF (PRESENT(beta)) my_beta = beta
     185     1064712 :       NULLIFY (a, b)
     186             : 
     187     1064712 :       IF (PRESENT(beta)) THEN
     188     1064712 :          CPASSERT(PRESENT(matrix_b))
     189     1064712 :          IF (ASSOCIATED(matrix_a%local_data, matrix_b%local_data)) THEN
     190           0 :             CPWARN("Bad use of routine. Call cp_fm_scale instead")
     191           0 :             CALL cp_fm_scale(alpha + beta, matrix_a)
     192           0 :             CALL timestop(handle)
     193           0 :             RETURN
     194             :          END IF
     195             :       END IF
     196             : 
     197     1064712 :       a => matrix_a%local_data
     198     1064712 :       a_sp => matrix_a%local_data_sp
     199             : 
     200     1064712 :       IF (matrix_a%use_sp) THEN
     201           0 :          size_a = SIZE(a_sp, 1)*SIZE(a_sp, 2)
     202             :       ELSE
     203     1064712 :          size_a = SIZE(a, 1)*SIZE(a, 2)
     204             :       END IF
     205             : 
     206     1064712 :       IF (alpha .NE. 1.0_dp) THEN
     207       78842 :          IF (matrix_a%use_sp) THEN
     208           0 :             CALL sscal(size_a, REAL(alpha, sp), a_sp, 1)
     209             :          ELSE
     210       78842 :             CALL dscal(size_a, alpha, a, 1)
     211             :          END IF
     212             :       END IF
     213     1064712 :       IF (my_beta .NE. 0.0_dp) THEN
     214     1057324 :          IF (matrix_a%matrix_struct%context .NE. matrix_b%matrix_struct%context) &
     215           0 :             CPABORT("Matrices must be in the same blacs context")
     216             : 
     217     1057324 :          IF (cp_fm_struct_equivalent(matrix_a%matrix_struct, &
     218             :                                      matrix_b%matrix_struct)) THEN
     219             : 
     220     1057324 :             b => matrix_b%local_data
     221     1057324 :             b_sp => matrix_b%local_data_sp
     222     1057324 :             IF (matrix_b%use_sp) THEN
     223           0 :                size_b = SIZE(b_sp, 1)*SIZE(b_sp, 2)
     224             :             ELSE
     225     1057324 :                size_b = SIZE(b, 1)*SIZE(b, 2)
     226             :             END IF
     227     1057324 :             IF (size_a .NE. size_b) &
     228           0 :                CPABORT("Matrices must have same local sizes")
     229             : 
     230     1057324 :             IF (matrix_a%use_sp .AND. matrix_b%use_sp) THEN
     231           0 :                CALL saxpy(size_a, REAL(my_beta, sp), b_sp, 1, a_sp, 1)
     232     1057324 :             ELSEIF (matrix_a%use_sp .AND. .NOT. matrix_b%use_sp) THEN
     233           0 :                CALL saxpy(size_a, REAL(my_beta, sp), REAL(b, sp), 1, a_sp, 1)
     234     1057324 :             ELSEIF (.NOT. matrix_a%use_sp .AND. matrix_b%use_sp) THEN
     235           0 :                CALL daxpy(size_a, my_beta, REAL(b_sp, dp), 1, a, 1)
     236             :             ELSE
     237     1057324 :                CALL daxpy(size_a, my_beta, b, 1, a, 1)
     238             :             END IF
     239             : 
     240             :          ELSE
     241             : #ifdef __parallel
     242           0 :             CPABORT("to do (pdscal,pdcopy,pdaxpy)")
     243             : #else
     244             :             CPABORT("")
     245             : #endif
     246             :          END IF
     247             : 
     248             :       END IF
     249             : 
     250     1064712 :       CALL timestop(handle)
     251             : 
     252     1064712 :    END SUBROUTINE cp_fm_scale_and_add
     253             : 
     254             : ! **************************************************************************************************
     255             : !> \brief interface to BLACS geadd:
     256             : !>                matrix_b = beta*matrix_b + alpha*opt(matrix_a)
     257             : !>        where opt(matrix_a) can be either:
     258             : !>              'N':  matrix_a
     259             : !>              'T':  matrix_a^T
     260             : !>              'C':  matrix_a^H (Hermitian conjugate)
     261             : !>        note that this is a level three routine, use cp_fm_scale_and_add if that
     262             : !>        is sufficient for your needs
     263             : !> \param alpha  : complex scalar
     264             : !> \param trans  : 'N' normal, 'T' transposed
     265             : !> \param matrix_a : input matrix_a
     266             : !> \param beta   : complex scalar
     267             : !> \param matrix_b : input matrix_b, upon out put the updated matrix_b
     268             : !> \author  Lianheng Tong
     269             : ! **************************************************************************************************
     270          96 :    SUBROUTINE cp_fm_geadd(alpha, trans, matrix_a, beta, matrix_b)
     271             :       REAL(KIND=dp), INTENT(IN) :: alpha, beta
     272             :       CHARACTER, INTENT(IN) :: trans
     273             :       TYPE(cp_fm_type), INTENT(IN) :: matrix_a, matrix_b
     274             : 
     275             :       CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_geadd'
     276             : 
     277             :       INTEGER :: nrow_global, ncol_global, handle
     278             :       REAL(KIND=dp), DIMENSION(:, :), POINTER :: aa, bb
     279             : #if defined(__parallel)
     280             :       INTEGER, DIMENSION(9) :: desca, descb
     281             : #elif !defined(__MKL)
     282             :       INTEGER :: ii, jj
     283             : #endif
     284             : 
     285          96 :       CALL timeset(routineN, handle)
     286             : 
     287          96 :       nrow_global = matrix_a%matrix_struct%nrow_global
     288          96 :       ncol_global = matrix_a%matrix_struct%ncol_global
     289          96 :       CPASSERT(nrow_global .EQ. matrix_b%matrix_struct%nrow_global)
     290          96 :       CPASSERT(ncol_global .EQ. matrix_b%matrix_struct%ncol_global)
     291             : 
     292          96 :       aa => matrix_a%local_data
     293          96 :       bb => matrix_b%local_data
     294             : 
     295             : #if defined(__parallel)
     296         960 :       desca = matrix_a%matrix_struct%descriptor
     297         960 :       descb = matrix_b%matrix_struct%descriptor
     298             :       CALL pdgeadd(trans, &
     299             :                    nrow_global, &
     300             :                    ncol_global, &
     301             :                    alpha, &
     302             :                    aa, &
     303             :                    1, 1, &
     304             :                    desca, &
     305             :                    beta, &
     306             :                    bb, &
     307             :                    1, 1, &
     308          96 :                    descb)
     309             : #elif defined(__MKL)
     310             :       CALL mkl_domatadd('C', trans, 'N', nrow_global, ncol_global, &
     311             :                         alpha, aa, nrow_global, beta, bb, nrow_global, bb, nrow_global)
     312             : #else
     313             :       ! dgeadd is not a standard BLAS function, although it is implemented
     314             :       ! in some libraries like OpenBLAS, so not going to use it here
     315             :       SELECT CASE (trans)
     316             :       CASE ('T')
     317             :          DO jj = 1, ncol_global
     318             :             DO ii = 1, nrow_global
     319             :                bb(ii, jj) = beta*bb(ii, jj) + alpha*aa(jj, ii)
     320             :             END DO
     321             :          END DO
     322             :       CASE DEFAULT
     323             :          DO jj = 1, ncol_global
     324             :             DO ii = 1, nrow_global
     325             :                bb(ii, jj) = beta*bb(ii, jj) + alpha*aa(ii, jj)
     326             :             END DO
     327             :          END DO
     328             :       END SELECT
     329             : #endif
     330             : 
     331          96 :       CALL timestop(handle)
     332             : 
     333          96 :    END SUBROUTINE cp_fm_geadd
     334             : 
     335             : ! **************************************************************************************************
     336             : !> \brief Computes the LU-decomposition of the matrix, and the determinant of the matrix
     337             : !>      IMPORTANT : the sign of the determinant is not defined correctly yet ....
     338             : !> \param matrix_a ...
     339             : !> \param almost_determinant ...
     340             : !> \param correct_sign ...
     341             : !> \par History
     342             : !>      added correct_sign 02.07 (fschiff)
     343             : !> \author Joost VandeVondele
     344             : !> \note
     345             : !>      - matrix_a is overwritten
     346             : !>      - the sign of the determinant might be wrong
     347             : !>      - SERIOUS WARNING (KNOWN BUG) : the sign of the determinant depends on ipivot
     348             : !>      - one should be able to find out if ipivot is an even or an odd permutation...
     349             : !>        if you need the correct sign, just add correct_sign==.TRUE. (fschiff)
     350             : !>      - Use cp_fm_get_diag instead of n times cp_fm_get_element (A. Bussy)
     351             : ! **************************************************************************************************
     352           0 :    SUBROUTINE cp_fm_lu_decompose(matrix_a, almost_determinant, correct_sign)
     353             :       TYPE(cp_fm_type), INTENT(IN)             :: matrix_a
     354             :       REAL(KIND=dp), INTENT(OUT)               :: almost_determinant
     355             :       LOGICAL, INTENT(IN), OPTIONAL            :: correct_sign
     356             : 
     357             :       CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_lu_decompose'
     358             : 
     359             :       INTEGER                                  :: handle, i, info, n
     360           0 :       INTEGER, ALLOCATABLE, DIMENSION(:)       :: ipivot
     361             :       REAL(KIND=dp)                            :: determinant
     362             :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: a
     363             : #if defined(__parallel)
     364             :       INTEGER, DIMENSION(9)                    :: desca
     365           0 :       REAL(KIND=dp), DIMENSION(:), POINTER     :: diag
     366             : #else
     367             :       INTEGER                                  :: lda
     368             : #endif
     369             : 
     370           0 :       CALL timeset(routineN, handle)
     371             : 
     372           0 :       a => matrix_a%local_data
     373           0 :       n = matrix_a%matrix_struct%nrow_global
     374           0 :       ALLOCATE (ipivot(n + matrix_a%matrix_struct%nrow_block))
     375             : 
     376             : #if defined(__parallel)
     377             :       MARK_USED(correct_sign)
     378           0 :       desca(:) = matrix_a%matrix_struct%descriptor(:)
     379           0 :       CALL pdgetrf(n, n, a, 1, 1, desca, ipivot, info)
     380             : 
     381           0 :       ALLOCATE (diag(n))
     382           0 :       diag(:) = 0.0_dp
     383           0 :       CALL cp_fm_get_diag(matrix_a, diag)
     384           0 :       determinant = 1.0_dp
     385           0 :       DO i = 1, n
     386           0 :          determinant = determinant*diag(i)
     387             :       END DO
     388           0 :       DEALLOCATE (diag)
     389             : #else
     390             :       lda = SIZE(a, 1)
     391             :       CALL dgetrf(n, n, a, lda, ipivot, info)
     392             :       determinant = 1.0_dp
     393             :       IF (correct_sign) THEN
     394             :          DO i = 1, n
     395             :             IF (ipivot(i) .NE. i) THEN
     396             :                determinant = -determinant*a(i, i)
     397             :             ELSE
     398             :                determinant = determinant*a(i, i)
     399             :             END IF
     400             :          END DO
     401             :       ELSE
     402             :          DO i = 1, n
     403             :             determinant = determinant*a(i, i)
     404             :          END DO
     405             :       END IF
     406             : #endif
     407             :       ! info is allowed to be zero
     408             :       ! this does just signal a zero diagonal element
     409           0 :       DEALLOCATE (ipivot)
     410           0 :       almost_determinant = determinant ! notice that the sign is random
     411           0 :       CALL timestop(handle)
     412           0 :    END SUBROUTINE cp_fm_lu_decompose
     413             : 
     414             : ! **************************************************************************************************
     415             : !> \brief computes matrix_c = beta * matrix_c + alpha * ( matrix_a  ** transa ) * ( matrix_b ** transb )
     416             : !> \param transa : 'N' -> normal   'T' -> transpose
     417             : !>      alpha,beta :: can be 0.0_dp and 1.0_dp
     418             : !> \param transb ...
     419             : !> \param m ...
     420             : !> \param n ...
     421             : !> \param k ...
     422             : !> \param alpha ...
     423             : !> \param matrix_a : m x k matrix ( ! for transa = 'N')
     424             : !> \param matrix_b : k x n matrix ( ! for transb = 'N')
     425             : !> \param beta ...
     426             : !> \param matrix_c : m x n matrix
     427             : !> \param a_first_col ...
     428             : !> \param a_first_row ...
     429             : !> \param b_first_col : the k x n matrix starts at col b_first_col of matrix_b (avoid usage)
     430             : !> \param b_first_row ...
     431             : !> \param c_first_col ...
     432             : !> \param c_first_row ...
     433             : !> \author Matthias Krack
     434             : !> \note
     435             : !>      matrix_c should have no overlap with matrix_a, matrix_b
     436             : ! **************************************************************************************************
     437         550 :    SUBROUTINE cp_fm_gemm(transa, transb, m, n, k, alpha, matrix_a, matrix_b, beta, &
     438             :                          matrix_c, a_first_col, a_first_row, b_first_col, b_first_row, &
     439             :                          c_first_col, c_first_row)
     440             : 
     441             :       CHARACTER(LEN=1), INTENT(IN)             :: transa, transb
     442             :       INTEGER, INTENT(IN)                      :: m, n, k
     443             :       REAL(KIND=dp), INTENT(IN)                :: alpha
     444             :       TYPE(cp_fm_type), INTENT(IN)             :: matrix_a, matrix_b
     445             :       REAL(KIND=dp), INTENT(IN)                :: beta
     446             :       TYPE(cp_fm_type), INTENT(IN)             :: matrix_c
     447             :       INTEGER, INTENT(IN), OPTIONAL            :: a_first_col, a_first_row, &
     448             :                                                   b_first_col, b_first_row, &
     449             :                                                   c_first_col, c_first_row
     450             : 
     451             :       CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_gemm'
     452             : 
     453             :       INTEGER                                  :: handle, i_a, i_b, i_c, j_a, &
     454             :                                                   j_b, j_c
     455             :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: a, b, c
     456         550 :       REAL(KIND=sp), DIMENSION(:, :), POINTER  :: a_sp, b_sp, c_sp
     457             : #if defined(__parallel)
     458             :       INTEGER, DIMENSION(9)                    :: desca, descb, descc
     459             : #else
     460             :       INTEGER                                  :: lda, ldb, ldc
     461             : #endif
     462             : 
     463         550 :       CALL timeset(routineN, handle)
     464             : 
     465             :       !sample peak memory
     466         550 :       CALL m_memory()
     467             : 
     468         550 :       a => matrix_a%local_data
     469         550 :       b => matrix_b%local_data
     470         550 :       c => matrix_c%local_data
     471             : 
     472         550 :       a_sp => matrix_a%local_data_sp
     473         550 :       b_sp => matrix_b%local_data_sp
     474         550 :       c_sp => matrix_c%local_data_sp
     475             : 
     476         550 :       i_a = 1
     477         550 :       IF (PRESENT(a_first_row)) i_a = a_first_row
     478             : 
     479         550 :       j_a = 1
     480         550 :       IF (PRESENT(a_first_col)) j_a = a_first_col
     481             : 
     482         550 :       i_b = 1
     483         550 :       IF (PRESENT(b_first_row)) i_b = b_first_row
     484             : 
     485         550 :       j_b = 1
     486         550 :       IF (PRESENT(b_first_col)) j_b = b_first_col
     487             : 
     488         550 :       i_c = 1
     489         550 :       IF (PRESENT(c_first_row)) i_c = c_first_row
     490             : 
     491         550 :       j_c = 1
     492         550 :       IF (PRESENT(c_first_col)) j_c = c_first_col
     493             : 
     494             : #if defined(__parallel)
     495             : 
     496        5500 :       desca(:) = matrix_a%matrix_struct%descriptor(:)
     497        5500 :       descb(:) = matrix_b%matrix_struct%descriptor(:)
     498        5500 :       descc(:) = matrix_c%matrix_struct%descriptor(:)
     499             : 
     500         550 :       IF (matrix_a%use_sp .AND. matrix_b%use_sp .AND. matrix_c%use_sp) THEN
     501             : 
     502             :          CALL psgemm(transa, transb, m, n, k, REAL(alpha, sp), a_sp(1, 1), i_a, j_a, desca, b_sp(1, 1), i_b, j_b, &
     503           0 :                      descb, REAL(beta, sp), c_sp(1, 1), i_c, j_c, descc)
     504             : 
     505         550 :       ELSEIF ((.NOT. matrix_a%use_sp) .AND. (.NOT. matrix_b%use_sp) .AND. (.NOT. matrix_c%use_sp)) THEN
     506             : 
     507             :          CALL pdgemm(transa, transb, m, n, k, alpha, a, i_a, j_a, desca, b, i_b, j_b, &
     508         550 :                      descb, beta, c, i_c, j_c, descc)
     509             : 
     510             :       ELSE
     511           0 :          CPABORT("Mixed precision gemm NYI")
     512             :       END IF
     513             : #else
     514             : 
     515             :       IF (matrix_a%use_sp .AND. matrix_b%use_sp .AND. matrix_c%use_sp) THEN
     516             : 
     517             :          lda = SIZE(a_sp, 1)
     518             :          ldb = SIZE(b_sp, 1)
     519             :          ldc = SIZE(c_sp, 1)
     520             : 
     521             :          CALL sgemm(transa, transb, m, n, k, REAL(alpha, sp), a_sp(i_a, j_a), lda, b_sp(i_b, j_b), ldb, &
     522             :                     REAL(beta, sp), c_sp(i_c, j_c), ldc)
     523             : 
     524             :       ELSEIF ((.NOT. matrix_a%use_sp) .AND. (.NOT. matrix_b%use_sp) .AND. (.NOT. matrix_c%use_sp)) THEN
     525             : 
     526             :          lda = SIZE(a, 1)
     527             :          ldb = SIZE(b, 1)
     528             :          ldc = SIZE(c, 1)
     529             : 
     530             :          CALL dgemm(transa, transb, m, n, k, alpha, a(i_a, j_a), lda, b(i_b, j_b), ldb, beta, c(i_c, j_c), ldc)
     531             : 
     532             :       ELSE
     533             :          CPABORT("Mixed precision gemm NYI")
     534             :       END IF
     535             : 
     536             : #endif
     537         550 :       CALL timestop(handle)
     538             : 
     539         550 :    END SUBROUTINE cp_fm_gemm
     540             : 
     541             : ! **************************************************************************************************
     542             : !> \brief computes matrix_c = beta * matrix_c + alpha *  matrix_a  *  matrix_b
     543             : !>      computes matrix_c = beta * matrix_c + alpha *  matrix_b  *  matrix_a
     544             : !>      where matrix_a is symmetric
     545             : !> \param side : 'L' -> matrix_a is on the left 'R' -> matrix_a is on the right
     546             : !>      alpha,beta :: can be 0.0_dp and 1.0_dp
     547             : !> \param uplo triangular format
     548             : !> \param m ...
     549             : !> \param n ...
     550             : !> \param alpha ...
     551             : !> \param matrix_a : m x m matrix
     552             : !> \param matrix_b : m x n matrix
     553             : !> \param beta ...
     554             : !> \param matrix_c : m x n matrix
     555             : !> \author Matthias Krack
     556             : !> \note
     557             : !>      matrix_c should have no overlap with matrix_a, matrix_b
     558             : !>      all matrices in QS are triangular according to uplo
     559             : !>      matrix_a is always an m x m matrix
     560             : !>      typically slower than cp_fm_gemm (especially in parallel easily 50 percent)
     561             : ! **************************************************************************************************
     562      146908 :    SUBROUTINE cp_fm_symm(side, uplo, m, n, alpha, matrix_a, matrix_b, beta, matrix_c)
     563             : 
     564             :       CHARACTER(LEN=1), INTENT(IN)             :: side, uplo
     565             :       INTEGER, INTENT(IN)                      :: m, n
     566             :       REAL(KIND=dp), INTENT(IN)                :: alpha
     567             :       TYPE(cp_fm_type), INTENT(IN)             :: matrix_a, matrix_b
     568             :       REAL(KIND=dp), INTENT(IN)                :: beta
     569             :       TYPE(cp_fm_type), INTENT(IN)             :: matrix_c
     570             : 
     571             :       CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_symm'
     572             : 
     573             :       INTEGER                                  :: handle
     574      146908 :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: a, b, c
     575             : #if defined(__parallel)
     576             :       INTEGER, DIMENSION(9)                    :: desca, descb, descc
     577             : #else
     578             :       INTEGER                                  :: lda, ldb, ldc
     579             : #endif
     580             : 
     581      146908 :       CALL timeset(routineN, handle)
     582             : 
     583      146908 :       a => matrix_a%local_data
     584      146908 :       b => matrix_b%local_data
     585      146908 :       c => matrix_c%local_data
     586             : 
     587             : #if defined(__parallel)
     588             : 
     589     1469080 :       desca(:) = matrix_a%matrix_struct%descriptor(:)
     590     1469080 :       descb(:) = matrix_b%matrix_struct%descriptor(:)
     591     1469080 :       descc(:) = matrix_c%matrix_struct%descriptor(:)
     592             : 
     593      146908 :       CALL pdsymm(side, uplo, m, n, alpha, a(1, 1), 1, 1, desca, b(1, 1), 1, 1, descb, beta, c(1, 1), 1, 1, descc)
     594             : 
     595             : #else
     596             : 
     597             :       lda = matrix_a%matrix_struct%local_leading_dimension
     598             :       ldb = matrix_b%matrix_struct%local_leading_dimension
     599             :       ldc = matrix_c%matrix_struct%local_leading_dimension
     600             : 
     601             :       CALL dsymm(side, uplo, m, n, alpha, a(1, 1), lda, b(1, 1), ldb, beta, c(1, 1), ldc)
     602             : 
     603             : #endif
     604      146908 :       CALL timestop(handle)
     605             : 
     606      146908 :    END SUBROUTINE cp_fm_symm
     607             : 
     608             : ! **************************************************************************************************
     609             : !> \brief computes the Frobenius norm of matrix_a
     610             : !> \brief computes the Frobenius norm of matrix_a
     611             : !> \param matrix_a : m x n matrix
     612             : !> \return ...
     613             : !> \author VW
     614             : ! **************************************************************************************************
     615        7926 :    FUNCTION cp_fm_frobenius_norm(matrix_a) RESULT(norm)
     616             :       TYPE(cp_fm_type), INTENT(IN)             :: matrix_a
     617             :       REAL(KIND=dp)                            :: norm
     618             : 
     619             :       CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_frobenius_norm'
     620             : 
     621             :       INTEGER                                  :: handle, size_a
     622        7926 :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: a
     623             :       REAL(KIND=dp), EXTERNAL                  :: DDOT
     624             : #if defined(__parallel)
     625             :       TYPE(mp_comm_type)                       :: group
     626             : #endif
     627             : 
     628        7926 :       CALL timeset(routineN, handle)
     629             : 
     630             :       norm = 0.0_dp
     631        7926 :       a => matrix_a%local_data
     632        7926 :       size_a = SIZE(a, 1)*SIZE(a, 2)
     633        7926 :       norm = DDOT(size_a, a(1, 1), 1, a(1, 1), 1)
     634             : #if defined(__parallel)
     635        7926 :       group = matrix_a%matrix_struct%para_env
     636        7926 :       CALL group%sum(norm)
     637             : #endif
     638        7926 :       norm = SQRT(norm)
     639             : 
     640        7926 :       CALL timestop(handle)
     641             : 
     642        7926 :    END FUNCTION cp_fm_frobenius_norm
     643             : 
     644             : ! **************************************************************************************************
     645             : !> \brief performs a rank-k update of a symmetric matrix_c
     646             : !>         matrix_c = beta * matrix_c + alpha * matrix_a * transpose ( matrix_a )
     647             : !> \param uplo : 'U'   ('L')
     648             : !> \param trans : 'N'  ('T')
     649             : !> \param k : number of cols to use in matrix_a
     650             : !>      ia,ja ::  1,1 (could be used for selecting subblock of a)
     651             : !> \param alpha ...
     652             : !> \param matrix_a ...
     653             : !> \param ia ...
     654             : !> \param ja ...
     655             : !> \param beta ...
     656             : !> \param matrix_c ...
     657             : !> \author Matthias Krack
     658             : ! **************************************************************************************************
     659        7374 :    SUBROUTINE cp_fm_syrk(uplo, trans, k, alpha, matrix_a, ia, ja, beta, matrix_c)
     660             :       CHARACTER(LEN=1), INTENT(IN)             :: uplo, trans
     661             :       INTEGER, INTENT(IN)                      :: k
     662             :       REAL(KIND=dp), INTENT(IN)                :: alpha
     663             :       TYPE(cp_fm_type), INTENT(IN)             :: matrix_a
     664             :       INTEGER, INTENT(IN)                      :: ia, ja
     665             :       REAL(KIND=dp), INTENT(IN)                :: beta
     666             :       TYPE(cp_fm_type), INTENT(IN)             :: matrix_c
     667             : 
     668             :       CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_syrk'
     669             : 
     670             :       INTEGER                                  :: handle, n
     671        7374 :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: a, c
     672             : #if defined(__parallel)
     673             :       INTEGER, DIMENSION(9)                    :: desca, descc
     674             : #else
     675             :       INTEGER                                  :: lda, ldc
     676             : #endif
     677             : 
     678        7374 :       CALL timeset(routineN, handle)
     679             : 
     680        7374 :       n = matrix_c%matrix_struct%nrow_global
     681             : 
     682        7374 :       a => matrix_a%local_data
     683        7374 :       c => matrix_c%local_data
     684             : 
     685             : #if defined(__parallel)
     686             : 
     687       73740 :       desca(:) = matrix_a%matrix_struct%descriptor(:)
     688       73740 :       descc(:) = matrix_c%matrix_struct%descriptor(:)
     689             : 
     690        7374 :       CALL pdsyrk(uplo, trans, n, k, alpha, a(1, 1), ia, ja, desca, beta, c(1, 1), 1, 1, descc)
     691             : 
     692             : #else
     693             : 
     694             :       lda = SIZE(a, 1)
     695             :       ldc = SIZE(c, 1)
     696             : 
     697             :       CALL dsyrk(uplo, trans, n, k, alpha, a(ia, ja), lda, beta, c(1, 1), ldc)
     698             : 
     699             : #endif
     700        7374 :       CALL timestop(handle)
     701             : 
     702        7374 :    END SUBROUTINE cp_fm_syrk
     703             : 
     704             : ! **************************************************************************************************
     705             : !> \brief computes the schur product of two matrices
     706             : !>       c_ij = a_ij * b_ij
     707             : !> \param matrix_a ...
     708             : !> \param matrix_b ...
     709             : !> \param matrix_c ...
     710             : !> \author Joost VandeVondele
     711             : ! **************************************************************************************************
     712        8624 :    SUBROUTINE cp_fm_schur_product(matrix_a, matrix_b, matrix_c)
     713             : 
     714             :       TYPE(cp_fm_type), INTENT(IN)                       :: matrix_a, matrix_b, matrix_c
     715             : 
     716             :       CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_schur_product'
     717             : 
     718             :       INTEGER                                            :: handle, icol_local, irow_local, mypcol, &
     719             :                                                             myprow, ncol_local, npcol, nprow, &
     720             :                                                             nrow_local
     721        8624 :       REAL(KIND=dp), DIMENSION(:, :), POINTER            :: a, b, c
     722             :       TYPE(cp_blacs_env_type), POINTER                   :: context
     723             : 
     724        8624 :       CALL timeset(routineN, handle)
     725             : 
     726        8624 :       context => matrix_a%matrix_struct%context
     727        8624 :       myprow = context%mepos(1)
     728        8624 :       mypcol = context%mepos(2)
     729        8624 :       nprow = context%num_pe(1)
     730        8624 :       npcol = context%num_pe(2)
     731             : 
     732        8624 :       a => matrix_a%local_data
     733        8624 :       b => matrix_b%local_data
     734        8624 :       c => matrix_c%local_data
     735             : 
     736        8624 :       nrow_local = matrix_a%matrix_struct%nrow_locals(myprow)
     737        8624 :       ncol_local = matrix_a%matrix_struct%ncol_locals(mypcol)
     738             : 
     739       95822 :       DO icol_local = 1, ncol_local
     740     6656427 :          DO irow_local = 1, nrow_local
     741     6647803 :             c(irow_local, icol_local) = a(irow_local, icol_local)*b(irow_local, icol_local)
     742             :          END DO
     743             :       END DO
     744             : 
     745        8624 :       CALL timestop(handle)
     746             : 
     747        8624 :    END SUBROUTINE cp_fm_schur_product
     748             : 
     749             : ! **************************************************************************************************
     750             : !> \brief returns the trace of matrix_a^T matrix_b, i.e
     751             : !>      sum_{i,j}(matrix_a(i,j)*matrix_b(i,j))
     752             : !> \param matrix_a a matrix
     753             : !> \param matrix_b another matrix
     754             : !> \param trace ...
     755             : !> \par History
     756             : !>      11.06.2001 Creation (Matthias Krack)
     757             : !>      12.2002 added doc [fawzi]
     758             : !> \author Matthias Krack
     759             : !> \note
     760             : !>      note the transposition of matrix_a!
     761             : ! **************************************************************************************************
     762      696972 :    SUBROUTINE cp_fm_trace_a0b0t0(matrix_a, matrix_b, trace)
     763             : 
     764             :       TYPE(cp_fm_type), INTENT(IN)                       :: matrix_a, matrix_b
     765             :       REAL(KIND=dp), INTENT(OUT)                         :: trace
     766             : 
     767             :       CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_trace_a0b0t0'
     768             : 
     769             :       INTEGER                                            :: handle, mypcol, myprow, ncol_local, &
     770             :                                                             npcol, nprow, nrow_local
     771      696972 :       REAL(KIND=dp), DIMENSION(:, :), POINTER            :: a, b
     772      696972 :       REAL(KIND=sp), DIMENSION(:, :), POINTER            :: a_sp, b_sp
     773             :       TYPE(cp_blacs_env_type), POINTER                   :: context
     774             :       TYPE(mp_comm_type)                                 :: group
     775             : 
     776      696972 :       CALL timeset(routineN, handle)
     777             : 
     778      696972 :       context => matrix_a%matrix_struct%context
     779      696972 :       myprow = context%mepos(1)
     780      696972 :       mypcol = context%mepos(2)
     781      696972 :       nprow = context%num_pe(1)
     782      696972 :       npcol = context%num_pe(2)
     783             : 
     784      696972 :       group = matrix_a%matrix_struct%para_env
     785             : 
     786      696972 :       a => matrix_a%local_data
     787      696972 :       b => matrix_b%local_data
     788             : 
     789      696972 :       a_sp => matrix_a%local_data_sp
     790      696972 :       b_sp => matrix_b%local_data_sp
     791             : 
     792      696972 :       nrow_local = MIN(matrix_a%matrix_struct%nrow_locals(myprow), matrix_b%matrix_struct%nrow_locals(myprow))
     793      696972 :       ncol_local = MIN(matrix_a%matrix_struct%ncol_locals(mypcol), matrix_b%matrix_struct%ncol_locals(mypcol))
     794             : 
     795             :       ! cries for an accurate_dot_product
     796      696972 :       IF (matrix_a%use_sp .AND. matrix_b%use_sp) THEN
     797             :          trace = accurate_sum(REAL(a_sp(1:nrow_local, 1:ncol_local)* &
     798           0 :                                    b_sp(1:nrow_local, 1:ncol_local), dp))
     799      696972 :       ELSEIF (matrix_a%use_sp .AND. .NOT. matrix_b%use_sp) THEN
     800             :          trace = accurate_sum(REAL(a_sp(1:nrow_local, 1:ncol_local), dp)* &
     801           0 :                               b(1:nrow_local, 1:ncol_local))
     802      696972 :       ELSEIF (.NOT. matrix_a%use_sp .AND. matrix_b%use_sp) THEN
     803             :          trace = accurate_sum(a(1:nrow_local, 1:ncol_local)* &
     804           0 :                               REAL(b_sp(1:nrow_local, 1:ncol_local), dp))
     805             :       ELSE
     806             :          trace = accurate_dot_product(a(1:nrow_local, 1:ncol_local), &
     807      696972 :                                       b(1:nrow_local, 1:ncol_local))
     808             :       END IF
     809             : 
     810      696972 :       CALL group%sum(trace)
     811             : 
     812      696972 :       CALL timestop(handle)
     813             : 
     814      696972 :    END SUBROUTINE cp_fm_trace_a0b0t0
     815             : 
     816             :    #:mute
     817             :       #:set types = [("cp_fm_type", "a", ""), ("cp_fm_p_type", "p","%matrix")]
     818             :    #:endmute
     819             : 
     820             : ! **************************************************************************************************
     821             : !> \brief Compute trace(k) = Tr (matrix_a(k)^T matrix_b) for each pair of matrices A_k and B.
     822             : !> \param matrix_a list of A matrices
     823             : !> \param matrix_b B matrix
     824             : !> \param trace    computed traces
     825             : !> \par History
     826             : !>    * 08.2018 forked from cp_fm_trace() [Sergey Chulkov]
     827             : !> \note \parblock
     828             : !>      Computing the trace requires collective communication between involved MPI processes
     829             : !>      that implies a synchronisation point between them. The aim of this subroutine is to reduce
     830             : !>      the amount of time wasted in such synchronisation by performing one large collective
     831             : !>      operation which involves all the matrices in question.
     832             : !>
     833             : !>      The subroutine's suffix reflects dimensionality of dummy arrays; 'a1b0t1' means that
     834             : !>      the dummy variables 'matrix_a' and 'trace' are 1-dimensional arrays, while the variable
     835             : !>      'matrix_b' is a single matrix.
     836             : !>      \endparblock
     837             : ! **************************************************************************************************
     838             :    #:for longname, shortname, appendix in types
     839        3066 :       SUBROUTINE cp_fm_trace_a1b0t1_${shortname}$ (matrix_a, matrix_b, trace)
     840             :          TYPE(${longname}$), DIMENSION(:), INTENT(IN)       :: matrix_a
     841             :          TYPE(cp_fm_type), INTENT(IN)                       :: matrix_b
     842             :          REAL(kind=dp), DIMENSION(:), INTENT(OUT)           :: trace
     843             : 
     844             :          CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_trace_a1b0t1_${shortname}$'
     845             : 
     846             :          INTEGER                                            :: handle, imatrix, n_matrices, &
     847             :                                                                ncols_local, nrows_local
     848             :          LOGICAL                                            :: use_sp_a, use_sp_b
     849        3066 :          REAL(kind=dp), DIMENSION(:, :), POINTER            :: ldata_a, ldata_b
     850        3066 :          REAL(kind=sp), DIMENSION(:, :), POINTER            :: ldata_a_sp, ldata_b_sp
     851             :          TYPE(mp_comm_type)                                 :: group
     852             : 
     853        3066 :          CALL timeset(routineN, handle)
     854             : 
     855        3066 :          n_matrices = SIZE(trace)
     856        3066 :          CPASSERT(SIZE(matrix_a) == n_matrices)
     857             : 
     858        3066 :          CALL cp_fm_get_info(matrix_b, nrow_local=nrows_local, ncol_local=ncols_local)
     859        3066 :          use_sp_b = matrix_b%use_sp
     860             : 
     861        3066 :          IF (use_sp_b) THEN
     862           0 :             ldata_b_sp => matrix_b%local_data_sp(1:nrows_local, 1:ncols_local)
     863             :          ELSE
     864        3066 :             ldata_b => matrix_b%local_data(1:nrows_local, 1:ncols_local)
     865             :          END IF
     866             : 
     867             : !$OMP PARALLEL DO DEFAULT(NONE), &
     868             : !$OMP             PRIVATE(imatrix, ldata_a, ldata_a_sp, use_sp_a), &
     869             : !$OMP             SHARED(ldata_b, ldata_b_sp, matrix_a, matrix_b), &
     870        3066 : !$OMP             SHARED(ncols_local, nrows_local, n_matrices, trace, use_sp_b)
     871             : 
     872             :          DO imatrix = 1, n_matrices
     873             : 
     874             :             use_sp_a = matrix_a(imatrix) ${appendix}$%use_sp
     875             : 
     876             :             ! assume that the matrices A(i) and B have identical shapes and distribution schemes
     877             :             IF (use_sp_a .AND. use_sp_b) THEN
     878             :                ldata_a_sp => matrix_a(imatrix) ${appendix}$%local_data_sp(1:nrows_local, 1:ncols_local)
     879             :                trace(imatrix) = accurate_dot_product(ldata_a_sp, ldata_b_sp)
     880             :             ELSE IF (.NOT. use_sp_a .AND. .NOT. use_sp_b) THEN
     881             :                ldata_a => matrix_a(imatrix) ${appendix}$%local_data(1:nrows_local, 1:ncols_local)
     882             :                trace(imatrix) = accurate_dot_product(ldata_a, ldata_b)
     883             :             ELSE
     884             :                CPABORT("Matrices A and B are of different types")
     885             :             END IF
     886             :          END DO
     887             : !$OMP END PARALLEL DO
     888             : 
     889        3066 :          group = matrix_b%matrix_struct%para_env
     890       19110 :          CALL group%sum(trace)
     891             : 
     892        3066 :          CALL timestop(handle)
     893        3066 :       END SUBROUTINE cp_fm_trace_a1b0t1_${shortname}$
     894             :    #:endfor
     895             : 
     896             : ! **************************************************************************************************
     897             : !> \brief Compute trace(k) = Tr (matrix_a(k)^T matrix_b(k)) for each pair of matrices A_k and B_k.
     898             : !> \param matrix_a list of A matrices
     899             : !> \param matrix_b list of B matrices
     900             : !> \param trace    computed traces
     901             : !> \param accurate ...
     902             : !> \par History
     903             : !>    * 11.2016 forked from cp_fm_trace() [Sergey Chulkov]
     904             : !> \note \parblock
     905             : !>      Computing the trace requires collective communication between involved MPI processes
     906             : !>      that implies a synchronisation point between them. The aim of this subroutine is to reduce
     907             : !>      the amount of time wasted in such synchronisation by performing one large collective
     908             : !>      operation which involves all the matrices in question.
     909             : !>
     910             : !>      The subroutine's suffix reflects dimensionality of dummy arrays; 'a1b1t1' means that
     911             : !>      all dummy variables (matrix_a, matrix_b, and trace) are 1-dimensional arrays.
     912             : !>      \endparblock
     913             : ! **************************************************************************************************
     914             :    #:for longname1, shortname1, appendix1 in types
     915             :       #:for longname2, shortname2, appendix2 in types
     916      139276 :          SUBROUTINE cp_fm_trace_a1b1t1_${shortname1}$${shortname2}$ (matrix_a, matrix_b, trace, accurate)
     917             :             TYPE(${longname1}$), DIMENSION(:), INTENT(IN)      :: matrix_a
     918             :             TYPE(${longname2}$), DIMENSION(:), INTENT(IN)      :: matrix_b
     919             :             REAL(kind=dp), DIMENSION(:), INTENT(OUT)           :: trace
     920             :             LOGICAL, INTENT(IN), OPTIONAL                      :: accurate
     921             : 
     922             :             CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_trace_a1b1t1_${shortname1}$${shortname2}$'
     923             : 
     924             :             INTEGER                                            :: handle, imatrix, n_matrices, &
     925             :                                                                   ncols_local, nrows_local
     926             :             LOGICAL                                            :: use_accurate_sum, use_sp_a, use_sp_b
     927      139276 :             REAL(kind=dp), DIMENSION(:, :), POINTER            :: ldata_a, ldata_b
     928      139276 :             REAL(kind=sp), DIMENSION(:, :), POINTER            :: ldata_a_sp, ldata_b_sp
     929             :             TYPE(mp_comm_type)                                 :: group
     930             : 
     931      139276 :             CALL timeset(routineN, handle)
     932             : 
     933      139276 :             n_matrices = SIZE(trace)
     934      139276 :             CPASSERT(SIZE(matrix_a) == n_matrices)
     935      139276 :             CPASSERT(SIZE(matrix_b) == n_matrices)
     936             : 
     937      139276 :             use_accurate_sum = .TRUE.
     938      139276 :             IF (PRESENT(accurate)) use_accurate_sum = accurate
     939             : 
     940             : !$OMP PARALLEL DO DEFAULT(NONE), &
     941             : !$OMP             PRIVATE(imatrix, ldata_a, ldata_a_sp, ldata_b, ldata_b_sp, ncols_local), &
     942             : !$OMP             PRIVATE(nrows_local, use_sp_a, use_sp_b), &
     943      139276 : !$OMP             SHARED(matrix_a, matrix_b, n_matrices, trace, use_accurate_sum)
     944             :             DO imatrix = 1, n_matrices
     945             :                CALL cp_fm_get_info(matrix_a(imatrix) ${appendix1}$, nrow_local=nrows_local, ncol_local=ncols_local)
     946             : 
     947             :                use_sp_a = matrix_a(imatrix) ${appendix1}$%use_sp
     948             :                use_sp_b = matrix_b(imatrix) ${appendix2}$%use_sp
     949             : 
     950             :                ! assume that the matrices A(i) and B(i) have identical shapes and distribution schemes
     951             :                IF (use_sp_a .AND. use_sp_b) THEN
     952             :                   ldata_a_sp => matrix_a(imatrix) ${appendix1}$%local_data_sp(1:nrows_local, 1:ncols_local)
     953             :                   ldata_b_sp => matrix_b(imatrix) ${appendix2}$%local_data_sp(1:nrows_local, 1:ncols_local)
     954             :                   IF (use_accurate_sum) THEN
     955             :                      trace(imatrix) = accurate_dot_product(ldata_a_sp, ldata_b_sp)
     956             :                   ELSE
     957             :                      trace(imatrix) = SUM(ldata_a_sp*ldata_b_sp)
     958             :                   END IF
     959             :                ELSE IF (.NOT. use_sp_a .AND. .NOT. use_sp_b) THEN
     960             :                   ldata_a => matrix_a(imatrix) ${appendix1}$%local_data(1:nrows_local, 1:ncols_local)
     961             :                   ldata_b => matrix_b(imatrix) ${appendix2}$%local_data(1:nrows_local, 1:ncols_local)
     962             :                   IF (use_accurate_sum) THEN
     963             :                      trace(imatrix) = accurate_dot_product(ldata_a, ldata_b)
     964             :                   ELSE
     965             :                      trace(imatrix) = SUM(ldata_a*ldata_b)
     966             :                   END IF
     967             :                ELSE
     968             :                   CPABORT("Matrices A and B are of different types")
     969             :                END IF
     970             :             END DO
     971             : !$OMP END PARALLEL DO
     972             : 
     973      139276 :             group = matrix_a(1) ${appendix1}$%matrix_struct%para_env
     974      464832 :             CALL group%sum(trace)
     975             : 
     976      139276 :             CALL timestop(handle)
     977      139276 :          END SUBROUTINE cp_fm_trace_a1b1t1_${shortname1}$${shortname2}$
     978             :       #:endfor
     979             :    #:endfor
     980             : 
     981             : ! **************************************************************************************************
     982             : !> \brief Compute trace(i,j) = \sum_k Tr (matrix_a(k,i)^T matrix_b(k,j)).
     983             : !> \param matrix_a list of A matrices
     984             : !> \param matrix_b list of B matrices
     985             : !> \param trace    computed traces
     986             : !> \param accurate ...
     987             : ! **************************************************************************************************
     988             :    #:for longname1, shortname1, appendix1 in types
     989             :       #:for longname2, shortname2, appendix2 in types
     990       13338 :          SUBROUTINE cp_fm_contracted_trace_a2b2t2_${shortname1}$${shortname2}$ (matrix_a, matrix_b, trace, accurate)
     991             :             TYPE(${longname1}$), DIMENSION(:, :), INTENT(IN)   :: matrix_a
     992             :             TYPE(${longname2}$), DIMENSION(:, :), INTENT(IN)   :: matrix_b
     993             :             REAL(kind=dp), DIMENSION(:, :), INTENT(OUT)        :: trace
     994             :             LOGICAL, INTENT(IN), OPTIONAL                      :: accurate
     995             : 
     996             :             CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_contracted_trace_a2b2t2_${shortname1}$${shortname2}$'
     997             : 
     998             :             INTEGER                                            :: handle, ia, ib, iz, na, nb, ncols_local, &
     999             :                                                                   nrows_local, nz
    1000             :             INTEGER(kind=int_8)                                :: ib8, itrace, na8, ntraces
    1001             :             LOGICAL                                            :: use_accurate_sum, use_sp_a, use_sp_b
    1002             :             REAL(kind=dp)                                      :: t
    1003       13338 :             REAL(kind=dp), DIMENSION(:, :), POINTER            :: ldata_a, ldata_b
    1004       13338 :             REAL(kind=sp), DIMENSION(:, :), POINTER            :: ldata_a_sp, ldata_b_sp
    1005             :             TYPE(mp_comm_type)                                 :: group
    1006             : 
    1007       13338 :             CALL timeset(routineN, handle)
    1008             : 
    1009       13338 :             nz = SIZE(matrix_a, 1)
    1010       13338 :             CPASSERT(SIZE(matrix_b, 1) == nz)
    1011             : 
    1012       13338 :             na = SIZE(matrix_a, 2)
    1013       13338 :             nb = SIZE(matrix_b, 2)
    1014       13338 :             CPASSERT(SIZE(trace, 1) == na)
    1015       13338 :             CPASSERT(SIZE(trace, 2) == nb)
    1016             : 
    1017       13338 :             use_accurate_sum = .TRUE.
    1018       13338 :             IF (PRESENT(accurate)) use_accurate_sum = accurate
    1019             : 
    1020             :             ! here we use one running index (itrace) instead of two (ia, ib) in order to
    1021             :             ! improve load balance between shared-memory threads
    1022       13338 :             ntraces = na*nb
    1023       13338 :             na8 = INT(na, kind=int_8)
    1024             : 
    1025             : !$OMP PARALLEL DO DEFAULT(NONE), &
    1026             : !$OMP             PRIVATE(ia, ib, ib8, itrace, iz, ldata_a, ldata_a_sp, ldata_b, ldata_b_sp, ncols_local), &
    1027             : !$OMP             PRIVATE(nrows_local, t, use_sp_a, use_sp_b), &
    1028       13338 : !$OMP             SHARED(matrix_a, matrix_b, na, na8, nb, ntraces, nz, trace, use_accurate_sum)
    1029             :             DO itrace = 1, ntraces
    1030             :                ib8 = (itrace - 1)/na8
    1031             :                ia = INT(itrace - ib8*na8)
    1032             :                ib = INT(ib8) + 1
    1033             : 
    1034             :                t = 0.0_dp
    1035             :                DO iz = 1, nz
    1036             :                   CALL cp_fm_get_info(matrix_a(iz, ia) ${appendix1}$, nrow_local=nrows_local, ncol_local=ncols_local)
    1037             :                   use_sp_a = matrix_a(iz, ia) ${appendix1}$%use_sp
    1038             :                   use_sp_b = matrix_b(iz, ib) ${appendix2}$%use_sp
    1039             : 
    1040             :                   ! assume that the matrices A(iz, ia) and B(iz, ib) have identical shapes and distribution schemes
    1041             :                   IF (.NOT. use_sp_a .AND. .NOT. use_sp_b) THEN
    1042             :                      ldata_a => matrix_a(iz, ia) ${appendix1}$%local_data(1:nrows_local, 1:ncols_local)
    1043             :                      ldata_b => matrix_b(iz, ib) ${appendix2}$%local_data(1:nrows_local, 1:ncols_local)
    1044             :                      IF (use_accurate_sum) THEN
    1045             :                         t = t + accurate_dot_product(ldata_a, ldata_b)
    1046             :                      ELSE
    1047             :                         t = t + SUM(ldata_a*ldata_b)
    1048             :                      END IF
    1049             :                   ELSE IF (use_sp_a .AND. use_sp_b) THEN
    1050             :                      ldata_a_sp => matrix_a(iz, ia) ${appendix1}$%local_data_sp(1:nrows_local, 1:ncols_local)
    1051             :                      ldata_b_sp => matrix_b(iz, ib) ${appendix2}$%local_data_sp(1:nrows_local, 1:ncols_local)
    1052             :                      IF (use_accurate_sum) THEN
    1053             :                         t = t + accurate_dot_product(ldata_a_sp, ldata_b_sp)
    1054             :                      ELSE
    1055             :                         t = t + SUM(ldata_a_sp*ldata_b_sp)
    1056             :                      END IF
    1057             :                   ELSE
    1058             :                      CPABORT("Matrices A and B are of different types")
    1059             :                   END IF
    1060             :                END DO
    1061             :                trace(ia, ib) = t
    1062             :             END DO
    1063             : !$OMP END PARALLEL DO
    1064             : 
    1065       13338 :             group = matrix_a(1, 1) ${appendix1}$%matrix_struct%para_env
    1066      623498 :             CALL group%sum(trace)
    1067             : 
    1068       13338 :             CALL timestop(handle)
    1069       13338 :          END SUBROUTINE cp_fm_contracted_trace_a2b2t2_${shortname1}$${shortname2}$
    1070             :       #:endfor
    1071             :    #:endfor
    1072             : 
    1073             : ! **************************************************************************************************
    1074             : !> \brief multiplies in place by a triangular matrix:
    1075             : !>       matrix_b = alpha op(triangular_matrix) matrix_b
    1076             : !>      or (if side='R')
    1077             : !>       matrix_b = alpha matrix_b op(triangular_matrix)
    1078             : !>      op(triangular_matrix) is:
    1079             : !>       triangular_matrix (if transpose_tr=.false. and invert_tr=.false.)
    1080             : !>       triangular_matrix^T (if transpose_tr=.true. and invert_tr=.false.)
    1081             : !>       triangular_matrix^(-1) (if transpose_tr=.false. and invert_tr=.true.)
    1082             : !>       triangular_matrix^(-T) (if transpose_tr=.true. and invert_tr=.true.)
    1083             : !> \param triangular_matrix the triangular matrix that multiplies the other
    1084             : !> \param matrix_b the matrix that gets multiplied and stores the result
    1085             : !> \param side on which side of matrix_b stays op(triangular_matrix)
    1086             : !>        (defaults to 'L')
    1087             : !> \param transpose_tr if the triangular matrix should be transposed
    1088             : !>        (defaults to false)
    1089             : !> \param invert_tr if the triangular matrix should be inverted
    1090             : !>        (defaults to false)
    1091             : !> \param uplo_tr if triangular_matrix is stored in the upper ('U') or
    1092             : !>        lower ('L') triangle (defaults to 'U')
    1093             : !> \param unit_diag_tr if the diagonal elements of triangular_matrix should
    1094             : !>        be assumed to be 1 (defaults to false)
    1095             : !> \param n_rows the number of rows of the result (defaults to
    1096             : !>        size(matrix_b,1))
    1097             : !> \param n_cols the number of columns of the result (defaults to
    1098             : !>        size(matrix_b,2))
    1099             : !> \param alpha ...
    1100             : !> \par History
    1101             : !>      08.2002 created [fawzi]
    1102             : !> \author Fawzi Mohamed
    1103             : !> \note
    1104             : !>      needs an mpi env
    1105             : ! **************************************************************************************************
    1106      107166 :    SUBROUTINE cp_fm_triangular_multiply(triangular_matrix, matrix_b, side, &
    1107             :                                         transpose_tr, invert_tr, uplo_tr, unit_diag_tr, n_rows, n_cols, &
    1108             :                                         alpha)
    1109             :       TYPE(cp_fm_type), INTENT(IN)                       :: triangular_matrix, matrix_b
    1110             :       CHARACTER, INTENT(IN), OPTIONAL                    :: side
    1111             :       LOGICAL, INTENT(IN), OPTIONAL                      :: transpose_tr, invert_tr
    1112             :       CHARACTER, INTENT(IN), OPTIONAL                    :: uplo_tr
    1113             :       LOGICAL, INTENT(IN), OPTIONAL                      :: unit_diag_tr
    1114             :       INTEGER, INTENT(IN), OPTIONAL                      :: n_rows, n_cols
    1115             :       REAL(KIND=dp), INTENT(IN), OPTIONAL                :: alpha
    1116             : 
    1117             :       CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_triangular_multiply'
    1118             : 
    1119             :       CHARACTER                                          :: side_char, transa, unit_diag, uplo
    1120             :       INTEGER                                            :: handle, m, n
    1121             :       LOGICAL                                            :: invert
    1122             :       REAL(KIND=dp)                                      :: al
    1123             : 
    1124       53583 :       CALL timeset(routineN, handle)
    1125       53583 :       side_char = 'L'
    1126       53583 :       unit_diag = 'N'
    1127       53583 :       uplo = 'U'
    1128       53583 :       transa = 'N'
    1129       53583 :       invert = .FALSE.
    1130       53583 :       al = 1.0_dp
    1131       53583 :       CALL cp_fm_get_info(matrix_b, nrow_global=m, ncol_global=n)
    1132       53583 :       IF (PRESENT(side)) side_char = side
    1133       53583 :       IF (PRESENT(invert_tr)) invert = invert_tr
    1134       53583 :       IF (PRESENT(uplo_tr)) uplo = uplo_tr
    1135       53583 :       IF (PRESENT(unit_diag_tr)) THEN
    1136           0 :          IF (unit_diag_tr) THEN
    1137           0 :             unit_diag = 'U'
    1138             :          ELSE
    1139             :             unit_diag = 'N'
    1140             :          END IF
    1141             :       END IF
    1142       53583 :       IF (PRESENT(transpose_tr)) THEN
    1143        3564 :          IF (transpose_tr) THEN
    1144        1296 :             transa = 'T'
    1145             :          ELSE
    1146             :             transa = 'N'
    1147             :          END IF
    1148             :       END IF
    1149       53583 :       IF (PRESENT(alpha)) al = alpha
    1150       53583 :       IF (PRESENT(n_rows)) m = n_rows
    1151       53583 :       IF (PRESENT(n_cols)) n = n_cols
    1152             : 
    1153       53583 :       IF (invert) THEN
    1154             : 
    1155             : #if defined(__parallel)
    1156             :          CALL pdtrsm(side_char, uplo, transa, unit_diag, m, n, al, &
    1157             :                      triangular_matrix%local_data(1, 1), 1, 1, &
    1158             :                      triangular_matrix%matrix_struct%descriptor, &
    1159             :                      matrix_b%local_data(1, 1), 1, 1, &
    1160       43399 :                      matrix_b%matrix_struct%descriptor(1))
    1161             : #else
    1162             :          CALL dtrsm(side_char, uplo, transa, unit_diag, m, n, al, &
    1163             :                     triangular_matrix%local_data(1, 1), &
    1164             :                     SIZE(triangular_matrix%local_data, 1), &
    1165             :                     matrix_b%local_data(1, 1), SIZE(matrix_b%local_data, 1))
    1166             : #endif
    1167             : 
    1168             :       ELSE
    1169             : 
    1170             : #if defined(__parallel)
    1171             :          CALL pdtrmm(side_char, uplo, transa, unit_diag, m, n, al, &
    1172             :                      triangular_matrix%local_data(1, 1), 1, 1, &
    1173             :                      triangular_matrix%matrix_struct%descriptor, &
    1174             :                      matrix_b%local_data(1, 1), 1, 1, &
    1175       10184 :                      matrix_b%matrix_struct%descriptor(1))
    1176             : #else
    1177             :          CALL dtrmm(side_char, uplo, transa, unit_diag, m, n, al, &
    1178             :                     triangular_matrix%local_data(1, 1), &
    1179             :                     SIZE(triangular_matrix%local_data, 1), &
    1180             :                     matrix_b%local_data(1, 1), SIZE(matrix_b%local_data, 1))
    1181             : #endif
    1182             : 
    1183             :       END IF
    1184             : 
    1185       53583 :       CALL timestop(handle)
    1186       53583 :    END SUBROUTINE cp_fm_triangular_multiply
    1187             : 
    1188             : ! **************************************************************************************************
    1189             : !> \brief scales a matrix
    1190             : !>      matrix_a = alpha * matrix_b
    1191             : !> \param alpha ...
    1192             : !> \param matrix_a ...
    1193             : !> \note
    1194             : !>      use cp_fm_set_all to zero (avoids problems with nan)
    1195             : ! **************************************************************************************************
    1196       80905 :    SUBROUTINE cp_fm_scale(alpha, matrix_a)
    1197             :       REAL(KIND=dp), INTENT(IN)                          :: alpha
    1198             :       TYPE(cp_fm_type), INTENT(IN)                       :: matrix_a
    1199             : 
    1200             :       CHARACTER(len=*), PARAMETER                        :: routineN = 'cp_fm_scale'
    1201             : 
    1202             :       INTEGER                                            :: handle, size_a
    1203             :       REAL(KIND=dp), DIMENSION(:, :), POINTER            :: a
    1204             : 
    1205       80905 :       CALL timeset(routineN, handle)
    1206             : 
    1207             :       NULLIFY (a)
    1208             : 
    1209       80905 :       a => matrix_a%local_data
    1210       80905 :       size_a = SIZE(a, 1)*SIZE(a, 2)
    1211             : 
    1212       80905 :       CALL DSCAL(size_a, alpha, a, 1)
    1213             : 
    1214       80905 :       CALL timestop(handle)
    1215             : 
    1216       80905 :    END SUBROUTINE cp_fm_scale
    1217             : 
    1218             : ! **************************************************************************************************
    1219             : !> \brief transposes a matrix
    1220             : !>      matrixt = matrix ^ T
    1221             : !> \param matrix ...
    1222             : !> \param matrixt ...
    1223             : !> \note
    1224             : !>      all matrix elements are transposed (see cp_fm_uplo_to_full to symmetrise a matrix)
    1225             : ! **************************************************************************************************
    1226       26486 :    SUBROUTINE cp_fm_transpose(matrix, matrixt)
    1227             :       TYPE(cp_fm_type), INTENT(IN)             :: matrix, matrixt
    1228             : 
    1229             :       CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_transpose'
    1230             : 
    1231             :       INTEGER                                  :: handle, ncol_global, &
    1232             :                                                   nrow_global, ncol_globalt, nrow_globalt
    1233       13243 :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: a, c
    1234             : #if defined(__parallel)
    1235             :       INTEGER, DIMENSION(9)                    :: desca, descc
    1236             : #elif !defined(__MKL)
    1237             :       INTEGER                                  :: i, j
    1238             : #endif
    1239             : 
    1240       13243 :       nrow_global = matrix%matrix_struct%nrow_global
    1241       13243 :       ncol_global = matrix%matrix_struct%ncol_global
    1242       13243 :       nrow_globalt = matrixt%matrix_struct%nrow_global
    1243       13243 :       ncol_globalt = matrixt%matrix_struct%ncol_global
    1244           0 :       CPASSERT(nrow_global == ncol_globalt)
    1245       13243 :       CPASSERT(nrow_globalt == ncol_global)
    1246             : 
    1247       13243 :       CALL timeset(routineN, handle)
    1248             : 
    1249       13243 :       a => matrix%local_data
    1250       13243 :       c => matrixt%local_data
    1251             : 
    1252             : #if defined(__parallel)
    1253      132430 :       desca(:) = matrix%matrix_struct%descriptor(:)
    1254      132430 :       descc(:) = matrixt%matrix_struct%descriptor(:)
    1255       13243 :       CALL pdtran(ncol_global, nrow_global, 1.0_dp, a(1, 1), 1, 1, desca, 0.0_dp, c(1, 1), 1, 1, descc)
    1256             : #elif defined(__MKL)
    1257             :       CALL mkl_domatcopy('C', 'T', nrow_global, ncol_global, 1.0_dp, a(1, 1), nrow_global, c(1, 1), ncol_global)
    1258             : #else
    1259             :       DO j = 1, ncol_global
    1260             :          DO i = 1, nrow_global
    1261             :             c(j, i) = a(i, j)
    1262             :          END DO
    1263             :       END DO
    1264             : #endif
    1265       13243 :       CALL timestop(handle)
    1266             : 
    1267       13243 :    END SUBROUTINE cp_fm_transpose
    1268             : 
    1269             : ! **************************************************************************************************
    1270             : !> \brief given a triangular matrix according to uplo, computes the corresponding full matrix
    1271             : !> \param matrix the triangular matrix as input, the full matrix as output
    1272             : !> \param work a matrix of the same size as matrix
    1273             : !> \param uplo triangular format; defaults to 'U'
    1274             : !> \author Matthias Krack
    1275             : !> \note
    1276             : !>       the opposite triangular part is irrelevant
    1277             : ! **************************************************************************************************
    1278      340690 :    SUBROUTINE cp_fm_uplo_to_full(matrix, work, uplo)
    1279             : 
    1280             :       TYPE(cp_fm_type), INTENT(IN)             :: matrix, work
    1281             :       CHARACTER, INTENT(IN), OPTIONAL          :: uplo
    1282             : 
    1283             :       CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_uplo_to_full'
    1284             : 
    1285             :       CHARACTER                                :: myuplo
    1286             :       INTEGER                                  :: handle, icol_global, irow_global, &
    1287             :                                                   mypcol, myprow, ncol_global, &
    1288             :                                                   npcol, nprow, nrow_global
    1289      170345 :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: a
    1290      170345 :       REAL(KIND=sp), DIMENSION(:, :), POINTER  :: a_sp
    1291             :       TYPE(cp_blacs_env_type), POINTER         :: context
    1292             : 
    1293             : #if defined(__parallel)
    1294             :       INTEGER                                  :: icol_local, irow_local, &
    1295             :                                                   ncol_block, ncol_local, &
    1296             :                                                   nrow_block, nrow_local
    1297             :       INTEGER, DIMENSION(9)                    :: desca, descc
    1298      170345 :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: c
    1299      170345 :       REAL(KIND=sp), DIMENSION(:, :), POINTER  :: c_sp
    1300             : #endif
    1301             : 
    1302      170345 :       myuplo = 'U'
    1303           0 :       IF (PRESENT(uplo)) myuplo = uplo
    1304             : 
    1305      170345 :       nrow_global = matrix%matrix_struct%nrow_global
    1306      170345 :       ncol_global = matrix%matrix_struct%ncol_global
    1307      170345 :       CPASSERT(nrow_global == ncol_global)
    1308      170345 :       nrow_global = work%matrix_struct%nrow_global
    1309      170345 :       ncol_global = work%matrix_struct%ncol_global
    1310      170345 :       CPASSERT(nrow_global == ncol_global)
    1311      170345 :       CPASSERT(matrix%use_sp .EQV. work%use_sp)
    1312             : 
    1313      170345 :       CALL timeset(routineN, handle)
    1314             : 
    1315      170345 :       context => matrix%matrix_struct%context
    1316      170345 :       myprow = context%mepos(1)
    1317      170345 :       mypcol = context%mepos(2)
    1318      170345 :       nprow = context%num_pe(1)
    1319      170345 :       npcol = context%num_pe(2)
    1320             : 
    1321             : #if defined(__parallel)
    1322             : 
    1323      170345 :       nrow_block = matrix%matrix_struct%nrow_block
    1324      170345 :       ncol_block = matrix%matrix_struct%ncol_block
    1325             : 
    1326      170345 :       nrow_local = matrix%matrix_struct%nrow_locals(myprow)
    1327      170345 :       ncol_local = matrix%matrix_struct%ncol_locals(mypcol)
    1328             : 
    1329      170345 :       a => work%local_data
    1330      170345 :       a_sp => work%local_data_sp
    1331     1703450 :       desca(:) = work%matrix_struct%descriptor(:)
    1332      170345 :       c => matrix%local_data
    1333      170345 :       c_sp => matrix%local_data_sp
    1334     1703450 :       descc(:) = matrix%matrix_struct%descriptor(:)
    1335             : 
    1336     5895796 :       DO icol_local = 1, ncol_local
    1337     5725451 :          icol_global = matrix%matrix_struct%col_indices(icol_local)
    1338   259147197 :          DO irow_local = 1, nrow_local
    1339   253251401 :             irow_global = matrix%matrix_struct%row_indices(irow_local)
    1340   258976852 :             IF (MERGE(irow_global > icol_global, irow_global < icol_global, (myuplo == "U") .OR. (myuplo == "u"))) THEN
    1341   124976066 :                IF (matrix%use_sp) THEN
    1342           0 :                   c_sp(irow_local, icol_local) = 0.0_sp
    1343             :                ELSE
    1344   124976066 :                   c(irow_local, icol_local) = 0.0_dp
    1345             :                END IF
    1346   128275335 :             ELSE IF (irow_global == icol_global) THEN
    1347     3299269 :                IF (matrix%use_sp) THEN
    1348           0 :                   c_sp(irow_local, icol_local) = 0.5_sp*c_sp(irow_local, icol_local)
    1349             :                ELSE
    1350     3299269 :                   c(irow_local, icol_local) = 0.5_dp*c(irow_local, icol_local)
    1351             :                END IF
    1352             :             END IF
    1353             :          END DO
    1354             :       END DO
    1355             : 
    1356     5895796 :       DO icol_local = 1, ncol_local
    1357   259147197 :       DO irow_local = 1, nrow_local
    1358   258976852 :          IF (matrix%use_sp) THEN
    1359           0 :             a_sp(irow_local, icol_local) = c_sp(irow_local, icol_local)
    1360             :          ELSE
    1361   253251401 :             a(irow_local, icol_local) = c(irow_local, icol_local)
    1362             :          END IF
    1363             :       END DO
    1364             :       END DO
    1365             : 
    1366      170345 :       IF (matrix%use_sp) THEN
    1367           0 :          CALL pstran(nrow_global, ncol_global, 1.0_sp, a_sp(1, 1), 1, 1, desca, 1.0_sp, c_sp(1, 1), 1, 1, descc)
    1368             :       ELSE
    1369      170345 :          CALL pdtran(nrow_global, ncol_global, 1.0_dp, a(1, 1), 1, 1, desca, 1.0_dp, c(1, 1), 1, 1, descc)
    1370             :       END IF
    1371             : 
    1372             : #else
    1373             : 
    1374             :       a => matrix%local_data
    1375             :       a_sp => matrix%local_data_sp
    1376             : 
    1377             :       IF ((myuplo == "U") .OR. (myuplo == "u")) THEN
    1378             :          DO irow_global = 1, nrow_global
    1379             :          DO icol_global = irow_global + 1, ncol_global
    1380             :             IF (matrix%use_sp) THEN
    1381             :                a_sp(icol_global, irow_global) = a_sp(irow_global, icol_global)
    1382             :             ELSE
    1383             :                a(icol_global, irow_global) = a(irow_global, icol_global)
    1384             :             END IF
    1385             :          END DO
    1386             :          END DO
    1387             :       ELSE
    1388             :          DO icol_global = 1, ncol_global
    1389             :          DO irow_global = icol_global + 1, nrow_global
    1390             :             IF (matrix%use_sp) THEN
    1391             :                a_sp(irow_global, icol_global) = a_sp(icol_global, irow_global)
    1392             :             ELSE
    1393             :                a(irow_global, icol_global) = a(icol_global, irow_global)
    1394             :             END IF
    1395             :          END DO
    1396             :          END DO
    1397             :       END IF
    1398             : 
    1399             : #endif
    1400      170345 :       CALL timestop(handle)
    1401             : 
    1402      170345 :    END SUBROUTINE cp_fm_uplo_to_full
    1403             : 
    1404             : ! **************************************************************************************************
    1405             : !> \brief scales column i of matrix a with scaling(i)
    1406             : !> \param matrixa ...
    1407             : !> \param scaling : an array used for scaling the columns,
    1408             : !>                  SIZE(scaling) determines the number of columns to be scaled
    1409             : !> \author Joost VandeVondele
    1410             : !> \note
    1411             : !>      this is very useful as a first step in the computation of C = sum_i alpha_i A_i transpose (A_i)
    1412             : !>      that is a rank-k update (cp_fm_syrk , cp_sm_plus_fm_fm_t)
    1413             : !>      this procedure can be up to 20 times faster than calling cp_fm_syrk n times
    1414             : !>      where every vector has a different prefactor
    1415             : ! **************************************************************************************************
    1416      126530 :    SUBROUTINE cp_fm_column_scale(matrixa, scaling)
    1417             :       TYPE(cp_fm_type), INTENT(IN)             :: matrixa
    1418             :       REAL(KIND=dp), DIMENSION(:), INTENT(IN)  :: scaling
    1419             : 
    1420             :       INTEGER                                  :: k, mypcol, myprow, n, ncol_global, &
    1421             :                                                   npcol, nprow
    1422      126530 :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: a
    1423      126530 :       REAL(KIND=sp), DIMENSION(:, :), POINTER  :: a_sp
    1424             : #if defined(__parallel)
    1425             :       INTEGER                                  :: icol_global, icol_local, &
    1426             :                                                   ipcol, iprow, irow_local
    1427             : #else
    1428             :       INTEGER                                  :: i
    1429             : #endif
    1430             : 
    1431      126530 :       myprow = matrixa%matrix_struct%context%mepos(1)
    1432      126530 :       mypcol = matrixa%matrix_struct%context%mepos(2)
    1433      126530 :       nprow = matrixa%matrix_struct%context%num_pe(1)
    1434      126530 :       npcol = matrixa%matrix_struct%context%num_pe(2)
    1435             : 
    1436      126530 :       ncol_global = matrixa%matrix_struct%ncol_global
    1437             : 
    1438      126530 :       a => matrixa%local_data
    1439      126530 :       a_sp => matrixa%local_data_sp
    1440      126530 :       IF (matrixa%use_sp) THEN
    1441           0 :          n = SIZE(a_sp, 1)
    1442             :       ELSE
    1443      126530 :          n = SIZE(a, 1)
    1444             :       END IF
    1445      126530 :       k = MIN(SIZE(scaling), ncol_global)
    1446             : 
    1447             : #if defined(__parallel)
    1448             : 
    1449     2129901 :       DO icol_global = 1, k
    1450             :          CALL infog2l(1, icol_global, matrixa%matrix_struct%descriptor, &
    1451             :                       nprow, npcol, myprow, mypcol, &
    1452     2003371 :                       irow_local, icol_local, iprow, ipcol)
    1453     2129901 :          IF ((ipcol == mypcol)) THEN
    1454     2003371 :             IF (matrixa%use_sp) THEN
    1455           0 :                CALL SSCAL(n, REAL(scaling(icol_global), sp), a_sp(:, icol_local), 1)
    1456             :             ELSE
    1457     2003371 :                CALL DSCAL(n, scaling(icol_global), a(:, icol_local), 1)
    1458             :             END IF
    1459             :          END IF
    1460             :       END DO
    1461             : #else
    1462             :       DO i = 1, k
    1463             :          IF (matrixa%use_sp) THEN
    1464             :             CALL SSCAL(n, REAL(scaling(i), sp), a_sp(:, i), 1)
    1465             :          ELSE
    1466             :             CALL DSCAL(n, scaling(i), a(:, i), 1)
    1467             :          END IF
    1468             :       END DO
    1469             : #endif
    1470      126530 :    END SUBROUTINE cp_fm_column_scale
    1471             : 
    1472             : ! **************************************************************************************************
    1473             : !> \brief scales row i of matrix a with scaling(i)
    1474             : !> \param matrixa ...
    1475             : !> \param scaling : an array used for scaling the columns,
    1476             : !> \author JGH
    1477             : !> \note
    1478             : ! **************************************************************************************************
    1479        6276 :    SUBROUTINE cp_fm_row_scale(matrixa, scaling)
    1480             :       TYPE(cp_fm_type), INTENT(IN)             :: matrixa
    1481             :       REAL(KIND=dp), DIMENSION(:), INTENT(IN)  :: scaling
    1482             : 
    1483             :       INTEGER                                  :: n, m, nrow_global, nrow_local, ncol_local
    1484        6276 :       INTEGER, DIMENSION(:), POINTER           :: row_indices
    1485        6276 :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: a
    1486        6276 :       REAL(KIND=sp), DIMENSION(:, :), POINTER  :: a_sp
    1487             : #if defined(__parallel)
    1488             :       INTEGER                                  :: irow_global, icol, irow
    1489             : #else
    1490             :       INTEGER                                  :: j
    1491             : #endif
    1492             : 
    1493             :       CALL cp_fm_get_info(matrixa, row_indices=row_indices, nrow_global=nrow_global, &
    1494        6276 :                           nrow_local=nrow_local, ncol_local=ncol_local)
    1495        6276 :       CPASSERT(SIZE(scaling) == nrow_global)
    1496             : 
    1497        6276 :       a => matrixa%local_data
    1498        6276 :       a_sp => matrixa%local_data_sp
    1499        6276 :       IF (matrixa%use_sp) THEN
    1500        6276 :          n = SIZE(a_sp, 1)
    1501        6276 :          m = SIZE(a_sp, 2)
    1502             :       ELSE
    1503        6276 :          n = SIZE(a, 1)
    1504        6276 :          m = SIZE(a, 2)
    1505             :       END IF
    1506             : 
    1507             : #if defined(__parallel)
    1508       78924 :       DO icol = 1, ncol_local
    1509       78924 :          IF (matrixa%use_sp) THEN
    1510           0 :             DO irow = 1, nrow_local
    1511           0 :                irow_global = row_indices(irow)
    1512           0 :                a(irow, icol) = REAL(scaling(irow_global), dp)*a(irow, icol)
    1513             :             END DO
    1514             :          ELSE
    1515     6481782 :             DO irow = 1, nrow_local
    1516     6409134 :                irow_global = row_indices(irow)
    1517     6481782 :                a(irow, icol) = scaling(irow_global)*a(irow, icol)
    1518             :             END DO
    1519             :          END IF
    1520             :       END DO
    1521             : #else
    1522             :       IF (matrixa%use_sp) THEN
    1523             :          DO j = 1, m
    1524             :             a_sp(1:n, j) = REAL(scaling(1:n), sp)*a_sp(1:n, j)
    1525             :          END DO
    1526             :       ELSE
    1527             :          DO j = 1, m
    1528             :             a(1:n, j) = scaling(1:n)*a(1:n, j)
    1529             :          END DO
    1530             :       END IF
    1531             : #endif
    1532        6276 :    END SUBROUTINE cp_fm_row_scale
    1533             : 
    1534             : ! **************************************************************************************************
    1535             : !> \brief Inverts a cp_fm_type matrix, optionally returning the determinant of the input matrix
    1536             : !> \param matrix_a the matrix to invert
    1537             : !> \param matrix_inverse the inverse of matrix_a
    1538             : !> \param det_a the determinant of matrix_a
    1539             : !> \param eps_svd optional parameter to active SVD based inversion, singular values below eps_svd
    1540             : !>                are screened
    1541             : !> \param eigval optionally return matrix eigenvalues/singular values
    1542             : !> \par History
    1543             : !>      note of Jan Wilhelm (12.2015)
    1544             : !>      - computation of determinant corrected
    1545             : !>      - determinant only computed if det_a is present
    1546             : !>      12.2016 added option to use SVD instead of LU [Nico Holmberg]
    1547             : !>      - Use cp_fm_get diag instead of n times cp_fm_get_element (A. Bussy)
    1548             : !> \author Florian Schiffmann(02.2007)
    1549             : ! **************************************************************************************************
    1550        1588 :    SUBROUTINE cp_fm_invert(matrix_a, matrix_inverse, det_a, eps_svd, eigval)
    1551             : 
    1552             :       TYPE(cp_fm_type), INTENT(IN)             :: matrix_a, matrix_inverse
    1553             :       REAL(KIND=dp), INTENT(OUT), OPTIONAL     :: det_a
    1554             :       REAL(KIND=dp), INTENT(IN), OPTIONAL      :: eps_svd
    1555             :       REAL(KIND=dp), DIMENSION(:), POINTER, &
    1556             :          INTENT(INOUT), OPTIONAL               :: eigval
    1557             : 
    1558             :       INTEGER                                  :: n
    1559        1588 :       INTEGER, ALLOCATABLE, DIMENSION(:)       :: ipivot
    1560             :       REAL(KIND=dp)                            :: determinant, my_eps_svd
    1561             :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: a
    1562             :       TYPE(cp_fm_type)                         :: matrix_lu
    1563             : 
    1564             : #if defined(__parallel)
    1565             :       TYPE(cp_fm_type)                         :: u, vt, sigma, inv_sigma_ut
    1566             :       TYPE(mp_comm_type) :: group
    1567             :       INTEGER                                  :: i, info, liwork, lwork, exponent_of_minus_one
    1568             :       INTEGER, DIMENSION(9)                    :: desca
    1569             :       LOGICAL                                  :: quenched
    1570             :       REAL(KIND=dp)                            :: alpha, beta
    1571        1588 :       REAL(KIND=dp), DIMENSION(:), POINTER     :: diag
    1572        1588 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:) :: work
    1573             : #else
    1574             :       LOGICAL                                  :: sign
    1575             :       REAL(KIND=dp)                            :: eps1
    1576             : #endif
    1577             : 
    1578        1588 :       my_eps_svd = 0.0_dp
    1579         468 :       IF (PRESENT(eps_svd)) my_eps_svd = eps_svd
    1580             : 
    1581             :       CALL cp_fm_create(matrix=matrix_lu, &
    1582             :                         matrix_struct=matrix_a%matrix_struct, &
    1583        1588 :                         name="A_lu"//TRIM(ADJUSTL(cp_to_string(1)))//"MATRIX")
    1584        1588 :       CALL cp_fm_to_fm(matrix_a, matrix_lu)
    1585             : 
    1586        1588 :       a => matrix_lu%local_data
    1587        1588 :       n = matrix_lu%matrix_struct%nrow_global
    1588        4764 :       ALLOCATE (ipivot(n + matrix_a%matrix_struct%nrow_block))
    1589       19369 :       ipivot(:) = 0
    1590             : #if defined(__parallel)
    1591        1588 :       IF (my_eps_svd .EQ. 0.0_dp) THEN
    1592             :          ! Use LU decomposition
    1593        1560 :          lwork = 3*n
    1594        1560 :          liwork = 3*n
    1595       15600 :          desca(:) = matrix_lu%matrix_struct%descriptor(:)
    1596        1560 :          CALL pdgetrf(n, n, a, 1, 1, desca, ipivot, info)
    1597             : 
    1598        1560 :          IF (PRESENT(det_a) .OR. PRESENT(eigval)) THEN
    1599             : 
    1600        1116 :             ALLOCATE (diag(n))
    1601         916 :             diag(:) = 0.0_dp
    1602         372 :             CALL cp_fm_get_diag(matrix_lu, diag)
    1603             : 
    1604         372 :             exponent_of_minus_one = 0
    1605         372 :             determinant = 1.0_dp
    1606         916 :             DO i = 1, n
    1607         544 :                determinant = determinant*diag(i)
    1608         916 :                IF (ipivot(i) .NE. i) THEN
    1609         224 :                   exponent_of_minus_one = exponent_of_minus_one + 1
    1610             :                END IF
    1611             :             END DO
    1612         372 :             IF (PRESENT(eigval)) THEN
    1613           0 :                CPASSERT(.NOT. ASSOCIATED(eigval))
    1614           0 :                ALLOCATE (eigval(n))
    1615           0 :                eigval(:) = diag
    1616             :             END IF
    1617         372 :             DEALLOCATE (diag)
    1618             : 
    1619         372 :             group = matrix_lu%matrix_struct%para_env
    1620         372 :             CALL group%sum(exponent_of_minus_one)
    1621             : 
    1622         372 :             determinant = determinant*(-1.0_dp)**exponent_of_minus_one
    1623             : 
    1624             :          END IF
    1625             : 
    1626        1560 :          alpha = 0.0_dp
    1627        1560 :          beta = 1.0_dp
    1628        1560 :          CALL cp_fm_set_all(matrix_inverse, alpha, beta)
    1629        1560 :          CALL pdgetrs('N', n, n, matrix_lu%local_data, 1, 1, desca, ipivot, matrix_inverse%local_data, 1, 1, desca, info)
    1630             :       ELSE
    1631             :          ! Use singular value decomposition
    1632             :          CALL cp_fm_create(matrix=u, &
    1633             :                            matrix_struct=matrix_a%matrix_struct, &
    1634          28 :                            name="LEFT_SINGULAR_MATRIX")
    1635          28 :          CALL cp_fm_set_all(u, alpha=0.0_dp)
    1636             :          CALL cp_fm_create(matrix=vt, &
    1637             :                            matrix_struct=matrix_a%matrix_struct, &
    1638          28 :                            name="RIGHT_SINGULAR_MATRIX")
    1639          28 :          CALL cp_fm_set_all(vt, alpha=0.0_dp)
    1640          84 :          ALLOCATE (diag(n))
    1641          92 :          diag(:) = 0.0_dp
    1642         280 :          desca(:) = matrix_lu%matrix_struct%descriptor(:)
    1643          28 :          ALLOCATE (work(1))
    1644             :          ! Workspace query
    1645          28 :          lwork = -1
    1646             :          CALL pdgesvd('V', 'V', n, n, matrix_lu%local_data, 1, 1, desca, diag, u%local_data, &
    1647          28 :                       1, 1, desca, vt%local_data, 1, 1, desca, work, lwork, info)
    1648          28 :          lwork = INT(work(1))
    1649          28 :          DEALLOCATE (work)
    1650          84 :          ALLOCATE (work(lwork))
    1651             :          ! SVD
    1652             :          CALL pdgesvd('V', 'V', n, n, matrix_lu%local_data, 1, 1, desca, diag, u%local_data, &
    1653          28 :                       1, 1, desca, vt%local_data, 1, 1, desca, work, lwork, info)
    1654             :          ! info == n+1 implies homogeneity error when the number of procs is large
    1655             :          ! this likely isnt a problem, but maybe we should handle it separately
    1656          28 :          IF (info /= 0 .AND. info /= n + 1) &
    1657           0 :             CPABORT("Singular value decomposition of matrix failed.")
    1658             :          ! (Pseudo)inverse and (pseudo)determinant
    1659             :          CALL cp_fm_create(matrix=sigma, &
    1660             :                            matrix_struct=matrix_a%matrix_struct, &
    1661          28 :                            name="SINGULAR_VALUE_MATRIX")
    1662          28 :          CALL cp_fm_set_all(sigma, alpha=0.0_dp)
    1663          28 :          determinant = 1.0_dp
    1664          28 :          quenched = .FALSE.
    1665          28 :          IF (PRESENT(eigval)) THEN
    1666          28 :             CPASSERT(.NOT. ASSOCIATED(eigval))
    1667          84 :             ALLOCATE (eigval(n))
    1668         156 :             eigval(:) = diag
    1669             :          END IF
    1670          92 :          DO i = 1, n
    1671          64 :             IF (diag(i) < my_eps_svd) THEN
    1672          18 :                diag(i) = 0.0_dp
    1673          18 :                quenched = .TRUE.
    1674             :             ELSE
    1675          46 :                determinant = determinant*diag(i)
    1676          46 :                diag(i) = 1.0_dp/diag(i)
    1677             :             END IF
    1678          92 :             CALL cp_fm_set_element(sigma, i, i, diag(i))
    1679             :          END DO
    1680          28 :          DEALLOCATE (diag)
    1681          28 :          IF (quenched) &
    1682             :             CALL cp_warn(__LOCATION__, &
    1683             :                          "Linear dependencies were detected in the SVD inversion of matrix "//TRIM(ADJUSTL(matrix_a%name))// &
    1684          12 :                          ". At least one singular value has been quenched.")
    1685             :          ! Sigma^-1 * U^T
    1686             :          CALL cp_fm_create(matrix=inv_sigma_ut, &
    1687             :                            matrix_struct=matrix_a%matrix_struct, &
    1688          28 :                            name="SINGULAR_VALUE_MATRIX")
    1689          28 :          CALL cp_fm_set_all(inv_sigma_ut, alpha=0.0_dp)
    1690             :          CALL pdgemm('N', 'T', n, n, n, 1.0_dp, sigma%local_data, 1, 1, desca, &
    1691          28 :                      u%local_data, 1, 1, desca, 0.0_dp, inv_sigma_ut%local_data, 1, 1, desca)
    1692             :          ! A^-1 = V * (Sigma^-1 * U^T)
    1693          28 :          CALL cp_fm_set_all(matrix_inverse, alpha=0.0_dp)
    1694             :          CALL pdgemm('T', 'N', n, n, n, 1.0_dp, vt%local_data, 1, 1, desca, &
    1695          28 :                      inv_sigma_ut%local_data, 1, 1, desca, 0.0_dp, matrix_inverse%local_data, 1, 1, desca)
    1696             :          ! Clean up
    1697          28 :          DEALLOCATE (work)
    1698          28 :          CALL cp_fm_release(u)
    1699          28 :          CALL cp_fm_release(vt)
    1700          28 :          CALL cp_fm_release(sigma)
    1701         140 :          CALL cp_fm_release(inv_sigma_ut)
    1702             :       END IF
    1703             : #else
    1704             :       IF (my_eps_svd .EQ. 0.0_dp) THEN
    1705             :          sign = .TRUE.
    1706             :          CALL invert_matrix(matrix_a%local_data, matrix_inverse%local_data, &
    1707             :                             eval_error=eps1)
    1708             :          CALL cp_fm_lu_decompose(matrix_lu, determinant, correct_sign=sign)
    1709             :          IF (PRESENT(eigval)) &
    1710             :             CALL cp_abort(__LOCATION__, &
    1711             :                           "NYI. Eigenvalues not available for return without SCALAPACK.")
    1712             :       ELSE
    1713             :          CALL get_pseudo_inverse_svd(matrix_a%local_data, matrix_inverse%local_data, eps_svd, &
    1714             :                                      determinant, eigval)
    1715             :       END IF
    1716             : #endif
    1717        1588 :       CALL cp_fm_release(matrix_lu)
    1718        1588 :       DEALLOCATE (ipivot)
    1719        1588 :       IF (PRESENT(det_a)) det_a = determinant
    1720        1588 :    END SUBROUTINE cp_fm_invert
    1721             : 
    1722             : ! **************************************************************************************************
    1723             : !> \brief inverts a triangular matrix
    1724             : !> \param matrix_a ...
    1725             : !> \param uplo_tr triangular format; defaults to 'U'
    1726             : !> \author MI
    1727             : ! **************************************************************************************************
    1728        5062 :    SUBROUTINE cp_fm_triangular_invert(matrix_a, uplo_tr)
    1729             : 
    1730             :       TYPE(cp_fm_type), INTENT(IN)             :: matrix_a
    1731             :       CHARACTER, INTENT(IN), OPTIONAL          :: uplo_tr
    1732             : 
    1733             :       CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_triangular_invert'
    1734             : 
    1735             :       CHARACTER                                :: unit_diag, uplo
    1736             :       INTEGER                                  :: handle, info, ncol_global
    1737        5062 :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: a
    1738             : #if defined(__parallel)
    1739             :       INTEGER, DIMENSION(9)                    :: desca
    1740             : #endif
    1741             : 
    1742        5062 :       CALL timeset(routineN, handle)
    1743             : 
    1744        5062 :       unit_diag = 'N'
    1745        5062 :       uplo = 'U'
    1746        5062 :       IF (PRESENT(uplo_tr)) uplo = uplo_tr
    1747             : 
    1748        5062 :       ncol_global = matrix_a%matrix_struct%ncol_global
    1749             : 
    1750        5062 :       a => matrix_a%local_data
    1751             : 
    1752             : #if defined(__parallel)
    1753       50620 :       desca(:) = matrix_a%matrix_struct%descriptor(:)
    1754             : 
    1755        5062 :       CALL pdtrtri(uplo, unit_diag, ncol_global, a(1, 1), 1, 1, desca, info)
    1756             : 
    1757             : #else
    1758             :       CALL dtrtri(uplo, unit_diag, ncol_global, a(1, 1), ncol_global, info)
    1759             : #endif
    1760             : 
    1761        5062 :       CALL timestop(handle)
    1762        5062 :    END SUBROUTINE cp_fm_triangular_invert
    1763             : 
    1764             : ! **************************************************************************************************
    1765             : !> \brief  performs a QR factorization of the input rectangular matrix A or of a submatrix of A
    1766             : !>         the computed triangular matrix R is in output of the submatrix sub(A) of size NxN
    1767             : !>         M and M give the dimension of the submatrix that has to be factorized (MxN) with M>N
    1768             : !> \param matrix_a ...
    1769             : !> \param matrix_r ...
    1770             : !> \param nrow_fact ...
    1771             : !> \param ncol_fact ...
    1772             : !> \param first_row ...
    1773             : !> \param first_col ...
    1774             : !> \author MI
    1775             : ! **************************************************************************************************
    1776       19320 :    SUBROUTINE cp_fm_qr_factorization(matrix_a, matrix_r, nrow_fact, ncol_fact, first_row, first_col, uplo)
    1777             :       TYPE(cp_fm_type), INTENT(IN)             :: matrix_a, matrix_r
    1778             :       INTEGER, INTENT(IN), OPTIONAL            :: nrow_fact, ncol_fact, &
    1779             :                                                   first_row, first_col
    1780             :       CHARACTER, INTENT(IN), OPTIONAL          :: uplo
    1781             : 
    1782             :       CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_qr_factorization'
    1783             : 
    1784             :       CHARACTER                                :: myuplo
    1785             :       INTEGER                                  :: handle, i, icol, info, irow, &
    1786             :                                                   j, lda, lwork, ncol, &
    1787             :                                                   ndim, nrow
    1788       19320 :       REAL(dp), ALLOCATABLE, DIMENSION(:)      :: tau, work
    1789       19320 :       REAL(dp), ALLOCATABLE, DIMENSION(:, :)   :: r_mat
    1790             :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: a
    1791             : #if defined(__parallel)
    1792             :       INTEGER, DIMENSION(9)                    :: desca
    1793             : #endif
    1794             : 
    1795       19320 :       CALL timeset(routineN, handle)
    1796             : 
    1797       19320 :       myuplo = 'U'
    1798       19320 :       IF (PRESENT(uplo)) myuplo = uplo
    1799             : 
    1800       19320 :       ncol = matrix_a%matrix_struct%ncol_global
    1801       19320 :       nrow = matrix_a%matrix_struct%nrow_global
    1802       19320 :       lda = nrow
    1803             : 
    1804       19320 :       a => matrix_a%local_data
    1805             : 
    1806       19320 :       IF (PRESENT(nrow_fact)) nrow = nrow_fact
    1807       19320 :       IF (PRESENT(ncol_fact)) ncol = ncol_fact
    1808       19320 :       irow = 1
    1809       19320 :       IF (PRESENT(first_row)) irow = first_row
    1810       19320 :       icol = 1
    1811       19320 :       IF (PRESENT(first_col)) icol = first_col
    1812             : 
    1813       19320 :       CPASSERT(nrow >= ncol)
    1814       19320 :       ndim = SIZE(a, 2)
    1815       57960 :       ALLOCATE (tau(ndim))
    1816             : 
    1817             : #if defined(__parallel)
    1818             : 
    1819      193200 :       desca(:) = matrix_a%matrix_struct%descriptor(:)
    1820             : 
    1821       19320 :       lwork = -1
    1822       57960 :       ALLOCATE (work(2*ndim))
    1823       19320 :       CALL pdgeqrf(nrow, ncol, a, irow, icol, desca, tau, work, lwork, info)
    1824       19320 :       lwork = INT(work(1))
    1825       19320 :       DEALLOCATE (work)
    1826       57960 :       ALLOCATE (work(lwork))
    1827       19320 :       CALL pdgeqrf(nrow, ncol, a, irow, icol, desca, tau, work, lwork, info)
    1828             : 
    1829             : #else
    1830             :       lwork = -1
    1831             :       ALLOCATE (work(2*ndim))
    1832             :       CALL dgeqrf(nrow, ncol, a, lda, tau, work, lwork, info)
    1833             :       lwork = INT(work(1))
    1834             :       DEALLOCATE (work)
    1835             :       ALLOCATE (work(lwork))
    1836             :       CALL dgeqrf(nrow, ncol, a, lda, tau, work, lwork, info)
    1837             : 
    1838             : #endif
    1839             : 
    1840       77280 :       ALLOCATE (r_mat(ncol, ncol))
    1841       19320 :       CALL cp_fm_get_submatrix(matrix_a, r_mat, 1, 1, ncol, ncol)
    1842       19320 :       IF ((myuplo == "U") .OR. (myuplo == "u")) THEN
    1843       38640 :          DO i = 1, ncol
    1844       38640 :          DO j = i + 1, ncol
    1845       19320 :             r_mat(j, i) = 0.0_dp
    1846             :          END DO
    1847             :          END DO
    1848             :       ELSE
    1849           0 :          DO j = 1, ncol
    1850           0 :          DO i = j + 1, ncol
    1851           0 :             r_mat(i, j) = 0.0_dp
    1852             :          END DO
    1853             :          END DO
    1854             :       END IF
    1855       19320 :       CALL cp_fm_set_submatrix(matrix_r, r_mat, 1, 1, ncol, ncol)
    1856             : 
    1857       19320 :       DEALLOCATE (tau, work, r_mat)
    1858             : 
    1859       19320 :       CALL timestop(handle)
    1860             : 
    1861       19320 :    END SUBROUTINE cp_fm_qr_factorization
    1862             : 
    1863             : ! **************************************************************************************************
    1864             : !> \brief computes the the solution to A*b=A_general using lu decomposition
    1865             : !> \param matrix_a input matrix; will be overwritten
    1866             : !> \param general_a contains the result
    1867             : !> \author Florian Schiffmann
    1868             : ! **************************************************************************************************
    1869        6822 :    SUBROUTINE cp_fm_solve(matrix_a, general_a)
    1870             :       TYPE(cp_fm_type), INTENT(IN)             :: matrix_a, general_a
    1871             : 
    1872             :       CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_solve'
    1873             : 
    1874             :       INTEGER                                  :: handle, info, n, nrhs
    1875        6822 :       INTEGER, ALLOCATABLE, DIMENSION(:)       :: ipivot
    1876             :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: a, a_general
    1877             : #if defined(__parallel)
    1878             :       INTEGER, DIMENSION(9)                    :: desca, descb
    1879             : #else
    1880             :       INTEGER                                  :: lda, ldb
    1881             : #endif
    1882             : 
    1883        6822 :       CALL timeset(routineN, handle)
    1884             : 
    1885        6822 :       a => matrix_a%local_data
    1886        6822 :       a_general => general_a%local_data
    1887        6822 :       n = matrix_a%matrix_struct%nrow_global
    1888        6822 :       nrhs = general_a%matrix_struct%ncol_global
    1889       20466 :       ALLOCATE (ipivot(n + matrix_a%matrix_struct%nrow_block))
    1890             : 
    1891             : #if defined(__parallel)
    1892       68220 :       desca(:) = matrix_a%matrix_struct%descriptor(:)
    1893       68220 :       descb(:) = general_a%matrix_struct%descriptor(:)
    1894        6822 :       CALL pdgetrf(n, n, a, 1, 1, desca, ipivot, info)
    1895             :       CALL pdgetrs("N", n, nrhs, a, 1, 1, desca, ipivot, a_general, &
    1896        6822 :                    1, 1, descb, info)
    1897             : 
    1898             : #else
    1899             :       lda = SIZE(a, 1)
    1900             :       ldb = SIZE(a_general, 1)
    1901             :       CALL dgetrf(n, n, a, lda, ipivot, info)
    1902             :       CALL dgetrs("N", n, nrhs, a, lda, ipivot, a_general, ldb, info)
    1903             : 
    1904             : #endif
    1905             :       ! info is allowed to be zero
    1906             :       ! this does just signal a zero diagonal element
    1907        6822 :       DEALLOCATE (ipivot)
    1908        6822 :       CALL timestop(handle)
    1909        6822 :    END SUBROUTINE
    1910             : 
    1911             : ! **************************************************************************************************
    1912             : !> \brief Convenience function. Computes the matrix multiplications needed
    1913             : !>        for the multiplication of complex matrices.
    1914             : !>        C = beta * C + alpha * ( A  ** transa ) * ( B ** transb )
    1915             : !> \param transa : 'N' -> normal   'T' -> transpose
    1916             : !>      alpha,beta :: can be 0.0_dp and 1.0_dp
    1917             : !> \param transb ...
    1918             : !> \param m ...
    1919             : !> \param n ...
    1920             : !> \param k ...
    1921             : !> \param alpha ...
    1922             : !> \param A_re m x k matrix ( ! for transa = 'N'), real part
    1923             : !> \param A_im m x k matrix ( ! for transa = 'N'), imaginary part
    1924             : !> \param B_re k x n matrix ( ! for transa = 'N'), real part
    1925             : !> \param B_im k x n matrix ( ! for transa = 'N'), imaginary part
    1926             : !> \param beta ...
    1927             : !> \param C_re m x n matrix, real part
    1928             : !> \param C_im m x n matrix, imaginary part
    1929             : !> \param a_first_col ...
    1930             : !> \param a_first_row ...
    1931             : !> \param b_first_col : the k x n matrix starts at col b_first_col of matrix_b (avoid usage)
    1932             : !> \param b_first_row ...
    1933             : !> \param c_first_col ...
    1934             : !> \param c_first_row ...
    1935             : !> \author Samuel Andermatt
    1936             : !> \note
    1937             : !>      C should have no overlap with A, B
    1938             : ! **************************************************************************************************
    1939           0 :    SUBROUTINE cp_complex_fm_gemm(transa, transb, m, n, k, alpha, A_re, A_im, B_re, B_im, beta, &
    1940             :                                  C_re, C_im, a_first_col, a_first_row, b_first_col, b_first_row, c_first_col, &
    1941             :                                  c_first_row)
    1942             :       CHARACTER(LEN=1), INTENT(IN)                       :: transa, transb
    1943             :       INTEGER, INTENT(IN)                                :: m, n, k
    1944             :       REAL(KIND=dp), INTENT(IN)                          :: alpha
    1945             :       TYPE(cp_fm_type), INTENT(IN)                       :: A_re, A_im, B_re, B_im
    1946             :       REAL(KIND=dp), INTENT(IN)                          :: beta
    1947             :       TYPE(cp_fm_type), INTENT(IN)                       :: C_re, C_im
    1948             :       INTEGER, INTENT(IN), OPTIONAL                      :: a_first_col, a_first_row, b_first_col, &
    1949             :                                                             b_first_row, c_first_col, c_first_row
    1950             : 
    1951             :       CHARACTER(len=*), PARAMETER :: routineN = 'cp_complex_fm_gemm'
    1952             : 
    1953             :       INTEGER                                            :: handle
    1954             : 
    1955           0 :       CALL timeset(routineN, handle)
    1956             : 
    1957             :       CALL cp_fm_gemm(transa, transb, m, n, k, alpha, A_re, B_re, beta, C_re, &
    1958             :                       a_first_col=a_first_col, &
    1959             :                       a_first_row=a_first_row, &
    1960             :                       b_first_col=b_first_col, &
    1961             :                       b_first_row=b_first_row, &
    1962             :                       c_first_col=c_first_col, &
    1963           0 :                       c_first_row=c_first_row)
    1964             :       CALL cp_fm_gemm(transa, transb, m, n, k, -alpha, A_im, B_im, 1.0_dp, C_re, &
    1965             :                       a_first_col=a_first_col, &
    1966             :                       a_first_row=a_first_row, &
    1967             :                       b_first_col=b_first_col, &
    1968             :                       b_first_row=b_first_row, &
    1969             :                       c_first_col=c_first_col, &
    1970           0 :                       c_first_row=c_first_row)
    1971             :       CALL cp_fm_gemm(transa, transb, m, n, k, alpha, A_re, B_im, beta, C_im, &
    1972             :                       a_first_col=a_first_col, &
    1973             :                       a_first_row=a_first_row, &
    1974             :                       b_first_col=b_first_col, &
    1975             :                       b_first_row=b_first_row, &
    1976             :                       c_first_col=c_first_col, &
    1977           0 :                       c_first_row=c_first_row)
    1978             :       CALL cp_fm_gemm(transa, transb, m, n, k, alpha, A_im, B_re, 1.0_dp, C_im, &
    1979             :                       a_first_col=a_first_col, &
    1980             :                       a_first_row=a_first_row, &
    1981             :                       b_first_col=b_first_col, &
    1982             :                       b_first_row=b_first_row, &
    1983             :                       c_first_col=c_first_col, &
    1984           0 :                       c_first_row=c_first_row)
    1985             : 
    1986           0 :       CALL timestop(handle)
    1987             : 
    1988           0 :    END SUBROUTINE cp_complex_fm_gemm
    1989             : 
    1990             : ! **************************************************************************************************
    1991             : !> \brief inverts a matrix using LU decomposition
    1992             : !>        the input matrix will be overwritten
    1993             : !> \param matrix   : input a general square non-singular matrix, outputs its inverse
    1994             : !> \param info_out : optional, if present outputs the info from (p)zgetri
    1995             : !> \author Lianheng Tong
    1996             : ! **************************************************************************************************
    1997           0 :    SUBROUTINE cp_fm_lu_invert(matrix, info_out)
    1998             :       TYPE(cp_fm_type), INTENT(IN)             :: matrix
    1999             :       INTEGER, INTENT(OUT), OPTIONAL           :: info_out
    2000             : 
    2001             :       CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_lu_invert'
    2002             : 
    2003             :       INTEGER :: nrows_global, handle, info, lwork
    2004           0 :       INTEGER, DIMENSION(:), ALLOCATABLE       :: ipivot
    2005             :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: mat
    2006             :       REAL(KIND=sp), DIMENSION(:, :), POINTER  :: mat_sp
    2007           0 :       REAL(KIND=dp), DIMENSION(:), ALLOCATABLE :: work
    2008           0 :       REAL(KIND=sp), DIMENSION(:), ALLOCATABLE :: work_sp
    2009             : #if defined(__parallel)
    2010             :       INTEGER                                  :: liwork
    2011             :       INTEGER, DIMENSION(9)                    :: desca
    2012           0 :       INTEGER, DIMENSION(:), ALLOCATABLE       :: iwork
    2013             : #else
    2014             :       INTEGER                                  :: lda
    2015             : #endif
    2016             : 
    2017           0 :       CALL timeset(routineN, handle)
    2018             : 
    2019           0 :       mat => matrix%local_data
    2020           0 :       mat_sp => matrix%local_data_sp
    2021           0 :       nrows_global = matrix%matrix_struct%nrow_global
    2022           0 :       CPASSERT(nrows_global .EQ. matrix%matrix_struct%ncol_global)
    2023           0 :       ALLOCATE (ipivot(nrows_global))
    2024             :       ! do LU decomposition
    2025             : #if defined(__parallel)
    2026           0 :       desca = matrix%matrix_struct%descriptor
    2027           0 :       IF (matrix%use_sp) THEN
    2028             :          CALL psgetrf(nrows_global, nrows_global, &
    2029           0 :                       mat_sp, 1, 1, desca, ipivot, info)
    2030             :       ELSE
    2031             :          CALL pdgetrf(nrows_global, nrows_global, &
    2032           0 :                       mat, 1, 1, desca, ipivot, info)
    2033             :       END IF
    2034             : #else
    2035             :       lda = SIZE(mat, 1)
    2036             :       IF (matrix%use_sp) THEN
    2037             :          CALL sgetrf(nrows_global, nrows_global, &
    2038             :                      mat_sp, lda, ipivot, info)
    2039             :       ELSE
    2040             :          CALL dgetrf(nrows_global, nrows_global, &
    2041             :                      mat, lda, ipivot, info)
    2042             :       END IF
    2043             : #endif
    2044           0 :       IF (info /= 0) THEN
    2045           0 :          CALL cp_abort(__LOCATION__, "LU decomposition has failed")
    2046             :       END IF
    2047             :       ! do inversion
    2048           0 :       IF (matrix%use_sp) THEN
    2049           0 :          ALLOCATE (work(1))
    2050             :       ELSE
    2051           0 :          ALLOCATE (work_sp(1))
    2052             :       END IF
    2053             : #if defined(__parallel)
    2054           0 :       ALLOCATE (iwork(1))
    2055           0 :       IF (matrix%use_sp) THEN
    2056             :          CALL psgetri(nrows_global, mat_sp, 1, 1, desca, &
    2057           0 :                       ipivot, work_sp, -1, iwork, -1, info)
    2058           0 :          lwork = INT(work_sp(1))
    2059           0 :          DEALLOCATE (work_sp)
    2060           0 :          ALLOCATE (work_sp(lwork))
    2061             :       ELSE
    2062             :          CALL pdgetri(nrows_global, mat, 1, 1, desca, &
    2063           0 :                       ipivot, work, -1, iwork, -1, info)
    2064           0 :          lwork = INT(work(1))
    2065           0 :          DEALLOCATE (work)
    2066           0 :          ALLOCATE (work(lwork))
    2067             :       END IF
    2068           0 :       liwork = INT(iwork(1))
    2069           0 :       DEALLOCATE (iwork)
    2070           0 :       ALLOCATE (iwork(liwork))
    2071           0 :       IF (matrix%use_sp) THEN
    2072             :          CALL psgetri(nrows_global, mat_sp, 1, 1, desca, &
    2073           0 :                       ipivot, work_sp, lwork, iwork, liwork, info)
    2074             :       ELSE
    2075             :          CALL pdgetri(nrows_global, mat, 1, 1, desca, &
    2076           0 :                       ipivot, work, lwork, iwork, liwork, info)
    2077             :       END IF
    2078           0 :       DEALLOCATE (iwork)
    2079             : #else
    2080             :       IF (matrix%use_sp) THEN
    2081             :          CALL sgetri(nrows_global, mat_sp, lda, &
    2082             :                      ipivot, work_sp, -1, info)
    2083             :          lwork = INT(work_sp(1))
    2084             :          DEALLOCATE (work_sp)
    2085             :          ALLOCATE (work_sp(lwork))
    2086             :          CALL sgetri(nrows_global, mat_sp, lda, &
    2087             :                      ipivot, work_sp, lwork, info)
    2088             :       ELSE
    2089             :          CALL dgetri(nrows_global, mat, lda, &
    2090             :                      ipivot, work, -1, info)
    2091             :          lwork = INT(work(1))
    2092             :          DEALLOCATE (work)
    2093             :          ALLOCATE (work(lwork))
    2094             :          CALL dgetri(nrows_global, mat, lda, &
    2095             :                      ipivot, work, lwork, info)
    2096             :       END IF
    2097             : #endif
    2098           0 :       IF (matrix%use_sp) THEN
    2099           0 :          DEALLOCATE (work_sp)
    2100             :       ELSE
    2101           0 :          DEALLOCATE (work)
    2102             :       END IF
    2103           0 :       DEALLOCATE (ipivot)
    2104             : 
    2105           0 :       IF (PRESENT(info_out)) THEN
    2106           0 :          info_out = info
    2107             :       ELSE
    2108           0 :          IF (info /= 0) &
    2109           0 :             CALL cp_abort(__LOCATION__, "LU inversion has failed")
    2110             :       END IF
    2111             : 
    2112           0 :       CALL timestop(handle)
    2113             : 
    2114           0 :    END SUBROUTINE cp_fm_lu_invert
    2115             : 
    2116             : ! **************************************************************************************************
    2117             : !> \brief norm of matrix using (p)dlange
    2118             : !> \param matrix   : input a general matrix
    2119             : !> \param mode     : 'M' max abs element value,
    2120             : !>                   '1' or 'O' one norm, i.e. maximum column sum
    2121             : !>                   'I' infinity norm, i.e. maximum row sum
    2122             : !>                   'F' or 'E' Frobenius norm, i.e. sqrt of sum of all squares of elements
    2123             : !> \return : the norm according to mode
    2124             : !> \author Lianheng Tong
    2125             : ! **************************************************************************************************
    2126         492 :    FUNCTION cp_fm_norm(matrix, mode) RESULT(res)
    2127             :       TYPE(cp_fm_type), INTENT(IN) :: matrix
    2128             :       CHARACTER, INTENT(IN) :: mode
    2129             :       REAL(KIND=dp) :: res
    2130             : 
    2131             :       CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_norm'
    2132             : 
    2133             :       INTEGER :: nrows, ncols, handle, lwork, nrows_local, ncols_local
    2134             :       REAL(KIND=sp) :: res_sp
    2135             :       REAL(KIND=dp), DIMENSION(:, :), POINTER :: aa
    2136             :       REAL(KIND=sp), DIMENSION(:, :), POINTER :: aa_sp
    2137         492 :       REAL(KIND=dp), DIMENSION(:), ALLOCATABLE :: work
    2138         492 :       REAL(KIND=sp), DIMENSION(:), ALLOCATABLE :: work_sp
    2139             : #if defined(__parallel)
    2140             :       INTEGER, DIMENSION(9) :: desca
    2141             : #else
    2142             :       INTEGER :: lda
    2143             : #endif
    2144             : 
    2145         492 :       CALL timeset(routineN, handle)
    2146             : 
    2147             :       CALL cp_fm_get_info(matrix=matrix, &
    2148             :                           nrow_global=nrows, &
    2149             :                           ncol_global=ncols, &
    2150             :                           nrow_local=nrows_local, &
    2151         492 :                           ncol_local=ncols_local)
    2152         492 :       aa => matrix%local_data
    2153         492 :       aa_sp => matrix%local_data_sp
    2154             : 
    2155             : #if defined(__parallel)
    2156        4920 :       desca = matrix%matrix_struct%descriptor
    2157             :       SELECT CASE (mode)
    2158             :       CASE ('M', 'm')
    2159         492 :          lwork = 1
    2160             :       CASE ('1', 'O', 'o')
    2161         492 :          lwork = ncols_local
    2162             :       CASE ('I', 'i')
    2163           0 :          lwork = nrows_local
    2164             :       CASE ('F', 'f', 'E', 'e')
    2165           0 :          lwork = 1
    2166             :       CASE DEFAULT
    2167         492 :          CPABORT("mode input is not valid")
    2168             :       END SELECT
    2169         492 :       IF (matrix%use_sp) THEN
    2170           0 :          ALLOCATE (work_sp(lwork))
    2171           0 :          res_sp = pslange(mode, nrows, ncols, aa_sp, 1, 1, desca, work_sp)
    2172           0 :          DEALLOCATE (work_sp)
    2173           0 :          res = REAL(res_sp, KIND=dp)
    2174             :       ELSE
    2175        1476 :          ALLOCATE (work(lwork))
    2176         492 :          res = pdlange(mode, nrows, ncols, aa, 1, 1, desca, work)
    2177         492 :          DEALLOCATE (work)
    2178             :       END IF
    2179             : #else
    2180             :       SELECT CASE (mode)
    2181             :       CASE ('M', 'm')
    2182             :          lwork = 1
    2183             :       CASE ('1', 'O', 'o')
    2184             :          lwork = 1
    2185             :       CASE ('I', 'i')
    2186             :          lwork = nrows
    2187             :       CASE ('F', 'f', 'E', 'e')
    2188             :          lwork = 1
    2189             :       CASE DEFAULT
    2190             :          CPABORT("mode input is not valid")
    2191             :       END SELECT
    2192             :       IF (matrix%use_sp) THEN
    2193             :          ALLOCATE (work_sp(lwork))
    2194             :          lda = SIZE(aa_sp, 1)
    2195             :          res_sp = slange(mode, nrows, ncols, aa_sp, lda, work_sp)
    2196             :          DEALLOCATE (work_sp)
    2197             :          res = REAL(res_sp, KIND=dp)
    2198             :       ELSE
    2199             :          ALLOCATE (work(lwork))
    2200             :          lda = SIZE(aa, 1)
    2201             :          res = dlange(mode, nrows, ncols, aa, lda, work)
    2202             :          DEALLOCATE (work)
    2203             :       END IF
    2204             : #endif
    2205             : 
    2206         492 :       CALL timestop(handle)
    2207             : 
    2208         492 :    END FUNCTION cp_fm_norm
    2209             : 
    2210             : ! **************************************************************************************************
    2211             : !> \brief trace of a matrix using pdlatra
    2212             : !> \param matrix   : input a square matrix
    2213             : !> \return : the trace
    2214             : !> \author Lianheng Tong
    2215             : ! **************************************************************************************************
    2216           0 :    FUNCTION cp_fm_latra(matrix) RESULT(res)
    2217             :       TYPE(cp_fm_type), INTENT(IN) :: matrix
    2218             :       REAL(KIND=dp) :: res
    2219             : 
    2220             :       CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_latra'
    2221             : 
    2222             :       INTEGER :: nrows, ncols, handle
    2223             :       REAL(KIND=sp) :: res_sp
    2224             :       REAL(KIND=dp), DIMENSION(:, :), POINTER :: aa
    2225             :       REAL(KIND=sp), DIMENSION(:, :), POINTER :: aa_sp
    2226             : #if defined(__parallel)
    2227             :       INTEGER, DIMENSION(9) :: desca
    2228             : #else
    2229             :       INTEGER :: ii
    2230             : #endif
    2231             : 
    2232           0 :       CALL timeset(routineN, handle)
    2233             : 
    2234           0 :       nrows = matrix%matrix_struct%nrow_global
    2235           0 :       ncols = matrix%matrix_struct%ncol_global
    2236           0 :       CPASSERT(nrows .EQ. ncols)
    2237           0 :       aa => matrix%local_data
    2238           0 :       aa_sp => matrix%local_data_sp
    2239             : 
    2240             : #if defined(__parallel)
    2241           0 :       desca = matrix%matrix_struct%descriptor
    2242           0 :       IF (matrix%use_sp) THEN
    2243           0 :          res_sp = pslatra(nrows, aa_sp, 1, 1, desca)
    2244           0 :          res = REAL(res_sp, KIND=dp)
    2245             :       ELSE
    2246           0 :          res = pdlatra(nrows, aa, 1, 1, desca)
    2247             :       END IF
    2248             : #else
    2249             :       IF (matrix%use_sp) THEN
    2250             :          res_sp = 0.0_sp
    2251             :          DO ii = 1, nrows
    2252             :             res_sp = res_sp + aa_sp(ii, ii)
    2253             :          END DO
    2254             :          res = REAL(res_sp, KIND=dp)
    2255             :       ELSE
    2256             :          res = 0.0_dp
    2257             :          DO ii = 1, nrows
    2258             :             res = res + aa(ii, ii)
    2259             :          END DO
    2260             :       END IF
    2261             : #endif
    2262             : 
    2263           0 :       CALL timestop(handle)
    2264             : 
    2265           0 :    END FUNCTION cp_fm_latra
    2266             : 
    2267             : ! **************************************************************************************************
    2268             : !> \brief compute a QR factorization with column pivoting of a M-by-N distributed matrix
    2269             : !>        sub( A ) = A(IA:IA+M-1,JA:JA+N-1)
    2270             : !> \param matrix   : input M-by-N distributed matrix sub( A ) which is to be factored
    2271             : !> \param tau      : scalar factors TAU of the elementary reflectors. TAU is tied to the distributed matrix A
    2272             : !> \param nrow ...
    2273             : !> \param ncol ...
    2274             : !> \param first_row ...
    2275             : !> \param first_col ...
    2276             : !> \author MI
    2277             : ! **************************************************************************************************
    2278          36 :    SUBROUTINE cp_fm_pdgeqpf(matrix, tau, nrow, ncol, first_row, first_col)
    2279             : 
    2280             :       TYPE(cp_fm_type), INTENT(IN)                       :: matrix
    2281             :       REAL(KIND=dp), DIMENSION(:), POINTER               :: tau
    2282             :       INTEGER, INTENT(IN)                                :: nrow, ncol, first_row, first_col
    2283             : 
    2284             :       CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_pdgeqpf'
    2285             : 
    2286             :       INTEGER                                            :: handle
    2287             :       INTEGER                                            :: info, lwork
    2288          36 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: ipiv
    2289             :       REAL(KIND=dp), DIMENSION(:, :), POINTER            :: a
    2290             :       REAL(KIND=dp), DIMENSION(:), POINTER               :: work
    2291             : #if defined(__parallel)
    2292             :       INTEGER, DIMENSION(9) :: descc
    2293             : #else
    2294             :       INTEGER :: lda
    2295             : #endif
    2296             : 
    2297          36 :       CALL timeset(routineN, handle)
    2298             : 
    2299          36 :       a => matrix%local_data
    2300          36 :       lwork = -1
    2301         108 :       ALLOCATE (work(2*nrow))
    2302         108 :       ALLOCATE (ipiv(ncol))
    2303          36 :       info = 0
    2304             : 
    2305             : #if defined(__parallel)
    2306         360 :       descc(:) = matrix%matrix_struct%descriptor(:)
    2307             :       ! Call SCALAPACK routine to get optimal work dimension
    2308          36 :       CALL pdgeqpf(nrow, ncol, a, first_row, first_col, descc, ipiv, tau, work, lwork, info)
    2309          36 :       lwork = INT(work(1))
    2310          36 :       DEALLOCATE (work)
    2311         108 :       ALLOCATE (work(lwork))
    2312         244 :       tau = 0.0_dp
    2313         354 :       ipiv = 0
    2314             : 
    2315             :       ! Call SCALAPACK routine to get QR decomposition of CTs
    2316          36 :       CALL pdgeqpf(nrow, ncol, a, first_row, first_col, descc, ipiv, tau, work, lwork, info)
    2317             : #else
    2318             :       CPASSERT(first_row == 1 .AND. first_col == 1)
    2319             :       lda = SIZE(a, 1)
    2320             :       CALL dgeqp3(nrow, ncol, a, lda, ipiv, tau, work, lwork, info)
    2321             :       lwork = INT(work(1))
    2322             :       DEALLOCATE (work)
    2323             :       ALLOCATE (work(lwork))
    2324             :       tau = 0.0_dp
    2325             :       ipiv = 0
    2326             :       CALL dgeqp3(nrow, ncol, a, lda, ipiv, tau, work, lwork, info)
    2327             : #endif
    2328          36 :       CPASSERT(info == 0)
    2329             : 
    2330          36 :       DEALLOCATE (work)
    2331          36 :       DEALLOCATE (ipiv)
    2332             : 
    2333          36 :       CALL timestop(handle)
    2334             : 
    2335          36 :    END SUBROUTINE cp_fm_pdgeqpf
    2336             : 
    2337             : ! **************************************************************************************************
    2338             : !> \brief generates an M-by-N real distributed matrix Q denoting A(IA:IA+M-1,JA:JA+N-1)
    2339             : !>         with orthonormal columns, which is defined as the first N columns of a product of K
    2340             : !>         elementary reflectors of order M
    2341             : !> \param matrix : On entry, the j-th column must contain the vector which defines the elementary reflector
    2342             : !>                  as returned from PDGEQRF
    2343             : !>                 On exit it contains  the M-by-N distributed matrix Q
    2344             : !> \param tau :   contains the scalar factors TAU of elementary reflectors  as returned by PDGEQRF
    2345             : !> \param nrow ...
    2346             : !> \param first_row ...
    2347             : !> \param first_col ...
    2348             : !> \author MI
    2349             : ! **************************************************************************************************
    2350          36 :    SUBROUTINE cp_fm_pdorgqr(matrix, tau, nrow, first_row, first_col)
    2351             : 
    2352             :       TYPE(cp_fm_type), INTENT(IN)                       :: matrix
    2353             :       REAL(KIND=dp), DIMENSION(:), POINTER               :: tau
    2354             :       INTEGER, INTENT(IN)                                :: nrow, first_row, first_col
    2355             : 
    2356             :       CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_pdorgqr'
    2357             : 
    2358             :       INTEGER                                            :: handle
    2359             :       INTEGER                                            :: info, lwork
    2360             :       REAL(KIND=dp), DIMENSION(:, :), POINTER            :: a
    2361             :       REAL(KIND=dp), DIMENSION(:), POINTER               :: work
    2362             : #if defined(__parallel)
    2363             :       INTEGER, DIMENSION(9) :: descc
    2364             : #else
    2365             :       INTEGER :: lda
    2366             : #endif
    2367             : 
    2368          36 :       CALL timeset(routineN, handle)
    2369             : 
    2370          36 :       a => matrix%local_data
    2371          36 :       lwork = -1
    2372         108 :       ALLOCATE (work(2*nrow))
    2373          36 :       info = 0
    2374             : 
    2375             : #if defined(__parallel)
    2376         360 :       descc(:) = matrix%matrix_struct%descriptor(:)
    2377             : 
    2378          36 :       CALL pdorgqr(nrow, nrow, nrow, a, first_row, first_col, descc, tau, work, lwork, info)
    2379          36 :       CPASSERT(info == 0)
    2380          36 :       lwork = INT(work(1))
    2381          36 :       DEALLOCATE (work)
    2382         108 :       ALLOCATE (work(lwork))
    2383             : 
    2384             :       ! Call SCALAPACK routine to get Q
    2385          36 :       CALL pdorgqr(nrow, nrow, nrow, a, first_row, first_col, descc, tau, work, lwork, info)
    2386             : #else
    2387             :       CPASSERT(first_row == 1 .AND. first_col == 1)
    2388             :       lda = SIZE(a, 1)
    2389             :       CALL dorgqr(nrow, nrow, nrow, a, lda, tau, work, lwork, info)
    2390             :       lwork = INT(work(1))
    2391             :       DEALLOCATE (work)
    2392             :       ALLOCATE (work(lwork))
    2393             :       CALL dorgqr(nrow, nrow, nrow, a, lda, tau, work, lwork, info)
    2394             : #endif
    2395          36 :       CPASSERT(INFO == 0)
    2396             : 
    2397          36 :       DEALLOCATE (work)
    2398          36 :       CALL timestop(handle)
    2399             : 
    2400          36 :    END SUBROUTINE cp_fm_pdorgqr
    2401             : 
    2402             : ! **************************************************************************************************
    2403             : !> \brief Applies a planar rotation defined by cs and sn to the i'th and j'th rows.
    2404             : !> \param cs cosine of the rotation angle
    2405             : !> \param sn sinus of the rotation angle
    2406             : !> \param irow ...
    2407             : !> \param jrow ...
    2408             : !> \author Ole Schuett
    2409             : ! **************************************************************************************************
    2410      543328 :    SUBROUTINE cp_fm_rot_rows(matrix, irow, jrow, cs, sn)
    2411             :       TYPE(cp_fm_type), INTENT(IN)             :: matrix
    2412             :       INTEGER, INTENT(IN)                      :: irow, jrow
    2413             :       REAL(dp), INTENT(IN)                     :: cs, sn
    2414             : 
    2415             :       CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_rot_rows'
    2416             :       INTEGER                                  :: handle, nrow, ncol
    2417             : 
    2418             : #if defined(__parallel)
    2419             :       INTEGER                                  :: info, lwork
    2420             :       INTEGER, DIMENSION(9)                    :: desc
    2421      543328 :       REAL(dp), DIMENSION(:), ALLOCATABLE      :: work
    2422             : #endif
    2423             : 
    2424      543328 :       CALL timeset(routineN, handle)
    2425      543328 :       CALL cp_fm_get_info(matrix, nrow_global=nrow, ncol_global=ncol)
    2426             : 
    2427             : #if defined(__parallel)
    2428      543328 :       lwork = 2*ncol + 1
    2429     1629984 :       ALLOCATE (work(lwork))
    2430     5433280 :       desc(:) = matrix%matrix_struct%descriptor(:)
    2431             :       CALL pdrot(ncol, &
    2432             :                  matrix%local_data(1, 1), irow, 1, desc, ncol, &
    2433             :                  matrix%local_data(1, 1), jrow, 1, desc, ncol, &
    2434      543328 :                  cs, sn, work, lwork, info)
    2435      543328 :       CPASSERT(info == 0)
    2436      543328 :       DEALLOCATE (work)
    2437             : #else
    2438             :       CALL drot(ncol, matrix%local_data(irow, 1), ncol, matrix%local_data(jrow, 1), ncol, cs, sn)
    2439             : #endif
    2440             : 
    2441      543328 :       CALL timestop(handle)
    2442      543328 :    END SUBROUTINE cp_fm_rot_rows
    2443             : 
    2444             : ! **************************************************************************************************
    2445             : !> \brief Applies a planar rotation defined by cs and sn to the i'th and j'th columnns.
    2446             : !> \param cs cosine of the rotation angle
    2447             : !> \param sn sinus of the rotation angle
    2448             : !> \param icol ...
    2449             : !> \param jcol ...
    2450             : !> \author Ole Schuett
    2451             : ! **************************************************************************************************
    2452      612158 :    SUBROUTINE cp_fm_rot_cols(matrix, icol, jcol, cs, sn)
    2453             :       TYPE(cp_fm_type), INTENT(IN)             :: matrix
    2454             :       INTEGER, INTENT(IN)                      :: icol, jcol
    2455             :       REAL(dp), INTENT(IN)                     :: cs, sn
    2456             : 
    2457             :       CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_rot_cols'
    2458             :       INTEGER                                  :: handle, nrow, ncol
    2459             : 
    2460             : #if defined(__parallel)
    2461             :       INTEGER                                  :: info, lwork
    2462             :       INTEGER, DIMENSION(9)                    :: desc
    2463      612158 :       REAL(dp), DIMENSION(:), ALLOCATABLE      :: work
    2464             : #endif
    2465             : 
    2466      612158 :       CALL timeset(routineN, handle)
    2467      612158 :       CALL cp_fm_get_info(matrix, nrow_global=nrow, ncol_global=ncol)
    2468             : 
    2469             : #if defined(__parallel)
    2470      612158 :       lwork = 2*nrow + 1
    2471     1836474 :       ALLOCATE (work(lwork))
    2472     6121580 :       desc(:) = matrix%matrix_struct%descriptor(:)
    2473             :       CALL pdrot(nrow, &
    2474             :                  matrix%local_data(1, 1), 1, icol, desc, 1, &
    2475             :                  matrix%local_data(1, 1), 1, jcol, desc, 1, &
    2476      612158 :                  cs, sn, work, lwork, info)
    2477      612158 :       CPASSERT(info == 0)
    2478      612158 :       DEALLOCATE (work)
    2479             : #else
    2480             :       CALL drot(nrow, matrix%local_data(1, icol), 1, matrix%local_data(1, jcol), 1, cs, sn)
    2481             : #endif
    2482             : 
    2483      612158 :       CALL timestop(handle)
    2484      612158 :    END SUBROUTINE cp_fm_rot_cols
    2485             : 
    2486             : ! **************************************************************************************************
    2487             : !> \brief Orthonormalizes selected rows and columns of a full matrix, matrix_a
    2488             : !> \param matrix_a ...
    2489             : !> \param B ...
    2490             : !> \param nrows number of rows of matrix_a, optional, defaults to size(matrix_a,1)
    2491             : !> \param ncols number of columns of matrix_a, optional, defaults to size(matrix_a, 2)
    2492             : !> \param start_row starting index of rows, optional, defaults to 1
    2493             : !> \param start_col starting index of columns, optional, defaults to 1
    2494             : !> \param do_norm ...
    2495             : !> \param do_print ...
    2496             : ! **************************************************************************************************
    2497           0 :    SUBROUTINE cp_fm_Gram_Schmidt_orthonorm(matrix_a, B, nrows, ncols, start_row, start_col, &
    2498             :                                            do_norm, do_print)
    2499             : 
    2500             :       TYPE(cp_fm_type), INTENT(IN)                       :: matrix_a
    2501             :       REAL(kind=dp), DIMENSION(:, :), INTENT(OUT)        :: B
    2502             :       INTEGER, INTENT(IN), OPTIONAL                      :: nrows, ncols, start_row, start_col
    2503             :       LOGICAL, INTENT(IN), OPTIONAL                      :: do_norm, do_print
    2504             : 
    2505             :       CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_Gram_Schmidt_orthonorm'
    2506             : 
    2507             :       INTEGER :: end_col_global, end_col_local, end_row_global, end_row_local, handle, i, j, &
    2508             :                  j_col, ncol_global, ncol_local, nrow_global, nrow_local, start_col_global, &
    2509             :                  start_col_local, start_row_global, start_row_local, this_col, unit_nr
    2510           0 :       INTEGER, DIMENSION(:), POINTER                     :: col_indices, row_indices
    2511             :       LOGICAL                                            :: my_do_norm, my_do_print
    2512             :       REAL(KIND=dp)                                      :: norm
    2513           0 :       REAL(kind=dp), DIMENSION(:, :), POINTER            :: a
    2514             : 
    2515           0 :       CALL timeset(routineN, handle)
    2516             : 
    2517           0 :       my_do_norm = .TRUE.
    2518           0 :       IF (PRESENT(do_norm)) my_do_norm = do_norm
    2519             : 
    2520           0 :       my_do_print = .FALSE.
    2521           0 :       IF (PRESENT(do_print) .AND. (my_do_norm)) my_do_print = do_print
    2522             : 
    2523           0 :       unit_nr = -1
    2524           0 :       IF (my_do_print) THEN
    2525           0 :          unit_nr = cp_logger_get_default_unit_nr()
    2526           0 :          IF (unit_nr < 1) my_do_print = .FALSE.
    2527             :       END IF
    2528             : 
    2529           0 :       IF (SIZE(B) /= 0) THEN
    2530           0 :          IF (PRESENT(nrows)) THEN
    2531           0 :             nrow_global = nrows
    2532             :          ELSE
    2533           0 :             nrow_global = SIZE(B, 1)
    2534             :          END IF
    2535             : 
    2536           0 :          IF (PRESENT(ncols)) THEN
    2537           0 :             ncol_global = ncols
    2538             :          ELSE
    2539           0 :             ncol_global = SIZE(B, 2)
    2540             :          END IF
    2541             : 
    2542           0 :          IF (PRESENT(start_row)) THEN
    2543           0 :             start_row_global = start_row
    2544             :          ELSE
    2545             :             start_row_global = 1
    2546             :          END IF
    2547             : 
    2548           0 :          IF (PRESENT(start_col)) THEN
    2549           0 :             start_col_global = start_col
    2550             :          ELSE
    2551             :             start_col_global = 1
    2552             :          END IF
    2553             : 
    2554           0 :          end_row_global = start_row_global + nrow_global - 1
    2555           0 :          end_col_global = start_col_global + ncol_global - 1
    2556             : 
    2557             :          CALL cp_fm_get_info(matrix=matrix_a, &
    2558             :                              nrow_global=nrow_global, ncol_global=ncol_global, &
    2559             :                              nrow_local=nrow_local, ncol_local=ncol_local, &
    2560           0 :                              row_indices=row_indices, col_indices=col_indices)
    2561           0 :          IF (end_row_global > nrow_global) THEN
    2562             :             end_row_global = nrow_global
    2563             :          END IF
    2564           0 :          IF (end_col_global > ncol_global) THEN
    2565             :             end_col_global = ncol_global
    2566             :          END IF
    2567             : 
    2568             :          ! find out row/column indices of locally stored matrix elements that
    2569             :          ! needs to be copied.
    2570             :          ! Arrays row_indices and col_indices are assumed to be sorted in
    2571             :          ! ascending order
    2572           0 :          DO start_row_local = 1, nrow_local
    2573           0 :             IF (row_indices(start_row_local) >= start_row_global) EXIT
    2574             :          END DO
    2575             : 
    2576           0 :          DO end_row_local = start_row_local, nrow_local
    2577           0 :             IF (row_indices(end_row_local) > end_row_global) EXIT
    2578             :          END DO
    2579           0 :          end_row_local = end_row_local - 1
    2580             : 
    2581           0 :          DO start_col_local = 1, ncol_local
    2582           0 :             IF (col_indices(start_col_local) >= start_col_global) EXIT
    2583             :          END DO
    2584             : 
    2585           0 :          DO end_col_local = start_col_local, ncol_local
    2586           0 :             IF (col_indices(end_col_local) > end_col_global) EXIT
    2587             :          END DO
    2588           0 :          end_col_local = end_col_local - 1
    2589             : 
    2590           0 :          a => matrix_a%local_data
    2591             : 
    2592           0 :          this_col = col_indices(start_col_local) - start_col_global + 1
    2593             : 
    2594           0 :          B(:, this_col) = a(:, start_col_local)
    2595             : 
    2596           0 :          IF (my_do_norm) THEN
    2597           0 :             norm = SQRT(accurate_dot_product(B(:, this_col), B(:, this_col)))
    2598           0 :             B(:, this_col) = B(:, this_col)/norm
    2599           0 :             IF (my_do_print) WRITE (unit_nr, '(I3,F8.3)') this_col, norm
    2600             :          END IF
    2601             : 
    2602           0 :          DO i = start_col_local + 1, end_col_local
    2603           0 :             this_col = col_indices(i) - start_col_global + 1
    2604           0 :             B(:, this_col) = a(:, i)
    2605           0 :             DO j = start_col_local, i - 1
    2606           0 :                j_col = col_indices(j) - start_col_global + 1
    2607             :                B(:, this_col) = B(:, this_col) - &
    2608             :                                 accurate_dot_product(B(:, j_col), B(:, this_col))* &
    2609           0 :                                 B(:, j_col)/accurate_dot_product(B(:, j_col), B(:, j_col))
    2610             :             END DO
    2611             : 
    2612           0 :             IF (my_do_norm) THEN
    2613           0 :                norm = SQRT(accurate_dot_product(B(:, this_col), B(:, this_col)))
    2614           0 :                B(:, this_col) = B(:, this_col)/norm
    2615           0 :                IF (my_do_print) WRITE (unit_nr, '(I3,F8.3)') this_col, norm
    2616             :             END IF
    2617             : 
    2618             :          END DO
    2619           0 :          CALL matrix_a%matrix_struct%para_env%sum(B)
    2620             :       END IF
    2621             : 
    2622           0 :       CALL timestop(handle)
    2623             : 
    2624           0 :    END SUBROUTINE cp_fm_Gram_Schmidt_orthonorm
    2625             : 
    2626             : ! **************************************************************************************************
    2627             : !> \brief Cholesky decomposition
    2628             : !> \param fm_matrix ...
    2629             : !> \param n ...
    2630             : !> \param uplo triangular format; defaults to 'U'
    2631             : ! **************************************************************************************************
    2632       10017 :    SUBROUTINE cp_fm_potrf(fm_matrix, n, uplo)
    2633             :       TYPE(cp_fm_type)                         :: fm_matrix
    2634             :       INTEGER, INTENT(IN)                      :: n
    2635             :       CHARACTER, INTENT(IN), OPTIONAL          :: uplo
    2636             : 
    2637             :       CHARACTER                                :: myuplo
    2638             :       INTEGER                                  :: info
    2639       10017 :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: a
    2640       10017 :       REAL(KIND=sp), DIMENSION(:, :), POINTER  :: a_sp
    2641             : #if defined(__parallel)
    2642             :       INTEGER, DIMENSION(9)                    :: desca
    2643             : #endif
    2644             : 
    2645       10017 :       myuplo = 'U'
    2646           0 :       IF (PRESENT(uplo)) myuplo = uplo
    2647             : 
    2648       10017 :       a => fm_matrix%local_data
    2649       10017 :       a_sp => fm_matrix%local_data_sp
    2650             : #if defined(__parallel)
    2651      100170 :       desca(:) = fm_matrix%matrix_struct%descriptor(:)
    2652       10017 :       IF (fm_matrix%use_sp) THEN
    2653           0 :          CALL pspotrf(myuplo, n, a_sp(1, 1), 1, 1, desca, info)
    2654             :       ELSE
    2655       10017 :          CALL pdpotrf(myuplo, n, a(1, 1), 1, 1, desca, info)
    2656             :       END IF
    2657             : #else
    2658             :       IF (fm_matrix%use_sp) THEN
    2659             :          CALL spotrf(myuplo, n, a_sp(1, 1), SIZE(a_sp, 1), info)
    2660             :       ELSE
    2661             :          CALL dpotrf(myuplo, n, a(1, 1), SIZE(a, 1), info)
    2662             :       END IF
    2663             : #endif
    2664       10017 :       IF (info /= 0) &
    2665           0 :          CPABORT("Cholesky decomposition failed. Matrix ill-conditioned?")
    2666             : 
    2667       10017 :    END SUBROUTINE cp_fm_potrf
    2668             : 
    2669             : ! **************************************************************************************************
    2670             : !> \brief Invert trianguar matrix
    2671             : !> \param fm_matrix the matrix to invert (triangular matrix according to uplo)
    2672             : !> \param n size of the matrix to invert
    2673             : !> \param uplo triangular format; defaults to 'U'
    2674             : ! **************************************************************************************************
    2675        9241 :    SUBROUTINE cp_fm_potri(fm_matrix, n, uplo)
    2676             :       TYPE(cp_fm_type)                         :: fm_matrix
    2677             :       INTEGER, INTENT(IN)                      :: n
    2678             :       CHARACTER, INTENT(IN), OPTIONAL          :: uplo
    2679             : 
    2680             :       CHARACTER                                :: myuplo
    2681        9241 :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: a
    2682        9241 :       REAL(KIND=sp), DIMENSION(:, :), POINTER  :: a_sp
    2683             :       INTEGER                                  :: info
    2684             : #if defined(__parallel)
    2685             :       INTEGER, DIMENSION(9)                    :: desca
    2686             : #endif
    2687             : 
    2688        9241 :       myuplo = 'U'
    2689           0 :       IF (PRESENT(uplo)) myuplo = uplo
    2690             : 
    2691        9241 :       a => fm_matrix%local_data
    2692        9241 :       a_sp => fm_matrix%local_data_sp
    2693             : #if defined(__parallel)
    2694       92410 :       desca(:) = fm_matrix%matrix_struct%descriptor(:)
    2695        9241 :       IF (fm_matrix%use_sp) THEN
    2696           0 :          CALL pspotri(myuplo, n, a_sp(1, 1), 1, 1, desca, info)
    2697             :       ELSE
    2698        9241 :          CALL pdpotri(myuplo, n, a(1, 1), 1, 1, desca, info)
    2699             :       END IF
    2700             : #else
    2701             :       IF (fm_matrix%use_sp) THEN
    2702             :          CALL spotri(myuplo, n, a_sp(1, 1), SIZE(a_sp, 1), info)
    2703             :       ELSE
    2704             :          CALL dpotri(myuplo, n, a(1, 1), SIZE(a, 1), info)
    2705             :       END IF
    2706             : #endif
    2707        9241 :       CPASSERT(info == 0)
    2708        9241 :    END SUBROUTINE cp_fm_potri
    2709             : 
    2710             : ! **************************************************************************************************
    2711             : !> \brief ...
    2712             : !> \param fm_matrix ...
    2713             : !> \param neig ...
    2714             : !> \param fm_matrixb ...
    2715             : !> \param fm_matrixout ...
    2716             : !> \param op ...
    2717             : !> \param pos ...
    2718             : !> \param transa ...
    2719             : ! **************************************************************************************************
    2720        1184 :    SUBROUTINE cp_fm_cholesky_restore(fm_matrix, neig, fm_matrixb, fm_matrixout, op, pos, transa)
    2721             :       TYPE(cp_fm_type)                               :: fm_matrix
    2722             :       TYPE(cp_fm_type)                               :: fm_matrixb
    2723             :       TYPE(cp_fm_type)                               :: fm_matrixout
    2724             :       INTEGER, INTENT(IN)                            :: neig
    2725             :       CHARACTER(LEN=*), INTENT(IN)                   :: op
    2726             :       CHARACTER(LEN=*), INTENT(IN)                   :: pos
    2727             :       CHARACTER(LEN=*), INTENT(IN)                   :: transa
    2728             : 
    2729        1184 :       REAL(KIND=dp), DIMENSION(:, :), POINTER        :: a, b, outm
    2730        1184 :       REAL(KIND=sp), DIMENSION(:, :), POINTER        :: a_sp, b_sp, outm_sp
    2731             :       INTEGER                                        :: n, itype
    2732             :       REAL(KIND=dp)                                  :: alpha
    2733             : #if defined(__parallel)
    2734             :       INTEGER                                        :: i
    2735             :       INTEGER, DIMENSION(9)                          :: desca, descb, descout
    2736             : #endif
    2737             : 
    2738             :       ! notice b is the cholesky guy
    2739        1184 :       a => fm_matrix%local_data
    2740        1184 :       b => fm_matrixb%local_data
    2741        1184 :       outm => fm_matrixout%local_data
    2742        1184 :       a_sp => fm_matrix%local_data_sp
    2743        1184 :       b_sp => fm_matrixb%local_data_sp
    2744        1184 :       outm_sp => fm_matrixout%local_data_sp
    2745             : 
    2746        1184 :       n = fm_matrix%matrix_struct%nrow_global
    2747        1184 :       itype = 1
    2748             : 
    2749             : #if defined(__parallel)
    2750       11840 :       desca(:) = fm_matrix%matrix_struct%descriptor(:)
    2751       11840 :       descb(:) = fm_matrixb%matrix_struct%descriptor(:)
    2752       11840 :       descout(:) = fm_matrixout%matrix_struct%descriptor(:)
    2753        1184 :       alpha = 1.0_dp
    2754        5316 :       DO i = 1, neig
    2755        5316 :          IF (fm_matrix%use_sp) THEN
    2756           0 :             CALL pscopy(n, a_sp(1, 1), 1, i, desca, 1, outm_sp(1, 1), 1, i, descout, 1)
    2757             :          ELSE
    2758        4132 :             CALL pdcopy(n, a(1, 1), 1, i, desca, 1, outm(1, 1), 1, i, descout, 1)
    2759             :          END IF
    2760             :       END DO
    2761        1184 :       IF (op .EQ. "SOLVE") THEN
    2762        1184 :          IF (fm_matrix%use_sp) THEN
    2763             :             CALL pstrsm(pos, 'U', transa, 'N', n, neig, REAL(alpha, sp), b_sp(1, 1), 1, 1, descb, &
    2764           0 :                         outm_sp(1, 1), 1, 1, descout)
    2765             :          ELSE
    2766        1184 :             CALL pdtrsm(pos, 'U', transa, 'N', n, neig, alpha, b(1, 1), 1, 1, descb, outm(1, 1), 1, 1, descout)
    2767             :          END IF
    2768             :       ELSE
    2769           0 :          IF (fm_matrix%use_sp) THEN
    2770             :             CALL pstrmm(pos, 'U', transa, 'N', n, neig, REAL(alpha, sp), b_sp(1, 1), 1, 1, descb, &
    2771           0 :                         outm_sp(1, 1), 1, 1, descout)
    2772             :          ELSE
    2773           0 :             CALL pdtrmm(pos, 'U', transa, 'N', n, neig, alpha, b(1, 1), 1, 1, descb, outm(1, 1), 1, 1, descout)
    2774             :          END IF
    2775             :       END IF
    2776             : #else
    2777             :       alpha = 1.0_dp
    2778             :       IF (fm_matrix%use_sp) THEN
    2779             :          CALL scopy(neig*n, a_sp(1, 1), 1, outm_sp(1, 1), 1)
    2780             :       ELSE
    2781             :          CALL dcopy(neig*n, a(1, 1), 1, outm(1, 1), 1)
    2782             :       END IF
    2783             :       IF (op .EQ. "SOLVE") THEN
    2784             :          IF (fm_matrix%use_sp) THEN
    2785             :             CALL strsm(pos, 'U', transa, 'N', n, neig, REAL(alpha, sp), b_sp(1, 1), SIZE(b_sp, 1), outm_sp(1, 1), n)
    2786             :          ELSE
    2787             :             CALL dtrsm(pos, 'U', transa, 'N', n, neig, alpha, b(1, 1), SIZE(b, 1), outm(1, 1), n)
    2788             :          END IF
    2789             :       ELSE
    2790             :          IF (fm_matrix%use_sp) THEN
    2791             :             CALL strmm(pos, 'U', transa, 'N', n, neig, REAL(alpha, sp), b_sp(1, 1), n, outm_sp(1, 1), n)
    2792             :          ELSE
    2793             :             CALL dtrmm(pos, 'U', transa, 'N', n, neig, alpha, b(1, 1), n, outm(1, 1), n)
    2794             :          END IF
    2795             :       END IF
    2796             : #endif
    2797             : 
    2798        1184 :    END SUBROUTINE cp_fm_cholesky_restore
    2799             : 
    2800             : ! **************************************************************************************************
    2801             : !> \brief Calculates
    2802             : !>        yv = alpha*amat*xv + beta*yv
    2803             : !>        where amat: fm matrix
    2804             : !>              xv  : vector replicated
    2805             : !>              yv  : vector replicated
    2806             : !>        Defaults: alpha = 1, beta = 0
    2807             : ! **************************************************************************************************
    2808       16880 :    SUBROUTINE cp_fm_matvec(amat, xv, yv, alpha, beta)
    2809             :       TYPE(cp_fm_type), INTENT(IN)                   :: amat
    2810             :       REAL(KIND=dp), DIMENSION(:), INTENT(IN)        :: xv
    2811             :       REAL(KIND=dp), DIMENSION(:), INTENT(INOUT)     :: yv
    2812             :       REAL(KIND=dp), OPTIONAL, INTENT(IN)            :: alpha, beta
    2813             : 
    2814             :       INTEGER                                        :: na, nc, nx, ny
    2815             :       REAL(KIND=dp)                                  :: aval, bval
    2816             : #if defined(__parallel)
    2817             :       INTEGER                                        :: nrl, ncl, ic, ir
    2818       16880 :       INTEGER, DIMENSION(:), POINTER                 :: rind, cind
    2819       16880 :       REAL(KIND=dp), DIMENSION(:), ALLOCATABLE       :: xvl, yvl, yvm
    2820             : #endif
    2821             : 
    2822       16880 :       IF (amat%use_sp) THEN
    2823           0 :          CPABORT("cp_fm_matvec: SP option not available")
    2824             :       END IF
    2825       16880 :       aval = 1.0_dp
    2826       16880 :       IF (PRESENT(alpha)) aval = alpha
    2827       16880 :       bval = 0.0_dp
    2828       16880 :       IF (PRESENT(beta)) bval = beta
    2829             : 
    2830       16880 :       CALL cp_fm_get_info(amat, nrow_global=na, ncol_global=nc)
    2831       16880 :       nx = SIZE(xv)
    2832       16880 :       ny = SIZE(yv)
    2833       16880 :       IF ((nx /= ny) .OR. (nc /= nx)) THEN
    2834           0 :          CPABORT("cp_fm_matvec: incompatible dimensions")
    2835             :       END IF
    2836             : #if defined(__parallel)
    2837             :       CALL cp_fm_get_info(amat, nrow_local=nrl, ncol_local=ncl, &
    2838       16880 :                           row_indices=rind, col_indices=cind)
    2839      118160 :       ALLOCATE (xvl(ncl), yvl(nrl), yvm(ny))
    2840      147680 :       DO ic = 1, ncl
    2841      147680 :          xvl(ic) = xv(cind(ic))
    2842             :       END DO
    2843     1665560 :       yvl(1:nrl) = MATMUL(amat%local_data, xvl(1:ncl))
    2844      147680 :       yvm = 0.0_dp
    2845       82280 :       DO ir = 1, nrl
    2846       82280 :          yvm(rind(ir)) = yvl(ir)
    2847             :       END DO
    2848       16880 :       CALL amat%matrix_struct%para_env%sum(yvm)
    2849       16880 :       IF (bval == 0.0_dp) THEN
    2850       73840 :          yv = aval*yvm
    2851             :       ELSE
    2852       73840 :          yv = bval*yv + aval*yvm
    2853             :       END IF
    2854             : #else
    2855             :       IF (bval == 0.0_dp) THEN
    2856             :          yv = aval*MATMUL(amat%local_data, xv)
    2857             :       ELSE
    2858             :          yv = bval*yv + aval*MATMUL(amat%local_data, xv)
    2859             :       END IF
    2860             : #endif
    2861             : 
    2862       67520 :    END SUBROUTINE cp_fm_matvec
    2863             : 
    2864             : END MODULE cp_fm_basic_linalg

Generated by: LCOV version 1.15