LCOV - code coverage report
Current view: top level - src/fm - cp_fm_basic_linalg.F (source / functions) Hit Total Coverage
Test: CP2K Regtests (git:262480d) Lines: 617 853 72.3 %
Date: 2024-11-22 07:00:40 Functions: 30 43 69.8 %

          Line data    Source code
       1             : !--------------------------------------------------------------------------------------------------!
       2             : !   CP2K: A general program to perform molecular dynamics simulations                              !
       3             : !   Copyright 2000-2024 CP2K developers group <https://cp2k.org>                                   !
       4             : !                                                                                                  !
       5             : !   SPDX-License-Identifier: GPL-2.0-or-later                                                      !
       6             : !--------------------------------------------------------------------------------------------------!
       7             : 
       8             : ! **************************************************************************************************
       9             : !> \brief basic linear algebra operations for full matrices
      10             : !> \par History
      11             : !>      08.2002 split out of qs_blacs [fawzi]
      12             : !> \author Fawzi Mohamed
      13             : ! **************************************************************************************************
      14             : MODULE cp_fm_basic_linalg
      15             :    USE cp_blacs_env, ONLY: cp_blacs_env_type
      16             :    USE cp_fm_struct, ONLY: cp_fm_struct_equivalent
      17             :    USE cp_fm_types, ONLY: &
      18             :       cp_fm_create, cp_fm_get_diag, cp_fm_get_info, cp_fm_get_submatrix, cp_fm_p_type, &
      19             :       cp_fm_release, cp_fm_set_all, cp_fm_set_element, cp_fm_set_submatrix, cp_fm_to_fm, &
      20             :       cp_fm_type
      21             :    USE cp_log_handling, ONLY: cp_logger_get_default_unit_nr, &
      22             :                               cp_to_string
      23             :    USE kahan_sum, ONLY: accurate_dot_product, &
      24             :                         accurate_sum
      25             :    USE kinds, ONLY: dp, &
      26             :                     int_8, &
      27             :                     sp
      28             :    USE machine, ONLY: m_memory
      29             :    USE mathlib, ONLY: get_pseudo_inverse_svd, &
      30             :                       invert_matrix
      31             :    USE message_passing, ONLY: mp_comm_type
      32             : #include "../base/base_uses.f90"
      33             : 
      34             :    IMPLICIT NONE
      35             :    PRIVATE
      36             : 
      37             :    LOGICAL, PRIVATE, PARAMETER :: debug_this_module = .TRUE.
      38             :    CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'cp_fm_basic_linalg'
      39             : 
      40             :    PUBLIC :: cp_fm_scale, & ! scale a matrix
      41             :              cp_fm_scale_and_add, & ! scale and add two matrices
      42             :              cp_fm_geadd, & ! general addition
      43             :              cp_fm_column_scale, & ! scale columns of a matrix
      44             :              cp_fm_row_scale, & ! scale rows of a matrix
      45             :              cp_fm_trace, & ! trace of the transpose(A)*B
      46             :              cp_fm_contracted_trace, & ! sum_{i,...,k} Tr [A(i,...,k)^T * B(i,...,k)]
      47             :              cp_fm_norm, & ! different norms of A
      48             :              cp_fm_schur_product, & ! schur product
      49             :              cp_fm_transpose, & ! transpose a matrix
      50             :              cp_fm_upper_to_full, & ! symmetrise an upper symmetric matrix
      51             :              cp_fm_syrk, & ! rank k update
      52             :              cp_fm_triangular_multiply, & ! triangular matrix multiply / solve
      53             :              cp_fm_symm, & ! multiply a symmetric with a non-symmetric matrix
      54             :              cp_fm_gemm, & ! multiply two matrices
      55             :              cp_complex_fm_gemm, & ! multiply two complex matrices, represented by non_complex fm matrices
      56             :              cp_fm_invert, & ! computes the inverse and determinant
      57             :              cp_fm_frobenius_norm, & ! frobenius norm
      58             :              cp_fm_triangular_invert, & ! compute the reciprocal of a triangular matrix
      59             :              cp_fm_qr_factorization, & ! compute the QR factorization of a rectangular matrix
      60             :              cp_fm_solve, & ! solves the equation  A*B=C A and C are input
      61             :              cp_fm_pdgeqpf, & ! compute a QR factorization with column pivoting of a M-by-N distributed matrix
      62             :              cp_fm_pdorgqr, & ! generates an M-by-N as first N columns of a product of K elementary reflectors
      63             :              cp_fm_potrf, & ! Cholesky decomposition
      64             :              cp_fm_potri, & ! Invert triangular matrix
      65             :              cp_fm_rot_rows, & ! rotates two rows
      66             :              cp_fm_rot_cols, & ! rotates two columns
      67             :              cp_fm_cholesky_restore, & ! apply Cholesky decomposition
      68             :              cp_fm_Gram_Schmidt_orthonorm, & ! Gram-Schmidt orthonormalization of columns of a full matrix, &
      69             :              cp_fm_det, & ! determinant of a real matrix with correct sign
      70             :              cp_fm_matvec ! matrix-vector multiplication (vector replicated)
      71             : 
      72             :    REAL(KIND=dp), EXTERNAL :: dlange, pdlange, pdlatra
      73             :    REAL(KIND=sp), EXTERNAL :: slange, pslange, pslatra
      74             : 
      75             :    INTERFACE cp_fm_trace
      76             :       MODULE PROCEDURE cp_fm_trace_a0b0t0
      77             :       MODULE PROCEDURE cp_fm_trace_a1b0t1_a
      78             :       MODULE PROCEDURE cp_fm_trace_a1b0t1_p
      79             :       MODULE PROCEDURE cp_fm_trace_a1b1t1_aa
      80             :       MODULE PROCEDURE cp_fm_trace_a1b1t1_ap
      81             :       MODULE PROCEDURE cp_fm_trace_a1b1t1_pa
      82             :       MODULE PROCEDURE cp_fm_trace_a1b1t1_pp
      83             :    END INTERFACE cp_fm_trace
      84             : 
      85             :    INTERFACE cp_fm_contracted_trace
      86             :       MODULE PROCEDURE cp_fm_contracted_trace_a2b2t2_aa
      87             :       MODULE PROCEDURE cp_fm_contracted_trace_a2b2t2_ap
      88             :       MODULE PROCEDURE cp_fm_contracted_trace_a2b2t2_pa
      89             :       MODULE PROCEDURE cp_fm_contracted_trace_a2b2t2_pp
      90             :    END INTERFACE cp_fm_contracted_trace
      91             : CONTAINS
      92             : 
      93             : ! **************************************************************************************************
      94             : !> \brief Computes the determinant (with a correct sign even in parallel environment!) of a real square matrix
      95             : !> \author A. Sinyavskiy (andrey.sinyavskiy@chem.uzh.ch)
      96             : ! **************************************************************************************************
      97           0 :    SUBROUTINE cp_fm_det(matrix_a, det_a)
      98             : 
      99             :       TYPE(cp_fm_type), INTENT(IN)             :: matrix_a
     100             :       REAL(KIND=dp), INTENT(OUT)               :: det_a
     101             :       REAL(KIND=dp)                            :: determinant
     102             :       TYPE(cp_fm_type)                         :: matrix_lu
     103             :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: a
     104             :       INTEGER                                  :: n, i, info, P
     105           0 :       INTEGER, ALLOCATABLE, DIMENSION(:)       :: ipivot
     106             :       REAL(KIND=dp), DIMENSION(:), POINTER     :: diag
     107             : 
     108             : #if defined(__parallel)
     109             :       INTEGER                                  :: myprow, nprow, npcol, nrow_local, nrow_block, irow_local
     110             :       INTEGER, DIMENSION(9)                    :: desca
     111             : #endif
     112             : 
     113             :       CALL cp_fm_create(matrix=matrix_lu, &
     114             :                         matrix_struct=matrix_a%matrix_struct, &
     115           0 :                         name="A_lu"//TRIM(ADJUSTL(cp_to_string(1)))//"MATRIX")
     116           0 :       CALL cp_fm_to_fm(matrix_a, matrix_lu)
     117             : 
     118           0 :       a => matrix_lu%local_data
     119           0 :       n = matrix_lu%matrix_struct%nrow_global
     120           0 :       ALLOCATE (ipivot(n))
     121           0 :       ipivot(:) = 0
     122           0 :       P = 0
     123           0 :       ALLOCATE (diag(n))
     124           0 :       diag(:) = 0.0_dp
     125             : #if defined(__parallel)
     126             :       ! Use LU decomposition
     127           0 :       desca(:) = matrix_lu%matrix_struct%descriptor(:)
     128           0 :       CALL pdgetrf(n, n, a, 1, 1, desca, ipivot, info)
     129           0 :       CALL cp_fm_get_diag(matrix_lu, diag)
     130           0 :       determinant = PRODUCT(diag)
     131           0 :       myprow = matrix_lu%matrix_struct%context%mepos(1)
     132           0 :       nprow = matrix_lu%matrix_struct%context%num_pe(1)
     133           0 :       npcol = matrix_lu%matrix_struct%context%num_pe(2)
     134           0 :       nrow_local = matrix_lu%matrix_struct%nrow_locals(myprow)
     135           0 :       nrow_block = matrix_lu%matrix_struct%nrow_block
     136           0 :       DO irow_local = 1, nrow_local
     137           0 :          i = matrix_lu%matrix_struct%row_indices(irow_local)
     138           0 :          IF (ipivot(irow_local) /= i) P = P + 1
     139             :       END DO
     140           0 :       CALL matrix_lu%matrix_struct%para_env%sum(P)
     141             :       ! very important fix
     142           0 :       P = P/npcol
     143             : #else
     144             :       CALL dgetrf(n, n, a, n, ipivot, info)
     145             :       CALL cp_fm_get_diag(matrix_lu, diag)
     146             :       determinant = PRODUCT(diag)
     147             :       DO i = 1, n
     148             :          IF (ipivot(i) /= i) P = P + 1
     149             :       END DO
     150             : #endif
     151           0 :       DEALLOCATE (ipivot)
     152           0 :       DEALLOCATE (diag)
     153           0 :       CALL cp_fm_release(matrix_lu)
     154           0 :       det_a = determinant*(-2*MOD(P, 2) + 1.0_dp)
     155           0 :    END SUBROUTINE cp_fm_det
     156             : 
     157             : ! **************************************************************************************************
     158             : !> \brief calc A <- alpha*A + beta*B
     159             : !>      optimized for alpha == 1.0 (just add beta*B) and beta == 0.0 (just
     160             : !>      scale A)
     161             : !> \param alpha ...
     162             : !> \param matrix_a ...
     163             : !> \param beta ...
     164             : !> \param matrix_b ...
     165             : ! **************************************************************************************************
     166     1062116 :    SUBROUTINE cp_fm_scale_and_add(alpha, matrix_a, beta, matrix_b)
     167             : 
     168             :       REAL(KIND=dp), INTENT(IN)                          :: alpha
     169             :       TYPE(cp_fm_type), INTENT(IN)                       :: matrix_a
     170             :       REAL(KIND=dp), INTENT(in), OPTIONAL                :: beta
     171             :       TYPE(cp_fm_type), INTENT(IN), OPTIONAL             :: matrix_b
     172             : 
     173             :       CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_scale_and_add'
     174             : 
     175             :       INTEGER                                            :: handle, size_a, size_b
     176             :       REAL(KIND=dp)                                      :: my_beta
     177     1062116 :       REAL(KIND=dp), DIMENSION(:, :), POINTER            :: a, b
     178     1062116 :       REAL(KIND=sp), DIMENSION(:, :), POINTER            :: a_sp, b_sp
     179             : 
     180     1062116 :       CALL timeset(routineN, handle)
     181             : 
     182     1062116 :       IF (PRESENT(matrix_b)) THEN
     183     1062116 :          my_beta = 1.0_dp
     184             :       ELSE
     185           0 :          my_beta = 0.0_dp
     186             :       END IF
     187     1062116 :       IF (PRESENT(beta)) my_beta = beta
     188     1062116 :       NULLIFY (a, b)
     189             : 
     190     1062116 :       IF (PRESENT(beta)) THEN
     191     1062116 :          CPASSERT(PRESENT(matrix_b))
     192     1062116 :          IF (ASSOCIATED(matrix_a%local_data, matrix_b%local_data)) THEN
     193           0 :             CPWARN("Bad use of routine. Call cp_fm_scale instead")
     194           0 :             CALL cp_fm_scale(alpha + beta, matrix_a)
     195           0 :             CALL timestop(handle)
     196           0 :             RETURN
     197             :          END IF
     198             :       END IF
     199             : 
     200     1062116 :       a => matrix_a%local_data
     201     1062116 :       a_sp => matrix_a%local_data_sp
     202             : 
     203     1062116 :       IF (matrix_a%use_sp) THEN
     204           0 :          size_a = SIZE(a_sp, 1)*SIZE(a_sp, 2)
     205             :       ELSE
     206     1062116 :          size_a = SIZE(a, 1)*SIZE(a, 2)
     207             :       END IF
     208             : 
     209     1062116 :       IF (alpha /= 1.0_dp) THEN
     210       79066 :          IF (matrix_a%use_sp) THEN
     211           0 :             CALL sscal(size_a, REAL(alpha, sp), a_sp, 1)
     212             :          ELSE
     213       79066 :             CALL dscal(size_a, alpha, a, 1)
     214             :          END IF
     215             :       END IF
     216     1062116 :       IF (my_beta .NE. 0.0_dp) THEN
     217     1052690 :          IF (matrix_a%matrix_struct%context /= matrix_b%matrix_struct%context) &
     218           0 :             CPABORT("matrixes must be in the same blacs context")
     219             : 
     220     1052690 :          IF (cp_fm_struct_equivalent(matrix_a%matrix_struct, &
     221             :                                      matrix_b%matrix_struct)) THEN
     222             : 
     223     1052690 :             b => matrix_b%local_data
     224     1052690 :             b_sp => matrix_b%local_data_sp
     225     1052690 :             IF (matrix_b%use_sp) THEN
     226           0 :                size_b = SIZE(b_sp, 1)*SIZE(b_sp, 2)
     227             :             ELSE
     228     1052690 :                size_b = SIZE(b, 1)*SIZE(b, 2)
     229             :             END IF
     230     1052690 :             IF (size_a /= size_b) &
     231           0 :                CPABORT("Matrixes must have same locale sizes")
     232             : 
     233     1052690 :             IF (matrix_a%use_sp .AND. matrix_b%use_sp) THEN
     234           0 :                CALL saxpy(size_a, REAL(my_beta, sp), b_sp, 1, a_sp, 1)
     235     1052690 :             ELSEIF (matrix_a%use_sp .AND. .NOT. matrix_b%use_sp) THEN
     236           0 :                CALL saxpy(size_a, REAL(my_beta, sp), REAL(b, sp), 1, a_sp, 1)
     237     1052690 :             ELSEIF (.NOT. matrix_a%use_sp .AND. matrix_b%use_sp) THEN
     238           0 :                CALL daxpy(size_a, my_beta, REAL(b_sp, dp), 1, a, 1)
     239             :             ELSE
     240     1052690 :                CALL daxpy(size_a, my_beta, b, 1, a, 1)
     241             :             END IF
     242             : 
     243             :          ELSE
     244             : #ifdef __parallel
     245           0 :             CPABORT("to do (pdscal,pdcopy,pdaxpy)")
     246             : #else
     247             :             CPABORT("")
     248             : #endif
     249             :          END IF
     250             : 
     251             :       END IF
     252             : 
     253     1062116 :       CALL timestop(handle)
     254             : 
     255     1062116 :    END SUBROUTINE cp_fm_scale_and_add
     256             : 
     257             : ! **************************************************************************************************
     258             : !> \brief interface to BLACS geadd:
     259             : !>                matrix_b = beta*matrix_b + alpha*opt(matrix_a)
     260             : !>        where opt(matrix_a) can be either:
     261             : !>              'N':  matrix_a
     262             : !>              'T':  matrix_a^T
     263             : !>              'C':  matrix_a^H (Hermitian conjugate)
     264             : !>        note that this is a level three routine, use cp_fm_scale_and_add if that
     265             : !>        is sufficient for your needs
     266             : !> \param alpha  : complex scalar
     267             : !> \param trans  : 'N' normal, 'T' transposed
     268             : !> \param matrix_a : input matrix_a
     269             : !> \param beta   : complex scalar
     270             : !> \param matrix_b : input matrix_b, upon out put the updated matrix_b
     271             : !> \author  Lianheng Tong
     272             : ! **************************************************************************************************
     273          96 :    SUBROUTINE cp_fm_geadd(alpha, trans, matrix_a, beta, matrix_b)
     274             :       REAL(KIND=dp), INTENT(IN) :: alpha, beta
     275             :       CHARACTER, INTENT(IN) :: trans
     276             :       TYPE(cp_fm_type), INTENT(IN) :: matrix_a, matrix_b
     277             : 
     278             :       CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_geadd'
     279             : 
     280             :       INTEGER :: nrow_global, ncol_global, handle
     281             :       REAL(KIND=dp), DIMENSION(:, :), POINTER :: aa, bb
     282             : #if defined(__parallel)
     283             :       INTEGER, DIMENSION(9) :: desca, descb
     284             : #else
     285             :       INTEGER :: ii, jj
     286             : #endif
     287             : 
     288          96 :       CALL timeset(routineN, handle)
     289             : 
     290          96 :       nrow_global = matrix_a%matrix_struct%nrow_global
     291          96 :       ncol_global = matrix_a%matrix_struct%ncol_global
     292          96 :       CPASSERT(nrow_global .EQ. matrix_b%matrix_struct%nrow_global)
     293          96 :       CPASSERT(ncol_global .EQ. matrix_b%matrix_struct%ncol_global)
     294             : 
     295          96 :       aa => matrix_a%local_data
     296          96 :       bb => matrix_b%local_data
     297             : 
     298             : #if defined(__parallel)
     299         960 :       desca = matrix_a%matrix_struct%descriptor
     300         960 :       descb = matrix_b%matrix_struct%descriptor
     301             :       CALL pdgeadd(trans, &
     302             :                    nrow_global, &
     303             :                    ncol_global, &
     304             :                    alpha, &
     305             :                    aa, &
     306             :                    1, 1, &
     307             :                    desca, &
     308             :                    beta, &
     309             :                    bb, &
     310             :                    1, 1, &
     311          96 :                    descb)
     312             : #else
     313             :       ! dgeadd is not a standard BLAS function, although is implemented
     314             :       ! in some libraries like OpenBLAS, so not going to use it here
     315             :       SELECT CASE (trans)
     316             :       CASE ('T')
     317             :          DO jj = 1, ncol_global
     318             :             DO ii = 1, nrow_global
     319             :                bb(ii, jj) = beta*bb(ii, jj) + alpha*aa(jj, ii)
     320             :             END DO
     321             :          END DO
     322             :       CASE DEFAULT
     323             :          DO jj = 1, ncol_global
     324             :             DO ii = 1, nrow_global
     325             :                bb(ii, jj) = beta*bb(ii, jj) + alpha*aa(ii, jj)
     326             :             END DO
     327             :          END DO
     328             :       END SELECT
     329             : #endif
     330             : 
     331          96 :       CALL timestop(handle)
     332             : 
     333          96 :    END SUBROUTINE cp_fm_geadd
     334             : 
     335             : ! **************************************************************************************************
     336             : !> \brief Computes the LU-decomposition of the matrix, and the determinant of the matrix
     337             : !>      IMPORTANT : the sign of the determinant is not defined correctly yet ....
     338             : !> \param matrix_a ...
     339             : !> \param almost_determinant ...
     340             : !> \param correct_sign ...
     341             : !> \par History
     342             : !>      added correct_sign 02.07 (fschiff)
     343             : !> \author Joost VandeVondele
     344             : !> \note
     345             : !>      - matrix_a is overwritten
     346             : !>      - the sign of the determinant might be wrong
     347             : !>      - SERIOUS WARNING (KNOWN BUG) : the sign of the determinant depends on ipivot
     348             : !>      - one should be able to find out if ipivot is an even or an odd permutation...
     349             : !>        if you need the correct sign, just add correct_sign==.TRUE. (fschiff)
     350             : !>      - Use cp_fm_get_diag instead of n times cp_fm_get_element (A. Bussy)
     351             : ! **************************************************************************************************
     352           0 :    SUBROUTINE cp_fm_lu_decompose(matrix_a, almost_determinant, correct_sign)
     353             :       TYPE(cp_fm_type), INTENT(IN)          :: matrix_a
     354             :       REAL(KIND=dp), INTENT(OUT)               :: almost_determinant
     355             :       LOGICAL, INTENT(IN), OPTIONAL            :: correct_sign
     356             : 
     357             :       CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_lu_decompose'
     358             : 
     359             :       INTEGER                                  :: handle, i, info, n
     360           0 :       INTEGER, ALLOCATABLE, DIMENSION(:)       :: ipivot
     361             :       REAL(KIND=dp)                            :: determinant
     362             :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: a
     363             : #if defined(__parallel)
     364             :       INTEGER, DIMENSION(9)                    :: desca
     365           0 :       REAL(KIND=dp), DIMENSION(:), POINTER     :: diag
     366             : #else
     367             :       INTEGER                                  :: lda
     368             : #endif
     369             : 
     370           0 :       CALL timeset(routineN, handle)
     371             : 
     372           0 :       a => matrix_a%local_data
     373           0 :       n = matrix_a%matrix_struct%nrow_global
     374           0 :       ALLOCATE (ipivot(n + matrix_a%matrix_struct%nrow_block))
     375             : 
     376             : #if defined(__parallel)
     377             :       MARK_USED(correct_sign)
     378           0 :       desca(:) = matrix_a%matrix_struct%descriptor(:)
     379           0 :       CALL pdgetrf(n, n, a, 1, 1, desca, ipivot, info)
     380             : 
     381           0 :       ALLOCATE (diag(n))
     382           0 :       diag(:) = 0.0_dp
     383           0 :       CALL cp_fm_get_diag(matrix_a, diag)
     384           0 :       determinant = 1.0_dp
     385           0 :       DO i = 1, n
     386           0 :          determinant = determinant*diag(i)
     387             :       END DO
     388           0 :       DEALLOCATE (diag)
     389             : #else
     390             :       lda = SIZE(a, 1)
     391             :       CALL dgetrf(n, n, a, lda, ipivot, info)
     392             :       determinant = 1.0_dp
     393             :       IF (correct_sign) THEN
     394             :          DO i = 1, n
     395             :             IF (ipivot(i) .NE. i) THEN
     396             :                determinant = -determinant*a(i, i)
     397             :             ELSE
     398             :                determinant = determinant*a(i, i)
     399             :             END IF
     400             :          END DO
     401             :       ELSE
     402             :          DO i = 1, n
     403             :             determinant = determinant*a(i, i)
     404             :          END DO
     405             :       END IF
     406             : #endif
     407             :       ! info is allowed to be zero
     408             :       ! this does just signal a zero diagonal element
     409           0 :       DEALLOCATE (ipivot)
     410           0 :       almost_determinant = determinant ! notice that the sign is random
     411           0 :       CALL timestop(handle)
     412           0 :    END SUBROUTINE
     413             : 
     414             : ! **************************************************************************************************
     415             : !> \brief computes matrix_c = beta * matrix_c + alpha * ( matrix_a  ** transa ) * ( matrix_b ** transb )
     416             : !> \param transa : 'N' -> normal   'T' -> transpose
     417             : !>      alpha,beta :: can be 0.0_dp and 1.0_dp
     418             : !> \param transb ...
     419             : !> \param m ...
     420             : !> \param n ...
     421             : !> \param k ...
     422             : !> \param alpha ...
     423             : !> \param matrix_a : m x k matrix ( ! for transa = 'N')
     424             : !> \param matrix_b : k x n matrix ( ! for transb = 'N')
     425             : !> \param beta ...
     426             : !> \param matrix_c : m x n matrix
     427             : !> \param a_first_col ...
     428             : !> \param a_first_row ...
     429             : !> \param b_first_col : the k x n matrix starts at col b_first_col of matrix_b (avoid usage)
     430             : !> \param b_first_row ...
     431             : !> \param c_first_col ...
     432             : !> \param c_first_row ...
     433             : !> \author Matthias Krack
     434             : !> \note
     435             : !>      matrix_c should have no overlap with matrix_a, matrix_b
     436             : ! **************************************************************************************************
     437         550 :    SUBROUTINE cp_fm_gemm(transa, transb, m, n, k, alpha, matrix_a, matrix_b, beta, &
     438             :                          matrix_c, a_first_col, a_first_row, b_first_col, b_first_row, &
     439             :                          c_first_col, c_first_row)
     440             : 
     441             :       CHARACTER(LEN=1), INTENT(IN)             :: transa, transb
     442             :       INTEGER, INTENT(IN)                      :: m, n, k
     443             :       REAL(KIND=dp), INTENT(IN)                :: alpha
     444             :       TYPE(cp_fm_type), INTENT(IN)             :: matrix_a, matrix_b
     445             :       REAL(KIND=dp), INTENT(IN)                :: beta
     446             :       TYPE(cp_fm_type), INTENT(IN)          :: matrix_c
     447             :       INTEGER, INTENT(IN), OPTIONAL            :: a_first_col, a_first_row, &
     448             :                                                   b_first_col, b_first_row, &
     449             :                                                   c_first_col, c_first_row
     450             : 
     451             :       CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_gemm'
     452             : 
     453             :       INTEGER                                  :: handle, i_a, i_b, i_c, j_a, &
     454             :                                                   j_b, j_c
     455             :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: a, b, c
     456         550 :       REAL(KIND=sp), DIMENSION(:, :), POINTER  :: a_sp, b_sp, c_sp
     457             : #if defined(__parallel)
     458             :       INTEGER, DIMENSION(9)                    :: desca, descb, descc
     459             : #else
     460             :       INTEGER                                  :: lda, ldb, ldc
     461             : #endif
     462             : 
     463         550 :       CALL timeset(routineN, handle)
     464             : 
     465             :       !sample peak memory
     466         550 :       CALL m_memory()
     467             : 
     468         550 :       a => matrix_a%local_data
     469         550 :       b => matrix_b%local_data
     470         550 :       c => matrix_c%local_data
     471             : 
     472         550 :       a_sp => matrix_a%local_data_sp
     473         550 :       b_sp => matrix_b%local_data_sp
     474         550 :       c_sp => matrix_c%local_data_sp
     475             : 
     476         550 :       IF (PRESENT(a_first_row)) THEN
     477           0 :          i_a = a_first_row
     478             :       ELSE
     479         550 :          i_a = 1
     480             :       END IF
     481         550 :       IF (PRESENT(a_first_col)) THEN
     482           0 :          j_a = a_first_col
     483             :       ELSE
     484         550 :          j_a = 1
     485             :       END IF
     486         550 :       IF (PRESENT(b_first_row)) THEN
     487           0 :          i_b = b_first_row
     488             :       ELSE
     489         550 :          i_b = 1
     490             :       END IF
     491         550 :       IF (PRESENT(b_first_col)) THEN
     492           0 :          j_b = b_first_col
     493             :       ELSE
     494         550 :          j_b = 1
     495             :       END IF
     496         550 :       IF (PRESENT(c_first_row)) THEN
     497           0 :          i_c = c_first_row
     498             :       ELSE
     499         550 :          i_c = 1
     500             :       END IF
     501         550 :       IF (PRESENT(c_first_col)) THEN
     502           0 :          j_c = c_first_col
     503             :       ELSE
     504         550 :          j_c = 1
     505             :       END IF
     506             : 
     507             : #if defined(__parallel)
     508             : 
     509        5500 :       desca(:) = matrix_a%matrix_struct%descriptor(:)
     510        5500 :       descb(:) = matrix_b%matrix_struct%descriptor(:)
     511        5500 :       descc(:) = matrix_c%matrix_struct%descriptor(:)
     512             : 
     513         550 :       IF (matrix_a%use_sp .AND. matrix_b%use_sp .AND. matrix_c%use_sp) THEN
     514             : 
     515             :          CALL psgemm(transa, transb, m, n, k, REAL(alpha, sp), a_sp(1, 1), i_a, j_a, desca, b_sp(1, 1), i_b, j_b, &
     516           0 :                      descb, REAL(beta, sp), c_sp(1, 1), i_c, j_c, descc)
     517             : 
     518         550 :       ELSEIF ((.NOT. matrix_a%use_sp) .AND. (.NOT. matrix_b%use_sp) .AND. (.NOT. matrix_c%use_sp)) THEN
     519             : 
     520             :          CALL pdgemm(transa, transb, m, n, k, alpha, a, i_a, j_a, desca, b, i_b, j_b, &
     521         550 :                      descb, beta, c, i_c, j_c, descc)
     522             : 
     523             :       ELSE
     524           0 :          CPABORT("Mixed precision gemm NYI")
     525             :       END IF
     526             : #else
     527             : 
     528             :       IF (matrix_a%use_sp .AND. matrix_b%use_sp .AND. matrix_c%use_sp) THEN
     529             : 
     530             :          lda = SIZE(a_sp, 1)
     531             :          ldb = SIZE(b_sp, 1)
     532             :          ldc = SIZE(c_sp, 1)
     533             : 
     534             :          CALL sgemm(transa, transb, m, n, k, REAL(alpha, sp), a_sp(i_a, j_a), lda, b_sp(i_b, j_b), ldb, &
     535             :                     REAL(beta, sp), c_sp(i_c, j_c), ldc)
     536             : 
     537             :       ELSEIF ((.NOT. matrix_a%use_sp) .AND. (.NOT. matrix_b%use_sp) .AND. (.NOT. matrix_c%use_sp)) THEN
     538             : 
     539             :          lda = SIZE(a, 1)
     540             :          ldb = SIZE(b, 1)
     541             :          ldc = SIZE(c, 1)
     542             : 
     543             :          CALL dgemm(transa, transb, m, n, k, alpha, a(i_a, j_a), lda, b(i_b, j_b), ldb, beta, c(i_c, j_c), ldc)
     544             : 
     545             :       ELSE
     546             :          CPABORT("Mixed precision gemm NYI")
     547             :       END IF
     548             : 
     549             : #endif
     550         550 :       CALL timestop(handle)
     551             : 
     552         550 :    END SUBROUTINE cp_fm_gemm
     553             : 
     554             : ! **************************************************************************************************
     555             : !> \brief computes matrix_c = beta * matrix_c + alpha *  matrix_a  *  matrix_b
     556             : !>      computes matrix_c = beta * matrix_c + alpha *  matrix_b  *  matrix_a
     557             : !>      where matrix_a is symmetric
     558             : !> \param side : 'L' -> matrix_a is on the left 'R' -> matrix_a is on the right
     559             : !>      alpha,beta :: can be 0.0_dp and 1.0_dp
     560             : !> \param uplo ...
     561             : !> \param m ...
     562             : !> \param n ...
     563             : !> \param alpha ...
     564             : !> \param matrix_a : m x m matrix
     565             : !> \param matrix_b : m x n matrix
     566             : !> \param beta ...
     567             : !> \param matrix_c : m x n matrix
     568             : !> \author Matthias Krack
     569             : !> \note
     570             : !>      matrix_c should have no overlap with matrix_a, matrix_b
     571             : !>      all matrices in QS are upper triangular, so uplo should be 'U' always
     572             : !>      matrix_a is always an m x m matrix
     573             : !>      it is typically slower to do cp_fm_symm than cp_fm_gemm (especially in parallel easily 50 percent !)
     574             : ! **************************************************************************************************
     575      143496 :    SUBROUTINE cp_fm_symm(side, uplo, m, n, alpha, matrix_a, matrix_b, beta, matrix_c)
     576             : 
     577             :       CHARACTER(LEN=1), INTENT(IN)             :: side, uplo
     578             :       INTEGER, INTENT(IN)                      :: m, n
     579             :       REAL(KIND=dp), INTENT(IN)                :: alpha
     580             :       TYPE(cp_fm_type), INTENT(IN)                :: matrix_a, matrix_b
     581             :       REAL(KIND=dp), INTENT(IN)                :: beta
     582             :       TYPE(cp_fm_type), INTENT(IN)          :: matrix_c
     583             : 
     584             :       CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_symm'
     585             : 
     586             :       INTEGER                                  :: handle
     587      143496 :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: a, b, c
     588             : #if defined(__parallel)
     589             :       INTEGER, DIMENSION(9)                    :: desca, descb, descc
     590             : #else
     591             :       INTEGER                                  :: lda, ldb, ldc
     592             : #endif
     593             : 
     594      143496 :       CALL timeset(routineN, handle)
     595             : 
     596      143496 :       a => matrix_a%local_data
     597      143496 :       b => matrix_b%local_data
     598      143496 :       c => matrix_c%local_data
     599             : 
     600             : #if defined(__parallel)
     601             : 
     602     1434960 :       desca(:) = matrix_a%matrix_struct%descriptor(:)
     603     1434960 :       descb(:) = matrix_b%matrix_struct%descriptor(:)
     604     1434960 :       descc(:) = matrix_c%matrix_struct%descriptor(:)
     605             : 
     606      143496 :       CALL pdsymm(side, uplo, m, n, alpha, a(1, 1), 1, 1, desca, b(1, 1), 1, 1, descb, beta, c(1, 1), 1, 1, descc)
     607             : 
     608             : #else
     609             : 
     610             :       lda = matrix_a%matrix_struct%local_leading_dimension
     611             :       ldb = matrix_b%matrix_struct%local_leading_dimension
     612             :       ldc = matrix_c%matrix_struct%local_leading_dimension
     613             : 
     614             :       CALL dsymm(side, uplo, m, n, alpha, a(1, 1), lda, b(1, 1), ldb, beta, c(1, 1), ldc)
     615             : 
     616             : #endif
     617      143496 :       CALL timestop(handle)
     618             : 
     619      143496 :    END SUBROUTINE cp_fm_symm
     620             : 
     621             : ! **************************************************************************************************
     622             : !> \brief computes the Frobenius norm of matrix_a
     623             : !> \brief computes the Frobenius norm of matrix_a
     624             : !> \param matrix_a : m x n matrix
     625             : !> \return ...
     626             : !> \author VW
     627             : ! **************************************************************************************************
     628        8030 :    FUNCTION cp_fm_frobenius_norm(matrix_a) RESULT(norm)
     629             :       TYPE(cp_fm_type), INTENT(IN)             :: matrix_a
     630             :       REAL(KIND=dp)                            :: norm
     631             : 
     632             :       CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_frobenius_norm'
     633             : 
     634             :       INTEGER                                  :: handle, size_a
     635        8030 :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: a
     636             :       REAL(KIND=dp), EXTERNAL                  :: DDOT
     637             : #if defined(__parallel)
     638             :       TYPE(mp_comm_type)                       :: group
     639             : #endif
     640             : 
     641        8030 :       CALL timeset(routineN, handle)
     642             : 
     643             :       norm = 0.0_dp
     644        8030 :       a => matrix_a%local_data
     645        8030 :       size_a = SIZE(a, 1)*SIZE(a, 2)
     646        8030 :       norm = DDOT(size_a, a(1, 1), 1, a(1, 1), 1)
     647             : #if defined(__parallel)
     648        8030 :       group = matrix_a%matrix_struct%para_env
     649        8030 :       CALL group%sum(norm)
     650             : #endif
     651        8030 :       norm = SQRT(norm)
     652             : 
     653        8030 :       CALL timestop(handle)
     654             : 
     655        8030 :    END FUNCTION cp_fm_frobenius_norm
     656             : 
     657             : ! **************************************************************************************************
     658             : !> \brief performs a rank-k update of a symmetric matrix_c
     659             : !>         matrix_c = beta * matrix_c + alpha * matrix_a * transpose ( matrix_a )
     660             : !> \param uplo : 'U'   ('L')
     661             : !> \param trans : 'N'  ('T')
     662             : !> \param k : number of cols to use in matrix_a
     663             : !>      ia,ja ::  1,1 (could be used for selecting subblock of a)
     664             : !> \param alpha ...
     665             : !> \param matrix_a ...
     666             : !> \param ia ...
     667             : !> \param ja ...
     668             : !> \param beta ...
     669             : !> \param matrix_c ...
     670             : !> \author Matthias Krack
     671             : !> \note
     672             : !>      In QS uplo should 'U' (upper part updated)
     673             : ! **************************************************************************************************
     674        7006 :    SUBROUTINE cp_fm_syrk(uplo, trans, k, alpha, matrix_a, ia, ja, beta, matrix_c)
     675             :       CHARACTER(LEN=1), INTENT(IN)             :: uplo, trans
     676             :       INTEGER, INTENT(IN)                      :: k
     677             :       REAL(KIND=dp), INTENT(IN)                :: alpha
     678             :       TYPE(cp_fm_type), INTENT(IN)             :: matrix_a
     679             :       INTEGER, INTENT(IN)                      :: ia, ja
     680             :       REAL(KIND=dp), INTENT(IN)                :: beta
     681             :       TYPE(cp_fm_type), INTENT(IN)          :: matrix_c
     682             : 
     683             :       CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_syrk'
     684             : 
     685             :       INTEGER                                  :: handle, n
     686        7006 :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: a, c
     687             : #if defined(__parallel)
     688             :       INTEGER, DIMENSION(9)                    :: desca, descc
     689             : #else
     690             :       INTEGER                                  :: lda, ldc
     691             : #endif
     692             : 
     693        7006 :       CALL timeset(routineN, handle)
     694             : 
     695        7006 :       n = matrix_c%matrix_struct%nrow_global
     696             : 
     697        7006 :       a => matrix_a%local_data
     698        7006 :       c => matrix_c%local_data
     699             : 
     700             : #if defined(__parallel)
     701             : 
     702       70060 :       desca(:) = matrix_a%matrix_struct%descriptor(:)
     703       70060 :       descc(:) = matrix_c%matrix_struct%descriptor(:)
     704             : 
     705        7006 :       CALL pdsyrk(uplo, trans, n, k, alpha, a(1, 1), ia, ja, desca, beta, c(1, 1), 1, 1, descc)
     706             : 
     707             : #else
     708             : 
     709             :       lda = SIZE(a, 1)
     710             :       ldc = SIZE(c, 1)
     711             : 
     712             :       CALL dsyrk(uplo, trans, n, k, alpha, a(ia, ja), lda, beta, c(1, 1), ldc)
     713             : 
     714             : #endif
     715        7006 :       CALL timestop(handle)
     716             : 
     717        7006 :    END SUBROUTINE cp_fm_syrk
     718             : 
     719             : ! **************************************************************************************************
     720             : !> \brief computes the schur product of two matrices
     721             : !>       c_ij = a_ij * b_ij
     722             : !> \param matrix_a ...
     723             : !> \param matrix_b ...
     724             : !> \param matrix_c ...
     725             : !> \author Joost VandeVondele
     726             : ! **************************************************************************************************
     727        9190 :    SUBROUTINE cp_fm_schur_product(matrix_a, matrix_b, matrix_c)
     728             : 
     729             :       TYPE(cp_fm_type), INTENT(IN)                       :: matrix_a, matrix_b, matrix_c
     730             : 
     731             :       CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_schur_product'
     732             : 
     733             :       INTEGER                                            :: handle, icol_local, irow_local, mypcol, &
     734             :                                                             myprow, ncol_local, npcol, nprow, &
     735             :                                                             nrow_local
     736        9190 :       REAL(KIND=dp), DIMENSION(:, :), POINTER            :: a, b, c
     737             :       TYPE(cp_blacs_env_type), POINTER                   :: context
     738             : 
     739        9190 :       CALL timeset(routineN, handle)
     740             : 
     741        9190 :       context => matrix_a%matrix_struct%context
     742        9190 :       myprow = context%mepos(1)
     743        9190 :       mypcol = context%mepos(2)
     744        9190 :       nprow = context%num_pe(1)
     745        9190 :       npcol = context%num_pe(2)
     746             : 
     747        9190 :       a => matrix_a%local_data
     748        9190 :       b => matrix_b%local_data
     749        9190 :       c => matrix_c%local_data
     750             : 
     751        9190 :       nrow_local = matrix_a%matrix_struct%nrow_locals(myprow)
     752        9190 :       ncol_local = matrix_a%matrix_struct%ncol_locals(mypcol)
     753             : 
     754       99952 :       DO icol_local = 1, ncol_local
     755     6860227 :          DO irow_local = 1, nrow_local
     756     6851037 :             c(irow_local, icol_local) = a(irow_local, icol_local)*b(irow_local, icol_local)
     757             :          END DO
     758             :       END DO
     759             : 
     760        9190 :       CALL timestop(handle)
     761             : 
     762        9190 :    END SUBROUTINE cp_fm_schur_product
     763             : 
     764             : ! **************************************************************************************************
     765             : !> \brief returns the trace of matrix_a^T matrix_b, i.e
     766             : !>      sum_{i,j}(matrix_a(i,j)*matrix_b(i,j))
     767             : !> \param matrix_a a matrix
     768             : !> \param matrix_b another matrix
     769             : !> \param trace ...
     770             : !> \par History
     771             : !>      11.06.2001 Creation (Matthias Krack)
     772             : !>      12.2002 added doc [fawzi]
     773             : !> \author Matthias Krack
     774             : !> \note
     775             : !>      note the transposition of matrix_a!
     776             : ! **************************************************************************************************
     777      693910 :    SUBROUTINE cp_fm_trace_a0b0t0(matrix_a, matrix_b, trace)
     778             : 
     779             :       TYPE(cp_fm_type), INTENT(IN)                       :: matrix_a, matrix_b
     780             :       REAL(KIND=dp), INTENT(OUT)                         :: trace
     781             : 
     782             :       CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_trace_a0b0t0'
     783             : 
     784             :       INTEGER                                            :: handle, mypcol, myprow, ncol_local, &
     785             :                                                             npcol, nprow, nrow_local
     786      693910 :       REAL(KIND=dp), DIMENSION(:, :), POINTER            :: a, b
     787      693910 :       REAL(KIND=sp), DIMENSION(:, :), POINTER            :: a_sp, b_sp
     788             :       TYPE(cp_blacs_env_type), POINTER                   :: context
     789             :       TYPE(mp_comm_type)                                 :: group
     790             : 
     791      693910 :       CALL timeset(routineN, handle)
     792             : 
     793      693910 :       context => matrix_a%matrix_struct%context
     794      693910 :       myprow = context%mepos(1)
     795      693910 :       mypcol = context%mepos(2)
     796      693910 :       nprow = context%num_pe(1)
     797      693910 :       npcol = context%num_pe(2)
     798             : 
     799      693910 :       group = matrix_a%matrix_struct%para_env
     800             : 
     801      693910 :       a => matrix_a%local_data
     802      693910 :       b => matrix_b%local_data
     803             : 
     804      693910 :       a_sp => matrix_a%local_data_sp
     805      693910 :       b_sp => matrix_b%local_data_sp
     806             : 
     807      693910 :       nrow_local = MIN(matrix_a%matrix_struct%nrow_locals(myprow), matrix_b%matrix_struct%nrow_locals(myprow))
     808      693910 :       ncol_local = MIN(matrix_a%matrix_struct%ncol_locals(mypcol), matrix_b%matrix_struct%ncol_locals(mypcol))
     809             : 
     810             :       ! cries for an accurate_dot_product
     811      693910 :       IF (matrix_a%use_sp .AND. matrix_b%use_sp) THEN
     812             :          trace = accurate_sum(REAL(a_sp(1:nrow_local, 1:ncol_local)* &
     813           0 :                                    b_sp(1:nrow_local, 1:ncol_local), dp))
     814      693910 :       ELSEIF (matrix_a%use_sp .AND. .NOT. matrix_b%use_sp) THEN
     815             :          trace = accurate_sum(REAL(a_sp(1:nrow_local, 1:ncol_local), dp)* &
     816           0 :                               b(1:nrow_local, 1:ncol_local))
     817      693910 :       ELSEIF (.NOT. matrix_a%use_sp .AND. matrix_b%use_sp) THEN
     818             :          trace = accurate_sum(a(1:nrow_local, 1:ncol_local)* &
     819           0 :                               REAL(b_sp(1:nrow_local, 1:ncol_local), dp))
     820             :       ELSE
     821             :          trace = accurate_dot_product(a(1:nrow_local, 1:ncol_local), &
     822      693910 :                                       b(1:nrow_local, 1:ncol_local))
     823             :       END IF
     824             : 
     825      693910 :       CALL group%sum(trace)
     826             : 
     827      693910 :       CALL timestop(handle)
     828             : 
     829      693910 :    END SUBROUTINE cp_fm_trace_a0b0t0
     830             : 
     831             :    #:mute
     832             :       #:set types = [("cp_fm_type", "a", ""), ("cp_fm_p_type", "p","%matrix")]
     833             :    #:endmute
     834             : 
     835             : ! **************************************************************************************************
     836             : !> \brief Compute trace(k) = Tr (matrix_a(k)^T matrix_b) for each pair of matrices A_k and B.
     837             : !> \param matrix_a list of A matrices
     838             : !> \param matrix_b B matrix
     839             : !> \param trace    computed traces
     840             : !> \par History
     841             : !>    * 08.2018 forked from cp_fm_trace() [Sergey Chulkov]
     842             : !> \note \parblock
     843             : !>      Computing the trace requires collective communication between involved MPI processes
     844             : !>      that implies a synchronisation point between them. The aim of this subroutine is to reduce
     845             : !>      the amount of time wasted in such synchronisation by performing one large collective
     846             : !>      operation which involves all the matrices in question.
     847             : !>
     848             : !>      The subroutine's suffix reflects dimensionality of dummy arrays; 'a1b0t1' means that
     849             : !>      the dummy variables 'matrix_a' and 'trace' are 1-dimensional arrays, while the variable
     850             : !>      'matrix_b' is a single matrix.
     851             : !>      \endparblock
     852             : ! **************************************************************************************************
     853             :    #:for longname, shortname, appendix in types
     854        3030 :       SUBROUTINE cp_fm_trace_a1b0t1_${shortname}$ (matrix_a, matrix_b, trace)
     855             :          TYPE(${longname}$), DIMENSION(:), INTENT(in)       :: matrix_a
     856             :          TYPE(cp_fm_type), INTENT(IN)                       :: matrix_b
     857             :          REAL(kind=dp), DIMENSION(:), INTENT(out)           :: trace
     858             : 
     859             :          CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_trace_a1b0t1_${shortname}$'
     860             : 
     861             :          INTEGER                                            :: handle, imatrix, n_matrices, &
     862             :                                                                ncols_local, nrows_local
     863             :          LOGICAL                                            :: use_sp_a, use_sp_b
     864        3030 :          REAL(kind=dp), DIMENSION(:, :), POINTER            :: ldata_a, ldata_b
     865        3030 :          REAL(kind=sp), DIMENSION(:, :), POINTER            :: ldata_a_sp, ldata_b_sp
     866             :          TYPE(mp_comm_type)                                 :: group
     867             : 
     868        3030 :          CALL timeset(routineN, handle)
     869             : 
     870        3030 :          n_matrices = SIZE(trace)
     871        3030 :          CPASSERT(SIZE(matrix_a) == n_matrices)
     872             : 
     873        3030 :          CALL cp_fm_get_info(matrix_b, nrow_local=nrows_local, ncol_local=ncols_local)
     874        3030 :          use_sp_b = matrix_b%use_sp
     875             : 
     876        3030 :          IF (use_sp_b) THEN
     877           0 :             ldata_b_sp => matrix_b%local_data_sp(1:nrows_local, 1:ncols_local)
     878             :          ELSE
     879        3030 :             ldata_b => matrix_b%local_data(1:nrows_local, 1:ncols_local)
     880             :          END IF
     881             : 
     882             : !$OMP PARALLEL DO DEFAULT(NONE), &
     883             : !$OMP             PRIVATE(imatrix, ldata_a, ldata_a_sp, use_sp_a), &
     884             : !$OMP             SHARED(ldata_b, ldata_b_sp, matrix_a, matrix_b), &
     885        3030 : !$OMP             SHARED(ncols_local, nrows_local, n_matrices, trace, use_sp_b)
     886             : 
     887             :          DO imatrix = 1, n_matrices
     888             : 
     889             :             use_sp_a = matrix_a(imatrix) ${appendix}$%use_sp
     890             : 
     891             :             ! assume that the matrices A(i) and B have identical shapes and distribution schemes
     892             :             IF (use_sp_a .AND. use_sp_b) THEN
     893             :                ldata_a_sp => matrix_a(imatrix) ${appendix}$%local_data_sp(1:nrows_local, 1:ncols_local)
     894             :                trace(imatrix) = accurate_dot_product(ldata_a_sp, ldata_b_sp)
     895             :             ELSE IF (.NOT. use_sp_a .AND. .NOT. use_sp_b) THEN
     896             :                ldata_a => matrix_a(imatrix) ${appendix}$%local_data(1:nrows_local, 1:ncols_local)
     897             :                trace(imatrix) = accurate_dot_product(ldata_a, ldata_b)
     898             :             ELSE
     899             :                CPABORT("Matrices A and B are of different types")
     900             :             END IF
     901             :          END DO
     902             : !$OMP END PARALLEL DO
     903             : 
     904        3030 :          group = matrix_b%matrix_struct%para_env
     905       18882 :          CALL group%sum(trace)
     906             : 
     907        3030 :          CALL timestop(handle)
     908        3030 :       END SUBROUTINE cp_fm_trace_a1b0t1_${shortname}$
     909             :    #:endfor
     910             : 
     911             : ! **************************************************************************************************
     912             : !> \brief Compute trace(k) = Tr (matrix_a(k)^T matrix_b(k)) for each pair of matrices A_k and B_k.
     913             : !> \param matrix_a list of A matrices
     914             : !> \param matrix_b list of B matrices
     915             : !> \param trace    computed traces
     916             : !> \param accurate ...
     917             : !> \par History
     918             : !>    * 11.2016 forked from cp_fm_trace() [Sergey Chulkov]
     919             : !> \note \parblock
     920             : !>      Computing the trace requires collective communication between involved MPI processes
     921             : !>      that implies a synchronisation point between them. The aim of this subroutine is to reduce
     922             : !>      the amount of time wasted in such synchronisation by performing one large collective
     923             : !>      operation which involves all the matrices in question.
     924             : !>
     925             : !>      The subroutine's suffix reflects dimensionality of dummy arrays; 'a1b1t1' means that
     926             : !>      all dummy variables (matrix_a, matrix_b, and trace) are 1-dimensional arrays.
     927             : !>      \endparblock
     928             : ! **************************************************************************************************
     929             :    #:for longname1, shortname1, appendix1 in types
     930             :       #:for longname2, shortname2, appendix2 in types
     931      138648 :          SUBROUTINE cp_fm_trace_a1b1t1_${shortname1}$${shortname2}$ (matrix_a, matrix_b, trace, accurate)
     932             :             TYPE(${longname1}$), DIMENSION(:), INTENT(in)       :: matrix_a
     933             :             TYPE(${longname2}$), DIMENSION(:), INTENT(in)       :: matrix_b
     934             :             REAL(kind=dp), DIMENSION(:), INTENT(out)           :: trace
     935             :             LOGICAL, INTENT(IN), OPTIONAL                      :: accurate
     936             : 
     937             :             CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_trace_a1b1t1_${shortname1}$${shortname2}$'
     938             : 
     939             :             INTEGER                                            :: handle, imatrix, n_matrices, &
     940             :                                                                   ncols_local, nrows_local
     941             :             LOGICAL                                            :: use_accurate_sum, use_sp_a, use_sp_b
     942      138648 :             REAL(kind=dp), DIMENSION(:, :), POINTER            :: ldata_a, ldata_b
     943      138648 :             REAL(kind=sp), DIMENSION(:, :), POINTER            :: ldata_a_sp, ldata_b_sp
     944             :             TYPE(mp_comm_type)                                 :: group
     945             : 
     946      138648 :             CALL timeset(routineN, handle)
     947             : 
     948      138648 :             n_matrices = SIZE(trace)
     949      138648 :             CPASSERT(SIZE(matrix_a) == n_matrices)
     950      138648 :             CPASSERT(SIZE(matrix_b) == n_matrices)
     951             : 
     952      138648 :             use_accurate_sum = .TRUE.
     953      138648 :             IF (PRESENT(accurate)) use_accurate_sum = accurate
     954             : 
     955             : !$OMP PARALLEL DO DEFAULT(NONE), &
     956             : !$OMP             PRIVATE(imatrix, ldata_a, ldata_a_sp, ldata_b, ldata_b_sp, ncols_local), &
     957             : !$OMP             PRIVATE(nrows_local, use_sp_a, use_sp_b), &
     958      138648 : !$OMP             SHARED(matrix_a, matrix_b, n_matrices, trace, use_accurate_sum)
     959             :             DO imatrix = 1, n_matrices
     960             :                CALL cp_fm_get_info(matrix_a(imatrix) ${appendix1}$, nrow_local=nrows_local, ncol_local=ncols_local)
     961             : 
     962             :                use_sp_a = matrix_a(imatrix) ${appendix1}$%use_sp
     963             :                use_sp_b = matrix_b(imatrix) ${appendix2}$%use_sp
     964             : 
     965             :                ! assume that the matrices A(i) and B(i) have identical shapes and distribution schemes
     966             :                IF (use_sp_a .AND. use_sp_b) THEN
     967             :                   ldata_a_sp => matrix_a(imatrix) ${appendix1}$%local_data_sp(1:nrows_local, 1:ncols_local)
     968             :                   ldata_b_sp => matrix_b(imatrix) ${appendix2}$%local_data_sp(1:nrows_local, 1:ncols_local)
     969             :                   IF (use_accurate_sum) THEN
     970             :                      trace(imatrix) = accurate_dot_product(ldata_a_sp, ldata_b_sp)
     971             :                   ELSE
     972             :                      trace(imatrix) = SUM(ldata_a_sp*ldata_b_sp)
     973             :                   END IF
     974             :                ELSE IF (.NOT. use_sp_a .AND. .NOT. use_sp_b) THEN
     975             :                   ldata_a => matrix_a(imatrix) ${appendix1}$%local_data(1:nrows_local, 1:ncols_local)
     976             :                   ldata_b => matrix_b(imatrix) ${appendix2}$%local_data(1:nrows_local, 1:ncols_local)
     977             :                   IF (use_accurate_sum) THEN
     978             :                      trace(imatrix) = accurate_dot_product(ldata_a, ldata_b)
     979             :                   ELSE
     980             :                      trace(imatrix) = SUM(ldata_a*ldata_b)
     981             :                   END IF
     982             :                ELSE
     983             :                   CPABORT("Matrices A and B are of different types")
     984             :                END IF
     985             :             END DO
     986             : !$OMP END PARALLEL DO
     987             : 
     988      138648 :             group = matrix_a(1) ${appendix1}$%matrix_struct%para_env
     989      460424 :             CALL group%sum(trace)
     990             : 
     991      138648 :             CALL timestop(handle)
     992      138648 :          END SUBROUTINE cp_fm_trace_a1b1t1_${shortname1}$${shortname2}$
     993             :       #:endfor
     994             :    #:endfor
     995             : 
     996             : ! **************************************************************************************************
     997             : !> \brief Compute trace(i,j) = \sum_k Tr (matrix_a(k,i)^T matrix_b(k,j)).
     998             : !> \param matrix_a list of A matrices
     999             : !> \param matrix_b list of B matrices
    1000             : !> \param trace    computed traces
    1001             : !> \param accurate ...
    1002             : ! **************************************************************************************************
    1003             :    #:for longname1, shortname1, appendix1 in types
    1004             :       #:for longname2, shortname2, appendix2 in types
    1005       13816 :          SUBROUTINE cp_fm_contracted_trace_a2b2t2_${shortname1}$${shortname2}$ (matrix_a, matrix_b, trace, accurate)
    1006             :             TYPE(${longname1}$), DIMENSION(:, :), INTENT(in)       :: matrix_a
    1007             :             TYPE(${longname2}$), DIMENSION(:, :), INTENT(in)       :: matrix_b
    1008             :             REAL(kind=dp), DIMENSION(:, :), INTENT(out)        :: trace
    1009             :             LOGICAL, INTENT(IN), OPTIONAL                      :: accurate
    1010             : 
    1011             :             CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_contracted_trace_a2b2t2_${shortname1}$${shortname2}$'
    1012             : 
    1013             :             INTEGER                                            :: handle, ia, ib, iz, na, nb, ncols_local, &
    1014             :                                                                   nrows_local, nz
    1015             :             INTEGER(kind=int_8)                                :: ib8, itrace, na8, ntraces
    1016             :             LOGICAL                                            :: use_accurate_sum, use_sp_a, use_sp_b
    1017             :             REAL(kind=dp)                                      :: t
    1018       13816 :             REAL(kind=dp), DIMENSION(:, :), POINTER            :: ldata_a, ldata_b
    1019       13816 :             REAL(kind=sp), DIMENSION(:, :), POINTER            :: ldata_a_sp, ldata_b_sp
    1020             :             TYPE(mp_comm_type)                                 :: group
    1021             : 
    1022       13816 :             CALL timeset(routineN, handle)
    1023             : 
    1024       13816 :             nz = SIZE(matrix_a, 1)
    1025       13816 :             CPASSERT(SIZE(matrix_b, 1) == nz)
    1026             : 
    1027       13816 :             na = SIZE(matrix_a, 2)
    1028       13816 :             nb = SIZE(matrix_b, 2)
    1029       13816 :             CPASSERT(SIZE(trace, 1) == na)
    1030       13816 :             CPASSERT(SIZE(trace, 2) == nb)
    1031             : 
    1032       13816 :             use_accurate_sum = .TRUE.
    1033       13816 :             IF (PRESENT(accurate)) use_accurate_sum = accurate
    1034             : 
    1035             :             ! here we use one running index (itrace) instead of two (ia, ib) in order to
    1036             :             ! improve load balance between shared-memory threads
    1037       13816 :             ntraces = na*nb
    1038       13816 :             na8 = INT(na, kind=int_8)
    1039             : 
    1040             : !$OMP PARALLEL DO DEFAULT(NONE), &
    1041             : !$OMP             PRIVATE(ia, ib, ib8, itrace, iz, ldata_a, ldata_a_sp, ldata_b, ldata_b_sp, ncols_local), &
    1042             : !$OMP             PRIVATE(nrows_local, t, use_sp_a, use_sp_b), &
    1043       13816 : !$OMP             SHARED(matrix_a, matrix_b, na, na8, nb, ntraces, nz, trace, use_accurate_sum)
    1044             :             DO itrace = 1, ntraces
    1045             :                ib8 = (itrace - 1)/na8
    1046             :                ia = INT(itrace - ib8*na8)
    1047             :                ib = INT(ib8) + 1
    1048             : 
    1049             :                t = 0.0_dp
    1050             :                DO iz = 1, nz
    1051             :                   CALL cp_fm_get_info(matrix_a(iz, ia) ${appendix1}$, nrow_local=nrows_local, ncol_local=ncols_local)
    1052             :                   use_sp_a = matrix_a(iz, ia) ${appendix1}$%use_sp
    1053             :                   use_sp_b = matrix_b(iz, ib) ${appendix2}$%use_sp
    1054             : 
    1055             :                   ! assume that the matrices A(iz, ia) and B(iz, ib) have identical shapes and distribution schemes
    1056             :                   IF (.NOT. use_sp_a .AND. .NOT. use_sp_b) THEN
    1057             :                      ldata_a => matrix_a(iz, ia) ${appendix1}$%local_data(1:nrows_local, 1:ncols_local)
    1058             :                      ldata_b => matrix_b(iz, ib) ${appendix2}$%local_data(1:nrows_local, 1:ncols_local)
    1059             :                      IF (use_accurate_sum) THEN
    1060             :                         t = t + accurate_dot_product(ldata_a, ldata_b)
    1061             :                      ELSE
    1062             :                         t = t + SUM(ldata_a*ldata_b)
    1063             :                      END IF
    1064             :                   ELSE IF (use_sp_a .AND. use_sp_b) THEN
    1065             :                      ldata_a_sp => matrix_a(iz, ia) ${appendix1}$%local_data_sp(1:nrows_local, 1:ncols_local)
    1066             :                      ldata_b_sp => matrix_b(iz, ib) ${appendix2}$%local_data_sp(1:nrows_local, 1:ncols_local)
    1067             :                      IF (use_accurate_sum) THEN
    1068             :                         t = t + accurate_dot_product(ldata_a_sp, ldata_b_sp)
    1069             :                      ELSE
    1070             :                         t = t + SUM(ldata_a_sp*ldata_b_sp)
    1071             :                      END IF
    1072             :                   ELSE
    1073             :                      CPABORT("Matrices A and B are of different types")
    1074             :                   END IF
    1075             :                END DO
    1076             :                trace(ia, ib) = t
    1077             :             END DO
    1078             : !$OMP END PARALLEL DO
    1079             : 
    1080       13816 :             group = matrix_a(1, 1) ${appendix1}$%matrix_struct%para_env
    1081      617500 :             CALL group%sum(trace)
    1082             : 
    1083       13816 :             CALL timestop(handle)
    1084       13816 :          END SUBROUTINE cp_fm_contracted_trace_a2b2t2_${shortname1}$${shortname2}$
    1085             :       #:endfor
    1086             :    #:endfor
    1087             : 
    1088             : ! **************************************************************************************************
    1089             : !> \brief multiplies in place by a triangular matrix:
    1090             : !>       matrix_b = alpha op(triangular_matrix) matrix_b
    1091             : !>      or (if side='R')
    1092             : !>       matrix_b = alpha matrix_b op(triangular_matrix)
    1093             : !>      op(triangular_matrix) is:
    1094             : !>       triangular_matrix (if transpose_tr=.false. and invert_tr=.false.)
    1095             : !>       triangular_matrix^T (if transpose_tr=.true. and invert_tr=.false.)
    1096             : !>       triangular_matrix^(-1) (if transpose_tr=.false. and invert_tr=.true.)
    1097             : !>       triangular_matrix^(-T) (if transpose_tr=.true. and invert_tr=.true.)
    1098             : !> \param triangular_matrix the triangular matrix that multiplies the other
    1099             : !> \param matrix_b the matrix that gets multiplied and stores the result
    1100             : !> \param side on which side of matrix_b stays op(triangular_matrix)
    1101             : !>        (defaults to 'L')
    1102             : !> \param transpose_tr if the triangular matrix should be transposed
    1103             : !>        (defaults to false)
    1104             : !> \param invert_tr if the triangular matrix should be inverted
    1105             : !>        (defaults to false)
    1106             : !> \param uplo_tr if triangular_matrix is stored in the upper ('U') or
    1107             : !>        lower ('L') triangle (defaults to 'U')
    1108             : !> \param unit_diag_tr if the diagonal elements of triangular_matrix should
    1109             : !>        be assumed to be 1 (defaults to false)
    1110             : !> \param n_rows the number of rows of the result (defaults to
    1111             : !>        size(matrix_b,1))
    1112             : !> \param n_cols the number of columns of the result (defaults to
    1113             : !>        size(matrix_b,2))
    1114             : !> \param alpha ...
    1115             : !> \par History
    1116             : !>      08.2002 created [fawzi]
    1117             : !> \author Fawzi Mohamed
    1118             : !> \note
    1119             : !>      needs an mpi env
    1120             : ! **************************************************************************************************
    1121      104694 :    SUBROUTINE cp_fm_triangular_multiply(triangular_matrix, matrix_b, side, &
    1122             :                                         transpose_tr, invert_tr, uplo_tr, unit_diag_tr, n_rows, n_cols, &
    1123             :                                         alpha)
    1124             :       TYPE(cp_fm_type), INTENT(IN)                       :: triangular_matrix, matrix_b
    1125             :       CHARACTER, INTENT(in), OPTIONAL                    :: side
    1126             :       LOGICAL, INTENT(in), OPTIONAL                      :: transpose_tr, invert_tr
    1127             :       CHARACTER, INTENT(in), OPTIONAL                    :: uplo_tr
    1128             :       LOGICAL, INTENT(in), OPTIONAL                      :: unit_diag_tr
    1129             :       INTEGER, INTENT(in), OPTIONAL                      :: n_rows, n_cols
    1130             :       REAL(KIND=dp), INTENT(in), OPTIONAL                :: alpha
    1131             : 
    1132             :       CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_triangular_multiply'
    1133             : 
    1134             :       CHARACTER                                          :: side_char, transa, unit_diag, uplo
    1135             :       INTEGER                                            :: handle, m, n
    1136             :       LOGICAL                                            :: invert
    1137             :       REAL(KIND=dp)                                      :: al
    1138             : 
    1139       52347 :       CALL timeset(routineN, handle)
    1140       52347 :       side_char = 'L'
    1141       52347 :       unit_diag = 'N'
    1142       52347 :       uplo = 'U'
    1143       52347 :       transa = 'N'
    1144       52347 :       invert = .FALSE.
    1145       52347 :       al = 1.0_dp
    1146       52347 :       CALL cp_fm_get_info(matrix_b, nrow_global=m, ncol_global=n)
    1147       52347 :       IF (PRESENT(side)) side_char = side
    1148       52347 :       IF (PRESENT(invert_tr)) invert = invert_tr
    1149       52347 :       IF (PRESENT(uplo_tr)) uplo = uplo_tr
    1150       52347 :       IF (PRESENT(unit_diag_tr)) THEN
    1151           0 :          IF (unit_diag_tr) THEN
    1152           0 :             unit_diag = 'U'
    1153             :          ELSE
    1154             :             unit_diag = 'N'
    1155             :          END IF
    1156             :       END IF
    1157       52347 :       IF (PRESENT(transpose_tr)) THEN
    1158        3540 :          IF (transpose_tr) THEN
    1159        1280 :             transa = 'T'
    1160             :          ELSE
    1161             :             transa = 'N'
    1162             :          END IF
    1163             :       END IF
    1164       52347 :       IF (PRESENT(alpha)) al = alpha
    1165       52347 :       IF (PRESENT(n_rows)) m = n_rows
    1166       52347 :       IF (PRESENT(n_cols)) n = n_cols
    1167             : 
    1168       52347 :       IF (invert) THEN
    1169             : 
    1170             : #if defined(__parallel)
    1171             :          CALL pdtrsm(side_char, uplo, transa, unit_diag, m, n, al, &
    1172             :                      triangular_matrix%local_data(1, 1), 1, 1, &
    1173             :                      triangular_matrix%matrix_struct%descriptor, &
    1174             :                      matrix_b%local_data(1, 1), 1, 1, &
    1175       42597 :                      matrix_b%matrix_struct%descriptor(1))
    1176             : #else
    1177             :          CALL dtrsm(side_char, uplo, transa, unit_diag, m, n, al, &
    1178             :                     triangular_matrix%local_data(1, 1), &
    1179             :                     SIZE(triangular_matrix%local_data, 1), &
    1180             :                     matrix_b%local_data(1, 1), SIZE(matrix_b%local_data, 1))
    1181             : #endif
    1182             : 
    1183             :       ELSE
    1184             : 
    1185             : #if defined(__parallel)
    1186             :          CALL pdtrmm(side_char, uplo, transa, unit_diag, m, n, al, &
    1187             :                      triangular_matrix%local_data(1, 1), 1, 1, &
    1188             :                      triangular_matrix%matrix_struct%descriptor, &
    1189             :                      matrix_b%local_data(1, 1), 1, 1, &
    1190        9750 :                      matrix_b%matrix_struct%descriptor(1))
    1191             : #else
    1192             :          CALL dtrmm(side_char, uplo, transa, unit_diag, m, n, al, &
    1193             :                     triangular_matrix%local_data(1, 1), &
    1194             :                     SIZE(triangular_matrix%local_data, 1), &
    1195             :                     matrix_b%local_data(1, 1), SIZE(matrix_b%local_data, 1))
    1196             : #endif
    1197             : 
    1198             :       END IF
    1199             : 
    1200       52347 :       CALL timestop(handle)
    1201       52347 :    END SUBROUTINE cp_fm_triangular_multiply
    1202             : 
    1203             : ! **************************************************************************************************
    1204             : !> \brief scales a matrix
    1205             : !>      matrix_a = alpha * matrix_b
    1206             : !> \param alpha ...
    1207             : !> \param matrix_a ...
    1208             : !> \note
    1209             : !>      use cp_fm_set_all to zero (avoids problems with nan)
    1210             : ! **************************************************************************************************
    1211       84017 :    SUBROUTINE cp_fm_scale(alpha, matrix_a)
    1212             :       REAL(KIND=dp), INTENT(IN)                          :: alpha
    1213             :       TYPE(cp_fm_type), INTENT(IN)                       :: matrix_a
    1214             : 
    1215             :       CHARACTER(len=*), PARAMETER                        :: routineN = 'cp_fm_scale'
    1216             : 
    1217             :       INTEGER                                            :: handle, size_a
    1218             :       REAL(KIND=dp), DIMENSION(:, :), POINTER            :: a
    1219             : 
    1220       84017 :       CALL timeset(routineN, handle)
    1221             : 
    1222             :       NULLIFY (a)
    1223             : 
    1224       84017 :       a => matrix_a%local_data
    1225       84017 :       size_a = SIZE(a, 1)*SIZE(a, 2)
    1226             : 
    1227       84017 :       CALL DSCAL(size_a, alpha, a, 1)
    1228             : 
    1229       84017 :       CALL timestop(handle)
    1230             : 
    1231       84017 :    END SUBROUTINE cp_fm_scale
    1232             : 
    1233             : ! **************************************************************************************************
    1234             : !> \brief transposes a matrix
    1235             : !>      matrixt = matrix ^ T
    1236             : !> \param matrix ...
    1237             : !> \param matrixt ...
    1238             : !> \note
    1239             : !>      all matrix elements are transposed (see cp_fm_upper_to_half to symmetrise a matrix)
    1240             : ! **************************************************************************************************
    1241       26502 :    SUBROUTINE cp_fm_transpose(matrix, matrixt)
    1242             :       TYPE(cp_fm_type), INTENT(IN)          :: matrix, matrixt
    1243             : 
    1244             :       CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_transpose'
    1245             : 
    1246             :       INTEGER                                  :: handle, ncol_global, &
    1247             :                                                   nrow_global, ncol_globalt, nrow_globalt
    1248       13251 :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: a, c
    1249             : #if defined(__parallel)
    1250             :       INTEGER, DIMENSION(9)                    :: desca, descc
    1251             : #else
    1252             :       INTEGER                                  :: i, j
    1253             : #endif
    1254             : 
    1255       13251 :       nrow_global = matrix%matrix_struct%nrow_global
    1256       13251 :       ncol_global = matrix%matrix_struct%ncol_global
    1257       13251 :       nrow_globalt = matrixt%matrix_struct%nrow_global
    1258       13251 :       ncol_globalt = matrixt%matrix_struct%ncol_global
    1259           0 :       CPASSERT(nrow_global == ncol_globalt)
    1260       13251 :       CPASSERT(nrow_globalt == ncol_global)
    1261             : 
    1262       13251 :       CALL timeset(routineN, handle)
    1263             : 
    1264       13251 :       a => matrix%local_data
    1265       13251 :       c => matrixt%local_data
    1266             : 
    1267             : #if defined(__parallel)
    1268      132510 :       desca(:) = matrix%matrix_struct%descriptor(:)
    1269      132510 :       descc(:) = matrixt%matrix_struct%descriptor(:)
    1270       13251 :       CALL pdtran(ncol_global, nrow_global, 1.0_dp, a(1, 1), 1, 1, desca, 0.0_dp, c(1, 1), 1, 1, descc)
    1271             : #else
    1272             :       DO j = 1, ncol_global
    1273             :          DO i = 1, nrow_global
    1274             :             c(j, i) = a(i, j)
    1275             :          END DO
    1276             :       END DO
    1277             : #endif
    1278       13251 :       CALL timestop(handle)
    1279             : 
    1280       13251 :    END SUBROUTINE cp_fm_transpose
    1281             : 
    1282             : ! **************************************************************************************************
    1283             : !> \brief given an upper triangular matrix computes the corresponding full matrix
    1284             : !> \param matrix the upper triangular matrix as input, the full matrix as output
    1285             : !> \param work a matrix of the same size as matrix
    1286             : !> \author Matthias Krack
    1287             : !> \note
    1288             : !>       the lower triangular part is irrelevant
    1289             : ! **************************************************************************************************
    1290      312452 :    SUBROUTINE cp_fm_upper_to_full(matrix, work)
    1291             : 
    1292             :       TYPE(cp_fm_type), INTENT(IN)          :: matrix, work
    1293             : 
    1294             :       CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_upper_to_full'
    1295             : 
    1296             :       INTEGER                                  :: handle, icol_global, irow_global, &
    1297             :                                                   mypcol, myprow, ncol_global, &
    1298             :                                                   npcol, nprow, nrow_global
    1299      156226 :       REAL(KIND=dp), DIMENSION(:, :), POINTER   :: a
    1300      156226 :       REAL(KIND=sp), DIMENSION(:, :), POINTER   :: a_sp
    1301             :       TYPE(cp_blacs_env_type), POINTER         :: context
    1302             : 
    1303             : #if defined(__parallel)
    1304             :       INTEGER                                  :: icol_local, irow_local, &
    1305             :                                                   ncol_block, ncol_local, &
    1306             :                                                   nrow_block, nrow_local
    1307             :       INTEGER, DIMENSION(9)                    :: desca, descc
    1308      156226 :       REAL(KIND=dp), DIMENSION(:, :), POINTER   :: c
    1309      156226 :       REAL(KIND=sp), DIMENSION(:, :), POINTER   :: c_sp
    1310             : #endif
    1311             : 
    1312      156226 :       nrow_global = matrix%matrix_struct%nrow_global
    1313      156226 :       ncol_global = matrix%matrix_struct%ncol_global
    1314           0 :       CPASSERT(nrow_global == ncol_global)
    1315      156226 :       nrow_global = work%matrix_struct%nrow_global
    1316      156226 :       ncol_global = work%matrix_struct%ncol_global
    1317      156226 :       CPASSERT(nrow_global == ncol_global)
    1318      156226 :       CPASSERT(matrix%use_sp .EQV. work%use_sp)
    1319             : 
    1320      156226 :       CALL timeset(routineN, handle)
    1321             : 
    1322      156226 :       context => matrix%matrix_struct%context
    1323      156226 :       myprow = context%mepos(1)
    1324      156226 :       mypcol = context%mepos(2)
    1325      156226 :       nprow = context%num_pe(1)
    1326      156226 :       npcol = context%num_pe(2)
    1327             : 
    1328             : #if defined(__parallel)
    1329             : 
    1330      156226 :       nrow_block = matrix%matrix_struct%nrow_block
    1331      156226 :       ncol_block = matrix%matrix_struct%ncol_block
    1332             : 
    1333      156226 :       nrow_local = matrix%matrix_struct%nrow_locals(myprow)
    1334      156226 :       ncol_local = matrix%matrix_struct%ncol_locals(mypcol)
    1335             : 
    1336      156226 :       a => work%local_data
    1337      156226 :       a_sp => work%local_data_sp
    1338     1562260 :       desca(:) = work%matrix_struct%descriptor(:)
    1339      156226 :       c => matrix%local_data
    1340      156226 :       c_sp => matrix%local_data_sp
    1341     1562260 :       descc(:) = matrix%matrix_struct%descriptor(:)
    1342             : 
    1343     5232190 :       DO icol_local = 1, ncol_local
    1344     5075964 :          icol_global = matrix%matrix_struct%col_indices(icol_local)
    1345   238720986 :          DO irow_local = 1, nrow_local
    1346   233488796 :             irow_global = matrix%matrix_struct%row_indices(irow_local)
    1347   238564760 :             IF (irow_global > icol_global) THEN
    1348   115223895 :                IF (matrix%use_sp) THEN
    1349           0 :                   c_sp(irow_local, icol_local) = 0.0_sp
    1350             :                ELSE
    1351   115223895 :                   c(irow_local, icol_local) = 0.0_dp
    1352             :                END IF
    1353   118264901 :             ELSE IF (irow_global == icol_global) THEN
    1354     3041006 :                IF (matrix%use_sp) THEN
    1355           0 :                   c_sp(irow_local, icol_local) = 0.5_sp*c_sp(irow_local, icol_local)
    1356             :                ELSE
    1357     3041006 :                   c(irow_local, icol_local) = 0.5_dp*c(irow_local, icol_local)
    1358             :                END IF
    1359             :             END IF
    1360             :          END DO
    1361             :       END DO
    1362             : 
    1363     5232190 :       DO icol_local = 1, ncol_local
    1364   238720986 :       DO irow_local = 1, nrow_local
    1365   238564760 :          IF (matrix%use_sp) THEN
    1366           0 :             a_sp(irow_local, icol_local) = c_sp(irow_local, icol_local)
    1367             :          ELSE
    1368   233488796 :             a(irow_local, icol_local) = c(irow_local, icol_local)
    1369             :          END IF
    1370             :       END DO
    1371             :       END DO
    1372             : 
    1373      156226 :       IF (matrix%use_sp) THEN
    1374           0 :          CALL pstran(nrow_global, ncol_global, 1.0_sp, a_sp(1, 1), 1, 1, desca, 1.0_sp, c_sp(1, 1), 1, 1, descc)
    1375             :       ELSE
    1376      156226 :          CALL pdtran(nrow_global, ncol_global, 1.0_dp, a(1, 1), 1, 1, desca, 1.0_dp, c(1, 1), 1, 1, descc)
    1377             :       END IF
    1378             : 
    1379             : #else
    1380             : 
    1381             :       a => matrix%local_data
    1382             :       a_sp => matrix%local_data_sp
    1383             :       DO irow_global = 1, nrow_global
    1384             :          DO icol_global = irow_global + 1, ncol_global
    1385             :             IF (matrix%use_sp) THEN
    1386             :                a_sp(icol_global, irow_global) = a_sp(irow_global, icol_global)
    1387             :             ELSE
    1388             :                a(icol_global, irow_global) = a(irow_global, icol_global)
    1389             :             END IF
    1390             :          END DO
    1391             :       END DO
    1392             : 
    1393             : #endif
    1394      156226 :       CALL timestop(handle)
    1395             : 
    1396      156226 :    END SUBROUTINE cp_fm_upper_to_full
    1397             : 
    1398             : ! **************************************************************************************************
    1399             : !> \brief scales column i of matrix a with scaling(i)
    1400             : !> \param matrixa ...
    1401             : !> \param scaling : an array used for scaling the columns,
    1402             : !>                  SIZE(scaling) determines the number of columns to be scaled
    1403             : !> \author Joost VandeVondele
    1404             : !> \note
    1405             : !>      this is very useful as a first step in the computation of C = sum_i alpha_i A_i transpose (A_i)
    1406             : !>      that is a rank-k update (cp_fm_syrk , cp_sm_plus_fm_fm_t)
    1407             : !>      this procedure can be up to 20 times faster than calling cp_fm_syrk n times
    1408             : !>      where every vector has a different prefactor
    1409             : ! **************************************************************************************************
    1410      126558 :    SUBROUTINE cp_fm_column_scale(matrixa, scaling)
    1411             :       TYPE(cp_fm_type), INTENT(IN)          :: matrixa
    1412             :       REAL(KIND=dp), DIMENSION(:), INTENT(in)  :: scaling
    1413             : 
    1414             :       INTEGER                                  :: k, mypcol, myprow, n, ncol_global, &
    1415             :                                                   npcol, nprow
    1416      126558 :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: a
    1417      126558 :       REAL(KIND=sp), DIMENSION(:, :), POINTER  :: a_sp
    1418             : #if defined(__parallel)
    1419             :       INTEGER                                  :: icol_global, icol_local, &
    1420             :                                                   ipcol, iprow, irow_local
    1421             : #else
    1422             :       INTEGER                                  :: i
    1423             : #endif
    1424             : 
    1425      126558 :       myprow = matrixa%matrix_struct%context%mepos(1)
    1426      126558 :       mypcol = matrixa%matrix_struct%context%mepos(2)
    1427      126558 :       nprow = matrixa%matrix_struct%context%num_pe(1)
    1428      126558 :       npcol = matrixa%matrix_struct%context%num_pe(2)
    1429             : 
    1430      126558 :       ncol_global = matrixa%matrix_struct%ncol_global
    1431             : 
    1432      126558 :       a => matrixa%local_data
    1433      126558 :       a_sp => matrixa%local_data_sp
    1434      126558 :       IF (matrixa%use_sp) THEN
    1435           0 :          n = SIZE(a_sp, 1)
    1436             :       ELSE
    1437      126558 :          n = SIZE(a, 1)
    1438             :       END IF
    1439      126558 :       k = MIN(SIZE(scaling), ncol_global)
    1440             : 
    1441             : #if defined(__parallel)
    1442             : 
    1443     2052275 :       DO icol_global = 1, k
    1444             :          CALL infog2l(1, icol_global, matrixa%matrix_struct%descriptor, &
    1445             :                       nprow, npcol, myprow, mypcol, &
    1446     1925717 :                       irow_local, icol_local, iprow, ipcol)
    1447     2052275 :          IF ((ipcol == mypcol)) THEN
    1448     1925717 :             IF (matrixa%use_sp) THEN
    1449           0 :                CALL SSCAL(n, REAL(scaling(icol_global), sp), a_sp(:, icol_local), 1)
    1450             :             ELSE
    1451     1925717 :                CALL DSCAL(n, scaling(icol_global), a(:, icol_local), 1)
    1452             :             END IF
    1453             :          END IF
    1454             :       END DO
    1455             : #else
    1456             :       DO i = 1, k
    1457             :          IF (matrixa%use_sp) THEN
    1458             :             CALL SSCAL(n, REAL(scaling(i), sp), a_sp(:, i), 1)
    1459             :          ELSE
    1460             :             CALL DSCAL(n, scaling(i), a(:, i), 1)
    1461             :          END IF
    1462             :       END DO
    1463             : #endif
    1464      126558 :    END SUBROUTINE cp_fm_column_scale
    1465             : 
    1466             : ! **************************************************************************************************
    1467             : !> \brief scales row i of matrix a with scaling(i)
    1468             : !> \param matrixa ...
    1469             : !> \param scaling : an array used for scaling the columns,
    1470             : !> \author JGH
    1471             : !> \note
    1472             : ! **************************************************************************************************
    1473        6564 :    SUBROUTINE cp_fm_row_scale(matrixa, scaling)
    1474             :       TYPE(cp_fm_type), INTENT(IN)          :: matrixa
    1475             :       REAL(KIND=dp), DIMENSION(:), INTENT(in)  :: scaling
    1476             : 
    1477             :       INTEGER                                  :: n, m, nrow_global, nrow_local, ncol_local
    1478        6564 :       INTEGER, DIMENSION(:), POINTER           :: row_indices
    1479        6564 :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: a
    1480        6564 :       REAL(KIND=sp), DIMENSION(:, :), POINTER  :: a_sp
    1481             : #if defined(__parallel)
    1482             :       INTEGER                                  :: irow_global, icol, irow
    1483             : #else
    1484             :       INTEGER                                  :: j
    1485             : #endif
    1486             : 
    1487             :       CALL cp_fm_get_info(matrixa, row_indices=row_indices, nrow_global=nrow_global, &
    1488        6564 :                           nrow_local=nrow_local, ncol_local=ncol_local)
    1489        6564 :       CPASSERT(SIZE(scaling) == nrow_global)
    1490             : 
    1491        6564 :       a => matrixa%local_data
    1492        6564 :       a_sp => matrixa%local_data_sp
    1493        6564 :       IF (matrixa%use_sp) THEN
    1494        6564 :          n = SIZE(a_sp, 1)
    1495        6564 :          m = SIZE(a_sp, 2)
    1496             :       ELSE
    1497        6564 :          n = SIZE(a, 1)
    1498        6564 :          m = SIZE(a, 2)
    1499             :       END IF
    1500             : 
    1501             : #if defined(__parallel)
    1502       81426 :       DO icol = 1, ncol_local
    1503       81426 :          IF (matrixa%use_sp) THEN
    1504           0 :             DO irow = 1, nrow_local
    1505           0 :                irow_global = row_indices(irow)
    1506           0 :                a(irow, icol) = REAL(scaling(irow_global), dp)*a(irow, icol)
    1507             :             END DO
    1508             :          ELSE
    1509     6667421 :             DO irow = 1, nrow_local
    1510     6592559 :                irow_global = row_indices(irow)
    1511     6667421 :                a(irow, icol) = scaling(irow_global)*a(irow, icol)
    1512             :             END DO
    1513             :          END IF
    1514             :       END DO
    1515             : #else
    1516             :       IF (matrixa%use_sp) THEN
    1517             :          DO j = 1, m
    1518             :             a_sp(1:n, j) = REAL(scaling(1:n), sp)*a_sp(1:n, j)
    1519             :          END DO
    1520             :       ELSE
    1521             :          DO j = 1, m
    1522             :             a(1:n, j) = scaling(1:n)*a(1:n, j)
    1523             :          END DO
    1524             :       END IF
    1525             : #endif
    1526        6564 :    END SUBROUTINE cp_fm_row_scale
    1527             : ! **************************************************************************************************
    1528             : !> \brief Inverts a cp_fm_type matrix, optionally returning the determinant of the input matrix
    1529             : !> \param matrix_a the matrix to invert
    1530             : !> \param matrix_inverse the inverse of matrix_a
    1531             : !> \param det_a the determinant of matrix_a
    1532             : !> \param eps_svd optional parameter to active SVD based inversion, singular values below eps_svd
    1533             : !>                are screened
    1534             : !> \param eigval optionally return matrix eigenvalues/singular values
    1535             : !> \par History
    1536             : !>      note of Jan Wilhelm (12.2015)
    1537             : !>      - computation of determinant corrected
    1538             : !>      - determinant only computed if det_a is present
    1539             : !>      12.2016 added option to use SVD instead of LU [Nico Holmberg]
    1540             : !>      - Use cp_fm_get diag instead of n times cp_fm_get_element (A. Bussy)
    1541             : !> \author Florian Schiffmann(02.2007)
    1542             : ! **************************************************************************************************
    1543        1336 :    SUBROUTINE cp_fm_invert(matrix_a, matrix_inverse, det_a, eps_svd, eigval)
    1544             : 
    1545             :       TYPE(cp_fm_type), INTENT(IN)          :: matrix_a, matrix_inverse
    1546             :       REAL(KIND=dp), INTENT(OUT), OPTIONAL     :: det_a
    1547             :       REAL(KIND=dp), INTENT(IN), OPTIONAL      :: eps_svd
    1548             :       REAL(KIND=dp), DIMENSION(:), POINTER, &
    1549             :          INTENT(INOUT), OPTIONAL               :: eigval
    1550             : 
    1551             :       INTEGER                                  :: n
    1552        1336 :       INTEGER, ALLOCATABLE, DIMENSION(:)       :: ipivot
    1553             :       REAL(KIND=dp)                            :: determinant, my_eps_svd
    1554             :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: a
    1555             :       TYPE(cp_fm_type)                :: matrix_lu
    1556             : 
    1557             : #if defined(__parallel)
    1558             :       TYPE(cp_fm_type)                :: u, vt, sigma, inv_sigma_ut
    1559             :       TYPE(mp_comm_type) :: group
    1560             :       INTEGER                                  :: i, info, liwork, lwork, exponent_of_minus_one
    1561             :       INTEGER, DIMENSION(9)                    :: desca
    1562             :       LOGICAL                                  :: quenched
    1563             :       REAL(KIND=dp)                            :: alpha, beta
    1564        1336 :       REAL(KIND=dp), DIMENSION(:), POINTER     :: diag
    1565        1336 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:) :: work
    1566             : #else
    1567             :       LOGICAL                                  :: sign
    1568             :       REAL(KIND=dp)                            :: eps1
    1569             : #endif
    1570             : 
    1571        1336 :       my_eps_svd = 0.0_dp
    1572         468 :       IF (PRESENT(eps_svd)) my_eps_svd = eps_svd
    1573             : 
    1574             :       CALL cp_fm_create(matrix=matrix_lu, &
    1575             :                         matrix_struct=matrix_a%matrix_struct, &
    1576        1336 :                         name="A_lu"//TRIM(ADJUSTL(cp_to_string(1)))//"MATRIX")
    1577        1336 :       CALL cp_fm_to_fm(matrix_a, matrix_lu)
    1578             : 
    1579        1336 :       a => matrix_lu%local_data
    1580        1336 :       n = matrix_lu%matrix_struct%nrow_global
    1581        4008 :       ALLOCATE (ipivot(n + matrix_a%matrix_struct%nrow_block))
    1582       18866 :       ipivot(:) = 0
    1583             : #if defined(__parallel)
    1584        1336 :       IF (my_eps_svd .EQ. 0.0_dp) THEN
    1585             :          ! Use LU decomposition
    1586        1308 :          lwork = 3*n
    1587        1308 :          liwork = 3*n
    1588       13080 :          desca(:) = matrix_lu%matrix_struct%descriptor(:)
    1589        1308 :          CALL pdgetrf(n, n, a, 1, 1, desca, ipivot, info)
    1590             : 
    1591        1308 :          IF (PRESENT(det_a) .OR. PRESENT(eigval)) THEN
    1592             : 
    1593        1116 :             ALLOCATE (diag(n))
    1594         916 :             diag(:) = 0.0_dp
    1595         372 :             CALL cp_fm_get_diag(matrix_lu, diag)
    1596             : 
    1597         372 :             exponent_of_minus_one = 0
    1598         372 :             determinant = 1.0_dp
    1599         916 :             DO i = 1, n
    1600         544 :                determinant = determinant*diag(i)
    1601         916 :                IF (ipivot(i) .NE. i) THEN
    1602         224 :                   exponent_of_minus_one = exponent_of_minus_one + 1
    1603             :                END IF
    1604             :             END DO
    1605         372 :             IF (PRESENT(eigval)) THEN
    1606           0 :                CPASSERT(.NOT. ASSOCIATED(eigval))
    1607           0 :                ALLOCATE (eigval(n))
    1608           0 :                eigval(:) = diag
    1609             :             END IF
    1610         372 :             DEALLOCATE (diag)
    1611             : 
    1612         372 :             group = matrix_lu%matrix_struct%para_env
    1613         372 :             CALL group%sum(exponent_of_minus_one)
    1614             : 
    1615         372 :             determinant = determinant*(-1.0_dp)**exponent_of_minus_one
    1616             : 
    1617             :          END IF
    1618             : 
    1619        1308 :          alpha = 0.0_dp
    1620        1308 :          beta = 1.0_dp
    1621        1308 :          CALL cp_fm_set_all(matrix_inverse, alpha, beta)
    1622        1308 :          CALL pdgetrs('N', n, n, matrix_lu%local_data, 1, 1, desca, ipivot, matrix_inverse%local_data, 1, 1, desca, info)
    1623             :       ELSE
    1624             :          ! Use singular value decomposition
    1625             :          CALL cp_fm_create(matrix=u, &
    1626             :                            matrix_struct=matrix_a%matrix_struct, &
    1627          28 :                            name="LEFT_SINGULAR_MATRIX")
    1628          28 :          CALL cp_fm_set_all(u, alpha=0.0_dp)
    1629             :          CALL cp_fm_create(matrix=vt, &
    1630             :                            matrix_struct=matrix_a%matrix_struct, &
    1631          28 :                            name="RIGHT_SINGULAR_MATRIX")
    1632          28 :          CALL cp_fm_set_all(vt, alpha=0.0_dp)
    1633          84 :          ALLOCATE (diag(n))
    1634          92 :          diag(:) = 0.0_dp
    1635         280 :          desca(:) = matrix_lu%matrix_struct%descriptor(:)
    1636          28 :          ALLOCATE (work(1))
    1637             :          ! Workspace query
    1638          28 :          lwork = -1
    1639             :          CALL pdgesvd('V', 'V', n, n, matrix_lu%local_data, 1, 1, desca, diag, u%local_data, &
    1640          28 :                       1, 1, desca, vt%local_data, 1, 1, desca, work, lwork, info)
    1641          28 :          lwork = INT(work(1))
    1642          28 :          DEALLOCATE (work)
    1643          84 :          ALLOCATE (work(lwork))
    1644             :          ! SVD
    1645             :          CALL pdgesvd('V', 'V', n, n, matrix_lu%local_data, 1, 1, desca, diag, u%local_data, &
    1646          28 :                       1, 1, desca, vt%local_data, 1, 1, desca, work, lwork, info)
    1647             :          ! info == n+1 implies homogeneity error when the number of procs is large
    1648             :          ! this likely isnt a problem, but maybe we should handle it separately
    1649          28 :          IF (info /= 0 .AND. info /= n + 1) &
    1650           0 :             CPABORT("Singular value decomposition of matrix failed.")
    1651             :          ! (Pseudo)inverse and (pseudo)determinant
    1652             :          CALL cp_fm_create(matrix=sigma, &
    1653             :                            matrix_struct=matrix_a%matrix_struct, &
    1654          28 :                            name="SINGULAR_VALUE_MATRIX")
    1655          28 :          CALL cp_fm_set_all(sigma, alpha=0.0_dp)
    1656          28 :          determinant = 1.0_dp
    1657          28 :          quenched = .FALSE.
    1658          28 :          IF (PRESENT(eigval)) THEN
    1659          28 :             CPASSERT(.NOT. ASSOCIATED(eigval))
    1660          84 :             ALLOCATE (eigval(n))
    1661         156 :             eigval(:) = diag
    1662             :          END IF
    1663          92 :          DO i = 1, n
    1664          64 :             IF (diag(i) < my_eps_svd) THEN
    1665          18 :                diag(i) = 0.0_dp
    1666          18 :                quenched = .TRUE.
    1667             :             ELSE
    1668          46 :                determinant = determinant*diag(i)
    1669          46 :                diag(i) = 1.0_dp/diag(i)
    1670             :             END IF
    1671          92 :             CALL cp_fm_set_element(sigma, i, i, diag(i))
    1672             :          END DO
    1673          28 :          DEALLOCATE (diag)
    1674          28 :          IF (quenched) &
    1675             :             CALL cp_warn(__LOCATION__, &
    1676             :                          "Linear dependencies were detected in the SVD inversion of matrix "//TRIM(ADJUSTL(matrix_a%name))// &
    1677          12 :                          ". At least one singular value has been quenched.")
    1678             :          ! Sigma^-1 * U^T
    1679             :          CALL cp_fm_create(matrix=inv_sigma_ut, &
    1680             :                            matrix_struct=matrix_a%matrix_struct, &
    1681          28 :                            name="SINGULAR_VALUE_MATRIX")
    1682          28 :          CALL cp_fm_set_all(inv_sigma_ut, alpha=0.0_dp)
    1683             :          CALL pdgemm('N', 'T', n, n, n, 1.0_dp, sigma%local_data, 1, 1, desca, &
    1684          28 :                      u%local_data, 1, 1, desca, 0.0_dp, inv_sigma_ut%local_data, 1, 1, desca)
    1685             :          ! A^-1 = V * (Sigma^-1 * U^T)
    1686          28 :          CALL cp_fm_set_all(matrix_inverse, alpha=0.0_dp)
    1687             :          CALL pdgemm('T', 'N', n, n, n, 1.0_dp, vt%local_data, 1, 1, desca, &
    1688          28 :                      inv_sigma_ut%local_data, 1, 1, desca, 0.0_dp, matrix_inverse%local_data, 1, 1, desca)
    1689             :          ! Clean up
    1690          28 :          DEALLOCATE (work)
    1691          28 :          CALL cp_fm_release(u)
    1692          28 :          CALL cp_fm_release(vt)
    1693          28 :          CALL cp_fm_release(sigma)
    1694         140 :          CALL cp_fm_release(inv_sigma_ut)
    1695             :       END IF
    1696             : #else
    1697             :       IF (my_eps_svd .EQ. 0.0_dp) THEN
    1698             :          sign = .TRUE.
    1699             :          CALL invert_matrix(matrix_a%local_data, matrix_inverse%local_data, &
    1700             :                             eval_error=eps1)
    1701             :          CALL cp_fm_lu_decompose(matrix_lu, determinant, correct_sign=sign)
    1702             :          IF (PRESENT(eigval)) &
    1703             :             CALL cp_abort(__LOCATION__, &
    1704             :                           "NYI. Eigenvalues not available for return without SCALAPACK.")
    1705             :       ELSE
    1706             :          CALL get_pseudo_inverse_svd(matrix_a%local_data, matrix_inverse%local_data, eps_svd, &
    1707             :                                      determinant, eigval)
    1708             :       END IF
    1709             : #endif
    1710        1336 :       CALL cp_fm_release(matrix_lu)
    1711        1336 :       DEALLOCATE (ipivot)
    1712        1336 :       IF (PRESENT(det_a)) det_a = determinant
    1713        1336 :    END SUBROUTINE cp_fm_invert
    1714             : 
    1715             : ! **************************************************************************************************
    1716             : !> \brief inverts a triangular matrix
    1717             : !> \param matrix_a ...
    1718             : !> \param uplo_tr ...
    1719             : !> \author MI
    1720             : ! **************************************************************************************************
    1721        5034 :    SUBROUTINE cp_fm_triangular_invert(matrix_a, uplo_tr)
    1722             : 
    1723             :       TYPE(cp_fm_type), INTENT(IN)          :: matrix_a
    1724             :       CHARACTER, INTENT(IN), OPTIONAL          :: uplo_tr
    1725             : 
    1726             :       CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_triangular_invert'
    1727             : 
    1728             :       CHARACTER                                :: unit_diag, uplo
    1729             :       INTEGER                                  :: handle, info, ncol_global
    1730        5034 :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: a
    1731             : #if defined(__parallel)
    1732             :       INTEGER, DIMENSION(9)                    :: desca
    1733             : #endif
    1734             : 
    1735        5034 :       CALL timeset(routineN, handle)
    1736             : 
    1737        5034 :       unit_diag = 'N'
    1738        5034 :       uplo = 'U'
    1739        5034 :       IF (PRESENT(uplo_tr)) uplo = uplo_tr
    1740             : 
    1741        5034 :       ncol_global = matrix_a%matrix_struct%ncol_global
    1742             : 
    1743        5034 :       a => matrix_a%local_data
    1744             : 
    1745             : #if defined(__parallel)
    1746             : 
    1747       50340 :       desca(:) = matrix_a%matrix_struct%descriptor(:)
    1748             : 
    1749        5034 :       CALL pdtrtri(uplo, unit_diag, ncol_global, a(1, 1), 1, 1, desca, info)
    1750             : 
    1751             : #else
    1752             :       CALL dtrtri(uplo, unit_diag, ncol_global, a(1, 1), ncol_global, info)
    1753             : #endif
    1754             : 
    1755        5034 :       CALL timestop(handle)
    1756        5034 :    END SUBROUTINE cp_fm_triangular_invert
    1757             : 
    1758             : ! **************************************************************************************************
    1759             : !> \brief  performs a QR factorization of the input rectangular matrix A or of a submatrix of A
    1760             : !>         the computed upper triangular matrix R is in output in the submatrix sub(A) of size NxN
    1761             : !>         M and M give the dimension of the submatrix that has to be factorized (MxN) with M>N
    1762             : !> \param matrix_a ...
    1763             : !> \param matrix_r ...
    1764             : !> \param nrow_fact ...
    1765             : !> \param ncol_fact ...
    1766             : !> \param first_row ...
    1767             : !> \param first_col ...
    1768             : !> \author MI
    1769             : ! **************************************************************************************************
    1770       19320 :    SUBROUTINE cp_fm_qr_factorization(matrix_a, matrix_r, nrow_fact, ncol_fact, first_row, first_col)
    1771             :       TYPE(cp_fm_type), INTENT(IN)          :: matrix_a, matrix_r
    1772             :       INTEGER, INTENT(IN), OPTIONAL            :: nrow_fact, ncol_fact, &
    1773             :                                                   first_row, first_col
    1774             : 
    1775             :       CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_qr_factorization'
    1776             : 
    1777             :       INTEGER                                  :: handle, i, icol, info, irow, &
    1778             :                                                   j, lda, lwork, ncol, &
    1779             :                                                   ndim, nrow
    1780       19320 :       REAL(dp), ALLOCATABLE, DIMENSION(:)      :: tau, work
    1781       19320 :       REAL(dp), ALLOCATABLE, DIMENSION(:, :)   :: r_mat
    1782             :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: a
    1783             : #if defined(__parallel)
    1784             :       INTEGER, DIMENSION(9)                    :: desca
    1785             : #endif
    1786             : 
    1787       19320 :       CALL timeset(routineN, handle)
    1788             : 
    1789       19320 :       ncol = matrix_a%matrix_struct%ncol_global
    1790       19320 :       nrow = matrix_a%matrix_struct%nrow_global
    1791       19320 :       lda = nrow
    1792             : 
    1793       19320 :       a => matrix_a%local_data
    1794             : 
    1795       19320 :       IF (PRESENT(nrow_fact)) nrow = nrow_fact
    1796       19320 :       IF (PRESENT(ncol_fact)) ncol = ncol_fact
    1797       19320 :       irow = 1
    1798       19320 :       IF (PRESENT(first_row)) irow = first_row
    1799       19320 :       icol = 1
    1800       19320 :       IF (PRESENT(first_col)) icol = first_col
    1801             : 
    1802       19320 :       CPASSERT(nrow >= ncol)
    1803       19320 :       ndim = SIZE(a, 2)
    1804             : !    ALLOCATE(ipiv(ndim))
    1805       57960 :       ALLOCATE (tau(ndim))
    1806             : 
    1807             : #if defined(__parallel)
    1808             : 
    1809      193200 :       desca(:) = matrix_a%matrix_struct%descriptor(:)
    1810             : 
    1811       19320 :       lwork = -1
    1812       57960 :       ALLOCATE (work(2*ndim))
    1813       19320 :       CALL pdgeqrf(nrow, ncol, a, irow, icol, desca, tau, work, lwork, info)
    1814       19320 :       lwork = INT(work(1))
    1815       19320 :       DEALLOCATE (work)
    1816       57960 :       ALLOCATE (work(lwork))
    1817       19320 :       CALL pdgeqrf(nrow, ncol, a, irow, icol, desca, tau, work, lwork, info)
    1818             : 
    1819             : #else
    1820             :       lwork = -1
    1821             :       ALLOCATE (work(2*ndim))
    1822             :       CALL dgeqrf(nrow, ncol, a, lda, tau, work, lwork, info)
    1823             :       lwork = INT(work(1))
    1824             :       DEALLOCATE (work)
    1825             :       ALLOCATE (work(lwork))
    1826             :       CALL dgeqrf(nrow, ncol, a, lda, tau, work, lwork, info)
    1827             : 
    1828             : #endif
    1829             : 
    1830       77280 :       ALLOCATE (r_mat(ncol, ncol))
    1831       19320 :       CALL cp_fm_get_submatrix(matrix_a, r_mat, 1, 1, ncol, ncol)
    1832       38640 :       DO i = 1, ncol
    1833       38640 :          DO j = i + 1, ncol
    1834       19320 :             r_mat(j, i) = 0.0_dp
    1835             :          END DO
    1836             :       END DO
    1837       19320 :       CALL cp_fm_set_submatrix(matrix_r, r_mat, 1, 1, ncol, ncol)
    1838             : 
    1839       19320 :       DEALLOCATE (tau, work, r_mat)
    1840             : 
    1841       19320 :       CALL timestop(handle)
    1842             : 
    1843       19320 :    END SUBROUTINE cp_fm_qr_factorization
    1844             : 
    1845             : ! **************************************************************************************************
    1846             : !> \brief computes the the solution to A*b=A_general using lu decomposition
    1847             : !>        pay attention, both matrices are overwritten, a_general contais the result
    1848             : !> \param matrix_a ...
    1849             : !> \param general_a ...
    1850             : !> \author Florian Schiffmann
    1851             : ! **************************************************************************************************
    1852        5666 :    SUBROUTINE cp_fm_solve(matrix_a, general_a)
    1853             :       TYPE(cp_fm_type), INTENT(IN)          :: matrix_a, general_a
    1854             : 
    1855             :       CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_solve'
    1856             : 
    1857             :       INTEGER                                  :: handle, info, n, nrhs
    1858        5666 :       INTEGER, ALLOCATABLE, DIMENSION(:)       :: ipivot
    1859             :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: a, a_general
    1860             : #if defined(__parallel)
    1861             :       INTEGER, DIMENSION(9)                    :: desca, descb
    1862             : #else
    1863             :       INTEGER                                  :: lda, ldb
    1864             : #endif
    1865             : 
    1866        5666 :       CALL timeset(routineN, handle)
    1867             : 
    1868        5666 :       a => matrix_a%local_data
    1869        5666 :       a_general => general_a%local_data
    1870        5666 :       n = matrix_a%matrix_struct%nrow_global
    1871        5666 :       nrhs = general_a%matrix_struct%ncol_global
    1872       16998 :       ALLOCATE (ipivot(n + matrix_a%matrix_struct%nrow_block))
    1873             : 
    1874             : #if defined(__parallel)
    1875       56660 :       desca(:) = matrix_a%matrix_struct%descriptor(:)
    1876       56660 :       descb(:) = general_a%matrix_struct%descriptor(:)
    1877        5666 :       CALL pdgetrf(n, n, a, 1, 1, desca, ipivot, info)
    1878             :       CALL pdgetrs("N", n, nrhs, a, 1, 1, desca, ipivot, a_general, &
    1879        5666 :                    1, 1, descb, info)
    1880             : 
    1881             : #else
    1882             :       lda = SIZE(a, 1)
    1883             :       ldb = SIZE(a_general, 1)
    1884             :       CALL dgetrf(n, n, a, lda, ipivot, info)
    1885             :       CALL dgetrs("N", n, nrhs, a, lda, ipivot, a_general, ldb, info)
    1886             : 
    1887             : #endif
    1888             :       ! info is allowed to be zero
    1889             :       ! this does just signal a zero diagonal element
    1890        5666 :       DEALLOCATE (ipivot)
    1891        5666 :       CALL timestop(handle)
    1892        5666 :    END SUBROUTINE
    1893             : 
    1894             : ! **************************************************************************************************
    1895             : !> \brief Convenience function. Computes the matrix multiplications needed
    1896             : !>        for the multiplication of complex matrices.
    1897             : !>        C = beta * C + alpha * ( A  ** transa ) * ( B ** transb )
    1898             : !> \param transa : 'N' -> normal   'T' -> transpose
    1899             : !>      alpha,beta :: can be 0.0_dp and 1.0_dp
    1900             : !> \param transb ...
    1901             : !> \param m ...
    1902             : !> \param n ...
    1903             : !> \param k ...
    1904             : !> \param alpha ...
    1905             : !> \param A_re m x k matrix ( ! for transa = 'N'), real part
    1906             : !> \param A_im m x k matrix ( ! for transa = 'N'), imaginary part
    1907             : !> \param B_re k x n matrix ( ! for transa = 'N'), real part
    1908             : !> \param B_im k x n matrix ( ! for transa = 'N'), imaginary part
    1909             : !> \param beta ...
    1910             : !> \param C_re m x n matrix, real part
    1911             : !> \param C_im m x n matrix, imaginary part
    1912             : !> \param a_first_col ...
    1913             : !> \param a_first_row ...
    1914             : !> \param b_first_col : the k x n matrix starts at col b_first_col of matrix_b (avoid usage)
    1915             : !> \param b_first_row ...
    1916             : !> \param c_first_col ...
    1917             : !> \param c_first_row ...
    1918             : !> \author Samuel Andermatt
    1919             : !> \note
    1920             : !>      C should have no overlap with A, B
    1921             : ! **************************************************************************************************
    1922           0 :    SUBROUTINE cp_complex_fm_gemm(transa, transb, m, n, k, alpha, A_re, A_im, B_re, B_im, beta, &
    1923             :                                  C_re, C_im, a_first_col, a_first_row, b_first_col, b_first_row, c_first_col, &
    1924             :                                  c_first_row)
    1925             :       CHARACTER(LEN=1), INTENT(IN)                       :: transa, transb
    1926             :       INTEGER, INTENT(IN)                                :: m, n, k
    1927             :       REAL(KIND=dp), INTENT(IN)                          :: alpha
    1928             :       TYPE(cp_fm_type), INTENT(IN)                       :: A_re, A_im, B_re, B_im
    1929             :       REAL(KIND=dp), INTENT(IN)                          :: beta
    1930             :       TYPE(cp_fm_type), INTENT(IN)                       :: C_re, C_im
    1931             :       INTEGER, INTENT(IN), OPTIONAL                      :: a_first_col, a_first_row, b_first_col, &
    1932             :                                                             b_first_row, c_first_col, c_first_row
    1933             : 
    1934             :       CHARACTER(len=*), PARAMETER :: routineN = 'cp_complex_fm_gemm'
    1935             : 
    1936             :       INTEGER                                            :: handle
    1937             : 
    1938           0 :       CALL timeset(routineN, handle)
    1939             : 
    1940             :       CALL cp_fm_gemm(transa, transb, m, n, k, alpha, A_re, B_re, beta, C_re, &
    1941             :                       a_first_col=a_first_col, &
    1942             :                       a_first_row=a_first_row, &
    1943             :                       b_first_col=b_first_col, &
    1944             :                       b_first_row=b_first_row, &
    1945             :                       c_first_col=c_first_col, &
    1946           0 :                       c_first_row=c_first_row)
    1947             :       CALL cp_fm_gemm(transa, transb, m, n, k, -alpha, A_im, B_im, 1.0_dp, C_re, &
    1948             :                       a_first_col=a_first_col, &
    1949             :                       a_first_row=a_first_row, &
    1950             :                       b_first_col=b_first_col, &
    1951             :                       b_first_row=b_first_row, &
    1952             :                       c_first_col=c_first_col, &
    1953           0 :                       c_first_row=c_first_row)
    1954             :       CALL cp_fm_gemm(transa, transb, m, n, k, alpha, A_re, B_im, beta, C_im, &
    1955             :                       a_first_col=a_first_col, &
    1956             :                       a_first_row=a_first_row, &
    1957             :                       b_first_col=b_first_col, &
    1958             :                       b_first_row=b_first_row, &
    1959             :                       c_first_col=c_first_col, &
    1960           0 :                       c_first_row=c_first_row)
    1961             :       CALL cp_fm_gemm(transa, transb, m, n, k, alpha, A_im, B_re, 1.0_dp, C_im, &
    1962             :                       a_first_col=a_first_col, &
    1963             :                       a_first_row=a_first_row, &
    1964             :                       b_first_col=b_first_col, &
    1965             :                       b_first_row=b_first_row, &
    1966             :                       c_first_col=c_first_col, &
    1967           0 :                       c_first_row=c_first_row)
    1968             : 
    1969           0 :       CALL timestop(handle)
    1970             : 
    1971           0 :    END SUBROUTINE cp_complex_fm_gemm
    1972             : 
    1973             : ! **************************************************************************************************
    1974             : !> \brief inverts a matrix using LU decomposition
    1975             : !>        the input matrix will be overwritten
    1976             : !> \param matrix   : input a general square non-singular matrix, outputs its inverse
    1977             : !> \param info_out : optional, if present outputs the info from (p)zgetri
    1978             : !> \author Lianheng Tong
    1979             : ! **************************************************************************************************
    1980           0 :    SUBROUTINE cp_fm_lu_invert(matrix, info_out)
    1981             :       TYPE(cp_fm_type), INTENT(IN)          :: matrix
    1982             :       INTEGER, INTENT(OUT), OPTIONAL           :: info_out
    1983             : 
    1984             :       CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_lu_invert'
    1985             : 
    1986             :       INTEGER :: nrows_global, handle, info, lwork
    1987           0 :       INTEGER, DIMENSION(:), ALLOCATABLE       :: ipivot
    1988             :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: mat
    1989             :       REAL(KIND=sp), DIMENSION(:, :), POINTER  :: mat_sp
    1990           0 :       REAL(KIND=dp), DIMENSION(:), ALLOCATABLE :: work
    1991           0 :       REAL(KIND=sp), DIMENSION(:), ALLOCATABLE :: work_sp
    1992             : #if defined(__parallel)
    1993             :       INTEGER                                  :: liwork
    1994             :       INTEGER, DIMENSION(9)                    :: desca
    1995           0 :       INTEGER, DIMENSION(:), ALLOCATABLE       :: iwork
    1996             : #else
    1997             :       INTEGER                                  :: lda
    1998             : #endif
    1999             : 
    2000           0 :       CALL timeset(routineN, handle)
    2001             : 
    2002           0 :       mat => matrix%local_data
    2003           0 :       mat_sp => matrix%local_data_sp
    2004           0 :       nrows_global = matrix%matrix_struct%nrow_global
    2005           0 :       CPASSERT(nrows_global .EQ. matrix%matrix_struct%ncol_global)
    2006           0 :       ALLOCATE (ipivot(nrows_global))
    2007             :       ! do LU decomposition
    2008             : #if defined(__parallel)
    2009           0 :       desca = matrix%matrix_struct%descriptor
    2010           0 :       IF (matrix%use_sp) THEN
    2011             :          CALL psgetrf(nrows_global, nrows_global, &
    2012           0 :                       mat_sp, 1, 1, desca, ipivot, info)
    2013             :       ELSE
    2014             :          CALL pdgetrf(nrows_global, nrows_global, &
    2015           0 :                       mat, 1, 1, desca, ipivot, info)
    2016             :       END IF
    2017             : #else
    2018             :       lda = SIZE(mat, 1)
    2019             :       IF (matrix%use_sp) THEN
    2020             :          CALL sgetrf(nrows_global, nrows_global, &
    2021             :                      mat_sp, lda, ipivot, info)
    2022             :       ELSE
    2023             :          CALL dgetrf(nrows_global, nrows_global, &
    2024             :                      mat, lda, ipivot, info)
    2025             :       END IF
    2026             : #endif
    2027           0 :       IF (info /= 0) THEN
    2028           0 :          CALL cp_abort(__LOCATION__, "LU decomposition has failed")
    2029             :       END IF
    2030             :       ! do inversion
    2031           0 :       IF (matrix%use_sp) THEN
    2032           0 :          ALLOCATE (work(1))
    2033             :       ELSE
    2034           0 :          ALLOCATE (work_sp(1))
    2035             :       END IF
    2036             : #if defined(__parallel)
    2037           0 :       ALLOCATE (iwork(1))
    2038           0 :       IF (matrix%use_sp) THEN
    2039             :          CALL psgetri(nrows_global, mat_sp, 1, 1, desca, &
    2040           0 :                       ipivot, work_sp, -1, iwork, -1, info)
    2041           0 :          lwork = INT(work_sp(1))
    2042           0 :          DEALLOCATE (work_sp)
    2043           0 :          ALLOCATE (work_sp(lwork))
    2044             :       ELSE
    2045             :          CALL pdgetri(nrows_global, mat, 1, 1, desca, &
    2046           0 :                       ipivot, work, -1, iwork, -1, info)
    2047           0 :          lwork = INT(work(1))
    2048           0 :          DEALLOCATE (work)
    2049           0 :          ALLOCATE (work(lwork))
    2050             :       END IF
    2051           0 :       liwork = INT(iwork(1))
    2052           0 :       DEALLOCATE (iwork)
    2053           0 :       ALLOCATE (iwork(liwork))
    2054           0 :       IF (matrix%use_sp) THEN
    2055             :          CALL psgetri(nrows_global, mat_sp, 1, 1, desca, &
    2056           0 :                       ipivot, work_sp, lwork, iwork, liwork, info)
    2057             :       ELSE
    2058             :          CALL pdgetri(nrows_global, mat, 1, 1, desca, &
    2059           0 :                       ipivot, work, lwork, iwork, liwork, info)
    2060             :       END IF
    2061           0 :       DEALLOCATE (iwork)
    2062             : #else
    2063             :       IF (matrix%use_sp) THEN
    2064             :          CALL sgetri(nrows_global, mat_sp, lda, &
    2065             :                      ipivot, work_sp, -1, info)
    2066             :          lwork = INT(work_sp(1))
    2067             :          DEALLOCATE (work_sp)
    2068             :          ALLOCATE (work_sp(lwork))
    2069             :          CALL sgetri(nrows_global, mat_sp, lda, &
    2070             :                      ipivot, work_sp, lwork, info)
    2071             :       ELSE
    2072             :          CALL dgetri(nrows_global, mat, lda, &
    2073             :                      ipivot, work, -1, info)
    2074             :          lwork = INT(work(1))
    2075             :          DEALLOCATE (work)
    2076             :          ALLOCATE (work(lwork))
    2077             :          CALL dgetri(nrows_global, mat, lda, &
    2078             :                      ipivot, work, lwork, info)
    2079             :       END IF
    2080             : #endif
    2081           0 :       IF (matrix%use_sp) THEN
    2082           0 :          DEALLOCATE (work_sp)
    2083             :       ELSE
    2084           0 :          DEALLOCATE (work)
    2085             :       END IF
    2086           0 :       DEALLOCATE (ipivot)
    2087             : 
    2088           0 :       IF (PRESENT(info_out)) THEN
    2089           0 :          info_out = info
    2090             :       ELSE
    2091           0 :          IF (info /= 0) &
    2092           0 :             CALL cp_abort(__LOCATION__, "LU inversion has failed")
    2093             :       END IF
    2094             : 
    2095           0 :       CALL timestop(handle)
    2096             : 
    2097           0 :    END SUBROUTINE cp_fm_lu_invert
    2098             : 
    2099             : ! **************************************************************************************************
    2100             : !> \brief norm of matrix using (p)dlange
    2101             : !> \param matrix   : input a general matrix
    2102             : !> \param mode     : 'M' max abs element value,
    2103             : !>                   '1' or 'O' one norm, i.e. maximum column sum
    2104             : !>                   'I' infinity norm, i.e. maximum row sum
    2105             : !>                   'F' or 'E' Frobenius norm, i.e. sqrt of sum of all squares of elements
    2106             : !> \return : the norm according to mode
    2107             : !> \author Lianheng Tong
    2108             : ! **************************************************************************************************
    2109         492 :    FUNCTION cp_fm_norm(matrix, mode) RESULT(res)
    2110             :       TYPE(cp_fm_type), INTENT(IN) :: matrix
    2111             :       CHARACTER, INTENT(IN) :: mode
    2112             :       REAL(KIND=dp) :: res
    2113             : 
    2114             :       CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_norm'
    2115             : 
    2116             :       INTEGER :: nrows, ncols, handle, lwork, nrows_local, ncols_local
    2117             :       REAL(KIND=sp) :: res_sp
    2118             :       REAL(KIND=dp), DIMENSION(:, :), POINTER :: aa
    2119             :       REAL(KIND=sp), DIMENSION(:, :), POINTER :: aa_sp
    2120         492 :       REAL(KIND=dp), DIMENSION(:), ALLOCATABLE :: work
    2121         492 :       REAL(KIND=sp), DIMENSION(:), ALLOCATABLE :: work_sp
    2122             : #if defined(__parallel)
    2123             :       INTEGER, DIMENSION(9) :: desca
    2124             : #else
    2125             :       INTEGER :: lda
    2126             : #endif
    2127             : 
    2128         492 :       CALL timeset(routineN, handle)
    2129             : 
    2130             :       CALL cp_fm_get_info(matrix=matrix, &
    2131             :                           nrow_global=nrows, &
    2132             :                           ncol_global=ncols, &
    2133             :                           nrow_local=nrows_local, &
    2134         492 :                           ncol_local=ncols_local)
    2135         492 :       aa => matrix%local_data
    2136         492 :       aa_sp => matrix%local_data_sp
    2137             : 
    2138             : #if defined(__parallel)
    2139        4920 :       desca = matrix%matrix_struct%descriptor
    2140             :       SELECT CASE (mode)
    2141             :       CASE ('M', 'm')
    2142         492 :          lwork = 1
    2143             :       CASE ('1', 'O', 'o')
    2144         492 :          lwork = ncols_local
    2145             :       CASE ('I', 'i')
    2146           0 :          lwork = nrows_local
    2147             :       CASE ('F', 'f', 'E', 'e')
    2148           0 :          lwork = 1
    2149             :       CASE DEFAULT
    2150         492 :          CPABORT("mode input is not valid")
    2151             :       END SELECT
    2152         492 :       IF (matrix%use_sp) THEN
    2153           0 :          ALLOCATE (work_sp(lwork))
    2154           0 :          res_sp = pslange(mode, nrows, ncols, aa_sp, 1, 1, desca, work_sp)
    2155           0 :          DEALLOCATE (work_sp)
    2156           0 :          res = REAL(res_sp, KIND=dp)
    2157             :       ELSE
    2158        1476 :          ALLOCATE (work(lwork))
    2159         492 :          res = pdlange(mode, nrows, ncols, aa, 1, 1, desca, work)
    2160         492 :          DEALLOCATE (work)
    2161             :       END IF
    2162             : #else
    2163             :       SELECT CASE (mode)
    2164             :       CASE ('M', 'm')
    2165             :          lwork = 1
    2166             :       CASE ('1', 'O', 'o')
    2167             :          lwork = 1
    2168             :       CASE ('I', 'i')
    2169             :          lwork = nrows
    2170             :       CASE ('F', 'f', 'E', 'e')
    2171             :          lwork = 1
    2172             :       CASE DEFAULT
    2173             :          CPABORT("mode input is not valid")
    2174             :       END SELECT
    2175             :       IF (matrix%use_sp) THEN
    2176             :          ALLOCATE (work_sp(lwork))
    2177             :          lda = SIZE(aa_sp, 1)
    2178             :          res_sp = slange(mode, nrows, ncols, aa_sp, lda, work_sp)
    2179             :          DEALLOCATE (work_sp)
    2180             :          res = REAL(res_sp, KIND=dp)
    2181             :       ELSE
    2182             :          ALLOCATE (work(lwork))
    2183             :          lda = SIZE(aa, 1)
    2184             :          res = dlange(mode, nrows, ncols, aa, lda, work)
    2185             :          DEALLOCATE (work)
    2186             :       END IF
    2187             : #endif
    2188             : 
    2189         492 :       CALL timestop(handle)
    2190             : 
    2191         492 :    END FUNCTION cp_fm_norm
    2192             : 
    2193             : ! **************************************************************************************************
    2194             : !> \brief trace of a matrix using pdlatra
    2195             : !> \param matrix   : input a square matrix
    2196             : !> \return : the trace
    2197             : !> \author Lianheng Tong
    2198             : ! **************************************************************************************************
    2199           0 :    FUNCTION cp_fm_latra(matrix) RESULT(res)
    2200             :       TYPE(cp_fm_type), INTENT(IN) :: matrix
    2201             :       REAL(KIND=dp) :: res
    2202             : 
    2203             :       CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_latra'
    2204             : 
    2205             :       INTEGER :: nrows, ncols, handle
    2206             :       REAL(KIND=sp) :: res_sp
    2207             :       REAL(KIND=dp), DIMENSION(:, :), POINTER :: aa
    2208             :       REAL(KIND=sp), DIMENSION(:, :), POINTER :: aa_sp
    2209             : #if defined(__parallel)
    2210             :       INTEGER, DIMENSION(9) :: desca
    2211             : #else
    2212             :       INTEGER :: ii
    2213             : #endif
    2214             : 
    2215           0 :       CALL timeset(routineN, handle)
    2216             : 
    2217           0 :       nrows = matrix%matrix_struct%nrow_global
    2218           0 :       ncols = matrix%matrix_struct%ncol_global
    2219           0 :       CPASSERT(nrows .EQ. ncols)
    2220           0 :       aa => matrix%local_data
    2221           0 :       aa_sp => matrix%local_data_sp
    2222             : 
    2223             : #if defined(__parallel)
    2224           0 :       desca = matrix%matrix_struct%descriptor
    2225           0 :       IF (matrix%use_sp) THEN
    2226           0 :          res_sp = pslatra(nrows, aa_sp, 1, 1, desca)
    2227           0 :          res = REAL(res_sp, KIND=dp)
    2228             :       ELSE
    2229           0 :          res = pdlatra(nrows, aa, 1, 1, desca)
    2230             :       END IF
    2231             : #else
    2232             :       IF (matrix%use_sp) THEN
    2233             :          res_sp = 0.0_sp
    2234             :          DO ii = 1, nrows
    2235             :             res_sp = res_sp + aa_sp(ii, ii)
    2236             :          END DO
    2237             :          res = REAL(res_sp, KIND=dp)
    2238             :       ELSE
    2239             :          res = 0.0_dp
    2240             :          DO ii = 1, nrows
    2241             :             res = res + aa(ii, ii)
    2242             :          END DO
    2243             :       END IF
    2244             : #endif
    2245             : 
    2246           0 :       CALL timestop(handle)
    2247             : 
    2248           0 :    END FUNCTION cp_fm_latra
    2249             : 
    2250             : ! **************************************************************************************************
    2251             : !> \brief compute a QR factorization with column pivoting of a M-by-N distributed matrix
    2252             : !>        sub( A ) = A(IA:IA+M-1,JA:JA+N-1)
    2253             : !> \param matrix   : input M-by-N distributed matrix sub( A ) which is to be factored
    2254             : !> \param tau      :  scalar factors TAU of the elementary reflectors. TAU is tied to the distributed matrix A
    2255             : !> \param nrow ...
    2256             : !> \param ncol ...
    2257             : !> \param first_row ...
    2258             : !> \param first_col ...
    2259             : !> \author MI
    2260             : ! **************************************************************************************************
    2261          36 :    SUBROUTINE cp_fm_pdgeqpf(matrix, tau, nrow, ncol, first_row, first_col)
    2262             : 
    2263             :       TYPE(cp_fm_type), INTENT(IN)                       :: matrix
    2264             :       REAL(KIND=dp), DIMENSION(:), POINTER               :: tau
    2265             :       INTEGER, INTENT(IN)                                :: nrow, ncol, first_row, first_col
    2266             : 
    2267             :       CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_pdgeqpf'
    2268             : 
    2269             :       INTEGER                                            :: handle
    2270             :       INTEGER                                            :: info, lwork
    2271          36 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: ipiv
    2272             :       REAL(KIND=dp), DIMENSION(:, :), POINTER            :: a
    2273             :       REAL(KIND=dp), DIMENSION(:), POINTER               :: work
    2274             : #if defined(__parallel)
    2275             :       INTEGER, DIMENSION(9) :: descc
    2276             : #else
    2277             :       INTEGER :: lda
    2278             : #endif
    2279             : 
    2280          36 :       CALL timeset(routineN, handle)
    2281             : 
    2282          36 :       a => matrix%local_data
    2283          36 :       lwork = -1
    2284         108 :       ALLOCATE (work(2*nrow))
    2285         108 :       ALLOCATE (ipiv(ncol))
    2286          36 :       info = 0
    2287             : 
    2288             : #if defined(__parallel)
    2289         360 :       descc(:) = matrix%matrix_struct%descriptor(:)
    2290             :       ! Call SCALAPACK routine to get optimal work dimension
    2291          36 :       CALL pdgeqpf(nrow, ncol, a, first_row, first_col, descc, ipiv, tau, work, lwork, info)
    2292          36 :       lwork = INT(work(1))
    2293          36 :       DEALLOCATE (work)
    2294         108 :       ALLOCATE (work(lwork))
    2295         244 :       tau = 0.0_dp
    2296         354 :       ipiv = 0
    2297             : 
    2298             :       ! Call SCALAPACK routine to get QR decomposition of CTs
    2299          36 :       CALL pdgeqpf(nrow, ncol, a, first_row, first_col, descc, ipiv, tau, work, lwork, info)
    2300             : #else
    2301             :       CPASSERT(first_row == 1 .AND. first_col == 1)
    2302             :       lda = SIZE(a, 1)
    2303             :       CALL dgeqp3(nrow, ncol, a, lda, ipiv, tau, work, lwork, info)
    2304             :       lwork = INT(work(1))
    2305             :       DEALLOCATE (work)
    2306             :       ALLOCATE (work(lwork))
    2307             :       tau = 0.0_dp
    2308             :       ipiv = 0
    2309             :       CALL dgeqp3(nrow, ncol, a, lda, ipiv, tau, work, lwork, info)
    2310             : #endif
    2311          36 :       CPASSERT(info == 0)
    2312             : 
    2313          36 :       DEALLOCATE (work)
    2314          36 :       DEALLOCATE (ipiv)
    2315             : 
    2316          36 :       CALL timestop(handle)
    2317             : 
    2318          36 :    END SUBROUTINE cp_fm_pdgeqpf
    2319             : 
    2320             : ! **************************************************************************************************
    2321             : !> \brief generates an M-by-N real distributed matrix Q denoting A(IA:IA+M-1,JA:JA+N-1)
    2322             : !>         with orthonormal columns, which is defined as the first N columns of a product of K
    2323             : !>         elementary reflectors of order M
    2324             : !> \param matrix : On entry, the j-th column must contain the vector which defines the elementary reflector
    2325             : !>                  as returned from PDGEQRF
    2326             : !>                 On exit it contains  the M-by-N distributed matrix Q
    2327             : !> \param tau :   contains the scalar factors TAU of elementary reflectors  as returned by PDGEQRF
    2328             : !> \param nrow ...
    2329             : !> \param first_row ...
    2330             : !> \param first_col ...
    2331             : !> \author MI
    2332             : ! **************************************************************************************************
    2333          36 :    SUBROUTINE cp_fm_pdorgqr(matrix, tau, nrow, first_row, first_col)
    2334             : 
    2335             :       TYPE(cp_fm_type), INTENT(IN)                       :: matrix
    2336             :       REAL(KIND=dp), DIMENSION(:), POINTER               :: tau
    2337             :       INTEGER, INTENT(IN)                                :: nrow, first_row, first_col
    2338             : 
    2339             :       CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_pdorgqr'
    2340             : 
    2341             :       INTEGER                                            :: handle
    2342             :       INTEGER                                            :: info, lwork
    2343             :       REAL(KIND=dp), DIMENSION(:, :), POINTER            :: a
    2344             :       REAL(KIND=dp), DIMENSION(:), POINTER               :: work
    2345             : #if defined(__parallel)
    2346             :       INTEGER, DIMENSION(9) :: descc
    2347             : #else
    2348             :       INTEGER :: lda
    2349             : #endif
    2350             : 
    2351          36 :       CALL timeset(routineN, handle)
    2352             : 
    2353          36 :       a => matrix%local_data
    2354          36 :       lwork = -1
    2355         108 :       ALLOCATE (work(2*nrow))
    2356          36 :       info = 0
    2357             : 
    2358             : #if defined(__parallel)
    2359         360 :       descc(:) = matrix%matrix_struct%descriptor(:)
    2360             : 
    2361          36 :       CALL pdorgqr(nrow, nrow, nrow, a, first_row, first_col, descc, tau, work, lwork, info)
    2362          36 :       CPASSERT(info == 0)
    2363          36 :       lwork = INT(work(1))
    2364          36 :       DEALLOCATE (work)
    2365         108 :       ALLOCATE (work(lwork))
    2366             : 
    2367             :       ! Call SCALAPACK routine to get Q
    2368          36 :       CALL pdorgqr(nrow, nrow, nrow, a, first_row, first_col, descc, tau, work, lwork, info)
    2369             : #else
    2370             :       CPASSERT(first_row == 1 .AND. first_col == 1)
    2371             :       lda = SIZE(a, 1)
    2372             :       CALL dorgqr(nrow, nrow, nrow, a, lda, tau, work, lwork, info)
    2373             :       lwork = INT(work(1))
    2374             :       DEALLOCATE (work)
    2375             :       ALLOCATE (work(lwork))
    2376             :       CALL dorgqr(nrow, nrow, nrow, a, lda, tau, work, lwork, info)
    2377             : #endif
    2378          36 :       CPASSERT(INFO == 0)
    2379             : 
    2380          36 :       DEALLOCATE (work)
    2381          36 :       CALL timestop(handle)
    2382             : 
    2383          36 :    END SUBROUTINE cp_fm_pdorgqr
    2384             : 
    2385             : ! **************************************************************************************************
    2386             : !> \brief Applies a planar rotation defined by cs and sn to the i'th and j'th rows.
    2387             : !> \param cs cosine of the rotation angle
    2388             : !> \param sn sinus of the rotation angle
    2389             : !> \param irow ...
    2390             : !> \param jrow ...
    2391             : !> \author Ole Schuett
    2392             : ! **************************************************************************************************
    2393      543328 :    SUBROUTINE cp_fm_rot_rows(matrix, irow, jrow, cs, sn)
    2394             :       TYPE(cp_fm_type), INTENT(IN)             :: matrix
    2395             :       INTEGER, INTENT(IN)                      :: irow, jrow
    2396             :       REAL(dp), INTENT(IN)                     :: cs, sn
    2397             : 
    2398             :       CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_rot_rows'
    2399             :       INTEGER                                  :: handle, nrow, ncol
    2400             : 
    2401             : #if defined(__parallel)
    2402             :       INTEGER                                  :: info, lwork
    2403             :       INTEGER, DIMENSION(9)                    :: desc
    2404      543328 :       REAL(dp), DIMENSION(:), ALLOCATABLE      :: work
    2405             : #endif
    2406             : 
    2407      543328 :       CALL timeset(routineN, handle)
    2408      543328 :       CALL cp_fm_get_info(matrix, nrow_global=nrow, ncol_global=ncol)
    2409             : 
    2410             : #if defined(__parallel)
    2411      543328 :       lwork = 2*ncol + 1
    2412     1629984 :       ALLOCATE (work(lwork))
    2413     5433280 :       desc(:) = matrix%matrix_struct%descriptor(:)
    2414             :       CALL pdrot(ncol, &
    2415             :                  matrix%local_data(1, 1), irow, 1, desc, ncol, &
    2416             :                  matrix%local_data(1, 1), jrow, 1, desc, ncol, &
    2417      543328 :                  cs, sn, work, lwork, info)
    2418      543328 :       CPASSERT(info == 0)
    2419      543328 :       DEALLOCATE (work)
    2420             : #else
    2421             :       CALL drot(ncol, matrix%local_data(irow, 1), ncol, matrix%local_data(jrow, 1), ncol, cs, sn)
    2422             : #endif
    2423             : 
    2424      543328 :       CALL timestop(handle)
    2425      543328 :    END SUBROUTINE cp_fm_rot_rows
    2426             : 
    2427             : ! **************************************************************************************************
    2428             : !> \brief Applies a planar rotation defined by cs and sn to the i'th and j'th columnns.
    2429             : !> \param cs cosine of the rotation angle
    2430             : !> \param sn sinus of the rotation angle
    2431             : !> \param icol ...
    2432             : !> \param jcol ...
    2433             : !> \author Ole Schuett
    2434             : ! **************************************************************************************************
    2435      612158 :    SUBROUTINE cp_fm_rot_cols(matrix, icol, jcol, cs, sn)
    2436             :       TYPE(cp_fm_type), INTENT(IN)             :: matrix
    2437             :       INTEGER, INTENT(IN)                      :: icol, jcol
    2438             :       REAL(dp), INTENT(IN)                     :: cs, sn
    2439             : 
    2440             :       CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_rot_cols'
    2441             :       INTEGER                                  :: handle, nrow, ncol
    2442             : 
    2443             : #if defined(__parallel)
    2444             :       INTEGER                                  :: info, lwork
    2445             :       INTEGER, DIMENSION(9)                    :: desc
    2446      612158 :       REAL(dp), DIMENSION(:), ALLOCATABLE      :: work
    2447             : #endif
    2448             : 
    2449      612158 :       CALL timeset(routineN, handle)
    2450      612158 :       CALL cp_fm_get_info(matrix, nrow_global=nrow, ncol_global=ncol)
    2451             : 
    2452             : #if defined(__parallel)
    2453      612158 :       lwork = 2*nrow + 1
    2454     1836474 :       ALLOCATE (work(lwork))
    2455     6121580 :       desc(:) = matrix%matrix_struct%descriptor(:)
    2456             :       CALL pdrot(nrow, &
    2457             :                  matrix%local_data(1, 1), 1, icol, desc, 1, &
    2458             :                  matrix%local_data(1, 1), 1, jcol, desc, 1, &
    2459      612158 :                  cs, sn, work, lwork, info)
    2460      612158 :       CPASSERT(info == 0)
    2461      612158 :       DEALLOCATE (work)
    2462             : #else
    2463             :       CALL drot(nrow, matrix%local_data(1, icol), 1, matrix%local_data(1, jcol), 1, cs, sn)
    2464             : #endif
    2465             : 
    2466      612158 :       CALL timestop(handle)
    2467      612158 :    END SUBROUTINE cp_fm_rot_cols
    2468             : 
    2469             : ! **************************************************************************************************
    2470             : !> \brief Orthonormalizes selected rows and columns of a full matrix, matrix_a
    2471             : !> \param matrix_a ...
    2472             : !> \param B ...
    2473             : !> \param nrows number of rows of matrix_a, optional, defaults to size(matrix_a,1)
    2474             : !> \param ncols number of columns of matrix_a, optional, defaults to size(matrix_a, 2)
    2475             : !> \param start_row starting index of rows, optional, defaults to 1
    2476             : !> \param start_col starting index of columns, optional, defaults to 1
    2477             : !> \param do_norm ...
    2478             : !> \param do_print ...
    2479             : ! **************************************************************************************************
    2480           0 :    SUBROUTINE cp_fm_Gram_Schmidt_orthonorm(matrix_a, B, nrows, ncols, start_row, start_col, &
    2481             :                                            do_norm, do_print)
    2482             : 
    2483             :       TYPE(cp_fm_type), INTENT(IN)                       :: matrix_a
    2484             :       REAL(kind=dp), DIMENSION(:, :), INTENT(OUT)        :: B
    2485             :       INTEGER, INTENT(IN), OPTIONAL                      :: nrows, ncols, start_row, start_col
    2486             :       LOGICAL, INTENT(IN), OPTIONAL                      :: do_norm, do_print
    2487             : 
    2488             :       CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_Gram_Schmidt_orthonorm'
    2489             : 
    2490             :       INTEGER :: end_col_global, end_col_local, end_row_global, end_row_local, handle, i, j, &
    2491             :                  j_col, ncol_global, ncol_local, nrow_global, nrow_local, start_col_global, &
    2492             :                  start_col_local, start_row_global, start_row_local, this_col, unit_nr
    2493           0 :       INTEGER, DIMENSION(:), POINTER                     :: col_indices, row_indices
    2494             :       LOGICAL                                            :: my_do_norm, my_do_print
    2495             :       REAL(KIND=dp)                                      :: norm
    2496           0 :       REAL(kind=dp), DIMENSION(:, :), POINTER            :: a
    2497             : 
    2498           0 :       CALL timeset(routineN, handle)
    2499             : 
    2500           0 :       my_do_norm = .TRUE.
    2501           0 :       IF (PRESENT(do_norm)) my_do_norm = do_norm
    2502             : 
    2503           0 :       my_do_print = .FALSE.
    2504           0 :       IF (PRESENT(do_print) .AND. (my_do_norm)) my_do_print = do_print
    2505             : 
    2506           0 :       unit_nr = -1
    2507           0 :       IF (my_do_print) THEN
    2508           0 :          unit_nr = cp_logger_get_default_unit_nr()
    2509           0 :          IF (unit_nr < 1) my_do_print = .FALSE.
    2510             :       END IF
    2511             : 
    2512           0 :       IF (SIZE(B) /= 0) THEN
    2513           0 :          IF (PRESENT(nrows)) THEN
    2514           0 :             nrow_global = nrows
    2515             :          ELSE
    2516           0 :             nrow_global = SIZE(B, 1)
    2517             :          END IF
    2518             : 
    2519           0 :          IF (PRESENT(ncols)) THEN
    2520           0 :             ncol_global = ncols
    2521             :          ELSE
    2522           0 :             ncol_global = SIZE(B, 2)
    2523             :          END IF
    2524             : 
    2525           0 :          IF (PRESENT(start_row)) THEN
    2526           0 :             start_row_global = start_row
    2527             :          ELSE
    2528             :             start_row_global = 1
    2529             :          END IF
    2530             : 
    2531           0 :          IF (PRESENT(start_col)) THEN
    2532           0 :             start_col_global = start_col
    2533             :          ELSE
    2534             :             start_col_global = 1
    2535             :          END IF
    2536             : 
    2537           0 :          end_row_global = start_row_global + nrow_global - 1
    2538           0 :          end_col_global = start_col_global + ncol_global - 1
    2539             : 
    2540             :          CALL cp_fm_get_info(matrix=matrix_a, &
    2541             :                              nrow_global=nrow_global, ncol_global=ncol_global, &
    2542             :                              nrow_local=nrow_local, ncol_local=ncol_local, &
    2543           0 :                              row_indices=row_indices, col_indices=col_indices)
    2544           0 :          IF (end_row_global > nrow_global) THEN
    2545             :             end_row_global = nrow_global
    2546             :          END IF
    2547           0 :          IF (end_col_global > ncol_global) THEN
    2548             :             end_col_global = ncol_global
    2549             :          END IF
    2550             : 
    2551             :          ! find out row/column indices of locally stored matrix elements that
    2552             :          ! needs to be copied.
    2553             :          ! Arrays row_indices and col_indices are assumed to be sorted in
    2554             :          ! ascending order
    2555           0 :          DO start_row_local = 1, nrow_local
    2556           0 :             IF (row_indices(start_row_local) >= start_row_global) EXIT
    2557             :          END DO
    2558             : 
    2559           0 :          DO end_row_local = start_row_local, nrow_local
    2560           0 :             IF (row_indices(end_row_local) > end_row_global) EXIT
    2561             :          END DO
    2562           0 :          end_row_local = end_row_local - 1
    2563             : 
    2564           0 :          DO start_col_local = 1, ncol_local
    2565           0 :             IF (col_indices(start_col_local) >= start_col_global) EXIT
    2566             :          END DO
    2567             : 
    2568           0 :          DO end_col_local = start_col_local, ncol_local
    2569           0 :             IF (col_indices(end_col_local) > end_col_global) EXIT
    2570             :          END DO
    2571           0 :          end_col_local = end_col_local - 1
    2572             : 
    2573           0 :          a => matrix_a%local_data
    2574             : 
    2575           0 :          this_col = col_indices(start_col_local) - start_col_global + 1
    2576             : 
    2577           0 :          B(:, this_col) = a(:, start_col_local)
    2578             : 
    2579           0 :          IF (my_do_norm) THEN
    2580           0 :             norm = SQRT(accurate_dot_product(B(:, this_col), B(:, this_col)))
    2581           0 :             B(:, this_col) = B(:, this_col)/norm
    2582           0 :             IF (my_do_print) WRITE (unit_nr, '(I3,F8.3)') this_col, norm
    2583             :          END IF
    2584             : 
    2585           0 :          DO i = start_col_local + 1, end_col_local
    2586           0 :             this_col = col_indices(i) - start_col_global + 1
    2587           0 :             B(:, this_col) = a(:, i)
    2588           0 :             DO j = start_col_local, i - 1
    2589           0 :                j_col = col_indices(j) - start_col_global + 1
    2590             :                B(:, this_col) = B(:, this_col) - &
    2591             :                                 accurate_dot_product(B(:, j_col), B(:, this_col))* &
    2592           0 :                                 B(:, j_col)/accurate_dot_product(B(:, j_col), B(:, j_col))
    2593             :             END DO
    2594             : 
    2595           0 :             IF (my_do_norm) THEN
    2596           0 :                norm = SQRT(accurate_dot_product(B(:, this_col), B(:, this_col)))
    2597           0 :                B(:, this_col) = B(:, this_col)/norm
    2598           0 :                IF (my_do_print) WRITE (unit_nr, '(I3,F8.3)') this_col, norm
    2599             :             END IF
    2600             : 
    2601             :          END DO
    2602           0 :          CALL matrix_a%matrix_struct%para_env%sum(B)
    2603             :       END IF
    2604             : 
    2605           0 :       CALL timestop(handle)
    2606             : 
    2607           0 :    END SUBROUTINE cp_fm_Gram_Schmidt_orthonorm
    2608             : 
    2609             : ! **************************************************************************************************
    2610             : !> \brief Cholesky decomposition
    2611             : !> \param fm_matrix ...
    2612             : !> \param n ...
    2613             : ! **************************************************************************************************
    2614       10015 :    SUBROUTINE cp_fm_potrf(fm_matrix, n)
    2615             :       TYPE(cp_fm_type)                         :: fm_matrix
    2616             :       INTEGER, INTENT(in)                      :: n
    2617             : 
    2618             :       INTEGER                                  :: info
    2619       10015 :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: a
    2620       10015 :       REAL(KIND=sp), DIMENSION(:, :), POINTER  :: a_sp
    2621             : #if defined(__parallel)
    2622             :       INTEGER, DIMENSION(9)                    :: desca
    2623             : #endif
    2624             : 
    2625       10015 :       a => fm_matrix%local_data
    2626       10015 :       a_sp => fm_matrix%local_data_sp
    2627             : #if defined(__parallel)
    2628      100150 :       desca(:) = fm_matrix%matrix_struct%descriptor(:)
    2629       10015 :       IF (fm_matrix%use_sp) THEN
    2630           0 :          CALL pspotrf('U', n, a_sp(1, 1), 1, 1, desca, info)
    2631             :       ELSE
    2632       10015 :          CALL pdpotrf('U', n, a(1, 1), 1, 1, desca, info)
    2633             :       END IF
    2634             : #else
    2635             :       IF (fm_matrix%use_sp) THEN
    2636             :          CALL spotrf('U', n, a_sp(1, 1), SIZE(a_sp, 1), info)
    2637             :       ELSE
    2638             :          CALL dpotrf('U', n, a(1, 1), SIZE(a, 1), info)
    2639             :       END IF
    2640             : #endif
    2641       10015 :       IF (info /= 0) &
    2642           0 :          CPABORT("Cholesky decomposition failed. Matrix ill conditioned ?")
    2643             : 
    2644       10015 :    END SUBROUTINE cp_fm_potrf
    2645             : 
    2646             : ! **************************************************************************************************
    2647             : !> \brief Invert trianguar matrix
    2648             : !> \param fm_matrix the matrix to invert (must be an upper triangular matrix)
    2649             : !> \param n size of the matrix to invert
    2650             : ! **************************************************************************************************
    2651        9239 :    SUBROUTINE cp_fm_potri(fm_matrix, n)
    2652             :       TYPE(cp_fm_type)                          :: fm_matrix
    2653             :       INTEGER, INTENT(in)                       :: n
    2654             : 
    2655        9239 :       REAL(KIND=dp), DIMENSION(:, :), POINTER   :: a
    2656        9239 :       REAL(KIND=sp), DIMENSION(:, :), POINTER   :: a_sp
    2657             :       INTEGER                                   :: info
    2658             : #if defined(__parallel)
    2659             :       INTEGER, DIMENSION(9)                     :: desca
    2660             : #endif
    2661             : 
    2662        9239 :       a => fm_matrix%local_data
    2663        9239 :       a_sp => fm_matrix%local_data_sp
    2664             : #if defined(__parallel)
    2665       92390 :       desca(:) = fm_matrix%matrix_struct%descriptor(:)
    2666        9239 :       IF (fm_matrix%use_sp) THEN
    2667           0 :          CALL pspotri('U', n, a_sp(1, 1), 1, 1, desca, info)
    2668             :       ELSE
    2669        9239 :          CALL pdpotri('U', n, a(1, 1), 1, 1, desca, info)
    2670             :       END IF
    2671             : #else
    2672             :       IF (fm_matrix%use_sp) THEN
    2673             :          CALL spotri('U', n, a_sp(1, 1), SIZE(a_sp, 1), info)
    2674             :       ELSE
    2675             :          CALL dpotri('U', n, a(1, 1), SIZE(a, 1), info)
    2676             :       END IF
    2677             : #endif
    2678        9239 :       CPASSERT(info == 0)
    2679        9239 :    END SUBROUTINE cp_fm_potri
    2680             : 
    2681             : ! **************************************************************************************************
    2682             : !> \brief ...
    2683             : !> \param fm_matrix ...
    2684             : !> \param neig ...
    2685             : !> \param fm_matrixb ...
    2686             : !> \param fm_matrixout ...
    2687             : !> \param op ...
    2688             : !> \param pos ...
    2689             : !> \param transa ...
    2690             : ! **************************************************************************************************
    2691        1184 :    SUBROUTINE cp_fm_cholesky_restore(fm_matrix, neig, fm_matrixb, fm_matrixout, op, pos, transa)
    2692             :       TYPE(cp_fm_type)                               :: fm_matrix
    2693             :       TYPE(cp_fm_type)                               :: fm_matrixb
    2694             :       TYPE(cp_fm_type)                               :: fm_matrixout
    2695             :       INTEGER, INTENT(IN)                            :: neig
    2696             :       CHARACTER(LEN=*), INTENT(IN)                   :: op
    2697             :       CHARACTER(LEN=*), INTENT(IN)                   :: pos
    2698             :       CHARACTER(LEN=*), INTENT(IN)                   :: transa
    2699             : 
    2700        1184 :       REAL(KIND=dp), DIMENSION(:, :), POINTER        :: a, b, outm
    2701        1184 :       REAL(KIND=sp), DIMENSION(:, :), POINTER        :: a_sp, b_sp, outm_sp
    2702             :       INTEGER                                        :: n, itype
    2703             :       REAL(KIND=dp)                                  :: alpha
    2704             : #if defined(__parallel)
    2705             :       INTEGER                                        :: i
    2706             :       INTEGER, DIMENSION(9)                          :: desca, descb, descout
    2707             : #endif
    2708             : 
    2709             :       ! notice b is the cholesky guy
    2710        1184 :       a => fm_matrix%local_data
    2711        1184 :       b => fm_matrixb%local_data
    2712        1184 :       outm => fm_matrixout%local_data
    2713        1184 :       a_sp => fm_matrix%local_data_sp
    2714        1184 :       b_sp => fm_matrixb%local_data_sp
    2715        1184 :       outm_sp => fm_matrixout%local_data_sp
    2716             : 
    2717        1184 :       n = fm_matrix%matrix_struct%nrow_global
    2718        1184 :       itype = 1
    2719             : 
    2720             : #if defined(__parallel)
    2721       11840 :       desca(:) = fm_matrix%matrix_struct%descriptor(:)
    2722       11840 :       descb(:) = fm_matrixb%matrix_struct%descriptor(:)
    2723       11840 :       descout(:) = fm_matrixout%matrix_struct%descriptor(:)
    2724        1184 :       alpha = 1.0_dp
    2725        5316 :       DO i = 1, neig
    2726        5316 :          IF (fm_matrix%use_sp) THEN
    2727           0 :             CALL pscopy(n, a_sp(1, 1), 1, i, desca, 1, outm_sp(1, 1), 1, i, descout, 1)
    2728             :          ELSE
    2729        4132 :             CALL pdcopy(n, a(1, 1), 1, i, desca, 1, outm(1, 1), 1, i, descout, 1)
    2730             :          END IF
    2731             :       END DO
    2732        1184 :       IF (op .EQ. "SOLVE") THEN
    2733        1184 :          IF (fm_matrix%use_sp) THEN
    2734             :             CALL pstrsm(pos, 'U', transa, 'N', n, neig, REAL(alpha, sp), b_sp(1, 1), 1, 1, descb, &
    2735           0 :                         outm_sp(1, 1), 1, 1, descout)
    2736             :          ELSE
    2737        1184 :             CALL pdtrsm(pos, 'U', transa, 'N', n, neig, alpha, b(1, 1), 1, 1, descb, outm(1, 1), 1, 1, descout)
    2738             :          END IF
    2739             :       ELSE
    2740           0 :          IF (fm_matrix%use_sp) THEN
    2741             :             CALL pstrmm(pos, 'U', transa, 'N', n, neig, REAL(alpha, sp), b_sp(1, 1), 1, 1, descb, &
    2742           0 :                         outm_sp(1, 1), 1, 1, descout)
    2743             :          ELSE
    2744           0 :             CALL pdtrmm(pos, 'U', transa, 'N', n, neig, alpha, b(1, 1), 1, 1, descb, outm(1, 1), 1, 1, descout)
    2745             :          END IF
    2746             :       END IF
    2747             : #else
    2748             :       alpha = 1.0_dp
    2749             :       IF (fm_matrix%use_sp) THEN
    2750             :          CALL scopy(neig*n, a_sp(1, 1), 1, outm_sp(1, 1), 1)
    2751             :       ELSE
    2752             :          CALL dcopy(neig*n, a(1, 1), 1, outm(1, 1), 1)
    2753             :       END IF
    2754             :       IF (op .EQ. "SOLVE") THEN
    2755             :          IF (fm_matrix%use_sp) THEN
    2756             :             CALL strsm(pos, 'U', transa, 'N', n, neig, REAL(alpha, sp), b_sp(1, 1), SIZE(b_sp, 1), outm_sp(1, 1), n)
    2757             :          ELSE
    2758             :             CALL dtrsm(pos, 'U', transa, 'N', n, neig, alpha, b(1, 1), SIZE(b, 1), outm(1, 1), n)
    2759             :          END IF
    2760             :       ELSE
    2761             :          IF (fm_matrix%use_sp) THEN
    2762             :             CALL strmm(pos, 'U', transa, 'N', n, neig, REAL(alpha, sp), b_sp(1, 1), n, outm_sp(1, 1), n)
    2763             :          ELSE
    2764             :             CALL dtrmm(pos, 'U', transa, 'N', n, neig, alpha, b(1, 1), n, outm(1, 1), n)
    2765             :          END IF
    2766             :       END IF
    2767             : #endif
    2768             : 
    2769        1184 :    END SUBROUTINE cp_fm_cholesky_restore
    2770             : 
    2771             : ! **************************************************************************************************
    2772             : !> \brief Calculates
    2773             : !>        yv = alpha*amat*xv + beta*yv
    2774             : !>        where amat: fm matrix
    2775             : !>              xv  : vector replicated
    2776             : !>              yv  : vector replicated
    2777             : !>        Defaults: alpha = 1, beta = 0
    2778             : ! **************************************************************************************************
    2779       16116 :    SUBROUTINE cp_fm_matvec(amat, xv, yv, alpha, beta)
    2780             :       TYPE(cp_fm_type), INTENT(IN)                   :: amat
    2781             :       REAL(KIND=dp), DIMENSION(:), INTENT(IN)        :: xv
    2782             :       REAL(KIND=dp), DIMENSION(:), INTENT(INOUT)     :: yv
    2783             :       REAL(KIND=dp), OPTIONAL, INTENT(IN)            :: alpha, beta
    2784             : 
    2785             :       INTEGER                                        :: na, nc, nx, ny
    2786             :       REAL(KIND=dp)                                  :: aval, bval
    2787             : #if defined(__parallel)
    2788             :       INTEGER                                        :: nrl, ncl, ic, ir
    2789       16116 :       INTEGER, DIMENSION(:), POINTER                 :: rind, cind
    2790       16116 :       REAL(KIND=dp), DIMENSION(:), ALLOCATABLE       :: xvl, yvl, yvm
    2791             : #endif
    2792             : 
    2793       16116 :       IF (amat%use_sp) THEN
    2794           0 :          CPABORT("cp_fm_matvec: SP option not available")
    2795             :       END IF
    2796       16116 :       aval = 1.0_dp
    2797       16116 :       IF (PRESENT(alpha)) aval = alpha
    2798       16116 :       bval = 0.0_dp
    2799       16116 :       IF (PRESENT(beta)) bval = beta
    2800             : 
    2801       16116 :       CALL cp_fm_get_info(amat, nrow_global=na, ncol_global=nc)
    2802       16116 :       nx = SIZE(xv)
    2803       16116 :       ny = SIZE(yv)
    2804       16116 :       IF ((nx /= ny) .OR. (nc /= nx)) THEN
    2805           0 :          CPABORT("cp_fm_matvec: incompatible dimensions")
    2806             :       END IF
    2807             : #if defined(__parallel)
    2808             :       CALL cp_fm_get_info(amat, nrow_local=nrl, ncol_local=ncl, &
    2809       16116 :                           row_indices=rind, col_indices=cind)
    2810      112812 :       ALLOCATE (xvl(ncl), yvl(nrl), yvm(ny))
    2811      152576 :       DO ic = 1, ncl
    2812      152576 :          xvl(ic) = xv(cind(ic))
    2813             :       END DO
    2814     1786706 :       yvl(1:nrl) = MATMUL(amat%local_data, xvl(1:ncl))
    2815      152576 :       yvm = 0.0_dp
    2816       84346 :       DO ir = 1, nrl
    2817       84346 :          yvm(rind(ir)) = yvl(ir)
    2818             :       END DO
    2819       16116 :       CALL amat%matrix_struct%para_env%sum(yvm)
    2820       16116 :       IF (bval == 0.0_dp) THEN
    2821       76288 :          yv = aval*yvm
    2822             :       ELSE
    2823       76288 :          yv = bval*yv + aval*yvm
    2824             :       END IF
    2825             : #else
    2826             :       IF (bval == 0.0_dp) THEN
    2827             :          yv = aval*MATMUL(amat%local_data, xv)
    2828             :       ELSE
    2829             :          yv = bval*yv + aval*MATMUL(amat%local_data, xv)
    2830             :       END IF
    2831             : #endif
    2832             : 
    2833       64464 :    END SUBROUTINE cp_fm_matvec
    2834             : 
    2835             : END MODULE cp_fm_basic_linalg

Generated by: LCOV version 1.15