LCOV - code coverage report
Current view: top level - src - matrix_exp.F (source / functions) Hit Total Coverage
Test: CP2K Regtests (git:4dc10b3) Lines: 402 416 96.6 %
Date: 2024-11-21 06:45:46 Functions: 7 7 100.0 %

          Line data    Source code
       1             : !--------------------------------------------------------------------------------------------------!
       2             : !   CP2K: A general program to perform molecular dynamics simulations                              !
       3             : !   Copyright 2000-2024 CP2K developers group <https://cp2k.org>                                   !
       4             : !                                                                                                  !
       5             : !   SPDX-License-Identifier: GPL-2.0-or-later                                                      !
       6             : !--------------------------------------------------------------------------------------------------!
       7             : 
       8             : ! **************************************************************************************************
       9             : !> \brief Routines for calculating a complex matrix exponential.
      10             : !> \author Florian Schiffmann (02.09)
      11             : ! **************************************************************************************************
      12             : 
      13             : MODULE matrix_exp
      14             : 
      15             :    USE cp_cfm_basic_linalg,             ONLY: cp_cfm_scale_and_add,&
      16             :                                               cp_cfm_solve
      17             :    USE cp_cfm_types,                    ONLY: cp_cfm_create,&
      18             :                                               cp_cfm_release,&
      19             :                                               cp_cfm_set_all,&
      20             :                                               cp_cfm_to_cfm,&
      21             :                                               cp_cfm_type
      22             :    USE cp_fm_basic_linalg,              ONLY: cp_complex_fm_gemm,&
      23             :                                               cp_fm_scale_and_add,&
      24             :                                               cp_fm_solve
      25             :    USE cp_fm_struct,                    ONLY: cp_fm_struct_double,&
      26             :                                               cp_fm_struct_release,&
      27             :                                               cp_fm_struct_type
      28             :    USE cp_fm_types,                     ONLY: cp_fm_create,&
      29             :                                               cp_fm_get_info,&
      30             :                                               cp_fm_release,&
      31             :                                               cp_fm_set_all,&
      32             :                                               cp_fm_to_fm,&
      33             :                                               cp_fm_type
      34             :    USE cp_log_handling,                 ONLY: cp_to_string
      35             :    USE kinds,                           ONLY: dp
      36             :    USE mathconstants,                   ONLY: fac,&
      37             :                                               z_one,&
      38             :                                               z_zero
      39             :    USE message_passing,                 ONLY: mp_comm_type,&
      40             :                                               mp_para_env_type
      41             :    USE parallel_gemm_api,               ONLY: parallel_gemm
      42             : #include "./base/base_uses.f90"
      43             : 
      44             :    IMPLICIT NONE
      45             : 
      46             :    PRIVATE
      47             : 
      48             :    CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'matrix_exp'
      49             : 
      50             :    PUBLIC :: taylor_only_imaginary, &
      51             :              taylor_full_complex, &
      52             :              exp_pade_full_complex, &
      53             :              exp_pade_only_imaginary, &
      54             :              get_nsquare_norder, &
      55             :              arnoldi, exp_pade_real
      56             : 
      57             : CONTAINS
      58             : 
      59             : ! **************************************************************************************************
      60             : !> \brief specialized subroutine for purely imaginary matrix exponentials
      61             : !> \param exp_H ...
      62             : !> \param im_matrix ...
      63             : !> \param nsquare ...
      64             : !> \param ntaylor ...
      65             : !> \author Florian Schiffmann (02.09)
      66             : ! **************************************************************************************************
      67             : 
      68         700 :    SUBROUTINE taylor_only_imaginary(exp_H, im_matrix, nsquare, ntaylor)
      69             :       TYPE(cp_fm_type), DIMENSION(2)                     :: exp_H
      70             :       TYPE(cp_fm_type), INTENT(IN)                       :: im_matrix
      71             :       INTEGER, INTENT(in)                                :: nsquare, ntaylor
      72             : 
      73             :       CHARACTER(len=*), PARAMETER :: routineN = 'taylor_only_imaginary'
      74             :       REAL(KIND=dp), PARAMETER                           :: one = 1.0_dp, zero = 0.0_dp
      75             : 
      76             :       INTEGER                                            :: handle, i, ndim, nloop
      77             :       REAL(KIND=dp)                                      :: square_fac, Tfac, tmp
      78             :       REAL(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
      79         140 :          POINTER                                         :: local_data_im
      80             :       TYPE(cp_fm_type)                                   :: T1, T2, Tres_im, Tres_re
      81             : 
      82         140 :       CALL timeset(routineN, handle)
      83             : 
      84         140 :       CALL cp_fm_get_info(im_matrix, local_data=local_data_im)
      85         140 :       ndim = im_matrix%matrix_struct%nrow_global
      86             : 
      87         140 :       square_fac = 1.0_dp/(2.0_dp**REAL(nsquare, dp))
      88             : !    CALL cp_fm_scale(square_fac,im_matrix)
      89             :       CALL cp_fm_create(T1, &
      90             :                         matrix_struct=im_matrix%matrix_struct, &
      91         140 :                         name="T1")
      92             : 
      93             :       CALL cp_fm_create(T2, &
      94             :                         matrix_struct=T1%matrix_struct, &
      95         140 :                         name="T2")
      96             :       CALL cp_fm_create(Tres_im, &
      97             :                         matrix_struct=T1%matrix_struct, &
      98         140 :                         name="T3")
      99             :       CALL cp_fm_create(Tres_re, &
     100             :                         matrix_struct=T1%matrix_struct, &
     101         140 :                         name="Tres")
     102         140 :       tmp = 1.0_dp
     103             : 
     104         140 :       CALL cp_fm_set_all(Tres_re, zero, one)
     105         140 :       CALL cp_fm_set_all(Tres_im, zero, zero)
     106         140 :       CALL cp_fm_set_all(T1, zero, one)
     107             : 
     108         140 :       Tfac = one
     109         140 :       nloop = CEILING(REAL(ntaylor, dp)/2.0_dp)
     110             : 
     111         700 :       DO i = 1, nloop
     112         560 :          tmp = tmp*(REAL(i, dp)*2.0_dp - 1.0_dp)
     113             :          CALL parallel_gemm("N", "N", ndim, ndim, ndim, square_fac, im_matrix, T1, zero, &
     114             :                             !       CALL parallel_gemm("N","N",ndim,ndim,ndim,one,im_matrix,T1,zero,&
     115         560 :                             T2)
     116         560 :          Tfac = 1._dp/tmp
     117         560 :          IF (MOD(i, 2) == 0) Tfac = -Tfac
     118         560 :          CALL cp_fm_scale_and_add(one, Tres_im, Tfac, T2)
     119         560 :          tmp = tmp*REAL(i, dp)*2.0_dp
     120             :          CALL parallel_gemm("N", "N", ndim, ndim, ndim, square_fac, im_matrix, T2, zero, &
     121             :                             !       CALL parallel_gemm("N","N",ndim,ndim,ndim,one,im_matrix,T2,zero,&
     122         560 :                             T1)
     123         560 :          Tfac = 1._dp/tmp
     124         560 :          IF (MOD(i, 2) == 1) Tfac = -Tfac
     125         700 :          CALL cp_fm_scale_and_add(one, Tres_re, Tfac, T1)
     126             : 
     127             :       END DO
     128             : 
     129         140 :       IF (nsquare .GT. 0) THEN
     130           0 :          DO i = 1, nsquare
     131             :             CALL cp_complex_fm_gemm("N", "N", ndim, ndim, ndim, one, Tres_re, Tres_im, &
     132           0 :                                     Tres_re, Tres_im, zero, exp_H(1), exp_H(2))
     133             : 
     134           0 :             CALL cp_fm_to_fm(exp_H(1), Tres_re)
     135           0 :             CALL cp_fm_to_fm(exp_H(2), Tres_im)
     136             :          END DO
     137             :       ELSE
     138         140 :          CALL cp_fm_to_fm(Tres_re, exp_H(1))
     139         140 :          CALL cp_fm_to_fm(Tres_im, exp_H(2))
     140             :       END IF
     141             : 
     142         140 :       CALL cp_fm_release(T1)
     143         140 :       CALL cp_fm_release(T2)
     144         140 :       CALL cp_fm_release(Tres_re)
     145         140 :       CALL cp_fm_release(Tres_im)
     146             : 
     147         140 :       CALL timestop(handle)
     148             : 
     149         140 :    END SUBROUTINE taylor_only_imaginary
     150             : 
     151             : ! **************************************************************************************************
     152             : !> \brief subroutine for general complex matrix exponentials
     153             : !>        on input a separate cp_fm_type for real and complex part
     154             : !>        on output a size 2 cp_fm_type, first element is the real part of
     155             : !>        the exponential second the imaginary
     156             : !> \param exp_H ...
     157             : !> \param re_part ...
     158             : !> \param im_part ...
     159             : !> \param nsquare ...
     160             : !> \param ntaylor ...
     161             : !> \author Florian Schiffmann (02.09)
     162             : ! **************************************************************************************************
     163             : 
     164        2250 :    SUBROUTINE taylor_full_complex(exp_H, re_part, im_part, nsquare, ntaylor)
     165             :       TYPE(cp_fm_type), DIMENSION(2)                     :: exp_H
     166             :       TYPE(cp_fm_type), INTENT(IN)                       :: re_part, im_part
     167             :       INTEGER, INTENT(in)                                :: nsquare, ntaylor
     168             : 
     169             :       CHARACTER(len=*), PARAMETER :: routineN = 'taylor_full_complex'
     170             : 
     171             :       COMPLEX(KIND=dp)                                   :: Tfac
     172             :       INTEGER                                            :: handle, i, ndim
     173             :       REAL(KIND=dp)                                      :: square_fac, tmp
     174             :       REAL(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
     175         450 :          POINTER                                         :: local_data_im, local_data_re
     176             :       TYPE(cp_cfm_type)                                  :: T1, T2, T3, Tres
     177             : 
     178         450 :       CALL timeset(routineN, handle)
     179         450 :       CALL cp_fm_get_info(re_part, local_data=local_data_re)
     180         450 :       CALL cp_fm_get_info(im_part, local_data=local_data_im)
     181         450 :       ndim = re_part%matrix_struct%nrow_global
     182             : 
     183             :       CALL cp_cfm_create(T1, &
     184             :                          matrix_struct=re_part%matrix_struct, &
     185         450 :                          name="T1")
     186             : 
     187         450 :       square_fac = 2.0_dp**REAL(nsquare, dp)
     188             : 
     189       38538 :       T1%local_data = CMPLX(local_data_re/square_fac, local_data_im/square_fac, KIND=dp)
     190             : 
     191             :       CALL cp_cfm_create(T2, &
     192             :                          matrix_struct=T1%matrix_struct, &
     193         450 :                          name="T2")
     194             :       CALL cp_cfm_create(T3, &
     195             :                          matrix_struct=T1%matrix_struct, &
     196         450 :                          name="T3")
     197             :       CALL cp_cfm_create(Tres, &
     198             :                          matrix_struct=T1%matrix_struct, &
     199         450 :                          name="Tres")
     200         450 :       tmp = 1.0_dp
     201         450 :       CALL cp_cfm_set_all(Tres, z_zero, z_one)
     202         450 :       CALL cp_cfm_set_all(T2, z_zero, z_one)
     203         450 :       Tfac = z_one
     204             : 
     205        3568 :       DO i = 1, ntaylor
     206        3118 :          tmp = tmp*REAL(i, dp)
     207             :          CALL parallel_gemm("N", "N", ndim, ndim, ndim, z_one, T1, T2, z_zero, &
     208        3118 :                             T3)
     209        3118 :          Tfac = CMPLX(1._dp/tmp, 0.0_dp, kind=dp)
     210        3118 :          CALL cp_cfm_scale_and_add(z_one, Tres, Tfac, T3)
     211        3568 :          CALL cp_cfm_to_cfm(T3, T2)
     212             :       END DO
     213             : 
     214         450 :       IF (nsquare .GT. 0) THEN
     215         376 :          DO i = 1, nsquare
     216             :             CALL parallel_gemm("N", "N", ndim, ndim, ndim, z_one, Tres, Tres, z_zero, &
     217         194 :                                T2)
     218         376 :             CALL cp_cfm_to_cfm(T2, Tres)
     219             :          END DO
     220             :       END IF
     221             : 
     222       38538 :       exp_H(1)%local_data = REAL(Tres%local_data, KIND=dp)
     223       38538 :       exp_H(2)%local_data = AIMAG(Tres%local_data)
     224             : 
     225         450 :       CALL cp_cfm_release(T1)
     226         450 :       CALL cp_cfm_release(T2)
     227         450 :       CALL cp_cfm_release(T3)
     228         450 :       CALL cp_cfm_release(Tres)
     229         450 :       CALL timestop(handle)
     230             : 
     231         450 :    END SUBROUTINE taylor_full_complex
     232             : 
     233             : ! **************************************************************************************************
     234             : !> \brief optimization function for pade/taylor order and number of squaring steps
     235             : !> \param norm ...
     236             : !> \param nsquare ...
     237             : !> \param norder ...
     238             : !> \param eps_exp ...
     239             : !> \param method ...
     240             : !> \param do_emd ...
     241             : !> \author Florian Schiffmann (02.09)
     242             : ! **************************************************************************************************
     243        4506 :    SUBROUTINE get_nsquare_norder(norm, nsquare, norder, eps_exp, method, do_emd)
     244             : 
     245             :       REAL(dp), INTENT(in)                               :: norm
     246             :       INTEGER, INTENT(out)                               :: nsquare, norder
     247             :       REAL(dp), INTENT(in)                               :: eps_exp
     248             :       INTEGER, INTENT(in)                                :: method
     249             :       LOGICAL, INTENT(in)                                :: do_emd
     250             : 
     251             :       INTEGER                                            :: cost, i, iscale, orders(3), p, &
     252             :                                                             prev_cost, q
     253             :       LOGICAL                                            :: new_scale
     254             :       REAL(dp)                                           :: D, eval, myval, N, scaleD, scaleN
     255             : 
     256        4506 :       orders(:) = (/12, 12, 12/)
     257        4506 :       IF (method == 2) THEN
     258       18426 :          DO iscale = 0, 12
     259       17758 :             new_scale = .FALSE.
     260       17758 :             eval = norm/(2.0_dp**REAL(iscale, dp))
     261      116706 :             DO q = 1, 12
     262      296996 :                DO p = MAX(1, q - 1), q
     263             :                   IF (p > q) EXIT
     264             :                   D = 1.0_dp
     265             :                   N = 1.0_dp
     266     1397486 :                   DO i = 1, q
     267     1199438 :                      IF (i .LE. p) scaleN = fac(p + q - i)*fac(p)/(fac(p + q)*fac(i)*fac(p - i))
     268     1199438 :                      scaleD = (-1.0)**i*fac(p + q - i)*fac(q)/(fac(p + q)*fac(i)*fac(q - i))
     269     1199438 :                      IF (i .LE. p) N = N + scaleN*eval**i
     270     1397486 :                      D = D + scaleD*eval**i
     271             :                   END DO
     272      296996 :                   IF (ABS((EXP(norm) - (N/D)**(2.0_dp**iscale))/MAX(1.0_dp, EXP(norm))) .LE. eps_exp) THEN
     273       10548 :                      IF (do_emd) THEN
     274          70 :                         cost = iscale + q
     275          70 :                         prev_cost = orders(1) + orders(2)
     276             :                      ELSE
     277       10478 :                         cost = iscale + CEILING(REAL(q, dp)/3.0_dp)
     278       10478 :                         prev_cost = orders(1) + CEILING(REAL(orders(2), dp)/3.0_dp)
     279             :                      END IF
     280       10548 :                      IF (cost .LT. prev_cost) THEN
     281       17344 :                         orders(:) = (/iscale, q, p/)
     282        4336 :                         myval = (N/D)**(2.0_dp**iscale)
     283             :                      END IF
     284             :                      new_scale = .TRUE.
     285             :                      EXIT
     286             :                   END IF
     287             :                END DO
     288      106158 :                IF (new_scale) EXIT
     289             :             END DO
     290       18426 :             IF (iscale .GE. orders(1) + orders(2) .AND. new_scale) EXIT
     291             :          END DO
     292         170 :       ELSE IF (method == 1) THEN
     293         170 :          q = 0
     294         170 :          eval = norm
     295        1324 :          DO iscale = 0, 6
     296        1166 :             new_scale = .FALSE.
     297        1166 :             IF (iscale .GE. 1) eval = norm/(2.0_dp**REAL(iscale, dp))
     298        6096 :             DO p = 1, 20
     299       27370 :                D = 1.0_dp
     300             :                N = 1.0_dp
     301       27370 :                DO i = 1, p
     302       21278 :                   scaleN = 1.0_dp/fac(i)
     303       27370 :                   N = N + scaleN*(eval**REAL(i, dp))
     304             :                END DO
     305        6096 :                IF (ABS((EXP(norm) - N**(2.0_dp**REAL(iscale, dp)))/MAX(1.0_dp, EXP(norm))) .LE. eps_exp) THEN
     306        1162 :                   IF (do_emd) THEN
     307         304 :                      cost = iscale + p
     308         304 :                      prev_cost = orders(1) + orders(2)
     309             :                   ELSE
     310         858 :                      cost = iscale + CEILING(REAL(p, dp)/3.0_dp)
     311         858 :                      prev_cost = orders(1) + CEILING(REAL(orders(2), dp)/3.0_dp)
     312             :                   END IF
     313        1162 :                   IF (cost .LT. prev_cost) THEN
     314         808 :                      orders(:) = (/iscale, p, 0/)
     315         202 :                      myval = (N)**(2.0_dp**iscale)
     316             :                   END IF
     317             :                   new_scale = .TRUE.
     318             :                   EXIT
     319             :                END IF
     320             :             END DO
     321        1324 :             IF (iscale .GE. orders(1) + orders(2) .AND. new_scale) EXIT
     322             :          END DO
     323             :       END IF
     324             : 
     325        4506 :       nsquare = orders(1)
     326        4506 :       norder = orders(2)
     327             : 
     328        4506 :    END SUBROUTINE get_nsquare_norder
     329             : 
     330             : ! **************************************************************************************************
     331             : !> \brief exponential of a complex matrix,
     332             : !>        calculated using pade approximation together with scaling and squaring
     333             : !> \param exp_H ...
     334             : !> \param re_part ...
     335             : !> \param im_part ...
     336             : !> \param nsquare ...
     337             : !> \param npade ...
     338             : !> \author Florian Schiffmann (02.09)
     339             : ! **************************************************************************************************
     340             : 
     341        2848 :    SUBROUTINE exp_pade_full_complex(exp_H, re_part, im_part, nsquare, npade)
     342             :       TYPE(cp_fm_type), DIMENSION(2)                     :: exp_H
     343             :       TYPE(cp_fm_type), INTENT(IN)                       :: re_part, im_part
     344             :       INTEGER, INTENT(in)                                :: nsquare, npade
     345             : 
     346             :       CHARACTER(len=*), PARAMETER :: routineN = 'exp_pade_full_complex'
     347             : 
     348             :       COMPLEX(KIND=dp)                                   :: scaleD, scaleN
     349             :       INTEGER                                            :: handle, i, ldim, ndim, p, q
     350             :       REAL(KIND=dp)                                      :: square_fac, tmp
     351             :       REAL(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
     352         356 :          POINTER                                         :: local_data_im, local_data_re
     353             :       TYPE(cp_cfm_type)                                  :: Dpq, fin_p, T1
     354        1068 :       TYPE(cp_cfm_type), DIMENSION(2)                    :: mult_p
     355             :       TYPE(cp_cfm_type), TARGET                          :: Npq, T2, Tres
     356             : 
     357         356 :       p = npade
     358         356 :       q = npade
     359             : 
     360         356 :       CALL timeset(routineN, handle)
     361             :       CALL cp_fm_get_info(re_part, local_data=local_data_re, ncol_local=ldim, &
     362         356 :                           nrow_global=ndim)
     363         356 :       CALL cp_fm_get_info(im_part, local_data=local_data_im)
     364             : 
     365             :       CALL cp_cfm_create(Dpq, &
     366             :                          matrix_struct=re_part%matrix_struct, &
     367         356 :                          name="Dpq")
     368             : 
     369         356 :       square_fac = 2.0_dp**REAL(nsquare, dp)
     370             : 
     371             :       CALL cp_cfm_create(T1, &
     372             :                          matrix_struct=Dpq%matrix_struct, &
     373         356 :                          name="T1")
     374             : 
     375             :       CALL cp_cfm_create(T2, &
     376             :                          matrix_struct=T1%matrix_struct, &
     377         356 :                          name="T2")
     378             :       CALL cp_cfm_create(Npq, &
     379             :                          matrix_struct=T1%matrix_struct, &
     380         356 :                          name="Npq")
     381             :       CALL cp_cfm_create(Tres, &
     382             :                          matrix_struct=T1%matrix_struct, &
     383         356 :                          name="Tres")
     384             : 
     385        4218 :       DO i = 1, ldim
     386       33761 :          T2%local_data(:, i) = CMPLX(local_data_re(:, i)/square_fac, local_data_im(:, i)/square_fac, KIND=dp)
     387             :       END DO
     388         356 :       CALL cp_cfm_to_cfm(T2, T1)
     389         356 :       mult_p(1) = T2
     390         356 :       mult_p(2) = Tres
     391         356 :       tmp = 1.0_dp
     392         356 :       CALL cp_cfm_set_all(Npq, z_zero, z_one)
     393         356 :       CALL cp_cfm_set_all(Dpq, z_zero, z_one)
     394             : 
     395         356 :       CALL cp_cfm_scale_and_add(z_one, Npq, z_one*0.5_dp, T2)
     396         356 :       CALL cp_cfm_scale_and_add(z_one, Dpq, -z_one*0.5_dp, T2)
     397             : 
     398         356 :       IF (npade .GT. 2) THEN
     399        1392 :          DO i = 2, npade
     400        1044 :             IF (i .LE. p) scaleN = CMPLX(fac(p + q - i)*fac(p)/(fac(p + q)*fac(i)*fac(p - i)), 0.0_dp, kind=dp)
     401        1044 :             scaleD = CMPLX((-1.0_dp)**i*fac(p + q - i)*fac(q)/(fac(p + q)*fac(i)*fac(q - i)), 0.0_dp, kind=dp)
     402             :             CALL parallel_gemm("N", "N", ndim, ndim, ndim, z_one, T1, mult_p(MOD(i, 2) + 1), z_zero, &
     403        1044 :                                mult_p(MOD(i + 1, 2) + 1))
     404        1044 :             IF (i .LE. p) CALL cp_cfm_scale_and_add(z_one, Npq, scaleN, mult_p(MOD(i + 1, 2) + 1))
     405        1392 :             IF (i .LE. q) CALL cp_cfm_scale_and_add(z_one, Dpq, scaleD, mult_p(MOD(i + 1, 2) + 1))
     406             :          END DO
     407             :       END IF
     408             : 
     409         356 :       CALL cp_cfm_solve(Dpq, Npq)
     410             : 
     411         356 :       mult_p(2) = Npq
     412         356 :       mult_p(1) = Tres
     413         356 :       IF (nsquare .GT. 0) THEN
     414           0 :          DO i = 1, nsquare
     415             :             CALL parallel_gemm("N", "N", ndim, ndim, ndim, z_one, mult_p(MOD(i, 2) + 1), mult_p(MOD(i, 2) + 1), z_zero, &
     416           0 :                                mult_p(MOD(i + 1, 2) + 1))
     417           0 :             fin_p = mult_p(MOD(i + 1, 2) + 1)
     418             :          END DO
     419             :       ELSE
     420         356 :          fin_p = Npq
     421             :       END IF
     422        4218 :       DO i = 1, ldim
     423       33405 :          exp_H(1)%local_data(:, i) = REAL(fin_p%local_data(:, i), KIND=dp)
     424       33761 :          exp_H(2)%local_data(:, i) = AIMAG(fin_p%local_data(:, i))
     425             :       END DO
     426             : 
     427         356 :       CALL cp_cfm_release(Npq)
     428         356 :       CALL cp_cfm_release(Dpq)
     429         356 :       CALL cp_cfm_release(T1)
     430         356 :       CALL cp_cfm_release(T2)
     431         356 :       CALL cp_cfm_release(Tres)
     432         356 :       CALL timestop(handle)
     433             : 
     434         712 :    END SUBROUTINE exp_pade_full_complex
     435             : 
     436             : ! **************************************************************************************************
     437             : !> \brief exponential of a complex matrix,
     438             : !>        calculated using pade approximation together with scaling and squaring
     439             : !> \param exp_H ...
     440             : !> \param im_part ...
     441             : !> \param nsquare ...
     442             : !> \param npade ...
     443             : !> \author Florian Schiffmann (02.09)
     444             : ! **************************************************************************************************
     445             : 
     446        2656 :    SUBROUTINE exp_pade_only_imaginary(exp_H, im_part, nsquare, npade)
     447             :       TYPE(cp_fm_type), DIMENSION(2)                     :: exp_H
     448             :       TYPE(cp_fm_type), INTENT(IN)                       :: im_part
     449             :       INTEGER, INTENT(in)                                :: nsquare, npade
     450             : 
     451             :       CHARACTER(len=*), PARAMETER :: routineN = 'exp_pade_only_imaginary'
     452             :       REAL(KIND=dp), PARAMETER                           :: rone = 1.0_dp, rzero = 0.0_dp
     453             : 
     454             :       COMPLEX(KIND=dp)                                   :: scaleD, scaleN
     455             :       INTEGER                                            :: handle, i, j, k, ldim, ndim, p, q
     456             :       REAL(KIND=dp)                                      :: my_fac, square_fac
     457             :       REAL(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
     458         332 :          POINTER                                         :: local_data_im
     459             :       TYPE(cp_cfm_type)                                  :: Dpq, fin_p
     460         996 :       TYPE(cp_cfm_type), DIMENSION(2)                    :: cmult_p
     461             :       TYPE(cp_cfm_type), TARGET                          :: Npq, T1
     462             :       TYPE(cp_fm_type)                                   :: T2, Tres
     463             : 
     464         332 :       CALL timeset(routineN, handle)
     465         332 :       p = npade
     466         332 :       q = npade !p==q seems to be necessary for the rest of the code
     467             : 
     468         332 :       CALL cp_fm_get_info(im_part, local_data=local_data_im, ncol_local=ldim, nrow_global=ndim)
     469         332 :       square_fac = 1.0_dp/(2.0_dp**REAL(nsquare, dp))
     470             : 
     471             :       CALL cp_cfm_create(Dpq, &
     472             :                          matrix_struct=im_part%matrix_struct, &
     473         332 :                          name="Dpq")
     474             : 
     475             :       CALL cp_cfm_create(Npq, &
     476             :                          matrix_struct=Dpq%matrix_struct, &
     477         332 :                          name="Npq")
     478             : 
     479             :       CALL cp_cfm_create(T1, &
     480             :                          matrix_struct=Dpq%matrix_struct, &
     481         332 :                          name="T1")
     482             : 
     483             :       CALL cp_fm_create(T2, &
     484             :                         matrix_struct=T1%matrix_struct, &
     485         332 :                         name="T2")
     486             : 
     487             :       CALL cp_fm_create(Tres, &
     488             :                         matrix_struct=T1%matrix_struct, &
     489         332 :                         name="Tres")
     490             : 
     491             : !    DO i=1,ldim
     492             : !       local_data_im(:,i)=local_data_im(:,i)/square_fac
     493             : !    END DO
     494             : 
     495         332 :       CALL cp_fm_to_fm(im_part, T2)
     496             : 
     497         332 :       CALL cp_cfm_set_all(Npq, z_zero, z_one)
     498         332 :       CALL cp_cfm_set_all(Dpq, z_zero, z_one)
     499             : 
     500        5006 :       DO i = 1, ldim
     501       47633 :          Npq%local_data(:, i) = Npq%local_data(:, i) + CMPLX(rzero, 0.5_dp*square_fac*local_data_im(:, i), dp)
     502       47965 :          Dpq%local_data(:, i) = Dpq%local_data(:, i) - CMPLX(rzero, 0.5_dp*square_fac*local_data_im(:, i), dp)
     503             :       END DO
     504             : 
     505         332 :       IF (npade .GT. 2) THEN
     506         960 :          DO j = 1, FLOOR(npade/2.0_dp)
     507         640 :             i = 2*j
     508         640 :             my_fac = (-rone)**j
     509         640 :             IF (i .LE. p) scaleN = CMPLX(my_fac*fac(p + q - i)*fac(p)/(fac(p + q)*fac(i)*fac(p - i)), 0.0_dp, dp)
     510         640 :             scaleD = CMPLX(my_fac*fac(p + q - i)*fac(q)/(fac(p + q)*fac(i)*fac(q - i)), 0.0_dp, dp)
     511         640 :             CALL parallel_gemm("N", "N", ndim, ndim, ndim, square_fac, im_part, T2, rzero, Tres)
     512             : 
     513        9748 :             DO k = 1, ldim
     514       93826 :                Npq%local_data(:, k) = Npq%local_data(:, k) + scaleN*Tres%local_data(:, k)
     515       94466 :                Dpq%local_data(:, k) = Dpq%local_data(:, k) + scaleD*Tres%local_data(:, k)
     516             :             END DO
     517             : 
     518         960 :             IF (2*j + 1 .LE. q) THEN
     519         320 :                i = 2*j + 1
     520         320 :                IF (i .LE. p) scaleN = CMPLX(my_fac*fac(p + q - i)*fac(p)/(fac(p + q)*fac(i)*fac(p - i)), rzero, dp)
     521         320 :                scaleD = CMPLX(-my_fac*fac(p + q - i)*fac(q)/(fac(p + q)*fac(i)*fac(q - i)), rzero, dp)
     522         320 :                CALL parallel_gemm("N", "N", ndim, ndim, ndim, square_fac, im_part, Tres, rzero, T2)
     523             : 
     524        4874 :                DO k = 1, ldim
     525       46913 :                   Npq%local_data(:, k) = Npq%local_data(:, k) + scaleN*CMPLX(rzero, T2%local_data(:, k), dp)
     526       47233 :                   Dpq%local_data(:, k) = Dpq%local_data(:, k) + scaleD*CMPLX(rzero, T2%local_data(:, k), dp)
     527             :                END DO
     528             :             END IF
     529             :          END DO
     530             :       END IF
     531             : 
     532         332 :       CALL cp_cfm_solve(Dpq, Npq)
     533             : 
     534         332 :       cmult_p(2) = Npq
     535         332 :       cmult_p(1) = T1
     536         332 :       IF (nsquare .GT. 0) THEN
     537           0 :          DO i = 1, nsquare
     538             :             CALL parallel_gemm("N", "N", ndim, ndim, ndim, z_one, cmult_p(MOD(i, 2) + 1), cmult_p(MOD(i, 2) + 1), z_zero, &
     539           0 :                                cmult_p(MOD(i + 1, 2) + 1))
     540           0 :             fin_p = cmult_p(MOD(i + 1, 2) + 1)
     541             :          END DO
     542             :       ELSE
     543         332 :          fin_p = Npq
     544             :       END IF
     545             : 
     546        5006 :       DO k = 1, ldim
     547       47633 :          exp_H(1)%local_data(:, k) = REAL(fin_p%local_data(:, k), KIND=dp)
     548       47965 :          exp_H(2)%local_data(:, k) = AIMAG(fin_p%local_data(:, k))
     549             :       END DO
     550             : 
     551         332 :       CALL cp_cfm_release(Npq)
     552         332 :       CALL cp_cfm_release(Dpq)
     553         332 :       CALL cp_cfm_release(T1)
     554         332 :       CALL cp_fm_release(T2)
     555         332 :       CALL cp_fm_release(Tres)
     556         332 :       CALL timestop(handle)
     557         332 :    END SUBROUTINE exp_pade_only_imaginary
     558             : 
     559             : ! **************************************************************************************************
     560             : !> \brief exponential of a real matrix,
     561             : !>        calculated using pade approximation together with scaling and squaring
     562             : !> \param exp_H ...
     563             : !> \param matrix ...
     564             : !> \param nsquare ...
     565             : !> \param npade ...
     566             : !> \author Florian Schiffmann (02.09)
     567             : ! **************************************************************************************************
     568             : 
     569       34368 :    SUBROUTINE exp_pade_real(exp_H, matrix, nsquare, npade)
     570             :       TYPE(cp_fm_type), INTENT(IN)                       :: exp_H, matrix
     571             :       INTEGER, INTENT(in)                                :: nsquare, npade
     572             : 
     573             :       CHARACTER(len=*), PARAMETER                        :: routineN = 'exp_pade_real'
     574             :       REAL(KIND=dp), PARAMETER                           :: one = 1.0_dp, zero = 0.0_dp
     575             : 
     576             :       INTEGER                                            :: handle, i, j, k, ldim, ndim, p, q
     577             :       REAL(KIND=dp)                                      :: my_fac, scaleD, scaleN, square_fac
     578             :       REAL(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
     579        4296 :          POINTER                                         :: local_data
     580             :       TYPE(cp_fm_type)                                   :: Dpq, fin_p
     581       12888 :       TYPE(cp_fm_type), DIMENSION(2)                     :: mult_p
     582             :       TYPE(cp_fm_type), TARGET                           :: Npq, T1, T2, Tres
     583             : 
     584        4296 :       CALL timeset(routineN, handle)
     585        4296 :       p = npade
     586        4296 :       q = npade !p==q seems to be necessary for the rest of the code
     587             : 
     588        4296 :       CALL cp_fm_get_info(matrix, local_data=local_data, ncol_local=ldim, nrow_global=ndim)
     589        4296 :       square_fac = 2.0_dp**REAL(nsquare, dp)
     590             : 
     591             :       CALL cp_fm_create(Dpq, &
     592             :                         matrix_struct=matrix%matrix_struct, &
     593        4296 :                         name="Dpq")
     594             : 
     595             :       CALL cp_fm_create(Npq, &
     596             :                         matrix_struct=Dpq%matrix_struct, &
     597        4296 :                         name="Npq")
     598             : 
     599             :       CALL cp_fm_create(T1, &
     600             :                         matrix_struct=Dpq%matrix_struct, &
     601        4296 :                         name="T1")
     602             : 
     603             :       CALL cp_fm_create(T2, &
     604             :                         matrix_struct=T1%matrix_struct, &
     605        4296 :                         name="T2")
     606             : 
     607             :       CALL cp_fm_create(Tres, &
     608             :                         matrix_struct=T1%matrix_struct, &
     609        4296 :                         name="Tres")
     610             : 
     611       62878 :       DO i = 1, ldim
     612      999496 :          T2%local_data(:, i) = local_data(:, i)/square_fac
     613             :       END DO
     614             : 
     615        4296 :       CALL cp_fm_to_fm(T2, T1)
     616        4296 :       CALL cp_fm_set_all(Npq, zero, one)
     617        4296 :       CALL cp_fm_set_all(Dpq, zero, one)
     618             : 
     619       62878 :       DO i = 1, ldim
     620      995200 :          Npq%local_data(:, i) = Npq%local_data(:, i) + 0.5_dp*local_data(:, i)
     621      531187 :          Dpq%local_data(:, i) = Dpq%local_data(:, i) - 0.5_dp*local_data(:, i)
     622             :       END DO
     623             : 
     624        4296 :       mult_p(1) = T2
     625        4296 :       mult_p(2) = Tres
     626             : 
     627        4296 :       IF (npade .GE. 2) THEN
     628        5686 :          DO j = 2, npade
     629        4106 :             my_fac = (-1.0_dp)**j
     630        4106 :             scaleN = fac(p + q - j)*fac(p)/(fac(p + q)*fac(j)*fac(p - j))
     631        4106 :             scaleD = my_fac*fac(p + q - j)*fac(q)/(fac(p + q)*fac(j)*fac(q - j))
     632             :             CALL parallel_gemm("N", "N", ndim, ndim, ndim, one, mult_p(MOD(j, 2) + 1), T1, &
     633        4106 :                                zero, mult_p(MOD(j + 1, 2) + 1))
     634             : 
     635       57542 :             DO k = 1, ldim
     636      500564 :                Npq%local_data(:, k) = Npq%local_data(:, k) + scaleN*mult_p(MOD(j + 1, 2) + 1)%local_data(:, k)
     637      504670 :                Dpq%local_data(:, k) = Dpq%local_data(:, k) + scaleD*mult_p(MOD(j + 1, 2) + 1)%local_data(:, k)
     638             :             END DO
     639             :          END DO
     640             :       END IF
     641             : 
     642        4296 :       CALL cp_fm_solve(Dpq, Npq)
     643             : 
     644        4296 :       mult_p(2) = Npq
     645        4296 :       mult_p(1) = T1
     646        4296 :       IF (nsquare .GT. 0) THEN
     647           0 :          DO i = 1, nsquare
     648             :             CALL parallel_gemm("N", "N", ndim, ndim, ndim, one, mult_p(MOD(i, 2) + 1), mult_p(MOD(i, 2) + 1), zero, &
     649           0 :                                mult_p(MOD(i + 1, 2) + 1))
     650             :          END DO
     651           0 :          fin_p = mult_p(MOD(nsquare + 1, 2) + 1)
     652             :       ELSE
     653        4296 :          fin_p = Npq
     654             :       END IF
     655             : 
     656       62878 :       DO k = 1, ldim
     657      531187 :          exp_H%local_data(:, k) = fin_p%local_data(:, k)
     658             :       END DO
     659             : 
     660        4296 :       CALL cp_fm_release(Npq)
     661        4296 :       CALL cp_fm_release(Dpq)
     662        4296 :       CALL cp_fm_release(T1)
     663        4296 :       CALL cp_fm_release(T2)
     664        4296 :       CALL cp_fm_release(Tres)
     665        4296 :       CALL timestop(handle)
     666             : 
     667        8592 :    END SUBROUTINE exp_pade_real
     668             : 
     669             : ! ***************************************************************************************************
     670             : !> \brief exponential of a complex matrix,
     671             : !>        calculated using arnoldi subspace method (directly applies to the MOs)
     672             : !> \param mos_old ...
     673             : !> \param mos_new ...
     674             : !> \param eps_exp ...
     675             : !> \param Hre ...
     676             : !> \param Him ...
     677             : !> \param mos_next ...
     678             : !> \param narn_old ...
     679             : !> \author Florian Schiffmann (02.09)
     680             : ! **************************************************************************************************
     681             : 
     682         744 :    SUBROUTINE arnoldi(mos_old, mos_new, eps_exp, Hre, Him, mos_next, narn_old)
     683             : 
     684             :       TYPE(cp_fm_type), DIMENSION(2)                     :: mos_old, mos_new
     685             :       REAL(KIND=dp), INTENT(in)                          :: eps_exp
     686             :       TYPE(cp_fm_type), INTENT(IN), OPTIONAL             :: Hre
     687             :       TYPE(cp_fm_type), INTENT(IN)                       :: Him
     688             :       TYPE(cp_fm_type), DIMENSION(2), OPTIONAL           :: mos_next
     689             :       INTEGER, INTENT(inout)                             :: narn_old
     690             : 
     691             :       CHARACTER(len=*), PARAMETER                        :: routineN = 'arnoldi'
     692             :       REAL(KIND=dp), PARAMETER                           :: rone = 1.0_dp, rzero = 0.0_dp
     693             : 
     694             :       INTEGER                                            :: handle, i, icol_local, idim, info, j, l, &
     695             :                                                             mydim, nao, narnoldi, ncol_local, &
     696             :                                                             newdim, nmo, npade, pade_step
     697         744 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: ipivot
     698         744 :       INTEGER, DIMENSION(:), POINTER                     :: col_indices, col_procs
     699             :       LOGICAL                                            :: convergence, double_col, double_row
     700         744 :       REAL(dp), ALLOCATABLE, DIMENSION(:)                :: last_norm, norm1, results
     701         744 :       REAL(dp), ALLOCATABLE, DIMENSION(:, :)             :: D, mat1, mat2, mat3, N
     702         744 :       REAL(dp), ALLOCATABLE, DIMENSION(:, :, :)          :: H_approx, H_approx_save
     703             :       REAL(KIND=dp)                                      :: conv_norm, prefac, scaleD, scaleN
     704             :       TYPE(cp_fm_struct_type), POINTER                   :: mo_struct, newstruct
     705         744 :       TYPE(cp_fm_type), ALLOCATABLE, DIMENSION(:)        :: V_mats
     706             :       TYPE(mp_comm_type)                                 :: col_group
     707             :       TYPE(mp_para_env_type), POINTER                    :: para_env
     708             : 
     709         744 :       CALL timeset(routineN, handle)
     710         744 :       para_env => mos_new(1)%matrix_struct%para_env
     711             : 
     712             :       CALL cp_fm_get_info(mos_old(1), ncol_local=ncol_local, col_indices=col_indices, &
     713         744 :                           nrow_global=nao, ncol_global=nmo, matrix_struct=mo_struct)
     714         744 :       narnoldi = MIN(18, nao)
     715             : 
     716        2232 :       ALLOCATE (results(ncol_local))
     717        1488 :       ALLOCATE (norm1(ncol_local))
     718       13632 :       ALLOCATE (V_mats(narnoldi + 1))
     719        1488 :       ALLOCATE (last_norm(ncol_local))
     720        3720 :       ALLOCATE (H_approx(narnoldi, narnoldi, ncol_local))
     721        2976 :       ALLOCATE (H_approx_save(narnoldi, narnoldi, ncol_local))
     722         744 :       col_procs => mo_struct%context%blacs2mpi(:, mo_struct%context%mepos(2))
     723        2232 :       CALL col_group%from_reordering(para_env, col_procs)
     724             : 
     725         744 :       double_col = .TRUE.
     726         744 :       double_row = .FALSE.
     727         744 :       CALL cp_fm_struct_double(newstruct, mo_struct, mo_struct%context, double_col, double_row)
     728      735882 :       H_approx_save = rzero
     729             : 
     730       12144 :       DO i = 1, narnoldi + 1
     731             :          CALL cp_fm_create(V_mats(i), matrix_struct=newstruct, &
     732       12144 :                            name="V_mat"//cp_to_string(i))
     733             :       END DO
     734         744 :       CALL cp_fm_get_info(V_mats(1), ncol_global=newdim)
     735             : 
     736        4006 :       norm1 = 0.0_dp
     737         744 : !$OMP PARALLEL DO PRIVATE(icol_local) DEFAULT(NONE) SHARED(V_mats,norm1,mos_old,ncol_local)
     738             :       DO icol_local = 1, ncol_local
     739             :          V_mats(1)%local_data(:, icol_local) = mos_old(1)%local_data(:, icol_local)
     740             :          V_mats(1)%local_data(:, icol_local + ncol_local) = mos_old(2)%local_data(:, icol_local)
     741             :          norm1(icol_local) = SUM(V_mats(1)%local_data(:, icol_local)**2) &
     742             :                              + SUM(V_mats(1)%local_data(:, icol_local + ncol_local)**2)
     743             :       END DO
     744             : 
     745         744 :       CALL col_group%sum(norm1)
     746             :       !!! normalize the mo vectors
     747        4006 :       norm1(:) = SQRT(norm1(:))
     748             : 
     749         744 : !$OMP PARALLEL DO PRIVATE(icol_local) DEFAULT(NONE) SHARED(V_mats,norm1,ncol_local)
     750             :       DO icol_local = 1, ncol_local
     751             :          V_mats(1)%local_data(:, icol_local) = V_mats(1)%local_data(:, icol_local)/norm1(icol_local)
     752             :          V_mats(1)%local_data(:, icol_local + ncol_local) = &
     753             :             V_mats(1)%local_data(:, icol_local + ncol_local)/norm1(icol_local)
     754             :       END DO
     755             : 
     756             :       ! arnoldi subspace procedure to get H_approx
     757        4994 :       DO i = 2, narnoldi + 1
     758             :          !Be careful, imaginary matrix multiplied with complex. Unfortunately requires a swap of arrays afterwards
     759        4994 :          CALL parallel_gemm("N", "N", nao, newdim, nao, 1.0_dp, Him, V_mats(i - 1), 0.0_dp, V_mats(i))
     760             : 
     761        4994 : !$OMP PARALLEL DO PRIVATE(icol_local) DEFAULT(NONE) SHARED(mos_new,V_mats,ncol_local,i)
     762             :          DO icol_local = 1, ncol_local
     763             :             mos_new(1)%local_data(:, icol_local) = V_mats(i)%local_data(:, icol_local)
     764             :             V_mats(i)%local_data(:, icol_local) = -V_mats(i)%local_data(:, icol_local + ncol_local)
     765             :             V_mats(i)%local_data(:, icol_local + ncol_local) = mos_new(1)%local_data(:, icol_local)
     766             :          END DO
     767             : 
     768        4994 :          IF (PRESENT(Hre)) THEN
     769        3118 :             CALL parallel_gemm("N", "N", nao, newdim, nao, 1.0_dp, Hre, V_mats(i - 1), 1.0_dp, V_mats(i))
     770             :          END IF
     771             : 
     772       24586 :          DO l = 1, i - 1
     773       19592 : !$OMP PARALLEL DO DEFAULT(NONE) SHARED(results,V_mats,ncol_local,l,i)
     774             :             DO icol_local = 1, ncol_local
     775             :                results(icol_local) = SUM(V_mats(l)%local_data(:, icol_local)*V_mats(i)%local_data(:, icol_local)) + &
     776             :                                      SUM(V_mats(l)%local_data(:, icol_local + ncol_local)* &
     777             :                                          V_mats(i)%local_data(:, icol_local + ncol_local))
     778             :             END DO
     779             : 
     780       19592 :             CALL col_group%sum(results)
     781             : 
     782       24586 : !$OMP PARALLEL DO DEFAULT(NONE) SHARED(H_approx_save,V_mats,ncol_local,l,i,results)
     783             :             DO icol_local = 1, ncol_local
     784             :                H_approx_save(l, i - 1, icol_local) = results(icol_local)
     785             :                V_mats(i)%local_data(:, icol_local) = V_mats(i)%local_data(:, icol_local) - &
     786             :                                                      results(icol_local)*V_mats(l)%local_data(:, icol_local)
     787             :                V_mats(i)%local_data(:, icol_local + ncol_local) = &
     788             :                   V_mats(i)%local_data(:, icol_local + ncol_local) - &
     789             :                   results(icol_local)*V_mats(l)%local_data(:, icol_local + ncol_local)
     790             :             END DO
     791             :          END DO
     792             : 
     793        4994 : !$OMP PARALLEL DO DEFAULT(NONE) SHARED(ncol_local,V_mats,results,i)
     794             :          DO icol_local = 1, ncol_local
     795             :             results(icol_local) = SUM(V_mats(i)%local_data(:, icol_local)**2) + &
     796             :                                   SUM(V_mats(i)%local_data(:, icol_local + ncol_local)**2)
     797             :          END DO
     798             : 
     799        4994 :          CALL col_group%sum(results)
     800             : 
     801        4994 :          IF (i .LE. narnoldi) THEN
     802             : 
     803        4994 : !$OMP PARALLEL DO DEFAULT(NONE) SHARED(H_approx_save,last_norm,V_mats,ncol_local,i,results)
     804             :             DO icol_local = 1, ncol_local
     805             :                H_approx_save(i, i - 1, icol_local) = SQRT(results(icol_local))
     806             :                last_norm(icol_local) = SQRT(results(icol_local))
     807             :                V_mats(i)%local_data(:, icol_local) = V_mats(i)%local_data(:, icol_local)/SQRT(results(icol_local))
     808             :                V_mats(i)%local_data(:, icol_local + ncol_local) = &
     809             :                   V_mats(i)%local_data(:, icol_local + ncol_local)/SQRT(results(icol_local))
     810             :             END DO
     811             :          ELSE
     812           0 : !$OMP PARALLEL DO DEFAULT(NONE) SHARED(ncol_local,last_norm,results)
     813             :             DO icol_local = 1, ncol_local
     814             :                last_norm(icol_local) = SQRT(results(icol_local))
     815             :             END DO
     816             :          END IF
     817             : 
     818     5178290 :          H_approx(:, :, :) = H_approx_save
     819             : 
     820             :          ! PADE approximation for exp(H_approx), everything is done locally
     821             : 
     822        4994 :          convergence = .FALSE.
     823        4994 :          IF (i .GE. narn_old) THEN
     824        1630 :             npade = 9
     825        1630 :             mydim = MIN(i, narnoldi)
     826        4890 :             ALLOCATE (ipivot(mydim))
     827        6520 :             ALLOCATE (mat1(mydim, mydim))
     828        4890 :             ALLOCATE (mat2(mydim, mydim))
     829        4890 :             ALLOCATE (mat3(mydim, mydim))
     830        4890 :             ALLOCATE (N(mydim, mydim))
     831        4890 :             ALLOCATE (D(mydim, mydim))
     832        8718 :             DO icol_local = 1, ncol_local
     833       56420 :                DO idim = 1, mydim
     834      412184 :                   DO j = 1, mydim
     835      355764 :                      mat1(idim, j) = H_approx(idim, j, icol_local)/16.0_dp
     836      405096 :                      mat3(idim, j) = mat1(idim, j)
     837             :                   END DO
     838             :                END DO
     839      412184 :                N = 0.0_dp
     840      412184 :                D = 0.0_dp
     841       56420 :                DO idim = 1, mydim
     842       49332 :                   N(idim, idim) = rone
     843       56420 :                   D(idim, idim) = rone
     844             :                END DO
     845      412184 :                N(:, :) = N + 0.5_dp*mat1
     846      412184 :                D(:, :) = D - 0.5_dp*mat1
     847             :                pade_step = 1
     848       35440 :                DO idim = 1, 4
     849       28352 :                   pade_step = pade_step + 1
     850             :                   CALL dgemm("N", 'N', mydim, mydim, mydim, rone, mat1(1, 1), &
     851       28352 :                              mydim, mat3(1, 1), mydim, rzero, mat2(1, 1), mydim)
     852             :                   scaleN = REAL(fac(2*npade - pade_step)*fac(npade)/ &
     853       28352 :                                 (fac(2*npade)*fac(pade_step)*fac(npade - pade_step)), dp)
     854             :                   scaleD = REAL((-1.0_dp)**pade_step*fac(2*npade - pade_step)*fac(npade)/ &
     855       28352 :                                 (fac(2*npade)*fac(pade_step)*fac(npade - pade_step)), dp)
     856     1648736 :                   N(:, :) = N + scaleN*mat2
     857     1648736 :                   D(:, :) = D + scaleD*mat2
     858       28352 :                   pade_step = pade_step + 1
     859             :                   CALL dgemm("N", 'N', mydim, mydim, mydim, rone, mat2(1, 1), &
     860       28352 :                              mydim, mat1(1, 1), mydim, rzero, mat3(1, 1), mydim)
     861             :                   scaleN = REAL(fac(2*npade - pade_step)*fac(npade)/ &
     862       28352 :                                 (fac(2*npade)*fac(pade_step)*fac(npade - pade_step)), dp)
     863             :                   scaleD = REAL((-1.0_dp)**pade_step*fac(2*npade - pade_step)*fac(npade)/ &
     864       28352 :                                 (fac(2*npade)*fac(pade_step)*fac(npade - pade_step)), dp)
     865     1648736 :                   N(:, :) = N + scaleN*mat3
     866     1655824 :                   D(:, :) = D + scaleD*mat3
     867             :                END DO
     868             : 
     869        7088 :                CALL dgetrf(mydim, mydim, D(1, 1), mydim, ipivot, info)
     870        7088 :                CALL dgetrs("N", mydim, mydim, D(1, 1), mydim, ipivot, N, mydim, info)
     871        7088 :                CALL dgemm("N", 'N', mydim, mydim, mydim, rone, N(1, 1), mydim, N(1, 1), mydim, rzero, mat1(1, 1), mydim)
     872        7088 :                CALL dgemm("N", 'N', mydim, mydim, mydim, rone, mat1(1, 1), mydim, mat1(1, 1), mydim, rzero, N(1, 1), mydim)
     873        7088 :                CALL dgemm("N", 'N', mydim, mydim, mydim, rone, N(1, 1), mydim, N(1, 1), mydim, rzero, mat1(1, 1), mydim)
     874        7088 :                CALL dgemm("N", 'N', mydim, mydim, mydim, rone, mat1(1, 1), mydim, mat1(1, 1), mydim, rzero, N(1, 1), mydim)
     875       58050 :                DO idim = 1, mydim
     876      412184 :                   DO j = 1, mydim
     877      405096 :                      H_approx(idim, j, icol_local) = N(idim, j)
     878             :                   END DO
     879             :                END DO
     880             :             END DO
     881             :             ! H_approx is exp(H_approx) right now, calculate new MOs and check for convergence
     882        1630 :             conv_norm = 0.0_dp
     883        8718 :             results = 0.0_dp
     884        8718 :             DO icol_local = 1, ncol_local
     885        7088 :                results(icol_local) = last_norm(icol_local)*H_approx(i - 1, 1, icol_local)
     886        8718 :                conv_norm = MAX(conv_norm, ABS(results(icol_local)))
     887             :             END DO
     888             : 
     889        1630 :             CALL para_env%max(conv_norm)
     890             : 
     891        1630 :             IF (conv_norm .LT. eps_exp .OR. i .GT. narnoldi) THEN
     892             : 
     893       30768 :                mos_new(1)%local_data = rzero
     894       30768 :                mos_new(2)%local_data = rzero
     895        4006 :                DO icol_local = 1, ncol_local
     896       29044 :                   DO idim = 1, mydim
     897       25038 :                      prefac = H_approx(idim, 1, icol_local)*norm1(icol_local)
     898             :                      mos_new(1)%local_data(:, icol_local) = mos_new(1)%local_data(:, icol_local) + &
     899      237900 :                                                             V_mats(idim)%local_data(:, icol_local)*prefac
     900             :                      mos_new(2)%local_data(:, icol_local) = mos_new(2)%local_data(:, icol_local) + &
     901      241162 :                                                             V_mats(idim)%local_data(:, icol_local + ncol_local)*prefac
     902             :                   END DO
     903             :                END DO
     904             : 
     905         744 :                IF (PRESENT(mos_next)) THEN
     906        4006 :                   DO icol_local = 1, ncol_local
     907       28300 :                      DO idim = 1, mydim
     908      223010 :                         DO j = 1, mydim
     909      219748 :                            N(idim, j) = H_approx(idim, j, icol_local)
     910             :                         END DO
     911             :                      END DO
     912        3262 :                      CALL dgemm("N", 'N', mydim, mydim, mydim, rone, N(1, 1), mydim, N(1, 1), mydim, rzero, mat1(1, 1), mydim)
     913       29044 :                      DO idim = 1, mydim
     914      223010 :                         DO j = 1, mydim
     915      219748 :                            H_approx(idim, j, icol_local) = mat1(idim, j)
     916             :                         END DO
     917             :                      END DO
     918             :                   END DO
     919       30768 :                   mos_next(1)%local_data = rzero
     920       30768 :                   mos_next(2)%local_data = rzero
     921        4006 :                   DO icol_local = 1, ncol_local
     922       29044 :                      DO idim = 1, mydim
     923       25038 :                         prefac = H_approx(idim, 1, icol_local)*norm1(icol_local)
     924             :                         mos_next(1)%local_data(:, icol_local) = &
     925             :                            mos_next(1)%local_data(:, icol_local) + &
     926      237900 :                            V_mats(idim)%local_data(:, icol_local)*prefac
     927             :                         mos_next(2)%local_data(:, icol_local) = &
     928             :                            mos_next(2)%local_data(:, icol_local) + &
     929      241162 :                            V_mats(idim)%local_data(:, icol_local + ncol_local)*prefac
     930             :                      END DO
     931             :                   END DO
     932             :                END IF
     933         744 :                IF (conv_norm .LT. eps_exp) THEN
     934         744 :                   convergence = .TRUE.
     935         744 :                   narn_old = i - 1
     936             :                END IF
     937             :             END IF
     938             : 
     939        1630 :             DEALLOCATE (ipivot)
     940        1630 :             DEALLOCATE (mat1)
     941        1630 :             DEALLOCATE (mat2)
     942        1630 :             DEALLOCATE (mat3)
     943        1630 :             DEALLOCATE (N)
     944        1630 :             DEALLOCATE (D)
     945             :          END IF
     946        1630 :          IF (convergence) EXIT
     947             : 
     948             :       END DO
     949         744 :       CPWARN_IF(.NOT. convergence, "ARNOLDI method did not converge")
     950             :       !deallocate all work matrices
     951             : 
     952         744 :       CALL cp_fm_release(V_mats)
     953         744 :       CALL cp_fm_struct_release(newstruct)
     954         744 :       CALL col_group%free()
     955             : 
     956         744 :       DEALLOCATE (H_approx)
     957         744 :       DEALLOCATE (H_approx_save)
     958         744 :       DEALLOCATE (results)
     959         744 :       DEALLOCATE (norm1)
     960         744 :       DEALLOCATE (last_norm)
     961         744 :       CALL timestop(handle)
     962        2232 :    END SUBROUTINE arnoldi
     963             : 
     964             : END MODULE matrix_exp

Generated by: LCOV version 1.15