LCOV - CP2K Regtests (git:b8e0b09)

LCOV - code coverage report

Current view:	top level - src - bse_util.F (source / functions)		Hit	Total	Coverage
Test:	CP2K Regtests (git:b8e0b09)	Lines:	557	687	81.1 %
Date:	2024-08-31 06:31:37	Functions:	13	15	86.7 %

          Line data    Source code

       1             : !--------------------------------------------------------------------------------------------------!
       2             : !   CP2K: A general program to perform molecular dynamics simulations                              !
       3             : !   Copyright 2000-2024 CP2K developers group <https://cp2k.org>                                   !
       4             : !                                                                                                  !
       5             : !   SPDX-License-Identifier: GPL-2.0-or-later                                                      !
       6             : !--------------------------------------------------------------------------------------------------!
       7             : 
       8             : ! **************************************************************************************************
       9             : !> \brief Auxiliary routines for GW + Bethe-Salpeter for computing electronic excitations
      10             : !> \par History
      11             : !>      11.2023 created [Maximilian Graml]
      12             : ! **************************************************************************************************
      13             : MODULE bse_util
      14             :    USE cp_blacs_env,                    ONLY: cp_blacs_env_type
      15             :    USE cp_dbcsr_api,                    ONLY: dbcsr_create,&
      16             :                                               dbcsr_init_p,&
      17             :                                               dbcsr_p_type,&
      18             :                                               dbcsr_set,&
      19             :                                               dbcsr_type_symmetric
      20             :    USE cp_dbcsr_cp2k_link,              ONLY: cp_dbcsr_alloc_block_from_nbl
      21             :    USE cp_dbcsr_operations,             ONLY: cp_dbcsr_sm_fm_multiply,&
      22             :                                               dbcsr_allocate_matrix_set,&
      23             :                                               dbcsr_deallocate_matrix_set
      24             :    USE cp_files,                        ONLY: close_file,&
      25             :                                               open_file
      26             :    USE cp_fm_basic_linalg,              ONLY: cp_fm_upper_to_full
      27             :    USE cp_fm_cholesky,                  ONLY: cp_fm_cholesky_decompose,&
      28             :                                               cp_fm_cholesky_invert
      29             :    USE cp_fm_struct,                    ONLY: cp_fm_struct_create,&
      30             :                                               cp_fm_struct_release,&
      31             :                                               cp_fm_struct_type
      32             :    USE cp_fm_types,                     ONLY: cp_fm_create,&
      33             :                                               cp_fm_get_info,&
      34             :                                               cp_fm_get_submatrix,&
      35             :                                               cp_fm_release,&
      36             :                                               cp_fm_set_all,&
      37             :                                               cp_fm_to_fm,&
      38             :                                               cp_fm_to_fm_submat_general,&
      39             :                                               cp_fm_type
      40             :    USE cp_log_handling,                 ONLY: cp_logger_get_default_unit_nr
      41             :    USE input_constants,                 ONLY: use_mom_ref_coac
      42             :    USE kinds,                           ONLY: dp,&
      43             :                                               int_8
      44             :    USE message_passing,                 ONLY: mp_para_env_type,&
      45             :                                               mp_request_type
      46             :    USE moments_utils,                   ONLY: get_reference_point
      47             :    USE mp2_types,                       ONLY: integ_mat_buffer_type,&
      48             :                                               mp2_type
      49             :    USE parallel_gemm_api,               ONLY: parallel_gemm
      50             :    USE physcon,                         ONLY: evolt
      51             :    USE qs_environment_types,            ONLY: get_qs_env,&
      52             :                                               qs_environment_type
      53             :    USE qs_mo_types,                     ONLY: get_mo_set,&
      54             :                                               mo_set_type
      55             :    USE qs_moments,                      ONLY: build_local_moment_matrix
      56             :    USE qs_neighbor_list_types,          ONLY: neighbor_list_set_p_type
      57             :    USE rpa_communication,               ONLY: communicate_buffer
      58             :    USE util,                            ONLY: sort,&
      59             :                                               sort_unique
      60             : #include "./base/base_uses.f90"
      61             : 
      62             :    IMPLICIT NONE
      63             : 
      64             :    PRIVATE
      65             : 
      66             :    CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'bse_util'
      67             : 
      68             :    PUBLIC :: mult_B_with_W, fm_general_add_bse, truncate_fm, fm_write_thresh, print_BSE_start_flag, &
      69             :              deallocate_matrices_bse, comp_eigvec_coeff_BSE, sort_excitations, &
      70             :              estimate_BSE_resources, filter_eigvec_contrib, truncate_BSE_matrices, &
      71             :              get_oscillator_strengths, compute_absorption_spectrum, determine_cutoff_indices
      72             : 
      73             : CONTAINS
      74             : 
      75             : ! **************************************************************************************************
      76             : !> \brief Multiplies B-matrix (RI-3c-Integrals) with W (screening) to obtain \bar{B}
      77             : !> \param fm_mat_S_ij_bse ...
      78             : !> \param fm_mat_S_ia_bse ...
      79             : !> \param fm_mat_S_bar_ia_bse ...
      80             : !> \param fm_mat_S_bar_ij_bse ...
      81             : !> \param fm_mat_Q_static_bse_gemm ...
      82             : !> \param dimen_RI ...
      83             : !> \param homo ...
      84             : !> \param virtual ...
      85             : ! **************************************************************************************************
      86         108 :    SUBROUTINE mult_B_with_W(fm_mat_S_ij_bse, fm_mat_S_ia_bse, fm_mat_S_bar_ia_bse, &
      87             :                             fm_mat_S_bar_ij_bse, fm_mat_Q_static_bse_gemm, &
      88             :                             dimen_RI, homo, virtual)
      89             : 
      90             :       TYPE(cp_fm_type), INTENT(IN)                       :: fm_mat_S_ij_bse, fm_mat_S_ia_bse
      91             :       TYPE(cp_fm_type), INTENT(OUT)                      :: fm_mat_S_bar_ia_bse, fm_mat_S_bar_ij_bse
      92             :       TYPE(cp_fm_type), INTENT(IN)                       :: fm_mat_Q_static_bse_gemm
      93             :       INTEGER, INTENT(IN)                                :: dimen_RI, homo, virtual
      94             : 
      95             :       CHARACTER(LEN=*), PARAMETER                        :: routineN = 'mult_B_with_W'
      96             : 
      97             :       INTEGER                                            :: handle, i_global, iiB, info_chol, &
      98             :                                                             j_global, jjB, ncol_local, nrow_local
      99          18 :       INTEGER, DIMENSION(:), POINTER                     :: col_indices, row_indices
     100             :       TYPE(cp_fm_type)                                   :: fm_work
     101             : 
     102          18 :       CALL timeset(routineN, handle)
     103             : 
     104          18 :       CALL cp_fm_create(fm_mat_S_bar_ia_bse, fm_mat_S_ia_bse%matrix_struct)
     105          18 :       CALL cp_fm_set_all(fm_mat_S_bar_ia_bse, 0.0_dp)
     106             : 
     107          18 :       CALL cp_fm_create(fm_mat_S_bar_ij_bse, fm_mat_S_ij_bse%matrix_struct)
     108          18 :       CALL cp_fm_set_all(fm_mat_S_bar_ij_bse, 0.0_dp)
     109             : 
     110          18 :       CALL cp_fm_create(fm_work, fm_mat_Q_static_bse_gemm%matrix_struct)
     111          18 :       CALL cp_fm_set_all(fm_work, 0.0_dp)
     112             : 
     113             :       ! get info of fm_mat_Q_static_bse and compute ((1+Q(0))^-1-1)
     114             :       CALL cp_fm_get_info(matrix=fm_mat_Q_static_bse_gemm, &
     115             :                           nrow_local=nrow_local, &
     116             :                           ncol_local=ncol_local, &
     117             :                           row_indices=row_indices, &
     118          18 :                           col_indices=col_indices)
     119             : 
     120        1512 :       DO jjB = 1, ncol_local
     121        1494 :          j_global = col_indices(jjB)
     122       63513 :          DO iiB = 1, nrow_local
     123       62001 :             i_global = row_indices(iiB)
     124       63495 :             IF (j_global == i_global .AND. i_global <= dimen_RI) THEN
     125         747 :                fm_mat_Q_static_bse_gemm%local_data(iiB, jjB) = fm_mat_Q_static_bse_gemm%local_data(iiB, jjB) + 1.0_dp
     126             :             END IF
     127             :          END DO
     128             :       END DO
     129             : 
     130             :       ! calculate Trace(Log(Matrix)) as Log(DET(Matrix)) via cholesky decomposition
     131          18 :       CALL cp_fm_cholesky_decompose(matrix=fm_mat_Q_static_bse_gemm, n=dimen_RI, info_out=info_chol)
     132             : 
     133          18 :       CPASSERT(info_chol == 0)
     134             : 
     135             :       ! calculate [1+Q(i0)]^-1
     136          18 :       CALL cp_fm_cholesky_invert(fm_mat_Q_static_bse_gemm)
     137             : 
     138             :       ! symmetrize the result
     139          18 :       CALL cp_fm_upper_to_full(fm_mat_Q_static_bse_gemm, fm_work)
     140             : 
     141             :       CALL parallel_gemm(transa="N", transb="N", m=dimen_RI, n=homo**2, k=dimen_RI, alpha=1.0_dp, &
     142             :                          matrix_a=fm_mat_Q_static_bse_gemm, matrix_b=fm_mat_S_ij_bse, beta=0.0_dp, &
     143          18 :                          matrix_c=fm_mat_S_bar_ij_bse)
     144             : 
     145             :       ! fm_mat_S_bar_ia_bse has a different blacs_env as fm_mat_S_ij_bse since we take
     146             :       ! fm_mat_S_ia_bse from RPA. Therefore, we also need a different fm_mat_Q_static_bse_gemm
     147             :       CALL parallel_gemm(transa="N", transb="N", m=dimen_RI, n=homo*virtual, k=dimen_RI, alpha=1.0_dp, &
     148             :                          matrix_a=fm_mat_Q_static_bse_gemm, matrix_b=fm_mat_S_ia_bse, beta=0.0_dp, &
     149          18 :                          matrix_c=fm_mat_S_bar_ia_bse)
     150             : 
     151          18 :       CALL cp_fm_release(fm_work)
     152             : 
     153          18 :       CALL timestop(handle)
     154             : 
     155          18 :    END SUBROUTINE
     156             : 
     157             : ! **************************************************************************************************
     158             : !> \brief Adds and reorders full matrices with a combined index structure, e.g. adding W_ij,ab
     159             : !> to A_ia, which needs MPI communication.
     160             : !> \param fm_out ...
     161             : !> \param fm_in ...
     162             : !> \param beta ...
     163             : !> \param nrow_secidx_in ...
     164             : !> \param ncol_secidx_in ...
     165             : !> \param nrow_secidx_out ...
     166             : !> \param ncol_secidx_out ...
     167             : !> \param unit_nr ...
     168             : !> \param reordering ...
     169             : !> \param mp2_env ...
     170             : ! **************************************************************************************************
     171          78 :    SUBROUTINE fm_general_add_bse(fm_out, fm_in, beta, nrow_secidx_in, ncol_secidx_in, &
     172             :                                  nrow_secidx_out, ncol_secidx_out, unit_nr, reordering, mp2_env)
     173             : 
     174             :       TYPE(cp_fm_type), INTENT(INOUT)                    :: fm_out, fm_in
     175             :       REAL(kind=dp)                                      :: beta
     176             :       INTEGER, INTENT(IN)                                :: nrow_secidx_in, ncol_secidx_in, &
     177             :                                                             nrow_secidx_out, ncol_secidx_out
     178             :       INTEGER                                            :: unit_nr
     179             :       INTEGER, DIMENSION(4)                              :: reordering
     180             :       TYPE(mp2_type), INTENT(IN)                         :: mp2_env
     181             : 
     182             :       CHARACTER(LEN=*), PARAMETER :: routineN = 'fm_general_add_bse'
     183             : 
     184             :       INTEGER :: col_idx_loc, dummy, handle, handle2, i_entry_rec, idx_col_out, idx_row_out, ii, &
     185             :          iproc, jj, ncol_block_in, ncol_block_out, ncol_local_in, ncol_local_out, nprocs, &
     186             :          nrow_block_in, nrow_block_out, nrow_local_in, nrow_local_out, proc_send, row_idx_loc, &
     187             :          send_pcol, send_prow
     188          78 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: entry_counter, num_entries_rec, &
     189             :                                                             num_entries_send
     190             :       INTEGER, DIMENSION(4)                              :: indices_in
     191          78 :       INTEGER, DIMENSION(:), POINTER                     :: col_indices_in, col_indices_out, &
     192          78 :                                                             row_indices_in, row_indices_out
     193             :       TYPE(integ_mat_buffer_type), ALLOCATABLE, &
     194          78 :          DIMENSION(:)                                    :: buffer_rec, buffer_send
     195             :       TYPE(mp_para_env_type), POINTER                    :: para_env_out
     196          78 :       TYPE(mp_request_type), DIMENSION(:, :), POINTER    :: req_array
     197             : 
     198          78 :       CALL timeset(routineN, handle)
     199          78 :       CALL timeset(routineN//"_1_setup", handle2)
     200             : 
     201          78 :       para_env_out => fm_out%matrix_struct%para_env
     202             :       ! A_iajb
     203             :       ! We start by moving data from local parts of W_ijab to the full matrix A_iajb using buffers
     204             :       CALL cp_fm_get_info(matrix=fm_out, &
     205             :                           nrow_local=nrow_local_out, &
     206             :                           ncol_local=ncol_local_out, &
     207             :                           row_indices=row_indices_out, &
     208             :                           col_indices=col_indices_out, &
     209             :                           nrow_block=nrow_block_out, &
     210          78 :                           ncol_block=ncol_block_out)
     211             : 
     212         234 :       ALLOCATE (num_entries_rec(0:para_env_out%num_pe - 1))
     213         234 :       ALLOCATE (num_entries_send(0:para_env_out%num_pe - 1))
     214             : 
     215         234 :       num_entries_rec(:) = 0
     216         234 :       num_entries_send(:) = 0
     217             : 
     218          78 :       dummy = 0
     219             : 
     220             :       CALL cp_fm_get_info(matrix=fm_in, &
     221             :                           nrow_local=nrow_local_in, &
     222             :                           ncol_local=ncol_local_in, &
     223             :                           row_indices=row_indices_in, &
     224             :                           col_indices=col_indices_in, &
     225             :                           nrow_block=nrow_block_in, &
     226          78 :                           ncol_block=ncol_block_in)
     227             : 
     228          78 :       IF (unit_nr > 0 .AND. mp2_env%ri_g0w0%bse_debug_print) THEN
     229           0 :          WRITE (unit_nr, '(T2,A10,T13,A14,A10,T71,I10)') 'BSE|DEBUG|', 'Row number of ', fm_out%name, &
     230           0 :             fm_out%matrix_struct%nrow_global
     231           0 :          WRITE (unit_nr, '(T2,A10,T13,A17,A10,T71,I10)') 'BSE|DEBUG|', 'Column number of ', fm_out%name, &
     232           0 :             fm_out%matrix_struct%ncol_global
     233             : 
     234           0 :          WRITE (unit_nr, '(T2,A10,T13,A18,A10,T71,I10)') 'BSE|DEBUG|', 'Row block size of ', fm_out%name, nrow_block_out
     235           0 :          WRITE (unit_nr, '(T2,A10,T13,A21,A10,T71,I10)') 'BSE|DEBUG|', 'Column block size of ', fm_out%name, ncol_block_out
     236             : 
     237           0 :          WRITE (unit_nr, '(T2,A10,T13,A14,A10,T71,I10)') 'BSE|DEBUG|', 'Row number of ', fm_in%name, &
     238           0 :             fm_in%matrix_struct%nrow_global
     239           0 :          WRITE (unit_nr, '(T2,A10,T13,A17,A10,T71,I10)') 'BSE|DEBUG|', 'Column number of ', fm_in%name, &
     240           0 :             fm_in%matrix_struct%ncol_global
     241             : 
     242           0 :          WRITE (unit_nr, '(T2,A10,T13,A18,A10,T71,I10)') 'BSE|DEBUG|', 'Row block size of ', fm_in%name, nrow_block_in
     243           0 :          WRITE (unit_nr, '(T2,A10,T13,A21,A10,T71,I10)') 'BSE|DEBUG|', 'Column block size of ', fm_in%name, ncol_block_in
     244             :       END IF
     245             : 
     246             :       ! Use scalapack wrapper to find process index in fm_out
     247             :       ! To that end, we obtain the global index in fm_out from the level indices
     248          78 :       indices_in(:) = 0
     249         690 :       DO row_idx_loc = 1, nrow_local_in
     250         612 :          indices_in(1) = (row_indices_in(row_idx_loc) - 1)/nrow_secidx_in + 1
     251         612 :          indices_in(2) = MOD(row_indices_in(row_idx_loc) - 1, nrow_secidx_in) + 1
     252       29634 :          DO col_idx_loc = 1, ncol_local_in
     253       28944 :             indices_in(3) = (col_indices_in(col_idx_loc) - 1)/ncol_secidx_in + 1
     254       28944 :             indices_in(4) = MOD(col_indices_in(col_idx_loc) - 1, ncol_secidx_in) + 1
     255             : 
     256       28944 :             idx_row_out = indices_in(reordering(2)) + (indices_in(reordering(1)) - 1)*nrow_secidx_out
     257       28944 :             idx_col_out = indices_in(reordering(4)) + (indices_in(reordering(3)) - 1)*ncol_secidx_out
     258             : 
     259       28944 :             send_prow = fm_out%matrix_struct%g2p_row(idx_row_out)
     260       28944 :             send_pcol = fm_out%matrix_struct%g2p_col(idx_col_out)
     261             : 
     262       28944 :             proc_send = fm_out%matrix_struct%context%blacs2mpi(send_prow, send_pcol)
     263             : 
     264       29556 :             num_entries_send(proc_send) = num_entries_send(proc_send) + 1
     265             : 
     266             :          END DO
     267             :       END DO
     268             : 
     269          78 :       CALL timestop(handle2)
     270             : 
     271          78 :       CALL timeset(routineN//"_2_comm_entry_nums", handle2)
     272          78 :       IF (unit_nr > 0 .AND. mp2_env%ri_g0w0%bse_debug_print) THEN
     273           0 :          WRITE (unit_nr, '(T2,A10,T13,A27)') 'BSE|DEBUG|', 'Communicating entry numbers'
     274             :       END IF
     275             : 
     276          78 :       CALL para_env_out%alltoall(num_entries_send, num_entries_rec, 1)
     277             : 
     278          78 :       CALL timestop(handle2)
     279             : 
     280          78 :       CALL timeset(routineN//"_3_alloc_buffer", handle2)
     281          78 :       IF (unit_nr > 0 .AND. mp2_env%ri_g0w0%bse_debug_print) THEN
     282           0 :          WRITE (unit_nr, '(T2,A10,T13,A18)') 'BSE|DEBUG|', 'Allocating buffers'
     283             :       END IF
     284             : 
     285             :       ! Buffers for entries and their indices
     286         390 :       ALLOCATE (buffer_rec(0:para_env_out%num_pe - 1))
     287         390 :       ALLOCATE (buffer_send(0:para_env_out%num_pe - 1))
     288             : 
     289             :       ! allocate data message and corresponding indices
     290         234 :       DO iproc = 0, para_env_out%num_pe - 1
     291             : 
     292         390 :          ALLOCATE (buffer_rec(iproc)%msg(num_entries_rec(iproc)))
     293       29178 :          buffer_rec(iproc)%msg = 0.0_dp
     294             : 
     295             :       END DO
     296             : 
     297         234 :       DO iproc = 0, para_env_out%num_pe - 1
     298             : 
     299         390 :          ALLOCATE (buffer_send(iproc)%msg(num_entries_send(iproc)))
     300       29178 :          buffer_send(iproc)%msg = 0.0_dp
     301             : 
     302             :       END DO
     303             : 
     304         234 :       DO iproc = 0, para_env_out%num_pe - 1
     305             : 
     306         390 :          ALLOCATE (buffer_rec(iproc)%indx(num_entries_rec(iproc), 2))
     307       58434 :          buffer_rec(iproc)%indx = 0
     308             : 
     309             :       END DO
     310             : 
     311         234 :       DO iproc = 0, para_env_out%num_pe - 1
     312             : 
     313         390 :          ALLOCATE (buffer_send(iproc)%indx(num_entries_send(iproc), 2))
     314       58434 :          buffer_send(iproc)%indx = 0
     315             : 
     316             :       END DO
     317             : 
     318          78 :       CALL timestop(handle2)
     319             : 
     320          78 :       CALL timeset(routineN//"_4_buf_from_fmin_"//fm_out%name, handle2)
     321          78 :       IF (unit_nr > 0 .AND. mp2_env%ri_g0w0%bse_debug_print) THEN
     322           0 :          WRITE (unit_nr, '(T2,A10,T13,A18,A10,A13)') 'BSE|DEBUG|', 'Writing data from ', fm_in%name, ' into buffers'
     323             :       END IF
     324             : 
     325         234 :       ALLOCATE (entry_counter(0:para_env_out%num_pe - 1))
     326         234 :       entry_counter(:) = 0
     327             : 
     328             :       ! Now we can write the actual data and indices to the send-buffer
     329         690 :       DO row_idx_loc = 1, nrow_local_in
     330         612 :          indices_in(1) = (row_indices_in(row_idx_loc) - 1)/nrow_secidx_in + 1
     331         612 :          indices_in(2) = MOD(row_indices_in(row_idx_loc) - 1, nrow_secidx_in) + 1
     332       29634 :          DO col_idx_loc = 1, ncol_local_in
     333       28944 :             indices_in(3) = (col_indices_in(col_idx_loc) - 1)/ncol_secidx_in + 1
     334       28944 :             indices_in(4) = MOD(col_indices_in(col_idx_loc) - 1, ncol_secidx_in) + 1
     335             : 
     336       28944 :             idx_row_out = indices_in(reordering(2)) + (indices_in(reordering(1)) - 1)*nrow_secidx_out
     337       28944 :             idx_col_out = indices_in(reordering(4)) + (indices_in(reordering(3)) - 1)*ncol_secidx_out
     338             : 
     339       28944 :             send_prow = fm_out%matrix_struct%g2p_row(idx_row_out)
     340       28944 :             send_pcol = fm_out%matrix_struct%g2p_col(idx_col_out)
     341             : 
     342       28944 :             proc_send = fm_out%matrix_struct%context%blacs2mpi(send_prow, send_pcol)
     343       28944 :             entry_counter(proc_send) = entry_counter(proc_send) + 1
     344             : 
     345             :             buffer_send(proc_send)%msg(entry_counter(proc_send)) = &
     346       28944 :                fm_in%local_data(row_idx_loc, col_idx_loc)
     347             : 
     348       28944 :             buffer_send(proc_send)%indx(entry_counter(proc_send), 1) = idx_row_out
     349       29556 :             buffer_send(proc_send)%indx(entry_counter(proc_send), 2) = idx_col_out
     350             : 
     351             :          END DO
     352             :       END DO
     353             : 
     354        1170 :       ALLOCATE (req_array(1:para_env_out%num_pe, 4))
     355             : 
     356          78 :       CALL timestop(handle2)
     357             : 
     358          78 :       CALL timeset(routineN//"_5_comm_buffer", handle2)
     359          78 :       IF (unit_nr > 0 .AND. mp2_env%ri_g0w0%bse_debug_print) THEN
     360           0 :          WRITE (unit_nr, '(T2,A10,T13,A21)') 'BSE|DEBUG|', 'Communicating buffers'
     361             :       END IF
     362             : 
     363             :       ! communicate the buffer
     364             :       CALL communicate_buffer(para_env_out, num_entries_rec, num_entries_send, buffer_rec, &
     365          78 :                               buffer_send, req_array)
     366             : 
     367          78 :       CALL timestop(handle2)
     368             : 
     369          78 :       CALL timeset(routineN//"_6_buffer_to_fmout"//fm_out%name, handle2)
     370          78 :       IF (unit_nr > 0 .AND. mp2_env%ri_g0w0%bse_debug_print) THEN
     371           0 :          WRITE (unit_nr, '(T2,A10,T13,A24,A10)') 'BSE|DEBUG|', 'Writing from buffers to ', fm_out%name
     372             :       END IF
     373             : 
     374             :       ! fill fm_out with the entries from buffer_rec, i.e. buffer_rec are parts of fm_in
     375          78 :       nprocs = para_env_out%num_pe
     376             : 
     377             : !$OMP PARALLEL DO DEFAULT(NONE) &
     378             : !$OMP SHARED(fm_out, nprocs, num_entries_rec, buffer_rec, beta) &
     379          78 : !$OMP PRIVATE(iproc, i_entry_rec, ii, jj)
     380             :       DO iproc = 0, nprocs - 1
     381             :          DO i_entry_rec = 1, num_entries_rec(iproc)
     382             :             ii = fm_out%matrix_struct%g2l_row(buffer_rec(iproc)%indx(i_entry_rec, 1))
     383             :             jj = fm_out%matrix_struct%g2l_col(buffer_rec(iproc)%indx(i_entry_rec, 2))
     384             : 
     385             :             fm_out%local_data(ii, jj) = fm_out%local_data(ii, jj) + beta*buffer_rec(iproc)%msg(i_entry_rec)
     386             :          END DO
     387             :       END DO
     388             : !$OMP END PARALLEL DO
     389             : 
     390          78 :       CALL timestop(handle2)
     391             : 
     392          78 :       CALL timeset(routineN//"_7_cleanup", handle2)
     393          78 :       IF (unit_nr > 0 .AND. mp2_env%ri_g0w0%bse_debug_print) THEN
     394           0 :          WRITE (unit_nr, '(T2,A10,T13,A41)') 'BSE|DEBUG|', 'Starting cleanup of communication buffers'
     395             :       END IF
     396             : 
     397             :       !Clean up all the arrays from the communication process
     398         234 :       DO iproc = 0, para_env_out%num_pe - 1
     399         156 :          DEALLOCATE (buffer_rec(iproc)%msg)
     400         156 :          DEALLOCATE (buffer_rec(iproc)%indx)
     401         156 :          DEALLOCATE (buffer_send(iproc)%msg)
     402         234 :          DEALLOCATE (buffer_send(iproc)%indx)
     403             :       END DO
     404         390 :       DEALLOCATE (buffer_rec, buffer_send)
     405          78 :       DEALLOCATE (req_array)
     406          78 :       DEALLOCATE (entry_counter)
     407          78 :       DEALLOCATE (num_entries_rec, num_entries_send)
     408             : 
     409          78 :       CALL timestop(handle2)
     410          78 :       CALL timestop(handle)
     411             : 
     412         546 :    END SUBROUTINE fm_general_add_bse
     413             : 
     414             : ! **************************************************************************************************
     415             : !> \brief Routine for truncating a full matrix as given by the energy cutoffs in the input file.
     416             : !>  Logic: Matrices have some dimension dimen_RI x nrow_in*ncol_in  for the incoming (untruncated) matrix
     417             : !>  and dimen_RI x nrow_out*ncol_out for the truncated matrix. The truncation is done by resorting the indices
     418             : !>  via parallel communication.
     419             : !> \param fm_out ...
     420             : !> \param fm_in ...
     421             : !> \param ncol_in ...
     422             : !> \param nrow_out ...
     423             : !> \param ncol_out ...
     424             : !> \param unit_nr ...
     425             : !> \param mp2_env ...
     426             : !> \param nrow_offset ...
     427             : !> \param ncol_offset ...
     428             : ! **************************************************************************************************
     429          54 :    SUBROUTINE truncate_fm(fm_out, fm_in, ncol_in, &
     430             :                           nrow_out, ncol_out, unit_nr, mp2_env, &
     431             :                           nrow_offset, ncol_offset)
     432             : 
     433             :       TYPE(cp_fm_type), INTENT(INOUT)                    :: fm_out
     434             :       TYPE(cp_fm_type), INTENT(IN)                       :: fm_in
     435             :       INTEGER                                            :: ncol_in, nrow_out, ncol_out, unit_nr
     436             :       TYPE(mp2_type), INTENT(INOUT)                      :: mp2_env
     437             :       INTEGER, INTENT(IN), OPTIONAL                      :: nrow_offset, ncol_offset
     438             : 
     439             :       CHARACTER(LEN=*), PARAMETER                        :: routineN = 'truncate_fm'
     440             : 
     441             :       INTEGER :: col_idx_loc, dummy, handle, handle2, i_entry_rec, idx_col_first, idx_col_in, &
     442             :          idx_col_out, idx_col_sec, idx_row_in, ii, iproc, jj, ncol_block_in, ncol_block_out, &
     443             :          ncol_local_in, ncol_local_out, nprocs, nrow_block_in, nrow_block_out, nrow_local_in, &
     444             :          nrow_local_out, proc_send, row_idx_loc, send_pcol, send_prow
     445          54 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: entry_counter, num_entries_rec, &
     446             :                                                             num_entries_send
     447          54 :       INTEGER, DIMENSION(:), POINTER                     :: col_indices_in, col_indices_out, &
     448          54 :                                                             row_indices_in, row_indices_out
     449             :       LOGICAL                                            :: correct_ncol, correct_nrow
     450             :       TYPE(integ_mat_buffer_type), ALLOCATABLE, &
     451          54 :          DIMENSION(:)                                    :: buffer_rec, buffer_send
     452             :       TYPE(mp_para_env_type), POINTER                    :: para_env_out
     453          54 :       TYPE(mp_request_type), DIMENSION(:, :), POINTER    :: req_array
     454             : 
     455          54 :       CALL timeset(routineN, handle)
     456          54 :       CALL timeset(routineN//"_1_setup", handle2)
     457             : 
     458          54 :       correct_nrow = .FALSE.
     459          54 :       correct_ncol = .FALSE.
     460             :       !In case of truncation in the occupied space, we need to correct the interval of indices
     461          54 :       IF (PRESENT(nrow_offset)) THEN
     462          36 :          correct_nrow = .TRUE.
     463             :       END IF
     464          54 :       IF (PRESENT(ncol_offset)) THEN
     465          18 :          correct_ncol = .TRUE.
     466             :       END IF
     467             : 
     468          54 :       para_env_out => fm_out%matrix_struct%para_env
     469             : 
     470             :       CALL cp_fm_get_info(matrix=fm_out, &
     471             :                           nrow_local=nrow_local_out, &
     472             :                           ncol_local=ncol_local_out, &
     473             :                           row_indices=row_indices_out, &
     474             :                           col_indices=col_indices_out, &
     475             :                           nrow_block=nrow_block_out, &
     476          54 :                           ncol_block=ncol_block_out)
     477             : 
     478         162 :       ALLOCATE (num_entries_rec(0:para_env_out%num_pe - 1))
     479         162 :       ALLOCATE (num_entries_send(0:para_env_out%num_pe - 1))
     480             : 
     481         162 :       num_entries_rec(:) = 0
     482         162 :       num_entries_send(:) = 0
     483             : 
     484          54 :       dummy = 0
     485             : 
     486             :       CALL cp_fm_get_info(matrix=fm_in, &
     487             :                           nrow_local=nrow_local_in, &
     488             :                           ncol_local=ncol_local_in, &
     489             :                           row_indices=row_indices_in, &
     490             :                           col_indices=col_indices_in, &
     491             :                           nrow_block=nrow_block_in, &
     492          54 :                           ncol_block=ncol_block_in)
     493             : 
     494          54 :       IF (unit_nr > 0 .AND. mp2_env%ri_g0w0%bse_debug_print) THEN
     495           0 :          WRITE (unit_nr, '(T2,A10,T13,A14,A10,T71,I10)') 'BSE|DEBUG|', 'Row number of ', fm_out%name, &
     496           0 :             fm_out%matrix_struct%nrow_global
     497           0 :          WRITE (unit_nr, '(T2,A10,T13,A17,A10,T71,I10)') 'BSE|DEBUG|', 'Column number of ', fm_out%name, &
     498           0 :             fm_out%matrix_struct%ncol_global
     499             : 
     500           0 :          WRITE (unit_nr, '(T2,A10,T13,A18,A10,T71,I10)') 'BSE|DEBUG|', 'Row block size of ', fm_out%name, nrow_block_out
     501           0 :          WRITE (unit_nr, '(T2,A10,T13,A21,A10,T71,I10)') 'BSE|DEBUG|', 'Column block size of ', fm_out%name, ncol_block_out
     502             : 
     503           0 :          WRITE (unit_nr, '(T2,A10,T13,A14,A10,T71,I10)') 'BSE|DEBUG|', 'Row number of ', fm_in%name, &
     504           0 :             fm_in%matrix_struct%nrow_global
     505           0 :          WRITE (unit_nr, '(T2,A10,T13,A17,A10,T71,I10)') 'BSE|DEBUG|', 'Column number of ', fm_in%name, &
     506           0 :             fm_in%matrix_struct%ncol_global
     507             : 
     508           0 :          WRITE (unit_nr, '(T2,A10,T13,A18,A10,T71,I10)') 'BSE|DEBUG|', 'Row block size of ', fm_in%name, nrow_block_in
     509           0 :          WRITE (unit_nr, '(T2,A10,T13,A21,A10,T71,I10)') 'BSE|DEBUG|', 'Column block size of ', fm_in%name, ncol_block_in
     510             :       END IF
     511             : 
     512             :       ! We find global indices in S with nrow_in and ncol_in for truncation
     513        4302 :       DO col_idx_loc = 1, ncol_local_in
     514        4248 :          idx_col_in = col_indices_in(col_idx_loc)
     515             : 
     516        4248 :          idx_col_first = (idx_col_in - 1)/ncol_in + 1
     517        4248 :          idx_col_sec = MOD(idx_col_in - 1, ncol_in) + 1
     518             : 
     519             :          ! If occupied orbitals are included, these have to be handled differently
     520             :          ! due to their reversed indexing
     521        4248 :          IF (correct_nrow) THEN
     522        1656 :             idx_col_first = idx_col_first - nrow_offset + 1
     523        1656 :             IF (idx_col_first .LE. 0) CYCLE
     524             :          ELSE
     525        2592 :             IF (idx_col_first > nrow_out) EXIT
     526             :          END IF
     527        4248 :          IF (correct_ncol) THEN
     528         288 :             idx_col_sec = idx_col_sec - ncol_offset + 1
     529         288 :             IF (idx_col_sec .LE. 0) CYCLE
     530             :          ELSE
     531        3960 :             IF (idx_col_sec > ncol_out) CYCLE
     532             :          END IF
     533             : 
     534        3744 :          idx_col_out = idx_col_sec + (idx_col_first - 1)*ncol_out
     535             : 
     536      159174 :          DO row_idx_loc = 1, nrow_local_in
     537      155376 :             idx_row_in = row_indices_in(row_idx_loc)
     538             : 
     539      155376 :             send_prow = fm_out%matrix_struct%g2p_row(idx_row_in)
     540      155376 :             send_pcol = fm_out%matrix_struct%g2p_col(idx_col_out)
     541             : 
     542      155376 :             proc_send = fm_out%matrix_struct%context%blacs2mpi(send_prow, send_pcol)
     543             : 
     544      159624 :             num_entries_send(proc_send) = num_entries_send(proc_send) + 1
     545             : 
     546             :          END DO
     547             :       END DO
     548             : 
     549          54 :       CALL timestop(handle2)
     550             : 
     551          54 :       CALL timeset(routineN//"_2_comm_entry_nums", handle2)
     552          54 :       IF (unit_nr > 0 .AND. mp2_env%ri_g0w0%bse_debug_print) THEN
     553           0 :          WRITE (unit_nr, '(T2,A10,T13,A27)') 'BSE|DEBUG|', 'Communicating entry numbers'
     554             :       END IF
     555             : 
     556          54 :       CALL para_env_out%alltoall(num_entries_send, num_entries_rec, 1)
     557             : 
     558          54 :       CALL timestop(handle2)
     559             : 
     560          54 :       CALL timeset(routineN//"_3_alloc_buffer", handle2)
     561          54 :       IF (unit_nr > 0 .AND. mp2_env%ri_g0w0%bse_debug_print) THEN
     562           0 :          WRITE (unit_nr, '(T2,A10,T13,A18)') 'BSE|DEBUG|', 'Allocating buffers'
     563             :       END IF
     564             : 
     565             :       ! Buffers for entries and their indices
     566         270 :       ALLOCATE (buffer_rec(0:para_env_out%num_pe - 1))
     567         270 :       ALLOCATE (buffer_send(0:para_env_out%num_pe - 1))
     568             : 
     569             :       ! allocate data message and corresponding indices
     570         162 :       DO iproc = 0, para_env_out%num_pe - 1
     571             : 
     572         288 :          ALLOCATE (buffer_rec(iproc)%msg(num_entries_rec(iproc)))
     573      155538 :          buffer_rec(iproc)%msg = 0.0_dp
     574             : 
     575             :       END DO
     576             : 
     577         162 :       DO iproc = 0, para_env_out%num_pe - 1
     578             : 
     579         288 :          ALLOCATE (buffer_send(iproc)%msg(num_entries_send(iproc)))
     580      155538 :          buffer_send(iproc)%msg = 0.0_dp
     581             : 
     582             :       END DO
     583             : 
     584         162 :       DO iproc = 0, para_env_out%num_pe - 1
     585             : 
     586         288 :          ALLOCATE (buffer_rec(iproc)%indx(num_entries_rec(iproc), 2))
     587      311130 :          buffer_rec(iproc)%indx = 0
     588             : 
     589             :       END DO
     590             : 
     591         162 :       DO iproc = 0, para_env_out%num_pe - 1
     592             : 
     593         288 :          ALLOCATE (buffer_send(iproc)%indx(num_entries_send(iproc), 2))
     594      311130 :          buffer_send(iproc)%indx = 0
     595             : 
     596             :       END DO
     597             : 
     598          54 :       CALL timestop(handle2)
     599             : 
     600          54 :       CALL timeset(routineN//"_4_buf_from_fmin_"//fm_out%name, handle2)
     601          54 :       IF (unit_nr > 0 .AND. mp2_env%ri_g0w0%bse_debug_print) THEN
     602           0 :          WRITE (unit_nr, '(T2,A10,T13,A18,A10,A13)') 'BSE|DEBUG|', 'Writing data from ', fm_in%name, ' into buffers'
     603             :       END IF
     604             : 
     605         162 :       ALLOCATE (entry_counter(0:para_env_out%num_pe - 1))
     606         162 :       entry_counter(:) = 0
     607             : 
     608             :       ! Now we can write the actual data and indices to the send-buffer
     609        4302 :       DO col_idx_loc = 1, ncol_local_in
     610        4248 :          idx_col_in = col_indices_in(col_idx_loc)
     611             : 
     612        4248 :          idx_col_first = (idx_col_in - 1)/ncol_in + 1
     613        4248 :          idx_col_sec = MOD(idx_col_in - 1, ncol_in) + 1
     614             : 
     615             :          ! If occupied orbitals are included, these have to be handled differently
     616             :          ! due to their reversed indexing
     617        4248 :          IF (correct_nrow) THEN
     618        1656 :             idx_col_first = idx_col_first - nrow_offset + 1
     619        1656 :             IF (idx_col_first .LE. 0) CYCLE
     620             :          ELSE
     621        2592 :             IF (idx_col_first > nrow_out) EXIT
     622             :          END IF
     623        4248 :          IF (correct_ncol) THEN
     624         288 :             idx_col_sec = idx_col_sec - ncol_offset + 1
     625         288 :             IF (idx_col_sec .LE. 0) CYCLE
     626             :          ELSE
     627        3960 :             IF (idx_col_sec > ncol_out) CYCLE
     628             :          END IF
     629             : 
     630        3744 :          idx_col_out = idx_col_sec + (idx_col_first - 1)*ncol_out
     631             : 
     632      159174 :          DO row_idx_loc = 1, nrow_local_in
     633      155376 :             idx_row_in = row_indices_in(row_idx_loc)
     634             : 
     635      155376 :             send_prow = fm_out%matrix_struct%g2p_row(idx_row_in)
     636             : 
     637      155376 :             send_pcol = fm_out%matrix_struct%g2p_col(idx_col_out)
     638             : 
     639      155376 :             proc_send = fm_out%matrix_struct%context%blacs2mpi(send_prow, send_pcol)
     640      155376 :             entry_counter(proc_send) = entry_counter(proc_send) + 1
     641             : 
     642             :             buffer_send(proc_send)%msg(entry_counter(proc_send)) = &
     643      155376 :                fm_in%local_data(row_idx_loc, col_idx_loc)
     644             :             !No need to create row_out, since it is identical to incoming
     645             :             !We dont change the RI index for any fm_mat_XX_BSE
     646      155376 :             buffer_send(proc_send)%indx(entry_counter(proc_send), 1) = idx_row_in
     647      159624 :             buffer_send(proc_send)%indx(entry_counter(proc_send), 2) = idx_col_out
     648             : 
     649             :          END DO
     650             :       END DO
     651             : 
     652         810 :       ALLOCATE (req_array(1:para_env_out%num_pe, 4))
     653             : 
     654          54 :       CALL timestop(handle2)
     655             : 
     656          54 :       CALL timeset(routineN//"_5_comm_buffer", handle2)
     657          54 :       IF (unit_nr > 0 .AND. mp2_env%ri_g0w0%bse_debug_print) THEN
     658           0 :          WRITE (unit_nr, '(T2,A10,T13,A21)') 'BSE|DEBUG|', 'Communicating buffers'
     659             :       END IF
     660             : 
     661             :       ! communicate the buffer
     662             :       CALL communicate_buffer(para_env_out, num_entries_rec, num_entries_send, buffer_rec, &
     663          54 :                               buffer_send, req_array)
     664             : 
     665          54 :       CALL timestop(handle2)
     666             : 
     667          54 :       CALL timeset(routineN//"_6_buffer_to_fmout"//fm_out%name, handle2)
     668          54 :       IF (unit_nr > 0 .AND. mp2_env%ri_g0w0%bse_debug_print) THEN
     669           0 :          WRITE (unit_nr, '(T2,A10,T13,A24,A10)') 'BSE|DEBUG|', 'Writing from buffers to ', fm_out%name
     670             :       END IF
     671             : 
     672             :       ! fill fm_out with the entries from buffer_rec, i.e. buffer_rec are parts of fm_in
     673          54 :       nprocs = para_env_out%num_pe
     674             : 
     675             : !$OMP PARALLEL DO DEFAULT(NONE) &
     676             : !$OMP SHARED(fm_out, nprocs, num_entries_rec, buffer_rec) &
     677          54 : !$OMP PRIVATE(iproc, i_entry_rec, ii, jj)
     678             :       DO iproc = 0, nprocs - 1
     679             :          DO i_entry_rec = 1, num_entries_rec(iproc)
     680             :             ii = fm_out%matrix_struct%g2l_row(buffer_rec(iproc)%indx(i_entry_rec, 1))
     681             :             jj = fm_out%matrix_struct%g2l_col(buffer_rec(iproc)%indx(i_entry_rec, 2))
     682             : 
     683             :             fm_out%local_data(ii, jj) = fm_out%local_data(ii, jj) + buffer_rec(iproc)%msg(i_entry_rec)
     684             :          END DO
     685             :       END DO
     686             : !$OMP END PARALLEL DO
     687             : 
     688          54 :       CALL timestop(handle2)
     689             : 
     690          54 :       CALL timeset(routineN//"_7_cleanup", handle2)
     691          54 :       IF (unit_nr > 0 .AND. mp2_env%ri_g0w0%bse_debug_print) THEN
     692           0 :          WRITE (unit_nr, '(T2,A10,T13,A41)') 'BSE|DEBUG|', 'Starting cleanup of communication buffers'
     693             :       END IF
     694             : 
     695             :       !Clean up all the arrays from the communication process
     696         162 :       DO iproc = 0, para_env_out%num_pe - 1
     697         108 :          DEALLOCATE (buffer_rec(iproc)%msg)
     698         108 :          DEALLOCATE (buffer_rec(iproc)%indx)
     699         108 :          DEALLOCATE (buffer_send(iproc)%msg)
     700         162 :          DEALLOCATE (buffer_send(iproc)%indx)
     701             :       END DO
     702         270 :       DEALLOCATE (buffer_rec, buffer_send)
     703          54 :       DEALLOCATE (req_array)
     704          54 :       DEALLOCATE (entry_counter)
     705          54 :       DEALLOCATE (num_entries_rec, num_entries_send)
     706             : 
     707          54 :       CALL timestop(handle2)
     708          54 :       CALL timestop(handle)
     709             : 
     710         432 :    END SUBROUTINE truncate_fm
     711             : 
     712             : ! **************************************************************************************************
     713             : !> \brief Debug function to write elements of a full matrix to file, if they are larger than a given threshold
     714             : !> \param fm ...
     715             : !> \param thresh ...
     716             : !> \param header ...
     717             : !> \param unit_nr ...
     718             : !> \param abs_vals ...
     719             : ! **************************************************************************************************
     720           0 :    SUBROUTINE fm_write_thresh(fm, thresh, header, unit_nr, abs_vals)
     721             : 
     722             :       TYPE(cp_fm_type), INTENT(IN)                       :: fm
     723             :       REAL(KIND=dp), INTENT(IN)                          :: thresh
     724             :       CHARACTER(LEN=*), INTENT(IN)                       :: header
     725             :       INTEGER, INTENT(IN)                                :: unit_nr
     726             :       LOGICAL, OPTIONAL                                  :: abs_vals
     727             : 
     728             :       CHARACTER(LEN=*), PARAMETER :: my_footer = " | ENDING WRITING OF MATRIX", &
     729             :          routineN = 'fm_write_thresh'
     730             : 
     731             :       INTEGER                                            :: handle, i, j, ncol_local, nrow_local
     732           0 :       INTEGER, DIMENSION(:), POINTER                     :: col_indices, row_indices
     733             :       LOGICAL                                            :: my_abs_vals
     734             : 
     735           0 :       CALL timeset(routineN, handle)
     736             : 
     737           0 :       IF (PRESENT(abs_vals)) THEN
     738           0 :          my_abs_vals = abs_vals
     739             :       ELSE
     740             :          my_abs_vals = .FALSE.
     741             :       END IF
     742             : 
     743             :       CALL cp_fm_get_info(matrix=fm, &
     744             :                           nrow_local=nrow_local, &
     745             :                           ncol_local=ncol_local, &
     746             :                           row_indices=row_indices, &
     747           0 :                           col_indices=col_indices)
     748             : 
     749           0 :       IF (unit_nr > 0) THEN
     750           0 :          WRITE (unit_nr, *) header
     751             :       END IF
     752           0 :       IF (my_abs_vals) THEN
     753           0 :          DO i = 1, nrow_local
     754           0 :             DO j = 1, ncol_local
     755           0 :                IF (ABS(fm%local_data(i, j)) > thresh) THEN
     756           0 :                   WRITE (unit_nr, "(A7,T10,I5,T20,I5,T30,F13.5)") header, row_indices(i), col_indices(j), &
     757           0 :                      ABS(fm%local_data(i, j))
     758             :                END IF
     759             :             END DO
     760             :          END DO
     761             :       ELSE
     762           0 :          DO i = 1, nrow_local
     763           0 :             DO j = 1, ncol_local
     764           0 :                IF (ABS(fm%local_data(i, j)) > thresh) THEN
     765           0 :                   WRITE (unit_nr, "(A7,T10,I5,T20,I5,T30,F13.5)") header, row_indices(i), col_indices(j), &
     766           0 :                      fm%local_data(i, j)
     767             :                END IF
     768             :             END DO
     769             :          END DO
     770             :       END IF
     771           0 :       CALL fm%matrix_struct%para_env%sync()
     772           0 :       IF (unit_nr > 0) THEN
     773           0 :          WRITE (unit_nr, *) my_footer
     774             :       END IF
     775             : 
     776           0 :       CALL timestop(handle)
     777             : 
     778           0 :    END SUBROUTINE
     779             : 
     780             : ! **************************************************************************************************
     781             : !> \brief ...
     782             : !> \param bse_tda ...
     783             : !> \param bse_abba ...
     784             : !> \param unit_nr ...
     785             : ! **************************************************************************************************
     786          18 :    SUBROUTINE print_BSE_start_flag(bse_tda, bse_abba, unit_nr)
     787             : 
     788             :       LOGICAL, INTENT(IN)                                :: bse_tda, bse_abba
     789             :       INTEGER, INTENT(IN)                                :: unit_nr
     790             : 
     791             :       CHARACTER(LEN=*), PARAMETER :: routineN = 'print_BSE_start_flag'
     792             : 
     793             :       INTEGER                                            :: handle
     794             : 
     795          18 :       CALL timeset(routineN, handle)
     796             : 
     797          18 :       IF (unit_nr > 0) THEN
     798           9 :          WRITE (unit_nr, *) ' '
     799           9 :          WRITE (unit_nr, '(T2,A79)') '*******************************************************************************'
     800           9 :          WRITE (unit_nr, '(T2,A79)') '**                                                                           **'
     801           9 :          WRITE (unit_nr, '(T2,A79)') '**           Bethe Salpeter equation (BSE) for excitation energies           **'
     802           9 :          IF (bse_tda .AND. bse_abba) THEN
     803           0 :             WRITE (unit_nr, '(T2,A79)') '**          solved with and without Tamm-Dancoff approximation (TDA)         **'
     804           9 :          ELSE IF (bse_tda) THEN
     805           6 :             WRITE (unit_nr, '(T2,A79)') '**                solved with Tamm-Dancoff approximation (TDA)               **'
     806             :          ELSE
     807           3 :             WRITE (unit_nr, '(T2,A79)') '**               solved without Tamm-Dancoff approximation (TDA)             **'
     808             :          END IF
     809             : 
     810           9 :          WRITE (unit_nr, '(T2,A79)') '**                                                                           **'
     811           9 :          WRITE (unit_nr, '(T2,A79)') '*******************************************************************************'
     812           9 :          WRITE (unit_nr, *) ' '
     813             :       END IF
     814             : 
     815          18 :       CALL timestop(handle)
     816             : 
     817          18 :    END SUBROUTINE
     818             : 
     819             : ! **************************************************************************************************
     820             : !> \brief ...
     821             : !> \param fm_mat_S_bar_ia_bse ...
     822             : !> \param fm_mat_S_bar_ij_bse ...
     823             : !> \param fm_mat_S_trunc ...
     824             : !> \param fm_mat_S_ij_trunc ...
     825             : !> \param fm_mat_S_ab_trunc ...
     826             : !> \param fm_mat_Q_static_bse_gemm ...
     827             : ! **************************************************************************************************
     828          18 :    SUBROUTINE deallocate_matrices_bse(fm_mat_S_bar_ia_bse, fm_mat_S_bar_ij_bse, &
     829             :                                       fm_mat_S_trunc, fm_mat_S_ij_trunc, fm_mat_S_ab_trunc, &
     830             :                                       fm_mat_Q_static_bse_gemm)
     831             : 
     832             :       TYPE(cp_fm_type), INTENT(INOUT) :: fm_mat_S_bar_ia_bse, fm_mat_S_bar_ij_bse, fm_mat_S_trunc, &
     833             :          fm_mat_S_ij_trunc, fm_mat_S_ab_trunc, fm_mat_Q_static_bse_gemm
     834             : 
     835             :       CHARACTER(LEN=*), PARAMETER :: routineN = 'deallocate_matrices_bse'
     836             : 
     837             :       INTEGER                                            :: handle
     838             : 
     839          18 :       CALL timeset(routineN, handle)
     840             : 
     841          18 :       CALL cp_fm_release(fm_mat_S_bar_ia_bse)
     842          18 :       CALL cp_fm_release(fm_mat_S_bar_ij_bse)
     843          18 :       CALL cp_fm_release(fm_mat_S_trunc)
     844          18 :       CALL cp_fm_release(fm_mat_S_ij_trunc)
     845          18 :       CALL cp_fm_release(fm_mat_S_ab_trunc)
     846          18 :       CALL cp_fm_release(fm_mat_Q_static_bse_gemm)
     847             : 
     848          18 :       CALL timestop(handle)
     849             : 
     850          18 :    END SUBROUTINE deallocate_matrices_bse
     851             : 
     852             : ! **************************************************************************************************
     853             : !> \brief Routine for computing the coefficients of the eigenvectors of the BSE matrix from a
     854             : !>  multiplication with the eigenvalues
     855             : !> \param fm_work ...
     856             : !> \param eig_vals ...
     857             : !> \param beta ...
     858             : !> \param gamma ...
     859             : !> \param do_transpose ...
     860             : ! **************************************************************************************************
     861          24 :    SUBROUTINE comp_eigvec_coeff_BSE(fm_work, eig_vals, beta, gamma, do_transpose)
     862             : 
     863             :       TYPE(cp_fm_type), INTENT(INOUT)                    :: fm_work
     864             :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:), &
     865             :          INTENT(IN)                                      :: eig_vals
     866             :       REAL(KIND=dp), INTENT(IN)                          :: beta
     867             :       REAL(KIND=dp), INTENT(IN), OPTIONAL                :: gamma
     868             :       LOGICAL, INTENT(IN), OPTIONAL                      :: do_transpose
     869             : 
     870             :       CHARACTER(LEN=*), PARAMETER :: routineN = 'comp_eigvec_coeff_BSE'
     871             : 
     872             :       INTEGER                                            :: handle, i_row_global, ii, j_col_global, &
     873             :                                                             jj, ncol_local, nrow_local
     874          12 :       INTEGER, DIMENSION(:), POINTER                     :: col_indices, row_indices
     875             :       LOGICAL                                            :: my_do_transpose
     876             :       REAL(KIND=dp)                                      :: coeff, my_gamma
     877             : 
     878          12 :       CALL timeset(routineN, handle)
     879             : 
     880          12 :       IF (PRESENT(gamma)) THEN
     881          12 :          my_gamma = gamma
     882             :       ELSE
     883             :          my_gamma = 2.0_dp
     884             :       END IF
     885             : 
     886          12 :       IF (PRESENT(do_transpose)) THEN
     887          12 :          my_do_transpose = do_transpose
     888             :       ELSE
     889             :          my_do_transpose = .FALSE.
     890             :       END IF
     891             : 
     892             :       CALL cp_fm_get_info(matrix=fm_work, &
     893             :                           nrow_local=nrow_local, &
     894             :                           ncol_local=ncol_local, &
     895             :                           row_indices=row_indices, &
     896          12 :                           col_indices=col_indices)
     897             : 
     898          12 :       IF (my_do_transpose) THEN
     899         588 :          DO jj = 1, ncol_local
     900         576 :             j_col_global = col_indices(jj)
     901       14412 :             DO ii = 1, nrow_local
     902       13824 :                coeff = (eig_vals(j_col_global)**beta)/my_gamma
     903       14400 :                fm_work%local_data(ii, jj) = fm_work%local_data(ii, jj)*coeff
     904             :             END DO
     905             :          END DO
     906             :       ELSE
     907           0 :          DO jj = 1, ncol_local
     908           0 :             DO ii = 1, nrow_local
     909           0 :                i_row_global = row_indices(ii)
     910           0 :                coeff = (eig_vals(i_row_global)**beta)/my_gamma
     911           0 :                fm_work%local_data(ii, jj) = fm_work%local_data(ii, jj)*coeff
     912             :             END DO
     913             :          END DO
     914             :       END IF
     915             : 
     916          12 :       CALL timestop(handle)
     917             : 
     918          12 :    END SUBROUTINE
     919             : 
     920             : ! **************************************************************************************************
     921             : !> \brief ...
     922             : !> \param idx_prim ...
     923             : !> \param idx_sec ...
     924             : !> \param eigvec_entries ...
     925             : ! **************************************************************************************************
     926         450 :    SUBROUTINE sort_excitations(idx_prim, idx_sec, eigvec_entries)
     927             : 
     928             :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: idx_prim, idx_sec
     929             :       REAL(kind=dp), ALLOCATABLE, DIMENSION(:)           :: eigvec_entries
     930             : 
     931             :       CHARACTER(LEN=*), PARAMETER                        :: routineN = 'sort_excitations'
     932             : 
     933             :       INTEGER                                            :: handle, ii, kk, num_entries, num_mults
     934         450 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: idx_prim_work, idx_sec_work, tmp_index
     935             :       LOGICAL                                            :: unique_entries
     936         450 :       REAL(kind=dp), ALLOCATABLE, DIMENSION(:)           :: eigvec_entries_work
     937             : 
     938         450 :       CALL timeset(routineN, handle)
     939             : 
     940         450 :       num_entries = SIZE(idx_prim)
     941             : 
     942        1350 :       ALLOCATE (tmp_index(num_entries))
     943             : 
     944         450 :       CALL sort(idx_prim, num_entries, tmp_index)
     945             : 
     946         900 :       ALLOCATE (idx_sec_work(num_entries))
     947        1350 :       ALLOCATE (eigvec_entries_work(num_entries))
     948             : 
     949        1512 :       DO ii = 1, num_entries
     950        1062 :          idx_sec_work(ii) = idx_sec(tmp_index(ii))
     951        1512 :          eigvec_entries_work(ii) = eigvec_entries(tmp_index(ii))
     952             :       END DO
     953             : 
     954         450 :       DEALLOCATE (tmp_index)
     955         450 :       DEALLOCATE (idx_sec)
     956         450 :       DEALLOCATE (eigvec_entries)
     957             : 
     958         450 :       CALL MOVE_ALLOC(idx_sec_work, idx_sec)
     959         450 :       CALL MOVE_ALLOC(eigvec_entries_work, eigvec_entries)
     960             : 
     961             :       !Now check for multiple entries in first idx to check necessity of sorting in second idx
     962         450 :       CALL sort_unique(idx_prim, unique_entries)
     963         450 :       IF (.NOT. unique_entries) THEN
     964         200 :          ALLOCATE (idx_prim_work(num_entries))
     965         552 :          idx_prim_work(:) = idx_prim(:)
     966             :          ! Find duplicate entries in idx_prim
     967         552 :          DO ii = 1, num_entries
     968         452 :             IF (idx_prim_work(ii) == 0) CYCLE
     969        1830 :             num_mults = COUNT(idx_prim_work == idx_prim_work(ii))
     970         316 :             IF (num_mults > 1) THEN
     971             :                !Set all duplicate entries to 0
     972         360 :                idx_prim_work(ii:ii + num_mults - 1) = 0
     973             :                !Start sorting in secondary index
     974         336 :                ALLOCATE (idx_sec_work(num_mults))
     975         336 :                ALLOCATE (eigvec_entries_work(num_mults))
     976         360 :                idx_sec_work(:) = idx_sec(ii:ii + num_mults - 1)
     977         360 :                eigvec_entries_work(:) = eigvec_entries(ii:ii + num_mults - 1)
     978         224 :                ALLOCATE (tmp_index(num_mults))
     979         112 :                CALL sort(idx_sec_work, num_mults, tmp_index)
     980             : 
     981             :                !Now write newly sorted indices to original arrays
     982         360 :                DO kk = ii, ii + num_mults - 1
     983         248 :                   idx_sec(kk) = idx_sec_work(kk - ii + 1)
     984         360 :                   eigvec_entries(kk) = eigvec_entries_work(tmp_index(kk - ii + 1))
     985             :                END DO
     986             :                !Deallocate work arrays
     987         112 :                DEALLOCATE (tmp_index)
     988         112 :                DEALLOCATE (idx_sec_work)
     989         112 :                DEALLOCATE (eigvec_entries_work)
     990             :             END IF
     991         552 :             idx_prim_work(ii) = idx_prim(ii)
     992             :          END DO
     993         100 :          DEALLOCATE (idx_prim_work)
     994             :       END IF
     995             : 
     996         450 :       CALL timestop(handle)
     997             : 
     998        1350 :    END SUBROUTINE sort_excitations
     999             : 
    1000             : ! **************************************************************************************************
    1001             : !> \brief Roughly estimates the needed runtime and memory during the BSE run
    1002             : !> \param homo_red ...
    1003             : !> \param virtual_red ...
    1004             : !> \param unit_nr ...
    1005             : !> \param bse_abba ...
    1006             : !> \param para_env ...
    1007             : !> \param diag_runtime_est ...
    1008             : ! **************************************************************************************************
    1009          18 :    SUBROUTINE estimate_BSE_resources(homo_red, virtual_red, unit_nr, bse_abba, &
    1010             :                                      para_env, diag_runtime_est)
    1011             : 
    1012             :       INTEGER                                            :: homo_red, virtual_red, unit_nr
    1013             :       LOGICAL                                            :: bse_abba
    1014             :       TYPE(mp_para_env_type), POINTER                    :: para_env
    1015             :       REAL(KIND=dp)                                      :: diag_runtime_est
    1016             : 
    1017             :       CHARACTER(LEN=*), PARAMETER :: routineN = 'estimate_BSE_resources'
    1018             : 
    1019             :       INTEGER                                            :: handle, num_BSE_matrices
    1020             :       INTEGER(KIND=int_8)                                :: full_dim
    1021             :       REAL(KIND=dp)                                      :: mem_est, mem_est_per_rank
    1022             : 
    1023          18 :       CALL timeset(routineN, handle)
    1024             : 
    1025             :       ! Number of matrices with size of A in TDA is 2 (A itself and W_ijab)
    1026          18 :       num_BSE_matrices = 2
    1027             :       ! With the full diagonalization of ABBA, we need several auxiliary matrices in the process
    1028             :       ! The maximum number is 2 + 2 + 6 (additional B and C matrix as well as 6 matrices to create C)
    1029          18 :       IF (bse_abba) THEN
    1030           6 :          num_BSE_matrices = 10
    1031             :       END IF
    1032             : 
    1033          18 :       full_dim = (INT(homo_red, KIND=int_8)**2*INT(virtual_red, KIND=int_8)**2)*INT(num_BSE_matrices, KIND=int_8)
    1034          18 :       mem_est = REAL(8*full_dim, KIND=dp)/REAL(1024**3, KIND=dp)
    1035          18 :       mem_est_per_rank = REAL(mem_est/para_env%num_pe, KIND=dp)
    1036             : 
    1037          18 :       IF (unit_nr > 0) THEN
    1038             :          ! WRITE (unit_nr, '(T2,A4,T7,A40,T68,F13.3)') 'BSE|', 'Total peak memory estimate from BSE [GB]', &
    1039             :          !    mem_est
    1040           9 :          WRITE (unit_nr, '(T2,A4,T7,A40,T68,ES13.3)') 'BSE|', 'Total peak memory estimate from BSE [GB]', &
    1041          18 :             mem_est
    1042           9 :          WRITE (unit_nr, '(T2,A4,T7,A47,T68,F13.3)') 'BSE|', 'Peak memory estimate per MPI rank from BSE [GB]', &
    1043          18 :             mem_est_per_rank
    1044           9 :          WRITE (unit_nr, '(T2,A4)') 'BSE|'
    1045             :       END IF
    1046             :       ! Rough estimation of diagonalization runtimes. Baseline was a full BSE Naphthalene
    1047             :       ! run with 11000x11000 entries in A/B/C, which took 10s on 32 ranks
    1048             :       diag_runtime_est = REAL(INT(homo_red, KIND=int_8)*INT(virtual_red, KIND=int_8)/11000_int_8, KIND=dp)**3* &
    1049          18 :                          10*32/REAL(para_env%num_pe, KIND=dp)
    1050             : 
    1051          18 :       CALL timestop(handle)
    1052             : 
    1053          18 :    END SUBROUTINE estimate_BSE_resources
    1054             : 
    1055             : ! **************************************************************************************************
    1056             : !> \brief Filters eigenvector entries above a given threshold to describe excitations in the
    1057             : !> singleparticle basis
    1058             : !> \param fm_eigvec ...
    1059             : !> \param idx_homo ...
    1060             : !> \param idx_virt ...
    1061             : !> \param eigvec_entries ...
    1062             : !> \param i_exc ...
    1063             : !> \param virtual ...
    1064             : !> \param num_entries ...
    1065             : !> \param mp2_env ...
    1066             : ! **************************************************************************************************
    1067         450 :    SUBROUTINE filter_eigvec_contrib(fm_eigvec, idx_homo, idx_virt, eigvec_entries, &
    1068             :                                     i_exc, virtual, num_entries, mp2_env)
    1069             : 
    1070             :       TYPE(cp_fm_type), INTENT(IN)                       :: fm_eigvec
    1071             :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: idx_homo, idx_virt
    1072             :       REAL(kind=dp), ALLOCATABLE, DIMENSION(:)           :: eigvec_entries
    1073             :       INTEGER                                            :: i_exc, virtual, num_entries
    1074             :       TYPE(mp2_type), INTENT(INOUT)                      :: mp2_env
    1075             : 
    1076             :       CHARACTER(LEN=*), PARAMETER :: routineN = 'filter_eigvec_contrib'
    1077             : 
    1078             :       INTEGER                                            :: eigvec_idx, handle, ii, iproc, jj, kk, &
    1079             :                                                             ncol_local, nrow_local, &
    1080             :                                                             num_entries_local
    1081             :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: num_entries_to_comm
    1082         450 :       INTEGER, DIMENSION(:), POINTER                     :: col_indices, row_indices
    1083             :       REAL(KIND=dp)                                      :: eigvec_entry
    1084             :       TYPE(integ_mat_buffer_type), ALLOCATABLE, &
    1085         450 :          DIMENSION(:)                                    :: buffer_entries
    1086             :       TYPE(mp_para_env_type), POINTER                    :: para_env
    1087             : 
    1088         450 :       CALL timeset(routineN, handle)
    1089             : 
    1090         450 :       para_env => fm_eigvec%matrix_struct%para_env
    1091             : 
    1092             :       CALL cp_fm_get_info(matrix=fm_eigvec, &
    1093             :                           nrow_local=nrow_local, &
    1094             :                           ncol_local=ncol_local, &
    1095             :                           row_indices=row_indices, &
    1096         450 :                           col_indices=col_indices)
    1097             : 
    1098        1350 :       ALLOCATE (num_entries_to_comm(0:para_env%num_pe - 1))
    1099        1350 :       num_entries_to_comm(:) = 0
    1100             : 
    1101       22050 :       DO jj = 1, ncol_local
    1102             :          !First check if i is localized on this proc
    1103       21600 :          IF (col_indices(jj) /= i_exc) THEN
    1104             :             CYCLE
    1105             :          END IF
    1106       11700 :          DO ii = 1, nrow_local
    1107       10800 :             eigvec_idx = row_indices(ii)
    1108       10800 :             eigvec_entry = fm_eigvec%local_data(ii, jj)/SQRT(2.0_dp)
    1109       32400 :             IF (ABS(eigvec_entry) > mp2_env%ri_g0w0%eps_x) THEN
    1110         531 :                num_entries_to_comm(para_env%mepos) = num_entries_to_comm(para_env%mepos) + 1
    1111             :             END IF
    1112             :          END DO
    1113             :       END DO
    1114             : 
    1115             :       !Gather number of entries of other processes
    1116         450 :       CALL para_env%sum(num_entries_to_comm)
    1117             : 
    1118         450 :       num_entries_local = num_entries_to_comm(para_env%mepos)
    1119             : 
    1120        2250 :       ALLOCATE (buffer_entries(0:para_env%num_pe - 1))
    1121             : 
    1122        1350 :       DO iproc = 0, para_env%num_pe - 1
    1123        2514 :          ALLOCATE (buffer_entries(iproc)%msg(num_entries_to_comm(iproc)))
    1124        2514 :          ALLOCATE (buffer_entries(iproc)%indx(num_entries_to_comm(iproc), 2))
    1125        1962 :          buffer_entries(iproc)%msg = 0.0_dp
    1126        5274 :          buffer_entries(iproc)%indx = 0
    1127             :       END DO
    1128             : 
    1129             :       kk = 1
    1130       22050 :       DO jj = 1, ncol_local
    1131             :          !First check if i is localized on this proc
    1132       21600 :          IF (col_indices(jj) /= i_exc) THEN
    1133             :             CYCLE
    1134             :          END IF
    1135       11700 :          DO ii = 1, nrow_local
    1136       10800 :             eigvec_idx = row_indices(ii)
    1137       10800 :             eigvec_entry = fm_eigvec%local_data(ii, jj)/SQRT(2.0_dp)
    1138       32400 :             IF (ABS(eigvec_entry) > mp2_env%ri_g0w0%eps_x) THEN
    1139         531 :                buffer_entries(para_env%mepos)%indx(kk, 1) = (eigvec_idx - 1)/virtual + 1
    1140         531 :                buffer_entries(para_env%mepos)%indx(kk, 2) = MOD(eigvec_idx - 1, virtual) + 1
    1141         531 :                buffer_entries(para_env%mepos)%msg(kk) = eigvec_entry
    1142         531 :                kk = kk + 1
    1143             :             END IF
    1144             :          END DO
    1145             :       END DO
    1146             : 
    1147        1350 :       DO iproc = 0, para_env%num_pe - 1
    1148         900 :          CALL para_env%sum(buffer_entries(iproc)%msg)
    1149        1350 :          CALL para_env%sum(buffer_entries(iproc)%indx)
    1150             :       END DO
    1151             : 
    1152             :       !Now sum up gathered information
    1153        1350 :       num_entries = SUM(num_entries_to_comm)
    1154        1350 :       ALLOCATE (idx_homo(num_entries))
    1155         900 :       ALLOCATE (idx_virt(num_entries))
    1156        1350 :       ALLOCATE (eigvec_entries(num_entries))
    1157             : 
    1158         450 :       kk = 1
    1159        1350 :       DO iproc = 0, para_env%num_pe - 1
    1160        1350 :          IF (num_entries_to_comm(iproc) /= 0) THEN
    1161        1776 :             DO ii = 1, num_entries_to_comm(iproc)
    1162        1062 :                idx_homo(kk) = buffer_entries(iproc)%indx(ii, 1)
    1163        1062 :                idx_virt(kk) = buffer_entries(iproc)%indx(ii, 2)
    1164        1062 :                eigvec_entries(kk) = buffer_entries(iproc)%msg(ii)
    1165        1776 :                kk = kk + 1
    1166             :             END DO
    1167             :          END IF
    1168             :       END DO
    1169             : 
    1170             :       !Deallocate all the used arrays
    1171        1350 :       DO iproc = 0, para_env%num_pe - 1
    1172         900 :          DEALLOCATE (buffer_entries(iproc)%msg)
    1173        1350 :          DEALLOCATE (buffer_entries(iproc)%indx)
    1174             :       END DO
    1175        1800 :       DEALLOCATE (buffer_entries)
    1176         450 :       DEALLOCATE (num_entries_to_comm)
    1177         450 :       NULLIFY (row_indices)
    1178         450 :       NULLIFY (col_indices)
    1179             : 
    1180             :       !Now sort the results according to the involved singleparticle orbitals
    1181             :       ! (homo first, then virtual)
    1182         450 :       CALL sort_excitations(idx_homo, idx_virt, eigvec_entries)
    1183             : 
    1184         450 :       CALL timestop(handle)
    1185             : 
    1186         450 :    END SUBROUTINE
    1187             : 
    1188             : ! **************************************************************************************************
    1189             : !> \brief Reads cutoffs for BSE from mp2_env and compares to energies in Eigenval to extract
    1190             : !>        reduced homo/virtual and
    1191             : !> \param Eigenval array (1d) with energies, can be e.g. from GW or DFT
    1192             : !> \param homo Total number of occupied orbitals
    1193             : !> \param virtual Total number of unoccupied orbitals
    1194             : !> \param homo_red Total number of occupied orbitals to include after cutoff
    1195             : !> \param virt_red Total number of unoccupied orbitals to include after ctuoff
    1196             : !> \param homo_incl First occupied index to include after cutoff
    1197             : !> \param virt_incl Last unoccupied index to include after cutoff
    1198             : !> \param mp2_env ...
    1199             : ! **************************************************************************************************
    1200          36 :    SUBROUTINE determine_cutoff_indices(Eigenval, &
    1201             :                                        homo, virtual, &
    1202             :                                        homo_red, virt_red, &
    1203             :                                        homo_incl, virt_incl, &
    1204             :                                        mp2_env)
    1205             : 
    1206             :       REAL(KIND=dp), DIMENSION(:), INTENT(IN)            :: Eigenval
    1207             :       INTEGER, INTENT(IN)                                :: homo, virtual
    1208             :       INTEGER, INTENT(OUT)                               :: homo_red, virt_red, homo_incl, virt_incl
    1209             :       TYPE(mp2_type), INTENT(INOUT)                      :: mp2_env
    1210             : 
    1211             :       CHARACTER(LEN=*), PARAMETER :: routineN = 'determine_cutoff_indices'
    1212             : 
    1213             :       INTEGER                                            :: handle, i_homo, j_virt
    1214             : 
    1215          36 :       CALL timeset(routineN, handle)
    1216             :       ! Determine index in homo and virtual for truncation
    1217             :       ! Uses indices of outermost orbitals within energy range (-mp2_env%ri_g0w0%bse_cutoff_occ,mp2_env%ri_g0w0%bse_cutoff_empty)
    1218          36 :       IF (mp2_env%ri_g0w0%bse_cutoff_occ > 0 .OR. mp2_env%ri_g0w0%bse_cutoff_empty > 0) THEN
    1219             :          IF (-mp2_env%ri_g0w0%bse_cutoff_occ .LT. Eigenval(1) - Eigenval(homo) &
    1220          36 :              .OR. mp2_env%ri_g0w0%bse_cutoff_occ < 0) THEN
    1221          36 :             homo_red = homo
    1222          36 :             homo_incl = 1
    1223             :          ELSE
    1224           0 :             homo_incl = 1
    1225           0 :             DO i_homo = 1, homo
    1226           0 :                IF (Eigenval(i_homo) - Eigenval(homo) .GT. -mp2_env%ri_g0w0%bse_cutoff_occ) THEN
    1227           0 :                   homo_incl = i_homo
    1228           0 :                   EXIT
    1229             :                END IF
    1230             :             END DO
    1231           0 :             homo_red = homo - homo_incl + 1
    1232             :          END IF
    1233             : 
    1234             :          IF (mp2_env%ri_g0w0%bse_cutoff_empty .GT. Eigenval(homo + virtual) - Eigenval(homo + 1) &
    1235          36 :              .OR. mp2_env%ri_g0w0%bse_cutoff_empty < 0) THEN
    1236           0 :             virt_red = virtual
    1237           0 :             virt_incl = virtual
    1238             :          ELSE
    1239          36 :             virt_incl = homo + 1
    1240         468 :             DO j_virt = 1, virtual
    1241         468 :                IF (Eigenval(homo + j_virt) - Eigenval(homo + 1) .GT. mp2_env%ri_g0w0%bse_cutoff_empty) THEN
    1242          36 :                   virt_incl = j_virt - 1
    1243          36 :                   EXIT
    1244             :                END IF
    1245             :             END DO
    1246          36 :             virt_red = virt_incl
    1247             :          END IF
    1248             :       ELSE
    1249           0 :          homo_red = homo
    1250           0 :          virt_red = virtual
    1251           0 :          homo_incl = 1
    1252           0 :          virt_incl = virtual
    1253             :       END IF
    1254             : 
    1255          36 :       CALL timestop(handle)
    1256             : 
    1257          36 :    END SUBROUTINE
    1258             : 
    1259             : ! **************************************************************************************************
    1260             : !> \brief Determines indices within the given energy cutoffs and truncates Eigenvalues and matrices
    1261             : !> \param fm_mat_S_ia_bse ...
    1262             : !> \param fm_mat_S_ij_bse ...
    1263             : !> \param fm_mat_S_ab_bse ...
    1264             : !> \param fm_mat_S_trunc ...
    1265             : !> \param fm_mat_S_ij_trunc ...
    1266             : !> \param fm_mat_S_ab_trunc ...
    1267             : !> \param Eigenval_scf ...
    1268             : !> \param Eigenval ...
    1269             : !> \param Eigenval_reduced ...
    1270             : !> \param homo ...
    1271             : !> \param virtual ...
    1272             : !> \param dimen_RI ...
    1273             : !> \param unit_nr ...
    1274             : !> \param bse_lev_virt ...
    1275             : !> \param homo_red ...
    1276             : !> \param virt_red ...
    1277             : !> \param mp2_env ...
    1278             : ! **************************************************************************************************
    1279          90 :    SUBROUTINE truncate_BSE_matrices(fm_mat_S_ia_bse, fm_mat_S_ij_bse, fm_mat_S_ab_bse, &
    1280             :                                     fm_mat_S_trunc, fm_mat_S_ij_trunc, fm_mat_S_ab_trunc, &
    1281          18 :                                     Eigenval_scf, Eigenval, Eigenval_reduced, &
    1282             :                                     homo, virtual, dimen_RI, unit_nr, &
    1283             :                                     bse_lev_virt, &
    1284             :                                     homo_red, virt_red, &
    1285             :                                     mp2_env)
    1286             : 
    1287             :       TYPE(cp_fm_type), INTENT(IN)                       :: fm_mat_S_ia_bse, fm_mat_S_ij_bse, &
    1288             :                                                             fm_mat_S_ab_bse
    1289             :       TYPE(cp_fm_type), INTENT(INOUT)                    :: fm_mat_S_trunc, fm_mat_S_ij_trunc, &
    1290             :                                                             fm_mat_S_ab_trunc
    1291             :       REAL(KIND=dp), DIMENSION(:)                        :: Eigenval_scf, Eigenval
    1292             :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: Eigenval_reduced
    1293             :       INTEGER, INTENT(IN)                                :: homo, virtual, dimen_RI, unit_nr, &
    1294             :                                                             bse_lev_virt
    1295             :       INTEGER, INTENT(OUT)                               :: homo_red, virt_red
    1296             :       TYPE(mp2_type), INTENT(INOUT)                      :: mp2_env
    1297             : 
    1298             :       CHARACTER(LEN=*), PARAMETER :: routineN = 'truncate_BSE_matrices'
    1299             : 
    1300             :       INTEGER                                            :: handle, homo_incl, virt_incl
    1301             :       TYPE(cp_blacs_env_type), POINTER                   :: context
    1302             :       TYPE(cp_fm_struct_type), POINTER                   :: fm_struct_ab, fm_struct_ia, fm_struct_ij
    1303             :       TYPE(mp_para_env_type), POINTER                    :: para_env
    1304             : 
    1305          18 :       CALL timeset(routineN, handle)
    1306             : 
    1307             :       ! Determine index in homo and virtual for truncation
    1308             :       ! Uses indices of outermost orbitals within energy range (-mp2_env%ri_g0w0%bse_cutoff_occ,mp2_env%ri_g0w0%bse_cutoff_empty)
    1309             : 
    1310             :       CALL determine_cutoff_indices(Eigenval_scf, &
    1311             :                                     homo, virtual, &
    1312             :                                     homo_red, virt_red, &
    1313             :                                     homo_incl, virt_incl, &
    1314          18 :                                     mp2_env)
    1315             : 
    1316          18 :       IF (unit_nr > 0) THEN
    1317           9 :          IF (mp2_env%ri_g0w0%bse_cutoff_occ > 0) THEN
    1318           9 :             WRITE (unit_nr, '(T2,A4,T7,A29,T71,F10.3)') 'BSE|', 'Cutoff occupied orbitals [eV]', &
    1319          18 :                mp2_env%ri_g0w0%bse_cutoff_occ*evolt
    1320             :          ELSE
    1321           0 :             WRITE (unit_nr, '(T2,A4,T7,A37)') 'BSE|', 'No cutoff given for occupied orbitals'
    1322             :          END IF
    1323           9 :          IF (mp2_env%ri_g0w0%bse_cutoff_empty > 0) THEN
    1324           9 :             WRITE (unit_nr, '(T2,A4,T7,A26,T71,F10.3)') 'BSE|', 'Cutoff empty orbitals [eV]', &
    1325          18 :                mp2_env%ri_g0w0%bse_cutoff_empty*evolt
    1326             :          ELSE
    1327           0 :             WRITE (unit_nr, '(T2,A4,T7,A34)') 'BSE|', 'No cutoff given for empty orbitals'
    1328             :          END IF
    1329           9 :          WRITE (unit_nr, '(T2,A4,T7,A20,T71,I10)') 'BSE|', 'First occupied index', homo_incl
    1330           9 :          WRITE (unit_nr, '(T2,A4,T7,A32,T71,I10)') 'BSE|', 'Last empty index (not MO index!)', virt_incl
    1331           9 :          WRITE (unit_nr, '(T2,A4,T7,A35,T71,F10.3)') 'BSE|', 'Energy of first occupied index [eV]', Eigenval(homo_incl)*evolt
    1332           9 :          WRITE (unit_nr, '(T2,A4,T7,A31,T71,F10.3)') 'BSE|', 'Energy of last empty index [eV]', Eigenval(homo + virt_incl)*evolt
    1333           9 :          WRITE (unit_nr, '(T2,A4,T7,A54,T71,F10.3)') 'BSE|', 'Energy difference of first occupied index to HOMO [eV]', &
    1334          18 :             -(Eigenval(homo_incl) - Eigenval(homo))*evolt
    1335           9 :          WRITE (unit_nr, '(T2,A4,T7,A50,T71,F10.3)') 'BSE|', 'Energy difference of last empty index to LUMO [eV]', &
    1336          18 :             (Eigenval(homo + virt_incl) - Eigenval(homo + 1))*evolt
    1337           9 :          WRITE (unit_nr, '(T2,A4,T7,A35,T71,I10)') 'BSE|', 'Number of GW-corrected occupied MOs', mp2_env%ri_g0w0%corr_mos_occ
    1338           9 :          WRITE (unit_nr, '(T2,A4,T7,A32,T71,I10)') 'BSE|', 'Number of GW-corrected empty MOs', mp2_env%ri_g0w0%corr_mos_virt
    1339           9 :          WRITE (unit_nr, '(T2,A4)') 'BSE|'
    1340             :       END IF
    1341          18 :       IF (unit_nr > 0) THEN
    1342           9 :          IF (homo - homo_incl + 1 > mp2_env%ri_g0w0%corr_mos_occ) THEN
    1343           0 :             CPABORT("Number of GW-corrected occupied MOs too small for chosen BSE cutoff")
    1344             :          END IF
    1345           9 :          IF (virt_incl > mp2_env%ri_g0w0%corr_mos_virt) THEN
    1346           0 :             CPABORT("Number of GW-corrected virtual MOs too small for chosen BSE cutoff")
    1347             :          END IF
    1348             :       END IF
    1349             :       !Truncate full fm_S matrices
    1350             :       !Allocate new truncated matrices of proper size
    1351          18 :       para_env => fm_mat_S_ia_bse%matrix_struct%para_env
    1352          18 :       context => fm_mat_S_ia_bse%matrix_struct%context
    1353             : 
    1354          18 :       CALL cp_fm_struct_create(fm_struct_ia, para_env, context, dimen_RI, homo_red*virt_red)
    1355          18 :       CALL cp_fm_struct_create(fm_struct_ij, para_env, context, dimen_RI, homo_red*homo_red)
    1356          18 :       CALL cp_fm_struct_create(fm_struct_ab, para_env, context, dimen_RI, virt_red*virt_red)
    1357             : 
    1358          18 :       CALL cp_fm_create(fm_mat_S_trunc, fm_struct_ia, "fm_S_trunc")
    1359          18 :       CALL cp_fm_create(fm_mat_S_ij_trunc, fm_struct_ij, "fm_S_ij_trunc")
    1360          18 :       CALL cp_fm_create(fm_mat_S_ab_trunc, fm_struct_ab, "fm_S_ab_trunc")
    1361             : 
    1362             :       !Copy parts of original matrices to truncated ones
    1363          18 :       IF (mp2_env%ri_g0w0%bse_cutoff_occ > 0 .OR. mp2_env%ri_g0w0%bse_cutoff_empty > 0) THEN
    1364             :          !Truncate eigenvals
    1365          54 :          ALLOCATE (Eigenval_reduced(homo_red + virt_red))
    1366         306 :          Eigenval_reduced(:) = Eigenval(homo_incl:homo + virt_incl)
    1367             : 
    1368             :          CALL truncate_fm(fm_mat_S_trunc, fm_mat_S_ia_bse, virtual, &
    1369             :                           homo_red, virt_red, unit_nr, mp2_env, &
    1370          18 :                           nrow_offset=homo_incl)
    1371             :          CALL truncate_fm(fm_mat_S_ij_trunc, fm_mat_S_ij_bse, homo, &
    1372             :                           homo_red, homo_red, unit_nr, mp2_env, &
    1373          18 :                           homo_incl, homo_incl)
    1374             :          CALL truncate_fm(fm_mat_S_ab_trunc, fm_mat_S_ab_bse, bse_lev_virt, &
    1375          18 :                           virt_red, virt_red, unit_nr, mp2_env)
    1376             : 
    1377             :       ELSE
    1378           0 :          IF (unit_nr > 0) THEN
    1379           0 :             WRITE (unit_nr, '(T2,A4,T7,A37)') 'BSE|', 'No truncation of BSE matrices applied'
    1380           0 :             WRITE (unit_nr, '(T2,A4)') 'BSE|'
    1381             :          END IF
    1382           0 :          ALLOCATE (Eigenval_reduced(homo_red + virt_red))
    1383           0 :          Eigenval_reduced(:) = Eigenval(:)
    1384             :          CALL cp_fm_to_fm_submat_general(fm_mat_S_ia_bse, fm_mat_S_trunc, dimen_RI, homo_red*virt_red, &
    1385           0 :                                          1, 1, 1, 1, context)
    1386             :          CALL cp_fm_to_fm_submat_general(fm_mat_S_ij_bse, fm_mat_S_ij_trunc, dimen_RI, homo_red*homo_red, &
    1387           0 :                                          1, 1, 1, 1, context)
    1388             :          CALL cp_fm_to_fm_submat_general(fm_mat_S_ab_bse, fm_mat_S_ab_trunc, dimen_RI, virt_red*virt_red, &
    1389           0 :                                          1, 1, 1, 1, context)
    1390             :       END IF
    1391             : 
    1392          18 :       CALL cp_fm_struct_release(fm_struct_ia)
    1393          18 :       CALL cp_fm_struct_release(fm_struct_ij)
    1394          18 :       CALL cp_fm_struct_release(fm_struct_ab)
    1395             : 
    1396          18 :       NULLIFY (para_env)
    1397          18 :       NULLIFY (context)
    1398             : 
    1399          18 :       CALL timestop(handle)
    1400             : 
    1401          18 :    END SUBROUTINE truncate_BSE_matrices
    1402             : 
    1403             : ! **************************************************************************************************
    1404             : !> \brief Compute and return BSE dipoles d_r^n = sqrt(2) sum_ia < ψ_i | µ | ψ_a > ( X_ia^n + Y_ia^n )
    1405             : !>    and oscillator strengths f^n = 2/3 * Ω^n sum_r∈(x,y,z) ( d_r^n )^2
    1406             : !>    Prelim Ref.: Eqs. (23), (24)
    1407             : !>    in J. Chem. Phys. 152, 044105 (2020); https://doi.org/10.1063/1.5123290
    1408             : !> \param fm_eigvec_X ...
    1409             : !> \param Exc_ens ...
    1410             : !> \param dipole_exc BSE dipole vectors in real space per excitation level
    1411             : !> \param oscill_str Oscillator strength per excitation level
    1412             : !> \param mp2_env ...
    1413             : !> \param qs_env ...
    1414             : !> \param mo_coeff ...
    1415             : !> \param homo_red ...
    1416             : !> \param virtual_red ...
    1417             : !> \param unit_nr ...
    1418             : !> \param fm_eigvec_Y ...
    1419             : ! **************************************************************************************************
    1420          18 :    SUBROUTINE get_oscillator_strengths(fm_eigvec_X, Exc_ens, &
    1421             :                                        dipole_exc, oscill_str, &
    1422          18 :                                        mp2_env, qs_env, mo_coeff, &
    1423             :                                        homo_red, virtual_red, unit_nr, &
    1424             :                                        fm_eigvec_Y)
    1425             : 
    1426             :       TYPE(cp_fm_type), INTENT(IN)                       :: fm_eigvec_X
    1427             :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:), &
    1428             :          INTENT(IN)                                      :: Exc_ens
    1429             :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :), &
    1430             :          INTENT(OUT)                                     :: dipole_exc
    1431             :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:), &
    1432             :          INTENT(OUT)                                     :: oscill_str
    1433             :       TYPE(mp2_type), INTENT(IN)                         :: mp2_env
    1434             :       TYPE(qs_environment_type), POINTER                 :: qs_env
    1435             :       TYPE(cp_fm_type), DIMENSION(:), INTENT(IN)         :: mo_coeff
    1436             :       INTEGER, INTENT(IN)                                :: homo_red, virtual_red, unit_nr
    1437             :       TYPE(cp_fm_type), INTENT(IN), OPTIONAL             :: fm_eigvec_Y
    1438             : 
    1439             :       CHARACTER(LEN=*), PARAMETER :: routineN = 'get_oscillator_strengths'
    1440             : 
    1441             :       INTEGER                                            :: handle, idir, n, n_occ, n_virt, nao
    1442             :       TYPE(cp_fm_struct_type), POINTER                   :: fm_struct_dipole_exc, fm_struct_dipoles, &
    1443             :                                                             fm_struct_dipoles_reordered, &
    1444             :                                                             fm_struct_dipoles_trunc
    1445             :       TYPE(cp_fm_type)                                   :: fm_eigvec_XYsum
    1446         126 :       TYPE(cp_fm_type), DIMENSION(3)                     :: fm_dipole_exc, fm_dipole_per_dir, &
    1447          72 :                                                             fm_dipole_per_dir_reordered, &
    1448          72 :                                                             fm_dipoles_per_dir_trunc
    1449             : 
    1450          18 :       CALL timeset(routineN, handle)
    1451             : 
    1452             :       ! Extract dimension from qs_env
    1453          18 :       nao = qs_env%mos(1)%nao
    1454             :       ! Writing homo to n_occ instead if nmo,
    1455             :       ! takes care of ADDED_MOS, which would overwrite nmo, if invoked
    1456          18 :       n_occ = qs_env%mos(1)%homo
    1457          18 :       n_virt = nao - n_occ
    1458             :       ! Original fm_struct_dipoles is invoked with a different blacs env to ensure correct
    1459             :       ! multiplication in get_dipoles_mo using cp_dbcsr_sm_fm_multiply
    1460             :       ! We use general copy afterwards, such that fm_dipoles_per_dir_trunc is already
    1461             :       ! back to BSE blacs envs
    1462             :       CALL cp_fm_struct_create(fm_struct_dipoles, mo_coeff(1)%matrix_struct%para_env, &
    1463          18 :                                mo_coeff(1)%matrix_struct%context, n_virt, n_occ)
    1464             :       CALL cp_fm_struct_create(fm_struct_dipoles_trunc, fm_eigvec_X%matrix_struct%para_env, &
    1465          18 :                                fm_eigvec_X%matrix_struct%context, virtual_red, homo_red)
    1466             :       CALL cp_fm_struct_create(fm_struct_dipoles_reordered, fm_eigvec_X%matrix_struct%para_env, &
    1467          18 :                                fm_eigvec_X%matrix_struct%context, 1, homo_red*virtual_red)
    1468             :       CALL cp_fm_struct_create(fm_struct_dipole_exc, fm_eigvec_X%matrix_struct%para_env, &
    1469          18 :                                fm_eigvec_X%matrix_struct%context, 1, homo_red*virtual_red)
    1470             : 
    1471          72 :       DO idir = 1, 3
    1472             :          !Create final dipole matrix
    1473          54 :          CALL cp_fm_create(fm_dipole_per_dir(idir), matrix_struct=fm_struct_dipoles, name="dipoles_mo")
    1474          72 :          CALL cp_fm_set_all(fm_dipole_per_dir(idir), 0.0_dp)
    1475             :       END DO
    1476             : 
    1477             :       ! Obtain dipoles in MO basis with index structure n_virt x n_occ: \vec{D}_ai = <a|\vec{r}|i>
    1478          18 :       CALL get_dipoles_mo(fm_dipole_per_dir, qs_env, mo_coeff)
    1479             : 
    1480             :       ! Include excitonic amplitudes in dipoles, i.e. obtain "BSE dipoles":
    1481             :       ! \vec{D}_n = sqrt(2) * sum_{i,a} \vec{D}_ai (X_{ai}^{(n)} + Y_{ai}^{(n)})
    1482             : 
    1483             :       ! Reorder dipoles in order to execute the sum ovver i and a by parallel gemm
    1484          72 :       DO idir = 1, 3
    1485             :          CALL cp_fm_create(fm_dipoles_per_dir_trunc(idir), matrix_struct=fm_struct_dipoles_trunc, &
    1486          54 :                            name="dipoles_mo_trunc")
    1487             : 
    1488             :          CALL cp_fm_to_fm_submat_general(fm_dipole_per_dir(idir), &
    1489             :                                          fm_dipoles_per_dir_trunc(idir), &
    1490             :                                          virtual_red, &
    1491             :                                          homo_red, &
    1492             :                                          1, &
    1493             :                                          n_occ - homo_red + 1, &
    1494             :                                          1, &
    1495             :                                          1, &
    1496          54 :                                          fm_struct_dipoles_trunc%context)
    1497             :          CALL cp_fm_create(fm_dipole_per_dir_reordered(idir), matrix_struct=fm_struct_dipoles_reordered, &
    1498          54 :                            name="dipoles_mo_reordered")
    1499          54 :          CALL cp_fm_set_all(fm_dipole_per_dir_reordered(idir), 0.0_dp)
    1500             :          CALL fm_general_add_bse(fm_dipole_per_dir_reordered(idir), fm_dipoles_per_dir_trunc(idir), 1.0_dp, &
    1501             :                                  1, 1, &
    1502             :                                  1, virtual_red, &
    1503          54 :                                  unit_nr, (/2, 4, 3, 1/), mp2_env)
    1504          72 :          CALL cp_fm_release(fm_dipole_per_dir(idir))
    1505             :       END DO
    1506             : 
    1507          72 :       DO idir = 1, 3
    1508             :          CALL cp_fm_create(fm_dipole_exc(idir), matrix_struct=fm_struct_dipole_exc, &
    1509          54 :                            name="excitonic_dipoles")
    1510          72 :          CALL cp_fm_set_all(fm_dipole_exc(idir), 0.0_dp)
    1511             :       END DO
    1512             : 
    1513             :       ! If TDA is invoked, Y is not present as it is simply 0
    1514          18 :       CALL cp_fm_create(fm_eigvec_XYsum, matrix_struct=fm_eigvec_X%matrix_struct, name="excit_amplitude_sum")
    1515          18 :       CALL cp_fm_set_all(fm_eigvec_XYsum, 0.0_dp)
    1516          18 :       CALL cp_fm_to_fm(fm_eigvec_X, fm_eigvec_XYsum)
    1517          18 :       IF (PRESENT(fm_eigvec_Y)) THEN
    1518           6 :          CALL cp_fm_to_fm(fm_eigvec_Y, fm_eigvec_XYsum)
    1519             :       END IF
    1520          72 :       DO idir = 1, 3
    1521             :          CALL parallel_gemm('N', 'N', 1, homo_red*virtual_red, homo_red*virtual_red, SQRT(2.0_dp), &
    1522          72 :                             fm_dipole_per_dir_reordered(idir), fm_eigvec_XYsum, 0.0_dp, fm_dipole_exc(idir))
    1523             :       END DO
    1524             : 
    1525             :       ! Get oscillator strengths themselves
    1526          54 :       ALLOCATE (oscill_str(homo_red*virtual_red))
    1527          54 :       ALLOCATE (dipole_exc(3, 1, homo_red*virtual_red))
    1528        4338 :       dipole_exc(:, :, :) = 0.0_dp
    1529             : 
    1530             :       ! Sum over all directions
    1531          72 :       DO idir = 1, 3
    1532          72 :          CALL cp_fm_get_submatrix(fm_dipole_exc(idir), dipole_exc(idir, :, :))
    1533             :       END DO
    1534             : 
    1535         882 :       DO n = 1, homo_red*virtual_red
    1536        4338 :          oscill_str(n) = 2.0_dp/3.0_dp*Exc_ens(n)*SUM(ABS(dipole_exc(:, :, n))**2)
    1537             :       END DO
    1538             : 
    1539          18 :       CALL cp_fm_struct_release(fm_struct_dipoles)
    1540          18 :       CALL cp_fm_struct_release(fm_struct_dipoles_reordered)
    1541          18 :       CALL cp_fm_struct_release(fm_struct_dipole_exc)
    1542          18 :       CALL cp_fm_struct_release(fm_struct_dipoles_trunc)
    1543          72 :       DO idir = 1, 3
    1544          54 :          CALL cp_fm_release(fm_dipole_per_dir_reordered(idir))
    1545          54 :          CALL cp_fm_release(fm_dipole_exc(idir))
    1546          54 :          CALL cp_fm_release(fm_dipoles_per_dir_trunc(idir))
    1547          72 :          CALL cp_fm_release(fm_dipole_per_dir(idir))
    1548             :       END DO
    1549          18 :       CALL cp_fm_release(fm_eigvec_XYsum)
    1550             : 
    1551          18 :       CALL timestop(handle)
    1552             : 
    1553          36 :    END SUBROUTINE get_oscillator_strengths
    1554             : 
    1555             : ! **************************************************************************************************
    1556             : !> \brief Computes and returns dipole in MO basis, i.e. < ψ_i | µ | ψ_a >
    1557             : !> \param fm_dipole_per_dir Dipoles in MO basis with blacs environment from DFT
    1558             : !> \param qs_env ...
    1559             : !> \param mo_coeff mo_coeff in layout nao x nao from mp2, i.e. all occ and virt indices in both axes
    1560             : ! **************************************************************************************************
    1561          18 :    SUBROUTINE get_dipoles_mo(fm_dipole_per_dir, qs_env, mo_coeff)
    1562             : 
    1563             :       TYPE(cp_fm_type), DIMENSION(3), INTENT(INOUT)      :: fm_dipole_per_dir
    1564             :       TYPE(qs_environment_type), POINTER                 :: qs_env
    1565             :       TYPE(cp_fm_type), DIMENSION(:), INTENT(IN)         :: mo_coeff
    1566             : 
    1567             :       CHARACTER(LEN=*), PARAMETER                        :: routineN = 'get_dipoles_mo'
    1568             : 
    1569             :       INTEGER                                            :: handle, idir, n_occ, n_virt, nao
    1570          18 :       REAL(dp), ALLOCATABLE, DIMENSION(:)                :: rpoint
    1571          18 :       REAL(KIND=dp), DIMENSION(:), POINTER               :: ref_point
    1572             :       TYPE(cp_fm_struct_type), POINTER                   :: fm_struct_coeff_occ, &
    1573             :                                                             fm_struct_coeff_virt, &
    1574             :                                                             fm_struct_dipoles_ao, fm_struct_nao_nao
    1575             :       TYPE(cp_fm_type)                                   :: fm_coeff_occ, fm_coeff_virt, &
    1576             :                                                             fm_dip_intermed
    1577          18 :       TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: matrix_r, matrix_s
    1578          18 :       TYPE(mo_set_type), DIMENSION(:), POINTER           :: mos
    1579             :       TYPE(neighbor_list_set_p_type), DIMENSION(:), &
    1580          18 :          POINTER                                         :: sab_orb
    1581             : 
    1582          18 :       CALL timeset(routineN, handle)
    1583             : 
    1584             :       !First, we calculate the AO dipoles
    1585          18 :       NULLIFY (sab_orb, matrix_s)
    1586             :       CALL get_qs_env(qs_env, &
    1587             :                       mos=mos, &
    1588             :                       matrix_s=matrix_s, &
    1589          18 :                       sab_orb=sab_orb)
    1590             : 
    1591             :       ! Use the same blacs environment as for the MO coefficients to ensure correct multiplication dbcsr x fm later on
    1592          18 :       fm_struct_dipoles_ao => mos(1)%mo_coeff%matrix_struct
    1593             : 
    1594          18 :       NULLIFY (matrix_r)
    1595          18 :       CALL dbcsr_allocate_matrix_set(matrix_r, 3)
    1596          72 :       DO idir = 1, 3
    1597          54 :          CALL dbcsr_init_p(matrix_r(idir)%matrix)
    1598             :          CALL dbcsr_create(matrix_r(idir)%matrix, name="DIPOLE MATRIX X/Y/Z", &
    1599          54 :                            template=matrix_s(1)%matrix, matrix_type=dbcsr_type_symmetric)
    1600          54 :          CALL cp_dbcsr_alloc_block_from_nbl(matrix_r(idir)%matrix, sab_orb)
    1601          72 :          CALL dbcsr_set(matrix_r(idir)%matrix, 0._dp)
    1602             :       END DO
    1603             : 
    1604          18 :       ALLOCATE (rpoint(3))
    1605          18 :       CALL get_reference_point(rpoint, qs_env=qs_env, reference=use_mom_ref_coac, ref_point=ref_point)
    1606             : 
    1607          18 :       CALL build_local_moment_matrix(qs_env, matrix_r, 1, ref_point=rpoint)
    1608          18 :       DEALLOCATE (rpoint)
    1609          18 :       NULLIFY (sab_orb)
    1610             : 
    1611             :       ! Now we transform them to MO
    1612             :       ! n_occ is the number of occupied MOs, nao the number of all AOs
    1613             :       ! Writing homo to n_occ instead if nmo,
    1614             :       ! takes care of ADDED_MOS, which would overwrite nmo, if invoked
    1615          18 :       CALL get_mo_set(mo_set=mos(1), homo=n_occ, nao=nao)
    1616          18 :       n_virt = nao - n_occ
    1617             : 
    1618             :       ! At the end, we need four different layouts of matrices in this multiplication:
    1619             :       ! D = dipoles
    1620             :       ! Final result: D = C_{mu a}        <\mu|\vec{r}|\nu>        C_{\nu i}              EQ.I
    1621             :       !                   \_______/         \___________/          \______/
    1622             :       !               fm_coeff_virt            matrix_r          fm_coeff_occ
    1623             :       !                    (EQ.Ia)             (EQ.Ib)              (EQ.Ia)
    1624             :       ! Intermediate work matrices:
    1625             :       ! fm_dip_intermed =                 <\mu|\vec{r}|\nu>        C_{\nu i}              EQ.II
    1626             :       CALL cp_fm_struct_create(fm_struct_nao_nao, &
    1627             :                                fm_struct_dipoles_ao%para_env, fm_struct_dipoles_ao%context, &
    1628          18 :                                nao, nao)
    1629             :       CALL cp_fm_struct_create(fm_struct_coeff_virt, fm_struct_dipoles_ao%para_env, fm_struct_dipoles_ao%context, &
    1630          18 :                                nao, n_virt)
    1631             :       CALL cp_fm_struct_create(fm_struct_coeff_occ, fm_struct_dipoles_ao%para_env, fm_struct_dipoles_ao%context, &
    1632          18 :                                nao, n_occ)
    1633             : 
    1634             :       ! Split between unoccupied and occupied MO coefficients
    1635             :       ! First, the unoccupied coefficients "C_\mu a"
    1636          18 :       CALL cp_fm_create(fm_coeff_virt, matrix_struct=fm_struct_coeff_virt, name="mo_coeff_virt")
    1637          18 :       CALL cp_fm_set_all(fm_coeff_virt, 0.0_dp)
    1638             :       ! Coefficients from Diagonalization of the KS matrix, starting from the first unoccupied index, i.e. n_occ+1
    1639             :       ! We are writing between different contexts here, so we need submat_general
    1640             :       CALL cp_fm_to_fm_submat_general(mo_coeff(1), fm_coeff_virt, &
    1641             :                                       nao, n_virt, &
    1642             :                                       1, n_occ + 1, &
    1643             :                                       1, 1, &
    1644          18 :                                       mo_coeff(1)%matrix_struct%context)
    1645             : 
    1646             :       ! Now occupied ones "C_\mu i"
    1647          18 :       CALL cp_fm_create(fm_coeff_occ, matrix_struct=fm_struct_coeff_occ, name="mo_coeff_occ")
    1648          18 :       CALL cp_fm_set_all(fm_coeff_occ, 0.0_dp)
    1649             : 
    1650             :       ! Coefficients from Diagonalization of the KS matrix, starting from the first occupied index
    1651             :       CALL cp_fm_to_fm_submat_general(mo_coeff(1), fm_coeff_occ, &
    1652             :                                       nao, n_occ, &
    1653             :                                       1, 1, &
    1654             :                                       1, 1, &
    1655          18 :                                       mo_coeff(1)%matrix_struct%context)
    1656             : 
    1657             :       ! Need another temporary matrix to store intermediate result from right multiplication
    1658             :       ! D = C_{mu a}        <\mu|\vec{r}|\nu>        C_{\nu i}
    1659          18 :       CALL cp_fm_create(fm_dip_intermed, matrix_struct=fm_struct_coeff_occ, name="dip_intermed")
    1660          18 :       CALL cp_fm_set_all(fm_dip_intermed, 0.0_dp)
    1661             : 
    1662          72 :       DO idir = 1, 3
    1663             :          ! Fill final (MO) dipole matrix
    1664             :          ! Specifying beta=1.0_dp would sum up all directions
    1665             :          CALL cp_dbcsr_sm_fm_multiply(matrix_r(idir)%matrix, fm_coeff_occ, &
    1666          54 :                                       fm_dip_intermed, ncol=n_occ)
    1667             :          ! Now obtain the dipoles by the final multiplication;
    1668             :          ! We do that inside the loop to obtain dipoles per axis for print
    1669          72 :          CALL parallel_gemm('T', 'N', n_virt, n_occ, nao, 1.0_dp, fm_coeff_virt, fm_dip_intermed, 0.0_dp, fm_dipole_per_dir(idir))
    1670             :       END DO
    1671             : 
    1672             :       !Release matrices and structs
    1673          18 :       NULLIFY (fm_struct_dipoles_ao)
    1674          18 :       CALL cp_fm_struct_release(fm_struct_nao_nao)
    1675          18 :       CALL cp_fm_struct_release(fm_struct_coeff_virt)
    1676          18 :       CALL cp_fm_struct_release(fm_struct_coeff_occ)
    1677          18 :       CALL cp_fm_release(fm_coeff_virt)
    1678          18 :       CALL cp_fm_release(fm_coeff_occ)
    1679          18 :       CALL cp_fm_release(fm_dip_intermed)
    1680          18 :       CALL dbcsr_deallocate_matrix_set(matrix_r)
    1681             : 
    1682          18 :       CALL timestop(handle)
    1683             : 
    1684          90 :    END SUBROUTINE get_dipoles_mo
    1685             : 
    1686             : ! **************************************************************************************************
    1687             : !> \brief Computes and returns absorption spectrum for the frequency range and broadening
    1688             : !>    provided by the user.
    1689             : !>    Prelim Ref.: C. Ullrich, Time-Dependent Density-Functional Theory: Concepts and Applications
    1690             : !>    (Oxford University Press, Oxford, 2012), Eq. 7.51
    1691             : !> \param oscill_str ...
    1692             : !> \param Exc_ens ...
    1693             : !> \param info_approximation ...
    1694             : !> \param unit_nr ...
    1695             : !> \param mp2_env ...
    1696             : ! **************************************************************************************************
    1697           0 :    SUBROUTINE compute_absorption_spectrum(oscill_str, Exc_ens, &
    1698             :                                           info_approximation, unit_nr, mp2_env)
    1699             : 
    1700             :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:), &
    1701             :          INTENT(IN)                                      :: oscill_str, Exc_ens
    1702             :       CHARACTER(LEN=10)                                  :: info_approximation
    1703             :       INTEGER, INTENT(IN)                                :: unit_nr
    1704             :       TYPE(mp2_type), INTENT(INOUT)                      :: mp2_env
    1705             : 
    1706             :       CHARACTER(LEN=*), PARAMETER :: routineN = 'compute_absorption_spectrum'
    1707             : 
    1708             :       CHARACTER(LEN=10)                                  :: eta_str
    1709             :       CHARACTER(LEN=28)                                  :: file_name_spectrum
    1710             :       INTEGER                                            :: handle, i, j, k, num_steps, unit_nr_file
    1711             :       REAL(KIND=dp)                                      :: eta, freq_end, freq_start, freq_step, &
    1712             :                                                             omega
    1713           0 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :)        :: abs_spectrum
    1714           0 :       REAL(KIND=dp), DIMENSION(:), POINTER               :: eta_list
    1715             : 
    1716           0 :       CALL timeset(routineN, handle)
    1717             : 
    1718           0 :       freq_step = mp2_env%ri_g0w0%bse_spectrum_freq_step_size
    1719           0 :       freq_start = mp2_env%ri_g0w0%bse_spectrum_freq_start
    1720           0 :       freq_end = mp2_env%ri_g0w0%bse_spectrum_freq_end
    1721           0 :       eta_list => mp2_env%ri_g0w0%bse_eta_spectrum_list
    1722             : 
    1723             :       ! Calculate number of steps to fit given frequency range
    1724           0 :       num_steps = NINT((freq_end - freq_start)/freq_step) + 1
    1725             : 
    1726           0 :       DO k = 1, SIZE(eta_list)
    1727           0 :          eta = eta_list(k)
    1728             :          ! Convert to str
    1729           0 :          WRITE (eta_str, '(F6.3)') eta
    1730           0 :          file_name_spectrum = 'BSE'//TRIM(ADJUSTL(info_approximation))//'eta='//TRIM(eta_str)//'.spectrum'
    1731           0 :          ALLOCATE (abs_spectrum(num_steps, 2))
    1732           0 :          abs_spectrum(:, :) = 0.0_dp
    1733             : 
    1734             :          ! Calculate the imaginary part of the mean dipole polarizability α_{avg}(ω)
    1735             :          ! which is given by (cf. C. Ullrichs Book on TDDFT, Eq. 7.51)
    1736             :          ! α_{avg}(ω) = \sum_{n=1}^{N_exc} \frac{f_n}{(ω+iη)² - (Ω^n)²}
    1737             :          ! and then the imaginary part is (in the limit η -> 0)
    1738             :          ! Im[α_{avg}(ω)] = \sum_{n=1}^{N_exc} f_n * η / ((ω - Ω^n)² + η²)
    1739             :          ! where f_n are the oscillator strengths and E_exc the excitation energies
    1740           0 :          DO i = 1, num_steps
    1741           0 :             omega = freq_start + (i - 1)*freq_step
    1742           0 :             abs_spectrum(i, 1) = omega
    1743           0 :             DO j = 1, SIZE(oscill_str)
    1744             :                abs_spectrum(i, 2) = abs_spectrum(i, 2) - oscill_str(j)* &
    1745           0 :                                     AIMAG(1/((omega + CMPLX(0.0, eta, kind=dp))**2 - Exc_ens(j)**2))
    1746             :             END DO
    1747             :          END DO
    1748             :          ! Print it to file
    1749           0 :          unit_nr_file = cp_logger_get_default_unit_nr()
    1750           0 :          IF (unit_nr_file > 0) THEN
    1751             :             CALL open_file(file_name_spectrum, unit_number=unit_nr_file, &
    1752           0 :                            file_status="UNKNOWN", file_action="WRITE")
    1753           0 :             WRITE (unit_nr_file, '(A62,A6)') "# Absorption spectrum from Bethe Salpeter equation for method ", &
    1754           0 :                TRIM(ADJUSTL(info_approximation))
    1755           0 :             DO i = 1, num_steps
    1756           0 :                WRITE (unit_nr_file, '(F17.10, F17.10)') abs_spectrum(i, 1)*evolt, abs_spectrum(i, 2)
    1757             :             END DO
    1758           0 :             CALL close_file(unit_nr_file)
    1759             :          END IF
    1760           0 :          DEALLOCATE (abs_spectrum)
    1761             :       END DO
    1762             : 
    1763           0 :       IF (unit_nr > 0) THEN
    1764           0 :          WRITE (unit_nr, '(T2,A4)') 'BSE|'
    1765             :          WRITE (unit_nr, '(T2,A4,T7,A50,A)') &
    1766           0 :             'BSE|', "Printed optical absorption spectrum to local file ", file_name_spectrum
    1767             :          WRITE (unit_nr, '(T2,A4,T7,A50)') &
    1768           0 :             'BSE|', "using the Eq. 7.51 from C. Ullrichs Book on TDDFT:"
    1769           0 :          WRITE (unit_nr, '(T2,A4)') 'BSE|'
    1770             :          WRITE (unit_nr, '(T2,A4,T10,A73)') &
    1771           0 :             'BSE|', "Im{α_{avg}(ω)} = Im{\sum_{n=1}^{N_exc} \frac{f_n}{(ω+iη)² - (Ω^n)²}}"
    1772           0 :          WRITE (unit_nr, '(T2,A4)') 'BSE|'
    1773             :          WRITE (unit_nr, '(T2,A4,T7,A59)') &
    1774           0 :             'BSE|', "with oscillator strengths f_n and excitation energies Ω^n."
    1775           0 :          WRITE (unit_nr, '(T2,A4)') 'BSE|'
    1776             :       END IF
    1777             : 
    1778           0 :       CALL timestop(handle)
    1779             : 
    1780           0 :    END SUBROUTINE
    1781             : END MODULE

Generated by: LCOV version 1.15