LCOV - code coverage report
Current view: top level - src - bse_util.F (source / functions) Hit Total Coverage
Test: CP2K Regtests (git:15a58fb) Lines: 604 719 84.0 %
Date: 2025-02-18 08:24:35 Functions: 14 15 93.3 %

          Line data    Source code
       1             : !--------------------------------------------------------------------------------------------------!
       2             : !   CP2K: A general program to perform molecular dynamics simulations                              !
       3             : !   Copyright 2000-2025 CP2K developers group <https://cp2k.org>                                   !
       4             : !                                                                                                  !
       5             : !   SPDX-License-Identifier: GPL-2.0-or-later                                                      !
       6             : !--------------------------------------------------------------------------------------------------!
       7             : 
       8             : ! **************************************************************************************************
       9             : !> \brief Auxiliary routines for GW + Bethe-Salpeter for computing electronic excitations
      10             : !> \par History
      11             : !>      11.2023 created [Maximilian Graml]
      12             : ! **************************************************************************************************
      13             : MODULE bse_util
      14             :    USE atomic_kind_types,               ONLY: atomic_kind_type
      15             :    USE cell_types,                      ONLY: cell_type
      16             :    USE cp_blacs_env,                    ONLY: cp_blacs_env_type
      17             :    USE cp_control_types,                ONLY: dft_control_type
      18             :    USE cp_dbcsr_api,                    ONLY: dbcsr_create,&
      19             :                                               dbcsr_init_p,&
      20             :                                               dbcsr_p_type,&
      21             :                                               dbcsr_set,&
      22             :                                               dbcsr_type_symmetric
      23             :    USE cp_dbcsr_cp2k_link,              ONLY: cp_dbcsr_alloc_block_from_nbl
      24             :    USE cp_dbcsr_operations,             ONLY: cp_dbcsr_sm_fm_multiply,&
      25             :                                               dbcsr_allocate_matrix_set,&
      26             :                                               dbcsr_deallocate_matrix_set
      27             :    USE cp_fm_basic_linalg,              ONLY: cp_fm_trace,&
      28             :                                               cp_fm_uplo_to_full
      29             :    USE cp_fm_cholesky,                  ONLY: cp_fm_cholesky_decompose,&
      30             :                                               cp_fm_cholesky_invert
      31             :    USE cp_fm_struct,                    ONLY: cp_fm_struct_create,&
      32             :                                               cp_fm_struct_release,&
      33             :                                               cp_fm_struct_type
      34             :    USE cp_fm_types,                     ONLY: cp_fm_create,&
      35             :                                               cp_fm_get_info,&
      36             :                                               cp_fm_release,&
      37             :                                               cp_fm_set_all,&
      38             :                                               cp_fm_to_fm_submat,&
      39             :                                               cp_fm_to_fm_submat_general,&
      40             :                                               cp_fm_type
      41             :    USE cp_log_handling,                 ONLY: cp_get_default_logger,&
      42             :                                               cp_logger_type
      43             :    USE cp_output_handling,              ONLY: cp_print_key_finished_output,&
      44             :                                               cp_print_key_unit_nr
      45             :    USE cp_realspace_grid_cube,          ONLY: cp_pw_to_cube
      46             :    USE input_constants,                 ONLY: bse_screening_alpha,&
      47             :                                               bse_screening_rpa,&
      48             :                                               bse_screening_tdhf,&
      49             :                                               use_mom_ref_coac
      50             :    USE input_section_types,             ONLY: section_vals_type
      51             :    USE kinds,                           ONLY: default_path_length,&
      52             :                                               dp,&
      53             :                                               int_8
      54             :    USE message_passing,                 ONLY: mp_para_env_type,&
      55             :                                               mp_request_type
      56             :    USE moments_utils,                   ONLY: get_reference_point
      57             :    USE mp2_types,                       ONLY: integ_mat_buffer_type,&
      58             :                                               mp2_type
      59             :    USE parallel_gemm_api,               ONLY: parallel_gemm
      60             :    USE particle_list_types,             ONLY: particle_list_type
      61             :    USE particle_types,                  ONLY: particle_type
      62             :    USE physcon,                         ONLY: evolt
      63             :    USE pw_env_types,                    ONLY: pw_env_get,&
      64             :                                               pw_env_type
      65             :    USE pw_poisson_types,                ONLY: pw_poisson_type
      66             :    USE pw_pool_types,                   ONLY: pw_pool_p_type,&
      67             :                                               pw_pool_type
      68             :    USE pw_types,                        ONLY: pw_c1d_gs_type,&
      69             :                                               pw_r3d_rs_type
      70             :    USE qs_collocate_density,            ONLY: calculate_wavefunction
      71             :    USE qs_environment_types,            ONLY: get_qs_env,&
      72             :                                               qs_environment_type
      73             :    USE qs_kind_types,                   ONLY: qs_kind_type
      74             :    USE qs_mo_types,                     ONLY: get_mo_set,&
      75             :                                               mo_set_type
      76             :    USE qs_moments,                      ONLY: build_local_moment_matrix
      77             :    USE qs_neighbor_list_types,          ONLY: neighbor_list_set_p_type
      78             :    USE qs_subsys_types,                 ONLY: qs_subsys_get,&
      79             :                                               qs_subsys_type
      80             :    USE rpa_communication,               ONLY: communicate_buffer
      81             :    USE util,                            ONLY: sort,&
      82             :                                               sort_unique
      83             : #include "./base/base_uses.f90"
      84             : 
      85             :    IMPLICIT NONE
      86             : 
      87             :    PRIVATE
      88             : 
      89             :    CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'bse_util'
      90             : 
      91             :    PUBLIC :: mult_B_with_W, fm_general_add_bse, truncate_fm, &
      92             :              deallocate_matrices_bse, comp_eigvec_coeff_BSE, sort_excitations, &
      93             :              estimate_BSE_resources, filter_eigvec_contrib, truncate_BSE_matrices, &
      94             :              determine_cutoff_indices, adapt_BSE_input_params, get_multipoles_mo, &
      95             :              reshuffle_eigvec, print_bse_nto_cubes, trace_exciton_descr
      96             : 
      97             : CONTAINS
      98             : 
      99             : ! **************************************************************************************************
     100             : !> \brief Multiplies B-matrix (RI-3c-Integrals) with W (screening) to obtain \bar{B}
     101             : !> \param fm_mat_S_ij_bse ...
     102             : !> \param fm_mat_S_ia_bse ...
     103             : !> \param fm_mat_S_bar_ia_bse ...
     104             : !> \param fm_mat_S_bar_ij_bse ...
     105             : !> \param fm_mat_Q_static_bse_gemm ...
     106             : !> \param dimen_RI ...
     107             : !> \param homo ...
     108             : !> \param virtual ...
     109             : ! **************************************************************************************************
     110         252 :    SUBROUTINE mult_B_with_W(fm_mat_S_ij_bse, fm_mat_S_ia_bse, fm_mat_S_bar_ia_bse, &
     111             :                             fm_mat_S_bar_ij_bse, fm_mat_Q_static_bse_gemm, &
     112             :                             dimen_RI, homo, virtual)
     113             : 
     114             :       TYPE(cp_fm_type), INTENT(IN)                       :: fm_mat_S_ij_bse, fm_mat_S_ia_bse
     115             :       TYPE(cp_fm_type), INTENT(OUT)                      :: fm_mat_S_bar_ia_bse, fm_mat_S_bar_ij_bse
     116             :       TYPE(cp_fm_type), INTENT(IN)                       :: fm_mat_Q_static_bse_gemm
     117             :       INTEGER, INTENT(IN)                                :: dimen_RI, homo, virtual
     118             : 
     119             :       CHARACTER(LEN=*), PARAMETER                        :: routineN = 'mult_B_with_W'
     120             : 
     121             :       INTEGER                                            :: handle, i_global, iiB, info_chol, &
     122             :                                                             j_global, jjB, ncol_local, nrow_local
     123          42 :       INTEGER, DIMENSION(:), POINTER                     :: col_indices, row_indices
     124             :       TYPE(cp_fm_type)                                   :: fm_work
     125             : 
     126          42 :       CALL timeset(routineN, handle)
     127             : 
     128          42 :       CALL cp_fm_create(fm_mat_S_bar_ia_bse, fm_mat_S_ia_bse%matrix_struct)
     129          42 :       CALL cp_fm_set_all(fm_mat_S_bar_ia_bse, 0.0_dp)
     130             : 
     131          42 :       CALL cp_fm_create(fm_mat_S_bar_ij_bse, fm_mat_S_ij_bse%matrix_struct)
     132          42 :       CALL cp_fm_set_all(fm_mat_S_bar_ij_bse, 0.0_dp)
     133             : 
     134          42 :       CALL cp_fm_create(fm_work, fm_mat_Q_static_bse_gemm%matrix_struct)
     135          42 :       CALL cp_fm_set_all(fm_work, 0.0_dp)
     136             : 
     137             :       ! get info of fm_mat_Q_static_bse and compute ((1+Q(0))^-1-1)
     138             :       CALL cp_fm_get_info(matrix=fm_mat_Q_static_bse_gemm, &
     139             :                           nrow_local=nrow_local, &
     140             :                           ncol_local=ncol_local, &
     141             :                           row_indices=row_indices, &
     142          42 :                           col_indices=col_indices)
     143             : 
     144        3528 :       DO jjB = 1, ncol_local
     145        3486 :          j_global = col_indices(jjB)
     146      148197 :          DO iiB = 1, nrow_local
     147      144669 :             i_global = row_indices(iiB)
     148      148155 :             IF (j_global == i_global .AND. i_global <= dimen_RI) THEN
     149        1743 :                fm_mat_Q_static_bse_gemm%local_data(iiB, jjB) = fm_mat_Q_static_bse_gemm%local_data(iiB, jjB) + 1.0_dp
     150             :             END IF
     151             :          END DO
     152             :       END DO
     153             : 
     154             :       ! calculate Trace(Log(Matrix)) as Log(DET(Matrix)) via cholesky decomposition
     155          42 :       CALL cp_fm_cholesky_decompose(matrix=fm_mat_Q_static_bse_gemm, n=dimen_RI, info_out=info_chol)
     156             : 
     157          42 :       IF (info_chol /= 0) THEN
     158           0 :          CALL cp_abort(__LOCATION__, 'Cholesky decomposition failed for static polarization in BSE')
     159             :       END IF
     160             : 
     161             :       ! calculate [1+Q(i0)]^-1
     162          42 :       CALL cp_fm_cholesky_invert(fm_mat_Q_static_bse_gemm)
     163             : 
     164             :       ! symmetrize the result
     165          42 :       CALL cp_fm_uplo_to_full(fm_mat_Q_static_bse_gemm, fm_work)
     166             : 
     167             :       CALL parallel_gemm(transa="N", transb="N", m=dimen_RI, n=homo**2, k=dimen_RI, alpha=1.0_dp, &
     168             :                          matrix_a=fm_mat_Q_static_bse_gemm, matrix_b=fm_mat_S_ij_bse, beta=0.0_dp, &
     169          42 :                          matrix_c=fm_mat_S_bar_ij_bse)
     170             : 
     171             :       ! fm_mat_S_bar_ia_bse has a different blacs_env as fm_mat_S_ij_bse since we take
     172             :       ! fm_mat_S_ia_bse from RPA. Therefore, we also need a different fm_mat_Q_static_bse_gemm
     173             :       CALL parallel_gemm(transa="N", transb="N", m=dimen_RI, n=homo*virtual, k=dimen_RI, alpha=1.0_dp, &
     174             :                          matrix_a=fm_mat_Q_static_bse_gemm, matrix_b=fm_mat_S_ia_bse, beta=0.0_dp, &
     175          42 :                          matrix_c=fm_mat_S_bar_ia_bse)
     176             : 
     177          42 :       CALL cp_fm_release(fm_work)
     178             : 
     179          42 :       CALL timestop(handle)
     180             : 
     181          42 :    END SUBROUTINE
     182             : 
     183             : ! **************************************************************************************************
     184             : !> \brief Adds and reorders full matrices with a combined index structure, e.g. adding W_ij,ab
     185             : !> to A_ia, jb which needs MPI communication.
     186             : !> \param fm_out ...
     187             : !> \param fm_in ...
     188             : !> \param beta ...
     189             : !> \param nrow_secidx_in ...
     190             : !> \param ncol_secidx_in ...
     191             : !> \param nrow_secidx_out ...
     192             : !> \param ncol_secidx_out ...
     193             : !> \param unit_nr ...
     194             : !> \param reordering ...
     195             : !> \param mp2_env ...
     196             : ! **************************************************************************************************
     197         490 :    SUBROUTINE fm_general_add_bse(fm_out, fm_in, beta, nrow_secidx_in, ncol_secidx_in, &
     198             :                                  nrow_secidx_out, ncol_secidx_out, unit_nr, reordering, mp2_env)
     199             : 
     200             :       TYPE(cp_fm_type), INTENT(INOUT)                    :: fm_out
     201             :       TYPE(cp_fm_type), INTENT(IN)                       :: fm_in
     202             :       REAL(kind=dp)                                      :: beta
     203             :       INTEGER, INTENT(IN)                                :: nrow_secidx_in, ncol_secidx_in, &
     204             :                                                             nrow_secidx_out, ncol_secidx_out
     205             :       INTEGER                                            :: unit_nr
     206             :       INTEGER, DIMENSION(4)                              :: reordering
     207             :       TYPE(mp2_type), INTENT(IN)                         :: mp2_env
     208             : 
     209             :       CHARACTER(LEN=*), PARAMETER :: routineN = 'fm_general_add_bse'
     210             : 
     211             :       INTEGER :: col_idx_loc, dummy, handle, handle2, i_entry_rec, idx_col_out, idx_row_out, ii, &
     212             :          iproc, jj, ncol_block_in, ncol_block_out, ncol_local_in, ncol_local_out, nprocs, &
     213             :          nrow_block_in, nrow_block_out, nrow_local_in, nrow_local_out, proc_send, row_idx_loc, &
     214             :          send_pcol, send_prow
     215         490 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: entry_counter, num_entries_rec, &
     216             :                                                             num_entries_send
     217             :       INTEGER, DIMENSION(4)                              :: indices_in
     218         490 :       INTEGER, DIMENSION(:), POINTER                     :: col_indices_in, col_indices_out, &
     219         490 :                                                             row_indices_in, row_indices_out
     220             :       TYPE(integ_mat_buffer_type), ALLOCATABLE, &
     221         490 :          DIMENSION(:)                                    :: buffer_rec, buffer_send
     222             :       TYPE(mp_para_env_type), POINTER                    :: para_env_out
     223         490 :       TYPE(mp_request_type), DIMENSION(:, :), POINTER    :: req_array
     224             : 
     225         490 :       CALL timeset(routineN, handle)
     226         490 :       CALL timeset(routineN//"_1_setup", handle2)
     227             : 
     228         490 :       para_env_out => fm_out%matrix_struct%para_env
     229             :       ! A_iajb
     230             :       ! We start by moving data from local parts of W_ijab to the full matrix A_iajb using buffers
     231             :       CALL cp_fm_get_info(matrix=fm_out, &
     232             :                           nrow_local=nrow_local_out, &
     233             :                           ncol_local=ncol_local_out, &
     234             :                           row_indices=row_indices_out, &
     235             :                           col_indices=col_indices_out, &
     236             :                           nrow_block=nrow_block_out, &
     237         490 :                           ncol_block=ncol_block_out)
     238             : 
     239        1470 :       ALLOCATE (num_entries_rec(0:para_env_out%num_pe - 1))
     240        1470 :       ALLOCATE (num_entries_send(0:para_env_out%num_pe - 1))
     241             : 
     242        1470 :       num_entries_rec(:) = 0
     243        1470 :       num_entries_send(:) = 0
     244             : 
     245         490 :       dummy = 0
     246             : 
     247             :       CALL cp_fm_get_info(matrix=fm_in, &
     248             :                           nrow_local=nrow_local_in, &
     249             :                           ncol_local=ncol_local_in, &
     250             :                           row_indices=row_indices_in, &
     251             :                           col_indices=col_indices_in, &
     252             :                           nrow_block=nrow_block_in, &
     253         490 :                           ncol_block=ncol_block_in)
     254             : 
     255         490 :       IF (unit_nr > 0 .AND. mp2_env%bse%bse_debug_print) THEN
     256          94 :          WRITE (unit_nr, '(T2,A10,T13,A14,A10,T71,I10)') 'BSE|DEBUG|', 'Row number of ', fm_out%name, &
     257         188 :             fm_out%matrix_struct%nrow_global
     258          94 :          WRITE (unit_nr, '(T2,A10,T13,A17,A10,T71,I10)') 'BSE|DEBUG|', 'Column number of ', fm_out%name, &
     259         188 :             fm_out%matrix_struct%ncol_global
     260             : 
     261          94 :          WRITE (unit_nr, '(T2,A10,T13,A18,A10,T71,I10)') 'BSE|DEBUG|', 'Row block size of ', fm_out%name, nrow_block_out
     262          94 :          WRITE (unit_nr, '(T2,A10,T13,A21,A10,T71,I10)') 'BSE|DEBUG|', 'Column block size of ', fm_out%name, ncol_block_out
     263             : 
     264          94 :          WRITE (unit_nr, '(T2,A10,T13,A14,A10,T71,I10)') 'BSE|DEBUG|', 'Row number of ', fm_in%name, &
     265         188 :             fm_in%matrix_struct%nrow_global
     266          94 :          WRITE (unit_nr, '(T2,A10,T13,A17,A10,T71,I10)') 'BSE|DEBUG|', 'Column number of ', fm_in%name, &
     267         188 :             fm_in%matrix_struct%ncol_global
     268             : 
     269          94 :          WRITE (unit_nr, '(T2,A10,T13,A18,A10,T71,I10)') 'BSE|DEBUG|', 'Row block size of ', fm_in%name, nrow_block_in
     270          94 :          WRITE (unit_nr, '(T2,A10,T13,A21,A10,T71,I10)') 'BSE|DEBUG|', 'Column block size of ', fm_in%name, ncol_block_in
     271             :       END IF
     272             : 
     273             :       ! Use scalapack wrapper to find process index in fm_out
     274             :       ! To that end, we obtain the global index in fm_out from the level indices
     275         490 :       indices_in(:) = 0
     276        9342 :       DO row_idx_loc = 1, nrow_local_in
     277        8852 :          indices_in(1) = (row_indices_in(row_idx_loc) - 1)/nrow_secidx_in + 1
     278        8852 :          indices_in(2) = MOD(row_indices_in(row_idx_loc) - 1, nrow_secidx_in) + 1
     279       93294 :          DO col_idx_loc = 1, ncol_local_in
     280       83952 :             indices_in(3) = (col_indices_in(col_idx_loc) - 1)/ncol_secidx_in + 1
     281       83952 :             indices_in(4) = MOD(col_indices_in(col_idx_loc) - 1, ncol_secidx_in) + 1
     282             : 
     283       83952 :             idx_row_out = indices_in(reordering(2)) + (indices_in(reordering(1)) - 1)*nrow_secidx_out
     284       83952 :             idx_col_out = indices_in(reordering(4)) + (indices_in(reordering(3)) - 1)*ncol_secidx_out
     285             : 
     286       83952 :             send_prow = fm_out%matrix_struct%g2p_row(idx_row_out)
     287       83952 :             send_pcol = fm_out%matrix_struct%g2p_col(idx_col_out)
     288             : 
     289       83952 :             proc_send = fm_out%matrix_struct%context%blacs2mpi(send_prow, send_pcol)
     290             : 
     291       92804 :             num_entries_send(proc_send) = num_entries_send(proc_send) + 1
     292             : 
     293             :          END DO
     294             :       END DO
     295             : 
     296         490 :       CALL timestop(handle2)
     297             : 
     298         490 :       CALL timeset(routineN//"_2_comm_entry_nums", handle2)
     299         490 :       IF (unit_nr > 0 .AND. mp2_env%bse%bse_debug_print) THEN
     300          94 :          WRITE (unit_nr, '(T2,A10,T13,A27)') 'BSE|DEBUG|', 'Communicating entry numbers'
     301             :       END IF
     302             : 
     303         490 :       CALL para_env_out%alltoall(num_entries_send, num_entries_rec, 1)
     304             : 
     305         490 :       CALL timestop(handle2)
     306             : 
     307         490 :       CALL timeset(routineN//"_3_alloc_buffer", handle2)
     308         490 :       IF (unit_nr > 0 .AND. mp2_env%bse%bse_debug_print) THEN
     309          94 :          WRITE (unit_nr, '(T2,A10,T13,A18)') 'BSE|DEBUG|', 'Allocating buffers'
     310             :       END IF
     311             : 
     312             :       ! Buffers for entries and their indices
     313        2450 :       ALLOCATE (buffer_rec(0:para_env_out%num_pe - 1))
     314        2450 :       ALLOCATE (buffer_send(0:para_env_out%num_pe - 1))
     315             : 
     316             :       ! allocate data message and corresponding indices
     317        1470 :       DO iproc = 0, para_env_out%num_pe - 1
     318             : 
     319        2750 :          ALLOCATE (buffer_rec(iproc)%msg(num_entries_rec(iproc)))
     320       85422 :          buffer_rec(iproc)%msg = 0.0_dp
     321             : 
     322             :       END DO
     323             : 
     324        1470 :       DO iproc = 0, para_env_out%num_pe - 1
     325             : 
     326        2750 :          ALLOCATE (buffer_send(iproc)%msg(num_entries_send(iproc)))
     327       85422 :          buffer_send(iproc)%msg = 0.0_dp
     328             : 
     329             :       END DO
     330             : 
     331        1470 :       DO iproc = 0, para_env_out%num_pe - 1
     332             : 
     333        2750 :          ALLOCATE (buffer_rec(iproc)%indx(num_entries_rec(iproc), 2))
     334      171334 :          buffer_rec(iproc)%indx = 0
     335             : 
     336             :       END DO
     337             : 
     338        1470 :       DO iproc = 0, para_env_out%num_pe - 1
     339             : 
     340        2750 :          ALLOCATE (buffer_send(iproc)%indx(num_entries_send(iproc), 2))
     341      171334 :          buffer_send(iproc)%indx = 0
     342             : 
     343             :       END DO
     344             : 
     345         490 :       CALL timestop(handle2)
     346             : 
     347         490 :       CALL timeset(routineN//"_4_buf_from_fmin_"//fm_out%name, handle2)
     348         490 :       IF (unit_nr > 0 .AND. mp2_env%bse%bse_debug_print) THEN
     349          94 :          WRITE (unit_nr, '(T2,A10,T13,A18,A10,A13)') 'BSE|DEBUG|', 'Writing data from ', fm_in%name, ' into buffers'
     350             :       END IF
     351             : 
     352        1470 :       ALLOCATE (entry_counter(0:para_env_out%num_pe - 1))
     353        1470 :       entry_counter(:) = 0
     354             : 
     355             :       ! Now we can write the actual data and indices to the send-buffer
     356        9342 :       DO row_idx_loc = 1, nrow_local_in
     357        8852 :          indices_in(1) = (row_indices_in(row_idx_loc) - 1)/nrow_secidx_in + 1
     358        8852 :          indices_in(2) = MOD(row_indices_in(row_idx_loc) - 1, nrow_secidx_in) + 1
     359       93294 :          DO col_idx_loc = 1, ncol_local_in
     360       83952 :             indices_in(3) = (col_indices_in(col_idx_loc) - 1)/ncol_secidx_in + 1
     361       83952 :             indices_in(4) = MOD(col_indices_in(col_idx_loc) - 1, ncol_secidx_in) + 1
     362             : 
     363       83952 :             idx_row_out = indices_in(reordering(2)) + (indices_in(reordering(1)) - 1)*nrow_secidx_out
     364       83952 :             idx_col_out = indices_in(reordering(4)) + (indices_in(reordering(3)) - 1)*ncol_secidx_out
     365             : 
     366       83952 :             send_prow = fm_out%matrix_struct%g2p_row(idx_row_out)
     367       83952 :             send_pcol = fm_out%matrix_struct%g2p_col(idx_col_out)
     368             : 
     369       83952 :             proc_send = fm_out%matrix_struct%context%blacs2mpi(send_prow, send_pcol)
     370       83952 :             entry_counter(proc_send) = entry_counter(proc_send) + 1
     371             : 
     372             :             buffer_send(proc_send)%msg(entry_counter(proc_send)) = &
     373       83952 :                fm_in%local_data(row_idx_loc, col_idx_loc)
     374             : 
     375       83952 :             buffer_send(proc_send)%indx(entry_counter(proc_send), 1) = idx_row_out
     376       92804 :             buffer_send(proc_send)%indx(entry_counter(proc_send), 2) = idx_col_out
     377             : 
     378             :          END DO
     379             :       END DO
     380             : 
     381        7350 :       ALLOCATE (req_array(1:para_env_out%num_pe, 4))
     382             : 
     383         490 :       CALL timestop(handle2)
     384             : 
     385         490 :       CALL timeset(routineN//"_5_comm_buffer", handle2)
     386         490 :       IF (unit_nr > 0 .AND. mp2_env%bse%bse_debug_print) THEN
     387          94 :          WRITE (unit_nr, '(T2,A10,T13,A21)') 'BSE|DEBUG|', 'Communicating buffers'
     388             :       END IF
     389             : 
     390             :       ! communicate the buffer
     391             :       CALL communicate_buffer(para_env_out, num_entries_rec, num_entries_send, buffer_rec, &
     392         490 :                               buffer_send, req_array)
     393             : 
     394         490 :       CALL timestop(handle2)
     395             : 
     396         490 :       CALL timeset(routineN//"_6_buffer_to_fmout"//fm_out%name, handle2)
     397         490 :       IF (unit_nr > 0 .AND. mp2_env%bse%bse_debug_print) THEN
     398          94 :          WRITE (unit_nr, '(T2,A10,T13,A24,A10)') 'BSE|DEBUG|', 'Writing from buffers to ', fm_out%name
     399             :       END IF
     400             : 
     401             :       ! fill fm_out with the entries from buffer_rec, i.e. buffer_rec are parts of fm_in
     402         490 :       nprocs = para_env_out%num_pe
     403             : 
     404             : !$OMP PARALLEL DO DEFAULT(NONE) &
     405             : !$OMP SHARED(fm_out, nprocs, num_entries_rec, buffer_rec, beta) &
     406         490 : !$OMP PRIVATE(iproc, i_entry_rec, ii, jj)
     407             :       DO iproc = 0, nprocs - 1
     408             :          DO i_entry_rec = 1, num_entries_rec(iproc)
     409             :             ii = fm_out%matrix_struct%g2l_row(buffer_rec(iproc)%indx(i_entry_rec, 1))
     410             :             jj = fm_out%matrix_struct%g2l_col(buffer_rec(iproc)%indx(i_entry_rec, 2))
     411             : 
     412             :             fm_out%local_data(ii, jj) = fm_out%local_data(ii, jj) + beta*buffer_rec(iproc)%msg(i_entry_rec)
     413             :          END DO
     414             :       END DO
     415             : !$OMP END PARALLEL DO
     416             : 
     417         490 :       CALL timestop(handle2)
     418             : 
     419         490 :       CALL timeset(routineN//"_7_cleanup", handle2)
     420         490 :       IF (unit_nr > 0 .AND. mp2_env%bse%bse_debug_print) THEN
     421          94 :          WRITE (unit_nr, '(T2,A10,T13,A41)') 'BSE|DEBUG|', 'Starting cleanup of communication buffers'
     422             :       END IF
     423             : 
     424             :       !Clean up all the arrays from the communication process
     425        1470 :       DO iproc = 0, para_env_out%num_pe - 1
     426         980 :          DEALLOCATE (buffer_rec(iproc)%msg)
     427         980 :          DEALLOCATE (buffer_rec(iproc)%indx)
     428         980 :          DEALLOCATE (buffer_send(iproc)%msg)
     429        1470 :          DEALLOCATE (buffer_send(iproc)%indx)
     430             :       END DO
     431        2450 :       DEALLOCATE (buffer_rec, buffer_send)
     432         490 :       DEALLOCATE (req_array)
     433         490 :       DEALLOCATE (entry_counter)
     434         490 :       DEALLOCATE (num_entries_rec, num_entries_send)
     435             : 
     436         490 :       CALL timestop(handle2)
     437         490 :       CALL timestop(handle)
     438             : 
     439        3430 :    END SUBROUTINE fm_general_add_bse
     440             : 
     441             : ! **************************************************************************************************
     442             : !> \brief Routine for truncating a full matrix as given by the energy cutoffs in the input file.
     443             : !>  Logic: Matrices have some dimension dimen_RI x nrow_in*ncol_in  for the incoming (untruncated) matrix
     444             : !>  and dimen_RI x nrow_out*ncol_out for the truncated matrix. The truncation is done by resorting the indices
     445             : !>  via parallel communication.
     446             : !> \param fm_out ...
     447             : !> \param fm_in ...
     448             : !> \param ncol_in ...
     449             : !> \param nrow_out ...
     450             : !> \param ncol_out ...
     451             : !> \param unit_nr ...
     452             : !> \param mp2_env ...
     453             : !> \param nrow_offset ...
     454             : !> \param ncol_offset ...
     455             : ! **************************************************************************************************
     456         126 :    SUBROUTINE truncate_fm(fm_out, fm_in, ncol_in, &
     457             :                           nrow_out, ncol_out, unit_nr, mp2_env, &
     458             :                           nrow_offset, ncol_offset)
     459             : 
     460             :       TYPE(cp_fm_type), INTENT(INOUT)                    :: fm_out
     461             :       TYPE(cp_fm_type), INTENT(IN)                       :: fm_in
     462             :       INTEGER                                            :: ncol_in, nrow_out, ncol_out, unit_nr
     463             :       TYPE(mp2_type), INTENT(INOUT)                      :: mp2_env
     464             :       INTEGER, INTENT(IN), OPTIONAL                      :: nrow_offset, ncol_offset
     465             : 
     466             :       CHARACTER(LEN=*), PARAMETER                        :: routineN = 'truncate_fm'
     467             : 
     468             :       INTEGER :: col_idx_loc, dummy, handle, handle2, i_entry_rec, idx_col_first, idx_col_in, &
     469             :          idx_col_out, idx_col_sec, idx_row_in, ii, iproc, jj, ncol_block_in, ncol_block_out, &
     470             :          ncol_local_in, ncol_local_out, nprocs, nrow_block_in, nrow_block_out, nrow_local_in, &
     471             :          nrow_local_out, proc_send, row_idx_loc, send_pcol, send_prow
     472         126 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: entry_counter, num_entries_rec, &
     473             :                                                             num_entries_send
     474         126 :       INTEGER, DIMENSION(:), POINTER                     :: col_indices_in, col_indices_out, &
     475         126 :                                                             row_indices_in, row_indices_out
     476             :       LOGICAL                                            :: correct_ncol, correct_nrow
     477             :       TYPE(integ_mat_buffer_type), ALLOCATABLE, &
     478         126 :          DIMENSION(:)                                    :: buffer_rec, buffer_send
     479             :       TYPE(mp_para_env_type), POINTER                    :: para_env_out
     480         126 :       TYPE(mp_request_type), DIMENSION(:, :), POINTER    :: req_array
     481             : 
     482         126 :       CALL timeset(routineN, handle)
     483         126 :       CALL timeset(routineN//"_1_setup", handle2)
     484             : 
     485         126 :       correct_nrow = .FALSE.
     486         126 :       correct_ncol = .FALSE.
     487             :       !In case of truncation in the occupied space, we need to correct the interval of indices
     488         126 :       IF (PRESENT(nrow_offset)) THEN
     489          84 :          correct_nrow = .TRUE.
     490             :       END IF
     491         126 :       IF (PRESENT(ncol_offset)) THEN
     492          42 :          correct_ncol = .TRUE.
     493             :       END IF
     494             : 
     495         126 :       para_env_out => fm_out%matrix_struct%para_env
     496             : 
     497             :       CALL cp_fm_get_info(matrix=fm_out, &
     498             :                           nrow_local=nrow_local_out, &
     499             :                           ncol_local=ncol_local_out, &
     500             :                           row_indices=row_indices_out, &
     501             :                           col_indices=col_indices_out, &
     502             :                           nrow_block=nrow_block_out, &
     503         126 :                           ncol_block=ncol_block_out)
     504             : 
     505         378 :       ALLOCATE (num_entries_rec(0:para_env_out%num_pe - 1))
     506         378 :       ALLOCATE (num_entries_send(0:para_env_out%num_pe - 1))
     507             : 
     508         378 :       num_entries_rec(:) = 0
     509         378 :       num_entries_send(:) = 0
     510             : 
     511         126 :       dummy = 0
     512             : 
     513             :       CALL cp_fm_get_info(matrix=fm_in, &
     514             :                           nrow_local=nrow_local_in, &
     515             :                           ncol_local=ncol_local_in, &
     516             :                           row_indices=row_indices_in, &
     517             :                           col_indices=col_indices_in, &
     518             :                           nrow_block=nrow_block_in, &
     519         126 :                           ncol_block=ncol_block_in)
     520             : 
     521         126 :       IF (unit_nr > 0 .AND. mp2_env%bse%bse_debug_print) THEN
     522          12 :          WRITE (unit_nr, '(T2,A10,T13,A14,A10,T71,I10)') 'BSE|DEBUG|', 'Row number of ', fm_out%name, &
     523          24 :             fm_out%matrix_struct%nrow_global
     524          12 :          WRITE (unit_nr, '(T2,A10,T13,A17,A10,T71,I10)') 'BSE|DEBUG|', 'Column number of ', fm_out%name, &
     525          24 :             fm_out%matrix_struct%ncol_global
     526             : 
     527          12 :          WRITE (unit_nr, '(T2,A10,T13,A18,A10,T71,I10)') 'BSE|DEBUG|', 'Row block size of ', fm_out%name, nrow_block_out
     528          12 :          WRITE (unit_nr, '(T2,A10,T13,A21,A10,T71,I10)') 'BSE|DEBUG|', 'Column block size of ', fm_out%name, ncol_block_out
     529             : 
     530          12 :          WRITE (unit_nr, '(T2,A10,T13,A14,A10,T71,I10)') 'BSE|DEBUG|', 'Row number of ', fm_in%name, &
     531          24 :             fm_in%matrix_struct%nrow_global
     532          12 :          WRITE (unit_nr, '(T2,A10,T13,A17,A10,T71,I10)') 'BSE|DEBUG|', 'Column number of ', fm_in%name, &
     533          24 :             fm_in%matrix_struct%ncol_global
     534             : 
     535          12 :          WRITE (unit_nr, '(T2,A10,T13,A18,A10,T71,I10)') 'BSE|DEBUG|', 'Row block size of ', fm_in%name, nrow_block_in
     536          12 :          WRITE (unit_nr, '(T2,A10,T13,A21,A10,T71,I10)') 'BSE|DEBUG|', 'Column block size of ', fm_in%name, ncol_block_in
     537             :       END IF
     538             : 
     539             :       ! We find global indices in S with nrow_in and ncol_in for truncation
     540       10038 :       DO col_idx_loc = 1, ncol_local_in
     541        9912 :          idx_col_in = col_indices_in(col_idx_loc)
     542             : 
     543        9912 :          idx_col_first = (idx_col_in - 1)/ncol_in + 1
     544        9912 :          idx_col_sec = MOD(idx_col_in - 1, ncol_in) + 1
     545             : 
     546             :          ! If occupied orbitals are included, these have to be handled differently
     547             :          ! due to their reversed indexing
     548        9912 :          IF (correct_nrow) THEN
     549        3864 :             idx_col_first = idx_col_first - nrow_offset + 1
     550        3864 :             IF (idx_col_first .LE. 0) CYCLE
     551             :          ELSE
     552        6048 :             IF (idx_col_first > nrow_out) EXIT
     553             :          END IF
     554        9912 :          IF (correct_ncol) THEN
     555         672 :             idx_col_sec = idx_col_sec - ncol_offset + 1
     556         672 :             IF (idx_col_sec .LE. 0) CYCLE
     557             :          ELSE
     558        9240 :             IF (idx_col_sec > ncol_out) CYCLE
     559             :          END IF
     560             : 
     561        8736 :          idx_col_out = idx_col_sec + (idx_col_first - 1)*ncol_out
     562             : 
     563      371406 :          DO row_idx_loc = 1, nrow_local_in
     564      362544 :             idx_row_in = row_indices_in(row_idx_loc)
     565             : 
     566      362544 :             send_prow = fm_out%matrix_struct%g2p_row(idx_row_in)
     567      362544 :             send_pcol = fm_out%matrix_struct%g2p_col(idx_col_out)
     568             : 
     569      362544 :             proc_send = fm_out%matrix_struct%context%blacs2mpi(send_prow, send_pcol)
     570             : 
     571      372456 :             num_entries_send(proc_send) = num_entries_send(proc_send) + 1
     572             : 
     573             :          END DO
     574             :       END DO
     575             : 
     576         126 :       CALL timestop(handle2)
     577             : 
     578         126 :       CALL timeset(routineN//"_2_comm_entry_nums", handle2)
     579         126 :       IF (unit_nr > 0 .AND. mp2_env%bse%bse_debug_print) THEN
     580          12 :          WRITE (unit_nr, '(T2,A10,T13,A27)') 'BSE|DEBUG|', 'Communicating entry numbers'
     581             :       END IF
     582             : 
     583         126 :       CALL para_env_out%alltoall(num_entries_send, num_entries_rec, 1)
     584             : 
     585         126 :       CALL timestop(handle2)
     586             : 
     587         126 :       CALL timeset(routineN//"_3_alloc_buffer", handle2)
     588         126 :       IF (unit_nr > 0 .AND. mp2_env%bse%bse_debug_print) THEN
     589          12 :          WRITE (unit_nr, '(T2,A10,T13,A18)') 'BSE|DEBUG|', 'Allocating buffers'
     590             :       END IF
     591             : 
     592             :       ! Buffers for entries and their indices
     593         630 :       ALLOCATE (buffer_rec(0:para_env_out%num_pe - 1))
     594         630 :       ALLOCATE (buffer_send(0:para_env_out%num_pe - 1))
     595             : 
     596             :       ! allocate data message and corresponding indices
     597         378 :       DO iproc = 0, para_env_out%num_pe - 1
     598             : 
     599         672 :          ALLOCATE (buffer_rec(iproc)%msg(num_entries_rec(iproc)))
     600      362922 :          buffer_rec(iproc)%msg = 0.0_dp
     601             : 
     602             :       END DO
     603             : 
     604         378 :       DO iproc = 0, para_env_out%num_pe - 1
     605             : 
     606         672 :          ALLOCATE (buffer_send(iproc)%msg(num_entries_send(iproc)))
     607      362922 :          buffer_send(iproc)%msg = 0.0_dp
     608             : 
     609             :       END DO
     610             : 
     611         378 :       DO iproc = 0, para_env_out%num_pe - 1
     612             : 
     613         672 :          ALLOCATE (buffer_rec(iproc)%indx(num_entries_rec(iproc), 2))
     614      725970 :          buffer_rec(iproc)%indx = 0
     615             : 
     616             :       END DO
     617             : 
     618         378 :       DO iproc = 0, para_env_out%num_pe - 1
     619             : 
     620         672 :          ALLOCATE (buffer_send(iproc)%indx(num_entries_send(iproc), 2))
     621      725970 :          buffer_send(iproc)%indx = 0
     622             : 
     623             :       END DO
     624             : 
     625         126 :       CALL timestop(handle2)
     626             : 
     627         126 :       CALL timeset(routineN//"_4_buf_from_fmin_"//fm_out%name, handle2)
     628         126 :       IF (unit_nr > 0 .AND. mp2_env%bse%bse_debug_print) THEN
     629          12 :          WRITE (unit_nr, '(T2,A10,T13,A18,A10,A13)') 'BSE|DEBUG|', 'Writing data from ', fm_in%name, ' into buffers'
     630             :       END IF
     631             : 
     632         378 :       ALLOCATE (entry_counter(0:para_env_out%num_pe - 1))
     633         378 :       entry_counter(:) = 0
     634             : 
     635             :       ! Now we can write the actual data and indices to the send-buffer
     636       10038 :       DO col_idx_loc = 1, ncol_local_in
     637        9912 :          idx_col_in = col_indices_in(col_idx_loc)
     638             : 
     639        9912 :          idx_col_first = (idx_col_in - 1)/ncol_in + 1
     640        9912 :          idx_col_sec = MOD(idx_col_in - 1, ncol_in) + 1
     641             : 
     642             :          ! If occupied orbitals are included, these have to be handled differently
     643             :          ! due to their reversed indexing
     644        9912 :          IF (correct_nrow) THEN
     645        3864 :             idx_col_first = idx_col_first - nrow_offset + 1
     646        3864 :             IF (idx_col_first .LE. 0) CYCLE
     647             :          ELSE
     648        6048 :             IF (idx_col_first > nrow_out) EXIT
     649             :          END IF
     650        9912 :          IF (correct_ncol) THEN
     651         672 :             idx_col_sec = idx_col_sec - ncol_offset + 1
     652         672 :             IF (idx_col_sec .LE. 0) CYCLE
     653             :          ELSE
     654        9240 :             IF (idx_col_sec > ncol_out) CYCLE
     655             :          END IF
     656             : 
     657        8736 :          idx_col_out = idx_col_sec + (idx_col_first - 1)*ncol_out
     658             : 
     659      371406 :          DO row_idx_loc = 1, nrow_local_in
     660      362544 :             idx_row_in = row_indices_in(row_idx_loc)
     661             : 
     662      362544 :             send_prow = fm_out%matrix_struct%g2p_row(idx_row_in)
     663             : 
     664      362544 :             send_pcol = fm_out%matrix_struct%g2p_col(idx_col_out)
     665             : 
     666      362544 :             proc_send = fm_out%matrix_struct%context%blacs2mpi(send_prow, send_pcol)
     667      362544 :             entry_counter(proc_send) = entry_counter(proc_send) + 1
     668             : 
     669             :             buffer_send(proc_send)%msg(entry_counter(proc_send)) = &
     670      362544 :                fm_in%local_data(row_idx_loc, col_idx_loc)
     671             :             !No need to create row_out, since it is identical to incoming
     672             :             !We dont change the RI index for any fm_mat_XX_BSE
     673      362544 :             buffer_send(proc_send)%indx(entry_counter(proc_send), 1) = idx_row_in
     674      372456 :             buffer_send(proc_send)%indx(entry_counter(proc_send), 2) = idx_col_out
     675             : 
     676             :          END DO
     677             :       END DO
     678             : 
     679        1890 :       ALLOCATE (req_array(1:para_env_out%num_pe, 4))
     680             : 
     681         126 :       CALL timestop(handle2)
     682             : 
     683         126 :       CALL timeset(routineN//"_5_comm_buffer", handle2)
     684         126 :       IF (unit_nr > 0 .AND. mp2_env%bse%bse_debug_print) THEN
     685          12 :          WRITE (unit_nr, '(T2,A10,T13,A21)') 'BSE|DEBUG|', 'Communicating buffers'
     686             :       END IF
     687             : 
     688             :       ! communicate the buffer
     689             :       CALL communicate_buffer(para_env_out, num_entries_rec, num_entries_send, buffer_rec, &
     690         126 :                               buffer_send, req_array)
     691             : 
     692         126 :       CALL timestop(handle2)
     693             : 
     694         126 :       CALL timeset(routineN//"_6_buffer_to_fmout"//fm_out%name, handle2)
     695         126 :       IF (unit_nr > 0 .AND. mp2_env%bse%bse_debug_print) THEN
     696          12 :          WRITE (unit_nr, '(T2,A10,T13,A24,A10)') 'BSE|DEBUG|', 'Writing from buffers to ', fm_out%name
     697             :       END IF
     698             : 
     699             :       ! fill fm_out with the entries from buffer_rec, i.e. buffer_rec are parts of fm_in
     700         126 :       nprocs = para_env_out%num_pe
     701             : 
     702             : !$OMP PARALLEL DO DEFAULT(NONE) &
     703             : !$OMP SHARED(fm_out, nprocs, num_entries_rec, buffer_rec) &
     704         126 : !$OMP PRIVATE(iproc, i_entry_rec, ii, jj)
     705             :       DO iproc = 0, nprocs - 1
     706             :          DO i_entry_rec = 1, num_entries_rec(iproc)
     707             :             ii = fm_out%matrix_struct%g2l_row(buffer_rec(iproc)%indx(i_entry_rec, 1))
     708             :             jj = fm_out%matrix_struct%g2l_col(buffer_rec(iproc)%indx(i_entry_rec, 2))
     709             : 
     710             :             fm_out%local_data(ii, jj) = fm_out%local_data(ii, jj) + buffer_rec(iproc)%msg(i_entry_rec)
     711             :          END DO
     712             :       END DO
     713             : !$OMP END PARALLEL DO
     714             : 
     715         126 :       CALL timestop(handle2)
     716             : 
     717         126 :       CALL timeset(routineN//"_7_cleanup", handle2)
     718         126 :       IF (unit_nr > 0 .AND. mp2_env%bse%bse_debug_print) THEN
     719          12 :          WRITE (unit_nr, '(T2,A10,T13,A41)') 'BSE|DEBUG|', 'Starting cleanup of communication buffers'
     720             :       END IF
     721             : 
     722             :       !Clean up all the arrays from the communication process
     723         378 :       DO iproc = 0, para_env_out%num_pe - 1
     724         252 :          DEALLOCATE (buffer_rec(iproc)%msg)
     725         252 :          DEALLOCATE (buffer_rec(iproc)%indx)
     726         252 :          DEALLOCATE (buffer_send(iproc)%msg)
     727         378 :          DEALLOCATE (buffer_send(iproc)%indx)
     728             :       END DO
     729         630 :       DEALLOCATE (buffer_rec, buffer_send)
     730         126 :       DEALLOCATE (req_array)
     731         126 :       DEALLOCATE (entry_counter)
     732         126 :       DEALLOCATE (num_entries_rec, num_entries_send)
     733             : 
     734         126 :       CALL timestop(handle2)
     735         126 :       CALL timestop(handle)
     736             : 
     737        1008 :    END SUBROUTINE truncate_fm
     738             : 
     739             : ! **************************************************************************************************
     740             : !> \brief ...
     741             : !> \param fm_mat_S_bar_ia_bse ...
     742             : !> \param fm_mat_S_bar_ij_bse ...
     743             : !> \param fm_mat_S_trunc ...
     744             : !> \param fm_mat_S_ij_trunc ...
     745             : !> \param fm_mat_S_ab_trunc ...
     746             : !> \param fm_mat_Q_static_bse_gemm ...
     747             : !> \param mp2_env ...
     748             : ! **************************************************************************************************
     749          42 :    SUBROUTINE deallocate_matrices_bse(fm_mat_S_bar_ia_bse, fm_mat_S_bar_ij_bse, &
     750             :                                       fm_mat_S_trunc, fm_mat_S_ij_trunc, fm_mat_S_ab_trunc, &
     751             :                                       fm_mat_Q_static_bse_gemm, mp2_env)
     752             : 
     753             :       TYPE(cp_fm_type), INTENT(INOUT) :: fm_mat_S_bar_ia_bse, fm_mat_S_bar_ij_bse, fm_mat_S_trunc, &
     754             :          fm_mat_S_ij_trunc, fm_mat_S_ab_trunc, fm_mat_Q_static_bse_gemm
     755             :       TYPE(mp2_type)                                     :: mp2_env
     756             : 
     757             :       CHARACTER(LEN=*), PARAMETER :: routineN = 'deallocate_matrices_bse'
     758             : 
     759             :       INTEGER                                            :: handle
     760             : 
     761          42 :       CALL timeset(routineN, handle)
     762             : 
     763          42 :       CALL cp_fm_release(fm_mat_S_bar_ia_bse)
     764          42 :       CALL cp_fm_release(fm_mat_S_bar_ij_bse)
     765          42 :       CALL cp_fm_release(fm_mat_S_trunc)
     766          42 :       CALL cp_fm_release(fm_mat_S_ij_trunc)
     767          42 :       CALL cp_fm_release(fm_mat_S_ab_trunc)
     768          42 :       CALL cp_fm_release(fm_mat_Q_static_bse_gemm)
     769          42 :       IF (mp2_env%bse%do_nto_analysis) THEN
     770           4 :          DEALLOCATE (mp2_env%bse%bse_nto_state_list_final)
     771             :       END IF
     772             : 
     773          42 :       CALL timestop(handle)
     774             : 
     775          42 :    END SUBROUTINE deallocate_matrices_bse
     776             : 
     777             : ! **************************************************************************************************
     778             : !> \brief Routine for computing the coefficients of the eigenvectors of the BSE matrix from a
     779             : !>  multiplication with the eigenvalues
     780             : !> \param fm_work ...
     781             : !> \param eig_vals ...
     782             : !> \param beta ...
     783             : !> \param gamma ...
     784             : !> \param do_transpose ...
     785             : ! **************************************************************************************************
     786         104 :    SUBROUTINE comp_eigvec_coeff_BSE(fm_work, eig_vals, beta, gamma, do_transpose)
     787             : 
     788             :       TYPE(cp_fm_type), INTENT(INOUT)                    :: fm_work
     789             :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:), &
     790             :          INTENT(IN)                                      :: eig_vals
     791             :       REAL(KIND=dp), INTENT(IN)                          :: beta
     792             :       REAL(KIND=dp), INTENT(IN), OPTIONAL                :: gamma
     793             :       LOGICAL, INTENT(IN), OPTIONAL                      :: do_transpose
     794             : 
     795             :       CHARACTER(LEN=*), PARAMETER :: routineN = 'comp_eigvec_coeff_BSE'
     796             : 
     797             :       INTEGER                                            :: handle, i_row_global, ii, j_col_global, &
     798             :                                                             jj, ncol_local, nrow_local
     799          52 :       INTEGER, DIMENSION(:), POINTER                     :: col_indices, row_indices
     800             :       LOGICAL                                            :: my_do_transpose
     801             :       REAL(KIND=dp)                                      :: coeff, my_gamma
     802             : 
     803          52 :       CALL timeset(routineN, handle)
     804             : 
     805          52 :       IF (PRESENT(gamma)) THEN
     806          52 :          my_gamma = gamma
     807             :       ELSE
     808             :          my_gamma = 2.0_dp
     809             :       END IF
     810             : 
     811          52 :       IF (PRESENT(do_transpose)) THEN
     812          52 :          my_do_transpose = do_transpose
     813             :       ELSE
     814             :          my_do_transpose = .FALSE.
     815             :       END IF
     816             : 
     817             :       CALL cp_fm_get_info(matrix=fm_work, &
     818             :                           nrow_local=nrow_local, &
     819             :                           ncol_local=ncol_local, &
     820             :                           row_indices=row_indices, &
     821          52 :                           col_indices=col_indices)
     822             : 
     823          52 :       IF (my_do_transpose) THEN
     824        2548 :          DO jj = 1, ncol_local
     825        2496 :             j_col_global = col_indices(jj)
     826       62452 :             DO ii = 1, nrow_local
     827       59904 :                coeff = (eig_vals(j_col_global)**beta)/my_gamma
     828       62400 :                fm_work%local_data(ii, jj) = fm_work%local_data(ii, jj)*coeff
     829             :             END DO
     830             :          END DO
     831             :       ELSE
     832           0 :          DO jj = 1, ncol_local
     833           0 :             DO ii = 1, nrow_local
     834           0 :                i_row_global = row_indices(ii)
     835           0 :                coeff = (eig_vals(i_row_global)**beta)/my_gamma
     836           0 :                fm_work%local_data(ii, jj) = fm_work%local_data(ii, jj)*coeff
     837             :             END DO
     838             :          END DO
     839             :       END IF
     840             : 
     841          52 :       CALL timestop(handle)
     842             : 
     843          52 :    END SUBROUTINE
     844             : 
     845             : ! **************************************************************************************************
     846             : !> \brief ...
     847             : !> \param idx_prim ...
     848             : !> \param idx_sec ...
     849             : !> \param eigvec_entries ...
     850             : ! **************************************************************************************************
     851        1700 :    SUBROUTINE sort_excitations(idx_prim, idx_sec, eigvec_entries)
     852             : 
     853             :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: idx_prim, idx_sec
     854             :       REAL(kind=dp), ALLOCATABLE, DIMENSION(:)           :: eigvec_entries
     855             : 
     856             :       CHARACTER(LEN=*), PARAMETER                        :: routineN = 'sort_excitations'
     857             : 
     858             :       INTEGER                                            :: handle, ii, kk, num_entries, num_mults
     859        1700 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: idx_prim_work, idx_sec_work, tmp_index
     860             :       LOGICAL                                            :: unique_entries
     861        1700 :       REAL(kind=dp), ALLOCATABLE, DIMENSION(:)           :: eigvec_entries_work
     862             : 
     863        1700 :       CALL timeset(routineN, handle)
     864             : 
     865        1700 :       num_entries = SIZE(idx_prim)
     866             : 
     867        4450 :       ALLOCATE (tmp_index(num_entries))
     868             : 
     869        1700 :       CALL sort(idx_prim, num_entries, tmp_index)
     870             : 
     871        2750 :       ALLOCATE (idx_sec_work(num_entries))
     872        4450 :       ALLOCATE (eigvec_entries_work(num_entries))
     873             : 
     874        4274 :       DO ii = 1, num_entries
     875        2574 :          idx_sec_work(ii) = idx_sec(tmp_index(ii))
     876        4274 :          eigvec_entries_work(ii) = eigvec_entries(tmp_index(ii))
     877             :       END DO
     878             : 
     879        1700 :       DEALLOCATE (tmp_index)
     880        1700 :       DEALLOCATE (idx_sec)
     881        1700 :       DEALLOCATE (eigvec_entries)
     882             : 
     883        1700 :       CALL MOVE_ALLOC(idx_sec_work, idx_sec)
     884        1700 :       CALL MOVE_ALLOC(eigvec_entries_work, eigvec_entries)
     885             : 
     886             :       !Now check for multiple entries in first idx to check necessity of sorting in second idx
     887        1700 :       CALL sort_unique(idx_prim, unique_entries)
     888        1700 :       IF (.NOT. unique_entries) THEN
     889         508 :          ALLOCATE (idx_prim_work(num_entries))
     890        1452 :          idx_prim_work(:) = idx_prim(:)
     891             :          ! Find duplicate entries in idx_prim
     892        1452 :          DO ii = 1, num_entries
     893        1198 :             IF (idx_prim_work(ii) == 0) CYCLE
     894        4908 :             num_mults = COUNT(idx_prim_work == idx_prim_work(ii))
     895         804 :             IF (num_mults > 1) THEN
     896             :                !Set all duplicate entries to 0
     897        1034 :                idx_prim_work(ii:ii + num_mults - 1) = 0
     898             :                !Start sorting in secondary index
     899         960 :                ALLOCATE (idx_sec_work(num_mults))
     900         960 :                ALLOCATE (eigvec_entries_work(num_mults))
     901        1034 :                idx_sec_work(:) = idx_sec(ii:ii + num_mults - 1)
     902        1034 :                eigvec_entries_work(:) = eigvec_entries(ii:ii + num_mults - 1)
     903         640 :                ALLOCATE (tmp_index(num_mults))
     904         320 :                CALL sort(idx_sec_work, num_mults, tmp_index)
     905             : 
     906             :                !Now write newly sorted indices to original arrays
     907        1034 :                DO kk = ii, ii + num_mults - 1
     908         714 :                   idx_sec(kk) = idx_sec_work(kk - ii + 1)
     909        1034 :                   eigvec_entries(kk) = eigvec_entries_work(tmp_index(kk - ii + 1))
     910             :                END DO
     911             :                !Deallocate work arrays
     912         320 :                DEALLOCATE (tmp_index)
     913         320 :                DEALLOCATE (idx_sec_work)
     914         320 :                DEALLOCATE (eigvec_entries_work)
     915             :             END IF
     916        1452 :             idx_prim_work(ii) = idx_prim(ii)
     917             :          END DO
     918         254 :          DEALLOCATE (idx_prim_work)
     919             :       END IF
     920             : 
     921        1700 :       CALL timestop(handle)
     922             : 
     923        5100 :    END SUBROUTINE sort_excitations
     924             : 
     925             : ! **************************************************************************************************
     926             : !> \brief Roughly estimates the needed runtime and memory during the BSE run
     927             : !> \param homo_red ...
     928             : !> \param virtual_red ...
     929             : !> \param unit_nr ...
     930             : !> \param bse_abba ...
     931             : !> \param para_env ...
     932             : !> \param diag_runtime_est ...
     933             : ! **************************************************************************************************
     934          42 :    SUBROUTINE estimate_BSE_resources(homo_red, virtual_red, unit_nr, bse_abba, &
     935             :                                      para_env, diag_runtime_est)
     936             : 
     937             :       INTEGER                                            :: homo_red, virtual_red, unit_nr
     938             :       LOGICAL                                            :: bse_abba
     939             :       TYPE(mp_para_env_type), POINTER                    :: para_env
     940             :       REAL(KIND=dp)                                      :: diag_runtime_est
     941             : 
     942             :       CHARACTER(LEN=*), PARAMETER :: routineN = 'estimate_BSE_resources'
     943             : 
     944             :       INTEGER                                            :: handle, num_BSE_matrices
     945             :       INTEGER(KIND=int_8)                                :: full_dim
     946             :       REAL(KIND=dp)                                      :: mem_est, mem_est_per_rank
     947             : 
     948          42 :       CALL timeset(routineN, handle)
     949             : 
     950             :       ! Number of matrices with size of A in TDA is 2 (A itself and W_ijab)
     951          42 :       num_BSE_matrices = 2
     952             :       ! With the full diagonalization of ABBA, we need several auxiliary matrices in the process
     953             :       ! The maximum number is 2 + 2 + 6 (additional B and C matrix as well as 6 matrices to create C)
     954          42 :       IF (bse_abba) THEN
     955          26 :          num_BSE_matrices = 10
     956             :       END IF
     957             : 
     958          42 :       full_dim = (INT(homo_red, KIND=int_8)**2*INT(virtual_red, KIND=int_8)**2)*INT(num_BSE_matrices, KIND=int_8)
     959          42 :       mem_est = REAL(8*full_dim, KIND=dp)/REAL(1024**3, KIND=dp)
     960          42 :       mem_est_per_rank = REAL(mem_est/para_env%num_pe, KIND=dp)
     961             : 
     962          42 :       IF (unit_nr > 0) THEN
     963             :          ! WRITE (unit_nr, '(T2,A4,T7,A40,T68,F13.3)') 'BSE|', 'Total peak memory estimate from BSE [GB]', &
     964             :          !    mem_est
     965          21 :          WRITE (unit_nr, '(T2,A4,T7,A40,T68,ES13.3)') 'BSE|', 'Total peak memory estimate from BSE [GB]', &
     966          42 :             mem_est
     967          21 :          WRITE (unit_nr, '(T2,A4,T7,A47,T68,F13.3)') 'BSE|', 'Peak memory estimate per MPI rank from BSE [GB]', &
     968          42 :             mem_est_per_rank
     969          21 :          WRITE (unit_nr, '(T2,A4)') 'BSE|'
     970             :       END IF
     971             :       ! Rough estimation of diagonalization runtimes. Baseline was a full BSE Naphthalene
     972             :       ! run with 11000x11000 entries in A/B/C, which took 10s on 32 ranks
     973             :       diag_runtime_est = REAL(INT(homo_red, KIND=int_8)*INT(virtual_red, KIND=int_8)/11000_int_8, KIND=dp)**3* &
     974          42 :                          10*32/REAL(para_env%num_pe, KIND=dp)
     975             : 
     976          42 :       CALL timestop(handle)
     977             : 
     978          42 :    END SUBROUTINE estimate_BSE_resources
     979             : 
     980             : ! **************************************************************************************************
     981             : !> \brief Filters eigenvector entries above a given threshold to describe excitations in the
     982             : !> singleparticle basis
     983             : !> \param fm_eigvec ...
     984             : !> \param idx_homo ...
     985             : !> \param idx_virt ...
     986             : !> \param eigvec_entries ...
     987             : !> \param i_exc ...
     988             : !> \param virtual ...
     989             : !> \param num_entries ...
     990             : !> \param mp2_env ...
     991             : ! **************************************************************************************************
     992        1700 :    SUBROUTINE filter_eigvec_contrib(fm_eigvec, idx_homo, idx_virt, eigvec_entries, &
     993             :                                     i_exc, virtual, num_entries, mp2_env)
     994             : 
     995             :       TYPE(cp_fm_type), INTENT(IN)                       :: fm_eigvec
     996             :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: idx_homo, idx_virt
     997             :       REAL(kind=dp), ALLOCATABLE, DIMENSION(:)           :: eigvec_entries
     998             :       INTEGER                                            :: i_exc, virtual, num_entries
     999             :       TYPE(mp2_type), INTENT(INOUT)                      :: mp2_env
    1000             : 
    1001             :       CHARACTER(LEN=*), PARAMETER :: routineN = 'filter_eigvec_contrib'
    1002             : 
    1003             :       INTEGER                                            :: eigvec_idx, handle, ii, iproc, jj, kk, &
    1004             :                                                             ncol_local, nrow_local, &
    1005             :                                                             num_entries_local
    1006             :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: num_entries_to_comm
    1007        1700 :       INTEGER, DIMENSION(:), POINTER                     :: col_indices, row_indices
    1008             :       REAL(KIND=dp)                                      :: eigvec_entry
    1009             :       TYPE(integ_mat_buffer_type), ALLOCATABLE, &
    1010        1700 :          DIMENSION(:)                                    :: buffer_entries
    1011             :       TYPE(mp_para_env_type), POINTER                    :: para_env
    1012             : 
    1013        1700 :       CALL timeset(routineN, handle)
    1014             : 
    1015        1700 :       para_env => fm_eigvec%matrix_struct%para_env
    1016             : 
    1017             :       CALL cp_fm_get_info(matrix=fm_eigvec, &
    1018             :                           nrow_local=nrow_local, &
    1019             :                           ncol_local=ncol_local, &
    1020             :                           row_indices=row_indices, &
    1021        1700 :                           col_indices=col_indices)
    1022             : 
    1023        5100 :       ALLOCATE (num_entries_to_comm(0:para_env%num_pe - 1))
    1024        5100 :       num_entries_to_comm(:) = 0
    1025             : 
    1026       83300 :       DO jj = 1, ncol_local
    1027             :          !First check if i is localized on this proc
    1028       81600 :          IF (col_indices(jj) /= i_exc) THEN
    1029             :             CYCLE
    1030             :          END IF
    1031       44200 :          DO ii = 1, nrow_local
    1032       40800 :             eigvec_idx = row_indices(ii)
    1033       40800 :             eigvec_entry = fm_eigvec%local_data(ii, jj)/SQRT(2.0_dp)
    1034      122400 :             IF (ABS(eigvec_entry) > mp2_env%bse%eps_x) THEN
    1035        1287 :                num_entries_to_comm(para_env%mepos) = num_entries_to_comm(para_env%mepos) + 1
    1036             :             END IF
    1037             :          END DO
    1038             :       END DO
    1039             : 
    1040             :       !Gather number of entries of other processes
    1041        1700 :       CALL para_env%sum(num_entries_to_comm)
    1042             : 
    1043        1700 :       num_entries_local = num_entries_to_comm(para_env%mepos)
    1044             : 
    1045        8500 :       ALLOCATE (buffer_entries(0:para_env%num_pe - 1))
    1046             : 
    1047        5100 :       DO iproc = 0, para_env%num_pe - 1
    1048        8454 :          ALLOCATE (buffer_entries(iproc)%msg(num_entries_to_comm(iproc)))
    1049        8454 :          ALLOCATE (buffer_entries(iproc)%indx(num_entries_to_comm(iproc), 2))
    1050        5974 :          buffer_entries(iproc)%msg = 0.0_dp
    1051       17048 :          buffer_entries(iproc)%indx = 0
    1052             :       END DO
    1053             : 
    1054             :       kk = 1
    1055       83300 :       DO jj = 1, ncol_local
    1056             :          !First check if i is localized on this proc
    1057       81600 :          IF (col_indices(jj) /= i_exc) THEN
    1058             :             CYCLE
    1059             :          END IF
    1060       44200 :          DO ii = 1, nrow_local
    1061       40800 :             eigvec_idx = row_indices(ii)
    1062       40800 :             eigvec_entry = fm_eigvec%local_data(ii, jj)/SQRT(2.0_dp)
    1063      122400 :             IF (ABS(eigvec_entry) > mp2_env%bse%eps_x) THEN
    1064        1287 :                buffer_entries(para_env%mepos)%indx(kk, 1) = (eigvec_idx - 1)/virtual + 1
    1065        1287 :                buffer_entries(para_env%mepos)%indx(kk, 2) = MOD(eigvec_idx - 1, virtual) + 1
    1066        1287 :                buffer_entries(para_env%mepos)%msg(kk) = eigvec_entry
    1067        1287 :                kk = kk + 1
    1068             :             END IF
    1069             :          END DO
    1070             :       END DO
    1071             : 
    1072        5100 :       DO iproc = 0, para_env%num_pe - 1
    1073        3400 :          CALL para_env%sum(buffer_entries(iproc)%msg)
    1074        5100 :          CALL para_env%sum(buffer_entries(iproc)%indx)
    1075             :       END DO
    1076             : 
    1077             :       !Now sum up gathered information
    1078        5100 :       num_entries = SUM(num_entries_to_comm)
    1079        4450 :       ALLOCATE (idx_homo(num_entries))
    1080        2750 :       ALLOCATE (idx_virt(num_entries))
    1081        4450 :       ALLOCATE (eigvec_entries(num_entries))
    1082             : 
    1083        1700 :       kk = 1
    1084        5100 :       DO iproc = 0, para_env%num_pe - 1
    1085        5100 :          IF (num_entries_to_comm(iproc) /= 0) THEN
    1086        4228 :             DO ii = 1, num_entries_to_comm(iproc)
    1087        2574 :                idx_homo(kk) = buffer_entries(iproc)%indx(ii, 1)
    1088        2574 :                idx_virt(kk) = buffer_entries(iproc)%indx(ii, 2)
    1089        2574 :                eigvec_entries(kk) = buffer_entries(iproc)%msg(ii)
    1090        4228 :                kk = kk + 1
    1091             :             END DO
    1092             :          END IF
    1093             :       END DO
    1094             : 
    1095             :       !Deallocate all the used arrays
    1096        5100 :       DO iproc = 0, para_env%num_pe - 1
    1097        3400 :          DEALLOCATE (buffer_entries(iproc)%msg)
    1098        5100 :          DEALLOCATE (buffer_entries(iproc)%indx)
    1099             :       END DO
    1100        6800 :       DEALLOCATE (buffer_entries)
    1101        1700 :       DEALLOCATE (num_entries_to_comm)
    1102        1700 :       NULLIFY (row_indices)
    1103        1700 :       NULLIFY (col_indices)
    1104             : 
    1105             :       !Now sort the results according to the involved singleparticle orbitals
    1106             :       ! (homo first, then virtual)
    1107        1700 :       CALL sort_excitations(idx_homo, idx_virt, eigvec_entries)
    1108             : 
    1109        1700 :       CALL timestop(handle)
    1110             : 
    1111        1700 :    END SUBROUTINE
    1112             : 
    1113             : ! **************************************************************************************************
    1114             : !> \brief Reads cutoffs for BSE from mp2_env and compares to energies in Eigenval to extract
    1115             : !>        reduced homo/virtual and
    1116             : !> \param Eigenval array (1d) with energies, can be e.g. from GW or DFT
    1117             : !> \param homo Total number of occupied orbitals
    1118             : !> \param virtual Total number of unoccupied orbitals
    1119             : !> \param homo_red Total number of occupied orbitals to include after cutoff
    1120             : !> \param virt_red Total number of unoccupied orbitals to include after ctuoff
    1121             : !> \param homo_incl First occupied index to include after cutoff
    1122             : !> \param virt_incl Last unoccupied index to include after cutoff
    1123             : !> \param mp2_env ...
    1124             : ! **************************************************************************************************
    1125          84 :    SUBROUTINE determine_cutoff_indices(Eigenval, &
    1126             :                                        homo, virtual, &
    1127             :                                        homo_red, virt_red, &
    1128             :                                        homo_incl, virt_incl, &
    1129             :                                        mp2_env)
    1130             : 
    1131             :       REAL(KIND=dp), DIMENSION(:), INTENT(IN)            :: Eigenval
    1132             :       INTEGER, INTENT(IN)                                :: homo, virtual
    1133             :       INTEGER, INTENT(OUT)                               :: homo_red, virt_red, homo_incl, virt_incl
    1134             :       TYPE(mp2_type), INTENT(INOUT)                      :: mp2_env
    1135             : 
    1136             :       CHARACTER(LEN=*), PARAMETER :: routineN = 'determine_cutoff_indices'
    1137             : 
    1138             :       INTEGER                                            :: handle, i_homo, j_virt
    1139             : 
    1140          84 :       CALL timeset(routineN, handle)
    1141             :       ! Determine index in homo and virtual for truncation
    1142             :       ! Uses indices of outermost orbitals within energy range (-mp2_env%bse%bse_cutoff_occ,mp2_env%bse%bse_cutoff_empty)
    1143          84 :       IF (mp2_env%bse%bse_cutoff_occ > 0 .OR. mp2_env%bse%bse_cutoff_empty > 0) THEN
    1144             :          IF (-mp2_env%bse%bse_cutoff_occ .LT. Eigenval(1) - Eigenval(homo) &
    1145          84 :              .OR. mp2_env%bse%bse_cutoff_occ < 0) THEN
    1146          84 :             homo_red = homo
    1147          84 :             homo_incl = 1
    1148             :          ELSE
    1149           0 :             homo_incl = 1
    1150           0 :             DO i_homo = 1, homo
    1151           0 :                IF (Eigenval(i_homo) - Eigenval(homo) .GT. -mp2_env%bse%bse_cutoff_occ) THEN
    1152           0 :                   homo_incl = i_homo
    1153           0 :                   EXIT
    1154             :                END IF
    1155             :             END DO
    1156           0 :             homo_red = homo - homo_incl + 1
    1157             :          END IF
    1158             : 
    1159             :          IF (mp2_env%bse%bse_cutoff_empty .GT. Eigenval(homo + virtual) - Eigenval(homo + 1) &
    1160          84 :              .OR. mp2_env%bse%bse_cutoff_empty < 0) THEN
    1161           0 :             virt_red = virtual
    1162           0 :             virt_incl = virtual
    1163             :          ELSE
    1164          84 :             virt_incl = homo + 1
    1165        1092 :             DO j_virt = 1, virtual
    1166        1092 :                IF (Eigenval(homo + j_virt) - Eigenval(homo + 1) .GT. mp2_env%bse%bse_cutoff_empty) THEN
    1167          84 :                   virt_incl = j_virt - 1
    1168          84 :                   EXIT
    1169             :                END IF
    1170             :             END DO
    1171          84 :             virt_red = virt_incl
    1172             :          END IF
    1173             :       ELSE
    1174           0 :          homo_red = homo
    1175           0 :          virt_red = virtual
    1176           0 :          homo_incl = 1
    1177           0 :          virt_incl = virtual
    1178             :       END IF
    1179             : 
    1180          84 :       CALL timestop(handle)
    1181             : 
    1182          84 :    END SUBROUTINE
    1183             : 
    1184             : ! **************************************************************************************************
    1185             : !> \brief Determines indices within the given energy cutoffs and truncates Eigenvalues and matrices
    1186             : !> \param fm_mat_S_ia_bse ...
    1187             : !> \param fm_mat_S_ij_bse ...
    1188             : !> \param fm_mat_S_ab_bse ...
    1189             : !> \param fm_mat_S_trunc ...
    1190             : !> \param fm_mat_S_ij_trunc ...
    1191             : !> \param fm_mat_S_ab_trunc ...
    1192             : !> \param Eigenval_scf ...
    1193             : !> \param Eigenval ...
    1194             : !> \param Eigenval_reduced ...
    1195             : !> \param homo ...
    1196             : !> \param virtual ...
    1197             : !> \param dimen_RI ...
    1198             : !> \param unit_nr ...
    1199             : !> \param bse_lev_virt ...
    1200             : !> \param homo_red ...
    1201             : !> \param virt_red ...
    1202             : !> \param mp2_env ...
    1203             : ! **************************************************************************************************
    1204         210 :    SUBROUTINE truncate_BSE_matrices(fm_mat_S_ia_bse, fm_mat_S_ij_bse, fm_mat_S_ab_bse, &
    1205             :                                     fm_mat_S_trunc, fm_mat_S_ij_trunc, fm_mat_S_ab_trunc, &
    1206          42 :                                     Eigenval_scf, Eigenval, Eigenval_reduced, &
    1207             :                                     homo, virtual, dimen_RI, unit_nr, &
    1208             :                                     bse_lev_virt, &
    1209             :                                     homo_red, virt_red, &
    1210             :                                     mp2_env)
    1211             : 
    1212             :       TYPE(cp_fm_type), INTENT(IN)                       :: fm_mat_S_ia_bse, fm_mat_S_ij_bse, &
    1213             :                                                             fm_mat_S_ab_bse
    1214             :       TYPE(cp_fm_type), INTENT(INOUT)                    :: fm_mat_S_trunc, fm_mat_S_ij_trunc, &
    1215             :                                                             fm_mat_S_ab_trunc
    1216             :       REAL(KIND=dp), DIMENSION(:)                        :: Eigenval_scf, Eigenval
    1217             :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: Eigenval_reduced
    1218             :       INTEGER, INTENT(IN)                                :: homo, virtual, dimen_RI, unit_nr, &
    1219             :                                                             bse_lev_virt
    1220             :       INTEGER, INTENT(OUT)                               :: homo_red, virt_red
    1221             :       TYPE(mp2_type), INTENT(INOUT)                      :: mp2_env
    1222             : 
    1223             :       CHARACTER(LEN=*), PARAMETER :: routineN = 'truncate_BSE_matrices'
    1224             : 
    1225             :       INTEGER                                            :: handle, homo_incl, virt_incl
    1226             :       TYPE(cp_blacs_env_type), POINTER                   :: context
    1227             :       TYPE(cp_fm_struct_type), POINTER                   :: fm_struct_ab, fm_struct_ia, fm_struct_ij
    1228             :       TYPE(mp_para_env_type), POINTER                    :: para_env
    1229             : 
    1230          42 :       CALL timeset(routineN, handle)
    1231             : 
    1232             :       ! Determine index in homo and virtual for truncation
    1233             :       ! Uses indices of outermost orbitals within energy range (-mp2_env%bse%bse_cutoff_occ,mp2_env%bse%bse_cutoff_empty)
    1234             : 
    1235             :       CALL determine_cutoff_indices(Eigenval_scf, &
    1236             :                                     homo, virtual, &
    1237             :                                     homo_red, virt_red, &
    1238             :                                     homo_incl, virt_incl, &
    1239          42 :                                     mp2_env)
    1240             : 
    1241          42 :       IF (unit_nr > 0) THEN
    1242          21 :          IF (mp2_env%bse%bse_cutoff_occ > 0) THEN
    1243          21 :             WRITE (unit_nr, '(T2,A4,T7,A29,T71,F10.3)') 'BSE|', 'Cutoff occupied orbitals [eV]', &
    1244          42 :                mp2_env%bse%bse_cutoff_occ*evolt
    1245             :          ELSE
    1246           0 :             WRITE (unit_nr, '(T2,A4,T7,A37)') 'BSE|', 'No cutoff given for occupied orbitals'
    1247             :          END IF
    1248          21 :          IF (mp2_env%bse%bse_cutoff_empty > 0) THEN
    1249          21 :             WRITE (unit_nr, '(T2,A4,T7,A26,T71,F10.3)') 'BSE|', 'Cutoff empty orbitals [eV]', &
    1250          42 :                mp2_env%bse%bse_cutoff_empty*evolt
    1251             :          ELSE
    1252           0 :             WRITE (unit_nr, '(T2,A4,T7,A34)') 'BSE|', 'No cutoff given for empty orbitals'
    1253             :          END IF
    1254          21 :          WRITE (unit_nr, '(T2,A4,T7,A20,T71,I10)') 'BSE|', 'First occupied index', homo_incl
    1255          21 :          WRITE (unit_nr, '(T2,A4,T7,A32,T71,I10)') 'BSE|', 'Last empty index (not MO index!)', virt_incl
    1256          21 :          WRITE (unit_nr, '(T2,A4,T7,A35,T71,F10.3)') 'BSE|', 'Energy of first occupied index [eV]', Eigenval(homo_incl)*evolt
    1257          21 :          WRITE (unit_nr, '(T2,A4,T7,A31,T71,F10.3)') 'BSE|', 'Energy of last empty index [eV]', Eigenval(homo + virt_incl)*evolt
    1258          21 :          WRITE (unit_nr, '(T2,A4,T7,A54,T71,F10.3)') 'BSE|', 'Energy difference of first occupied index to HOMO [eV]', &
    1259          42 :             -(Eigenval(homo_incl) - Eigenval(homo))*evolt
    1260          21 :          WRITE (unit_nr, '(T2,A4,T7,A50,T71,F10.3)') 'BSE|', 'Energy difference of last empty index to LUMO [eV]', &
    1261          42 :             (Eigenval(homo + virt_incl) - Eigenval(homo + 1))*evolt
    1262          21 :          WRITE (unit_nr, '(T2,A4,T7,A35,T71,I10)') 'BSE|', 'Number of GW-corrected occupied MOs', mp2_env%ri_g0w0%corr_mos_occ
    1263          21 :          WRITE (unit_nr, '(T2,A4,T7,A32,T71,I10)') 'BSE|', 'Number of GW-corrected empty MOs', mp2_env%ri_g0w0%corr_mos_virt
    1264          21 :          WRITE (unit_nr, '(T2,A4)') 'BSE|'
    1265             :       END IF
    1266          42 :       IF (unit_nr > 0) THEN
    1267          21 :          IF (homo - homo_incl + 1 > mp2_env%ri_g0w0%corr_mos_occ) THEN
    1268           0 :             CPABORT("Number of GW-corrected occupied MOs too small for chosen BSE cutoff")
    1269             :          END IF
    1270          21 :          IF (virt_incl > mp2_env%ri_g0w0%corr_mos_virt) THEN
    1271           0 :             CPABORT("Number of GW-corrected virtual MOs too small for chosen BSE cutoff")
    1272             :          END IF
    1273             :       END IF
    1274             :       !Truncate full fm_S matrices
    1275             :       !Allocate new truncated matrices of proper size
    1276          42 :       para_env => fm_mat_S_ia_bse%matrix_struct%para_env
    1277          42 :       context => fm_mat_S_ia_bse%matrix_struct%context
    1278             : 
    1279          42 :       CALL cp_fm_struct_create(fm_struct_ia, para_env, context, dimen_RI, homo_red*virt_red)
    1280          42 :       CALL cp_fm_struct_create(fm_struct_ij, para_env, context, dimen_RI, homo_red*homo_red)
    1281          42 :       CALL cp_fm_struct_create(fm_struct_ab, para_env, context, dimen_RI, virt_red*virt_red)
    1282             : 
    1283          42 :       CALL cp_fm_create(fm_mat_S_trunc, fm_struct_ia, "fm_S_trunc")
    1284          42 :       CALL cp_fm_create(fm_mat_S_ij_trunc, fm_struct_ij, "fm_S_ij_trunc")
    1285          42 :       CALL cp_fm_create(fm_mat_S_ab_trunc, fm_struct_ab, "fm_S_ab_trunc")
    1286             : 
    1287             :       !Copy parts of original matrices to truncated ones
    1288          42 :       IF (mp2_env%bse%bse_cutoff_occ > 0 .OR. mp2_env%bse%bse_cutoff_empty > 0) THEN
    1289             :          !Truncate eigenvals
    1290         126 :          ALLOCATE (Eigenval_reduced(homo_red + virt_red))
    1291             :          ! Include USE_KS_ENERGIES input
    1292          42 :          IF (mp2_env%bse%use_ks_energies) THEN
    1293           0 :             Eigenval_reduced(:) = Eigenval_scf(homo_incl:homo + virt_incl)
    1294             :          ELSE
    1295         714 :             Eigenval_reduced(:) = Eigenval(homo_incl:homo + virt_incl)
    1296             :          END IF
    1297             : 
    1298             :          CALL truncate_fm(fm_mat_S_trunc, fm_mat_S_ia_bse, virtual, &
    1299             :                           homo_red, virt_red, unit_nr, mp2_env, &
    1300          42 :                           nrow_offset=homo_incl)
    1301             :          CALL truncate_fm(fm_mat_S_ij_trunc, fm_mat_S_ij_bse, homo, &
    1302             :                           homo_red, homo_red, unit_nr, mp2_env, &
    1303          42 :                           homo_incl, homo_incl)
    1304             :          CALL truncate_fm(fm_mat_S_ab_trunc, fm_mat_S_ab_bse, bse_lev_virt, &
    1305          42 :                           virt_red, virt_red, unit_nr, mp2_env)
    1306             : 
    1307             :       ELSE
    1308           0 :          IF (unit_nr > 0) THEN
    1309           0 :             WRITE (unit_nr, '(T2,A4,T7,A37)') 'BSE|', 'No truncation of BSE matrices applied'
    1310           0 :             WRITE (unit_nr, '(T2,A4)') 'BSE|'
    1311             :          END IF
    1312           0 :          ALLOCATE (Eigenval_reduced(homo_red + virt_red))
    1313             :          ! Include USE_KS_ENERGIES input
    1314           0 :          IF (mp2_env%bse%use_ks_energies) THEN
    1315           0 :             Eigenval_reduced(:) = Eigenval_scf(:)
    1316             :          ELSE
    1317           0 :             Eigenval_reduced(:) = Eigenval(:)
    1318             :          END IF
    1319             :          CALL cp_fm_to_fm_submat_general(fm_mat_S_ia_bse, fm_mat_S_trunc, dimen_RI, homo_red*virt_red, &
    1320           0 :                                          1, 1, 1, 1, context)
    1321             :          CALL cp_fm_to_fm_submat_general(fm_mat_S_ij_bse, fm_mat_S_ij_trunc, dimen_RI, homo_red*homo_red, &
    1322           0 :                                          1, 1, 1, 1, context)
    1323             :          CALL cp_fm_to_fm_submat_general(fm_mat_S_ab_bse, fm_mat_S_ab_trunc, dimen_RI, virt_red*virt_red, &
    1324           0 :                                          1, 1, 1, 1, context)
    1325             :       END IF
    1326             : 
    1327          42 :       CALL cp_fm_struct_release(fm_struct_ia)
    1328          42 :       CALL cp_fm_struct_release(fm_struct_ij)
    1329          42 :       CALL cp_fm_struct_release(fm_struct_ab)
    1330             : 
    1331          42 :       NULLIFY (para_env)
    1332          42 :       NULLIFY (context)
    1333             : 
    1334          42 :       CALL timestop(handle)
    1335             : 
    1336          42 :    END SUBROUTINE truncate_BSE_matrices
    1337             : 
    1338             : ! **************************************************************************************************
    1339             : !> \brief ...
    1340             : !> \param fm_eigvec ...
    1341             : !> \param fm_eigvec_reshuffled ...
    1342             : !> \param homo ...
    1343             : !> \param virtual ...
    1344             : !> \param n_exc ...
    1345             : !> \param do_transpose ...
    1346             : !> \param unit_nr ...
    1347             : !> \param mp2_env ...
    1348             : ! **************************************************************************************************
    1349         900 :    SUBROUTINE reshuffle_eigvec(fm_eigvec, fm_eigvec_reshuffled, homo, virtual, n_exc, do_transpose, &
    1350             :                                unit_nr, mp2_env)
    1351             : 
    1352             :       TYPE(cp_fm_type), INTENT(IN)                       :: fm_eigvec
    1353             :       TYPE(cp_fm_type), INTENT(INOUT)                    :: fm_eigvec_reshuffled
    1354             :       INTEGER, INTENT(IN)                                :: homo, virtual, n_exc
    1355             :       LOGICAL, INTENT(IN)                                :: do_transpose
    1356             :       INTEGER, INTENT(IN)                                :: unit_nr
    1357             :       TYPE(mp2_type), INTENT(INOUT)                      :: mp2_env
    1358             : 
    1359             :       CHARACTER(LEN=*), PARAMETER                        :: routineN = 'reshuffle_eigvec'
    1360             : 
    1361             :       INTEGER                                            :: handle, my_m_col, my_n_row
    1362             :       INTEGER, DIMENSION(4)                              :: reordering
    1363             :       TYPE(cp_fm_struct_type), POINTER                   :: fm_struct_eigvec_col, &
    1364             :                                                             fm_struct_eigvec_reshuffled
    1365             :       TYPE(cp_fm_type)                                   :: fm_eigvec_col
    1366             : 
    1367         300 :       CALL timeset(routineN, handle)
    1368             : 
    1369             :       ! Define reordering:
    1370             :       ! (ia,11) to (a1,i1) for transposition
    1371             :       ! (ia,11) to (i1,a1) for default
    1372         300 :       IF (do_transpose) THEN
    1373          50 :          reordering = (/2, 3, 1, 4/)
    1374          50 :          my_n_row = virtual
    1375          50 :          my_m_col = homo
    1376             :       ELSE
    1377         250 :          reordering = (/1, 3, 2, 4/)
    1378         250 :          my_n_row = homo
    1379         250 :          my_m_col = virtual
    1380             :       END IF
    1381             : 
    1382             :       CALL cp_fm_struct_create(fm_struct_eigvec_col, &
    1383             :                                fm_eigvec%matrix_struct%para_env, fm_eigvec%matrix_struct%context, &
    1384         300 :                                homo*virtual, 1)
    1385             :       CALL cp_fm_struct_create(fm_struct_eigvec_reshuffled, &
    1386             :                                fm_eigvec%matrix_struct%para_env, fm_eigvec%matrix_struct%context, &
    1387         300 :                                my_n_row, my_m_col)
    1388             : 
    1389             :       ! Resort indices
    1390         300 :       CALL cp_fm_create(fm_eigvec_col, fm_struct_eigvec_col, name="BSE_column_vector")
    1391         300 :       CALL cp_fm_set_all(fm_eigvec_col, 0.0_dp)
    1392         300 :       CALL cp_fm_create(fm_eigvec_reshuffled, fm_struct_eigvec_reshuffled, name="BSE_reshuffled_eigenvector")
    1393         300 :       CALL cp_fm_set_all(fm_eigvec_reshuffled, 0.0_dp)
    1394             :       ! Fill matrix
    1395             :       CALL cp_fm_to_fm_submat(fm_eigvec, fm_eigvec_col, &
    1396             :                               homo*virtual, 1, &
    1397             :                               1, n_exc, &
    1398         300 :                               1, 1)
    1399             :       ! Reshuffle
    1400             :       CALL fm_general_add_bse(fm_eigvec_reshuffled, fm_eigvec_col, 1.0_dp, &
    1401             :                               virtual, 1, &
    1402             :                               1, 1, &
    1403         300 :                               unit_nr, reordering, mp2_env)
    1404             : 
    1405         300 :       CALL cp_fm_release(fm_eigvec_col)
    1406         300 :       CALL cp_fm_struct_release(fm_struct_eigvec_col)
    1407         300 :       CALL cp_fm_struct_release(fm_struct_eigvec_reshuffled)
    1408             : 
    1409         300 :       CALL timestop(handle)
    1410             : 
    1411         300 :    END SUBROUTINE reshuffle_eigvec
    1412             : 
    1413             : ! **************************************************************************************************
    1414             : !> \brief Borrowed from the tddfpt module with slight adaptions
    1415             : !> \param qs_env ...
    1416             : !> \param mos ...
    1417             : !> \param istate ...
    1418             : !> \param info_approximation ...
    1419             : !> \param stride ...
    1420             : !> \param append_cube ...
    1421             : !> \param print_section ...
    1422             : ! **************************************************************************************************
    1423           0 :    SUBROUTINE print_bse_nto_cubes(qs_env, mos, istate, info_approximation, &
    1424             :                                   stride, append_cube, print_section)
    1425             : 
    1426             :       TYPE(qs_environment_type), POINTER                 :: qs_env
    1427             :       TYPE(mo_set_type), DIMENSION(:), INTENT(IN)        :: mos
    1428             :       INTEGER, INTENT(IN)                                :: istate
    1429             :       CHARACTER(LEN=10)                                  :: info_approximation
    1430             :       INTEGER, DIMENSION(:), POINTER                     :: stride
    1431             :       LOGICAL, INTENT(IN)                                :: append_cube
    1432             :       TYPE(section_vals_type), POINTER                   :: print_section
    1433             : 
    1434             :       CHARACTER(LEN=*), PARAMETER :: routineN = 'print_bse_nto_cubes'
    1435             : 
    1436             :       CHARACTER(LEN=default_path_length)                 :: filename, info_approx_trunc, &
    1437             :                                                             my_pos_cube, title
    1438             :       INTEGER                                            :: handle, i, iset, nmo, unit_nr_cube
    1439             :       LOGICAL                                            :: mpi_io
    1440           0 :       TYPE(atomic_kind_type), DIMENSION(:), POINTER      :: atomic_kind_set
    1441             :       TYPE(cell_type), POINTER                           :: cell
    1442             :       TYPE(cp_fm_type), POINTER                          :: mo_coeff
    1443             :       TYPE(cp_logger_type), POINTER                      :: logger
    1444             :       TYPE(dft_control_type), POINTER                    :: dft_control
    1445             :       TYPE(particle_list_type), POINTER                  :: particles
    1446           0 :       TYPE(particle_type), DIMENSION(:), POINTER         :: particle_set
    1447             :       TYPE(pw_c1d_gs_type)                               :: wf_g
    1448             :       TYPE(pw_env_type), POINTER                         :: pw_env
    1449           0 :       TYPE(pw_pool_p_type), DIMENSION(:), POINTER        :: pw_pools
    1450             :       TYPE(pw_pool_type), POINTER                        :: auxbas_pw_pool
    1451             :       TYPE(pw_r3d_rs_type)                               :: wf_r
    1452           0 :       TYPE(qs_kind_type), DIMENSION(:), POINTER          :: qs_kind_set
    1453             :       TYPE(qs_subsys_type), POINTER                      :: subsys
    1454             : 
    1455           0 :       logger => cp_get_default_logger()
    1456           0 :       CALL timeset(routineN, handle)
    1457             : 
    1458           0 :       CALL get_qs_env(qs_env=qs_env, dft_control=dft_control, pw_env=pw_env)
    1459           0 :       CALL pw_env_get(pw_env, auxbas_pw_pool=auxbas_pw_pool, pw_pools=pw_pools)
    1460           0 :       CALL auxbas_pw_pool%create_pw(wf_r)
    1461           0 :       CALL auxbas_pw_pool%create_pw(wf_g)
    1462             : 
    1463           0 :       CALL get_qs_env(qs_env, subsys=subsys)
    1464           0 :       CALL qs_subsys_get(subsys, particles=particles)
    1465             : 
    1466           0 :       my_pos_cube = "REWIND"
    1467           0 :       IF (append_cube) THEN
    1468           0 :          my_pos_cube = "APPEND"
    1469             :       END IF
    1470             : 
    1471             :       CALL get_qs_env(qs_env=qs_env, &
    1472             :                       atomic_kind_set=atomic_kind_set, &
    1473             :                       qs_kind_set=qs_kind_set, &
    1474             :                       cell=cell, &
    1475           0 :                       particle_set=particle_set)
    1476             : 
    1477           0 :       DO iset = 1, 2
    1478           0 :          CALL get_mo_set(mo_set=mos(iset), mo_coeff=mo_coeff, nmo=nmo)
    1479           0 :          DO i = 1, nmo
    1480             :             CALL calculate_wavefunction(mo_coeff, i, wf_r, wf_g, atomic_kind_set, qs_kind_set, &
    1481           0 :                                         cell, dft_control, particle_set, pw_env)
    1482           0 :             IF (iset == 1) THEN
    1483           0 :                WRITE (filename, '(A6,I3.3,A5,I2.2,a11)') "_NEXC_", istate, "_NTO_", i, "_Hole_State"
    1484           0 :             ELSEIF (iset == 2) THEN
    1485           0 :                WRITE (filename, '(A6,I3.3,A5,I2.2,a15)') "_NEXC_", istate, "_NTO_", i, "_Particle_State"
    1486             :             END IF
    1487           0 :             info_approx_trunc = TRIM(ADJUSTL(info_approximation))
    1488           0 :             info_approx_trunc = info_approx_trunc(2:LEN_TRIM(info_approx_trunc) - 1)
    1489           0 :             filename = TRIM(info_approx_trunc)//TRIM(filename)
    1490           0 :             mpi_io = .TRUE.
    1491             :             unit_nr_cube = cp_print_key_unit_nr(logger, print_section, '', extension=".cube", &
    1492             :                                                 middle_name=TRIM(filename), file_position=my_pos_cube, &
    1493           0 :                                                 log_filename=.FALSE., ignore_should_output=.TRUE., mpi_io=mpi_io)
    1494           0 :             IF (iset == 1) THEN
    1495           0 :                WRITE (title, *) "Natural Transition Orbital Hole State", i
    1496           0 :             ELSEIF (iset == 2) THEN
    1497           0 :                WRITE (title, *) "Natural Transition Orbital Particle State", i
    1498             :             END IF
    1499           0 :             CALL cp_pw_to_cube(wf_r, unit_nr_cube, title, particles=particles, stride=stride, mpi_io=mpi_io)
    1500             :             CALL cp_print_key_finished_output(unit_nr_cube, logger, print_section, '', &
    1501           0 :                                               ignore_should_output=.TRUE., mpi_io=mpi_io)
    1502             :          END DO
    1503             :       END DO
    1504             : 
    1505           0 :       CALL auxbas_pw_pool%give_back_pw(wf_g)
    1506           0 :       CALL auxbas_pw_pool%give_back_pw(wf_r)
    1507             : 
    1508           0 :       CALL timestop(handle)
    1509           0 :    END SUBROUTINE print_bse_nto_cubes
    1510             : 
    1511             : ! **************************************************************************************************
    1512             : !> \brief Checks BSE input section and adapts them if necessary
    1513             : !> \param homo ...
    1514             : !> \param virtual ...
    1515             : !> \param unit_nr ...
    1516             : !> \param mp2_env ...
    1517             : !> \param qs_env ...
    1518             : ! **************************************************************************************************
    1519          42 :    SUBROUTINE adapt_BSE_input_params(homo, virtual, unit_nr, mp2_env, qs_env)
    1520             : 
    1521             :       INTEGER, INTENT(IN)                                :: homo, virtual, unit_nr
    1522             :       TYPE(mp2_type)                                     :: mp2_env
    1523             :       TYPE(qs_environment_type), POINTER                 :: qs_env
    1524             : 
    1525             :       CHARACTER(LEN=*), PARAMETER :: routineN = 'adapt_BSE_input_params'
    1526             : 
    1527             :       INTEGER                                            :: handle, i, j, n, ndim_periodic_cell, &
    1528             :                                                             ndim_periodic_poisson, &
    1529             :                                                             num_state_list_exceptions
    1530             :       TYPE(cell_type), POINTER                           :: cell_ref
    1531             :       TYPE(pw_env_type), POINTER                         :: pw_env
    1532             :       TYPE(pw_poisson_type), POINTER                     :: poisson_env
    1533             : 
    1534          42 :       CALL timeset(routineN, handle)
    1535             :       ! Get environment infos for later usage
    1536          42 :       NULLIFY (pw_env, cell_ref, poisson_env)
    1537          42 :       CALL get_qs_env(qs_env, pw_env=pw_env, cell_ref=cell_ref)
    1538          42 :       CALL pw_env_get(pw_env, poisson_env=poisson_env)
    1539         168 :       ndim_periodic_poisson = COUNT(poisson_env%parameters%periodic == 1)
    1540         168 :       ndim_periodic_cell = SUM(cell_ref%perd(1:3)) ! Borrowed from cell_methods.F/write_cell_low
    1541             : 
    1542             :       ! Handle negative NUM_PRINT_EXC
    1543          42 :       IF (mp2_env%bse%num_print_exc < 0 .OR. &
    1544             :           mp2_env%bse%num_print_exc > homo*virtual) THEN
    1545           0 :          mp2_env%bse%num_print_exc = homo*virtual
    1546           0 :          IF (unit_nr > 0) THEN
    1547             :             CALL cp_hint(__LOCATION__, &
    1548             :                          "Keyword NUM_PRINT_EXC is either negative or too large. "// &
    1549           0 :                          "Printing all computed excitations.")
    1550             :          END IF
    1551             :       END IF
    1552             : 
    1553             :       ! Default to NUM_PRINT_EXC if too large or negative,
    1554             :       ! but only if NTOs are called - would be confusing for the user otherwise
    1555             :       ! Prepare and adapt user inputs for NTO analysis
    1556             :       ! Logic: Explicit state list overrides NUM_PRINT_EXC_NTOS
    1557             :       !        If only NUM_PRINT_EXC_NTOS is given, we write the array 1,...,NUM_PRINT_EXC_NTOS to
    1558             :       !        bse_nto_state_list
    1559          42 :       IF (mp2_env%bse%do_nto_analysis) THEN
    1560           4 :          IF (mp2_env%bse%explicit_nto_list) THEN
    1561           0 :             IF (mp2_env%bse%num_print_exc_ntos > 0) THEN
    1562           0 :                IF (unit_nr > 0) THEN
    1563             :                   CALL cp_hint(__LOCATION__, &
    1564             :                                "Keywords NUM_PRINT_EXC_NTOS and STATE_LIST are both given in input. "// &
    1565           0 :                                "Overriding NUM_PRINT_EXC_NTOS.")
    1566             :                END IF
    1567             :             END IF
    1568             :             ! Check if all states are within the range
    1569             :             ! Count them and initialize new array afterwards
    1570           0 :             num_state_list_exceptions = 0
    1571           0 :             DO i = 1, SIZE(mp2_env%bse%bse_nto_state_list)
    1572           0 :                IF (mp2_env%bse%bse_nto_state_list(i) < 1 .OR. &
    1573           0 :                    mp2_env%bse%bse_nto_state_list(i) > mp2_env%bse%num_print_exc) THEN
    1574           0 :                   num_state_list_exceptions = num_state_list_exceptions + 1
    1575             :                END IF
    1576             :             END DO
    1577           0 :             IF (num_state_list_exceptions > 0) THEN
    1578           0 :                IF (unit_nr > 0) THEN
    1579             :                   CALL cp_hint(__LOCATION__, &
    1580             :                                "STATE_LIST contains indices outside the range of included excitation levels. "// &
    1581           0 :                                "Ignoring these states.")
    1582             :                END IF
    1583             :             END IF
    1584           0 :             n = SIZE(mp2_env%bse%bse_nto_state_list) - num_state_list_exceptions
    1585           0 :             ALLOCATE (mp2_env%bse%bse_nto_state_list_final(n))
    1586           0 :             mp2_env%bse%bse_nto_state_list_final(:) = 0
    1587             :             i = 1
    1588           0 :             DO j = 1, SIZE(mp2_env%bse%bse_nto_state_list)
    1589           0 :                IF (mp2_env%bse%bse_nto_state_list(j) >= 1 .AND. &
    1590           0 :                    mp2_env%bse%bse_nto_state_list(j) <= mp2_env%bse%num_print_exc) THEN
    1591           0 :                   mp2_env%bse%bse_nto_state_list_final(i) = mp2_env%bse%bse_nto_state_list(j)
    1592           0 :                   i = i + 1
    1593             :                END IF
    1594             :             END DO
    1595             : 
    1596           0 :             mp2_env%bse%num_print_exc_ntos = SIZE(mp2_env%bse%bse_nto_state_list_final)
    1597             :          ELSE
    1598           4 :             IF (mp2_env%bse%num_print_exc_ntos > mp2_env%bse%num_print_exc .OR. &
    1599             :                 mp2_env%bse%num_print_exc_ntos < 0) THEN
    1600           4 :                mp2_env%bse%num_print_exc_ntos = mp2_env%bse%num_print_exc
    1601             :             END IF
    1602          12 :             ALLOCATE (mp2_env%bse%bse_nto_state_list_final(mp2_env%bse%num_print_exc_ntos))
    1603         104 :             DO i = 1, mp2_env%bse%num_print_exc_ntos
    1604         104 :                mp2_env%bse%bse_nto_state_list_final(i) = i
    1605             :             END DO
    1606             :          END IF
    1607             :       END IF
    1608             : 
    1609             :       ! Takes care of triplet states, when oscillator strengths are 0
    1610          42 :       IF (mp2_env%bse%bse_spin_config /= 0 .AND. &
    1611             :           mp2_env%bse%eps_nto_osc_str > 0) THEN
    1612           0 :          IF (unit_nr > 0) THEN
    1613             :             CALL cp_warn(__LOCATION__, &
    1614             :                          "Cannot apply EPS_OSC_STR for Triplet excitations. "// &
    1615           0 :                          "Resetting EPS_OSC_STR to default.")
    1616             :          END IF
    1617           0 :          mp2_env%bse%eps_nto_osc_str = -1.0_dp
    1618             :       END IF
    1619             : 
    1620             :       ! Take care of number for computed exciton descriptors
    1621          42 :       IF (mp2_env%bse%num_print_exc_descr < 0 .OR. &
    1622             :           mp2_env%bse%num_print_exc_descr > mp2_env%bse%num_print_exc) THEN
    1623           4 :          IF (unit_nr > 0) THEN
    1624             :             CALL cp_hint(__LOCATION__, &
    1625             :                          "Keyword NUM_PRINT_EXC_DESCR is either negative or too large. "// &
    1626           2 :                          "Printing exciton descriptors up to NUM_PRINT_EXC.")
    1627             :          END IF
    1628           4 :          mp2_env%bse%num_print_exc_descr = mp2_env%bse%num_print_exc
    1629             :       END IF
    1630             : 
    1631             :       ! Handle screening factor options
    1632          42 :       IF (mp2_env%BSE%screening_factor > 0.0_dp) THEN
    1633           2 :          IF (mp2_env%BSE%screening_method /= bse_screening_alpha) THEN
    1634           0 :             IF (unit_nr > 0) THEN
    1635             :                CALL cp_warn(__LOCATION__, &
    1636             :                             "Screening factor is only supported for &SCREENING_IN_W ALPHA. "// &
    1637           0 :                             "Resetting SCREENING_IN_W to ALPHA.")
    1638             :             END IF
    1639           0 :             mp2_env%BSE%screening_method = bse_screening_alpha
    1640             :          END IF
    1641           2 :          IF (mp2_env%BSE%screening_factor > 1.0_dp) THEN
    1642           0 :             IF (unit_nr > 0) THEN
    1643             :                CALL cp_warn(__LOCATION__, &
    1644           0 :                             "Screening factor is larger than 1.0. ")
    1645             :             END IF
    1646             :          END IF
    1647             :       END IF
    1648             : 
    1649          42 :       IF (mp2_env%BSE%screening_factor < 0.0_dp .AND. &
    1650             :           mp2_env%BSE%screening_method == bse_screening_alpha) THEN
    1651           0 :          IF (unit_nr > 0) THEN
    1652             :             CALL cp_warn(__LOCATION__, &
    1653           0 :                          "Screening factor is negative. Defaulting to 0.25")
    1654             :          END IF
    1655           0 :          mp2_env%BSE%screening_factor = 0.25_dp
    1656             :       END IF
    1657             : 
    1658          42 :       IF (mp2_env%BSE%screening_factor == 0.0_dp) THEN
    1659             :          ! Use RPA internally in this case
    1660           0 :          mp2_env%BSE%screening_method = bse_screening_rpa
    1661             :       END IF
    1662          42 :       IF (mp2_env%BSE%screening_factor == 1.0_dp) THEN
    1663             :          ! Use TDHF internally in this case
    1664           0 :          mp2_env%BSE%screening_method = bse_screening_tdhf
    1665             :       END IF
    1666             : 
    1667             :       ! Add warning for usage of KS energies
    1668          42 :       IF (mp2_env%bse%use_ks_energies) THEN
    1669           0 :          IF (unit_nr > 0) THEN
    1670             :             CALL cp_warn(__LOCATION__, &
    1671             :                          "Using KS energies for BSE calculations. Therefore, no quantities "// &
    1672           0 :                          "of the preceeding GW calculation enter the BSE.")
    1673             :          END IF
    1674             :       END IF
    1675             : 
    1676             :       ! Add warning if periodic calculation is invoked
    1677          42 :       IF (ndim_periodic_poisson /= 0) THEN
    1678           0 :          IF (unit_nr > 0) THEN
    1679             :             CALL cp_warn(__LOCATION__, &
    1680             :                          "Poisson solver should be invoked by PERIODIC NONE. "// &
    1681             :                          "The applied length gauge might give misleading results for "// &
    1682           0 :                          "oscillator strengths.")
    1683             :          END IF
    1684             :       END IF
    1685          42 :       IF (ndim_periodic_cell /= 0) THEN
    1686           0 :          IF (unit_nr > 0) THEN
    1687             :             CALL cp_warn(__LOCATION__, &
    1688             :                          "CELL in SUBSYS should be invoked with PERIODIC NONE. "// &
    1689             :                          "The applied length gauge might give misleading results for "// &
    1690           0 :                          "oscillator strengths.")
    1691             :          END IF
    1692             :       END IF
    1693             : 
    1694          42 :       CALL timestop(handle)
    1695          42 :    END SUBROUTINE adapt_BSE_input_params
    1696             : 
    1697             : ! **************************************************************************************************
    1698             : 
    1699             : ! **************************************************************************************************
    1700             : !> \brief ...
    1701             : !> \param fm_multipole_ai_trunc ...
    1702             : !> \param fm_multipole_ij_trunc ...
    1703             : !> \param fm_multipole_ab_trunc ...
    1704             : !> \param qs_env ...
    1705             : !> \param mo_coeff ...
    1706             : !> \param rpoint ...
    1707             : !> \param n_moments ...
    1708             : !> \param homo_red ...
    1709             : !> \param virtual_red ...
    1710             : !> \param context_BSE ...
    1711             : ! **************************************************************************************************
    1712          48 :    SUBROUTINE get_multipoles_mo(fm_multipole_ai_trunc, fm_multipole_ij_trunc, fm_multipole_ab_trunc, &
    1713          48 :                                 qs_env, mo_coeff, rpoint, n_moments, &
    1714             :                                 homo_red, virtual_red, context_BSE)
    1715             : 
    1716             :       TYPE(cp_fm_type), ALLOCATABLE, DIMENSION(:), &
    1717             :          INTENT(INOUT)                                   :: fm_multipole_ai_trunc, &
    1718             :                                                             fm_multipole_ij_trunc, &
    1719             :                                                             fm_multipole_ab_trunc
    1720             :       TYPE(qs_environment_type), POINTER                 :: qs_env
    1721             :       TYPE(cp_fm_type), DIMENSION(:), INTENT(IN)         :: mo_coeff
    1722             :       REAL(dp), ALLOCATABLE, DIMENSION(:), INTENT(INOUT) :: rpoint
    1723             :       INTEGER, INTENT(IN)                                :: n_moments, homo_red, virtual_red
    1724             :       TYPE(cp_blacs_env_type), POINTER                   :: context_BSE
    1725             : 
    1726             :       CHARACTER(LEN=*), PARAMETER                        :: routineN = 'get_multipoles_mo'
    1727             : 
    1728             :       INTEGER                                            :: handle, idir, n_multipole, n_occ, &
    1729             :                                                             n_virt, nao
    1730          48 :       REAL(KIND=dp), DIMENSION(:), POINTER               :: ref_point
    1731             :       TYPE(cp_fm_struct_type), POINTER :: fm_struct_mp_ab_trunc, fm_struct_mp_ai_trunc, &
    1732             :          fm_struct_mp_ij_trunc, fm_struct_multipoles_ao, fm_struct_nao_nao
    1733             :       TYPE(cp_fm_type)                                   :: fm_work
    1734          48 :       TYPE(cp_fm_type), ALLOCATABLE, DIMENSION(:)        :: fm_multipole_per_dir
    1735          48 :       TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: matrix_multipole, matrix_s
    1736          48 :       TYPE(mo_set_type), DIMENSION(:), POINTER           :: mos
    1737             :       TYPE(mp_para_env_type), POINTER                    :: para_env_BSE
    1738             :       TYPE(neighbor_list_set_p_type), DIMENSION(:), &
    1739          48 :          POINTER                                         :: sab_orb
    1740             : 
    1741          48 :       CALL timeset(routineN, handle)
    1742             : 
    1743             :       !First, we calculate the AO dipoles
    1744          48 :       NULLIFY (sab_orb, matrix_s)
    1745             :       CALL get_qs_env(qs_env, &
    1746             :                       mos=mos, &
    1747             :                       matrix_s=matrix_s, &
    1748          48 :                       sab_orb=sab_orb)
    1749             : 
    1750             :       ! Use the same blacs environment as for the MO coefficients to ensure correct multiplication dbcsr x fm later on
    1751          48 :       fm_struct_multipoles_ao => mos(1)%mo_coeff%matrix_struct
    1752             :       ! BSE has different contexts and blacsenvs
    1753          48 :       para_env_BSE => context_BSE%para_env
    1754             :       ! Get size of multipole tensor
    1755          48 :       n_multipole = (6 + 11*n_moments + 6*n_moments**2 + n_moments**3)/6 - 1
    1756          48 :       NULLIFY (matrix_multipole)
    1757          48 :       CALL dbcsr_allocate_matrix_set(matrix_multipole, n_multipole)
    1758         324 :       ALLOCATE (fm_multipole_per_dir(n_multipole))
    1759         228 :       DO idir = 1, n_multipole
    1760         180 :          CALL dbcsr_init_p(matrix_multipole(idir)%matrix)
    1761             :          CALL dbcsr_create(matrix_multipole(idir)%matrix, name="ao_multipole", &
    1762         180 :                            template=matrix_s(1)%matrix, matrix_type=dbcsr_type_symmetric)
    1763         180 :          CALL cp_dbcsr_alloc_block_from_nbl(matrix_multipole(idir)%matrix, sab_orb)
    1764         228 :          CALL dbcsr_set(matrix_multipole(idir)%matrix, 0._dp)
    1765             :       END DO
    1766             : 
    1767          48 :       CALL get_reference_point(rpoint, qs_env=qs_env, reference=use_mom_ref_coac, ref_point=ref_point)
    1768             : 
    1769          48 :       CALL build_local_moment_matrix(qs_env, matrix_multipole, n_moments, ref_point=rpoint)
    1770             : 
    1771          48 :       NULLIFY (sab_orb)
    1772             : 
    1773             :       ! Now we transform them to MO
    1774             :       ! n_occ is the number of occupied MOs, nao the number of all AOs
    1775             :       ! Writing homo to n_occ instead if nmo,
    1776             :       ! takes care of ADDED_MOS, which would overwrite nmo, if invoked
    1777          48 :       CALL get_mo_set(mo_set=mos(1), homo=n_occ, nao=nao)
    1778          48 :       n_virt = nao - n_occ
    1779             : 
    1780             :       ! At the end, we need four different layouts of matrices in this multiplication, e.g. for a dipole:
    1781             :       ! D_pq = full multipole matrix for occupied and unoccupied
    1782             :       ! Final result:D_pq= C_{mu p}        <\mu|\vec{r}|\nu>        C_{\nu q}              EQ.I
    1783             :       !                   \_______/         \___________/          \______/
    1784             :       !                    fm_coeff            matrix_multipole              fm_coeff
    1785             :       !                    (EQ.Ia)             (EQ.Ib)              (EQ.Ia)
    1786             :       ! Intermediate work matrices:
    1787             :       ! fm_work =                 <\mu|\vec{r}|\nu>        C_{\nu q}              EQ.II
    1788             : 
    1789             :       ! Struct for the full multipole matrix
    1790             :       CALL cp_fm_struct_create(fm_struct_nao_nao, &
    1791             :                                fm_struct_multipoles_ao%para_env, fm_struct_multipoles_ao%context, &
    1792          48 :                                nao, nao)
    1793             : 
    1794             :       ! At the very end, we copy the multipoles corresponding to truncated BSE indices in i and a
    1795             :       CALL cp_fm_struct_create(fm_struct_mp_ai_trunc, para_env_BSE, &
    1796          48 :                                context_BSE, virtual_red, homo_red)
    1797             :       CALL cp_fm_struct_create(fm_struct_mp_ij_trunc, para_env_BSE, &
    1798          48 :                                context_BSE, homo_red, homo_red)
    1799             :       CALL cp_fm_struct_create(fm_struct_mp_ab_trunc, para_env_BSE, &
    1800          48 :                                context_BSE, virtual_red, virtual_red)
    1801         228 :       DO idir = 1, n_multipole
    1802             :          CALL cp_fm_create(fm_multipole_ai_trunc(idir), matrix_struct=fm_struct_mp_ai_trunc, &
    1803         180 :                            name="dipoles_mo_ai_trunc")
    1804         180 :          CALL cp_fm_set_all(fm_multipole_ai_trunc(idir), 0.0_dp)
    1805             :          CALL cp_fm_create(fm_multipole_ij_trunc(idir), matrix_struct=fm_struct_mp_ij_trunc, &
    1806         180 :                            name="dipoles_mo_ij_trunc")
    1807         180 :          CALL cp_fm_set_all(fm_multipole_ij_trunc(idir), 0.0_dp)
    1808             :          CALL cp_fm_create(fm_multipole_ab_trunc(idir), matrix_struct=fm_struct_mp_ab_trunc, &
    1809         180 :                            name="dipoles_mo_ab_trunc")
    1810         228 :          CALL cp_fm_set_all(fm_multipole_ab_trunc(idir), 0.0_dp)
    1811             :       END DO
    1812             : 
    1813             :       ! Need another temporary matrix to store intermediate result from right multiplication
    1814             :       ! D = C_{mu a}        <\mu|\vec{r}|\nu>        C_{\nu i}
    1815          48 :       CALL cp_fm_create(fm_work, matrix_struct=fm_struct_nao_nao, name="multipole_work")
    1816          48 :       CALL cp_fm_set_all(fm_work, 0.0_dp)
    1817             : 
    1818         228 :       DO idir = 1, n_multipole
    1819             :          ! Create the full multipole matrix per direction
    1820         180 :          CALL cp_fm_create(fm_multipole_per_dir(idir), matrix_struct=fm_struct_nao_nao, name="multipoles_mo")
    1821         180 :          CALL cp_fm_set_all(fm_multipole_per_dir(idir), 0.0_dp)
    1822             :          ! Fill final (MO) multipole matrix
    1823             :          CALL cp_dbcsr_sm_fm_multiply(matrix_multipole(idir)%matrix, mo_coeff(1), &
    1824         180 :                                       fm_work, ncol=nao)
    1825             :          ! Now obtain the multipoles by the final multiplication;
    1826             :          ! We do that inside the loop to obtain multipoles per axis for print
    1827         180 :          CALL parallel_gemm('T', 'N', nao, nao, nao, 1.0_dp, mo_coeff(1), fm_work, 0.0_dp, fm_multipole_per_dir(idir))
    1828             :          ! Truncate full matrix to the BSE indices
    1829             :          ! D_ai
    1830             :          CALL cp_fm_to_fm_submat_general(fm_multipole_per_dir(idir), &
    1831             :                                          fm_multipole_ai_trunc(idir), &
    1832             :                                          virtual_red, &
    1833             :                                          homo_red, &
    1834             :                                          n_occ + 1, &
    1835             :                                          n_occ - homo_red + 1, &
    1836             :                                          1, &
    1837             :                                          1, &
    1838         180 :                                          fm_multipole_per_dir(idir)%matrix_struct%context)
    1839             :          ! D_ij
    1840             :          CALL cp_fm_to_fm_submat_general(fm_multipole_per_dir(idir), &
    1841             :                                          fm_multipole_ij_trunc(idir), &
    1842             :                                          homo_red, &
    1843             :                                          homo_red, &
    1844             :                                          n_occ - homo_red + 1, &
    1845             :                                          n_occ - homo_red + 1, &
    1846             :                                          1, &
    1847             :                                          1, &
    1848         180 :                                          fm_multipole_per_dir(idir)%matrix_struct%context)
    1849             :          ! D_ab
    1850             :          CALL cp_fm_to_fm_submat_general(fm_multipole_per_dir(idir), &
    1851             :                                          fm_multipole_ab_trunc(idir), &
    1852             :                                          virtual_red, &
    1853             :                                          virtual_red, &
    1854             :                                          n_occ + 1, &
    1855             :                                          n_occ + 1, &
    1856             :                                          1, &
    1857             :                                          1, &
    1858         228 :                                          fm_multipole_per_dir(idir)%matrix_struct%context)
    1859             :       END DO
    1860             : 
    1861             :       !Release matrices and structs
    1862          48 :       NULLIFY (fm_struct_multipoles_ao)
    1863          48 :       CALL cp_fm_struct_release(fm_struct_mp_ai_trunc)
    1864          48 :       CALL cp_fm_struct_release(fm_struct_mp_ij_trunc)
    1865          48 :       CALL cp_fm_struct_release(fm_struct_mp_ab_trunc)
    1866          48 :       CALL cp_fm_struct_release(fm_struct_nao_nao)
    1867         228 :       DO idir = 1, n_multipole
    1868         228 :          CALL cp_fm_release(fm_multipole_per_dir(idir))
    1869             :       END DO
    1870          48 :       DEALLOCATE (fm_multipole_per_dir)
    1871          48 :       CALL cp_fm_release(fm_work)
    1872          48 :       CALL dbcsr_deallocate_matrix_set(matrix_multipole)
    1873             : 
    1874          48 :       CALL timestop(handle)
    1875             : 
    1876         144 :    END SUBROUTINE get_multipoles_mo
    1877             : 
    1878             : ! **************************************************************************************************
    1879             : !> \brief Computes trace of form Tr{A^T B C} for exciton descriptors
    1880             : !> \param fm_A Full Matrix, typically X or Y, in format homo x virtual
    1881             : !> \param fm_B ...
    1882             : !> \param fm_C ...
    1883             : !> \param alpha ...
    1884             : ! **************************************************************************************************
    1885       11520 :    SUBROUTINE trace_exciton_descr(fm_A, fm_B, fm_C, alpha)
    1886             : 
    1887             :       TYPE(cp_fm_type), INTENT(IN)                       :: fm_A, fm_B, fm_C
    1888             :       REAL(KIND=dp), INTENT(OUT)                         :: alpha
    1889             : 
    1890             :       CHARACTER(LEN=*), PARAMETER :: routineN = 'trace_exciton_descr'
    1891             : 
    1892             :       INTEGER                                            :: handle, ncol_A, ncol_B, ncol_C, nrow_A, &
    1893             :                                                             nrow_B, nrow_C
    1894             :       TYPE(cp_fm_type)                                   :: fm_work_ia
    1895             : 
    1896        1920 :       CALL timeset(routineN, handle)
    1897             : 
    1898        1920 :       CALL cp_fm_create(fm_work_ia, fm_A%matrix_struct)
    1899        1920 :       CALL cp_fm_get_info(fm_A, nrow_global=nrow_A, ncol_global=ncol_A)
    1900        1920 :       CALL cp_fm_get_info(fm_B, nrow_global=nrow_B, ncol_global=ncol_B)
    1901        1920 :       CALL cp_fm_get_info(fm_C, nrow_global=nrow_C, ncol_global=ncol_C)
    1902             : 
    1903             :       ! Check matrix sizes
    1904        1920 :       CPASSERT(nrow_A == nrow_B .AND. ncol_A == ncol_C .AND. ncol_B == nrow_C)
    1905             : 
    1906        1920 :       CALL cp_fm_set_all(fm_work_ia, 0.0_dp)
    1907             : 
    1908             :       CALL parallel_gemm("N", "N", nrow_A, ncol_A, nrow_C, 1.0_dp, &
    1909        1920 :                          fm_B, fm_C, 0.0_dp, fm_work_ia)
    1910             : 
    1911        1920 :       CALL cp_fm_trace(fm_A, fm_work_ia, alpha)
    1912             : 
    1913        1920 :       CALL cp_fm_release(fm_work_ia)
    1914             : 
    1915        1920 :       CALL timestop(handle)
    1916             : 
    1917        1920 :    END SUBROUTINE trace_exciton_descr
    1918             : 
    1919             : END MODULE

Generated by: LCOV version 1.15