LCOV - code coverage report
Current view: top level - src - xas_tdp_utils.F (source / functions) Hit Total Coverage
Test: CP2K Regtests (git:4dc10b3) Lines: 1226 1316 93.2 %
Date: 2024-11-21 06:45:46 Functions: 17 19 89.5 %

          Line data    Source code
       1             : !--------------------------------------------------------------------------------------------------!
       2             : !   CP2K: A general program to perform molecular dynamics simulations                              !
       3             : !   Copyright 2000-2024 CP2K developers group <https://cp2k.org>                                   !
       4             : !                                                                                                  !
       5             : !   SPDX-License-Identifier: GPL-2.0-or-later                                                      !
       6             : !--------------------------------------------------------------------------------------------------!
       7             : 
       8             : ! **************************************************************************************************
       9             : !> \brief Utilities for X-ray absorption spectroscopy using TDDFPT
      10             : !> \author AB (01.2018)
      11             : ! **************************************************************************************************
      12             : 
      13             : MODULE xas_tdp_utils
      14             :    USE cp_blacs_env,                    ONLY: cp_blacs_env_type
      15             :    USE cp_cfm_diag,                     ONLY: cp_cfm_heevd
      16             :    USE cp_cfm_types,                    ONLY: cp_cfm_create,&
      17             :                                               cp_cfm_get_info,&
      18             :                                               cp_cfm_get_submatrix,&
      19             :                                               cp_cfm_release,&
      20             :                                               cp_cfm_type,&
      21             :                                               cp_fm_to_cfm
      22             :    USE cp_dbcsr_api,                    ONLY: &
      23             :         dbcsr_add, dbcsr_copy, dbcsr_create, dbcsr_distribution_get, dbcsr_distribution_new, &
      24             :         dbcsr_distribution_release, dbcsr_distribution_type, dbcsr_finalize, dbcsr_get_block_p, &
      25             :         dbcsr_get_info, dbcsr_iterator_blocks_left, dbcsr_iterator_next_block, &
      26             :         dbcsr_iterator_start, dbcsr_iterator_stop, dbcsr_iterator_type, dbcsr_multiply, &
      27             :         dbcsr_p_type, dbcsr_put_block, dbcsr_release, dbcsr_reserve_all_blocks, dbcsr_set, &
      28             :         dbcsr_type, dbcsr_type_no_symmetry, dbcsr_type_symmetric
      29             :    USE cp_dbcsr_cholesky,               ONLY: cp_dbcsr_cholesky_decompose,&
      30             :                                               cp_dbcsr_cholesky_invert
      31             :    USE cp_dbcsr_diag,                   ONLY: cp_dbcsr_power
      32             :    USE cp_dbcsr_operations,             ONLY: copy_dbcsr_to_fm,&
      33             :                                               copy_fm_to_dbcsr,&
      34             :                                               cp_dbcsr_sm_fm_multiply,&
      35             :                                               dbcsr_allocate_matrix_set,&
      36             :                                               dbcsr_deallocate_matrix_set
      37             :    USE cp_fm_basic_linalg,              ONLY: cp_fm_column_scale,&
      38             :                                               cp_fm_scale,&
      39             :                                               cp_fm_transpose,&
      40             :                                               cp_fm_upper_to_full
      41             :    USE cp_fm_diag,                      ONLY: choose_eigv_solver,&
      42             :                                               cp_fm_geeig
      43             :    USE cp_fm_struct,                    ONLY: cp_fm_struct_create,&
      44             :                                               cp_fm_struct_release,&
      45             :                                               cp_fm_struct_type
      46             :    USE cp_fm_types,                     ONLY: cp_fm_create,&
      47             :                                               cp_fm_get_diag,&
      48             :                                               cp_fm_get_info,&
      49             :                                               cp_fm_get_submatrix,&
      50             :                                               cp_fm_release,&
      51             :                                               cp_fm_set_element,&
      52             :                                               cp_fm_to_fm_submat,&
      53             :                                               cp_fm_type
      54             :    USE cp_log_handling,                 ONLY: cp_logger_get_default_io_unit
      55             :    USE input_constants,                 ONLY: ot_precond_full_single,&
      56             :                                               tddfpt_singlet,&
      57             :                                               tddfpt_spin_cons,&
      58             :                                               tddfpt_spin_flip,&
      59             :                                               tddfpt_triplet,&
      60             :                                               xas_dip_len
      61             :    USE kinds,                           ONLY: dp
      62             :    USE mathlib,                         ONLY: get_diag
      63             :    USE message_passing,                 ONLY: mp_para_env_type
      64             :    USE parallel_gemm_api,               ONLY: parallel_gemm
      65             :    USE physcon,                         ONLY: a_fine
      66             :    USE preconditioner_types,            ONLY: destroy_preconditioner,&
      67             :                                               init_preconditioner,&
      68             :                                               preconditioner_type
      69             :    USE qs_environment_types,            ONLY: get_qs_env,&
      70             :                                               qs_environment_type
      71             :    USE qs_mo_methods,                   ONLY: calculate_subspace_eigenvalues
      72             :    USE qs_mo_types,                     ONLY: get_mo_set,&
      73             :                                               mo_set_type
      74             :    USE qs_ot_eigensolver,               ONLY: ot_eigensolver
      75             :    USE xas_tdp_kernel,                  ONLY: kernel_coulomb_xc,&
      76             :                                               kernel_exchange
      77             :    USE xas_tdp_types,                   ONLY: donor_state_type,&
      78             :                                               xas_tdp_control_type,&
      79             :                                               xas_tdp_env_type
      80             : 
      81             : !$ USE OMP_LIB, ONLY: omp_get_max_threads, omp_get_thread_num
      82             : #include "./base/base_uses.f90"
      83             : 
      84             :    IMPLICIT NONE
      85             :    PRIVATE
      86             : 
      87             :    CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'xas_tdp_utils'
      88             : 
      89             :    PUBLIC :: setup_xas_tdp_prob, solve_xas_tdp_prob, include_rcs_soc, &
      90             :              include_os_soc, rcs_amew_soc_elements
      91             : 
      92             :    !A helper type for SOC
      93             :    TYPE dbcsr_soc_package_type
      94             :       TYPE(dbcsr_type), POINTER     :: dbcsr_sg => NULL()
      95             :       TYPE(dbcsr_type), POINTER     :: dbcsr_tp => NULL()
      96             :       TYPE(dbcsr_type), POINTER     :: dbcsr_sc => NULL()
      97             :       TYPE(dbcsr_type), POINTER     :: dbcsr_sf => NULL()
      98             :       TYPE(dbcsr_type), POINTER     :: dbcsr_prod => NULL()
      99             :       TYPE(dbcsr_type), POINTER     :: dbcsr_ovlp => NULL()
     100             :       TYPE(dbcsr_type), POINTER     :: dbcsr_tmp => NULL()
     101             :       TYPE(dbcsr_type), POINTER     :: dbcsr_work => NULL()
     102             :    END TYPE dbcsr_soc_package_type
     103             : 
     104             : CONTAINS
     105             : 
     106             : ! **************************************************************************************************
     107             : !> \brief Builds the matrix that defines the XAS TDDFPT generalized eigenvalue problem to be solved
     108             : !>        for excitation energies omega. The problem has the form omega*G*C = M*C, where C contains
     109             : !>        the response orbitals coefficients. The matrix M and the metric G are stored in the given
     110             : !>        donor_state
     111             : !> \param donor_state the donor_state for which the problem is restricted
     112             : !> \param qs_env ...
     113             : !> \param xas_tdp_env ...
     114             : !> \param xas_tdp_control ...
     115             : !> \note  the matrix M is symmetric and has the form | M_d   M_o |
     116             : !>                                                   | M_o   M_d |,
     117             : !>       -In the SPIN-RESTRICTED case:
     118             : !>        depending on whether we consider singlet or triplet excitation, the diagonal (M_d) and
     119             : !>        off-diagonal (M_o) parts of M differ:
     120             : !>        - For singlet: M_d = A + 2B + C_aa + C_ab - D
     121             : !>                       M_o = 2B + C_aa + C_ab - E
     122             : !>        - For triplet: M_d = A + C_aa - C_ab - D
     123             : !>                       M_o = C_aa - C_ab - E
     124             : !>        where other subroutines computes the matrices A, B, E, D and G, which are:
     125             : !>        - A: the ground-state contribution: F_pq*delta_IJ - epsilon_IJ*S_pq
     126             : !>        - B: the Coulomb kernel ~(pI|Jq)
     127             : !>        - C: the xc kernel c_aa (double derivatibe wrt to n_alpha) and C_ab (wrt n_alpha and n_beta)
     128             : !>        - D: the on-digonal exact exchange kernel ~(pq|IJ)
     129             : !>        - E: the off-diagonal exact exchange kernel ~(pJ|Iq)
     130             : !>        - G: the metric  S_pq*delta_IJ
     131             : !>        For the xc functionals, C_aa + C_ab or C_aa - C_ab are stored in the same matrix
     132             : !>        In the above definitions, I,J label the donnor MOs and p,q the sgfs of the basis
     133             : !>
     134             : !>       -In the SPIN-UNRESTRICTED, spin-conserving case:
     135             : !>        the on- and off-diagonal elements of M are:
     136             : !>                     M_d = A + B + C -D
     137             : !>                     M_o = B + C - E
     138             : !>        where the submatrices A, B, C, D and E are:
     139             : !>        - A: the groun-state contribution: (F_pq*delta_IJ - epsilon_IJ*S_pq) * delta_ab
     140             : !>        - B: the Coulomb kernel: (pI_a|J_b q)
     141             : !>        - C: the xc kernel: (pI_a|fxc_ab|J_b q)
     142             : !>        - D: the on-diagonal exact-exchange kernel: (pq|I_a J_b) delta_ab
     143             : !>        - E: the off-diagonal exact-exchange kernel: (pJ_b|I_a q) delta_ab
     144             : !>        - G: the metric S_pq*delta_IJ*delta_ab
     145             : !>        p,q label the sgfs, I,J the donro MOs and a,b the spins
     146             : !>
     147             : !>       -In both above cases, the matrix M is always  projected onto the unperturbed unoccupied
     148             : !>        ground state: M <= Q * M * Q^T = (1 - SP) * M * (1 - PS)
     149             : !>
     150             : !>       -In the SPIN-FLIP case:
     151             : !>        Only the TDA is implemented, that is, there are only on-diagonal elements:
     152             : !>                    M_d = A + C - D
     153             : !>        where the submatrices A, C and D are:
     154             : !>        - A: the ground state-contribution: (F_pq*delta_IJ - epsilon_IJ*S_pq) * delta_ab, but here,
     155             : !>                                            the alph-alpha quadrant has the beta Fock matrix and
     156             : !>                                            the beta-beta quadrant has the alpha Fock matrix
     157             : !>        - C: the SF xc kernel: (pI_a|fxc|J_bq), fxc = 1/m * (vxc_a -vxc_b)
     158             : !>        - D: the on-diagonal exact-exchange kernel: (pq|I_a J_b) delta_ab
     159             : !>        To ensure that all excitation start from a given spin to the opposite, we then multiply
     160             : !>        by a Q projector where we swap the alpha-alpha and beta-beta spin-quadrants
     161             : !>
     162             : !>        All possibilities: TDA or full-TDDFT, singlet or triplet, xc or hybrid, etc are treated
     163             : !>        in the same routine to avoid recomputing stuff
     164             : !>        Under TDA, only the on-diagonal elements of M are computed
     165             : !>        In the case of non-TDA, one turns the problem Hermitian
     166             : ! **************************************************************************************************
     167          56 :    SUBROUTINE setup_xas_tdp_prob(donor_state, qs_env, xas_tdp_env, xas_tdp_control)
     168             : 
     169             :       TYPE(donor_state_type), POINTER                    :: donor_state
     170             :       TYPE(qs_environment_type), POINTER                 :: qs_env
     171             :       TYPE(xas_tdp_env_type), POINTER                    :: xas_tdp_env
     172             :       TYPE(xas_tdp_control_type), POINTER                :: xas_tdp_control
     173             : 
     174             :       CHARACTER(len=*), PARAMETER :: routineN = 'setup_xas_tdp_prob'
     175             : 
     176             :       INTEGER                                            :: handle
     177          56 :       INTEGER, DIMENSION(:), POINTER                     :: submat_blk_size
     178             :       LOGICAL                                            :: do_coul, do_hfx, do_os, do_sc, do_sf, &
     179             :                                                             do_sg, do_tda, do_tp, do_xc
     180             :       REAL(dp)                                           :: eps_filter, sx
     181             :       TYPE(dbcsr_distribution_type), POINTER             :: submat_dist
     182          56 :       TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: ex_ker, xc_ker
     183             :       TYPE(dbcsr_type)                                   :: matrix_a, matrix_a_sf, matrix_b, proj_Q, &
     184             :                                                             proj_Q_sf, work
     185             :       TYPE(dbcsr_type), POINTER :: matrix_c_sc, matrix_c_sf, matrix_c_sg, matrix_c_tp, matrix_d, &
     186             :          matrix_e_sc, sc_matrix_tdp, sf_matrix_tdp, sg_matrix_tdp, tp_matrix_tdp
     187             : 
     188          56 :       NULLIFY (sg_matrix_tdp, tp_matrix_tdp, submat_dist, submat_blk_size, matrix_c_sf)
     189          56 :       NULLIFY (matrix_c_sg, matrix_c_tp, matrix_c_sc, matrix_d, matrix_e_sc)
     190          56 :       NULLIFY (sc_matrix_tdp, sf_matrix_tdp, ex_ker, xc_ker)
     191             : 
     192          56 :       CALL timeset(routineN, handle)
     193             : 
     194             : !  Initialization
     195          56 :       do_os = xas_tdp_control%do_uks .OR. xas_tdp_control%do_roks
     196          56 :       do_sc = xas_tdp_control%do_spin_cons
     197          56 :       do_sf = xas_tdp_control%do_spin_flip
     198          56 :       do_sg = xas_tdp_control%do_singlet
     199          56 :       do_tp = xas_tdp_control%do_triplet
     200          56 :       do_xc = xas_tdp_control%do_xc
     201          56 :       do_hfx = xas_tdp_control%do_hfx
     202          56 :       do_coul = xas_tdp_control%do_coulomb
     203          56 :       do_tda = xas_tdp_control%tamm_dancoff
     204          56 :       sx = xas_tdp_control%sx
     205          56 :       eps_filter = xas_tdp_control%eps_filter
     206          56 :       IF (do_sc) THEN
     207           8 :          ALLOCATE (donor_state%sc_matrix_tdp)
     208           8 :          sc_matrix_tdp => donor_state%sc_matrix_tdp
     209             :       END IF
     210          56 :       IF (do_sf) THEN
     211           2 :          ALLOCATE (donor_state%sf_matrix_tdp)
     212           2 :          sf_matrix_tdp => donor_state%sf_matrix_tdp
     213             :       END IF
     214          56 :       IF (do_sg) THEN
     215          48 :          ALLOCATE (donor_state%sg_matrix_tdp)
     216          48 :          sg_matrix_tdp => donor_state%sg_matrix_tdp
     217             :       END IF
     218          56 :       IF (do_tp) THEN
     219           2 :          ALLOCATE (donor_state%tp_matrix_tdp)
     220           2 :          tp_matrix_tdp => donor_state%tp_matrix_tdp
     221             :       END IF
     222             : 
     223             : !  Get the dist and block size of all matrices A, B, C, etc
     224          56 :       CALL compute_submat_dist_and_blk_size(donor_state, do_os, qs_env)
     225          56 :       submat_dist => donor_state%dbcsr_dist
     226          56 :       submat_blk_size => donor_state%blk_size
     227             : 
     228             : !  Allocate and compute all the matrices A, B, C, etc we will need
     229             : 
     230             :       ! The projector(s) on the unoccupied unperturbed ground state 1-SP and associated work matrix
     231          56 :       IF (do_sg .OR. do_tp .OR. do_sc) THEN !spin-conserving
     232          56 :          CALL get_q_projector(proj_Q, donor_state, do_os, xas_tdp_env)
     233             :       END IF
     234          56 :       IF (do_sf) THEN !spin-flip
     235           2 :          CALL get_q_projector(proj_Q_sf, donor_state, do_os, xas_tdp_env, do_sf=.TRUE.)
     236             :       END IF
     237             :       CALL dbcsr_create(matrix=work, matrix_type=dbcsr_type_no_symmetry, dist=submat_dist, &
     238          56 :                         name="WORK", row_blk_size=submat_blk_size, col_blk_size=submat_blk_size)
     239             : 
     240             :       ! The ground state contribution(s)
     241          56 :       IF (do_sg .OR. do_tp .OR. do_sc) THEN !spin-conserving
     242          56 :          CALL build_gs_contribution(matrix_a, donor_state, do_os, qs_env)
     243             :       END IF
     244          56 :       IF (do_sf) THEN !spin-flip
     245           2 :          CALL build_gs_contribution(matrix_a_sf, donor_state, do_os, qs_env, do_sf=.TRUE.)
     246             :       END IF
     247             : 
     248             :       ! The Coulomb and XC kernels. Internal analysis to know which matrix to compute
     249          56 :       CALL dbcsr_allocate_matrix_set(xc_ker, 4)
     250          56 :       ALLOCATE (xc_ker(1)%matrix, xc_ker(2)%matrix, xc_ker(3)%matrix, xc_ker(4)%matrix)
     251          56 :       CALL kernel_coulomb_xc(matrix_b, xc_ker, donor_state, xas_tdp_env, xas_tdp_control, qs_env)
     252          56 :       matrix_c_sg => xc_ker(1)%matrix; matrix_c_tp => xc_ker(2)%matrix
     253          56 :       matrix_c_sc => xc_ker(3)%matrix; matrix_c_sf => xc_ker(4)%matrix
     254             : 
     255             :       ! The exact exchange. Internal analysis to know which matrices to compute
     256          56 :       CALL dbcsr_allocate_matrix_set(ex_ker, 2)
     257          56 :       ALLOCATE (ex_ker(1)%matrix, ex_ker(2)%matrix)
     258          56 :       CALL kernel_exchange(ex_ker, donor_state, xas_tdp_env, xas_tdp_control, qs_env)
     259          56 :       matrix_d => ex_ker(1)%matrix; matrix_e_sc => ex_ker(2)%matrix
     260             : 
     261             :       ! Build the metric G, also need its inverse in case of full-TDDFT
     262          56 :       IF (do_tda) THEN
     263         100 :          ALLOCATE (donor_state%metric(1))
     264          50 :          CALL build_metric(donor_state%metric, donor_state, qs_env, do_os)
     265             :       ELSE
     266          18 :          ALLOCATE (donor_state%metric(2))
     267           6 :          CALL build_metric(donor_state%metric, donor_state, qs_env, do_os, do_inv=.TRUE.)
     268             :       END IF
     269             : 
     270             : !  Build the eigenvalue problem, depending on the case (TDA, singlet, triplet, hfx, etc ...)
     271          56 :       IF (do_tda) THEN
     272             : 
     273          50 :          IF (do_sc) THEN ! open-shell spin-conserving under TDA
     274             : 
     275             :             ! The final matrix is M = A + B + C - D
     276           8 :             CALL dbcsr_copy(sc_matrix_tdp, matrix_a, name="OS MATRIX TDP")
     277           8 :             IF (do_coul) CALL dbcsr_add(sc_matrix_tdp, matrix_b, 1.0_dp, 1.0_dp)
     278             : 
     279           8 :             IF (do_xc) CALL dbcsr_add(sc_matrix_tdp, matrix_c_sc, 1.0_dp, 1.0_dp) !xc kernel
     280           8 :             IF (do_hfx) CALL dbcsr_add(sc_matrix_tdp, matrix_d, 1.0_dp, -1.0_dp*sx) !scaled hfx
     281             : 
     282             :             ! The product with the Q projector
     283           8 :             CALL dbcsr_multiply('N', 'N', 1.0_dp, proj_Q, sc_matrix_tdp, 0.0_dp, work, filter_eps=eps_filter)
     284           8 :             CALL dbcsr_multiply('N', 'T', 1.0_dp, work, proj_Q, 0.0_dp, sc_matrix_tdp, filter_eps=eps_filter)
     285             : 
     286             :          END IF !do_sc
     287             : 
     288          50 :          IF (do_sf) THEN ! open-shell spin-flip under TDA
     289             : 
     290             :             ! The final matrix is M = A + C - D
     291           2 :             CALL dbcsr_copy(sf_matrix_tdp, matrix_a_sf, name="OS MATRIX TDP")
     292             : 
     293           2 :             IF (do_xc) CALL dbcsr_add(sf_matrix_tdp, matrix_c_sf, 1.0_dp, 1.0_dp) !xc kernel
     294           2 :             IF (do_hfx) CALL dbcsr_add(sf_matrix_tdp, matrix_d, 1.0_dp, -1.0_dp*sx) !scaled hfx
     295             : 
     296             :             ! Take the product with the (spin-flip) Q projector
     297           2 :             CALL dbcsr_multiply('N', 'N', 1.0_dp, proj_Q_sf, sf_matrix_tdp, 0.0_dp, work, filter_eps=eps_filter)
     298           2 :             CALL dbcsr_multiply('N', 'T', 1.0_dp, work, proj_Q_sf, 0.0_dp, sf_matrix_tdp, filter_eps=eps_filter)
     299             : 
     300             :          END IF !do_sf
     301             : 
     302          50 :          IF (do_sg) THEN ! singlets under TDA
     303             : 
     304             :             ! The final matrix is M = A + 2B + (C_aa + C_ab) - D
     305          42 :             CALL dbcsr_copy(sg_matrix_tdp, matrix_a, name="SINGLET MATRIX TDP")
     306          42 :             IF (do_coul) CALL dbcsr_add(sg_matrix_tdp, matrix_b, 1.0_dp, 2.0_dp)
     307             : 
     308          42 :             IF (do_xc) CALL dbcsr_add(sg_matrix_tdp, matrix_c_sg, 1.0_dp, 1.0_dp) ! xc kernel
     309          42 :             IF (do_hfx) CALL dbcsr_add(sg_matrix_tdp, matrix_d, 1.0_dp, -1.0_dp*sx) ! scaled hfx
     310             : 
     311             :             ! Take the product with the Q projector:
     312          42 :             CALL dbcsr_multiply('N', 'N', 1.0_dp, proj_Q, sg_matrix_tdp, 0.0_dp, work, filter_eps=eps_filter)
     313          42 :             CALL dbcsr_multiply('N', 'T', 1.0_dp, work, proj_Q, 0.0_dp, sg_matrix_tdp, filter_eps=eps_filter)
     314             : 
     315             :          END IF !do_sg (TDA)
     316             : 
     317          50 :          IF (do_tp) THEN ! triplets under TDA
     318             : 
     319             :             ! The final matrix is M =  A + (C_aa - C_ab) - D
     320           2 :             CALL dbcsr_copy(tp_matrix_tdp, matrix_a, name="TRIPLET MATRIX TDP")
     321             : 
     322           2 :             IF (do_xc) CALL dbcsr_add(tp_matrix_tdp, matrix_c_tp, 1.0_dp, 1.0_dp) ! xc_kernel
     323           2 :             IF (do_hfx) CALL dbcsr_add(tp_matrix_tdp, matrix_d, 1.0_dp, -1.0_dp*sx) ! scaled hfx
     324             : 
     325             :             ! Take the product with the Q projector:
     326           2 :             CALL dbcsr_multiply('N', 'N', 1.0_dp, proj_Q, tp_matrix_tdp, 0.0_dp, work, filter_eps=eps_filter)
     327           2 :             CALL dbcsr_multiply('N', 'T', 1.0_dp, work, proj_Q, 0.0_dp, tp_matrix_tdp, filter_eps=eps_filter)
     328             : 
     329             :          END IF !do_tp (TDA)
     330             : 
     331             :       ELSE ! not TDA
     332             : 
     333             :          ! In the case of full-TDDFT, the problem is turned Hermitian with the help of auxiliary
     334             :          ! matrices AUX = (A-D+E)^(+-0.5) that are stored in donor_state
     335             :          CALL build_aux_matrix(1.0E-8_dp, sx, matrix_a, matrix_d, matrix_e_sc, do_hfx, proj_Q, &
     336           6 :                                work, donor_state, eps_filter, qs_env)
     337             : 
     338           6 :          IF (do_sc) THEN !full-TDDFT open-shell spin-conserving
     339             : 
     340             :             ! The final matrix is the sum of the on- and off-diagonal elements as in the description
     341             :             ! M = A + 2B + 2C - D - E
     342           0 :             CALL dbcsr_copy(sc_matrix_tdp, matrix_a, name="OS MATRIX TDP")
     343           0 :             IF (do_coul) CALL dbcsr_add(sc_matrix_tdp, matrix_b, 1.0_dp, 2.0_dp)
     344             : 
     345           0 :             IF (do_hfx) THEN !scaled hfx
     346           0 :                CALL dbcsr_add(sc_matrix_tdp, matrix_d, 1.0_dp, -1.0_dp*sx)
     347           0 :                CALL dbcsr_add(sc_matrix_tdp, matrix_e_sc, 1.0_dp, -1.0_dp*sx)
     348             :             END IF
     349           0 :             IF (do_xc) THEN
     350           0 :                CALL dbcsr_add(sc_matrix_tdp, matrix_c_sc, 1.0_dp, 2.0_dp)
     351             :             END IF
     352             : 
     353             :             ! Take the product with the Q projector
     354           0 :             CALL dbcsr_multiply('N', 'N', 1.0_dp, proj_Q, sc_matrix_tdp, 0.0_dp, work, filter_eps=eps_filter)
     355           0 :             CALL dbcsr_multiply('N', 'T', 1.0_dp, work, proj_Q, 0.0_dp, sc_matrix_tdp, filter_eps=eps_filter)
     356             : 
     357             :             ! Take the product with the inverse metric
     358             :             ! M <= G^-1 * M * G^-1
     359             :             CALL dbcsr_multiply('N', 'N', 1.0_dp, donor_state%metric(2)%matrix, sc_matrix_tdp, &
     360           0 :                                 0.0_dp, work, filter_eps=eps_filter)
     361             :             CALL dbcsr_multiply('N', 'N', 1.0_dp, work, donor_state%metric(2)%matrix, 0.0_dp, &
     362           0 :                                 sc_matrix_tdp, filter_eps=eps_filter)
     363             : 
     364             :          END IF
     365             : 
     366           6 :          IF (do_sg) THEN ! full-TDDFT singlets
     367             : 
     368             :             ! The final matrix is the sum of the on- and off-diagonal elements as in the description
     369             :             ! M = A + 4B + 2(C_aa + C_ab) - D - E
     370           6 :             CALL dbcsr_copy(sg_matrix_tdp, matrix_a, name="SINGLET MATRIX TDP")
     371           6 :             IF (do_coul) CALL dbcsr_add(sg_matrix_tdp, matrix_b, 1.0_dp, 4.0_dp)
     372             : 
     373           6 :             IF (do_hfx) THEN !scaled hfx
     374           6 :                CALL dbcsr_add(sg_matrix_tdp, matrix_d, 1.0_dp, -1.0_dp*sx)
     375           6 :                CALL dbcsr_add(sg_matrix_tdp, matrix_e_sc, 1.0_dp, -1.0_dp*sx)
     376             :             END IF
     377           6 :             IF (do_xc) THEN !xc kernel
     378           6 :                CALL dbcsr_add(sg_matrix_tdp, matrix_c_sg, 1.0_dp, 2.0_dp)
     379             :             END IF
     380             : 
     381             :             ! Take the product with the Q projector
     382           6 :             CALL dbcsr_multiply('N', 'N', 1.0_dp, proj_Q, sg_matrix_tdp, 0.0_dp, work, filter_eps=eps_filter)
     383           6 :             CALL dbcsr_multiply('N', 'T', 1.0_dp, work, proj_Q, 0.0_dp, sg_matrix_tdp, filter_eps=eps_filter)
     384             : 
     385             :             ! Take the product with the inverse metric
     386             :             ! M <= G^-1 * M * G^-1
     387             :             CALL dbcsr_multiply('N', 'N', 1.0_dp, donor_state%metric(2)%matrix, sg_matrix_tdp, &
     388           6 :                                 0.0_dp, work, filter_eps=eps_filter)
     389             :             CALL dbcsr_multiply('N', 'N', 1.0_dp, work, donor_state%metric(2)%matrix, 0.0_dp, &
     390           6 :                                 sg_matrix_tdp, filter_eps=eps_filter)
     391             : 
     392             :          END IF ! singlets
     393             : 
     394           6 :          IF (do_tp) THEN ! full-TDDFT triplets
     395             : 
     396             :             ! The final matrix is the sum of the on- and off-diagonal elements as in the description
     397             :             ! M = A + 2(C_aa - C_ab) - D - E
     398           0 :             CALL dbcsr_copy(tp_matrix_tdp, matrix_a, name="TRIPLET MATRIX TDP")
     399             : 
     400           0 :             IF (do_hfx) THEN !scaled hfx
     401           0 :                CALL dbcsr_add(tp_matrix_tdp, matrix_d, 1.0_dp, -1.0_dp*sx)
     402           0 :                CALL dbcsr_add(tp_matrix_tdp, matrix_e_sc, 1.0_dp, -1.0_dp*sx)
     403             :             END IF
     404           0 :             IF (do_xc) THEN
     405           0 :                CALL dbcsr_add(tp_matrix_tdp, matrix_c_tp, 1.0_dp, 2.0_dp)
     406             :             END IF
     407             : 
     408             :             ! Take the product with the Q projector
     409           0 :             CALL dbcsr_multiply('N', 'N', 1.0_dp, proj_Q, tp_matrix_tdp, 0.0_dp, work, filter_eps=eps_filter)
     410           0 :             CALL dbcsr_multiply('N', 'T', 1.0_dp, work, proj_Q, 0.0_dp, tp_matrix_tdp, filter_eps=eps_filter)
     411             : 
     412             :             ! Take the product with the inverse metric
     413             :             ! M <= G^-1 * M * G^-1
     414             :             CALL dbcsr_multiply('N', 'N', 1.0_dp, donor_state%metric(2)%matrix, tp_matrix_tdp, &
     415           0 :                                 0.0_dp, work, filter_eps=eps_filter)
     416             :             CALL dbcsr_multiply('N', 'N', 1.0_dp, work, donor_state%metric(2)%matrix, 0.0_dp, &
     417           0 :                                 tp_matrix_tdp, filter_eps=eps_filter)
     418             : 
     419             :          END IF ! triplets
     420             : 
     421             :       END IF ! test on TDA
     422             : 
     423             : !  Clean-up
     424          56 :       CALL dbcsr_release(matrix_a)
     425          56 :       CALL dbcsr_release(matrix_a_sf)
     426          56 :       CALL dbcsr_release(matrix_b)
     427          56 :       CALL dbcsr_release(proj_Q)
     428          56 :       CALL dbcsr_release(proj_Q_sf)
     429          56 :       CALL dbcsr_release(work)
     430          56 :       CALL dbcsr_deallocate_matrix_set(ex_ker)
     431          56 :       CALL dbcsr_deallocate_matrix_set(xc_ker)
     432             : 
     433          56 :       CALL timestop(handle)
     434             : 
     435          56 :    END SUBROUTINE setup_xas_tdp_prob
     436             : 
     437             : ! **************************************************************************************************
     438             : !> \brief Solves the XAS TDP generalized eigenvalue problem omega*C = matrix_tdp*C using standard
     439             : !>        full diagonalization methods. The problem is Hermitian (made that way even if not TDA)
     440             : !> \param donor_state ...
     441             : !> \param xas_tdp_control ...
     442             : !> \param xas_tdp_env ...
     443             : !> \param qs_env ...
     444             : !> \param ex_type whether we deal with singlets, triplets, spin-conserving open-shell or spin-flip
     445             : !> \note The computed eigenvalues and eigenvectors are stored in the donor_state
     446             : !>       The eigenvectors are the LR-coefficients. In case of TDA, c^- is stored. In the general
     447             : !>       case, the sum c^+ + c^- is stored.
     448             : !>      - Spin-restricted:
     449             : !>       In case both singlets and triplets are considered, this routine must be called twice. This
     450             : !>       is the choice that was made because the body of the routine is exactly the same in both cases
     451             : !>       Note that for singlet we solve for u = 1/sqrt(2)*(c_alpha + c_beta) = sqrt(2)*c
     452             : !>       and that for triplets we solve for v = 1/sqrt(2)*(c_alpha - c_beta) = sqrt(2)*c
     453             : !>      - Spin-unrestricted:
     454             : !>       The problem is solved for the LR coefficients c_pIa as they are (not linear combination)
     455             : !>       The routine might be called twice (once for spin-conservign, one for spin-flip)
     456             : ! **************************************************************************************************
     457          60 :    SUBROUTINE solve_xas_tdp_prob(donor_state, xas_tdp_control, xas_tdp_env, qs_env, ex_type)
     458             : 
     459             :       TYPE(donor_state_type), POINTER                    :: donor_state
     460             :       TYPE(xas_tdp_control_type), POINTER                :: xas_tdp_control
     461             :       TYPE(xas_tdp_env_type), POINTER                    :: xas_tdp_env
     462             :       TYPE(qs_environment_type), POINTER                 :: qs_env
     463             :       INTEGER, INTENT(IN)                                :: ex_type
     464             : 
     465             :       CHARACTER(len=*), PARAMETER :: routineN = 'solve_xas_tdp_prob'
     466             : 
     467             :       INTEGER                                            :: first_ex, handle, i, imo, ispin, nao, &
     468             :                                                             ndo_mo, nelectron, nevals, nocc, nrow, &
     469             :                                                             nspins, ot_nevals
     470             :       LOGICAL                                            :: do_os, do_range, do_sf
     471             :       REAL(dp)                                           :: ot_elb
     472          60 :       REAL(dp), ALLOCATABLE, DIMENSION(:)                :: scaling, tmp_evals
     473          60 :       REAL(dp), DIMENSION(:), POINTER                    :: lr_evals
     474             :       TYPE(cp_blacs_env_type), POINTER                   :: blacs_env
     475             :       TYPE(cp_fm_struct_type), POINTER                   :: ex_struct, fm_struct, ot_fm_struct
     476             :       TYPE(cp_fm_type)                                   :: c_diff, c_sum, lhs_matrix, rhs_matrix, &
     477             :                                                             work
     478             :       TYPE(cp_fm_type), POINTER                          :: lr_coeffs
     479             :       TYPE(dbcsr_type)                                   :: tmp_mat, tmp_mat2
     480             :       TYPE(dbcsr_type), POINTER                          :: matrix_tdp
     481             :       TYPE(mp_para_env_type), POINTER                    :: para_env
     482             : 
     483          60 :       CALL timeset(routineN, handle)
     484             : 
     485          60 :       NULLIFY (para_env, blacs_env, fm_struct, matrix_tdp)
     486          60 :       NULLIFY (ex_struct, lr_evals, lr_coeffs)
     487          60 :       CPASSERT(ASSOCIATED(xas_tdp_env))
     488             : 
     489          60 :       do_os = .FALSE.
     490          60 :       do_sf = .FALSE.
     491          60 :       IF (ex_type == tddfpt_spin_cons) THEN
     492           8 :          matrix_tdp => donor_state%sc_matrix_tdp
     493           8 :          do_os = .TRUE.
     494          52 :       ELSE IF (ex_type == tddfpt_spin_flip) THEN
     495           2 :          matrix_tdp => donor_state%sf_matrix_tdp
     496           2 :          do_os = .TRUE.
     497           2 :          do_sf = .TRUE.
     498          50 :       ELSE IF (ex_type == tddfpt_singlet) THEN
     499          48 :          matrix_tdp => donor_state%sg_matrix_tdp
     500           2 :       ELSE IF (ex_type == tddfpt_triplet) THEN
     501           2 :          matrix_tdp => donor_state%tp_matrix_tdp
     502             :       END IF
     503          60 :       CALL get_qs_env(qs_env=qs_env, para_env=para_env, blacs_env=blacs_env, nelectron_total=nelectron)
     504             : 
     505             : !     Initialization
     506          60 :       nspins = 1; IF (do_os) nspins = 2
     507          60 :       CALL cp_fm_get_info(donor_state%gs_coeffs, nrow_global=nao)
     508          60 :       CALL dbcsr_get_info(matrix_tdp, nfullrows_total=nrow)
     509          60 :       ndo_mo = donor_state%ndo_mo
     510          60 :       nocc = nelectron/2; IF (do_os) nocc = nelectron
     511          60 :       nocc = ndo_mo*nocc
     512             : 
     513             :       !solve by energy_range or number of states ?
     514          60 :       do_range = .FALSE.
     515          60 :       IF (xas_tdp_control%e_range > 0.0_dp) do_range = .TRUE.
     516             : 
     517             :       ! create the fm infrastructure
     518             :       CALL cp_fm_struct_create(fm_struct, context=blacs_env, nrow_global=nrow, &
     519          60 :                                para_env=para_env, ncol_global=nrow)
     520          60 :       CALL cp_fm_create(rhs_matrix, fm_struct)
     521          60 :       CALL cp_fm_create(work, fm_struct)
     522             : 
     523             : !     Test on TDA
     524          60 :       IF (xas_tdp_control%tamm_dancoff) THEN
     525             : 
     526          54 :          IF (xas_tdp_control%do_ot) THEN
     527             : 
     528             :             !need to precompute the number of evals for OT
     529           4 :             IF (do_range) THEN
     530             : 
     531             :                !in case of energy range criterion, use LUMO eigenvalues as estimate
     532           4 :                ot_elb = xas_tdp_env%lumo_evals(1)%array(1)
     533           4 :                IF (do_os) ot_elb = MIN(ot_elb, xas_tdp_env%lumo_evals(2)%array(1))
     534             : 
     535        1028 :                ot_nevals = COUNT(xas_tdp_env%lumo_evals(1)%array - ot_elb .LE. xas_tdp_control%e_range)
     536           4 :                IF (do_os) ot_nevals = ot_nevals + &
     537           0 :                                       COUNT(xas_tdp_env%lumo_evals(2)%array - ot_elb .LE. xas_tdp_control%e_range)
     538             : 
     539             :             ELSE
     540             : 
     541           0 :                ot_nevals = nspins*nao - nocc/ndo_mo
     542           0 :                IF (xas_tdp_control%n_excited > 0 .AND. xas_tdp_control%n_excited < ot_nevals) THEN
     543           0 :                   ot_nevals = xas_tdp_control%n_excited
     544             :                END IF
     545             :             END IF
     546           4 :             ot_nevals = ndo_mo*ot_nevals !as in input description, multiply by multiplicity of donor state
     547             : 
     548             : !           Organize results data
     549           4 :             first_ex = 1
     550          12 :             ALLOCATE (tmp_evals(ot_nevals))
     551             :             CALL cp_fm_struct_create(ot_fm_struct, context=blacs_env, para_env=para_env, &
     552           4 :                                      nrow_global=nrow, ncol_global=ot_nevals)
     553           4 :             CALL cp_fm_create(c_sum, ot_fm_struct)
     554             : 
     555             :             CALL xas_ot_solver(matrix_tdp, donor_state%metric(1)%matrix, c_sum, tmp_evals, ot_nevals, &
     556           4 :                                do_sf, donor_state, xas_tdp_env, xas_tdp_control, qs_env)
     557             : 
     558           8 :             CALL cp_fm_struct_release(ot_fm_struct)
     559             : 
     560             :          ELSE
     561             : 
     562             : !           Organize results data
     563          50 :             first_ex = nocc + 1 !where to find the first proper eigenvalue
     564         150 :             ALLOCATE (tmp_evals(nrow))
     565          50 :             CALL cp_fm_create(c_sum, fm_struct)
     566             : 
     567             : !           Get the main matrix_tdp as an fm
     568          50 :             CALL copy_dbcsr_to_fm(matrix_tdp, rhs_matrix)
     569             : 
     570             : !           Get the metric as a fm
     571          50 :             CALL cp_fm_create(lhs_matrix, fm_struct)
     572          50 :             CALL copy_dbcsr_to_fm(donor_state%metric(1)%matrix, lhs_matrix)
     573             : 
     574             :             !Diagonalisation (Cholesky decomposition). In TDA, c_sum = c^-
     575          50 :             CALL cp_fm_geeig(rhs_matrix, lhs_matrix, c_sum, tmp_evals, work)
     576             : 
     577             : !           TDA specific clean-up
     578         150 :             CALL cp_fm_release(lhs_matrix)
     579             : 
     580             :          END IF
     581             : 
     582             :       ELSE ! not TDA
     583             : 
     584             : !        Organize results data
     585           6 :          first_ex = nocc + 1
     586          18 :          ALLOCATE (tmp_evals(nrow))
     587           6 :          CALL cp_fm_create(c_sum, fm_struct)
     588             : 
     589             : !        Need to multiply the current matrix_tdp with the auxiliary matrix
     590             : !        tmp_mat =  (A-D+E)^0.5 * M * (A-D+E)^0.5
     591           6 :          CALL dbcsr_create(matrix=tmp_mat, template=matrix_tdp, matrix_type=dbcsr_type_no_symmetry)
     592           6 :          CALL dbcsr_create(matrix=tmp_mat2, template=matrix_tdp, matrix_type=dbcsr_type_no_symmetry)
     593             :          CALL dbcsr_multiply('N', 'N', 1.0_dp, donor_state%matrix_aux, matrix_tdp, &
     594           6 :                              0.0_dp, tmp_mat2, filter_eps=xas_tdp_control%eps_filter)
     595             :          CALL dbcsr_multiply('N', 'N', 1.0_dp, tmp_mat2, donor_state%matrix_aux, &
     596           6 :                              0.0_dp, tmp_mat, filter_eps=xas_tdp_control%eps_filter)
     597             : 
     598             : !        Get the matrix as a fm
     599           6 :          CALL copy_dbcsr_to_fm(tmp_mat, rhs_matrix)
     600             : 
     601             : !        Solve the "turned-Hermitian" eigenvalue problem
     602           6 :          CALL choose_eigv_solver(rhs_matrix, work, tmp_evals)
     603             : 
     604             : !        Currently, work = (A-D+E)^0.5 (c^+ - c^-) and tmp_evals = omega^2
     605             : !        Put tiny almost zero eigenvalues to zero (corresponding to occupied MOs)
     606         150 :          WHERE (tmp_evals < 1.0E-4_dp) tmp_evals = 0.0_dp
     607             : 
     608             : !        Retrieve c_diff = (c^+ - c^-) for normalization
     609             : !        (c^+ - c^-) = 1/omega^2 * M * (A-D+E)^0.5 * work
     610           6 :          CALL cp_fm_create(c_diff, fm_struct)
     611             :          CALL dbcsr_multiply('N', 'N', 1.0_dp, matrix_tdp, donor_state%matrix_aux, &
     612           6 :                              0.0_dp, tmp_mat, filter_eps=xas_tdp_control%eps_filter)
     613           6 :          CALL cp_dbcsr_sm_fm_multiply(tmp_mat, work, c_diff, ncol=nrow)
     614             : 
     615          12 :          ALLOCATE (scaling(nrow))
     616         150 :          scaling = 0.0_dp
     617         150 :          WHERE (ABS(tmp_evals) > 1.0E-8_dp) scaling = 1.0_dp/tmp_evals
     618           6 :          CALL cp_fm_column_scale(c_diff, scaling)
     619             : 
     620             : !        Normalize with the metric: c_diff * G * c_diff = +- 1
     621         150 :          scaling = 0.0_dp
     622           6 :          CALL get_normal_scaling(scaling, c_diff, donor_state)
     623           6 :          CALL cp_fm_column_scale(c_diff, scaling)
     624             : 
     625             : !        Get the actual eigenvalues
     626         150 :          tmp_evals = SQRT(tmp_evals)
     627             : 
     628             : !        Get c_sum = (c^+ + c^-), which appears in all transition density related expressions
     629             : !        c_sum = -1/omega G^-1 * (A-D+E) * (c^+ - c^-)
     630             :          CALL dbcsr_multiply('N', 'N', 1.0_dp, donor_state%matrix_aux, donor_state%matrix_aux, &
     631           6 :                              0.0_dp, tmp_mat2, filter_eps=xas_tdp_control%eps_filter)
     632             :          CALL dbcsr_multiply('N', 'N', 1.0_dp, donor_state%metric(2)%matrix, tmp_mat2, &
     633           6 :                              0.0_dp, tmp_mat, filter_eps=xas_tdp_control%eps_filter)
     634           6 :          CALL cp_dbcsr_sm_fm_multiply(tmp_mat, c_diff, c_sum, ncol=nrow)
     635         150 :          WHERE (tmp_evals .NE. 0) scaling = -1.0_dp/tmp_evals
     636           6 :          CALL cp_fm_column_scale(c_sum, scaling)
     637             : 
     638             : !        Full TDDFT specific clean-up
     639           6 :          CALL cp_fm_release(c_diff)
     640           6 :          CALL dbcsr_release(tmp_mat)
     641           6 :          CALL dbcsr_release(tmp_mat2)
     642          18 :          DEALLOCATE (scaling)
     643             : 
     644             :       END IF ! TDA
     645             : 
     646             : !     Full matrix clean-up
     647          60 :       CALL cp_fm_release(rhs_matrix)
     648          60 :       CALL cp_fm_release(work)
     649             : 
     650             : !  Reorganize the eigenvalues, we want a lr_evals array with the proper dimension and where the
     651             : !  first element is the first eval. Need a case study on do_range/ot
     652          60 :       IF (xas_tdp_control%do_ot) THEN
     653             : 
     654           4 :          nevals = ot_nevals
     655             : 
     656          56 :       ELSE IF (do_range) THEN
     657             : 
     658          94 :          WHERE (tmp_evals > tmp_evals(first_ex) + xas_tdp_control%e_range) tmp_evals = 0.0_dp
     659          48 :          nevals = MAXLOC(tmp_evals, 1) - nocc
     660             : 
     661             :       ELSE
     662             : 
     663             :          !Determine the number of evals to keep base on N_EXCITED
     664          54 :          nevals = nspins*nao - nocc/ndo_mo
     665          54 :          IF (xas_tdp_control%n_excited > 0 .AND. xas_tdp_control%n_excited < nevals) THEN
     666             :             nevals = xas_tdp_control%n_excited
     667             :          END IF
     668          54 :          nevals = ndo_mo*nevals !as in input description, multiply by # of donor MOs
     669             : 
     670             :       END IF
     671             : 
     672         180 :       ALLOCATE (lr_evals(nevals))
     673         964 :       lr_evals(:) = tmp_evals(first_ex:first_ex + nevals - 1)
     674             : 
     675             : !  Reorganize the eigenvectors in array of cp_fm so that each ndo_mo columns corresponds to an
     676             : !  excited state. Makes later calls to those easier and more efficient
     677             : !  In case of open-shell, we store the coeffs in the same logic as the matrix => first block where
     678             : !  the columns are the c_Ialpha and second block with columns as c_Ibeta
     679             :       CALL cp_fm_struct_create(ex_struct, nrow_global=nao, ncol_global=ndo_mo*nspins*nevals, &
     680          60 :                                para_env=para_env, context=blacs_env)
     681          60 :       ALLOCATE (lr_coeffs)
     682          60 :       CALL cp_fm_create(lr_coeffs, ex_struct)
     683             : 
     684         964 :       DO i = 1, nevals
     685        2100 :          DO ispin = 1, nspins
     686        3464 :             DO imo = 1, ndo_mo
     687             : 
     688             :                CALL cp_fm_to_fm_submat(msource=c_sum, mtarget=lr_coeffs, &
     689             :                                        nrow=nao, ncol=1, s_firstrow=((ispin - 1)*ndo_mo + imo - 1)*nao + 1, &
     690             :                                        s_firstcol=first_ex + i - 1, t_firstrow=1, &
     691        2560 :                                        t_firstcol=(i - 1)*ndo_mo*nspins + (ispin - 1)*ndo_mo + imo)
     692             :             END DO !imo
     693             :          END DO !ispin
     694             :       END DO !istate
     695             : 
     696          60 :       IF (ex_type == tddfpt_spin_cons) THEN
     697           8 :          donor_state%sc_coeffs => lr_coeffs
     698           8 :          donor_state%sc_evals => lr_evals
     699          52 :       ELSE IF (ex_type == tddfpt_spin_flip) THEN
     700           2 :          donor_state%sf_coeffs => lr_coeffs
     701           2 :          donor_state%sf_evals => lr_evals
     702          50 :       ELSE IF (ex_type == tddfpt_singlet) THEN
     703          48 :          donor_state%sg_coeffs => lr_coeffs
     704          48 :          donor_State%sg_evals => lr_evals
     705           2 :       ELSE IF (ex_type == tddfpt_triplet) THEN
     706           2 :          donor_state%tp_coeffs => lr_coeffs
     707           2 :          donor_state%tp_evals => lr_evals
     708             :       END IF
     709             : 
     710             : !  Clean-up
     711          60 :       CALL cp_fm_release(c_sum)
     712          60 :       CALL cp_fm_struct_release(fm_struct)
     713          60 :       CALL cp_fm_struct_release(ex_struct)
     714             : 
     715             : !  Perform a partial clean-up of the donor_state
     716          60 :       CALL dbcsr_release(matrix_tdp)
     717             : 
     718          60 :       CALL timestop(handle)
     719             : 
     720         300 :    END SUBROUTINE solve_xas_tdp_prob
     721             : 
     722             : ! **************************************************************************************************
     723             : !> \brief An iterative solver based on OT for the TDA generalized eigV problem lambda Sx = Hx
     724             : !> \param matrix_tdp the RHS matrix (dbcsr)
     725             : !> \param metric the LHS matrix (dbcsr)
     726             : !> \param evecs the corresponding eigenvectors (fm)
     727             : !> \param evals the corresponding eigenvalues
     728             : !> \param neig the number of wanted eigenvalues
     729             : !> \param do_sf whther spin-flip TDDFT is on
     730             : !> \param donor_state ...
     731             : !> \param xas_tdp_env ...
     732             : !> \param xas_tdp_control ...
     733             : !> \param qs_env ...
     734             : ! **************************************************************************************************
     735           4 :    SUBROUTINE xas_ot_solver(matrix_tdp, metric, evecs, evals, neig, do_sf, donor_state, xas_tdp_env, &
     736             :                             xas_tdp_control, qs_env)
     737             : 
     738             :       TYPE(dbcsr_type), POINTER                          :: matrix_tdp, metric
     739             :       TYPE(cp_fm_type), INTENT(IN)                       :: evecs
     740             :       REAL(dp), DIMENSION(:)                             :: evals
     741             :       INTEGER, INTENT(IN)                                :: neig
     742             :       LOGICAL                                            :: do_sf
     743             :       TYPE(donor_state_type), POINTER                    :: donor_state
     744             :       TYPE(xas_tdp_env_type), POINTER                    :: xas_tdp_env
     745             :       TYPE(xas_tdp_control_type), POINTER                :: xas_tdp_control
     746             :       TYPE(qs_environment_type), POINTER                 :: qs_env
     747             : 
     748             :       CHARACTER(len=*), PARAMETER                        :: routineN = 'xas_ot_solver'
     749             : 
     750             :       INTEGER                                            :: handle, max_iter, ndo_mo, nelec_spin(2), &
     751             :                                                             nocc, nrow, output_unit
     752             :       LOGICAL                                            :: do_os
     753             :       REAL(dp)                                           :: eps_iter
     754             :       TYPE(cp_blacs_env_type), POINTER                   :: blacs_env
     755             :       TYPE(cp_fm_struct_type), POINTER                   :: ortho_struct
     756             :       TYPE(cp_fm_type)                                   :: ortho_space
     757             :       TYPE(dbcsr_type), POINTER                          :: ot_prec
     758             :       TYPE(mp_para_env_type), POINTER                    :: para_env
     759             :       TYPE(preconditioner_type), POINTER                 :: precond
     760             : 
     761           4 :       NULLIFY (para_env, blacs_env, ortho_struct, ot_prec)
     762             : 
     763           4 :       CALL timeset(routineN, handle)
     764             : 
     765           4 :       output_unit = cp_logger_get_default_io_unit()
     766           4 :       IF (output_unit > 0) THEN
     767             :          WRITE (output_unit, "(/,T5,A)") &
     768           2 :             "Using OT eigensolver for diagonalization: "
     769             :       END IF
     770             : 
     771           4 :       do_os = xas_tdp_control%do_uks .OR. xas_tdp_control%do_roks
     772           4 :       ndo_mo = donor_state%ndo_mo
     773           4 :       CALL get_qs_env(qs_env, para_env=para_env, blacs_env=blacs_env, nelectron_spin=nelec_spin)
     774           4 :       CALL cp_fm_get_info(evecs, nrow_global=nrow)
     775           4 :       max_iter = xas_tdp_control%ot_max_iter
     776           4 :       eps_iter = xas_tdp_control%ot_eps_iter
     777           4 :       nocc = nelec_spin(1)/2*ndo_mo
     778           4 :       IF (do_os) nocc = SUM(nelec_spin)*ndo_mo
     779             : 
     780             : !  Initialize relevent matrices
     781           4 :       ALLOCATE (ot_prec)
     782           4 :       CALL dbcsr_create(ot_prec, template=matrix_tdp)
     783             :       CALL cp_fm_struct_create(ortho_struct, context=blacs_env, para_env=para_env, &
     784           4 :                                nrow_global=nrow, ncol_global=nocc)
     785           4 :       CALL cp_fm_create(ortho_space, ortho_struct)
     786             : 
     787             :       CALL prep_for_ot(evecs, ortho_space, ot_prec, neig, do_sf, donor_state, xas_tdp_env, &
     788           4 :                        xas_tdp_control, qs_env)
     789             : 
     790             : !  Prepare the preconditioner
     791           4 :       ALLOCATE (precond)
     792           4 :       CALL init_preconditioner(precond, para_env, blacs_env)
     793           4 :       precond%in_use = ot_precond_full_single ! because applying this conditioner is only a mm
     794           4 :       precond%dbcsr_matrix => ot_prec
     795             : 
     796             : !  Actually solving the eigenvalue problem
     797             :       CALL ot_eigensolver(matrix_h=matrix_tdp, matrix_s=metric, matrix_c_fm=evecs, &
     798             :                           eps_gradient=eps_iter, iter_max=max_iter, silent=.FALSE., &
     799             :                           ot_settings=xas_tdp_control%ot_settings, &
     800             :                           matrix_orthogonal_space_fm=ortho_space, &
     801           4 :                           preconditioner=precond)
     802           4 :       CALL calculate_subspace_eigenvalues(evecs, matrix_tdp, evals_arg=evals)
     803             : 
     804             : !  Clean-up
     805           4 :       CALL cp_fm_struct_release(ortho_struct)
     806           4 :       CALL cp_fm_release(ortho_space)
     807           4 :       CALL dbcsr_release(ot_prec)
     808           4 :       CALL destroy_preconditioner(precond)
     809           4 :       DEALLOCATE (precond)
     810             : 
     811           4 :       CALL timestop(handle)
     812             : 
     813           4 :    END SUBROUTINE xas_ot_solver
     814             : 
     815             : ! **************************************************************************************************
     816             : !> \brief Prepares all required matrices for the OT eigensolver (precond, ortho space and guesses)
     817             : !> \param guess the guess eigenvectors absed on LUMOs, in fm format
     818             : !> \param ortho the orthogonal space in fm format (occupied MOs)
     819             : !> \param precond the OT preconditioner in DBCSR format
     820             : !> \param neig ...
     821             : !> \param do_sf ...
     822             : !> \param donor_state ...
     823             : !> \param xas_tdp_env ...
     824             : !> \param xas_tdp_control ...
     825             : !> \param qs_env ...
     826             : !> \note Matrices are allocate before entry
     827             : ! **************************************************************************************************
     828           8 :    SUBROUTINE prep_for_ot(guess, ortho, precond, neig, do_sf, donor_state, xas_tdp_env, &
     829             :                           xas_tdp_control, qs_env)
     830             : 
     831             :       TYPE(cp_fm_type), INTENT(IN)                       :: guess, ortho
     832             :       TYPE(dbcsr_type)                                   :: precond
     833             :       INTEGER                                            :: neig
     834             :       LOGICAL                                            :: do_sf
     835             :       TYPE(donor_state_type), POINTER                    :: donor_state
     836             :       TYPE(xas_tdp_env_type), POINTER                    :: xas_tdp_env
     837             :       TYPE(xas_tdp_control_type), POINTER                :: xas_tdp_control
     838             :       TYPE(qs_environment_type), POINTER                 :: qs_env
     839             : 
     840             :       CHARACTER(len=*), PARAMETER                        :: routineN = 'prep_for_ot'
     841             : 
     842             :       INTEGER :: blk, handle, i, iblk, ido_mo, ispin, jblk, maxel, minel, nao, natom, ndo_mo, &
     843             :          nelec_spin(2), nhomo(2), nlumo(2), nspins, start_block, start_col, start_row
     844             :       LOGICAL                                            :: do_os, found
     845           4 :       REAL(dp), DIMENSION(:, :), POINTER                 :: pblock
     846             :       TYPE(cp_fm_type), POINTER                          :: mo_coeff
     847             :       TYPE(dbcsr_iterator_type)                          :: iter
     848           4 :       TYPE(mo_set_type), DIMENSION(:), POINTER           :: mos
     849             : 
     850           4 :       NULLIFY (mos, mo_coeff, pblock)
     851             : 
     852             :       !REMINDER on the organization of the xas_tdp matrix. It is DBCSR format, with a super bock
     853             :       !structure. First block structure is spin quadrants: upper left is alpha-alpha spin and lower
     854             :       !right is beta-beta spin. Then each quadrants is divided in a ndo_mo x ndo_mo grid (1x1 for 1s,
     855             :       !2s, 3x3 for 2p). Each block in this grid has the normal DBCSR structure and dist, simply
     856             :       !replicated. The resulting eigenvectors follow the same logic.
     857             : 
     858           4 :       CALL timeset(routineN, handle)
     859             : 
     860           4 :       do_os = xas_tdp_control%do_uks .OR. xas_tdp_control%do_roks
     861           0 :       nspins = 1; IF (do_os) nspins = 2
     862           4 :       ndo_mo = donor_state%ndo_mo
     863           4 :       CALL cp_fm_get_info(xas_tdp_env%lumo_evecs(1), nrow_global=nao)
     864           4 :       CALL get_qs_env(qs_env, natom=natom, nelectron_spin=nelec_spin)
     865             : 
     866             :       !Compute the number of guesses for each spins
     867           4 :       IF (do_os) THEN
     868           0 :          minel = MINLOC(nelec_spin, 1)
     869           0 :          maxel = 3 - minel
     870           0 :          nlumo(minel) = (neig/ndo_mo + nelec_spin(maxel) - nelec_spin(minel))/2
     871           0 :          nlumo(maxel) = neig/ndo_mo - nlumo(minel)
     872             :       ELSE
     873           4 :          nlumo(1) = neig/ndo_mo
     874             :       END IF
     875             : 
     876             :       !Building the guess vectors based on the LUMOs. Copy LUMOs into approriate spin/do_mo
     877             :       !quadrant/block. Order within a block does not matter
     878             :       !Note: in spin-flip, the upper left quadrant is for beta-alpha transition, so guess are alpha LUMOs
     879             :       start_row = 0
     880             :       start_col = 0
     881           8 :       DO ispin = 1, nspins
     882          12 :          DO ido_mo = 1, ndo_mo
     883             : 
     884             :             CALL cp_fm_to_fm_submat(msource=xas_tdp_env%lumo_evecs(ispin), mtarget=guess, &
     885             :                                     nrow=nao, ncol=nlumo(ispin), s_firstrow=1, s_firstcol=1, &
     886           4 :                                     t_firstrow=start_row + 1, t_firstcol=start_col + 1)
     887             : 
     888           4 :             start_row = start_row + nao
     889           8 :             start_col = start_col + nlumo(ispin)
     890             : 
     891             :          END DO
     892             :       END DO
     893             : 
     894             :       !Build the orthogonal space according to the same principles, but based on occupied MOs
     895             :       !Note: in spin-flip, the upper left quadrant is for beta-alpha transition, so ortho space is beta HOMOs
     896           4 :       CALL get_qs_env(qs_env, mos=mos)
     897           4 :       nhomo = 0
     898           8 :       DO ispin = 1, nspins
     899           8 :          CALL get_mo_set(mos(ispin), homo=nhomo(ispin))
     900             :       END DO
     901             : 
     902             :       start_row = 0
     903             :       start_col = 0
     904           8 :       DO i = 1, nspins
     905           4 :          ispin = i; IF (do_sf) ispin = 3 - i
     906           4 :          CALL get_mo_set(mos(ispin), mo_coeff=mo_coeff)
     907             : 
     908          12 :          DO ido_mo = 1, ndo_mo
     909             : 
     910             :             CALL cp_fm_to_fm_submat(msource=mo_coeff, mtarget=ortho, nrow=nao, ncol=nhomo(ispin), &
     911             :                                     s_firstrow=1, s_firstcol=1, &
     912           4 :                                     t_firstrow=start_row + 1, t_firstcol=start_col + 1)
     913             : 
     914           4 :             start_row = start_row + nao
     915           8 :             start_col = start_col + nhomo(ispin)
     916             : 
     917             :          END DO
     918             :       END DO
     919             : 
     920             :       !Build the preconditioner. Copy the "canonical" pre-computed matrix into the proper spin/do_mo
     921             :       !quadrants/blocks. The end matrix is purely block diagonal
     922           8 :       DO ispin = 1, nspins
     923             : 
     924           4 :          CALL dbcsr_iterator_start(iter, xas_tdp_env%ot_prec(ispin)%matrix)
     925        9316 :          DO WHILE (dbcsr_iterator_blocks_left(iter))
     926             : 
     927        9312 :             CALL dbcsr_iterator_next_block(iter, row=iblk, column=jblk, blk=blk)
     928             : 
     929        9312 :             CALL dbcsr_get_block_p(xas_tdp_env%ot_prec(ispin)%matrix, iblk, jblk, pblock, found)
     930             : 
     931        9316 :             IF (found) THEN
     932             : 
     933        9312 :                start_block = (ispin - 1)*ndo_mo*natom
     934       18624 :                DO ido_mo = 1, ndo_mo
     935        9312 :                   CALL dbcsr_put_block(precond, start_block + iblk, start_block + jblk, pblock)
     936             : 
     937       18624 :                   start_block = start_block + natom
     938             : 
     939             :                END DO
     940             :             END IF
     941             : 
     942             :          END DO !dbcsr iter
     943          12 :          CALL dbcsr_iterator_stop(iter)
     944             :       END DO
     945             : 
     946           4 :       CALL dbcsr_finalize(precond)
     947             : 
     948           4 :       CALL timestop(handle)
     949             : 
     950           4 :    END SUBROUTINE prep_for_ot
     951             : 
     952             : ! **************************************************************************************************
     953             : !> \brief Returns the scaling to apply to normalize the LR eigenvectors.
     954             : !> \param scaling the scaling array to apply
     955             : !> \param lr_coeffs the linear response coefficients as a fm
     956             : !> \param donor_state ...
     957             : !> \note The LR coeffs are normalized when c^T G c = +- 1, G is the metric, c = c^- for TDA and
     958             : !>       c = c^+ - c^- for the full problem
     959             : ! **************************************************************************************************
     960           6 :    SUBROUTINE get_normal_scaling(scaling, lr_coeffs, donor_state)
     961             : 
     962             :       REAL(dp), ALLOCATABLE, DIMENSION(:)                :: scaling
     963             :       TYPE(cp_fm_type), INTENT(IN)                       :: lr_coeffs
     964             :       TYPE(donor_state_type), POINTER                    :: donor_state
     965             : 
     966             :       INTEGER                                            :: nrow, nscal, nvals
     967             :       REAL(dp), ALLOCATABLE, DIMENSION(:)                :: diag
     968             :       TYPE(cp_blacs_env_type), POINTER                   :: blacs_env
     969             :       TYPE(cp_fm_struct_type), POINTER                   :: norm_struct, work_struct
     970             :       TYPE(cp_fm_type)                                   :: fm_norm, work
     971             :       TYPE(mp_para_env_type), POINTER                    :: para_env
     972             : 
     973           6 :       NULLIFY (para_env, blacs_env, norm_struct, work_struct)
     974             : 
     975             : !  Creating the matrix structures and initializing the work matrices
     976             :       CALL cp_fm_get_info(lr_coeffs, context=blacs_env, para_env=para_env, &
     977           6 :                           matrix_struct=work_struct, ncol_global=nvals, nrow_global=nrow)
     978             :       CALL cp_fm_struct_create(norm_struct, para_env=para_env, context=blacs_env, &
     979           6 :                                nrow_global=nvals, ncol_global=nvals)
     980             : 
     981           6 :       CALL cp_fm_create(work, work_struct)
     982           6 :       CALL cp_fm_create(fm_norm, norm_struct)
     983             : 
     984             : !  Taking c^T * G * C
     985           6 :       CALL cp_dbcsr_sm_fm_multiply(donor_state%metric(1)%matrix, lr_coeffs, work, ncol=nvals)
     986           6 :       CALL parallel_gemm('T', 'N', nvals, nvals, nrow, 1.0_dp, lr_coeffs, work, 0.0_dp, fm_norm)
     987             : 
     988             : !  Computing the needed scaling
     989          18 :       ALLOCATE (diag(nvals))
     990           6 :       CALL cp_fm_get_diag(fm_norm, diag)
     991         150 :       WHERE (ABS(diag) > 1.0E-8_dp) diag = 1.0_dp/SQRT(ABS(diag))
     992             : 
     993           6 :       nscal = SIZE(scaling)
     994         150 :       scaling(1:nscal) = diag(1:nscal)
     995             : 
     996             : !  Clean-up
     997           6 :       CALL cp_fm_release(work)
     998           6 :       CALL cp_fm_release(fm_norm)
     999           6 :       CALL cp_fm_struct_release(norm_struct)
    1000             : 
    1001          18 :    END SUBROUTINE get_normal_scaling
    1002             : 
    1003             : ! **************************************************************************************************
    1004             : !> \brief This subroutine computes the row/column block structure as well as the dbcsr ditrinution
    1005             : !>        for the submatrices making up the generalized XAS TDP eigenvalue problem. They all share
    1006             : !>        the same properties, which are based on the replication of the KS matrix. Stored in the
    1007             : !>        donor_state_type
    1008             : !> \param donor_state ...
    1009             : !> \param do_os whether this is a open-shell calculation
    1010             : !> \param qs_env ...
    1011             : ! **************************************************************************************************
    1012          56 :    SUBROUTINE compute_submat_dist_and_blk_size(donor_state, do_os, qs_env)
    1013             : 
    1014             :       TYPE(donor_state_type), POINTER                    :: donor_state
    1015             :       LOGICAL, INTENT(IN)                                :: do_os
    1016             :       TYPE(qs_environment_type), POINTER                 :: qs_env
    1017             : 
    1018             :       INTEGER                                            :: group, i, nao, nblk_row, ndo_mo, nspins, &
    1019             :                                                             scol_dist, srow_dist
    1020          56 :       INTEGER, DIMENSION(:), POINTER                     :: col_dist, col_dist_sub, row_blk_size, &
    1021          56 :                                                             row_dist, row_dist_sub, submat_blk_size
    1022          56 :       INTEGER, DIMENSION(:, :), POINTER                  :: pgrid
    1023             :       TYPE(dbcsr_distribution_type), POINTER             :: dbcsr_dist, submat_dist
    1024          56 :       TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: matrix_ks
    1025             : 
    1026          56 :       NULLIFY (matrix_ks, dbcsr_dist, row_blk_size, row_dist, col_dist, pgrid, col_dist_sub)
    1027          56 :       NULLIFY (row_dist_sub, submat_dist, submat_blk_size)
    1028             : 
    1029             : !  The submatrices are indexed by M_{pi sig,qj tau}, where p,q label basis functions and i,j donor
    1030             : !  MOs and sig,tau the spins. For each spin and donor MOs combination, one has a submatrix of the
    1031             : !  size of the KS matrix (nao x nao) with the same dbcsr block structure
    1032             : 
    1033             : !  Initialization
    1034          56 :       ndo_mo = donor_state%ndo_mo
    1035          56 :       CALL get_qs_env(qs_env=qs_env, matrix_ks=matrix_ks, dbcsr_dist=dbcsr_dist)
    1036          56 :       CALL dbcsr_get_info(matrix_ks(1)%matrix, row_blk_size=row_blk_size)
    1037             :       CALL dbcsr_distribution_get(dbcsr_dist, row_dist=row_dist, col_dist=col_dist, group=group, &
    1038          56 :                                   pgrid=pgrid)
    1039         658 :       nao = SUM(row_blk_size)
    1040          56 :       nblk_row = SIZE(row_blk_size)
    1041          56 :       srow_dist = SIZE(row_dist)
    1042          56 :       scol_dist = SIZE(col_dist)
    1043          56 :       nspins = 1; IF (do_os) nspins = 2
    1044             : 
    1045             : !  Creation if submatrix block size and col/row distribution
    1046         168 :       ALLOCATE (submat_blk_size(ndo_mo*nspins*nblk_row))
    1047         168 :       ALLOCATE (row_dist_sub(ndo_mo*nspins*srow_dist))
    1048         168 :       ALLOCATE (col_dist_sub(ndo_mo*nspins*scol_dist))
    1049             : 
    1050         132 :       DO i = 1, ndo_mo*nspins
    1051        1416 :          submat_blk_size((i - 1)*nblk_row + 1:i*nblk_row) = row_blk_size
    1052        1416 :          row_dist_sub((i - 1)*srow_dist + 1:i*srow_dist) = row_dist
    1053        1472 :          col_dist_sub((i - 1)*scol_dist + 1:i*scol_dist) = col_dist
    1054             :       END DO
    1055             : 
    1056             : !  Create the submatrix dbcsr distribution
    1057          56 :       ALLOCATE (submat_dist)
    1058             :       CALL dbcsr_distribution_new(submat_dist, group=group, pgrid=pgrid, row_dist=row_dist_sub, &
    1059          56 :                                   col_dist=col_dist_sub)
    1060             : 
    1061          56 :       donor_state%dbcsr_dist => submat_dist
    1062          56 :       donor_state%blk_size => submat_blk_size
    1063             : 
    1064             : !  Clean-up
    1065          56 :       DEALLOCATE (col_dist_sub, row_dist_sub)
    1066             : 
    1067         168 :    END SUBROUTINE compute_submat_dist_and_blk_size
    1068             : 
    1069             : ! **************************************************************************************************
    1070             : !> \brief Returns the projector on the unperturbed unoccupied ground state Q = 1 - SP on the block
    1071             : !>        diagonal of a matrix with the standard size and distribution.
    1072             : !> \param proj_Q the matrix with the projector
    1073             : !> \param donor_state ...
    1074             : !> \param do_os whether it is open-shell calculation
    1075             : !> \param xas_tdp_env ...
    1076             : !> \param do_sf whether the projector should be prepared for spin-flip excitations
    1077             : !> \note In the spin-flip case, the alpha spins are sent to beta and vice-versa. The structure of
    1078             : !>       the projector is changed by swapping the alpha-alpha with the beta-beta block, which
    1079             : !>       naturally take the spin change into account. Only for open-shell.
    1080             : ! **************************************************************************************************
    1081          58 :    SUBROUTINE get_q_projector(proj_Q, donor_state, do_os, xas_tdp_env, do_sf)
    1082             : 
    1083             :       TYPE(dbcsr_type), INTENT(INOUT)                    :: proj_Q
    1084             :       TYPE(donor_state_type), POINTER                    :: donor_state
    1085             :       LOGICAL, INTENT(IN)                                :: do_os
    1086             :       TYPE(xas_tdp_env_type), POINTER                    :: xas_tdp_env
    1087             :       LOGICAL, INTENT(IN), OPTIONAL                      :: do_sf
    1088             : 
    1089             :       CHARACTER(len=*), PARAMETER                        :: routineN = 'get_q_projector'
    1090             : 
    1091             :       INTEGER                                            :: blk, handle, iblk, imo, ispin, jblk, &
    1092             :                                                             nblk_row, ndo_mo, nspins
    1093          58 :       INTEGER, DIMENSION(:), POINTER                     :: blk_size_q, row_blk_size
    1094             :       LOGICAL                                            :: found_block, my_dosf
    1095          58 :       REAL(dp), DIMENSION(:), POINTER                    :: work_block
    1096             :       TYPE(dbcsr_distribution_type), POINTER             :: dist_q
    1097             :       TYPE(dbcsr_iterator_type)                          :: iter
    1098             :       TYPE(dbcsr_type), POINTER                          :: one_sp
    1099             : 
    1100          58 :       NULLIFY (work_block, one_sp, row_blk_size, dist_q, blk_size_q)
    1101             : 
    1102          58 :       CALL timeset(routineN, handle)
    1103             : 
    1104             : !  Initialization
    1105          58 :       nspins = 1; IF (do_os) nspins = 2
    1106          58 :       ndo_mo = donor_state%ndo_mo
    1107          58 :       one_sp => xas_tdp_env%q_projector(1)%matrix
    1108          58 :       CALL dbcsr_get_info(one_sp, row_blk_size=row_blk_size)
    1109          58 :       nblk_row = SIZE(row_blk_size)
    1110          58 :       my_dosf = .FALSE.
    1111          58 :       IF (PRESENT(do_sf)) my_dosf = do_sf
    1112          58 :       dist_q => donor_state%dbcsr_dist
    1113          58 :       blk_size_q => donor_state%blk_size
    1114             : 
    1115             :       ! the projector is not symmetric
    1116             :       CALL dbcsr_create(matrix=proj_Q, name="PROJ Q", matrix_type=dbcsr_type_no_symmetry, dist=dist_q, &
    1117          58 :                         row_blk_size=blk_size_q, col_blk_size=blk_size_q)
    1118             : 
    1119             : !  Fill the projector by looping over 1-SP and duplicating blocks. (all on the spin-MO block diagonal)
    1120         126 :       DO ispin = 1, nspins
    1121          68 :          one_sp => xas_tdp_env%q_projector(ispin)%matrix
    1122             : 
    1123             :          !if spin-flip, swap the alpha-alpha and beta-beta blocks
    1124          68 :          IF (my_dosf) one_sp => xas_tdp_env%q_projector(3 - ispin)%matrix
    1125             : 
    1126          68 :          CALL dbcsr_iterator_start(iter, one_sp)
    1127       19154 :          DO WHILE (dbcsr_iterator_blocks_left(iter))
    1128             : 
    1129       19086 :             CALL dbcsr_iterator_next_block(iter, row=iblk, column=jblk, blk=blk)
    1130             : 
    1131             :             ! get the block
    1132       19086 :             CALL dbcsr_get_block_p(one_sp, iblk, jblk, work_block, found_block)
    1133             : 
    1134       19086 :             IF (found_block) THEN
    1135             : 
    1136       38182 :                DO imo = 1, ndo_mo
    1137             :                   CALL dbcsr_put_block(proj_Q, ((ispin - 1)*ndo_mo + imo - 1)*nblk_row + iblk, &
    1138       38182 :                                        ((ispin - 1)*ndo_mo + imo - 1)*nblk_row + jblk, work_block)
    1139             :                END DO
    1140             : 
    1141             :             END IF
    1142       19086 :             NULLIFY (work_block)
    1143             : 
    1144             :          END DO !iterator
    1145         194 :          CALL dbcsr_iterator_stop(iter)
    1146             :       END DO !ispin
    1147             : 
    1148          58 :       CALL dbcsr_finalize(proj_Q)
    1149             : 
    1150          58 :       CALL timestop(handle)
    1151             : 
    1152          58 :    END SUBROUTINE get_q_projector
    1153             : 
    1154             : ! **************************************************************************************************
    1155             : !> \brief Builds the matrix containing the ground state contribution to the matrix_tdp (aka matrix A)
    1156             : !>         => A_{pis,qjt} = (F_pq*delta_ij - epsilon_ij*S_pq) delta_st, where:
    1157             : !>         F is the KS matrix
    1158             : !>         S is the overlap matrix
    1159             : !>         epsilon_ij is the donor MO eigenvalue
    1160             : !>         i,j labels the MOs, p,q the AOs and s,t the spins
    1161             : !> \param matrix_a  pointer to a DBCSR matrix containing A
    1162             : !> \param donor_state ...
    1163             : !> \param do_os ...
    1164             : !> \param qs_env ...
    1165             : !> \param do_sf whether the ground state contribution should accommodate spin-flip
    1166             : !> \note Even localized non-canonical MOs are diagonalized in their subsapce => eps_ij = eps_ii*delta_ij
    1167             : !>       Use GW2X corrected evals as eps_ii. If not GW2X correction, these are the default KS energies
    1168             : ! **************************************************************************************************
    1169          58 :    SUBROUTINE build_gs_contribution(matrix_a, donor_state, do_os, qs_env, do_sf)
    1170             : 
    1171             :       TYPE(dbcsr_type), INTENT(INOUT)                    :: matrix_a
    1172             :       TYPE(donor_state_type), POINTER                    :: donor_state
    1173             :       LOGICAL, INTENT(IN)                                :: do_os
    1174             :       TYPE(qs_environment_type), POINTER                 :: qs_env
    1175             :       LOGICAL, INTENT(IN), OPTIONAL                      :: do_sf
    1176             : 
    1177             :       CHARACTER(len=*), PARAMETER :: routineN = 'build_gs_contribution'
    1178             : 
    1179             :       INTEGER                                            :: blk, handle, iblk, imo, ispin, jblk, &
    1180             :                                                             nblk_row, ndo_mo, nspins
    1181          58 :       INTEGER, DIMENSION(:), POINTER                     :: blk_size_a, row_blk_size
    1182             :       LOGICAL                                            :: found_block, my_dosf
    1183          58 :       REAL(dp), DIMENSION(:), POINTER                    :: work_block
    1184             :       TYPE(dbcsr_distribution_type), POINTER             :: dbcsr_dist, dist_a
    1185             :       TYPE(dbcsr_iterator_type)                          :: iter
    1186          58 :       TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: m_ks, matrix_ks, matrix_s
    1187             :       TYPE(dbcsr_type)                                   :: work_matrix
    1188             : 
    1189          58 :       NULLIFY (matrix_ks, dbcsr_dist, row_blk_size, work_block, matrix_s, m_ks)
    1190          58 :       NULLIFY (dist_a, blk_size_a)
    1191             : 
    1192             :       !  Note: matrix A is symmetric and block diagonal. If ADMM, the ks matrix is the total one,
    1193             :       !        and it is corrected for eigenvalues (done at xas_tdp_init)
    1194             : 
    1195          58 :       CALL timeset(routineN, handle)
    1196             : 
    1197             : !  Initialization
    1198          58 :       nspins = 1; IF (do_os) nspins = 2
    1199          58 :       ndo_mo = donor_state%ndo_mo
    1200          58 :       CALL get_qs_env(qs_env=qs_env, matrix_ks=matrix_ks, matrix_s=matrix_s, dbcsr_dist=dbcsr_dist)
    1201          58 :       CALL dbcsr_get_info(matrix_s(1)%matrix, row_blk_size=row_blk_size)
    1202          58 :       nblk_row = SIZE(row_blk_size)
    1203          58 :       dist_a => donor_state%dbcsr_dist
    1204          58 :       blk_size_a => donor_state%blk_size
    1205             : 
    1206             : !  Prepare the KS matrix pointer
    1207         184 :       ALLOCATE (m_ks(nspins))
    1208          58 :       m_ks(1)%matrix => matrix_ks(1)%matrix
    1209          58 :       IF (do_os) m_ks(2)%matrix => matrix_ks(2)%matrix
    1210             : 
    1211             : ! If spin-flip, swap the KS alpha-alpha and beta-beta quadrants.
    1212          58 :       my_dosf = .FALSE.
    1213          58 :       IF (PRESENT(do_sf)) my_dosf = do_sf
    1214           2 :       IF (my_dosf .AND. do_os) THEN
    1215           2 :          m_ks(1)%matrix => matrix_ks(2)%matrix
    1216           2 :          m_ks(2)%matrix => matrix_ks(1)%matrix
    1217             :       END IF
    1218             : 
    1219             : !  Creating the symmetric matrix A (and work)
    1220             :       CALL dbcsr_create(matrix=matrix_a, name="MATRIX A", matrix_type=dbcsr_type_symmetric, &
    1221          58 :                         dist=dist_a, row_blk_size=blk_size_a, col_blk_size=blk_size_a)
    1222             :       CALL dbcsr_create(matrix=work_matrix, name="WORK MAT", matrix_type=dbcsr_type_symmetric, &
    1223          58 :                         dist=dist_a, row_blk_size=blk_size_a, col_blk_size=blk_size_a)
    1224             : 
    1225         126 :       DO ispin = 1, nspins
    1226             : 
    1227             : !     Loop over the blocks of KS and put them on the spin-MO block diagonal of matrix A
    1228          68 :          CALL dbcsr_iterator_start(iter, m_ks(ispin)%matrix)
    1229        9415 :          DO WHILE (dbcsr_iterator_blocks_left(iter))
    1230             : 
    1231        9347 :             CALL dbcsr_iterator_next_block(iter, row=iblk, column=jblk, blk=blk)
    1232             : 
    1233             : !           Get the block
    1234        9347 :             CALL dbcsr_get_block_p(m_ks(ispin)%matrix, iblk, jblk, work_block, found_block)
    1235             : 
    1236        9347 :             IF (found_block) THEN
    1237             : 
    1238             : !              The KS matrix only appears on diagonal of matrix A => loop over II donor MOs
    1239       18704 :                DO imo = 1, ndo_mo
    1240             : 
    1241             : !                 Put the block as it is
    1242             :                   CALL dbcsr_put_block(matrix_a, ((ispin - 1)*ndo_mo + imo - 1)*nblk_row + iblk, &
    1243       18704 :                                        ((ispin - 1)*ndo_mo + imo - 1)*nblk_row + jblk, work_block)
    1244             : 
    1245             :                END DO !imo
    1246             :             END IF !found_block
    1247        9347 :             NULLIFY (work_block)
    1248             :          END DO ! iteration on KS blocks
    1249          68 :          CALL dbcsr_iterator_stop(iter)
    1250             : 
    1251             : !     Loop over the blocks of S and put them on the block diagonal of work
    1252             : 
    1253          68 :          CALL dbcsr_iterator_start(iter, matrix_s(1)%matrix)
    1254        9415 :          DO WHILE (dbcsr_iterator_blocks_left(iter))
    1255             : 
    1256        9347 :             CALL dbcsr_iterator_next_block(iter, row=iblk, column=jblk, blk=blk)
    1257             : 
    1258             : !           Get the block
    1259        9347 :             CALL dbcsr_get_block_p(matrix_s(1)%matrix, iblk, jblk, work_block, found_block)
    1260             : 
    1261        9347 :             IF (found_block) THEN
    1262             : 
    1263             : !              Add S matrix on block diagonal as epsilon_ii*S_pq
    1264       18704 :                DO imo = 1, ndo_mo
    1265             : 
    1266             :                   CALL dbcsr_put_block(work_matrix, ((ispin - 1)*ndo_mo + imo - 1)*nblk_row + iblk, &
    1267             :                                        ((ispin - 1)*ndo_mo + imo - 1)*nblk_row + jblk, &
    1268      210760 :                                        donor_state%gw2x_evals(imo, ispin)*work_block)
    1269             :                END DO !imo
    1270             :             END IF !found block
    1271        9347 :             NULLIFY (work_block)
    1272             :          END DO ! iteration on S blocks
    1273         262 :          CALL dbcsr_iterator_stop(iter)
    1274             : 
    1275             :       END DO !ispin
    1276          58 :       CALL dbcsr_finalize(matrix_a)
    1277          58 :       CALL dbcsr_finalize(work_matrix)
    1278             : 
    1279             : !  Take matrix_a = matrix_a - work
    1280          58 :       CALL dbcsr_add(matrix_a, work_matrix, 1.0_dp, -1.0_dp)
    1281          58 :       CALL dbcsr_finalize(matrix_a)
    1282             : 
    1283             : !  Clean-up
    1284          58 :       CALL dbcsr_release(work_matrix)
    1285          58 :       DEALLOCATE (m_ks)
    1286             : 
    1287          58 :       CALL timestop(handle)
    1288             : 
    1289          58 :    END SUBROUTINE build_gs_contribution
    1290             : 
    1291             : ! **************************************************************************************************
    1292             : !> \brief Creates the metric (aka  matrix G) needed for the generalized eigenvalue problem and inverse
    1293             : !>         => G_{pis,qjt} = S_pq*delta_ij*delta_st
    1294             : !> \param matrix_g dbcsr matrix containing G
    1295             : !> \param donor_state ...
    1296             : !> \param qs_env ...
    1297             : !> \param do_os if open-shell calculation
    1298             : !> \param do_inv if the inverse of G should be computed
    1299             : ! **************************************************************************************************
    1300         168 :    SUBROUTINE build_metric(matrix_g, donor_state, qs_env, do_os, do_inv)
    1301             : 
    1302             :       TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: matrix_g
    1303             :       TYPE(donor_state_type), POINTER                    :: donor_state
    1304             :       TYPE(qs_environment_type), POINTER                 :: qs_env
    1305             :       LOGICAL, INTENT(IN)                                :: do_os
    1306             :       LOGICAL, INTENT(IN), OPTIONAL                      :: do_inv
    1307             : 
    1308             :       CHARACTER(len=*), PARAMETER                        :: routineN = 'build_metric'
    1309             : 
    1310             :       INTEGER                                            :: blk, handle, i, iblk, jblk, nao, &
    1311             :                                                             nblk_row, ndo_mo, nspins
    1312          56 :       INTEGER, DIMENSION(:), POINTER                     :: blk_size_g, row_blk_size
    1313             :       LOGICAL                                            :: found_block, my_do_inv
    1314          56 :       REAL(dp), DIMENSION(:), POINTER                    :: work_block
    1315             :       TYPE(cp_blacs_env_type), POINTER                   :: blacs_env
    1316             :       TYPE(dbcsr_distribution_type), POINTER             :: dist_g
    1317             :       TYPE(dbcsr_iterator_type)                          :: iter
    1318          56 :       TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: matrix_s
    1319             :       TYPE(dbcsr_type)                                   :: matrix_sinv
    1320             :       TYPE(mp_para_env_type), POINTER                    :: para_env
    1321             : 
    1322          56 :       NULLIFY (matrix_s, row_blk_size, work_block, para_env, blacs_env, dist_g, blk_size_g)
    1323             : 
    1324          56 :       CALL timeset(routineN, handle)
    1325             : 
    1326             : !  Initialization
    1327          56 :       nspins = 1; IF (do_os) nspins = 2
    1328          56 :       ndo_mo = donor_state%ndo_mo
    1329          56 :       CALL get_qs_env(qs_env=qs_env, matrix_s=matrix_s)
    1330          56 :       CALL dbcsr_get_info(matrix_s(1)%matrix, row_blk_size=row_blk_size, nfullrows_total=nao)
    1331          56 :       nblk_row = SIZE(row_blk_size)
    1332          56 :       my_do_inv = .FALSE.
    1333          56 :       IF (PRESENT(do_inv)) my_do_inv = do_inv
    1334          56 :       dist_g => donor_state%dbcsr_dist
    1335          56 :       blk_size_g => donor_state%blk_size
    1336             : 
    1337             : !  Creating the symmetric  matrices G and G^-1 with the right size and distribution
    1338          56 :       ALLOCATE (matrix_g(1)%matrix)
    1339             :       CALL dbcsr_create(matrix=matrix_g(1)%matrix, name="MATRIX G", matrix_type=dbcsr_type_symmetric, &
    1340          56 :                         dist=dist_g, row_blk_size=blk_size_g, col_blk_size=blk_size_g)
    1341             : 
    1342             : !  Fill the matrices G by looping over the block of S and putting them on the diagonal
    1343          56 :       CALL dbcsr_iterator_start(iter, matrix_s(1)%matrix)
    1344        9384 :       DO WHILE (dbcsr_iterator_blocks_left(iter))
    1345             : 
    1346        9328 :          CALL dbcsr_iterator_next_block(iter, row=iblk, column=jblk, blk=blk)
    1347             : 
    1348             : !        Get the block
    1349        9328 :          CALL dbcsr_get_block_p(matrix_s(1)%matrix, iblk, jblk, work_block, found_block)
    1350             : 
    1351        9328 :          IF (found_block) THEN
    1352             : 
    1353             : !           Go over the diagonal of G => donor MOs ii, spin ss
    1354       18679 :             DO i = 1, ndo_mo*nspins
    1355       18679 :                CALL dbcsr_put_block(matrix_g(1)%matrix, (i - 1)*nblk_row + iblk, (i - 1)*nblk_row + jblk, work_block)
    1356             :             END DO
    1357             : 
    1358             :          END IF
    1359        9328 :          NULLIFY (work_block)
    1360             : 
    1361             :       END DO ! dbcsr_iterator
    1362          56 :       CALL dbcsr_iterator_stop(iter)
    1363             : 
    1364             : !  Finalize
    1365          56 :       CALL dbcsr_finalize(matrix_g(1)%matrix)
    1366             : 
    1367             : !  If the inverse of G is required, do the same as above with the inverse
    1368          56 :       IF (my_do_inv) THEN
    1369             : 
    1370           6 :          CPASSERT(SIZE(matrix_g) == 2)
    1371             : 
    1372             :          ! Create the matrix
    1373           6 :          ALLOCATE (matrix_g(2)%matrix)
    1374             :          CALL dbcsr_create(matrix=matrix_g(2)%matrix, name="MATRIX GINV", &
    1375             :                            matrix_type=dbcsr_type_symmetric, dist=dist_g, &
    1376           6 :                            row_blk_size=blk_size_g, col_blk_size=blk_size_g)
    1377             : 
    1378             :          ! Invert the overlap matrix
    1379           6 :          CALL get_qs_env(qs_env, para_env=para_env, blacs_env=blacs_env)
    1380           6 :          CALL dbcsr_copy(matrix_sinv, matrix_s(1)%matrix)
    1381           6 :          CALL cp_dbcsr_cholesky_decompose(matrix_sinv, para_env=para_env, blacs_env=blacs_env)
    1382           6 :          CALL cp_dbcsr_cholesky_invert(matrix_sinv, para_env=para_env, blacs_env=blacs_env, upper_to_full=.TRUE.)
    1383             : 
    1384             : !     Fill the matrices G^-1 by looping over the block of S^-1 and putting them on the diagonal
    1385           6 :          CALL dbcsr_iterator_start(iter, matrix_sinv)
    1386          24 :          DO WHILE (dbcsr_iterator_blocks_left(iter))
    1387             : 
    1388          18 :             CALL dbcsr_iterator_next_block(iter, row=iblk, column=jblk, blk=blk)
    1389             : 
    1390             : !           Get the block
    1391          18 :             CALL dbcsr_get_block_p(matrix_sinv, iblk, jblk, work_block, found_block)
    1392             : 
    1393          18 :             IF (found_block) THEN
    1394             : 
    1395             : !              Go over the diagonal of G => donor MOs ii spin ss
    1396          36 :                DO i = 1, ndo_mo*nspins
    1397          36 :                   CALL dbcsr_put_block(matrix_g(2)%matrix, (i - 1)*nblk_row + iblk, (i - 1)*nblk_row + jblk, work_block)
    1398             :                END DO
    1399             : 
    1400             :             END IF
    1401          18 :             NULLIFY (work_block)
    1402             : 
    1403             :          END DO ! dbcsr_iterator
    1404           6 :          CALL dbcsr_iterator_stop(iter)
    1405             : 
    1406             :          !  Finalize
    1407           6 :          CALL dbcsr_finalize(matrix_g(2)%matrix)
    1408             : 
    1409             :          !  Clean-up
    1410           6 :          CALL dbcsr_release(matrix_sinv)
    1411             :       END IF !do_inv
    1412             : 
    1413          56 :       CALL timestop(handle)
    1414             : 
    1415          56 :    END SUBROUTINE build_metric
    1416             : 
    1417             : ! **************************************************************************************************
    1418             : !> \brief Builds the auxiliary matrix (A-D+E)^+0.5 needed for the transofrmation of the
    1419             : !>        full-TDDFT problem into an Hermitian one
    1420             : !> \param threshold a threshold for allowed negative eigenvalues
    1421             : !> \param sx the amount of exact exchange
    1422             : !> \param matrix_a the ground state contribution matrix A
    1423             : !> \param matrix_d the on-diagonal exchange kernel matrix (ab|IJ)
    1424             : !> \param matrix_e the off-diagonal exchange kernel matrix (aJ|Ib)
    1425             : !> \param do_hfx if exact exchange is included
    1426             : !> \param proj_Q ...
    1427             : !> \param work ...
    1428             : !> \param donor_state ...
    1429             : !> \param eps_filter for the dbcsr multiplication
    1430             : !> \param qs_env ...
    1431             : ! **************************************************************************************************
    1432           6 :    SUBROUTINE build_aux_matrix(threshold, sx, matrix_a, matrix_d, matrix_e, do_hfx, proj_Q, &
    1433             :                                work, donor_state, eps_filter, qs_env)
    1434             : 
    1435             :       REAL(dp), INTENT(IN)                               :: threshold, sx
    1436             :       TYPE(dbcsr_type), INTENT(INOUT)                    :: matrix_a, matrix_d, matrix_e
    1437             :       LOGICAL, INTENT(IN)                                :: do_hfx
    1438             :       TYPE(dbcsr_type), INTENT(INOUT)                    :: proj_Q, work
    1439             :       TYPE(donor_state_type), POINTER                    :: donor_state
    1440             :       REAL(dp), INTENT(IN)                               :: eps_filter
    1441             :       TYPE(qs_environment_type), POINTER                 :: qs_env
    1442             : 
    1443             :       CHARACTER(len=*), PARAMETER                        :: routineN = 'build_aux_matrix'
    1444             : 
    1445             :       INTEGER                                            :: handle, ndep
    1446             :       REAL(dp)                                           :: evals(2)
    1447             :       TYPE(cp_blacs_env_type), POINTER                   :: blacs_env
    1448             :       TYPE(dbcsr_type)                                   :: tmp_mat
    1449             :       TYPE(mp_para_env_type), POINTER                    :: para_env
    1450             : 
    1451           6 :       NULLIFY (blacs_env, para_env)
    1452             : 
    1453           6 :       CALL timeset(routineN, handle)
    1454             : 
    1455           6 :       CALL dbcsr_copy(tmp_mat, matrix_a)
    1456           6 :       IF (do_hfx) THEN
    1457           6 :          CALL dbcsr_add(tmp_mat, matrix_d, 1.0_dp, -1.0_dp*sx) !scaled hfx
    1458           6 :          CALL dbcsr_add(tmp_mat, matrix_e, 1.0_dp, 1.0_dp*sx)
    1459             :       END IF
    1460             : 
    1461             :       ! Take the product with the Q projector:
    1462           6 :       CALL dbcsr_multiply('N', 'N', 1.0_dp, proj_Q, tmp_mat, 0.0_dp, work, filter_eps=eps_filter)
    1463           6 :       CALL dbcsr_multiply('N', 'T', 1.0_dp, work, proj_Q, 0.0_dp, tmp_mat, filter_eps=eps_filter)
    1464             : 
    1465             :       ! Actually computing and storing the auxiliary matrix
    1466           6 :       ALLOCATE (donor_state%matrix_aux)
    1467           6 :       CALL dbcsr_create(matrix=donor_state%matrix_aux, template=matrix_a, name="MAT AUX")
    1468             : 
    1469           6 :       CALL get_qs_env(qs_env, para_env=para_env, blacs_env=blacs_env)
    1470             : 
    1471             :       ! good quality sqrt
    1472           6 :       CALL cp_dbcsr_power(tmp_mat, 0.5_dp, threshold, ndep, para_env, blacs_env, eigenvalues=evals)
    1473             : 
    1474           6 :       CALL dbcsr_copy(donor_state%matrix_aux, tmp_mat)
    1475             : 
    1476             :       ! Warn the user if matrix not positive semi-definite
    1477           6 :       IF (evals(1) < 0.0_dp .AND. ABS(evals(1)) > threshold) THEN
    1478           0 :          CPWARN("The full TDDFT problem might not have been soundly turned Hermitian. Try TDA.")
    1479             :       END IF
    1480             : 
    1481             :       ! clean-up
    1482           6 :       CALL dbcsr_release(tmp_mat)
    1483             : 
    1484           6 :       CALL timestop(handle)
    1485             : 
    1486           6 :    END SUBROUTINE build_aux_matrix
    1487             : 
    1488             : ! **************************************************************************************************
    1489             : !> \brief Includes the SOC effects on the precomputed spin-conserving and spin-flip excitations
    1490             : !>        from an open-shell calculation (UKS or ROKS). This is a perturbative treatment
    1491             : !> \param donor_state ...
    1492             : !> \param xas_tdp_env ...
    1493             : !> \param xas_tdp_control ...
    1494             : !> \param qs_env ...
    1495             : !> \note Using AMEWs, build an hermitian matrix with all excited states SOC coupling + the
    1496             : !>       excitation energies on the diagonal. Then diagonalize it to get the new excitation
    1497             : !>       energies and corresponding linear combinations of lr_coeffs.
    1498             : !>       The AMEWs are normalized
    1499             : !>       Only for open-shell calculations
    1500             : ! **************************************************************************************************
    1501           2 :    SUBROUTINE include_os_soc(donor_state, xas_tdp_env, xas_tdp_control, qs_env)
    1502             : 
    1503             :       TYPE(donor_state_type), POINTER                    :: donor_state
    1504             :       TYPE(xas_tdp_env_type), POINTER                    :: xas_tdp_env
    1505             :       TYPE(xas_tdp_control_type), POINTER                :: xas_tdp_control
    1506             :       TYPE(qs_environment_type), POINTER                 :: qs_env
    1507             : 
    1508             :       CHARACTER(len=*), PARAMETER                        :: routineN = 'include_os_soc'
    1509             : 
    1510             :       INTEGER                                            :: group, handle, homo, iex, isc, isf, nao, &
    1511             :                                                             ndo_mo, ndo_so, nex, npcols, nprows, &
    1512             :                                                             nsc, nsf, ntot, tas(2), tbs(2)
    1513           2 :       INTEGER, DIMENSION(:), POINTER                     :: col_blk_size, col_dist, row_blk_size, &
    1514           2 :                                                             row_dist, row_dist_new
    1515           2 :       INTEGER, DIMENSION(:, :), POINTER                  :: pgrid
    1516             :       LOGICAL                                            :: do_roks, do_uks
    1517             :       REAL(dp)                                           :: eps_filter, gs_sum, soc
    1518           2 :       REAL(dp), ALLOCATABLE, DIMENSION(:)                :: diag, tmp_evals
    1519           2 :       REAL(dp), ALLOCATABLE, DIMENSION(:, :)             :: domo_soc_x, domo_soc_y, domo_soc_z, &
    1520           2 :                                                             gsex_block
    1521           2 :       REAL(dp), DIMENSION(:), POINTER                    :: sc_evals, sf_evals
    1522             :       TYPE(cp_blacs_env_type), POINTER                   :: blacs_env
    1523             :       TYPE(cp_cfm_type)                                  :: evecs_cfm, pert_cfm
    1524             :       TYPE(cp_fm_struct_type), POINTER                   :: full_struct, gsex_struct, prod_struct, &
    1525             :                                                             vec_struct, work_struct
    1526             :       TYPE(cp_fm_type)                                   :: gsex_fm, img_fm, prod_work, real_fm, &
    1527             :                                                             vec_soc_x, vec_soc_y, vec_soc_z, &
    1528             :                                                             vec_work, work_fm
    1529             :       TYPE(cp_fm_type), POINTER                          :: gs_coeffs, mo_coeff, sc_coeffs, sf_coeffs
    1530             :       TYPE(dbcsr_distribution_type), POINTER             :: coeffs_dist, dbcsr_dist, prod_dist
    1531           2 :       TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: matrix_s
    1532             :       TYPE(dbcsr_soc_package_type)                       :: dbcsr_soc_package
    1533             :       TYPE(dbcsr_type), POINTER                          :: dbcsr_ovlp, dbcsr_prod, dbcsr_sc, &
    1534             :                                                             dbcsr_sf, dbcsr_tmp, dbcsr_work, &
    1535             :                                                             orb_soc_x, orb_soc_y, orb_soc_z
    1536           2 :       TYPE(mo_set_type), DIMENSION(:), POINTER           :: mos
    1537             :       TYPE(mp_para_env_type), POINTER                    :: para_env
    1538             : 
    1539           2 :       NULLIFY (gs_coeffs, sc_coeffs, sf_coeffs, matrix_s, orb_soc_x, orb_soc_y, orb_soc_z, mos)
    1540           2 :       NULLIFY (full_struct, para_env, blacs_env, mo_coeff, sc_evals, sf_evals, vec_struct, prod_struct)
    1541           2 :       NULLIFY (work_struct, gsex_struct, col_dist, row_dist)
    1542           2 :       NULLIFY (col_blk_size, row_blk_size, row_dist_new, pgrid, dbcsr_sc, dbcsr_sf, dbcsr_work)
    1543           2 :       NULLIFY (dbcsr_tmp, dbcsr_ovlp, dbcsr_prod)
    1544             : 
    1545           2 :       CALL timeset(routineN, handle)
    1546             : 
    1547             : ! Initialization
    1548           2 :       sc_coeffs => donor_state%sc_coeffs
    1549           2 :       sf_coeffs => donor_state%sf_coeffs
    1550           2 :       sc_evals => donor_state%sc_evals
    1551           2 :       sf_evals => donor_state%sf_evals
    1552           2 :       nsc = SIZE(sc_evals)
    1553           2 :       nsf = SIZE(sf_evals)
    1554           2 :       ntot = 1 + nsc + nsf
    1555           2 :       nex = nsc !by contrutciotn nsc == nsf, but keep 2 counts for clarity
    1556           2 :       ndo_mo = donor_state%ndo_mo
    1557           2 :       ndo_so = 2*ndo_mo
    1558           2 :       CALL get_qs_env(qs_env, para_env=para_env, blacs_env=blacs_env, mos=mos, matrix_s=matrix_s)
    1559           2 :       CALL dbcsr_get_info(matrix_s(1)%matrix, nfullrows_total=nao)
    1560           2 :       orb_soc_x => xas_tdp_env%orb_soc(1)%matrix
    1561           2 :       orb_soc_y => xas_tdp_env%orb_soc(2)%matrix
    1562           2 :       orb_soc_z => xas_tdp_env%orb_soc(3)%matrix
    1563           2 :       do_roks = xas_tdp_control%do_roks
    1564           2 :       do_uks = xas_tdp_control%do_uks
    1565           2 :       eps_filter = xas_tdp_control%eps_filter
    1566             : 
    1567             :       ! For the GS coeffs, we use the same structure both for ROKS and UKS here => allows us to write
    1568             :       ! general code later on, and not use IF (do_roks) statements every second line
    1569           2 :       IF (do_uks) gs_coeffs => donor_state%gs_coeffs
    1570           2 :       IF (do_roks) THEN
    1571             :          CALL cp_fm_struct_create(vec_struct, context=blacs_env, para_env=para_env, &
    1572           0 :                                   nrow_global=nao, ncol_global=ndo_so)
    1573           0 :          ALLOCATE (gs_coeffs)
    1574           0 :          CALL cp_fm_create(gs_coeffs, vec_struct)
    1575             : 
    1576             :          ! only alpha donor MOs are stored, need to copy them intoboth the alpha and the beta slot
    1577             :          CALL cp_fm_to_fm_submat(msource=donor_state%gs_coeffs, mtarget=gs_coeffs, nrow=nao, &
    1578             :                                  ncol=ndo_mo, s_firstrow=1, s_firstcol=1, t_firstrow=1, &
    1579           0 :                                  t_firstcol=1)
    1580             :          CALL cp_fm_to_fm_submat(msource=donor_state%gs_coeffs, mtarget=gs_coeffs, nrow=nao, &
    1581             :                                  ncol=ndo_mo, s_firstrow=1, s_firstcol=1, t_firstrow=1, &
    1582           0 :                                  t_firstcol=ndo_mo + 1)
    1583             : 
    1584           0 :          CALL cp_fm_struct_release(vec_struct)
    1585             :       END IF
    1586             : 
    1587             : ! Creating the real and the imaginary part of the SOC perturbation matrix
    1588             :       CALL cp_fm_struct_create(full_struct, context=blacs_env, para_env=para_env, &
    1589           2 :                                nrow_global=ntot, ncol_global=ntot)
    1590           2 :       CALL cp_fm_create(real_fm, full_struct)
    1591           2 :       CALL cp_fm_create(img_fm, full_struct)
    1592             : 
    1593             : ! Put the excitation energies on the diagonal of the real  matrix. Element 1,1 is the ground state
    1594          26 :       DO isc = 1, nsc
    1595          26 :          CALL cp_fm_set_element(real_fm, 1 + isc, 1 + isc, sc_evals(isc))
    1596             :       END DO
    1597          26 :       DO isf = 1, nsf
    1598          26 :          CALL cp_fm_set_element(real_fm, 1 + nsc + isf, 1 + nsc + isf, sf_evals(isf))
    1599             :       END DO
    1600             : 
    1601             : ! Create the bdcsr machinery
    1602           2 :       CALL get_qs_env(qs_env, dbcsr_dist=dbcsr_dist)
    1603             :       CALL dbcsr_distribution_get(dbcsr_dist, group=group, row_dist=row_dist, pgrid=pgrid, &
    1604           2 :                                   npcols=npcols, nprows=nprows)
    1605           8 :       ALLOCATE (col_dist(nex), row_dist_new(nex))
    1606          26 :       DO iex = 1, nex
    1607          24 :          col_dist(iex) = MODULO(npcols - iex, npcols)
    1608          26 :          row_dist_new(iex) = MODULO(nprows - iex, nprows)
    1609             :       END DO
    1610           2 :       ALLOCATE (coeffs_dist, prod_dist)
    1611             :       CALL dbcsr_distribution_new(coeffs_dist, group=group, pgrid=pgrid, row_dist=row_dist, &
    1612           2 :                                   col_dist=col_dist)
    1613             :       CALL dbcsr_distribution_new(prod_dist, group=group, pgrid=pgrid, row_dist=row_dist_new, &
    1614           2 :                                   col_dist=col_dist)
    1615             : 
    1616             :       !Create the matrices
    1617           4 :       ALLOCATE (col_blk_size(nex))
    1618          26 :       col_blk_size = ndo_so
    1619           2 :       CALL dbcsr_get_info(matrix_s(1)%matrix, row_blk_size=row_blk_size)
    1620             : 
    1621           2 :       ALLOCATE (dbcsr_sc, dbcsr_sf, dbcsr_work, dbcsr_ovlp, dbcsr_tmp, dbcsr_prod)
    1622             :       CALL dbcsr_create(matrix=dbcsr_sc, name="SPIN CONS", matrix_type=dbcsr_type_no_symmetry, &
    1623           2 :                         dist=coeffs_dist, row_blk_size=row_blk_size, col_blk_size=col_blk_size)
    1624             :       CALL dbcsr_create(matrix=dbcsr_sf, name="SPIN FLIP", matrix_type=dbcsr_type_no_symmetry, &
    1625           2 :                         dist=coeffs_dist, row_blk_size=row_blk_size, col_blk_size=col_blk_size)
    1626             :       CALL dbcsr_create(matrix=dbcsr_work, name="WORK", matrix_type=dbcsr_type_no_symmetry, &
    1627           2 :                         dist=coeffs_dist, row_blk_size=row_blk_size, col_blk_size=col_blk_size)
    1628             :       CALL dbcsr_create(matrix=dbcsr_prod, name="PROD", matrix_type=dbcsr_type_no_symmetry, &
    1629           2 :                         dist=prod_dist, row_blk_size=col_blk_size, col_blk_size=col_blk_size)
    1630             :       CALL dbcsr_create(matrix=dbcsr_ovlp, name="OVLP", matrix_type=dbcsr_type_no_symmetry, &
    1631           2 :                         dist=prod_dist, row_blk_size=col_blk_size, col_blk_size=col_blk_size)
    1632             : 
    1633          26 :       col_blk_size = 1
    1634             :       CALL dbcsr_create(matrix=dbcsr_tmp, name="TMP", matrix_type=dbcsr_type_no_symmetry, &
    1635           2 :                         dist=prod_dist, row_blk_size=col_blk_size, col_blk_size=col_blk_size)
    1636           2 :       CALL dbcsr_reserve_all_blocks(dbcsr_tmp)
    1637             : 
    1638           2 :       dbcsr_soc_package%dbcsr_sc => dbcsr_sc
    1639           2 :       dbcsr_soc_package%dbcsr_sf => dbcsr_sf
    1640           2 :       dbcsr_soc_package%dbcsr_work => dbcsr_work
    1641           2 :       dbcsr_soc_package%dbcsr_ovlp => dbcsr_ovlp
    1642           2 :       dbcsr_soc_package%dbcsr_prod => dbcsr_prod
    1643           2 :       dbcsr_soc_package%dbcsr_tmp => dbcsr_tmp
    1644             : 
    1645             :       !Filling the coeffs matrices by copying from the stored fms
    1646           2 :       CALL copy_fm_to_dbcsr(sc_coeffs, dbcsr_sc)
    1647           2 :       CALL copy_fm_to_dbcsr(sf_coeffs, dbcsr_sf)
    1648             : 
    1649             : ! Precompute what we can before looping over excited states.
    1650             :       ! Need to compute the scalar: sum_i sum_sigma <phi^0_i,sigma|SOC|phi^0_i,sigma>, where all
    1651             :       ! occupied MOs are taken into account
    1652             : 
    1653             :       !start with the alpha MOs
    1654           2 :       CALL get_mo_set(mos(1), mo_coeff=mo_coeff, homo=homo)
    1655           6 :       ALLOCATE (diag(homo))
    1656           2 :       CALL cp_fm_get_info(mo_coeff, matrix_struct=vec_struct)
    1657             :       CALL cp_fm_struct_create(prod_struct, context=blacs_env, para_env=para_env, &
    1658           2 :                                nrow_global=homo, ncol_global=homo)
    1659           2 :       CALL cp_fm_create(vec_work, vec_struct)
    1660           2 :       CALL cp_fm_create(prod_work, prod_struct)
    1661             : 
    1662             :       ! <alpha|SOC_z|alpha> => spin integration yields +1
    1663           2 :       CALL cp_dbcsr_sm_fm_multiply(orb_soc_z, mo_coeff, vec_work, ncol=homo)
    1664           2 :       CALL parallel_gemm('T', 'N', homo, homo, nao, 1.0_dp, mo_coeff, vec_work, 0.0_dp, prod_work)
    1665           2 :       CALL cp_fm_get_diag(prod_work, diag)
    1666          20 :       gs_sum = SUM(diag)
    1667             : 
    1668           2 :       CALL cp_fm_release(vec_work)
    1669           2 :       CALL cp_fm_release(prod_work)
    1670           2 :       CALL cp_fm_struct_release(prod_struct)
    1671           2 :       DEALLOCATE (diag)
    1672           2 :       NULLIFY (vec_struct)
    1673             : 
    1674             :       ! Now do the same with the beta gs coeffs
    1675           2 :       CALL get_mo_set(mos(2), mo_coeff=mo_coeff, homo=homo)
    1676           6 :       ALLOCATE (diag(homo))
    1677           2 :       CALL cp_fm_get_info(mo_coeff, matrix_struct=vec_struct)
    1678             :       CALL cp_fm_struct_create(prod_struct, context=blacs_env, para_env=para_env, &
    1679           2 :                                nrow_global=homo, ncol_global=homo)
    1680           2 :       CALL cp_fm_create(vec_work, vec_struct)
    1681           2 :       CALL cp_fm_create(prod_work, prod_struct)
    1682             : 
    1683             :       ! <beta|SOC_z|beta> => spin integration yields -1
    1684           2 :       CALL cp_dbcsr_sm_fm_multiply(orb_soc_z, mo_coeff, vec_work, ncol=homo)
    1685           2 :       CALL parallel_gemm('T', 'N', homo, homo, nao, 1.0_dp, mo_coeff, vec_work, 0.0_dp, prod_work)
    1686           2 :       CALL cp_fm_get_diag(prod_work, diag)
    1687          20 :       gs_sum = gs_sum - SUM(diag) ! -1 because of spin integration
    1688             : 
    1689           2 :       CALL cp_fm_release(vec_work)
    1690           2 :       CALL cp_fm_release(prod_work)
    1691           2 :       CALL cp_fm_struct_release(prod_struct)
    1692           2 :       DEALLOCATE (diag)
    1693             : 
    1694             :       ! Need to compute: <phi^0_Isigma|SOC|phi^0_Jtau> for the donor MOs
    1695             : 
    1696             :       CALL cp_fm_struct_create(vec_struct, context=blacs_env, para_env=para_env, &
    1697           2 :                                nrow_global=nao, ncol_global=ndo_so)
    1698             :       CALL cp_fm_struct_create(prod_struct, context=blacs_env, para_env=para_env, &
    1699           2 :                                nrow_global=ndo_so, ncol_global=ndo_so)
    1700           2 :       CALL cp_fm_create(vec_soc_x, vec_struct) ! for SOC_x*gs_coeffs
    1701           2 :       CALL cp_fm_create(vec_soc_y, vec_struct) ! for SOC_y*gs_coeffs
    1702           2 :       CALL cp_fm_create(vec_soc_z, vec_struct) ! for SOC_z*gs_coeffs
    1703           2 :       CALL cp_fm_create(prod_work, prod_struct)
    1704           6 :       ALLOCATE (diag(ndo_so))
    1705             : 
    1706          16 :       ALLOCATE (domo_soc_x(ndo_so, ndo_so), domo_soc_y(ndo_so, ndo_so), domo_soc_z(ndo_so, ndo_so))
    1707             : 
    1708           2 :       CALL cp_dbcsr_sm_fm_multiply(orb_soc_x, gs_coeffs, vec_soc_x, ncol=ndo_so)
    1709           2 :       CALL parallel_gemm('T', 'N', ndo_so, ndo_so, nao, 1.0_dp, gs_coeffs, vec_soc_x, 0.0_dp, prod_work)
    1710           2 :       CALL cp_fm_get_submatrix(prod_work, domo_soc_x)
    1711             : 
    1712           2 :       CALL cp_dbcsr_sm_fm_multiply(orb_soc_y, gs_coeffs, vec_soc_y, ncol=ndo_so)
    1713           2 :       CALL parallel_gemm('T', 'N', ndo_so, ndo_so, nao, 1.0_dp, gs_coeffs, vec_soc_y, 0.0_dp, prod_work)
    1714           2 :       CALL cp_fm_get_submatrix(prod_work, domo_soc_y)
    1715             : 
    1716           2 :       CALL cp_dbcsr_sm_fm_multiply(orb_soc_z, gs_coeffs, vec_soc_z, ncol=ndo_so)
    1717           2 :       CALL parallel_gemm('T', 'N', ndo_so, ndo_so, nao, 1.0_dp, gs_coeffs, vec_soc_z, 0.0_dp, prod_work)
    1718           2 :       CALL cp_fm_get_submatrix(prod_work, domo_soc_z)
    1719             : 
    1720             :       ! some more useful work matrices
    1721             :       CALL cp_fm_struct_create(work_struct, context=blacs_env, para_env=para_env, &
    1722           2 :                                nrow_global=nex, ncol_global=nex)
    1723           2 :       CALL cp_fm_create(work_fm, work_struct)
    1724             : 
    1725             : !  Looping over the excited states, computing the SOC and filling the perturbation matrix
    1726             : !  There are 3 loops to do: sc-sc, sc-sf and sf-sf
    1727             : !  The final perturbation matrix is Hermitian, only the upper diagonal is needed
    1728             : 
    1729             :       !need some work matrices for the GS stuff
    1730             :       CALL cp_fm_struct_create(gsex_struct, context=blacs_env, para_env=para_env, &
    1731           2 :                                nrow_global=nex*ndo_so, ncol_global=ndo_so)
    1732           2 :       CALL cp_fm_create(gsex_fm, gsex_struct)
    1733           6 :       ALLOCATE (gsex_block(ndo_so, ndo_so))
    1734             : 
    1735             : !  Start with ground-state/spin-conserving SOC:
    1736             :       !  <Psi_0|SOC|Psi_Jsc> = sum_k,sigma <phi^0_k,sigma|SOC|phi^Jsc_k,sigma>
    1737             : 
    1738             :       !compute -sc_coeffs*SOC_Z*gs_coeffs, minus sign because SOC_z antisymmetric
    1739           2 :       CALL parallel_gemm('T', 'N', nex*ndo_so, ndo_so, nao, -1.0_dp, sc_coeffs, vec_soc_z, 0.0_dp, gsex_fm)
    1740             : 
    1741          26 :       DO isc = 1, nsc
    1742             :          CALL cp_fm_get_submatrix(fm=gsex_fm, target_m=gsex_block, start_row=(isc - 1)*ndo_so + 1, &
    1743          24 :                                   start_col=1, n_rows=ndo_so, n_cols=ndo_so)
    1744          24 :          diag(:) = get_diag(gsex_block)
    1745         168 :          soc = SUM(diag(1:ndo_mo)) - SUM(diag(ndo_mo + 1:ndo_so)) !minus sign because of spin integration
    1746             : 
    1747             :          !purely imaginary contribution
    1748          26 :          CALL cp_fm_set_element(img_fm, 1, 1 + isc, soc)
    1749             :       END DO !isc
    1750             : 
    1751             : !  Then ground-state/spin-flip SOC:
    1752             :       !<Psi_0|SOC|Psi_Jsf> = sum_k,sigma <phi^0_k,sigma|SOC|phi^Jsc_k,tau>   sigma != tau
    1753             : 
    1754             :       !compute  -sc_coeffs*SOC_x*gs_coeffs, imaginary contribution
    1755           2 :       CALL parallel_gemm('T', 'N', nex*ndo_so, ndo_so, nao, -1.0_dp, sc_coeffs, vec_soc_x, 0.0_dp, gsex_fm)
    1756             : 
    1757          26 :       DO isf = 1, nsf
    1758             :          CALL cp_fm_get_submatrix(fm=gsex_fm, target_m=gsex_block, start_row=(isf - 1)*ndo_so + 1, &
    1759          24 :                                   start_col=1, n_rows=ndo_so, n_cols=ndo_so)
    1760          24 :          diag(:) = get_diag(gsex_block)
    1761         168 :          soc = SUM(diag) !alpha and beta parts are simply added due to spin integration
    1762          26 :          CALL cp_fm_set_element(img_fm, 1, 1 + nsc + isf, soc)
    1763             :       END DO !isf
    1764             : 
    1765             :       !compute -sc_coeffs*SOC_y*gs_coeffs, real contribution
    1766           2 :       CALL parallel_gemm('T', 'N', nex*ndo_so, ndo_so, nao, -1.0_dp, sc_coeffs, vec_soc_y, 0.0_dp, gsex_fm)
    1767             : 
    1768          26 :       DO isf = 1, nsf
    1769             :          CALL cp_fm_get_submatrix(fm=gsex_fm, target_m=gsex_block, start_row=(isf - 1)*ndo_so + 1, &
    1770          24 :                                   start_col=1, n_rows=ndo_so, n_cols=ndo_so)
    1771          24 :          diag(:) = get_diag(gsex_block)
    1772          96 :          soc = SUM(diag(1:ndo_mo)) ! alpha-beta
    1773          96 :          soc = soc - SUM(diag(ndo_mo + 1:ndo_so)) !beta-alpha
    1774          26 :          CALL cp_fm_set_element(real_fm, 1, 1 + nsc + isf, soc)
    1775             :       END DO !isf
    1776             : 
    1777             :       !ground-state cleanup
    1778           2 :       CALL cp_fm_release(gsex_fm)
    1779           2 :       CALL cp_fm_release(vec_soc_x)
    1780           2 :       CALL cp_fm_release(vec_soc_y)
    1781           2 :       CALL cp_fm_release(vec_soc_z)
    1782           2 :       CALL cp_fm_release(prod_work)
    1783           2 :       CALL cp_fm_struct_release(gsex_struct)
    1784           2 :       CALL cp_fm_struct_release(prod_struct)
    1785           2 :       CALL cp_fm_struct_release(vec_struct)
    1786           2 :       DEALLOCATE (gsex_block)
    1787             : 
    1788             : !  Then spin-conserving/spin-conserving SOC
    1789             : !  <Psi_Isc|SOC|Psi_Jsc> =
    1790             : !  sum_k,sigma [<psi^Isc_k,sigma|SOC|psi^Jsc_k,sigma> + <psi^Isc_k,sigma|psi^Jsc_k,sigma> * gs_sum]
    1791             : !  - sum_k,l,sigma <psi^0_k,sigma|SOC|psi^0_l,sigma> * <psi^Isc_l,sigma|psi^Jsc_k,sigma>
    1792             : 
    1793             :       !Same spin integration => only SOC_z matters, and the contribution is purely imaginary
    1794           2 :       CALL dbcsr_multiply('N', 'N', 1.0_dp, orb_soc_z, dbcsr_sc, 0.0_dp, dbcsr_work, filter_eps=eps_filter)
    1795           2 :       CALL dbcsr_multiply('T', 'N', 1.0_dp, dbcsr_sc, dbcsr_work, 0.0_dp, dbcsr_prod, filter_eps=eps_filter)
    1796             : 
    1797             :       !the overlap as well
    1798             :       CALL dbcsr_multiply('N', 'N', 1.0_dp, matrix_s(1)%matrix, dbcsr_sc, 0.0_dp, dbcsr_work, &
    1799           2 :                           filter_eps=eps_filter)
    1800           2 :       CALL dbcsr_multiply('T', 'N', 1.0_dp, dbcsr_sc, dbcsr_work, 0.0_dp, dbcsr_ovlp, filter_eps=eps_filter)
    1801             : 
    1802             :       CALL os_amew_soc_elements(dbcsr_tmp, dbcsr_prod, dbcsr_ovlp, domo_soc_z, pref_diaga=1.0_dp, &
    1803             :                                 pref_diagb=-1.0_dp, pref_tracea=-1.0_dp, pref_traceb=1.0_dp, &
    1804           2 :                                 pref_diags=gs_sum, symmetric=.TRUE.)
    1805             : 
    1806           2 :       CALL copy_dbcsr_to_fm(dbcsr_tmp, work_fm)
    1807             :       CALL cp_fm_to_fm_submat(msource=work_fm, mtarget=img_fm, nrow=nex, ncol=nex, s_firstrow=1, &
    1808           2 :                               s_firstcol=1, t_firstrow=2, t_firstcol=2)
    1809             : 
    1810             : !  Then spin-flip/spin-flip SOC
    1811             : !  <Psi_Isf|SOC|Psi_Jsf> =
    1812             : !  sum_k,sigma [<psi^Isf_k,tau|SOC|psi^Jsf_k,tau> + <psi^Isf_k,tau|psi^Jsf_k,tau> * gs_sum]
    1813             : !  - sum_k,l,sigma <psi^0_k,sigma|SOC|psi^0_l,sigma> * <psi^Isf_l,tau|psi^Jsf_k,tau> , tau != sigma
    1814             : 
    1815             :       !Same spin integration => only SOC_z matters, and the contribution is purely imaginary
    1816           2 :       CALL dbcsr_multiply('N', 'N', 1.0_dp, orb_soc_z, dbcsr_sf, 0.0_dp, dbcsr_work, filter_eps=eps_filter)
    1817           2 :       CALL dbcsr_multiply('T', 'N', 1.0_dp, dbcsr_sf, dbcsr_work, 0.0_dp, dbcsr_prod, filter_eps=eps_filter)
    1818             : 
    1819             :       !the overlap as well
    1820             :       CALL dbcsr_multiply('N', 'N', 1.0_dp, matrix_s(1)%matrix, dbcsr_sf, 0.0_dp, &
    1821           2 :                           dbcsr_work, filter_eps=eps_filter)
    1822           2 :       CALL dbcsr_multiply('T', 'N', 1.0_dp, dbcsr_sf, dbcsr_work, 0.0_dp, dbcsr_ovlp, filter_eps=eps_filter)
    1823             : 
    1824             :       !note: the different prefactors are derived from the fact that because of spin-flip, we have
    1825             :       !alpha-gs and beta-lr interaction
    1826             :       CALL os_amew_soc_elements(dbcsr_tmp, dbcsr_prod, dbcsr_ovlp, domo_soc_z, pref_diaga=-1.0_dp, &
    1827             :                                 pref_diagb=1.0_dp, pref_tracea=-1.0_dp, pref_traceb=1.0_dp, &
    1828           2 :                                 pref_diags=gs_sum, symmetric=.TRUE.)
    1829             : 
    1830           2 :       CALL copy_dbcsr_to_fm(dbcsr_tmp, work_fm)
    1831             :       CALL cp_fm_to_fm_submat(msource=work_fm, mtarget=img_fm, nrow=nex, ncol=nex, s_firstrow=1, &
    1832           2 :                               s_firstcol=1, t_firstrow=1 + nsc + 1, t_firstcol=1 + nsc + 1)
    1833             : 
    1834             : !  Finally the spin-conserving/spin-flip interaction
    1835             : ! <Psi_Isc|SOC|Psi_Jsf> =   sum_k,sigma <psi^Isc_k,sigma|SOC|psi^Isf_k,tau>
    1836             : !                           - sum_k,l,sigma <psi^0_k,tau|SOC|psi^0_l,sigma
    1837             : 
    1838           2 :       tas(1) = ndo_mo + 1; tbs(1) = 1
    1839           2 :       tas(2) = 1; tbs(2) = ndo_mo + 1
    1840             : 
    1841             :       !the overlap
    1842             :       CALL dbcsr_multiply('N', 'N', 1.0_dp, matrix_s(1)%matrix, dbcsr_sf, 0.0_dp, &
    1843           2 :                           dbcsr_work, filter_eps=eps_filter)
    1844           2 :       CALL dbcsr_multiply('T', 'N', 1.0_dp, dbcsr_sc, dbcsr_work, 0.0_dp, dbcsr_ovlp, filter_eps=eps_filter)
    1845             : 
    1846             :       !start with the imaginary contribution
    1847           2 :       CALL dbcsr_multiply('N', 'N', 1.0_dp, orb_soc_x, dbcsr_sc, 0.0_dp, dbcsr_work, filter_eps=eps_filter)
    1848           2 :       CALL dbcsr_multiply('T', 'N', 1.0_dp, dbcsr_sf, dbcsr_work, 0.0_dp, dbcsr_prod, filter_eps=eps_filter)
    1849             : 
    1850             :       CALL os_amew_soc_elements(dbcsr_tmp, dbcsr_prod, dbcsr_ovlp, domo_soc_x, pref_diaga=1.0_dp, &
    1851             :                                 pref_diagb=1.0_dp, pref_tracea=-1.0_dp, pref_traceb=-1.0_dp, &
    1852           2 :                                 tracea_start=tas, traceb_start=tbs)
    1853             : 
    1854           2 :       CALL copy_dbcsr_to_fm(dbcsr_tmp, work_fm)
    1855             :       CALL cp_fm_to_fm_submat(msource=work_fm, mtarget=img_fm, nrow=nex, ncol=nex, s_firstrow=1, &
    1856           2 :                               s_firstcol=1, t_firstrow=2, t_firstcol=1 + nsc + 1)
    1857             : 
    1858             :       !follow up with the real contribution
    1859           2 :       CALL dbcsr_multiply('N', 'N', 1.0_dp, orb_soc_y, dbcsr_sf, 0.0_dp, dbcsr_work, filter_eps=eps_filter)
    1860           2 :       CALL dbcsr_multiply('T', 'N', 1.0_dp, dbcsr_sc, dbcsr_work, 0.0_dp, dbcsr_prod, filter_eps=eps_filter)
    1861             : 
    1862             :       CALL os_amew_soc_elements(dbcsr_tmp, dbcsr_prod, dbcsr_ovlp, domo_soc_y, pref_diaga=1.0_dp, &
    1863             :                                 pref_diagb=-1.0_dp, pref_tracea=1.0_dp, pref_traceb=-1.0_dp, &
    1864           2 :                                 tracea_start=tas, traceb_start=tbs)
    1865             : 
    1866           2 :       CALL copy_dbcsr_to_fm(dbcsr_tmp, work_fm)
    1867             :       CALL cp_fm_to_fm_submat(msource=work_fm, mtarget=real_fm, nrow=nex, ncol=nex, s_firstrow=1, &
    1868           2 :                               s_firstcol=1, t_firstrow=2, t_firstcol=1 + nsc + 1)
    1869             : 
    1870             : !  Setting up the complex Hermitian perturbed matrix
    1871           2 :       CALL cp_cfm_create(pert_cfm, full_struct)
    1872           2 :       CALL cp_fm_to_cfm(real_fm, img_fm, pert_cfm)
    1873             : 
    1874           2 :       CALL cp_fm_release(real_fm)
    1875           2 :       CALL cp_fm_release(img_fm)
    1876             : 
    1877             : !  Diagonalize the perturbed matrix
    1878           6 :       ALLOCATE (tmp_evals(ntot))
    1879           2 :       CALL cp_cfm_create(evecs_cfm, full_struct)
    1880           2 :       CALL cp_cfm_heevd(pert_cfm, evecs_cfm, tmp_evals)
    1881             : 
    1882             :       !shift the energies such that the GS has zero and store all that in soc_evals (\wo the GS)
    1883           6 :       ALLOCATE (donor_state%soc_evals(ntot - 1))
    1884          50 :       donor_state%soc_evals(:) = tmp_evals(2:ntot) - tmp_evals(1)
    1885             : 
    1886             : !  The SOC dipole oscillator strengths
    1887             :       CALL compute_soc_dipole_fosc(evecs_cfm, dbcsr_soc_package, donor_state, xas_tdp_env, &
    1888           2 :                                    xas_tdp_control, qs_env, gs_coeffs=gs_coeffs)
    1889             : 
    1890             : !  And quadrupole
    1891           2 :       IF (xas_tdp_control%do_quad) THEN
    1892             :          CALL compute_soc_quadrupole_fosc(evecs_cfm, dbcsr_soc_package, donor_state, xas_tdp_env, &
    1893           0 :                                           xas_tdp_control, qs_env, gs_coeffs=gs_coeffs)
    1894             :       END IF
    1895             : 
    1896             : ! Clean-up
    1897           2 :       CALL cp_cfm_release(pert_cfm)
    1898           2 :       CALL cp_cfm_release(evecs_cfm)
    1899           2 :       CALL cp_fm_struct_release(full_struct)
    1900           2 :       IF (do_roks) THEN
    1901           0 :          CALL cp_fm_release(gs_coeffs)
    1902           0 :          DEALLOCATE (gs_coeffs)
    1903             :       END IF
    1904           2 :       CALL dbcsr_distribution_release(coeffs_dist)
    1905           2 :       CALL dbcsr_distribution_release(prod_dist)
    1906           2 :       CALL dbcsr_release(dbcsr_sc)
    1907           2 :       CALL dbcsr_release(dbcsr_sf)
    1908           2 :       CALL dbcsr_release(dbcsr_prod)
    1909           2 :       CALL dbcsr_release(dbcsr_ovlp)
    1910           2 :       CALL dbcsr_release(dbcsr_tmp)
    1911           2 :       CALL dbcsr_release(dbcsr_work)
    1912           2 :       CALL cp_fm_release(work_fm)
    1913           2 :       CALL cp_fm_struct_release(work_struct)
    1914           2 :       DEALLOCATE (coeffs_dist, prod_dist, col_dist, col_blk_size, row_dist_new)
    1915           2 :       DEALLOCATE (dbcsr_sc, dbcsr_sf, dbcsr_work, dbcsr_prod, dbcsr_ovlp, dbcsr_tmp)
    1916             : 
    1917           2 :       CALL timestop(handle)
    1918             : 
    1919          30 :    END SUBROUTINE include_os_soc
    1920             : 
    1921             : ! **************************************************************************************************
    1922             : !> \brief Includes the SOC effects on the precomputed restricted closed-shell singlet and triplet
    1923             : !>        excitations. This is a perturbative treatmnent
    1924             : !> \param donor_state ...
    1925             : !> \param xas_tdp_env ...
    1926             : !> \param xas_tdp_control ...
    1927             : !> \param qs_env ...
    1928             : !> \note Using AMEWs, build an hermitian matrix with all excited states SOC coupling + the
    1929             : !>       excitation energies on the diagonal. Then diagonalize it to get the new excitation
    1930             : !>       energies and corresponding linear combinations of lr_coeffs.
    1931             : !>       The AMEWs are normalized
    1932             : !>       Only for spin-restricted calculations
    1933             : !>       The ms=-1,+1 triplets are not explicitely computed in the first place. Assume they have
    1934             : !>       the same energy as the ms=0 triplets and apply the spin raising and lowering operators
    1935             : !>       on the latter to get their AMEWs => this is the qusi-degenerate perturbation theory
    1936             : !>       approach by Neese (QDPT)
    1937             : ! **************************************************************************************************
    1938           2 :    SUBROUTINE include_rcs_soc(donor_state, xas_tdp_env, xas_tdp_control, qs_env)
    1939             : 
    1940             :       TYPE(donor_state_type), POINTER                    :: donor_state
    1941             :       TYPE(xas_tdp_env_type), POINTER                    :: xas_tdp_env
    1942             :       TYPE(xas_tdp_control_type), POINTER                :: xas_tdp_control
    1943             :       TYPE(qs_environment_type), POINTER                 :: qs_env
    1944             : 
    1945             :       CHARACTER(len=*), PARAMETER                        :: routineN = 'include_rcs_soc'
    1946             : 
    1947             :       INTEGER                                            :: group, handle, iex, isg, itp, nao, &
    1948             :                                                             ndo_mo, nex, npcols, nprows, nsg, &
    1949             :                                                             ntot, ntp
    1950           2 :       INTEGER, DIMENSION(:), POINTER                     :: col_blk_size, col_dist, row_blk_size, &
    1951           2 :                                                             row_dist, row_dist_new
    1952           2 :       INTEGER, DIMENSION(:, :), POINTER                  :: pgrid
    1953             :       REAL(dp)                                           :: eps_filter, soc_gst, sqrt2
    1954           2 :       REAL(dp), ALLOCATABLE, DIMENSION(:)                :: diag, tmp_evals
    1955           2 :       REAL(dp), ALLOCATABLE, DIMENSION(:, :)             :: domo_soc_x, domo_soc_y, domo_soc_z, &
    1956           2 :                                                             gstp_block
    1957           2 :       REAL(dp), DIMENSION(:), POINTER                    :: sg_evals, tp_evals
    1958             :       TYPE(cp_blacs_env_type), POINTER                   :: blacs_env
    1959             :       TYPE(cp_cfm_type)                                  :: evecs_cfm, hami_cfm
    1960             :       TYPE(cp_fm_struct_type), POINTER                   :: full_struct, gstp_struct, prod_struct, &
    1961             :                                                             vec_struct, work_struct
    1962             :       TYPE(cp_fm_type)                                   :: gstp_fm, img_fm, prod_fm, real_fm, &
    1963             :                                                             tmp_fm, vec_soc_x, vec_soc_y, &
    1964             :                                                             vec_soc_z, work_fm
    1965             :       TYPE(cp_fm_type), POINTER                          :: gs_coeffs, sg_coeffs, tp_coeffs
    1966             :       TYPE(dbcsr_distribution_type), POINTER             :: coeffs_dist, dbcsr_dist, prod_dist
    1967           2 :       TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: matrix_s
    1968             :       TYPE(dbcsr_soc_package_type)                       :: dbcsr_soc_package
    1969             :       TYPE(dbcsr_type), POINTER                          :: dbcsr_ovlp, dbcsr_prod, dbcsr_sg, &
    1970             :                                                             dbcsr_tmp, dbcsr_tp, dbcsr_work, &
    1971             :                                                             orb_soc_x, orb_soc_y, orb_soc_z
    1972             :       TYPE(mp_para_env_type), POINTER                    :: para_env
    1973             : 
    1974           2 :       NULLIFY (sg_coeffs, tp_coeffs, gs_coeffs, sg_evals, tp_evals, full_struct)
    1975           2 :       NULLIFY (para_env, blacs_env, prod_struct, vec_struct, orb_soc_y, orb_soc_z)
    1976           2 :       NULLIFY (matrix_s, orb_soc_x)
    1977           2 :       NULLIFY (work_struct, dbcsr_dist, coeffs_dist, prod_dist, pgrid)
    1978           2 :       NULLIFY (col_dist, row_dist, col_blk_size, row_blk_size, row_dist_new, gstp_struct)
    1979           2 :       NULLIFY (dbcsr_tp, dbcsr_sg, dbcsr_prod, dbcsr_work, dbcsr_ovlp, dbcsr_tmp)
    1980             : 
    1981           2 :       CALL timeset(routineN, handle)
    1982             : 
    1983             : !  Initialization
    1984           2 :       CPASSERT(ASSOCIATED(xas_tdp_control))
    1985           2 :       gs_coeffs => donor_state%gs_coeffs
    1986           2 :       sg_coeffs => donor_state%sg_coeffs
    1987           2 :       tp_coeffs => donor_state%tp_coeffs
    1988           2 :       sg_evals => donor_state%sg_evals
    1989           2 :       tp_evals => donor_state%tp_evals
    1990           2 :       nsg = SIZE(sg_evals)
    1991           2 :       ntp = SIZE(tp_evals)
    1992           2 :       ntot = 1 + nsg + 3*ntp
    1993           2 :       ndo_mo = donor_state%ndo_mo
    1994           2 :       CALL get_qs_env(qs_env, matrix_s=matrix_s)
    1995           2 :       CALL dbcsr_get_info(matrix_s(1)%matrix, nfullrows_total=nao)
    1996           2 :       orb_soc_x => xas_tdp_env%orb_soc(1)%matrix
    1997           2 :       orb_soc_y => xas_tdp_env%orb_soc(2)%matrix
    1998           2 :       orb_soc_z => xas_tdp_env%orb_soc(3)%matrix
    1999             :       !by construction nsg == ntp, keep those separate for more code clarity though
    2000           2 :       CPASSERT(nsg == ntp)
    2001           2 :       nex = nsg
    2002           2 :       eps_filter = xas_tdp_control%eps_filter
    2003             : 
    2004             : !  Creating the real part and imaginary part of the final SOC fm
    2005           2 :       CALL get_qs_env(qs_env, para_env=para_env, blacs_env=blacs_env)
    2006             :       CALL cp_fm_struct_create(full_struct, context=blacs_env, para_env=para_env, &
    2007           2 :                                nrow_global=ntot, ncol_global=ntot)
    2008           2 :       CALL cp_fm_create(real_fm, full_struct)
    2009           2 :       CALL cp_fm_create(img_fm, full_struct)
    2010             : 
    2011             : !  Put the excitation energies on the diagonal of the real matrix
    2012          26 :       DO isg = 1, nsg
    2013          26 :          CALL cp_fm_set_element(real_fm, 1 + isg, 1 + isg, sg_evals(isg))
    2014             :       END DO
    2015          26 :       DO itp = 1, ntp
    2016             :          ! first T^-1, then T^0, then T^+1
    2017          24 :          CALL cp_fm_set_element(real_fm, 1 + itp + nsg, 1 + itp + nsg, tp_evals(itp))
    2018          24 :          CALL cp_fm_set_element(real_fm, 1 + itp + ntp + nsg, 1 + itp + ntp + nsg, tp_evals(itp))
    2019          26 :          CALL cp_fm_set_element(real_fm, 1 + itp + 2*ntp + nsg, 1 + itp + 2*ntp + nsg, tp_evals(itp))
    2020             :       END DO
    2021             : 
    2022             : !  Create the dbcsr machinery (for fast MM, the core of this routine)
    2023           2 :       CALL get_qs_env(qs_env, dbcsr_dist=dbcsr_dist)
    2024             :       CALL dbcsr_distribution_get(dbcsr_dist, group=group, row_dist=row_dist, pgrid=pgrid, &
    2025           2 :                                   npcols=npcols, nprows=nprows)
    2026           8 :       ALLOCATE (col_dist(nex), row_dist_new(nex))
    2027          26 :       DO iex = 1, nex
    2028          24 :          col_dist(iex) = MODULO(npcols - iex, npcols)
    2029          26 :          row_dist_new(iex) = MODULO(nprows - iex, nprows)
    2030             :       END DO
    2031           2 :       ALLOCATE (coeffs_dist, prod_dist)
    2032             :       CALL dbcsr_distribution_new(coeffs_dist, group=group, pgrid=pgrid, row_dist=row_dist, &
    2033           2 :                                   col_dist=col_dist)
    2034             :       CALL dbcsr_distribution_new(prod_dist, group=group, pgrid=pgrid, row_dist=row_dist_new, &
    2035           2 :                                   col_dist=col_dist)
    2036             : 
    2037             :       !Create the matrices
    2038           4 :       ALLOCATE (col_blk_size(nex))
    2039          26 :       col_blk_size = ndo_mo
    2040           2 :       CALL dbcsr_get_info(matrix_s(1)%matrix, row_blk_size=row_blk_size)
    2041             : 
    2042           2 :       ALLOCATE (dbcsr_sg, dbcsr_tp, dbcsr_work, dbcsr_ovlp, dbcsr_tmp, dbcsr_prod)
    2043             :       CALL dbcsr_create(matrix=dbcsr_sg, name="SINGLETS", matrix_type=dbcsr_type_no_symmetry, &
    2044           2 :                         dist=coeffs_dist, row_blk_size=row_blk_size, col_blk_size=col_blk_size)
    2045             :       CALL dbcsr_create(matrix=dbcsr_tp, name="TRIPLETS", matrix_type=dbcsr_type_no_symmetry, &
    2046           2 :                         dist=coeffs_dist, row_blk_size=row_blk_size, col_blk_size=col_blk_size)
    2047             :       CALL dbcsr_create(matrix=dbcsr_work, name="WORK", matrix_type=dbcsr_type_no_symmetry, &
    2048           2 :                         dist=coeffs_dist, row_blk_size=row_blk_size, col_blk_size=col_blk_size)
    2049             :       CALL dbcsr_create(matrix=dbcsr_prod, name="PROD", matrix_type=dbcsr_type_no_symmetry, &
    2050           2 :                         dist=prod_dist, row_blk_size=col_blk_size, col_blk_size=col_blk_size)
    2051             :       CALL dbcsr_create(matrix=dbcsr_ovlp, name="OVLP", matrix_type=dbcsr_type_no_symmetry, &
    2052           2 :                         dist=prod_dist, row_blk_size=col_blk_size, col_blk_size=col_blk_size)
    2053             : 
    2054          26 :       col_blk_size = 1
    2055             :       CALL dbcsr_create(matrix=dbcsr_tmp, name="TMP", matrix_type=dbcsr_type_no_symmetry, &
    2056           2 :                         dist=prod_dist, row_blk_size=col_blk_size, col_blk_size=col_blk_size)
    2057           2 :       CALL dbcsr_reserve_all_blocks(dbcsr_tmp)
    2058             : 
    2059           2 :       dbcsr_soc_package%dbcsr_sg => dbcsr_sg
    2060           2 :       dbcsr_soc_package%dbcsr_tp => dbcsr_tp
    2061           2 :       dbcsr_soc_package%dbcsr_work => dbcsr_work
    2062           2 :       dbcsr_soc_package%dbcsr_ovlp => dbcsr_ovlp
    2063           2 :       dbcsr_soc_package%dbcsr_prod => dbcsr_prod
    2064           2 :       dbcsr_soc_package%dbcsr_tmp => dbcsr_tmp
    2065             : 
    2066             :       !Filling the coeffs matrices by copying from the stored fms
    2067           2 :       CALL copy_fm_to_dbcsr(sg_coeffs, dbcsr_sg)
    2068           2 :       CALL copy_fm_to_dbcsr(tp_coeffs, dbcsr_tp)
    2069             : 
    2070             : !  Create the work and helper fms
    2071           2 :       CALL cp_fm_get_info(gs_coeffs, matrix_struct=vec_struct)
    2072             :       CALL cp_fm_struct_create(prod_struct, context=blacs_env, para_env=para_env, &
    2073           2 :                                nrow_global=ndo_mo, ncol_global=ndo_mo)
    2074           2 :       CALL cp_fm_create(prod_fm, prod_struct)
    2075           2 :       CALL cp_fm_create(vec_soc_x, vec_struct)
    2076           2 :       CALL cp_fm_create(vec_soc_y, vec_struct)
    2077           2 :       CALL cp_fm_create(vec_soc_z, vec_struct)
    2078             :       CALL cp_fm_struct_create(work_struct, context=blacs_env, para_env=para_env, &
    2079           2 :                                nrow_global=nex, ncol_global=nex)
    2080           2 :       CALL cp_fm_create(work_fm, work_struct)
    2081           2 :       CALL cp_fm_create(tmp_fm, work_struct)
    2082           6 :       ALLOCATE (diag(ndo_mo))
    2083             : 
    2084             : !  Precompute everything we can before looping over excited states
    2085           2 :       sqrt2 = SQRT(2.0_dp)
    2086             : 
    2087             :       ! The subset of the donor MOs matrix elements: <phi_I^0|Hsoc|phi_J^0> (kept as global array, small)
    2088          16 :       ALLOCATE (domo_soc_x(ndo_mo, ndo_mo), domo_soc_y(ndo_mo, ndo_mo), domo_soc_z(ndo_mo, ndo_mo))
    2089             : 
    2090           2 :       CALL cp_dbcsr_sm_fm_multiply(orb_soc_x, gs_coeffs, vec_soc_x, ncol=ndo_mo)
    2091           2 :       CALL parallel_gemm('T', 'N', ndo_mo, ndo_mo, nao, 1.0_dp, gs_coeffs, vec_soc_x, 0.0_dp, prod_fm)
    2092           2 :       CALL cp_fm_get_submatrix(prod_fm, domo_soc_x)
    2093             : 
    2094           2 :       CALL cp_dbcsr_sm_fm_multiply(orb_soc_y, gs_coeffs, vec_soc_y, ncol=ndo_mo)
    2095           2 :       CALL parallel_gemm('T', 'N', ndo_mo, ndo_mo, nao, 1.0_dp, gs_coeffs, vec_soc_y, 0.0_dp, prod_fm)
    2096           2 :       CALL cp_fm_get_submatrix(prod_fm, domo_soc_y)
    2097             : 
    2098           2 :       CALL cp_dbcsr_sm_fm_multiply(orb_soc_z, gs_coeffs, vec_soc_z, ncol=ndo_mo)
    2099           2 :       CALL parallel_gemm('T', 'N', ndo_mo, ndo_mo, nao, 1.0_dp, gs_coeffs, vec_soc_z, 0.0_dp, prod_fm)
    2100           2 :       CALL cp_fm_get_submatrix(prod_fm, domo_soc_z)
    2101             : 
    2102             : !  Only have SOC between singlet-triplet triplet-triplet and ground_state-triplet, the resulting
    2103             : !  matrix is Hermitian i.e. the real part is symmetric and the imaginary part is anti-symmetric.
    2104             : !  Can only fill upper half
    2105             : 
    2106             :       !Start with the ground state/triplet SOC, SOC*gs_coeffs already computed above
    2107             :       !note: we are computing <0|H|T>, but have SOC*gs_coeffs instead of gs_coeffs*SOC in store. Since
    2108             :       !      the SOC Hamiltonian is anti-symmetric, a - signs pops up in the gemms below
    2109             : 
    2110             :       CALL cp_fm_struct_create(gstp_struct, context=blacs_env, para_env=para_env, &
    2111           2 :                                nrow_global=ntp*ndo_mo, ncol_global=ndo_mo)
    2112           2 :       CALL cp_fm_create(gstp_fm, gstp_struct)
    2113           6 :       ALLOCATE (gstp_block(ndo_mo, ndo_mo))
    2114             : 
    2115             :       !gs-triplet with Ms=+-1, imaginary part
    2116           2 :       CALL parallel_gemm('T', 'N', ndo_mo*ntp, ndo_mo, nao, -1.0_dp, tp_coeffs, vec_soc_x, 0.0_dp, gstp_fm)
    2117             : 
    2118          26 :       DO itp = 1, ntp
    2119             :          CALL cp_fm_get_submatrix(fm=gstp_fm, target_m=gstp_block, start_row=(itp - 1)*ndo_mo + 1, &
    2120          24 :                                   start_col=1, n_rows=ndo_mo, n_cols=ndo_mo)
    2121          24 :          diag(:) = get_diag(gstp_block)
    2122          96 :          soc_gst = SUM(diag)
    2123          24 :          CALL cp_fm_set_element(img_fm, 1, 1 + nsg + itp, -1.0_dp*soc_gst) ! <0|H_x|T^-1>
    2124          26 :          CALL cp_fm_set_element(img_fm, 1, 1 + nsg + 2*ntp + itp, soc_gst) ! <0|H_x|T^+1>
    2125             :       END DO
    2126             : 
    2127             :       !gs-triplet with Ms=+-1, real part
    2128           2 :       CALL parallel_gemm('T', 'N', ndo_mo*ntp, ndo_mo, nao, -1.0_dp, tp_coeffs, vec_soc_y, 0.0_dp, gstp_fm)
    2129             : 
    2130          26 :       DO itp = 1, ntp
    2131             :          CALL cp_fm_get_submatrix(fm=gstp_fm, target_m=gstp_block, start_row=(itp - 1)*ndo_mo + 1, &
    2132          24 :                                   start_col=1, n_rows=ndo_mo, n_cols=ndo_mo)
    2133          24 :          diag(:) = get_diag(gstp_block)
    2134          96 :          soc_gst = SUM(diag)
    2135          24 :          CALL cp_fm_set_element(real_fm, 1, 1 + nsg + itp, -1.0_dp*soc_gst) ! <0|H_y|T^-1>
    2136          26 :          CALL cp_fm_set_element(real_fm, 1, 1 + nsg + 2*ntp + itp, -1.0_dp*soc_gst) ! <0|H_y|T^+1>
    2137             :       END DO
    2138             : 
    2139             :       !gs-triplet with Ms=0, purely imaginary
    2140           2 :       CALL parallel_gemm('T', 'N', ndo_mo*ntp, ndo_mo, nao, -1.0_dp, tp_coeffs, vec_soc_z, 0.0_dp, gstp_fm)
    2141             : 
    2142          26 :       DO itp = 1, ntp
    2143             :          CALL cp_fm_get_submatrix(fm=gstp_fm, target_m=gstp_block, start_row=(itp - 1)*ndo_mo + 1, &
    2144          24 :                                   start_col=1, n_rows=ndo_mo, n_cols=ndo_mo)
    2145          24 :          diag(:) = get_diag(gstp_block)
    2146          96 :          soc_gst = sqrt2*SUM(diag)
    2147          26 :          CALL cp_fm_set_element(img_fm, 1, 1 + nsg + ntp + itp, soc_gst)
    2148             :       END DO
    2149             : 
    2150             :       !gs clean-up
    2151           2 :       CALL cp_fm_release(prod_fm)
    2152           2 :       CALL cp_fm_release(vec_soc_x)
    2153           2 :       CALL cp_fm_release(vec_soc_y)
    2154           2 :       CALL cp_fm_release(vec_soc_z)
    2155           2 :       CALL cp_fm_release(gstp_fm)
    2156           2 :       CALL cp_fm_struct_release(gstp_struct)
    2157           2 :       CALL cp_fm_struct_release(prod_struct)
    2158           2 :       DEALLOCATE (gstp_block)
    2159             : 
    2160             :       !Now do the singlet-triplet SOC
    2161             :       !start by computing the singlet-triplet overlap
    2162             :       CALL dbcsr_multiply('N', 'N', 1.0_dp, matrix_s(1)%matrix, dbcsr_tp, 0.0_dp, &
    2163           2 :                           dbcsr_work, filter_eps=eps_filter)
    2164           2 :       CALL dbcsr_multiply('T', 'N', 1.0_dp, dbcsr_sg, dbcsr_work, 0.0_dp, dbcsr_ovlp, filter_eps=eps_filter)
    2165             : 
    2166             :       !singlet-triplet with Ms=+-1, imaginary part
    2167           2 :       CALL dbcsr_multiply('N', 'N', 1.0_dp, orb_soc_x, dbcsr_tp, 0.0_dp, dbcsr_work, filter_eps=eps_filter)
    2168           2 :       CALL dbcsr_multiply('T', 'N', 1.0_dp, dbcsr_sg, dbcsr_work, 0.0_dp, dbcsr_prod, filter_eps=eps_filter)
    2169             : 
    2170             :       CALL rcs_amew_soc_elements(dbcsr_tmp, dbcsr_prod, dbcsr_ovlp, domo_soc_x, &
    2171           2 :                                  pref_trace=-1.0_dp, pref_overall=-0.5_dp*sqrt2)
    2172             : 
    2173             :       !<S|H_x|T^-1>
    2174           2 :       CALL copy_dbcsr_to_fm(dbcsr_tmp, tmp_fm)
    2175             :       CALL cp_fm_to_fm_submat(msource=tmp_fm, mtarget=img_fm, nrow=nex, ncol=nex, &
    2176             :                               s_firstrow=1, s_firstcol=1, t_firstrow=2, &
    2177           2 :                               t_firstcol=1 + nsg + 1)
    2178             : 
    2179             :       !<S|H_x|T^+1> takes a minus sign
    2180           2 :       CALL cp_fm_scale(-1.0_dp, tmp_fm)
    2181             :       CALL cp_fm_to_fm_submat(msource=tmp_fm, mtarget=img_fm, nrow=nex, ncol=nex, &
    2182             :                               s_firstrow=1, s_firstcol=1, t_firstrow=2, &
    2183           2 :                               t_firstcol=1 + nsg + 2*ntp + 1)
    2184             : 
    2185             :       !singlet-triplet with Ms=+-1, real part
    2186           2 :       CALL dbcsr_multiply('N', 'N', 1.0_dp, orb_soc_y, dbcsr_tp, 0.0_dp, dbcsr_work, filter_eps=eps_filter)
    2187           2 :       CALL dbcsr_multiply('T', 'N', 1.0_dp, dbcsr_sg, dbcsr_work, 0.0_dp, dbcsr_prod, filter_eps=eps_filter)
    2188             : 
    2189             :       CALL rcs_amew_soc_elements(dbcsr_tmp, dbcsr_prod, dbcsr_ovlp, domo_soc_y, &
    2190           2 :                                  pref_trace=-1.0_dp, pref_overall=-0.5_dp*sqrt2)
    2191             : 
    2192             :       !<S|H_y|T^-1>
    2193           2 :       CALL copy_dbcsr_to_fm(dbcsr_tmp, tmp_fm)
    2194             :       CALL cp_fm_to_fm_submat(msource=tmp_fm, mtarget=real_fm, nrow=nex, ncol=nex, &
    2195             :                               s_firstrow=1, s_firstcol=1, t_firstrow=2, &
    2196           2 :                               t_firstcol=1 + nsg + 1)
    2197             : 
    2198             :       !<S|H_y|T^+1>
    2199             :       CALL cp_fm_to_fm_submat(msource=tmp_fm, mtarget=real_fm, nrow=nex, ncol=nex, &
    2200             :                               s_firstrow=1, s_firstcol=1, t_firstrow=2, &
    2201           2 :                               t_firstcol=1 + nsg + 2*ntp + 1)
    2202             : 
    2203             :       !singlet-triplet with Ms=0, purely imaginary
    2204           2 :       CALL dbcsr_multiply('N', 'N', 1.0_dp, orb_soc_z, dbcsr_tp, 0.0_dp, dbcsr_work, filter_eps=eps_filter)
    2205           2 :       CALL dbcsr_multiply('T', 'N', 1.0_dp, dbcsr_sg, dbcsr_work, 0.0_dp, dbcsr_prod, filter_eps=eps_filter)
    2206             : 
    2207             :       CALL rcs_amew_soc_elements(dbcsr_tmp, dbcsr_prod, dbcsr_ovlp, domo_soc_z, &
    2208           2 :                                  pref_trace=-1.0_dp, pref_overall=1.0_dp)
    2209             : 
    2210             :       !<S|H_z|T^0>
    2211           2 :       CALL copy_dbcsr_to_fm(dbcsr_tmp, tmp_fm)
    2212             :       CALL cp_fm_to_fm_submat(msource=tmp_fm, mtarget=img_fm, nrow=nex, ncol=nex, &
    2213             :                               s_firstrow=1, s_firstcol=1, t_firstrow=2, &
    2214           2 :                               t_firstcol=1 + nsg + ntp + 1)
    2215             : 
    2216             :       !Now the triplet-triplet SOC
    2217             :       !start by computing the overlap
    2218             :       CALL dbcsr_multiply('N', 'N', 1.0_dp, matrix_s(1)%matrix, dbcsr_tp, 0.0_dp, &
    2219           2 :                           dbcsr_work, filter_eps=eps_filter)
    2220           2 :       CALL dbcsr_multiply('T', 'N', 1.0_dp, dbcsr_tp, dbcsr_work, 0.0_dp, dbcsr_ovlp, filter_eps=eps_filter)
    2221             : 
    2222             :       !Ms=0 to Ms=+-1 SOC, imaginary part
    2223           2 :       CALL dbcsr_multiply('N', 'N', 1.0_dp, orb_soc_x, dbcsr_tp, 0.0_dp, dbcsr_work, filter_eps=eps_filter)
    2224           2 :       CALL dbcsr_multiply('T', 'N', 1.0_dp, dbcsr_tp, dbcsr_work, 0.0_dp, dbcsr_prod, filter_eps=eps_filter)
    2225             : 
    2226             :       CALL rcs_amew_soc_elements(dbcsr_tmp, dbcsr_prod, dbcsr_ovlp, domo_soc_x, &
    2227           2 :                                  pref_trace=1.0_dp, pref_overall=-0.5_dp*sqrt2)
    2228             : 
    2229             :       !<T^0|H_x|T^+1>
    2230           2 :       CALL copy_dbcsr_to_fm(dbcsr_tmp, tmp_fm)
    2231             :       CALL cp_fm_to_fm_submat(msource=tmp_fm, mtarget=img_fm, nrow=nex, ncol=nex, &
    2232             :                               s_firstrow=1, s_firstcol=1, t_firstrow=1 + nsg + ntp + 1, &
    2233           2 :                               t_firstcol=1 + nsg + 2*ntp + 1)
    2234             : 
    2235             :       !<T^-1|H_x|T^0>, takes a minus sign and a transpose (because computed <T^0|H_x|T^-1>)
    2236           2 :       CALL cp_fm_transpose(tmp_fm, work_fm)
    2237           2 :       CALL cp_fm_scale(-1.0_dp, work_fm)
    2238             :       CALL cp_fm_to_fm_submat(msource=work_fm, mtarget=img_fm, nrow=nex, ncol=nex, &
    2239             :                               s_firstrow=1, s_firstcol=1, t_firstrow=1 + nsg + 1, &
    2240           2 :                               t_firstcol=1 + nsg + ntp + 1)
    2241             : 
    2242             :       !Ms=0 to Ms=+-1 SOC, real part
    2243           2 :       CALL dbcsr_multiply('N', 'N', 1.0_dp, orb_soc_y, dbcsr_tp, 0.0_dp, dbcsr_work, filter_eps=eps_filter)
    2244           2 :       CALL dbcsr_multiply('T', 'N', 1.0_dp, dbcsr_tp, dbcsr_work, 0.0_dp, dbcsr_prod, filter_eps=eps_filter)
    2245             : 
    2246             :       CALL rcs_amew_soc_elements(dbcsr_tmp, dbcsr_prod, dbcsr_ovlp, domo_soc_y, &
    2247           2 :                                  pref_trace=1.0_dp, pref_overall=0.5_dp*sqrt2)
    2248             : 
    2249             :       !<T^0|H_y|T^+1>
    2250           2 :       CALL copy_dbcsr_to_fm(dbcsr_tmp, tmp_fm)
    2251             :       CALL cp_fm_to_fm_submat(msource=tmp_fm, mtarget=real_fm, nrow=nex, ncol=nex, &
    2252             :                               s_firstrow=1, s_firstcol=1, t_firstrow=1 + nsg + ntp + 1, &
    2253           2 :                               t_firstcol=1 + nsg + 2*ntp + 1)
    2254             : 
    2255             :       !<T^-1|H_y|T^0>, takes a minus sign and a transpose
    2256           2 :       CALL cp_fm_transpose(tmp_fm, work_fm)
    2257           2 :       CALL cp_fm_scale(-1.0_dp, work_fm)
    2258             :       CALL cp_fm_to_fm_submat(msource=work_fm, mtarget=real_fm, nrow=nex, ncol=nex, &
    2259             :                               s_firstrow=1, s_firstcol=1, t_firstrow=1 + nsg + 1, &
    2260           2 :                               t_firstcol=1 + nsg + ntp + 1)
    2261             : 
    2262             :       !Ms=1 to Ms=1 and Ms=-1 to Ms=-1 SOC, purely imaginary
    2263           2 :       CALL dbcsr_multiply('N', 'N', 1.0_dp, orb_soc_z, dbcsr_tp, 0.0_dp, dbcsr_work, filter_eps=eps_filter)
    2264           2 :       CALL dbcsr_multiply('T', 'N', 1.0_dp, dbcsr_tp, dbcsr_work, 0.0_dp, dbcsr_prod, filter_eps=eps_filter)
    2265             : 
    2266             :       CALL rcs_amew_soc_elements(dbcsr_tmp, dbcsr_prod, dbcsr_ovlp, domo_soc_z, &
    2267           2 :                                  pref_trace=1.0_dp, pref_overall=1.0_dp)
    2268             : 
    2269             :       !<T^+1|H_z|T^+1>
    2270           2 :       CALL copy_dbcsr_to_fm(dbcsr_tmp, tmp_fm)
    2271             :       CALL cp_fm_to_fm_submat(msource=tmp_fm, mtarget=img_fm, nrow=nex, ncol=nex, &
    2272             :                               s_firstrow=1, s_firstcol=1, t_firstrow=1 + nsg + 2*ntp + 1, &
    2273           2 :                               t_firstcol=1 + nsg + 2*ntp + 1)
    2274             : 
    2275             :       !<T^-1|H_z|T^-1>, takes a minus sign
    2276           2 :       CALL cp_fm_scale(-1.0_dp, tmp_fm)
    2277             :       CALL cp_fm_to_fm_submat(msource=tmp_fm, mtarget=img_fm, nrow=nex, ncol=nex, &
    2278             :                               s_firstrow=1, s_firstcol=1, t_firstrow=1 + nsg + 1, &
    2279           2 :                               t_firstcol=1 + nsg + 1)
    2280             : 
    2281             : !  Intermediate clean-up
    2282           2 :       CALL cp_fm_struct_release(work_struct)
    2283           2 :       CALL cp_fm_release(work_fm)
    2284           2 :       CALL cp_fm_release(tmp_fm)
    2285           2 :       DEALLOCATE (diag, domo_soc_x, domo_soc_y, domo_soc_z)
    2286             : 
    2287             : !  Set-up the complex hermitian perturbation matrix
    2288           2 :       CALL cp_cfm_create(hami_cfm, full_struct)
    2289           2 :       CALL cp_fm_to_cfm(real_fm, img_fm, hami_cfm)
    2290             : 
    2291           2 :       CALL cp_fm_release(real_fm)
    2292           2 :       CALL cp_fm_release(img_fm)
    2293             : 
    2294             : !  Diagonalize the Hamiltonian
    2295           6 :       ALLOCATE (tmp_evals(ntot))
    2296           2 :       CALL cp_cfm_create(evecs_cfm, full_struct)
    2297           2 :       CALL cp_cfm_heevd(hami_cfm, evecs_cfm, tmp_evals)
    2298             : 
    2299             :       !  Adjust the energies so the GS has zero, and store in the donor_state (without the GS)
    2300           6 :       ALLOCATE (donor_state%soc_evals(ntot - 1))
    2301          98 :       donor_state%soc_evals(:) = tmp_evals(2:ntot) - tmp_evals(1)
    2302             : 
    2303             : !  Compute the dipole oscillator strengths
    2304             :       CALL compute_soc_dipole_fosc(evecs_cfm, dbcsr_soc_package, donor_state, xas_tdp_env, &
    2305           2 :                                    xas_tdp_control, qs_env)
    2306             : 
    2307             : !  And the quadrupole (if needed)
    2308           2 :       IF (xas_tdp_control%do_quad) THEN
    2309             :          CALL compute_soc_quadrupole_fosc(evecs_cfm, dbcsr_soc_package, donor_state, xas_tdp_env, &
    2310           0 :                                           xas_tdp_control, qs_env)
    2311             :       END IF
    2312             : 
    2313             : !  Clean-up
    2314           2 :       CALL cp_fm_struct_release(full_struct)
    2315           2 :       CALL cp_cfm_release(hami_cfm)
    2316           2 :       CALL cp_cfm_release(evecs_cfm)
    2317           2 :       CALL dbcsr_distribution_release(coeffs_dist)
    2318           2 :       CALL dbcsr_distribution_release(prod_dist)
    2319           2 :       CALL dbcsr_release(dbcsr_sg)
    2320           2 :       CALL dbcsr_release(dbcsr_tp)
    2321           2 :       CALL dbcsr_release(dbcsr_prod)
    2322           2 :       CALL dbcsr_release(dbcsr_ovlp)
    2323           2 :       CALL dbcsr_release(dbcsr_tmp)
    2324           2 :       CALL dbcsr_release(dbcsr_work)
    2325           2 :       DEALLOCATE (coeffs_dist, prod_dist, col_dist, col_blk_size, row_dist_new)
    2326           2 :       DEALLOCATE (dbcsr_sg, dbcsr_tp, dbcsr_work, dbcsr_prod, dbcsr_ovlp, dbcsr_tmp)
    2327             : 
    2328           2 :       CALL timestop(handle)
    2329             : 
    2330          22 :    END SUBROUTINE include_rcs_soc
    2331             : 
    2332             : ! **************************************************************************************************
    2333             : !> \brief Computes the matrix elements of a one-body operator (given wrt AOs) in the basis of the
    2334             : !>        excited state AMEWs with ground state, for the open-shell case
    2335             : !> \param amew_op the operator in the basis of the AMEWs (array because could have x,y,z components)
    2336             : !> \param ao_op the operator in the basis of the atomic orbitals
    2337             : !> \param gs_coeffs the coefficient of the GS donor MOs. Ecplicitely passed because of special
    2338             : !>                  format in the ROKS case (see include_os_soc routine)
    2339             : !> \param dbcsr_soc_package inhertited from the main SOC routine
    2340             : !> \param donor_state ...
    2341             : !> \param eps_filter ...
    2342             : !> \param qs_env ...
    2343             : !> \note The ordering of the AMEWs is consistent with SOC and is gs, sc, sf
    2344             : !>       We assume that the operator is spin-independent => only <0|0>, <0|sc>, <sc|sc> and <sf|sf>
    2345             : !>       yield non-zero matrix elements
    2346             : !>       Only for open-shell calculations
    2347             : ! **************************************************************************************************
    2348           2 :    SUBROUTINE get_os_amew_op(amew_op, ao_op, gs_coeffs, dbcsr_soc_package, donor_state, &
    2349             :                              eps_filter, qs_env)
    2350             : 
    2351             :       TYPE(cp_fm_type), ALLOCATABLE, DIMENSION(:), &
    2352             :          INTENT(OUT)                                     :: amew_op
    2353             :       TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: ao_op
    2354             :       TYPE(cp_fm_type), INTENT(IN)                       :: gs_coeffs
    2355             :       TYPE(dbcsr_soc_package_type)                       :: dbcsr_soc_package
    2356             :       TYPE(donor_state_type), POINTER                    :: donor_state
    2357             :       REAL(dp), INTENT(IN)                               :: eps_filter
    2358             :       TYPE(qs_environment_type), POINTER                 :: qs_env
    2359             : 
    2360             :       INTEGER                                            :: dim_op, homo, i, isc, nao, ndo_mo, &
    2361             :                                                             ndo_so, nex, nsc, nsf, ntot
    2362             :       REAL(dp)                                           :: op
    2363           2 :       REAL(dp), ALLOCATABLE, DIMENSION(:)                :: diag, gsgs_op
    2364           2 :       REAL(dp), ALLOCATABLE, DIMENSION(:, :)             :: domo_op, gsex_block, tmp
    2365             :       TYPE(cp_blacs_env_type), POINTER                   :: blacs_env
    2366             :       TYPE(cp_fm_struct_type), POINTER                   :: full_struct, gsex_struct, prod_struct, &
    2367             :                                                             tmp_struct, vec_struct
    2368             :       TYPE(cp_fm_type)                                   :: gsex_fm, prod_work, tmp_fm, vec_work, &
    2369             :                                                             work_fm
    2370             :       TYPE(cp_fm_type), POINTER                          :: mo_coeff, sc_coeffs, sf_coeffs
    2371           2 :       TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: matrix_s
    2372             :       TYPE(dbcsr_type), POINTER                          :: ao_op_i, dbcsr_ovlp, dbcsr_prod, &
    2373             :                                                             dbcsr_sc, dbcsr_sf, dbcsr_tmp, &
    2374             :                                                             dbcsr_work
    2375           2 :       TYPE(mo_set_type), DIMENSION(:), POINTER           :: mos
    2376             :       TYPE(mp_para_env_type), POINTER                    :: para_env
    2377             : 
    2378           2 :       NULLIFY (matrix_s, para_env, blacs_env, full_struct, vec_struct, prod_struct, mos)
    2379           2 :       NULLIFY (mo_coeff, ao_op_i, tmp_struct)
    2380           2 :       NULLIFY (dbcsr_sc, dbcsr_sf, dbcsr_ovlp, dbcsr_work, dbcsr_tmp, dbcsr_prod)
    2381             : 
    2382             : !  Iinitialization
    2383           2 :       dim_op = SIZE(ao_op)
    2384           2 :       sc_coeffs => donor_state%sc_coeffs
    2385           2 :       sf_coeffs => donor_state%sf_coeffs
    2386           2 :       nsc = SIZE(donor_state%sc_evals)
    2387           2 :       nsf = SIZE(donor_state%sf_evals)
    2388           2 :       nex = nsc
    2389           2 :       ntot = 1 + nsc + nsf
    2390           2 :       ndo_mo = donor_state%ndo_mo
    2391           2 :       ndo_so = 2*donor_state%ndo_mo !open-shell => nspins = 2
    2392           2 :       CALL get_qs_env(qs_env, matrix_s=matrix_s, para_env=para_env, blacs_env=blacs_env, mos=mos)
    2393           2 :       CALL dbcsr_get_info(matrix_s(1)%matrix, nfullrows_total=nao)
    2394             : 
    2395           2 :       dbcsr_sc => dbcsr_soc_package%dbcsr_sc
    2396           2 :       dbcsr_sf => dbcsr_soc_package%dbcsr_sf
    2397           2 :       dbcsr_work => dbcsr_soc_package%dbcsr_work
    2398           2 :       dbcsr_tmp => dbcsr_soc_package%dbcsr_tmp
    2399           2 :       dbcsr_prod => dbcsr_soc_package%dbcsr_prod
    2400           2 :       dbcsr_ovlp => dbcsr_soc_package%dbcsr_ovlp
    2401             : 
    2402             : !  Create the amew_op matrix set
    2403             :       CALL cp_fm_struct_create(full_struct, context=blacs_env, para_env=para_env, &
    2404           2 :                                nrow_global=ntot, ncol_global=ntot)
    2405          12 :       ALLOCATE (amew_op(dim_op))
    2406           8 :       DO i = 1, dim_op
    2407           8 :          CALL cp_fm_create(amew_op(i), full_struct)
    2408             :       END DO
    2409             : 
    2410             : !  Before looping, need to evaluate sum_j,sigma <phi^0_j,sgima|op|phi^0_j,sigma>, for each dimension
    2411             : !  of the operator
    2412           6 :       ALLOCATE (gsgs_op(dim_op))
    2413             : 
    2414             :       !start with the alpha MOs
    2415           2 :       CALL get_mo_set(mos(1), mo_coeff=mo_coeff, homo=homo)
    2416           6 :       ALLOCATE (diag(homo))
    2417           2 :       CALL cp_fm_get_info(mo_coeff, matrix_struct=vec_struct)
    2418             :       CALL cp_fm_struct_create(prod_struct, context=blacs_env, para_env=para_env, &
    2419           2 :                                nrow_global=homo, ncol_global=homo)
    2420           2 :       CALL cp_fm_create(vec_work, vec_struct)
    2421           2 :       CALL cp_fm_create(prod_work, prod_struct)
    2422             : 
    2423           8 :       DO i = 1, dim_op
    2424             : 
    2425           6 :          ao_op_i => ao_op(i)%matrix
    2426             : 
    2427           6 :          CALL cp_dbcsr_sm_fm_multiply(ao_op_i, mo_coeff, vec_work, ncol=homo)
    2428           6 :          CALL parallel_gemm('T', 'N', homo, homo, nao, 1.0_dp, mo_coeff, vec_work, 0.0_dp, prod_work)
    2429           6 :          CALL cp_fm_get_diag(prod_work, diag)
    2430          62 :          gsgs_op(i) = SUM(diag)
    2431             : 
    2432             :       END DO !i
    2433             : 
    2434           2 :       CALL cp_fm_release(vec_work)
    2435           2 :       CALL cp_fm_release(prod_work)
    2436           2 :       CALL cp_fm_struct_release(prod_struct)
    2437           2 :       DEALLOCATE (diag)
    2438           2 :       NULLIFY (vec_struct)
    2439             : 
    2440             :       !then beta orbitals
    2441           2 :       CALL get_mo_set(mos(2), mo_coeff=mo_coeff, homo=homo)
    2442           6 :       ALLOCATE (diag(homo))
    2443           2 :       CALL cp_fm_get_info(mo_coeff, matrix_struct=vec_struct)
    2444             :       CALL cp_fm_struct_create(prod_struct, context=blacs_env, para_env=para_env, &
    2445           2 :                                nrow_global=homo, ncol_global=homo)
    2446           2 :       CALL cp_fm_create(vec_work, vec_struct)
    2447           2 :       CALL cp_fm_create(prod_work, prod_struct)
    2448             : 
    2449           8 :       DO i = 1, dim_op
    2450             : 
    2451           6 :          ao_op_i => ao_op(i)%matrix
    2452             : 
    2453           6 :          CALL cp_dbcsr_sm_fm_multiply(ao_op_i, mo_coeff, vec_work, ncol=homo)
    2454           6 :          CALL parallel_gemm('T', 'N', homo, homo, nao, 1.0_dp, mo_coeff, vec_work, 0.0_dp, prod_work)
    2455           6 :          CALL cp_fm_get_diag(prod_work, diag)
    2456          62 :          gsgs_op(i) = gsgs_op(i) + SUM(diag)
    2457             : 
    2458             :       END DO !i
    2459             : 
    2460           2 :       CALL cp_fm_release(vec_work)
    2461           2 :       CALL cp_fm_release(prod_work)
    2462           2 :       CALL cp_fm_struct_release(prod_struct)
    2463           2 :       DEALLOCATE (diag)
    2464           2 :       NULLIFY (vec_struct)
    2465             : 
    2466             : !  Before looping over excited AMEWs, define some work matrices and structures
    2467             :       CALL cp_fm_struct_create(vec_struct, context=blacs_env, para_env=para_env, &
    2468           2 :                                nrow_global=nao, ncol_global=ndo_so)
    2469             :       CALL cp_fm_struct_create(prod_struct, context=blacs_env, para_env=para_env, &
    2470           2 :                                nrow_global=ndo_so, ncol_global=ndo_so)
    2471             :       CALL cp_fm_struct_create(gsex_struct, context=blacs_env, para_env=para_env, &
    2472           2 :                                nrow_global=ndo_so*nex, ncol_global=ndo_so)
    2473             :       CALL cp_fm_struct_create(tmp_struct, context=blacs_env, para_env=para_env, &
    2474           2 :                                nrow_global=nex, ncol_global=nex)
    2475           2 :       CALL cp_fm_create(vec_work, vec_struct) !for op*|phi>
    2476           2 :       CALL cp_fm_create(prod_work, prod_struct) !for any <phi|op|phi>
    2477           2 :       CALL cp_fm_create(work_fm, full_struct)
    2478           2 :       CALL cp_fm_create(gsex_fm, gsex_struct)
    2479           2 :       CALL cp_fm_create(tmp_fm, tmp_struct)
    2480           6 :       ALLOCATE (diag(ndo_so))
    2481           8 :       ALLOCATE (domo_op(ndo_so, ndo_so))
    2482           6 :       ALLOCATE (tmp(ndo_so, ndo_so))
    2483           6 :       ALLOCATE (gsex_block(ndo_so, ndo_so))
    2484             : 
    2485             : !  Loop over the dimensions of the operator
    2486           8 :       DO i = 1, dim_op
    2487             : 
    2488           6 :          ao_op_i => ao_op(i)%matrix
    2489             : 
    2490             :          !put the gs-gs contribution
    2491           6 :          CALL cp_fm_set_element(amew_op(i), 1, 1, gsgs_op(i))
    2492             : 
    2493             :          !  Precompute what we can before looping over excited states
    2494             :          ! Need the operator in the donor MOs basis <phi^0_I,sigma|op_i|phi^0_J,tau>
    2495           6 :          CALL cp_dbcsr_sm_fm_multiply(ao_op_i, gs_coeffs, vec_work, ncol=ndo_so)
    2496           6 :          CALL parallel_gemm('T', 'N', ndo_so, ndo_so, nao, 1.0_dp, gs_coeffs, vec_work, 0.0_dp, prod_work)
    2497           6 :          CALL cp_fm_get_submatrix(prod_work, domo_op)
    2498             : 
    2499             :          !  Do the ground-state/spin-conserving operator
    2500           6 :          CALL parallel_gemm('T', 'N', ndo_so*nsc, ndo_so, nao, 1.0_dp, sc_coeffs, vec_work, 0.0_dp, gsex_fm)
    2501          78 :          DO isc = 1, nsc
    2502             :             CALL cp_fm_get_submatrix(fm=gsex_fm, target_m=gsex_block, start_row=(isc - 1)*ndo_so + 1, &
    2503          72 :                                      start_col=1, n_rows=ndo_so, n_cols=ndo_so)
    2504          72 :             diag(:) = get_diag(gsex_block)
    2505         504 :             op = SUM(diag)
    2506          78 :             CALL cp_fm_set_element(amew_op(i), 1, 1 + isc, op)
    2507             :          END DO !isc
    2508             : 
    2509             :          !  The spin-conserving/spin-conserving operator
    2510             :          !overlap
    2511             :          CALL dbcsr_multiply('N', 'N', 1.0_dp, matrix_s(1)%matrix, dbcsr_sc, 0.0_dp, &
    2512           6 :                              dbcsr_work, filter_eps=eps_filter)
    2513           6 :          CALL dbcsr_multiply('T', 'N', 1.0_dp, dbcsr_sc, dbcsr_work, 0.0_dp, dbcsr_ovlp, filter_eps=eps_filter)
    2514             : 
    2515             :          !operator in SC LR-orbital basis
    2516           6 :          CALL dbcsr_multiply('N', 'N', 1.0_dp, ao_op_i, dbcsr_sc, 0.0_dp, dbcsr_work, filter_eps=eps_filter)
    2517           6 :          CALL dbcsr_multiply('T', 'N', 1.0_dp, dbcsr_sc, dbcsr_work, 0.0_dp, dbcsr_prod, filter_eps=eps_filter)
    2518             : 
    2519             :          CALL os_amew_soc_elements(dbcsr_tmp, dbcsr_prod, dbcsr_ovlp, domo_op, pref_diaga=1.0_dp, &
    2520             :                                    pref_diagb=1.0_dp, pref_tracea=-1.0_dp, pref_traceb=-1.0_dp, &
    2521           6 :                                    pref_diags=gsgs_op(i), symmetric=.TRUE.)
    2522             : 
    2523           6 :          CALL copy_dbcsr_to_fm(dbcsr_tmp, tmp_fm)
    2524             :          CALL cp_fm_to_fm_submat(msource=tmp_fm, mtarget=amew_op(i), nrow=nex, ncol=nex, &
    2525           6 :                                  s_firstrow=1, s_firstcol=1, t_firstrow=2, t_firstcol=2)
    2526             : 
    2527             :          !  The spin-flip/spin-flip operator
    2528             :          !overlap
    2529             :          CALL dbcsr_multiply('N', 'N', 1.0_dp, matrix_s(1)%matrix, dbcsr_sf, 0.0_dp, &
    2530           6 :                              dbcsr_work, filter_eps=eps_filter)
    2531           6 :          CALL dbcsr_multiply('T', 'N', 1.0_dp, dbcsr_sf, dbcsr_work, 0.0_dp, dbcsr_ovlp, filter_eps=eps_filter)
    2532             : 
    2533             :          !operator in SF LR-orbital basis
    2534           6 :          CALL dbcsr_multiply('N', 'N', 1.0_dp, ao_op_i, dbcsr_sf, 0.0_dp, dbcsr_work, filter_eps=eps_filter)
    2535           6 :          CALL dbcsr_multiply('T', 'N', 1.0_dp, dbcsr_sf, dbcsr_work, 0.0_dp, dbcsr_prod, filter_eps=eps_filter)
    2536             : 
    2537             :          !need to reorganize the domo_op array by swapping the alpha-alpha and the beta-beta quarter
    2538          78 :          tmp(1:ndo_mo, 1:ndo_mo) = domo_op(ndo_mo + 1:ndo_so, ndo_mo + 1:ndo_so)
    2539          78 :          tmp(ndo_mo + 1:ndo_so, ndo_mo + 1:ndo_so) = domo_op(1:ndo_mo, 1:ndo_mo)
    2540             : 
    2541             :          CALL os_amew_soc_elements(dbcsr_tmp, dbcsr_prod, dbcsr_ovlp, tmp, pref_diaga=1.0_dp, &
    2542             :                                    pref_diagb=1.0_dp, pref_tracea=-1.0_dp, pref_traceb=-1.0_dp, &
    2543           6 :                                    pref_diags=gsgs_op(i), symmetric=.TRUE.)
    2544             : 
    2545           6 :          CALL copy_dbcsr_to_fm(dbcsr_tmp, tmp_fm)
    2546             :          CALL cp_fm_to_fm_submat(msource=tmp_fm, mtarget=amew_op(i), nrow=nex, ncol=nex, &
    2547           6 :                                  s_firstrow=1, s_firstcol=1, t_firstrow=1 + nsc + 1, t_firstcol=1 + nsc + 1)
    2548             : 
    2549             :          !Symmetry => only upper diag explicitly built
    2550           8 :          CALL cp_fm_upper_to_full(amew_op(i), work_fm)
    2551             : 
    2552             :       END DO !i
    2553             : 
    2554             : !  Clean-up
    2555           2 :       CALL cp_fm_struct_release(full_struct)
    2556           2 :       CALL cp_fm_struct_release(prod_struct)
    2557           2 :       CALL cp_fm_struct_release(vec_struct)
    2558           2 :       CALL cp_fm_struct_release(tmp_struct)
    2559           2 :       CALL cp_fm_struct_release(gsex_struct)
    2560           2 :       CALL cp_fm_release(work_fm)
    2561           2 :       CALL cp_fm_release(tmp_fm)
    2562           2 :       CALL cp_fm_release(vec_work)
    2563           2 :       CALL cp_fm_release(prod_work)
    2564           2 :       CALL cp_fm_release(gsex_fm)
    2565             : 
    2566          14 :    END SUBROUTINE get_os_amew_op
    2567             : 
    2568             : ! **************************************************************************************************
    2569             : !> \brief Computes the matrix elements of a one-body operator (given wrt AOs) in the basis of the
    2570             : !>        excited state AMEWs with ground state, singlet and triplet with Ms = -1,0,+1
    2571             : !> \param amew_op the operator in the basis of the AMEWs (array because could have x,y,z components)
    2572             : !> \param ao_op the operator in the basis of the atomic orbitals
    2573             : !> \param dbcsr_soc_package inherited from the main SOC routine
    2574             : !> \param donor_state ...
    2575             : !> \param eps_filter for dbcsr multiplication
    2576             : !> \param qs_env ...
    2577             : !> \note The ordering of the AMEWs is consistent with SOC and is gs, sg, tp(-1), tp(0). tp(+1)
    2578             : !>       We assume that the operator is spin-independent => only <0|0>, <0|S>, <S|S> and <T|T>
    2579             : !>       yield non-zero matrix elements
    2580             : !>       Only for spin-restricted calculations
    2581             : ! **************************************************************************************************
    2582           2 :    SUBROUTINE get_rcs_amew_op(amew_op, ao_op, dbcsr_soc_package, donor_state, eps_filter, qs_env)
    2583             : 
    2584             :       TYPE(cp_fm_type), ALLOCATABLE, DIMENSION(:), &
    2585             :          INTENT(OUT)                                     :: amew_op
    2586             :       TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: ao_op
    2587             :       TYPE(dbcsr_soc_package_type)                       :: dbcsr_soc_package
    2588             :       TYPE(donor_state_type), POINTER                    :: donor_state
    2589             :       REAL(dp), INTENT(IN)                               :: eps_filter
    2590             :       TYPE(qs_environment_type), POINTER                 :: qs_env
    2591             : 
    2592             :       INTEGER                                            :: dim_op, homo, i, isg, nao, ndo_mo, nex, &
    2593             :                                                             nsg, ntot, ntp
    2594             :       REAL(dp)                                           :: op, sqrt2
    2595           2 :       REAL(dp), ALLOCATABLE, DIMENSION(:)                :: diag, gs_diag, gsgs_op
    2596           2 :       REAL(dp), ALLOCATABLE, DIMENSION(:, :)             :: domo_op, sggs_block
    2597             :       TYPE(cp_blacs_env_type), POINTER                   :: blacs_env
    2598             :       TYPE(cp_fm_struct_type), POINTER                   :: full_struct, gsgs_struct, prod_struct, &
    2599             :                                                             sggs_struct, std_struct, tmp_struct, &
    2600             :                                                             vec_struct
    2601             :       TYPE(cp_fm_type)                                   :: gs_fm, prod_fm, sggs_fm, tmp_fm, vec_op, &
    2602             :                                                             work_fm
    2603             :       TYPE(cp_fm_type), POINTER                          :: gs_coeffs, mo_coeff, sg_coeffs
    2604           2 :       TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: matrix_s
    2605             :       TYPE(dbcsr_type), POINTER                          :: ao_op_i, dbcsr_ovlp, dbcsr_prod, &
    2606             :                                                             dbcsr_sg, dbcsr_tmp, dbcsr_tp, &
    2607             :                                                             dbcsr_work
    2608           2 :       TYPE(mo_set_type), DIMENSION(:), POINTER           :: mos
    2609             :       TYPE(mp_para_env_type), POINTER                    :: para_env
    2610             : 
    2611           2 :       NULLIFY (gs_coeffs, sg_coeffs, matrix_s, full_struct, prod_struct, vec_struct, blacs_env)
    2612           2 :       NULLIFY (para_env, mo_coeff, mos, gsgs_struct, std_struct, tmp_struct, sggs_struct)
    2613           2 :       NULLIFY (ao_op_i, dbcsr_tp, dbcsr_sg, dbcsr_ovlp, dbcsr_work, dbcsr_tmp, dbcsr_prod)
    2614             : 
    2615             : !  Initialization
    2616           2 :       gs_coeffs => donor_state%gs_coeffs
    2617           2 :       sg_coeffs => donor_state%sg_coeffs
    2618           2 :       nsg = SIZE(donor_state%sg_evals)
    2619           2 :       ntp = nsg; nex = nsg !all the same by construction, keep them separate for clarity
    2620           2 :       ntot = 1 + nsg + 3*ntp
    2621           2 :       ndo_mo = donor_state%ndo_mo
    2622           2 :       CALL get_qs_env(qs_env, matrix_s=matrix_s, para_env=para_env, blacs_env=blacs_env, mos=mos)
    2623           2 :       sqrt2 = SQRT(2.0_dp)
    2624           2 :       dim_op = SIZE(ao_op)
    2625             : 
    2626           2 :       dbcsr_sg => dbcsr_soc_package%dbcsr_sg
    2627           2 :       dbcsr_tp => dbcsr_soc_package%dbcsr_tp
    2628           2 :       dbcsr_work => dbcsr_soc_package%dbcsr_work
    2629           2 :       dbcsr_prod => dbcsr_soc_package%dbcsr_prod
    2630           2 :       dbcsr_ovlp => dbcsr_soc_package%dbcsr_ovlp
    2631           2 :       dbcsr_tmp => dbcsr_soc_package%dbcsr_tmp
    2632             : 
    2633             : !  Create the amew_op matrix
    2634             :       CALL cp_fm_struct_create(full_struct, context=blacs_env, para_env=para_env, &
    2635           2 :                                nrow_global=ntot, ncol_global=ntot)
    2636          12 :       ALLOCATE (amew_op(dim_op))
    2637           8 :       DO i = 1, dim_op
    2638           8 :          CALL cp_fm_create(amew_op(i), full_struct)
    2639             :       END DO !i
    2640             : 
    2641             : !  Deal with the GS-GS contribution <0|0> = 2*sum_j <phi_j|op|phi_j>
    2642           2 :       CALL get_mo_set(mos(1), mo_coeff=mo_coeff, nao=nao, homo=homo)
    2643             :       CALL cp_fm_struct_create(gsgs_struct, context=blacs_env, para_env=para_env, &
    2644           2 :                                nrow_global=homo, ncol_global=homo)
    2645           2 :       CALL cp_fm_get_info(mo_coeff, matrix_struct=std_struct)
    2646           2 :       CALL cp_fm_create(gs_fm, gsgs_struct)
    2647           2 :       CALL cp_fm_create(work_fm, std_struct)
    2648           6 :       ALLOCATE (gsgs_op(dim_op))
    2649           6 :       ALLOCATE (gs_diag(homo))
    2650             : 
    2651           8 :       DO i = 1, dim_op
    2652             : 
    2653           6 :          ao_op_i => ao_op(i)%matrix
    2654             : 
    2655           6 :          CALL cp_dbcsr_sm_fm_multiply(ao_op_i, mo_coeff, work_fm, ncol=homo)
    2656           6 :          CALL parallel_gemm('T', 'N', homo, homo, nao, 1.0_dp, mo_coeff, work_fm, 0.0_dp, gs_fm)
    2657           6 :          CALL cp_fm_get_diag(gs_fm, gs_diag)
    2658          62 :          gsgs_op(i) = 2.0_dp*SUM(gs_diag)
    2659             : 
    2660             :       END DO !i
    2661             : 
    2662           2 :       CALL cp_fm_release(gs_fm)
    2663           2 :       CALL cp_fm_release(work_fm)
    2664           2 :       CALL cp_fm_struct_release(gsgs_struct)
    2665           2 :       DEALLOCATE (gs_diag)
    2666             : 
    2667             : !  Create the work and helper fms
    2668           2 :       CALL cp_fm_get_info(gs_coeffs, matrix_struct=vec_struct)
    2669             :       CALL cp_fm_struct_create(prod_struct, context=blacs_env, para_env=para_env, &
    2670           2 :                                nrow_global=ndo_mo, ncol_global=ndo_mo)
    2671           2 :       CALL cp_fm_create(prod_fm, prod_struct)
    2672           2 :       CALL cp_fm_create(vec_op, vec_struct)
    2673             :       CALL cp_fm_struct_create(tmp_struct, context=blacs_env, para_env=para_env, &
    2674           2 :                                nrow_global=nex, ncol_global=nex)
    2675             :       CALL cp_fm_struct_create(sggs_struct, context=blacs_env, para_env=para_env, &
    2676           2 :                                nrow_global=ndo_mo*nsg, ncol_global=ndo_mo)
    2677           2 :       CALL cp_fm_create(tmp_fm, tmp_struct)
    2678           2 :       CALL cp_fm_create(work_fm, full_struct)
    2679           2 :       CALL cp_fm_create(sggs_fm, sggs_struct)
    2680           6 :       ALLOCATE (diag(ndo_mo))
    2681           8 :       ALLOCATE (domo_op(ndo_mo, ndo_mo))
    2682           6 :       ALLOCATE (sggs_block(ndo_mo, ndo_mo))
    2683             : 
    2684             : ! Iterate over the dimensions of the operator
    2685             : ! Note: operator matrices are asusmed symmetric, can only do upper half
    2686           8 :       DO i = 1, dim_op
    2687             : 
    2688           6 :          ao_op_i => ao_op(i)%matrix
    2689             : 
    2690             :          ! The GS-GS contribution
    2691           6 :          CALL cp_fm_set_element(amew_op(i), 1, 1, gsgs_op(i))
    2692             : 
    2693             :          ! Compute the operator in the donor MOs basis
    2694           6 :          CALL cp_dbcsr_sm_fm_multiply(ao_op_i, gs_coeffs, vec_op, ncol=ndo_mo)
    2695           6 :          CALL parallel_gemm('T', 'N', ndo_mo, ndo_mo, nao, 1.0_dp, gs_coeffs, vec_op, 0.0_dp, prod_fm)
    2696           6 :          CALL cp_fm_get_submatrix(prod_fm, domo_op)
    2697             : 
    2698             :          ! Compute the ground-state/singlet components. ao_op*gs_coeffs already stored in vec_op
    2699           6 :          CALL parallel_gemm('T', 'N', ndo_mo*nsg, ndo_mo, nao, 1.0_dp, sg_coeffs, vec_op, 0.0_dp, sggs_fm)
    2700          78 :          DO isg = 1, nsg
    2701             :             CALL cp_fm_get_submatrix(fm=sggs_fm, target_m=sggs_block, start_row=(isg - 1)*ndo_mo + 1, &
    2702          72 :                                      start_col=1, n_rows=ndo_mo, n_cols=ndo_mo)
    2703          72 :             diag(:) = get_diag(sggs_block)
    2704         288 :             op = sqrt2*SUM(diag)
    2705          78 :             CALL cp_fm_set_element(amew_op(i), 1, 1 + isg, op)
    2706             :          END DO
    2707             : 
    2708             :          ! do the singlet-singlet components
    2709             :          !start with the overlap
    2710             :          CALL dbcsr_multiply('N', 'N', 1.0_dp, matrix_s(1)%matrix, dbcsr_sg, 0.0_dp, &
    2711           6 :                              dbcsr_work, filter_eps=eps_filter)
    2712           6 :          CALL dbcsr_multiply('T', 'N', 1.0_dp, dbcsr_sg, dbcsr_work, 0.0_dp, dbcsr_ovlp, filter_eps=eps_filter)
    2713             : 
    2714             :          !then the operator in the LR orbital basis
    2715           6 :          CALL dbcsr_multiply('N', 'N', 1.0_dp, ao_op_i, dbcsr_sg, 0.0_dp, dbcsr_work, filter_eps=eps_filter)
    2716           6 :          CALL dbcsr_multiply('T', 'N', 1.0_dp, dbcsr_sg, dbcsr_work, 0.0_dp, dbcsr_prod, filter_eps=eps_filter)
    2717             : 
    2718             :          !use the soc routine, it is compatible
    2719             :          CALL rcs_amew_soc_elements(dbcsr_tmp, dbcsr_prod, dbcsr_ovlp, domo_op, pref_trace=-1.0_dp, &
    2720           6 :                                     pref_overall=1.0_dp, pref_diags=gsgs_op(i), symmetric=.TRUE.)
    2721             : 
    2722           6 :          CALL copy_dbcsr_to_fm(dbcsr_tmp, tmp_fm)
    2723             :          CALL cp_fm_to_fm_submat(msource=tmp_fm, mtarget=amew_op(i), nrow=nex, ncol=nex, &
    2724           6 :                                  s_firstrow=1, s_firstcol=1, t_firstrow=2, t_firstcol=2)
    2725             : 
    2726             :          ! compute the triplet-triplet components
    2727             :          !the overlap
    2728             :          CALL dbcsr_multiply('N', 'N', 1.0_dp, matrix_s(1)%matrix, dbcsr_tp, 0.0_dp, &
    2729           6 :                              dbcsr_work, filter_eps=eps_filter)
    2730           6 :          CALL dbcsr_multiply('T', 'N', 1.0_dp, dbcsr_tp, dbcsr_work, 0.0_dp, dbcsr_ovlp, filter_eps=eps_filter)
    2731             : 
    2732             :          !the operator in the LR orbital basis
    2733           6 :          CALL dbcsr_multiply('N', 'N', 1.0_dp, ao_op_i, dbcsr_sg, 0.0_dp, dbcsr_work, filter_eps=eps_filter)
    2734           6 :          CALL dbcsr_multiply('T', 'N', 1.0_dp, dbcsr_sg, dbcsr_work, 0.0_dp, dbcsr_prod, filter_eps=eps_filter)
    2735             : 
    2736             :          CALL rcs_amew_soc_elements(dbcsr_tmp, dbcsr_prod, dbcsr_ovlp, domo_op, pref_trace=-1.0_dp, &
    2737           6 :                                     pref_overall=1.0_dp, pref_diags=gsgs_op(i), symmetric=.TRUE.)
    2738             : 
    2739           6 :          CALL copy_dbcsr_to_fm(dbcsr_tmp, tmp_fm)
    2740             :          !<T^-1|op|T^-1>
    2741             :          CALL cp_fm_to_fm_submat(msource=tmp_fm, mtarget=amew_op(i), nrow=nex, ncol=nex, &
    2742           6 :                                  s_firstrow=1, s_firstcol=1, t_firstrow=1 + nsg + 1, t_firstcol=1 + nsg + 1)
    2743             :          !<T^0|op|T^0>
    2744             :          CALL cp_fm_to_fm_submat(msource=tmp_fm, mtarget=amew_op(i), nrow=nex, ncol=nex, &
    2745             :                                  s_firstrow=1, s_firstcol=1, t_firstrow=1 + nsg + ntp + 1, &
    2746           6 :                                  t_firstcol=1 + nsg + ntp + 1)
    2747             :          !<T^-1|op|T^-1>
    2748             :          CALL cp_fm_to_fm_submat(msource=tmp_fm, mtarget=amew_op(i), nrow=nex, ncol=nex, &
    2749             :                                  s_firstrow=1, s_firstcol=1, t_firstrow=1 + nsg + 2*ntp + 1, &
    2750           6 :                                  t_firstcol=1 + nsg + 2*ntp + 1)
    2751             : 
    2752             :          ! Symmetrize the matrix (only upper triangle built)
    2753           8 :          CALL cp_fm_upper_to_full(amew_op(i), work_fm)
    2754             : 
    2755             :       END DO !i
    2756             : 
    2757             : !  Clean-up
    2758           2 :       CALL cp_fm_release(prod_fm)
    2759           2 :       CALL cp_fm_release(work_fm)
    2760           2 :       CALL cp_fm_release(tmp_fm)
    2761           2 :       CALL cp_fm_release(vec_op)
    2762           2 :       CALL cp_fm_release(sggs_fm)
    2763           2 :       CALL cp_fm_struct_release(prod_struct)
    2764           2 :       CALL cp_fm_struct_release(full_struct)
    2765           2 :       CALL cp_fm_struct_release(tmp_struct)
    2766           2 :       CALL cp_fm_struct_release(sggs_struct)
    2767             : 
    2768          12 :    END SUBROUTINE get_rcs_amew_op
    2769             : 
    2770             : ! **************************************************************************************************
    2771             : !> \brief Computes the os SOC matrix elements between excited states AMEWs based on the LR orbitals
    2772             : !> \param amew_soc output dbcsr matrix with the SOC in the AMEW basis (needs to be fully resereved)
    2773             : !> \param lr_soc dbcsr matrix with the SOC wrt the LR orbitals
    2774             : !> \param lr_overlap dbcsr matrix with the excited states LR orbital overlap
    2775             : !> \param domo_soc the SOC in the basis of the donor MOs
    2776             : !> \param pref_diaga ...
    2777             : !> \param pref_diagb ...
    2778             : !> \param pref_tracea ...
    2779             : !> \param pref_traceb ...
    2780             : !> \param pref_diags see notes
    2781             : !> \param symmetric if the outcome is known to be symmetric, only elements with iex <= jex are done
    2782             : !> \param tracea_start the indices where to start in the trace part for alpha
    2783             : !> \param traceb_start the indices where to start in the trace part for beta
    2784             : !> \note For an excited states pair i,j, the AMEW SOC matrix element is:
    2785             : !>       soc_ij =   pref_diaga*SUM(alpha part of diag of lr_soc_ij)
    2786             : !>                + pref_diagb*SUM(beta part of diag of lr_soc_ij)
    2787             : !>                + pref_tracea*SUM(alpha part of lr_ovlp_ij*TRANSPOSE(domo_soc))
    2788             : !>                + pref_traceb*SUM(beta part of lr_ovlp_ij*TRANSPOSE(domo_soc))
    2789             : !>       optinally, one can add pref_diags*SUM(diag lr_ovlp_ij)
    2790             : ! **************************************************************************************************
    2791          20 :    SUBROUTINE os_amew_soc_elements(amew_soc, lr_soc, lr_overlap, domo_soc, pref_diaga, &
    2792             :                                    pref_diagb, pref_tracea, pref_traceb, pref_diags, &
    2793             :                                    symmetric, tracea_start, traceb_start)
    2794             : 
    2795             :       TYPE(dbcsr_type)                                   :: amew_soc, lr_soc, lr_overlap
    2796             :       REAL(dp), DIMENSION(:, :)                          :: domo_soc
    2797             :       REAL(dp)                                           :: pref_diaga, pref_diagb, pref_tracea, &
    2798             :                                                             pref_traceb
    2799             :       REAL(dp), OPTIONAL                                 :: pref_diags
    2800             :       LOGICAL, OPTIONAL                                  :: symmetric
    2801             :       INTEGER, DIMENSION(2), OPTIONAL                    :: tracea_start, traceb_start
    2802             : 
    2803             :       INTEGER                                            :: blk, iex, jex, ndo_mo, ndo_so
    2804             :       INTEGER, DIMENSION(2)                              :: tas, tbs
    2805             :       LOGICAL                                            :: do_diags, found, my_symm
    2806             :       REAL(dp)                                           :: soc_elem
    2807          20 :       REAL(dp), ALLOCATABLE, DIMENSION(:)                :: diag
    2808          20 :       REAL(dp), DIMENSION(:, :), POINTER                 :: pblock
    2809             :       TYPE(dbcsr_iterator_type)                          :: iter
    2810             : 
    2811          20 :       ndo_so = SIZE(domo_soc, 1)
    2812          20 :       ndo_mo = ndo_so/2
    2813          60 :       ALLOCATE (diag(ndo_so))
    2814          20 :       my_symm = .FALSE.
    2815          20 :       IF (PRESENT(symmetric)) my_symm = symmetric
    2816          20 :       do_diags = .FALSE.
    2817          20 :       IF (PRESENT(pref_diags)) do_diags = .TRUE.
    2818             : 
    2819             :       !by default, alpha part is (1:ndo_mo,1:ndo_mo) and beta is (ndo_mo+1:ndo_so,ndo_mo+1:ndo_so)
    2820             :       !note: in some SF cases, that might change, mainly because the spin-flip LR-coeffs have
    2821             :       !inverse order, that is: the beta-coeffs in the alpha spot and the alpha coeffs in the
    2822             :       !beta spot
    2823          60 :       tas = 1
    2824          60 :       tbs = ndo_mo + 1
    2825          20 :       IF (PRESENT(tracea_start)) tas = tracea_start
    2826          20 :       IF (PRESENT(traceb_start)) tbs = traceb_start
    2827             : 
    2828          20 :       CALL dbcsr_set(amew_soc, 0.0_dp)
    2829             :       !loop over the excited states pairs as the block of amew_soc (which are all reserved)
    2830          20 :       CALL dbcsr_iterator_start(iter, amew_soc)
    2831        1460 :       DO WHILE (dbcsr_iterator_blocks_left(iter))
    2832             : 
    2833        1440 :          CALL dbcsr_iterator_next_block(iter, row=iex, column=jex, blk=blk)
    2834             : 
    2835        1440 :          IF (my_symm .AND. iex > jex) CYCLE
    2836             : 
    2837             :          !compute the soc matrix element
    2838         912 :          soc_elem = 0.0_dp
    2839         912 :          CALL dbcsr_get_block_p(lr_soc, iex, jex, pblock, found)
    2840         912 :          IF (found) THEN
    2841         444 :             diag(:) = get_diag(pblock)
    2842        3108 :             soc_elem = soc_elem + pref_diaga*SUM(diag(1:ndo_mo)) + pref_diagb*(SUM(diag(ndo_mo + 1:ndo_so)))
    2843             :          END IF
    2844             : 
    2845         912 :          CALL dbcsr_get_block_p(lr_overlap, iex, jex, pblock, found)
    2846         912 :          IF (found) THEN
    2847             :             soc_elem = soc_elem &
    2848             :                        + pref_tracea*SUM(pblock(tas(1):tas(1) + ndo_mo - 1, tas(2):tas(2) + ndo_mo - 1)* &
    2849             :                                          domo_soc(tas(1):tas(1) + ndo_mo - 1, tas(2):tas(2) + ndo_mo - 1)) &
    2850             :                        + pref_traceb*SUM(pblock(tbs(1):tbs(1) + ndo_mo - 1, tbs(2):tbs(2) + ndo_mo - 1)* &
    2851       12000 :                                          domo_soc(tbs(1):tbs(1) + ndo_mo - 1, tbs(2):tbs(2) + ndo_mo - 1))
    2852             : 
    2853         480 :             IF (do_diags) THEN
    2854         336 :                diag(:) = get_diag(pblock)
    2855        2352 :                soc_elem = soc_elem + pref_diags*SUM(diag)
    2856             :             END IF
    2857             :          END IF
    2858             : 
    2859         912 :          CALL dbcsr_get_block_p(amew_soc, iex, jex, pblock, found)
    2860        3284 :          pblock = soc_elem
    2861             : 
    2862             :       END DO
    2863          20 :       CALL dbcsr_iterator_stop(iter)
    2864             : 
    2865          60 :    END SUBROUTINE os_amew_soc_elements
    2866             : 
    2867             : ! **************************************************************************************************
    2868             : !> \brief Computes the rcs SOC matrix elements between excited states AMEWs based on the LR orbitals
    2869             : !> \param amew_soc output dbcsr matrix with the SOC in the AMEW basis (needs to be fully resereved)
    2870             : !> \param lr_soc dbcsr matrix with the SOC wrt the LR orbitals
    2871             : !> \param lr_overlap dbcsr matrix with the excited states LR orbital overlap
    2872             : !> \param domo_soc the SOC in the basis of the donor MOs
    2873             : !> \param pref_trace see notes
    2874             : !> \param pref_overall see notes
    2875             : !> \param pref_diags see notes
    2876             : !> \param symmetric if the outcome is known to be symmetric, only elements with iex <= jex are done
    2877             : !> \note For an excited states pair i,j, the AMEW SOC matrix element is:
    2878             : !>       soc_ij = pref_overall*(SUM(diag(lr_soc_ij)) + pref_trace*SUM(lr_overlap_ij*TRANSPOSE(domo_soc)))
    2879             : !>       optionally, the value pref_diags*SUM(diag(lr_overlap_ij)) can be added (before pref_overall)
    2880             : ! **************************************************************************************************
    2881         120 :    SUBROUTINE rcs_amew_soc_elements(amew_soc, lr_soc, lr_overlap, domo_soc, pref_trace, &
    2882             :                                     pref_overall, pref_diags, symmetric)
    2883             : 
    2884             :       TYPE(dbcsr_type)                                   :: amew_soc, lr_soc, lr_overlap
    2885             :       REAL(dp), DIMENSION(:, :)                          :: domo_soc
    2886             :       REAL(dp)                                           :: pref_trace, pref_overall
    2887             :       REAL(dp), OPTIONAL                                 :: pref_diags
    2888             :       LOGICAL, OPTIONAL                                  :: symmetric
    2889             : 
    2890             :       INTEGER                                            :: blk, iex, jex
    2891             :       LOGICAL                                            :: do_diags, found, my_symm
    2892             :       REAL(dp)                                           :: soc_elem
    2893         120 :       REAL(dp), ALLOCATABLE, DIMENSION(:)                :: diag
    2894         120 :       REAL(dp), DIMENSION(:, :), POINTER                 :: pblock
    2895             :       TYPE(dbcsr_iterator_type)                          :: iter
    2896             : 
    2897         360 :       ALLOCATE (diag(SIZE(domo_soc, 1)))
    2898         120 :       my_symm = .FALSE.
    2899         120 :       IF (PRESENT(symmetric)) my_symm = symmetric
    2900         120 :       do_diags = .FALSE.
    2901         120 :       IF (PRESENT(pref_diags)) do_diags = .TRUE.
    2902             : 
    2903         120 :       CALL dbcsr_set(amew_soc, 0.0_dp)
    2904             :       !loop over the excited states pairs as the block of amew_soc (which are all reserved)
    2905         120 :       CALL dbcsr_iterator_start(iter, amew_soc)
    2906        2220 :       DO WHILE (dbcsr_iterator_blocks_left(iter))
    2907             : 
    2908        2100 :          CALL dbcsr_iterator_next_block(iter, row=iex, column=jex, blk=blk)
    2909             : 
    2910        2100 :          IF (my_symm .AND. iex > jex) CYCLE
    2911             : 
    2912             :          !compute the soc matrix element
    2913        1644 :          soc_elem = 0.0_dp
    2914        1644 :          CALL dbcsr_get_block_p(lr_soc, iex, jex, pblock, found)
    2915        1644 :          IF (found) THEN
    2916        1008 :             diag(:) = get_diag(pblock)
    2917        5328 :             soc_elem = soc_elem + SUM(diag)
    2918             :          END IF
    2919             : 
    2920        1644 :          CALL dbcsr_get_block_p(lr_overlap, iex, jex, pblock, found)
    2921        1644 :          IF (found) THEN
    2922       31050 :             soc_elem = soc_elem + pref_trace*SUM(pblock*TRANSPOSE(domo_soc))
    2923             : 
    2924        1158 :             IF (do_diags) THEN
    2925         432 :                diag(:) = get_diag(pblock)
    2926        2250 :                soc_elem = soc_elem + pref_diags*SUM(diag)
    2927             :             END IF
    2928             :          END IF
    2929             : 
    2930        1644 :          CALL dbcsr_get_block_p(amew_soc, iex, jex, pblock, found)
    2931        5508 :          pblock = pref_overall*soc_elem
    2932             : 
    2933             :       END DO
    2934         120 :       CALL dbcsr_iterator_stop(iter)
    2935             : 
    2936         360 :    END SUBROUTINE rcs_amew_soc_elements
    2937             : 
    2938             : ! **************************************************************************************************
    2939             : !> \brief Computes the dipole oscillator strengths in the AMEWs basis for SOC
    2940             : !> \param soc_evecs_cfm the complex AMEWs coefficients
    2941             : !> \param dbcsr_soc_package ...
    2942             : !> \param donor_state ...
    2943             : !> \param xas_tdp_env ...
    2944             : !> \param xas_tdp_control ...
    2945             : !> \param qs_env ...
    2946             : !> \param gs_coeffs the ground state coefficients, given for open-shell because in ROKS, the gs_coeffs
    2947             : !>                  are stored slightly differently within SOC for efficiency and code uniquness
    2948             : ! **************************************************************************************************
    2949           4 :    SUBROUTINE compute_soc_dipole_fosc(soc_evecs_cfm, dbcsr_soc_package, donor_state, xas_tdp_env, &
    2950             :                                       xas_tdp_control, qs_env, gs_coeffs)
    2951             : 
    2952             :       TYPE(cp_cfm_type), INTENT(IN)                      :: soc_evecs_cfm
    2953             :       TYPE(dbcsr_soc_package_type)                       :: dbcsr_soc_package
    2954             :       TYPE(donor_state_type), POINTER                    :: donor_state
    2955             :       TYPE(xas_tdp_env_type), POINTER                    :: xas_tdp_env
    2956             :       TYPE(xas_tdp_control_type), POINTER                :: xas_tdp_control
    2957             :       TYPE(qs_environment_type), POINTER                 :: qs_env
    2958             :       TYPE(cp_fm_type), INTENT(IN), OPTIONAL             :: gs_coeffs
    2959             : 
    2960             :       CHARACTER(len=*), PARAMETER :: routineN = 'compute_soc_dipole_fosc'
    2961             : 
    2962           4 :       COMPLEX(dp), ALLOCATABLE, DIMENSION(:, :)          :: transdip
    2963             :       INTEGER                                            :: handle, i, nosc, ntot
    2964             :       LOGICAL                                            :: do_os, do_rcs
    2965           4 :       REAL(dp), ALLOCATABLE, DIMENSION(:)                :: osc_xyz
    2966           4 :       REAL(dp), DIMENSION(:), POINTER                    :: soc_evals
    2967           4 :       REAL(dp), DIMENSION(:, :), POINTER                 :: osc_str
    2968             :       TYPE(cp_blacs_env_type), POINTER                   :: blacs_env
    2969             :       TYPE(cp_cfm_type)                                  :: dip_cfm, work1_cfm, work2_cfm
    2970             :       TYPE(cp_fm_struct_type), POINTER                   :: dip_struct, full_struct
    2971           4 :       TYPE(cp_fm_type), ALLOCATABLE, DIMENSION(:)        :: amew_dip
    2972             :       TYPE(mp_para_env_type), POINTER                    :: para_env
    2973             : 
    2974           4 :       NULLIFY (para_env, blacs_env, dip_struct, full_struct, osc_str)
    2975           4 :       NULLIFY (soc_evals)
    2976             : 
    2977           4 :       CALL timeset(routineN, handle)
    2978             : 
    2979             :       !init
    2980           4 :       CALL get_qs_env(qs_env, para_env=para_env, blacs_env=blacs_env)
    2981           4 :       do_os = xas_tdp_control%do_spin_cons
    2982           4 :       do_rcs = xas_tdp_control%do_singlet
    2983           4 :       soc_evals => donor_state%soc_evals
    2984           4 :       nosc = SIZE(soc_evals)
    2985           4 :       ntot = nosc + 1 !because GS AMEW is in there
    2986          12 :       ALLOCATE (donor_state%soc_osc_str(nosc, 4))
    2987           4 :       osc_str => donor_state%soc_osc_str
    2988         596 :       osc_str(:, :) = 0.0_dp
    2989           4 :       IF (do_os .AND. .NOT. PRESENT(gs_coeffs)) CPABORT("Need to pass gs_coeffs for open-shell")
    2990             : 
    2991             :       !get some work arrays/matrix
    2992             :       CALL cp_fm_struct_create(dip_struct, context=blacs_env, para_env=para_env, &
    2993           4 :                                nrow_global=ntot, ncol_global=1)
    2994           4 :       CALL cp_cfm_get_info(soc_evecs_cfm, matrix_struct=full_struct)
    2995           4 :       CALL cp_cfm_create(dip_cfm, dip_struct)
    2996           4 :       CALL cp_cfm_create(work1_cfm, full_struct)
    2997           4 :       CALL cp_cfm_create(work2_cfm, full_struct)
    2998          12 :       ALLOCATE (transdip(ntot, 1))
    2999             : 
    3000             :       !get the dipole in the AMEW basis
    3001           4 :       IF (do_os) THEN
    3002             :          CALL get_os_amew_op(amew_dip, xas_tdp_env%dipmat, gs_coeffs, dbcsr_soc_package, &
    3003           2 :                              donor_state, xas_tdp_control%eps_filter, qs_env)
    3004             :       ELSE
    3005             :          CALL get_rcs_amew_op(amew_dip, xas_tdp_env%dipmat, dbcsr_soc_package, donor_state, &
    3006           2 :                               xas_tdp_control%eps_filter, qs_env)
    3007             :       END IF
    3008             : 
    3009          12 :       ALLOCATE (osc_xyz(nosc))
    3010          16 :       DO i = 1, 3 !cartesian coord x, y, z
    3011             : 
    3012             :          !Convert the real dipole into the cfm format for calculations
    3013          12 :          CALL cp_fm_to_cfm(msourcer=amew_dip(i), mtarget=work1_cfm)
    3014             : 
    3015             :          !compute amew_coeffs^dagger * amew_dip * amew_gs to get the transition moments
    3016             :          CALL parallel_gemm('C', 'N', ntot, ntot, ntot, (1.0_dp, 0.0_dp), soc_evecs_cfm, work1_cfm, &
    3017          12 :                             (0.0_dp, 0.0_dp), work2_cfm)
    3018             :          CALL parallel_gemm('N', 'N', ntot, 1, ntot, (1.0_dp, 0.0_dp), work2_cfm, soc_evecs_cfm, &
    3019          12 :                             (0.0_dp, 0.0_dp), dip_cfm)
    3020             : 
    3021          12 :          CALL cp_cfm_get_submatrix(dip_cfm, transdip)
    3022             : 
    3023             :          !transition dipoles are real numbers
    3024         444 :          osc_xyz(:) = REAL(transdip(2:ntot, 1))**2 + AIMAG(transdip(2:ntot, 1))**2
    3025         444 :          osc_str(:, 4) = osc_str(:, 4) + osc_xyz(:)
    3026         448 :          osc_str(:, i) = osc_xyz(:)
    3027             : 
    3028             :       END DO !i
    3029             : 
    3030             :       !multiply with appropriate prefac depending in the rep
    3031          20 :       DO i = 1, 4
    3032          20 :          IF (xas_tdp_control%dipole_form == xas_dip_len) THEN
    3033           0 :             osc_str(:, i) = 2.0_dp/3.0_dp*soc_evals(:)*osc_str(:, i)
    3034             :          ELSE
    3035        1184 :             osc_str(:, i) = 2.0_dp/3.0_dp/soc_evals(:)*osc_str(:, i)
    3036             :          END IF
    3037             :       END DO
    3038             : 
    3039             :       !clean-up
    3040           4 :       CALL cp_fm_struct_release(dip_struct)
    3041           4 :       CALL cp_cfm_release(work1_cfm)
    3042           4 :       CALL cp_cfm_release(work2_cfm)
    3043           4 :       CALL cp_cfm_release(dip_cfm)
    3044          16 :       DO i = 1, 3
    3045          16 :          CALL cp_fm_release(amew_dip(i))
    3046             :       END DO
    3047           4 :       DEALLOCATE (amew_dip, transdip)
    3048             : 
    3049           4 :       CALL timestop(handle)
    3050             : 
    3051          12 :    END SUBROUTINE compute_soc_dipole_fosc
    3052             : 
    3053             : ! **************************************************************************************************
    3054             : !> \brief Computes the quadrupole oscillator strengths in the AMEWs basis for SOC
    3055             : !> \param soc_evecs_cfm the complex AMEWs coefficients
    3056             : !> \param dbcsr_soc_package inherited from the main SOC routine
    3057             : !> \param donor_state ...
    3058             : !> \param xas_tdp_env ...
    3059             : !> \param xas_tdp_control ...
    3060             : !> \param qs_env ...
    3061             : !> \param gs_coeffs the ground state coefficients, given for open-shell because in ROKS, the gs_coeffs
    3062             : !>                  are stored slightly differently within SOC for efficiency and code uniquness
    3063             : ! **************************************************************************************************
    3064           0 :    SUBROUTINE compute_soc_quadrupole_fosc(soc_evecs_cfm, dbcsr_soc_package, donor_state, &
    3065             :                                           xas_tdp_env, xas_tdp_control, qs_env, gs_coeffs)
    3066             : 
    3067             :       TYPE(cp_cfm_type), INTENT(IN)                      :: soc_evecs_cfm
    3068             :       TYPE(dbcsr_soc_package_type)                       :: dbcsr_soc_package
    3069             :       TYPE(donor_state_type), POINTER                    :: donor_state
    3070             :       TYPE(xas_tdp_env_type), POINTER                    :: xas_tdp_env
    3071             :       TYPE(xas_tdp_control_type), POINTER                :: xas_tdp_control
    3072             :       TYPE(qs_environment_type), POINTER                 :: qs_env
    3073             :       TYPE(cp_fm_type), INTENT(IN), OPTIONAL             :: gs_coeffs
    3074             : 
    3075             :       CHARACTER(len=*), PARAMETER :: routineN = 'compute_soc_quadrupole_fosc'
    3076             : 
    3077           0 :       COMPLEX(dp), ALLOCATABLE, DIMENSION(:)             :: trace
    3078             :       COMPLEX(dp), ALLOCATABLE, DIMENSION(:, :)          :: transquad
    3079             :       INTEGER                                            :: handle, i, nosc, ntot
    3080             :       LOGICAL                                            :: do_os, do_rcs
    3081           0 :       REAL(dp), DIMENSION(:), POINTER                    :: osc_str, soc_evals
    3082             :       TYPE(cp_blacs_env_type), POINTER                   :: blacs_env
    3083             :       TYPE(cp_cfm_type)                                  :: quad_cfm, work1_cfm, work2_cfm
    3084             :       TYPE(cp_fm_struct_type), POINTER                   :: full_struct, quad_struct
    3085           0 :       TYPE(cp_fm_type), ALLOCATABLE, DIMENSION(:)        :: amew_quad
    3086             :       TYPE(mp_para_env_type), POINTER                    :: para_env
    3087             : 
    3088           0 :       NULLIFY (para_env, blacs_env, quad_struct, full_struct, osc_str)
    3089           0 :       NULLIFY (soc_evals)
    3090             : 
    3091           0 :       CALL timeset(routineN, handle)
    3092             : 
    3093             :       !init
    3094           0 :       CALL get_qs_env(qs_env, para_env=para_env, blacs_env=blacs_env)
    3095           0 :       do_os = xas_tdp_control%do_spin_cons
    3096           0 :       do_rcs = xas_tdp_control%do_singlet
    3097           0 :       soc_evals => donor_state%soc_evals
    3098           0 :       nosc = SIZE(soc_evals)
    3099           0 :       ntot = nosc + 1 !because GS AMEW is in there
    3100           0 :       ALLOCATE (donor_state%soc_quad_osc_str(nosc))
    3101           0 :       osc_str => donor_state%soc_quad_osc_str
    3102           0 :       osc_str(:) = 0.0_dp
    3103           0 :       IF (do_os .AND. .NOT. PRESENT(gs_coeffs)) CPABORT("Need to pass gs_coeffs for open-shell")
    3104             : 
    3105             :       !get some work arrays/matrix
    3106             :       CALL cp_fm_struct_create(quad_struct, context=blacs_env, para_env=para_env, &
    3107           0 :                                nrow_global=ntot, ncol_global=1)
    3108           0 :       CALL cp_cfm_get_info(soc_evecs_cfm, matrix_struct=full_struct)
    3109           0 :       CALL cp_cfm_create(quad_cfm, quad_struct)
    3110           0 :       CALL cp_cfm_create(work1_cfm, full_struct)
    3111           0 :       CALL cp_cfm_create(work2_cfm, full_struct)
    3112           0 :       ALLOCATE (transquad(ntot, 1))
    3113           0 :       ALLOCATE (trace(nosc))
    3114           0 :       trace = (0.0_dp, 0.0_dp)
    3115             : 
    3116             :       !get the quadrupole in the AMEWs basis
    3117           0 :       IF (do_os) THEN
    3118             :          CALL get_os_amew_op(amew_quad, xas_tdp_env%quadmat, gs_coeffs, dbcsr_soc_package, &
    3119           0 :                              donor_state, xas_tdp_control%eps_filter, qs_env)
    3120             :       ELSE
    3121             :          CALL get_rcs_amew_op(amew_quad, xas_tdp_env%quadmat, dbcsr_soc_package, donor_state, &
    3122           0 :                               xas_tdp_control%eps_filter, qs_env)
    3123             :       END IF
    3124             : 
    3125           0 :       DO i = 1, 6 ! x2, xy, xz, y2, yz, z2
    3126             : 
    3127             :          !Convert the real quadrupole into a cfm for further calculation
    3128           0 :          CALL cp_fm_to_cfm(msourcer=amew_quad(i), mtarget=work1_cfm)
    3129             : 
    3130             :          !compute amew_coeffs^dagger * amew_quad * amew_gs to get the transition moments
    3131             :          CALL parallel_gemm('C', 'N', ntot, ntot, ntot, (1.0_dp, 0.0_dp), soc_evecs_cfm, work1_cfm, &
    3132           0 :                             (0.0_dp, 0.0_dp), work2_cfm)
    3133             :          CALL parallel_gemm('N', 'N', ntot, 1, ntot, (1.0_dp, 0.0_dp), work2_cfm, soc_evecs_cfm, &
    3134           0 :                             (0.0_dp, 0.0_dp), quad_cfm)
    3135             : 
    3136           0 :          CALL cp_cfm_get_submatrix(quad_cfm, transquad)
    3137             : 
    3138             :          !if x2, y2 or z2, need to keep track of trace
    3139           0 :          IF (i == 1 .OR. i == 4 .OR. i == 6) THEN
    3140           0 :             osc_str(:) = osc_str(:) + REAL(transquad(2:ntot, 1))**2 + AIMAG(transquad(2:ntot, 1))**2
    3141           0 :             trace(:) = trace(:) + transquad(2:ntot, 1)
    3142             : 
    3143             :             !if xy, xz, or yz, need to count twice (for yx, zx and zy)
    3144             :          ELSE
    3145           0 :             osc_str(:) = osc_str(:) + 2.0_dp*(REAL(transquad(2:ntot, 1))**2 + AIMAG(transquad(2:ntot, 1))**2)
    3146             :          END IF
    3147             : 
    3148             :       END DO !i
    3149             : 
    3150             :       !remove a third of the trace
    3151           0 :       osc_str(:) = osc_str(:) - 1._dp/3._dp*(REAL(trace(:))**2 + AIMAG(trace(:))**2)
    3152             : 
    3153             :       !multiply by the prefactor
    3154           0 :       osc_str(:) = osc_str(:)*1._dp/20._dp*a_fine**2*soc_evals(:)**3
    3155             : 
    3156             :       !clean-up
    3157           0 :       CALL cp_fm_struct_release(quad_struct)
    3158           0 :       CALL cp_cfm_release(work1_cfm)
    3159           0 :       CALL cp_cfm_release(work2_cfm)
    3160           0 :       CALL cp_cfm_release(quad_cfm)
    3161           0 :       CALL cp_fm_release(amew_quad)
    3162           0 :       DEALLOCATE (transquad, trace)
    3163             : 
    3164           0 :       CALL timestop(handle)
    3165             : 
    3166           0 :    END SUBROUTINE compute_soc_quadrupole_fosc
    3167             : 
    3168           0 : END MODULE xas_tdp_utils
    3169             : 

Generated by: LCOV version 1.15