LCOV - code coverage report
Current view: top level - src - almo_scf_optimizer.F (source / functions) Hit Total Coverage
Test: CP2K Regtests (git:15a58fb) Lines: 1709 3155 54.2 %
Date: 2025-02-18 08:24:35 Functions: 21 34 61.8 %

          Line data    Source code
       1             : !--------------------------------------------------------------------------------------------------!
       2             : !   CP2K: A general program to perform molecular dynamics simulations                              !
       3             : !   Copyright 2000-2025 CP2K developers group <https://cp2k.org>                                   !
       4             : !                                                                                                  !
       5             : !   SPDX-License-Identifier: GPL-2.0-or-later                                                      !
       6             : !--------------------------------------------------------------------------------------------------!
       7             : 
       8             : ! **************************************************************************************************
       9             : !> \brief Optimization routines for all ALMO-based SCF methods
      10             : !> \par History
      11             : !>       2011.05 created [Rustam Z Khaliullin]
      12             : !>       2014.10 as a separate file [Rustam Z Khaliullin]
      13             : !> \author Rustam Z Khaliullin
      14             : ! **************************************************************************************************
      15             : MODULE almo_scf_optimizer
      16             :    USE almo_scf_diis_types,             ONLY: almo_scf_diis_extrapolate,&
      17             :                                               almo_scf_diis_init,&
      18             :                                               almo_scf_diis_push,&
      19             :                                               almo_scf_diis_release,&
      20             :                                               almo_scf_diis_type
      21             :    USE almo_scf_lbfgs_types,            ONLY: lbfgs_create,&
      22             :                                               lbfgs_get_direction,&
      23             :                                               lbfgs_history_type,&
      24             :                                               lbfgs_release,&
      25             :                                               lbfgs_seed
      26             :    USE almo_scf_methods,                ONLY: &
      27             :         almo_scf_ks_blk_to_tv_blk, almo_scf_ks_to_ks_blk, almo_scf_ks_to_ks_xx, &
      28             :         almo_scf_ks_xx_to_tv_xx, almo_scf_p_blk_to_t_blk, almo_scf_t_rescaling, &
      29             :         almo_scf_t_to_proj, apply_domain_operators, apply_projector, &
      30             :         construct_domain_preconditioner, construct_domain_r_down, construct_domain_s_inv, &
      31             :         construct_domain_s_sqrt, fill_matrix_with_ones, get_overlap, orthogonalize_mos, &
      32             :         pseudo_invert_diagonal_blk, xalmo_initial_guess
      33             :    USE almo_scf_qs,                     ONLY: almo_dm_to_almo_ks,&
      34             :                                               almo_dm_to_qs_env,&
      35             :                                               almo_scf_update_ks_energy,&
      36             :                                               matrix_qs_to_almo
      37             :    USE almo_scf_types,                  ONLY: almo_scf_env_type,&
      38             :                                               optimizer_options_type
      39             :    USE cell_types,                      ONLY: cell_type
      40             :    USE cp_blacs_env,                    ONLY: cp_blacs_env_type
      41             :    USE cp_dbcsr_api,                    ONLY: &
      42             :         dbcsr_add, dbcsr_add_on_diag, dbcsr_copy, dbcsr_create, dbcsr_desymmetrize, &
      43             :         dbcsr_distribution_get, dbcsr_distribution_type, dbcsr_dot, dbcsr_filter, dbcsr_finalize, &
      44             :         dbcsr_get_block_p, dbcsr_get_diag, dbcsr_get_info, dbcsr_iterator_blocks_left, &
      45             :         dbcsr_iterator_next_block, dbcsr_iterator_start, dbcsr_iterator_stop, dbcsr_iterator_type, &
      46             :         dbcsr_multiply, dbcsr_nblkcols_total, dbcsr_nblkrows_total, dbcsr_p_type, &
      47             :         dbcsr_print_block_sum, dbcsr_release, dbcsr_reserve_block2d, dbcsr_scale, dbcsr_set, &
      48             :         dbcsr_set_diag, dbcsr_type, dbcsr_type_no_symmetry, dbcsr_work_create
      49             :    USE cp_dbcsr_cholesky,               ONLY: cp_dbcsr_cholesky_decompose,&
      50             :                                               cp_dbcsr_cholesky_invert,&
      51             :                                               cp_dbcsr_cholesky_restore
      52             :    USE cp_dbcsr_contrib,                ONLY: dbcsr_frobenius_norm,&
      53             :                                               dbcsr_hadamard_product,&
      54             :                                               dbcsr_maxabs
      55             :    USE cp_external_control,             ONLY: external_control
      56             :    USE cp_files,                        ONLY: close_file,&
      57             :                                               open_file
      58             :    USE cp_log_handling,                 ONLY: cp_get_default_logger,&
      59             :                                               cp_logger_get_default_unit_nr,&
      60             :                                               cp_logger_type,&
      61             :                                               cp_to_string
      62             :    USE cp_output_handling,              ONLY: cp_print_key_finished_output,&
      63             :                                               cp_print_key_unit_nr
      64             :    USE ct_methods,                      ONLY: analytic_line_search,&
      65             :                                               ct_step_execute,&
      66             :                                               diagonalize_diagonal_blocks
      67             :    USE ct_types,                        ONLY: ct_step_env_clean,&
      68             :                                               ct_step_env_get,&
      69             :                                               ct_step_env_init,&
      70             :                                               ct_step_env_set,&
      71             :                                               ct_step_env_type
      72             :    USE domain_submatrix_methods,        ONLY: add_submatrices,&
      73             :                                               construct_submatrices,&
      74             :                                               copy_submatrices,&
      75             :                                               init_submatrices,&
      76             :                                               maxnorm_submatrices,&
      77             :                                               release_submatrices
      78             :    USE domain_submatrix_types,          ONLY: domain_map_type,&
      79             :                                               domain_submatrix_type,&
      80             :                                               select_row
      81             :    USE input_constants,                 ONLY: &
      82             :         almo_scf_diag, almo_scf_dm_sign, cg_dai_yuan, cg_fletcher, cg_fletcher_reeves, &
      83             :         cg_hager_zhang, cg_hestenes_stiefel, cg_liu_storey, cg_polak_ribiere, cg_zero, &
      84             :         op_loc_berry, op_loc_pipek, trustr_cauchy, trustr_dogleg, virt_full, &
      85             :         xalmo_case_block_diag, xalmo_case_fully_deloc, xalmo_case_normal, xalmo_prec_domain, &
      86             :         xalmo_prec_full, xalmo_prec_zero
      87             :    USE input_section_types,             ONLY: section_vals_get_subs_vals,&
      88             :                                               section_vals_type
      89             :    USE iterate_matrix,                  ONLY: determinant,&
      90             :                                               invert_Hotelling,&
      91             :                                               matrix_sqrt_Newton_Schulz
      92             :    USE kinds,                           ONLY: dp
      93             :    USE machine,                         ONLY: m_flush,&
      94             :                                               m_walltime
      95             :    USE message_passing,                 ONLY: mp_comm_type,&
      96             :                                               mp_para_env_type
      97             :    USE particle_methods,                ONLY: get_particle_set
      98             :    USE particle_types,                  ONLY: particle_type
      99             :    USE qs_energy_types,                 ONLY: qs_energy_type
     100             :    USE qs_environment_types,            ONLY: get_qs_env,&
     101             :                                               qs_environment_type
     102             :    USE qs_kind_types,                   ONLY: qs_kind_type
     103             :    USE qs_loc_utils,                    ONLY: compute_berry_operator
     104             :    USE qs_localization_methods,         ONLY: initialize_weights
     105             : #include "./base/base_uses.f90"
     106             : 
     107             :    IMPLICIT NONE
     108             : 
     109             :    PRIVATE
     110             : 
     111             :    CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'almo_scf_optimizer'
     112             : 
     113             :    PUBLIC :: almo_scf_block_diagonal, &
     114             :              almo_scf_xalmo_eigensolver, &
     115             :              almo_scf_xalmo_trustr, &
     116             :              almo_scf_xalmo_pcg, &
     117             :              almo_scf_construct_nlmos
     118             : 
     119             :    LOGICAL, PARAMETER :: debug_mode = .FALSE.
     120             :    LOGICAL, PARAMETER :: safe_mode = .FALSE.
     121             :    LOGICAL, PARAMETER :: almo_mathematica = .FALSE.
     122             :    INTEGER, PARAMETER :: hessian_path_reuse = 1, &
     123             :                          hessian_path_assemble = 2
     124             : 
     125             : CONTAINS
     126             : 
     127             : ! **************************************************************************************************
     128             : !> \brief An SCF procedure that optimizes block-diagonal ALMOs using DIIS
     129             : !> \param qs_env ...
     130             : !> \param almo_scf_env ...
     131             : !> \param optimizer ...
     132             : !> \par History
     133             : !>       2011.06 created [Rustam Z Khaliullin]
     134             : !>       2018.09 smearing support [Ruben Staub]
     135             : !> \author Rustam Z Khaliullin
     136             : ! **************************************************************************************************
     137          76 :    SUBROUTINE almo_scf_block_diagonal(qs_env, almo_scf_env, optimizer)
     138             :       TYPE(qs_environment_type), POINTER                 :: qs_env
     139             :       TYPE(almo_scf_env_type), INTENT(INOUT)             :: almo_scf_env
     140             :       TYPE(optimizer_options_type), INTENT(IN)           :: optimizer
     141             : 
     142             :       CHARACTER(len=*), PARAMETER :: routineN = 'almo_scf_block_diagonal'
     143             : 
     144             :       INTEGER                                            :: handle, iscf, ispin, nspin, unit_nr
     145          76 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: local_nocc_of_domain
     146             :       LOGICAL                                            :: converged, prepare_to_exit, should_stop, &
     147             :                                                             use_diis, use_prev_as_guess
     148             :       REAL(KIND=dp) :: density_rec, energy_diff, energy_new, energy_old, error_norm, &
     149             :          error_norm_ispin, kTS_sum, prev_error_norm, t1, t2, true_mixing_fraction
     150          76 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: local_mu
     151             :       TYPE(almo_scf_diis_type), ALLOCATABLE, &
     152          76 :          DIMENSION(:)                                    :: almo_diis
     153             :       TYPE(cp_logger_type), POINTER                      :: logger
     154          76 :       TYPE(dbcsr_type), ALLOCATABLE, DIMENSION(:)        :: matrix_mixing_old_blk
     155             :       TYPE(qs_energy_type), POINTER                      :: qs_energy
     156             : 
     157          76 :       CALL timeset(routineN, handle)
     158             : 
     159             :       ! get a useful output_unit
     160          76 :       logger => cp_get_default_logger()
     161          76 :       IF (logger%para_env%is_source()) THEN
     162          38 :          unit_nr = cp_logger_get_default_unit_nr(logger, local=.TRUE.)
     163             :       ELSE
     164             :          unit_nr = -1
     165             :       END IF
     166             : 
     167             :       ! use DIIS, it's superior to simple mixing
     168          76 :       use_diis = .TRUE.
     169          76 :       use_prev_as_guess = .FALSE.
     170             : 
     171          76 :       nspin = almo_scf_env%nspins
     172         228 :       ALLOCATE (local_mu(almo_scf_env%ndomains))
     173         228 :       ALLOCATE (local_nocc_of_domain(almo_scf_env%ndomains))
     174             : 
     175             :       ! init mixing matrices
     176         304 :       ALLOCATE (matrix_mixing_old_blk(nspin))
     177         304 :       ALLOCATE (almo_diis(nspin))
     178         152 :       DO ispin = 1, nspin
     179             :          CALL dbcsr_create(matrix_mixing_old_blk(ispin), &
     180          76 :                            template=almo_scf_env%matrix_ks_blk(ispin))
     181             :          CALL almo_scf_diis_init(diis_env=almo_diis(ispin), &
     182             :                                  sample_err=almo_scf_env%matrix_ks_blk(ispin), &
     183             :                                  sample_var=almo_scf_env%matrix_s_blk(1), &
     184             :                                  error_type=1, &
     185         152 :                                  max_length=optimizer%ndiis)
     186             :       END DO
     187             : 
     188          76 :       CALL get_qs_env(qs_env, energy=qs_energy)
     189          76 :       energy_old = qs_energy%total
     190             : 
     191          76 :       iscf = 0
     192          76 :       prepare_to_exit = .FALSE.
     193          76 :       true_mixing_fraction = 0.0_dp
     194          76 :       error_norm = 1.0E+10_dp ! arbitrary big step
     195             : 
     196          76 :       IF (unit_nr > 0) THEN
     197          38 :          WRITE (unit_nr, '(T2,A,A,A)') REPEAT("-", 20), &
     198          76 :             " Optimization of block-diagonal ALMOs ", REPEAT("-", 21)
     199          38 :          WRITE (unit_nr, *)
     200          38 :          WRITE (unit_nr, '(T2,A13,A6,A23,A14,A14,A9)') "Method", "Iter", &
     201          76 :             "Total Energy", "Change", "Convergence", "Time"
     202          38 :          WRITE (unit_nr, '(T2,A)') REPEAT("-", 79)
     203             :       END IF
     204             : 
     205             :       ! the real SCF loop
     206          76 :       t1 = m_walltime()
     207         424 :       DO
     208             : 
     209         424 :          iscf = iscf + 1
     210             : 
     211             :          ! obtain projected KS matrix and the DIIS-error vector
     212         424 :          CALL almo_scf_ks_to_ks_blk(almo_scf_env)
     213             : 
     214             :          ! inform the DIIS handler about the new KS matrix and its error vector
     215             :          IF (use_diis) THEN
     216         848 :             DO ispin = 1, nspin
     217             :                CALL almo_scf_diis_push(diis_env=almo_diis(ispin), &
     218             :                                        var=almo_scf_env%matrix_ks_blk(ispin), &
     219         848 :                                        err=almo_scf_env%matrix_err_blk(ispin))
     220             :             END DO
     221             :          END IF
     222             : 
     223             :          ! get error_norm: choose the largest of the two spins
     224         848 :          prev_error_norm = error_norm
     225         848 :          DO ispin = 1, nspin
     226             :             !error_norm=dbcsr_frobenius_norm(almo_scf_env%matrix_err_blk(ispin))
     227         424 :             error_norm_ispin = dbcsr_maxabs(almo_scf_env%matrix_err_blk(ispin))
     228         424 :             IF (ispin .EQ. 1) error_norm = error_norm_ispin
     229           0 :             IF (ispin .GT. 1 .AND. error_norm_ispin .GT. error_norm) &
     230         424 :                error_norm = error_norm_ispin
     231             :          END DO
     232             : 
     233         424 :          IF (error_norm .LT. almo_scf_env%eps_prev_guess) THEN
     234           0 :             use_prev_as_guess = .TRUE.
     235             :          ELSE
     236         424 :             use_prev_as_guess = .FALSE.
     237             :          END IF
     238             : 
     239             :          ! check convergence
     240         424 :          converged = .TRUE.
     241         424 :          IF (error_norm .GT. optimizer%eps_error) converged = .FALSE.
     242             : 
     243             :          ! check other exit criteria: max SCF steps and timing
     244             :          CALL external_control(should_stop, "SCF", &
     245             :                                start_time=qs_env%start_time, &
     246         424 :                                target_time=qs_env%target_time)
     247         424 :          IF (should_stop .OR. iscf >= optimizer%max_iter .OR. converged) THEN
     248          76 :             prepare_to_exit = .TRUE.
     249          76 :             IF (iscf == 1) energy_new = energy_old
     250             :          END IF
     251             : 
     252             :          ! if early stopping is on do at least one iteration
     253         424 :          IF (optimizer%early_stopping_on .AND. iscf .EQ. 1) &
     254             :             prepare_to_exit = .FALSE.
     255             : 
     256         424 :          IF (.NOT. prepare_to_exit) THEN ! update the ALMOs and density matrix
     257             : 
     258             :             ! perform mixing of KS matrices
     259         348 :             IF (iscf .NE. 1) THEN
     260             :                IF (use_diis) THEN ! use diis instead of mixing
     261         544 :                   DO ispin = 1, nspin
     262             :                      CALL almo_scf_diis_extrapolate(diis_env=almo_diis(ispin), &
     263         544 :                                                     extr_var=almo_scf_env%matrix_ks_blk(ispin))
     264             :                   END DO
     265             :                ELSE ! use mixing
     266             :                   true_mixing_fraction = almo_scf_env%mixing_fraction
     267             :                   DO ispin = 1, nspin
     268             :                      CALL dbcsr_add(almo_scf_env%matrix_ks_blk(ispin), &
     269             :                                     matrix_mixing_old_blk(ispin), &
     270             :                                     true_mixing_fraction, &
     271             :                                     1.0_dp - true_mixing_fraction)
     272             :                   END DO
     273             :                END IF
     274             :             END IF
     275             :             ! save the new matrix for the future mixing
     276         696 :             DO ispin = 1, nspin
     277             :                CALL dbcsr_copy(matrix_mixing_old_blk(ispin), &
     278         696 :                                almo_scf_env%matrix_ks_blk(ispin))
     279             :             END DO
     280             : 
     281             :             ! obtain ALMOs from the new KS matrix
     282         696 :             SELECT CASE (almo_scf_env%almo_update_algorithm)
     283             :             CASE (almo_scf_diag)
     284             : 
     285         348 :                CALL almo_scf_ks_blk_to_tv_blk(almo_scf_env)
     286             : 
     287             :             CASE (almo_scf_dm_sign)
     288             : 
     289             :                ! update the density matrix
     290           0 :                DO ispin = 1, nspin
     291             : 
     292           0 :                   local_nocc_of_domain(:) = almo_scf_env%nocc_of_domain(:, ispin)
     293           0 :                   local_mu(:) = almo_scf_env%mu_of_domain(:, ispin)
     294             :                   ! RZK UPDATE! the update algorithm is removed because
     295             :                   ! RZK UPDATE! it requires updating core LS_SCF routines
     296             :                   ! RZK UPDATE! (the code exists in the CVS version)
     297           0 :                   CPABORT("Density_matrix_sign has not been tested yet")
     298             :                   ! RZK UPDATE!  CALL density_matrix_sign(almo_scf_env%matrix_p_blk(ispin),&
     299             :                   ! RZK UPDATE!          local_mu,&
     300             :                   ! RZK UPDATE!          almo_scf_env%fixed_mu,&
     301             :                   ! RZK UPDATE!          almo_scf_env%matrix_ks_blk(ispin),&
     302             :                   ! RZK UPDATE!          !matrix_mixing_old_blk(ispin),&
     303             :                   ! RZK UPDATE!          almo_scf_env%matrix_s_blk(1), &
     304             :                   ! RZK UPDATE!          almo_scf_env%matrix_s_blk_inv(1), &
     305             :                   ! RZK UPDATE!          local_nocc_of_domain,&
     306             :                   ! RZK UPDATE!          almo_scf_env%eps_filter,&
     307             :                   ! RZK UPDATE!          almo_scf_env%domain_index_of_ao)
     308             :                   ! RZK UPDATE!
     309           0 :                   almo_scf_env%mu_of_domain(:, ispin) = local_mu(:)
     310             : 
     311             :                END DO
     312             : 
     313             :                ! obtain ALMOs from matrix_p_blk: T_new = P_blk S_blk T_old
     314           0 :                CALL almo_scf_p_blk_to_t_blk(almo_scf_env, ionic=.FALSE.)
     315             : 
     316         348 :                DO ispin = 1, almo_scf_env%nspins
     317             : 
     318             :                   CALL orthogonalize_mos(ket=almo_scf_env%matrix_t_blk(ispin), &
     319             :                                          overlap=almo_scf_env%matrix_sigma_blk(ispin), &
     320             :                                          metric=almo_scf_env%matrix_s_blk(1), &
     321             :                                          retain_locality=.TRUE., &
     322             :                                          only_normalize=.FALSE., &
     323             :                                          nocc_of_domain=almo_scf_env%nocc_of_domain(:, ispin), &
     324             :                                          eps_filter=almo_scf_env%eps_filter, &
     325             :                                          order_lanczos=almo_scf_env%order_lanczos, &
     326             :                                          eps_lanczos=almo_scf_env%eps_lanczos, &
     327           0 :                                          max_iter_lanczos=almo_scf_env%max_iter_lanczos)
     328             : 
     329             :                END DO
     330             : 
     331             :             END SELECT
     332             : 
     333             :             ! obtain density matrix from ALMOs
     334         696 :             DO ispin = 1, almo_scf_env%nspins
     335             : 
     336             :                !! Application of an occupation-rescaling trick for smearing, if requested
     337         348 :                IF (almo_scf_env%smear) THEN
     338             :                   CALL almo_scf_t_rescaling(matrix_t=almo_scf_env%matrix_t_blk(ispin), &
     339             :                                             mo_energies=almo_scf_env%mo_energies(:, ispin), &
     340             :                                             mu_of_domain=almo_scf_env%mu_of_domain(:, ispin), &
     341             :                                             real_ne_of_domain=almo_scf_env%real_ne_of_domain(:, ispin), &
     342             :                                             spin_kTS=almo_scf_env%kTS(ispin), &
     343             :                                             smear_e_temp=almo_scf_env%smear_e_temp, &
     344             :                                             ndomains=almo_scf_env%ndomains, &
     345          16 :                                             nocc_of_domain=almo_scf_env%nocc_of_domain(:, ispin))
     346             :                END IF
     347             : 
     348             :                CALL almo_scf_t_to_proj(t=almo_scf_env%matrix_t_blk(ispin), &
     349             :                                        p=almo_scf_env%matrix_p(ispin), &
     350             :                                        eps_filter=almo_scf_env%eps_filter, &
     351             :                                        orthog_orbs=.FALSE., &
     352             :                                        nocc_of_domain=almo_scf_env%nocc_of_domain(:, ispin), &
     353             :                                        s=almo_scf_env%matrix_s(1), &
     354             :                                        sigma=almo_scf_env%matrix_sigma(ispin), &
     355             :                                        sigma_inv=almo_scf_env%matrix_sigma_inv(ispin), &
     356             :                                        use_guess=use_prev_as_guess, &
     357             :                                        smear=almo_scf_env%smear, &
     358             :                                        algorithm=almo_scf_env%sigma_inv_algorithm, &
     359             :                                        inverse_accelerator=almo_scf_env%order_lanczos, &
     360             :                                        inv_eps_factor=almo_scf_env%matrix_iter_eps_error_factor, &
     361             :                                        eps_lanczos=almo_scf_env%eps_lanczos, &
     362             :                                        max_iter_lanczos=almo_scf_env%max_iter_lanczos, &
     363             :                                        para_env=almo_scf_env%para_env, &
     364         696 :                                        blacs_env=almo_scf_env%blacs_env)
     365             : 
     366             :             END DO
     367             : 
     368         348 :             IF (almo_scf_env%nspins == 1) THEN
     369         348 :                CALL dbcsr_scale(almo_scf_env%matrix_p(1), 2.0_dp)
     370             :                !! Rescaling electronic entropy contribution by spin_factor
     371         348 :                IF (almo_scf_env%smear) THEN
     372          16 :                   almo_scf_env%kTS(1) = almo_scf_env%kTS(1)*2.0_dp
     373             :                END IF
     374             :             END IF
     375             : 
     376         348 :             IF (almo_scf_env%smear) THEN
     377          32 :                kTS_sum = SUM(almo_scf_env%kTS)
     378             :             ELSE
     379         332 :                kTS_sum = 0.0_dp
     380             :             END IF
     381             : 
     382             :             ! compute the new KS matrix and new energy
     383             :             CALL almo_dm_to_almo_ks(qs_env, &
     384             :                                     almo_scf_env%matrix_p, &
     385             :                                     almo_scf_env%matrix_ks, &
     386             :                                     energy_new, &
     387             :                                     almo_scf_env%eps_filter, &
     388             :                                     almo_scf_env%mat_distr_aos, &
     389             :                                     smear=almo_scf_env%smear, &
     390         348 :                                     kTS_sum=kTS_sum)
     391             : 
     392             :          END IF ! prepare_to_exit
     393             : 
     394         424 :          energy_diff = energy_new - energy_old
     395         424 :          energy_old = energy_new
     396         424 :          almo_scf_env%almo_scf_energy = energy_new
     397             : 
     398         424 :          t2 = m_walltime()
     399             :          ! brief report on the current SCF loop
     400         424 :          IF (unit_nr > 0) THEN
     401         212 :             WRITE (unit_nr, '(T2,A13,I6,F23.10,E14.5,F14.9,F9.2)') "ALMO SCF DIIS", &
     402         212 :                iscf, &
     403         424 :                energy_new, energy_diff, error_norm, t2 - t1
     404             :          END IF
     405         424 :          t1 = m_walltime()
     406             : 
     407         424 :          IF (prepare_to_exit) EXIT
     408             : 
     409             :       END DO ! end scf cycle
     410             : 
     411             :       !! Print number of electrons recovered if smearing was requested
     412          76 :       IF (almo_scf_env%smear) THEN
     413           8 :          DO ispin = 1, nspin
     414           4 :             CALL dbcsr_dot(almo_scf_env%matrix_p(ispin), almo_scf_env%matrix_s(1), density_rec)
     415           8 :             IF (unit_nr > 0) THEN
     416           2 :                WRITE (unit_nr, '(T2,A20,F23.10)') "Electrons recovered:", density_rec
     417             :             END IF
     418             :          END DO
     419             :       END IF
     420             : 
     421          76 :       IF (.NOT. converged .AND. (.NOT. optimizer%early_stopping_on)) THEN
     422           0 :          IF (unit_nr > 0) THEN
     423           0 :             CPABORT("SCF for block-diagonal ALMOs not converged!")
     424             :          END IF
     425             :       END IF
     426             : 
     427         152 :       DO ispin = 1, nspin
     428          76 :          CALL dbcsr_release(matrix_mixing_old_blk(ispin))
     429         152 :          CALL almo_scf_diis_release(diis_env=almo_diis(ispin))
     430             :       END DO
     431         152 :       DEALLOCATE (almo_diis)
     432          76 :       DEALLOCATE (matrix_mixing_old_blk)
     433          76 :       DEALLOCATE (local_mu)
     434          76 :       DEALLOCATE (local_nocc_of_domain)
     435             : 
     436          76 :       CALL timestop(handle)
     437             : 
     438          76 :    END SUBROUTINE almo_scf_block_diagonal
     439             : 
     440             : ! **************************************************************************************************
     441             : !> \brief An eigensolver-based SCF to optimize extended ALMOs (i.e. ALMOs on
     442             : !>        overlapping domains)
     443             : !> \param qs_env ...
     444             : !> \param almo_scf_env ...
     445             : !> \param optimizer ...
     446             : !> \par History
     447             : !>       2013.03 created [Rustam Z Khaliullin]
     448             : !>       2018.09 smearing support [Ruben Staub]
     449             : !> \author Rustam Z Khaliullin
     450             : ! **************************************************************************************************
     451           2 :    SUBROUTINE almo_scf_xalmo_eigensolver(qs_env, almo_scf_env, optimizer)
     452             :       TYPE(qs_environment_type), POINTER                 :: qs_env
     453             :       TYPE(almo_scf_env_type), INTENT(INOUT)             :: almo_scf_env
     454             :       TYPE(optimizer_options_type), INTENT(IN)           :: optimizer
     455             : 
     456             :       CHARACTER(len=*), PARAMETER :: routineN = 'almo_scf_xalmo_eigensolver'
     457             : 
     458             :       INTEGER                                            :: handle, iscf, ispin, nspin, unit_nr
     459             :       LOGICAL                                            :: converged, prepare_to_exit, should_stop
     460             :       REAL(KIND=dp) :: denergy_tot, density_rec, energy_diff, energy_new, energy_old, error_norm, &
     461             :          error_norm_0, kTS_sum, spin_factor, t1, t2
     462             :       REAL(KIND=dp), DIMENSION(2)                        :: denergy_spin
     463             :       TYPE(almo_scf_diis_type), ALLOCATABLE, &
     464           2 :          DIMENSION(:)                                    :: almo_diis
     465             :       TYPE(cp_logger_type), POINTER                      :: logger
     466             :       TYPE(dbcsr_type)                                   :: matrix_p_almo_scf_converged
     467             :       TYPE(domain_submatrix_type), ALLOCATABLE, &
     468           2 :          DIMENSION(:, :)                                 :: submatrix_mixing_old_blk
     469             : 
     470           2 :       CALL timeset(routineN, handle)
     471             : 
     472             :       ! get a useful output_unit
     473           2 :       logger => cp_get_default_logger()
     474           2 :       IF (logger%para_env%is_source()) THEN
     475           1 :          unit_nr = cp_logger_get_default_unit_nr(logger, local=.TRUE.)
     476             :       ELSE
     477           1 :          unit_nr = -1
     478             :       END IF
     479             : 
     480           2 :       nspin = almo_scf_env%nspins
     481           2 :       IF (nspin == 1) THEN
     482           2 :          spin_factor = 2.0_dp
     483             :       ELSE
     484           0 :          spin_factor = 1.0_dp
     485             :       END IF
     486             : 
     487             :       ! RZK-warning domain_s_sqrt and domain_s_sqrt_inv do not have spin
     488             :       ! components yet (may be used later)
     489           2 :       ispin = 1
     490             :       CALL construct_domain_s_sqrt( &
     491             :          matrix_s=almo_scf_env%matrix_s(1), &
     492             :          subm_s_sqrt=almo_scf_env%domain_s_sqrt(:, ispin), &
     493             :          subm_s_sqrt_inv=almo_scf_env%domain_s_sqrt_inv(:, ispin), &
     494             :          dpattern=almo_scf_env%quench_t(ispin), &
     495             :          map=almo_scf_env%domain_map(ispin), &
     496           2 :          node_of_domain=almo_scf_env%cpu_of_domain)
     497             :       ! TRY: construct s_inv
     498             :       !CALL construct_domain_s_inv(&
     499             :       !       matrix_s=almo_scf_env%matrix_s(1),&
     500             :       !       subm_s_inv=almo_scf_env%domain_s_inv(:,ispin),&
     501             :       !       dpattern=almo_scf_env%quench_t(ispin),&
     502             :       !       map=almo_scf_env%domain_map(ispin),&
     503             :       !       node_of_domain=almo_scf_env%cpu_of_domain)
     504             : 
     505             :       ! construct the domain template for the occupied orbitals
     506           4 :       DO ispin = 1, nspin
     507             :          ! RZK-warning we need only the matrix structure, not data
     508             :          ! replace construct_submatrices with lighter procedure with
     509             :          ! no heavy communications
     510             :          CALL construct_submatrices( &
     511             :             matrix=almo_scf_env%quench_t(ispin), &
     512             :             submatrix=almo_scf_env%domain_t(:, ispin), &
     513             :             distr_pattern=almo_scf_env%quench_t(ispin), &
     514             :             domain_map=almo_scf_env%domain_map(ispin), &
     515             :             node_of_domain=almo_scf_env%cpu_of_domain, &
     516           4 :             job_type=select_row)
     517             :       END DO
     518             : 
     519             :       ! init mixing matrices
     520          20 :       ALLOCATE (submatrix_mixing_old_blk(almo_scf_env%ndomains, nspin))
     521           2 :       CALL init_submatrices(submatrix_mixing_old_blk)
     522           8 :       ALLOCATE (almo_diis(nspin))
     523             : 
     524             :       ! TRY: construct block-projector
     525             :       !ALLOCATE(submatrix_tmp(almo_scf_env%ndomains))
     526             :       !DO ispin=1,nspin
     527             :       !   CALL init_submatrices(submatrix_tmp)
     528             :       !   CALL construct_domain_r_down(&
     529             :       !           matrix_t=almo_scf_env%matrix_t_blk(ispin),&
     530             :       !           matrix_sigma_inv=almo_scf_env%matrix_sigma_inv(ispin),&
     531             :       !           matrix_s=almo_scf_env%matrix_s(1),&
     532             :       !           subm_r_down=submatrix_tmp(:),&
     533             :       !           dpattern=almo_scf_env%quench_t(ispin),&
     534             :       !           map=almo_scf_env%domain_map(ispin),&
     535             :       !           node_of_domain=almo_scf_env%cpu_of_domain,&
     536             :       !           filter_eps=almo_scf_env%eps_filter)
     537             :       !   CALL multiply_submatrices('N','N',1.0_dp,&
     538             :       !           submatrix_tmp(:),&
     539             :       !           almo_scf_env%domain_s_inv(:,1),0.0_dp,&
     540             :       !           almo_scf_env%domain_r_down_up(:,ispin))
     541             :       !   CALL release_submatrices(submatrix_tmp)
     542             :       !ENDDO
     543             :       !DEALLOCATE(submatrix_tmp)
     544             : 
     545           4 :       DO ispin = 1, nspin
     546             :          ! use s_sqrt since they are already properly constructed
     547             :          ! and have the same distributions as domain_err and domain_ks_xx
     548             :          CALL almo_scf_diis_init(diis_env=almo_diis(ispin), &
     549             :                                  sample_err=almo_scf_env%domain_s_sqrt(:, ispin), &
     550             :                                  error_type=1, &
     551           4 :                                  max_length=optimizer%ndiis)
     552             :       END DO
     553             : 
     554           2 :       denergy_tot = 0.0_dp
     555           2 :       energy_old = 0.0_dp
     556           2 :       iscf = 0
     557           2 :       prepare_to_exit = .FALSE.
     558             : 
     559             :       ! the SCF loop
     560           2 :       t1 = m_walltime()
     561           2 :       DO
     562             : 
     563           2 :          iscf = iscf + 1
     564             : 
     565             :          ! obtain projected KS matrix and the DIIS-error vector
     566           2 :          CALL almo_scf_ks_to_ks_xx(almo_scf_env)
     567             : 
     568             :          ! inform the DIIS handler about the new KS matrix and its error vector
     569           4 :          DO ispin = 1, nspin
     570             :             CALL almo_scf_diis_push(diis_env=almo_diis(ispin), &
     571             :                                     d_var=almo_scf_env%domain_ks_xx(:, ispin), &
     572           4 :                                     d_err=almo_scf_env%domain_err(:, ispin))
     573             :          END DO
     574             : 
     575             :          ! check convergence
     576           2 :          converged = .TRUE.
     577           2 :          DO ispin = 1, nspin
     578             :             !error_norm=dbcsr_frobenius_norm(almo_scf_env%matrix_err_blk(ispin))
     579           2 :             error_norm = dbcsr_maxabs(almo_scf_env%matrix_err_xx(ispin))
     580             :             CALL maxnorm_submatrices(almo_scf_env%domain_err(:, ispin), &
     581           2 :                                      norm=error_norm_0)
     582           2 :             IF (error_norm .GT. optimizer%eps_error) THEN
     583             :                converged = .FALSE.
     584             :                EXIT ! no need to check the other spin
     585             :             END IF
     586             :          END DO
     587             :          ! check other exit criteria: max SCF steps and timing
     588             :          CALL external_control(should_stop, "SCF", &
     589             :                                start_time=qs_env%start_time, &
     590           2 :                                target_time=qs_env%target_time)
     591           2 :          IF (should_stop .OR. iscf >= optimizer%max_iter .OR. converged) THEN
     592           0 :             prepare_to_exit = .TRUE.
     593             :          END IF
     594             : 
     595             :          ! if early stopping is on do at least one iteration
     596           2 :          IF (optimizer%early_stopping_on .AND. iscf .EQ. 1) &
     597             :             prepare_to_exit = .FALSE.
     598             : 
     599           2 :          IF (.NOT. prepare_to_exit) THEN ! update the ALMOs and density matrix
     600             : 
     601             :             ! perform mixing of KS matrices
     602           2 :             IF (iscf .NE. 1) THEN
     603             :                IF (.FALSE.) THEN ! use diis instead of mixing
     604             :                   DO ispin = 1, nspin
     605             :                      CALL add_submatrices( &
     606             :                         almo_scf_env%mixing_fraction, &
     607             :                         almo_scf_env%domain_ks_xx(:, ispin), &
     608             :                         1.0_dp - almo_scf_env%mixing_fraction, &
     609             :                         submatrix_mixing_old_blk(:, ispin), &
     610             :                         'N')
     611             :                   END DO
     612             :                ELSE
     613           0 :                   DO ispin = 1, nspin
     614             :                      CALL almo_scf_diis_extrapolate(diis_env=almo_diis(ispin), &
     615           0 :                                                     d_extr_var=almo_scf_env%domain_ks_xx(:, ispin))
     616             :                   END DO
     617             :                END IF
     618             :             END IF
     619             :             ! save the new matrix for the future mixing
     620           4 :             DO ispin = 1, nspin
     621             :                CALL copy_submatrices( &
     622             :                   almo_scf_env%domain_ks_xx(:, ispin), &
     623             :                   submatrix_mixing_old_blk(:, ispin), &
     624           4 :                   copy_data=.TRUE.)
     625             :             END DO
     626             : 
     627             :             ! obtain a new set of ALMOs from the updated KS matrix
     628           2 :             CALL almo_scf_ks_xx_to_tv_xx(almo_scf_env)
     629             : 
     630             :             ! update the density matrix
     631           4 :             DO ispin = 1, nspin
     632             : 
     633             :                ! save the initial density matrix (to get the perturbative energy lowering)
     634           2 :                IF (iscf .EQ. 1) THEN
     635             :                   CALL dbcsr_create(matrix_p_almo_scf_converged, &
     636           2 :                                     template=almo_scf_env%matrix_p(ispin))
     637             :                   CALL dbcsr_copy(matrix_p_almo_scf_converged, &
     638           2 :                                   almo_scf_env%matrix_p(ispin))
     639             :                END IF
     640             : 
     641             :                !! Application of an occupation-rescaling trick for smearing, if requested
     642           2 :                IF (almo_scf_env%smear) THEN
     643             :                   CALL almo_scf_t_rescaling(matrix_t=almo_scf_env%matrix_t_blk(ispin), &
     644             :                                             mo_energies=almo_scf_env%mo_energies(:, ispin), &
     645             :                                             mu_of_domain=almo_scf_env%mu_of_domain(:, ispin), &
     646             :                                             real_ne_of_domain=almo_scf_env%real_ne_of_domain(:, ispin), &
     647             :                                             spin_kTS=almo_scf_env%kTS(ispin), &
     648             :                                             smear_e_temp=almo_scf_env%smear_e_temp, &
     649             :                                             ndomains=almo_scf_env%ndomains, &
     650           0 :                                             nocc_of_domain=almo_scf_env%nocc_of_domain(:, ispin))
     651             :                END IF
     652             : 
     653             :                ! update now
     654             :                CALL almo_scf_t_to_proj( &
     655             :                   t=almo_scf_env%matrix_t(ispin), &
     656             :                   p=almo_scf_env%matrix_p(ispin), &
     657             :                   eps_filter=almo_scf_env%eps_filter, &
     658             :                   orthog_orbs=.FALSE., &
     659             :                   nocc_of_domain=almo_scf_env%nocc_of_domain(:, ispin), &
     660             :                   s=almo_scf_env%matrix_s(1), &
     661             :                   sigma=almo_scf_env%matrix_sigma(ispin), &
     662             :                   sigma_inv=almo_scf_env%matrix_sigma_inv(ispin), &
     663             :                   use_guess=.TRUE., &
     664             :                   smear=almo_scf_env%smear, &
     665             :                   algorithm=almo_scf_env%sigma_inv_algorithm, &
     666             :                   inverse_accelerator=almo_scf_env%order_lanczos, &
     667             :                   inv_eps_factor=almo_scf_env%matrix_iter_eps_error_factor, &
     668             :                   eps_lanczos=almo_scf_env%eps_lanczos, &
     669             :                   max_iter_lanczos=almo_scf_env%max_iter_lanczos, &
     670             :                   para_env=almo_scf_env%para_env, &
     671           2 :                   blacs_env=almo_scf_env%blacs_env)
     672           2 :                CALL dbcsr_scale(almo_scf_env%matrix_p(ispin), spin_factor)
     673             :                !! Rescaling electronic entropy contribution by spin_factor
     674           2 :                IF (almo_scf_env%smear) THEN
     675           0 :                   almo_scf_env%kTS(ispin) = almo_scf_env%kTS(ispin)*spin_factor
     676             :                END IF
     677             : 
     678             :                ! obtain perturbative estimate (at no additional cost)
     679             :                ! of the energy lowering relative to the block-diagonal ALMOs
     680           4 :                IF (iscf .EQ. 1) THEN
     681             : 
     682             :                   CALL dbcsr_add(matrix_p_almo_scf_converged, &
     683           2 :                                  almo_scf_env%matrix_p(ispin), -1.0_dp, 1.0_dp)
     684             :                   CALL dbcsr_dot(almo_scf_env%matrix_ks_0deloc(ispin), &
     685             :                                  matrix_p_almo_scf_converged, &
     686           2 :                                  denergy_spin(ispin))
     687             : 
     688           2 :                   CALL dbcsr_release(matrix_p_almo_scf_converged)
     689             : 
     690             :                   !! RS-WARNING: If smearing ALMO is requested, electronic entropy contribution should probably be included here
     691             : 
     692           2 :                   denergy_tot = denergy_tot + denergy_spin(ispin)
     693             : 
     694             :                   ! RZK-warning Energy correction can be evaluated using matrix_x
     695             :                   ! as shown in the attempt below and in the PCG procedure.
     696             :                   ! Using matrix_x allows immediate decomposition of the energy
     697             :                   ! lowering into 2-body components for EDA. However, it does not
     698             :                   ! work here because the diagonalization routine does not necessarily
     699             :                   ! produce orbitals with the same sign as the block-diagonal ALMOs
     700             :                   ! Any fixes?!
     701             : 
     702             :                   !CALL dbcsr_init(matrix_x)
     703             :                   !CALL dbcsr_create(matrix_x,&
     704             :                   !        template=almo_scf_env%matrix_t(ispin))
     705             :                   !
     706             :                   !CALL dbcsr_init(matrix_tmp_no)
     707             :                   !CALL dbcsr_create(matrix_tmp_no,&
     708             :                   !        template=almo_scf_env%matrix_t(ispin))
     709             :                   !
     710             :                   !CALL dbcsr_copy(matrix_x,&
     711             :                   !        almo_scf_env%matrix_t_blk(ispin))
     712             :                   !CALL dbcsr_add(matrix_x,almo_scf_env%matrix_t(ispin),&
     713             :                   !        -1.0_dp,1.0_dp)
     714             : 
     715             :                   !CALL dbcsr_dot(matrix_x, almo_scf_env%matrix_err_xx(ispin),denergy)
     716             : 
     717             :                   !denergy=denergy*spin_factor
     718             : 
     719             :                   !IF (unit_nr>0) THEN
     720             :                   !   WRITE(unit_nr,*) "_ENERGY-0: ", almo_scf_env%almo_scf_energy
     721             :                   !   WRITE(unit_nr,*) "_ENERGY-D: ", denergy
     722             :                   !   WRITE(unit_nr,*) "_ENERGY-F: ", almo_scf_env%almo_scf_energy+denergy
     723             :                   !ENDIF
     724             :                   !! RZK-warning update will not work since the energy is overwritten almost immediately
     725             :                   !!CALL almo_scf_update_ks_energy(qs_env,&
     726             :                   !!        almo_scf_env%almo_scf_energy+denergy)
     727             :                   !!
     728             : 
     729             :                   !! print out the results of the decomposition analysis
     730             :                   !CALL dbcsr_hadamard_product(matrix_x,&
     731             :                   !        almo_scf_env%matrix_err_xx(ispin),&
     732             :                   !        matrix_tmp_no)
     733             :                   !CALL dbcsr_scale(matrix_tmp_no,spin_factor)
     734             :                   !CALL dbcsr_filter(matrix_tmp_no,almo_scf_env%eps_filter)
     735             :                   !
     736             :                   !IF (unit_nr>0) THEN
     737             :                   !   WRITE(unit_nr,*)
     738             :                   !   WRITE(unit_nr,'(T2,A)') "DECOMPOSITION OF THE DELOCALIZATION ENERGY"
     739             :                   !ENDIF
     740             : 
     741             :                   !mynode=dbcsr_mp_mynode(dbcsr_distribution_mp(&
     742             :                   !   dbcsr_distribution(matrix_tmp_no)))
     743             :                   !WRITE(mynodestr,'(I6.6)') mynode
     744             :                   !mylogfile='EDA.'//TRIM(ADJUSTL(mynodestr))
     745             :                   !OPEN (iunit,file=mylogfile,status='REPLACE')
     746             :                   !CALL dbcsr_print_block_sum(matrix_tmp_no,iunit)
     747             :                   !CLOSE(iunit)
     748             :                   !
     749             :                   !CALL dbcsr_release(matrix_tmp_no)
     750             :                   !CALL dbcsr_release(matrix_x)
     751             : 
     752             :                END IF ! iscf.eq.1
     753             : 
     754             :             END DO
     755             : 
     756             :             ! print out the energy lowering
     757           2 :             IF (iscf .EQ. 1) THEN
     758             :                CALL energy_lowering_report( &
     759             :                   unit_nr=unit_nr, &
     760             :                   ref_energy=almo_scf_env%almo_scf_energy, &
     761           2 :                   energy_lowering=denergy_tot)
     762             :                CALL almo_scf_update_ks_energy(qs_env, &
     763             :                                               energy=almo_scf_env%almo_scf_energy, &
     764           2 :                                               energy_singles_corr=denergy_tot)
     765             :             END IF
     766             : 
     767             :             ! compute the new KS matrix and new energy
     768           2 :             IF (.NOT. almo_scf_env%perturbative_delocalization) THEN
     769             : 
     770           0 :                IF (almo_scf_env%smear) THEN
     771           0 :                   kTS_sum = SUM(almo_scf_env%kTS)
     772             :                ELSE
     773           0 :                   kTS_sum = 0.0_dp
     774             :                END IF
     775             : 
     776             :                CALL almo_dm_to_almo_ks(qs_env, &
     777             :                                        almo_scf_env%matrix_p, &
     778             :                                        almo_scf_env%matrix_ks, &
     779             :                                        energy_new, &
     780             :                                        almo_scf_env%eps_filter, &
     781             :                                        almo_scf_env%mat_distr_aos, &
     782             :                                        smear=almo_scf_env%smear, &
     783           0 :                                        kTS_sum=kTS_sum)
     784             :             END IF
     785             : 
     786             :          END IF ! prepare_to_exit
     787             : 
     788           2 :          IF (almo_scf_env%perturbative_delocalization) THEN
     789             : 
     790             :             ! exit after the first step if we do not need the SCF procedure
     791           2 :             CALL almo_dm_to_qs_env(qs_env, almo_scf_env%matrix_p, almo_scf_env%mat_distr_aos)
     792           2 :             converged = .TRUE.
     793           2 :             prepare_to_exit = .TRUE.
     794             : 
     795             :          ELSE ! not a perturbative treatment
     796             : 
     797           0 :             energy_diff = energy_new - energy_old
     798           0 :             energy_old = energy_new
     799           0 :             almo_scf_env%almo_scf_energy = energy_new
     800             : 
     801           0 :             t2 = m_walltime()
     802             :             ! brief report on the current SCF loop
     803           0 :             IF (unit_nr > 0) THEN
     804           0 :                WRITE (unit_nr, '(T2,A,I6,F20.9,E11.3,E11.3,E11.3,F8.2)') "ALMO SCF", &
     805           0 :                   iscf, &
     806           0 :                   energy_new, energy_diff, error_norm, error_norm_0, t2 - t1
     807             :             END IF
     808           0 :             t1 = m_walltime()
     809             : 
     810             :          END IF
     811             : 
     812           2 :          IF (prepare_to_exit) EXIT
     813             : 
     814             :       END DO ! end scf cycle
     815             : 
     816             :       !! Print number of electrons recovered if smearing was requested
     817           2 :       IF (almo_scf_env%smear) THEN
     818           0 :          DO ispin = 1, nspin
     819           0 :             CALL dbcsr_dot(almo_scf_env%matrix_p(ispin), almo_scf_env%matrix_s(1), density_rec)
     820           0 :             IF (unit_nr > 0) THEN
     821           0 :                WRITE (unit_nr, '(T2,A20,F23.10)') "Electrons recovered:", density_rec
     822             :             END IF
     823             :          END DO
     824             :       END IF
     825             : 
     826           2 :       IF (.NOT. converged .AND. .NOT. optimizer%early_stopping_on) THEN
     827           0 :          CPABORT("SCF for ALMOs on overlapping domains not converged!")
     828             :       END IF
     829             : 
     830           4 :       DO ispin = 1, nspin
     831           2 :          CALL release_submatrices(submatrix_mixing_old_blk(:, ispin))
     832           4 :          CALL almo_scf_diis_release(diis_env=almo_diis(ispin))
     833             :       END DO
     834           4 :       DEALLOCATE (almo_diis)
     835          12 :       DEALLOCATE (submatrix_mixing_old_blk)
     836             : 
     837           2 :       CALL timestop(handle)
     838             : 
     839           2 :    END SUBROUTINE almo_scf_xalmo_eigensolver
     840             : 
     841             : ! **************************************************************************************************
     842             : !> \brief Optimization of ALMOs using PCG-like minimizers
     843             : !> \param qs_env ...
     844             : !> \param almo_scf_env ...
     845             : !> \param optimizer   controls the optimization algorithm
     846             : !> \param quench_t ...
     847             : !> \param matrix_t_in ...
     848             : !> \param matrix_t_out ...
     849             : !> \param assume_t0_q0x - since it is extremely difficult to converge the iterative
     850             : !>                        procedure using T as an optimized variable, assume
     851             : !>                        T = T_0 + (1-R_0)*X and optimize X
     852             : !>                        T_0 is assumed to be the zero-delocalization reference
     853             : !> \param perturbation_only - perturbative (do not update Hamiltonian)
     854             : !> \param special_case   to reduce the overhead special cases are implemented:
     855             : !>                       xalmo_case_normal - no special case (i.e. xALMOs)
     856             : !>                       xalmo_case_block_diag
     857             : !>                       xalmo_case_fully_deloc
     858             : !> \par History
     859             : !>       2011.11 created [Rustam Z Khaliullin]
     860             : !> \author Rustam Z Khaliullin
     861             : ! **************************************************************************************************
     862          86 :    SUBROUTINE almo_scf_xalmo_pcg(qs_env, almo_scf_env, optimizer, quench_t, &
     863             :                                  matrix_t_in, matrix_t_out, assume_t0_q0x, perturbation_only, &
     864             :                                  special_case)
     865             : 
     866             :       TYPE(qs_environment_type), POINTER                 :: qs_env
     867             :       TYPE(almo_scf_env_type), INTENT(INOUT)             :: almo_scf_env
     868             :       TYPE(optimizer_options_type), INTENT(IN)           :: optimizer
     869             :       TYPE(dbcsr_type), ALLOCATABLE, DIMENSION(:), &
     870             :          INTENT(INOUT)                                   :: quench_t, matrix_t_in, matrix_t_out
     871             :       LOGICAL, INTENT(IN)                                :: assume_t0_q0x, perturbation_only
     872             :       INTEGER, INTENT(IN), OPTIONAL                      :: special_case
     873             : 
     874             :       CHARACTER(len=*), PARAMETER :: routineN = 'almo_scf_xalmo_pcg'
     875             : 
     876             :       CHARACTER(LEN=20)                                  :: iter_type
     877             :       INTEGER :: cg_iteration, dim_op, fixed_line_search_niter, handle, idim0, ielem, ispin, &
     878             :          iteration, line_search_iteration, max_iter, my_special_case, ndomains, nmo, nspins, &
     879             :          outer_iteration, outer_max_iter, prec_type, reim, unit_nr
     880          86 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: nocc
     881             :       LOGICAL :: blissful_neglect, converged, just_started, line_search, normalize_orbitals, &
     882             :          optimize_theta, outer_prepare_to_exit, penalty_occ_local, penalty_occ_vol, &
     883             :          prepare_to_exit, reset_conjugator, skip_grad, use_guess
     884          86 :       REAL(dp), ALLOCATABLE, DIMENSION(:)                :: reim_diag, weights, z2
     885             :       REAL(kind=dp) :: appr_sec_der, beta, denom, denom2, e0, e1, energy_coeff, energy_diff, &
     886             :          energy_new, energy_old, eps_skip_gradients, fval, g0, g1, grad_norm, grad_norm_frob, &
     887             :          line_search_error, localiz_coeff, localization_obj_function, next_step_size_guess, &
     888             :          penalty_amplitude, penalty_func_new, spin_factor, step_size, t1, t2, tempreal
     889          86 :       REAL(kind=dp), ALLOCATABLE, DIMENSION(:)           :: grad_norm_spin, &
     890          86 :                                                             penalty_occ_vol_g_prefactor, &
     891          86 :                                                             penalty_occ_vol_h_prefactor
     892             :       TYPE(cell_type), POINTER                           :: cell
     893             :       TYPE(cp_logger_type), POINTER                      :: logger
     894          86 :       TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: qs_matrix_s
     895          86 :       TYPE(dbcsr_p_type), DIMENSION(:, :), POINTER       :: op_sm_set_almo, op_sm_set_qs
     896          86 :       TYPE(dbcsr_type), ALLOCATABLE, DIMENSION(:) :: FTsiginv, grad, m_sig_sqrti_ii, m_t_in_local, &
     897          86 :          m_theta, prec_vv, prev_grad, prev_minus_prec_grad, prev_step, siginvTFTsiginv, ST, step, &
     898          86 :          STsiginv_0, tempNOcc, tempNOcc_1, tempOccOcc
     899             :       TYPE(domain_submatrix_type), ALLOCATABLE, &
     900          86 :          DIMENSION(:, :)                                 :: bad_modes_projector_down, domain_r_down
     901             :       TYPE(mp_comm_type)                                 :: group
     902             : 
     903          86 :       CALL timeset(routineN, handle)
     904             : 
     905          86 :       my_special_case = xalmo_case_normal
     906          86 :       IF (PRESENT(special_case)) my_special_case = special_case
     907             : 
     908             :       ! get a useful output_unit
     909          86 :       logger => cp_get_default_logger()
     910          86 :       IF (logger%para_env%is_source()) THEN
     911          43 :          unit_nr = cp_logger_get_default_unit_nr(logger, local=.TRUE.)
     912             :       ELSE
     913             :          unit_nr = -1
     914             :       END IF
     915             : 
     916          86 :       nspins = almo_scf_env%nspins
     917             : 
     918             :       ! if unprojected XALMOs are optimized
     919             :       ! then we must use the "blissful_neglect" procedure
     920          86 :       blissful_neglect = .FALSE.
     921          86 :       IF (my_special_case .EQ. xalmo_case_normal .AND. .NOT. assume_t0_q0x) THEN
     922          14 :          blissful_neglect = .TRUE.
     923             :       END IF
     924             : 
     925          86 :       IF (unit_nr > 0) THEN
     926          43 :          WRITE (unit_nr, *)
     927           2 :          SELECT CASE (my_special_case)
     928             :          CASE (xalmo_case_block_diag)
     929           2 :             WRITE (unit_nr, '(T2,A,A,A)') REPEAT("-", 20), &
     930           4 :                " Optimization of block-diagonal ALMOs ", REPEAT("-", 21)
     931             :          CASE (xalmo_case_fully_deloc)
     932          22 :             WRITE (unit_nr, '(T2,A,A,A)') REPEAT("-", 20), &
     933          44 :                " Optimization of fully delocalized MOs ", REPEAT("-", 20)
     934             :          CASE (xalmo_case_normal)
     935          43 :             IF (blissful_neglect) THEN
     936           7 :                WRITE (unit_nr, '(T2,A,A,A)') REPEAT("-", 25), &
     937          14 :                   " LCP optimization of XALMOs ", REPEAT("-", 26)
     938             :             ELSE
     939          12 :                WRITE (unit_nr, '(T2,A,A,A)') REPEAT("-", 27), &
     940          24 :                   " Optimization of XALMOs ", REPEAT("-", 28)
     941             :             END IF
     942             :          END SELECT
     943          43 :          WRITE (unit_nr, *)
     944          43 :          WRITE (unit_nr, '(T2,A13,A6,A23,A14,A14,A9)') "Method", "Iter", &
     945          86 :             "Objective Function", "Change", "Convergence", "Time"
     946          43 :          WRITE (unit_nr, '(T2,A)') REPEAT("-", 79)
     947             :       END IF
     948             : 
     949             :       ! set local parameters using developer's keywords
     950             :       ! RZK-warning: change to normal keywords later
     951          86 :       optimize_theta = almo_scf_env%logical05
     952          86 :       eps_skip_gradients = almo_scf_env%real01
     953             : 
     954             :       ! penalty amplitude adjusts the strength of volume conservation
     955          86 :       energy_coeff = 1.0_dp !optimizer%opt_penalty%energy_coeff
     956          86 :       localiz_coeff = 0.0_dp !optimizer%opt_penalty%occ_loc_coeff
     957          86 :       penalty_amplitude = 0.0_dp !optimizer%opt_penalty%occ_vol_coeff
     958          86 :       penalty_occ_vol = .FALSE. !( optimizer%opt_penalty%occ_vol_method &
     959             :       !.NE. penalty_type_none .AND. my_special_case .EQ. xalmo_case_fully_deloc )
     960          86 :       penalty_occ_local = .FALSE. !( optimizer%opt_penalty%occ_loc_method &
     961             :       !.NE. penalty_type_none .AND. my_special_case .EQ. xalmo_case_fully_deloc )
     962          86 :       normalize_orbitals = penalty_occ_vol .OR. penalty_occ_local
     963         258 :       ALLOCATE (penalty_occ_vol_g_prefactor(nspins))
     964         172 :       ALLOCATE (penalty_occ_vol_h_prefactor(nspins))
     965         172 :       penalty_occ_vol_g_prefactor(:) = 0.0_dp
     966         172 :       penalty_occ_vol_h_prefactor(:) = 0.0_dp
     967          86 :       penalty_func_new = 0.0_dp
     968             : 
     969             :       ! preconditioner control
     970          86 :       prec_type = optimizer%preconditioner
     971             : 
     972             :       ! control of the line search
     973          86 :       fixed_line_search_niter = 0 ! init to zero, change when eps is small enough
     974             : 
     975          86 :       IF (nspins == 1) THEN
     976          86 :          spin_factor = 2.0_dp
     977             :       ELSE
     978           0 :          spin_factor = 1.0_dp
     979             :       END IF
     980             : 
     981         172 :       ALLOCATE (grad_norm_spin(nspins))
     982         258 :       ALLOCATE (nocc(nspins))
     983             : 
     984             :       ! create a local copy of matrix_t_in because
     985             :       ! matrix_t_in and matrix_t_out can be the same matrix
     986             :       ! we need to make sure data in matrix_t_in is intact
     987             :       ! after we start writing to matrix_t_out
     988         344 :       ALLOCATE (m_t_in_local(nspins))
     989         172 :       DO ispin = 1, nspins
     990             :          CALL dbcsr_create(m_t_in_local(ispin), &
     991             :                            template=matrix_t_in(ispin), &
     992          86 :                            matrix_type=dbcsr_type_no_symmetry)
     993         172 :          CALL dbcsr_copy(m_t_in_local(ispin), matrix_t_in(ispin))
     994             :       END DO
     995             : 
     996             :       ! m_theta contains a set of variational parameters
     997             :       ! that define one-electron orbitals (simple, projected, etc.)
     998         258 :       ALLOCATE (m_theta(nspins))
     999         172 :       DO ispin = 1, nspins
    1000             :          CALL dbcsr_create(m_theta(ispin), &
    1001             :                            template=matrix_t_out(ispin), &
    1002         172 :                            matrix_type=dbcsr_type_no_symmetry)
    1003             :       END DO
    1004             : 
    1005             :       ! Compute localization matrices
    1006             :       IF (penalty_occ_local) THEN
    1007             : 
    1008             :          CALL get_qs_env(qs_env=qs_env, &
    1009             :                          matrix_s=qs_matrix_s, &
    1010             :                          cell=cell)
    1011             : 
    1012             :          IF (cell%orthorhombic) THEN
    1013             :             dim_op = 3
    1014             :          ELSE
    1015             :             dim_op = 6
    1016             :          END IF
    1017             :          ALLOCATE (weights(6))
    1018             :          weights = 0.0_dp
    1019             : 
    1020             :          CALL initialize_weights(cell, weights)
    1021             : 
    1022             :          ALLOCATE (op_sm_set_qs(2, dim_op))
    1023             :          ALLOCATE (op_sm_set_almo(2, dim_op))
    1024             : 
    1025             :          DO idim0 = 1, dim_op
    1026             :             DO reim = 1, SIZE(op_sm_set_qs, 1)
    1027             :                NULLIFY (op_sm_set_qs(reim, idim0)%matrix)
    1028             :                ALLOCATE (op_sm_set_qs(reim, idim0)%matrix)
    1029             :                CALL dbcsr_copy(op_sm_set_qs(reim, idim0)%matrix, qs_matrix_s(1)%matrix, &
    1030             :                              name="almo_scf_env%op_sm_"//TRIM(ADJUSTL(cp_to_string(reim)))//"-"//TRIM(ADJUSTL(cp_to_string(idim0))))
    1031             :                CALL dbcsr_set(op_sm_set_qs(reim, idim0)%matrix, 0.0_dp)
    1032             :                NULLIFY (op_sm_set_almo(reim, idim0)%matrix)
    1033             :                ALLOCATE (op_sm_set_almo(reim, idim0)%matrix)
    1034             :                CALL dbcsr_copy(op_sm_set_almo(reim, idim0)%matrix, almo_scf_env%matrix_s(1), &
    1035             :                              name="almo_scf_env%op_sm_"//TRIM(ADJUSTL(cp_to_string(reim)))//"-"//TRIM(ADJUSTL(cp_to_string(idim0))))
    1036             :                CALL dbcsr_set(op_sm_set_almo(reim, idim0)%matrix, 0.0_dp)
    1037             :             END DO
    1038             :          END DO
    1039             : 
    1040             :          CALL compute_berry_operator(qs_env, cell, op_sm_set_qs, dim_op)
    1041             : 
    1042             :          !CALL matrix_qs_to_almo(op_sm_set_qs, op_sm_set_almo, almo_scf_env%mat_distr_aos)
    1043             : 
    1044             :       END IF
    1045             : 
    1046             :       ! create initial guess from the initial orbitals
    1047             :       CALL xalmo_initial_guess(m_guess=m_theta, &
    1048             :                                m_t_in=m_t_in_local, &
    1049             :                                m_t0=almo_scf_env%matrix_t_blk, &
    1050             :                                m_quench_t=quench_t, &
    1051             :                                m_overlap=almo_scf_env%matrix_s(1), &
    1052             :                                m_sigma_tmpl=almo_scf_env%matrix_sigma_inv, &
    1053             :                                nspins=nspins, &
    1054             :                                xalmo_history=almo_scf_env%xalmo_history, &
    1055             :                                assume_t0_q0x=assume_t0_q0x, &
    1056             :                                optimize_theta=optimize_theta, &
    1057             :                                envelope_amplitude=almo_scf_env%envelope_amplitude, &
    1058             :                                eps_filter=almo_scf_env%eps_filter, &
    1059             :                                order_lanczos=almo_scf_env%order_lanczos, &
    1060             :                                eps_lanczos=almo_scf_env%eps_lanczos, &
    1061             :                                max_iter_lanczos=almo_scf_env%max_iter_lanczos, &
    1062          86 :                                nocc_of_domain=almo_scf_env%nocc_of_domain)
    1063             : 
    1064          86 :       ndomains = almo_scf_env%ndomains
    1065        1028 :       ALLOCATE (domain_r_down(ndomains, nspins))
    1066          86 :       CALL init_submatrices(domain_r_down)
    1067         942 :       ALLOCATE (bad_modes_projector_down(ndomains, nspins))
    1068          86 :       CALL init_submatrices(bad_modes_projector_down)
    1069             : 
    1070         258 :       ALLOCATE (prec_vv(nspins))
    1071         258 :       ALLOCATE (siginvTFTsiginv(nspins))
    1072         258 :       ALLOCATE (STsiginv_0(nspins))
    1073         258 :       ALLOCATE (FTsiginv(nspins))
    1074         258 :       ALLOCATE (ST(nspins))
    1075         258 :       ALLOCATE (prev_grad(nspins))
    1076         344 :       ALLOCATE (grad(nspins))
    1077         258 :       ALLOCATE (prev_step(nspins))
    1078         258 :       ALLOCATE (step(nspins))
    1079         258 :       ALLOCATE (prev_minus_prec_grad(nspins))
    1080         258 :       ALLOCATE (m_sig_sqrti_ii(nspins))
    1081         258 :       ALLOCATE (tempNOcc(nspins))
    1082         258 :       ALLOCATE (tempNOcc_1(nspins))
    1083         258 :       ALLOCATE (tempOccOcc(nspins))
    1084         172 :       DO ispin = 1, nspins
    1085             : 
    1086             :          ! init temporary storage
    1087             :          CALL dbcsr_create(prec_vv(ispin), &
    1088             :                            template=almo_scf_env%matrix_ks(ispin), &
    1089          86 :                            matrix_type=dbcsr_type_no_symmetry)
    1090             :          CALL dbcsr_create(siginvTFTsiginv(ispin), &
    1091             :                            template=almo_scf_env%matrix_sigma(ispin), &
    1092          86 :                            matrix_type=dbcsr_type_no_symmetry)
    1093             :          CALL dbcsr_create(STsiginv_0(ispin), &
    1094             :                            template=matrix_t_out(ispin), &
    1095          86 :                            matrix_type=dbcsr_type_no_symmetry)
    1096             :          CALL dbcsr_create(FTsiginv(ispin), &
    1097             :                            template=matrix_t_out(ispin), &
    1098          86 :                            matrix_type=dbcsr_type_no_symmetry)
    1099             :          CALL dbcsr_create(ST(ispin), &
    1100             :                            template=matrix_t_out(ispin), &
    1101          86 :                            matrix_type=dbcsr_type_no_symmetry)
    1102             :          CALL dbcsr_create(prev_grad(ispin), &
    1103             :                            template=matrix_t_out(ispin), &
    1104          86 :                            matrix_type=dbcsr_type_no_symmetry)
    1105             :          CALL dbcsr_create(grad(ispin), &
    1106             :                            template=matrix_t_out(ispin), &
    1107          86 :                            matrix_type=dbcsr_type_no_symmetry)
    1108             :          CALL dbcsr_create(prev_step(ispin), &
    1109             :                            template=matrix_t_out(ispin), &
    1110          86 :                            matrix_type=dbcsr_type_no_symmetry)
    1111             :          CALL dbcsr_create(step(ispin), &
    1112             :                            template=matrix_t_out(ispin), &
    1113          86 :                            matrix_type=dbcsr_type_no_symmetry)
    1114             :          CALL dbcsr_create(prev_minus_prec_grad(ispin), &
    1115             :                            template=matrix_t_out(ispin), &
    1116          86 :                            matrix_type=dbcsr_type_no_symmetry)
    1117             :          CALL dbcsr_create(m_sig_sqrti_ii(ispin), &
    1118             :                            template=almo_scf_env%matrix_sigma_inv(ispin), &
    1119          86 :                            matrix_type=dbcsr_type_no_symmetry)
    1120             :          CALL dbcsr_create(tempNOcc(ispin), &
    1121             :                            template=matrix_t_out(ispin), &
    1122          86 :                            matrix_type=dbcsr_type_no_symmetry)
    1123             :          CALL dbcsr_create(tempNOcc_1(ispin), &
    1124             :                            template=matrix_t_out(ispin), &
    1125          86 :                            matrix_type=dbcsr_type_no_symmetry)
    1126             :          CALL dbcsr_create(tempOccOcc(ispin), &
    1127             :                            template=almo_scf_env%matrix_sigma_inv(ispin), &
    1128          86 :                            matrix_type=dbcsr_type_no_symmetry)
    1129             : 
    1130          86 :          CALL dbcsr_set(step(ispin), 0.0_dp)
    1131          86 :          CALL dbcsr_set(prev_step(ispin), 0.0_dp)
    1132             : 
    1133             :          CALL dbcsr_get_info(almo_scf_env%matrix_sigma_inv(ispin), &
    1134          86 :                              nfullrows_total=nocc(ispin))
    1135             : 
    1136             :          ! invert S domains if necessary
    1137             :          ! Note: domains for alpha and beta electrons might be different
    1138             :          ! that is why the inversion of the AO overlap is inside the spin loop
    1139          86 :          IF (my_special_case .EQ. xalmo_case_normal) THEN
    1140             :             CALL construct_domain_s_inv( &
    1141             :                matrix_s=almo_scf_env%matrix_s(1), &
    1142             :                subm_s_inv=almo_scf_env%domain_s_inv(:, ispin), &
    1143             :                dpattern=quench_t(ispin), &
    1144             :                map=almo_scf_env%domain_map(ispin), &
    1145          38 :                node_of_domain=almo_scf_env%cpu_of_domain)
    1146             : 
    1147             :             CALL construct_domain_s_sqrt( &
    1148             :                matrix_s=almo_scf_env%matrix_s(1), &
    1149             :                subm_s_sqrt=almo_scf_env%domain_s_sqrt(:, ispin), &
    1150             :                subm_s_sqrt_inv=almo_scf_env%domain_s_sqrt_inv(:, ispin), &
    1151             :                dpattern=almo_scf_env%quench_t(ispin), &
    1152             :                map=almo_scf_env%domain_map(ispin), &
    1153          38 :                node_of_domain=almo_scf_env%cpu_of_domain)
    1154             : 
    1155             :          END IF
    1156             : 
    1157          86 :          IF (assume_t0_q0x) THEN
    1158             : 
    1159             :             ! save S.T_0.siginv_0
    1160          42 :             IF (my_special_case .EQ. xalmo_case_fully_deloc) THEN
    1161             :                CALL dbcsr_multiply("N", "N", 1.0_dp, &
    1162             :                                    almo_scf_env%matrix_s(1), &
    1163             :                                    almo_scf_env%matrix_t_blk(ispin), &
    1164             :                                    0.0_dp, ST(ispin), &
    1165          18 :                                    filter_eps=almo_scf_env%eps_filter)
    1166             :                CALL dbcsr_multiply("N", "N", 1.0_dp, &
    1167             :                                    ST(ispin), &
    1168             :                                    almo_scf_env%matrix_sigma_inv_0deloc(ispin), &
    1169             :                                    0.0_dp, STsiginv_0(ispin), &
    1170          18 :                                    filter_eps=almo_scf_env%eps_filter)
    1171             :             END IF
    1172             : 
    1173             :             ! construct domain-projector
    1174          42 :             IF (my_special_case .EQ. xalmo_case_normal) THEN
    1175             :                CALL construct_domain_r_down( &
    1176             :                   matrix_t=almo_scf_env%matrix_t_blk(ispin), &
    1177             :                   matrix_sigma_inv=almo_scf_env%matrix_sigma_inv(ispin), &
    1178             :                   matrix_s=almo_scf_env%matrix_s(1), &
    1179             :                   subm_r_down=domain_r_down(:, ispin), &
    1180             :                   dpattern=quench_t(ispin), &
    1181             :                   map=almo_scf_env%domain_map(ispin), &
    1182             :                   node_of_domain=almo_scf_env%cpu_of_domain, &
    1183          24 :                   filter_eps=almo_scf_env%eps_filter)
    1184             :             END IF
    1185             : 
    1186             :          END IF ! assume_t0_q0x
    1187             : 
    1188             :          ! localization functional
    1189         172 :          IF (penalty_occ_local) THEN
    1190             : 
    1191             :             ! compute S.R0.B.R0.S
    1192             :             CALL dbcsr_multiply("N", "N", 1.0_dp, &
    1193             :                                 almo_scf_env%matrix_s(1), &
    1194             :                                 matrix_t_in(ispin), &
    1195             :                                 0.0_dp, tempNOcc(ispin), &
    1196           0 :                                 filter_eps=almo_scf_env%eps_filter)
    1197             :             CALL dbcsr_multiply("N", "N", 1.0_dp, &
    1198             :                                 tempNOcc(ispin), &
    1199             :                                 almo_scf_env%matrix_sigma_inv(ispin), &
    1200             :                                 0.0_dp, tempNOCC_1(ispin), &
    1201           0 :                                 filter_eps=almo_scf_env%eps_filter)
    1202             : 
    1203           0 :             DO idim0 = 1, SIZE(op_sm_set_qs, 2) ! this loop is over miller ind
    1204           0 :                DO reim = 1, SIZE(op_sm_set_qs, 1) ! this loop is over Re/Im
    1205             : 
    1206             :                   CALL matrix_qs_to_almo(op_sm_set_qs(reim, idim0)%matrix, &
    1207           0 :                                          op_sm_set_almo(reim, idim0)%matrix, almo_scf_env%mat_distr_aos)
    1208             : 
    1209             :                   CALL dbcsr_multiply("N", "N", 1.0_dp, &
    1210             :                                       op_sm_set_almo(reim, idim0)%matrix, &
    1211             :                                       matrix_t_in(ispin), &
    1212             :                                       0.0_dp, tempNOcc(ispin), &
    1213           0 :                                       filter_eps=almo_scf_env%eps_filter)
    1214             : 
    1215             :                   CALL dbcsr_multiply("T", "N", 1.0_dp, &
    1216             :                                       matrix_t_in(ispin), &
    1217             :                                       tempNOcc(ispin), &
    1218             :                                       0.0_dp, tempOccOcc(ispin), &
    1219           0 :                                       filter_eps=almo_scf_env%eps_filter)
    1220             : 
    1221             :                   CALL dbcsr_multiply("N", "N", 1.0_dp, &
    1222             :                                       tempNOCC_1(ispin), &
    1223             :                                       tempOccOcc(ispin), &
    1224             :                                       0.0_dp, tempNOcc(ispin), &
    1225           0 :                                       filter_eps=almo_scf_env%eps_filter)
    1226             : 
    1227             :                   CALL dbcsr_multiply("N", "T", 1.0_dp, &
    1228             :                                       tempNOcc(ispin), &
    1229             :                                       tempNOcc_1(ispin), &
    1230             :                                       0.0_dp, op_sm_set_almo(reim, idim0)%matrix, &
    1231           0 :                                       filter_eps=almo_scf_env%eps_filter)
    1232             : 
    1233             :                END DO
    1234             :             END DO ! end loop over idim0
    1235             : 
    1236             :          END IF !penalty_occ_local
    1237             : 
    1238             :       END DO ! ispin
    1239             : 
    1240             :       ! start the outer SCF loop
    1241          86 :       outer_max_iter = optimizer%max_iter_outer_loop
    1242          86 :       outer_prepare_to_exit = .FALSE.
    1243          86 :       outer_iteration = 0
    1244          86 :       grad_norm = 0.0_dp
    1245          86 :       grad_norm_frob = 0.0_dp
    1246          86 :       use_guess = .FALSE.
    1247             : 
    1248             :       DO
    1249             : 
    1250             :          ! start the inner SCF loop
    1251          92 :          max_iter = optimizer%max_iter
    1252          92 :          prepare_to_exit = .FALSE.
    1253          92 :          line_search = .FALSE.
    1254          92 :          converged = .FALSE.
    1255          92 :          iteration = 0
    1256          92 :          cg_iteration = 0
    1257          92 :          line_search_iteration = 0
    1258             :          energy_new = 0.0_dp
    1259          92 :          energy_old = 0.0_dp
    1260          92 :          energy_diff = 0.0_dp
    1261             :          localization_obj_function = 0.0_dp
    1262          92 :          line_search_error = 0.0_dp
    1263             : 
    1264          92 :          t1 = m_walltime()
    1265             : 
    1266        1048 :          DO
    1267             : 
    1268        1048 :             just_started = (iteration .EQ. 0) .AND. (outer_iteration .EQ. 0)
    1269             : 
    1270             :             CALL main_var_to_xalmos_and_loss_func( &
    1271             :                almo_scf_env=almo_scf_env, &
    1272             :                qs_env=qs_env, &
    1273             :                m_main_var_in=m_theta, &
    1274             :                m_t_out=matrix_t_out, &
    1275             :                m_sig_sqrti_ii_out=m_sig_sqrti_ii, &
    1276             :                energy_out=energy_new, &
    1277             :                penalty_out=penalty_func_new, &
    1278             :                m_FTsiginv_out=FTsiginv, &
    1279             :                m_siginvTFTsiginv_out=siginvTFTsiginv, &
    1280             :                m_ST_out=ST, &
    1281             :                m_STsiginv0_in=STsiginv_0, &
    1282             :                m_quench_t_in=quench_t, &
    1283             :                domain_r_down_in=domain_r_down, &
    1284             :                assume_t0_q0x=assume_t0_q0x, &
    1285             :                just_started=just_started, &
    1286             :                optimize_theta=optimize_theta, &
    1287             :                normalize_orbitals=normalize_orbitals, &
    1288             :                perturbation_only=perturbation_only, &
    1289             :                do_penalty=penalty_occ_vol, &
    1290        1048 :                special_case=my_special_case)
    1291        1048 :             IF (penalty_occ_vol) THEN
    1292             :                ! this is not pure energy anymore
    1293           0 :                energy_new = energy_new + penalty_func_new
    1294             :             END IF
    1295        2096 :             DO ispin = 1, nspins
    1296        2096 :                IF (penalty_occ_vol) THEN
    1297             :                   penalty_occ_vol_g_prefactor(ispin) = &
    1298           0 :                      -2.0_dp*penalty_amplitude*spin_factor*nocc(ispin)
    1299           0 :                   penalty_occ_vol_h_prefactor(ispin) = 0.0_dp
    1300             :                END IF
    1301             :             END DO
    1302             : 
    1303        1048 :             localization_obj_function = 0.0_dp
    1304             :             ! RZK-warning: This block must be combined with the loss function
    1305        1048 :             IF (penalty_occ_local) THEN
    1306           0 :                DO ispin = 1, nspins
    1307             : 
    1308             :                   ! LzL insert localization penalty
    1309           0 :                   localization_obj_function = 0.0_dp
    1310           0 :                   CALL dbcsr_get_info(almo_scf_env%matrix_sigma_inv(ispin), nfullrows_total=nmo)
    1311           0 :                   ALLOCATE (z2(nmo))
    1312           0 :                   ALLOCATE (reim_diag(nmo))
    1313             : 
    1314           0 :                   CALL dbcsr_get_info(tempOccOcc(ispin), group=group)
    1315             : 
    1316           0 :                   DO idim0 = 1, SIZE(op_sm_set_qs, 2) ! this loop is over miller ind
    1317             : 
    1318           0 :                      z2(:) = 0.0_dp
    1319             : 
    1320           0 :                      DO reim = 1, SIZE(op_sm_set_qs, 1) ! this loop is over Re/Im
    1321             : 
    1322             :                         !CALL matrix_qs_to_almo(op_sm_set_qs(reim, idim0)%matrix,
    1323             :                         !                       op_sm_set_almo(reim, idim0)%matrix, &
    1324             :                         !                       almo_scf_env%mat_distr_aos)
    1325             :                         CALL dbcsr_multiply("N", "N", 1.0_dp, &
    1326             :                                             op_sm_set_almo(reim, idim0)%matrix, &
    1327             :                                             matrix_t_out(ispin), &
    1328             :                                             0.0_dp, tempNOcc(ispin), &
    1329           0 :                                             filter_eps=almo_scf_env%eps_filter)
    1330             :                         !warning - save time by computing only the diagonal elements
    1331             :                         CALL dbcsr_multiply("T", "N", 1.0_dp, &
    1332             :                                             matrix_t_out(ispin), &
    1333             :                                             tempNOcc(ispin), &
    1334             :                                             0.0_dp, tempOccOcc(ispin), &
    1335           0 :                                             filter_eps=almo_scf_env%eps_filter)
    1336             : 
    1337           0 :                         reim_diag = 0.0_dp
    1338           0 :                         CALL dbcsr_get_diag(tempOccOcc(ispin), reim_diag)
    1339           0 :                         CALL group%sum(reim_diag)
    1340           0 :                         z2(:) = z2(:) + reim_diag(:)*reim_diag(:)
    1341             : 
    1342             :                      END DO
    1343             : 
    1344           0 :                      DO ielem = 1, nmo
    1345             :                         SELECT CASE (2) ! allows for selection of different spread functionals
    1346             :                         CASE (1) ! functional =  -W_I * log( |z_I|^2 )
    1347           0 :                            fval = -weights(idim0)*LOG(ABS(z2(ielem)))
    1348             :                         CASE (2) ! functional =  W_I * ( 1 - |z_I|^2 )
    1349           0 :                            fval = weights(idim0) - weights(idim0)*ABS(z2(ielem))
    1350             :                         CASE (3) ! functional =  W_I * ( 1 - |z_I| )
    1351             :                            fval = weights(idim0) - weights(idim0)*SQRT(ABS(z2(ielem)))
    1352             :                         END SELECT
    1353           0 :                         localization_obj_function = localization_obj_function + fval
    1354             :                      END DO
    1355             : 
    1356             :                   END DO ! end loop over idim0
    1357             : 
    1358           0 :                   DEALLOCATE (z2)
    1359           0 :                   DEALLOCATE (reim_diag)
    1360             : 
    1361           0 :                   energy_new = energy_new + localiz_coeff*localization_obj_function
    1362             : 
    1363             :                END DO ! ispin
    1364             :             END IF ! penalty_occ_local
    1365             : 
    1366        2096 :             DO ispin = 1, nspins
    1367             : 
    1368             :                IF (just_started .AND. almo_mathematica) THEN
    1369             :                   CPWARN_IF(ispin .GT. 1, "Mathematica files will be overwritten")
    1370             :                   CALL print_mathematica_matrix(almo_scf_env%matrix_s(1), "matrixS.dat")
    1371             :                   CALL print_mathematica_matrix(almo_scf_env%matrix_ks(ispin), "matrixF.dat")
    1372             :                   CALL print_mathematica_matrix(matrix_t_out(ispin), "matrixT.dat")
    1373             :                   CALL print_mathematica_matrix(quench_t(ispin), "matrixQ.dat")
    1374             :                END IF
    1375             : 
    1376             :                ! save the previous gradient to compute beta
    1377             :                ! do it only if the previous grad was computed
    1378             :                ! for .NOT.line_search
    1379        1048 :                IF (line_search_iteration .EQ. 0 .AND. iteration .NE. 0) &
    1380        1542 :                   CALL dbcsr_copy(prev_grad(ispin), grad(ispin))
    1381             : 
    1382             :             END DO ! ispin
    1383             : 
    1384             :             ! compute the energy gradient if necessary
    1385             :             skip_grad = (iteration .GT. 0 .AND. &
    1386             :                          fixed_line_search_niter .NE. 0 .AND. &
    1387        1048 :                          line_search_iteration .NE. fixed_line_search_niter)
    1388             : 
    1389             :             IF (.NOT. skip_grad) THEN
    1390             : 
    1391        2096 :                DO ispin = 1, nspins
    1392             : 
    1393             :                   CALL compute_gradient( &
    1394             :                      m_grad_out=grad(ispin), &
    1395             :                      m_ks=almo_scf_env%matrix_ks(ispin), &
    1396             :                      m_s=almo_scf_env%matrix_s(1), &
    1397             :                      m_t=matrix_t_out(ispin), &
    1398             :                      m_t0=almo_scf_env%matrix_t_blk(ispin), &
    1399             :                      m_siginv=almo_scf_env%matrix_sigma_inv(ispin), &
    1400             :                      m_quench_t=quench_t(ispin), &
    1401             :                      m_FTsiginv=FTsiginv(ispin), &
    1402             :                      m_siginvTFTsiginv=siginvTFTsiginv(ispin), &
    1403             :                      m_ST=ST(ispin), &
    1404             :                      m_STsiginv0=STsiginv_0(ispin), &
    1405             :                      m_theta=m_theta(ispin), &
    1406             :                      m_sig_sqrti_ii=m_sig_sqrti_ii(ispin), &
    1407             :                      domain_s_inv=almo_scf_env%domain_s_inv(:, ispin), &
    1408             :                      domain_r_down=domain_r_down(:, ispin), &
    1409             :                      cpu_of_domain=almo_scf_env%cpu_of_domain, &
    1410             :                      domain_map=almo_scf_env%domain_map(ispin), &
    1411             :                      assume_t0_q0x=assume_t0_q0x, &
    1412             :                      optimize_theta=optimize_theta, &
    1413             :                      normalize_orbitals=normalize_orbitals, &
    1414             :                      penalty_occ_vol=penalty_occ_vol, &
    1415             :                      penalty_occ_vol_prefactor=penalty_occ_vol_g_prefactor(ispin), &
    1416             :                      envelope_amplitude=almo_scf_env%envelope_amplitude, &
    1417             :                      eps_filter=almo_scf_env%eps_filter, &
    1418             :                      spin_factor=spin_factor, &
    1419             :                      special_case=my_special_case, &
    1420             :                      penalty_occ_local=penalty_occ_local, &
    1421             :                      op_sm_set=op_sm_set_almo, &
    1422             :                      weights=weights, &
    1423             :                      energy_coeff=energy_coeff, &
    1424        2096 :                      localiz_coeff=localiz_coeff)
    1425             : 
    1426             :                END DO ! ispin
    1427             : 
    1428             :             END IF ! skip_grad
    1429             : 
    1430             :             ! if unprojected XALMOs are optimized then compute both
    1431             :             ! HessianInv/preconditioner and the "bad-mode" projector
    1432             : 
    1433        1048 :             IF (blissful_neglect) THEN
    1434         460 :                DO ispin = 1, nspins
    1435             :                   !compute the prec only for the first step,
    1436             :                   !but project the gradient every step
    1437         230 :                   IF (iteration .EQ. 0) THEN
    1438             :                      CALL compute_preconditioner( &
    1439             :                         domain_prec_out=almo_scf_env%domain_preconditioner(:, ispin), &
    1440             :                         bad_modes_projector_down_out=bad_modes_projector_down(:, ispin), &
    1441             :                         m_prec_out=prec_vv(ispin), &
    1442             :                         m_ks=almo_scf_env%matrix_ks(ispin), &
    1443             :                         m_s=almo_scf_env%matrix_s(1), &
    1444             :                         m_siginv=almo_scf_env%matrix_sigma_inv(ispin), &
    1445             :                         m_quench_t=quench_t(ispin), &
    1446             :                         m_FTsiginv=FTsiginv(ispin), &
    1447             :                         m_siginvTFTsiginv=siginvTFTsiginv(ispin), &
    1448             :                         m_ST=ST(ispin), &
    1449             :                         para_env=almo_scf_env%para_env, &
    1450             :                         blacs_env=almo_scf_env%blacs_env, &
    1451             :                         nocc_of_domain=almo_scf_env%nocc_of_domain(:, ispin), &
    1452             :                         domain_s_inv=almo_scf_env%domain_s_inv(:, ispin), &
    1453             :                         domain_s_inv_half=almo_scf_env%domain_s_sqrt_inv(:, ispin), &
    1454             :                         domain_s_half=almo_scf_env%domain_s_sqrt(:, ispin), &
    1455             :                         domain_r_down=domain_r_down(:, ispin), &
    1456             :                         cpu_of_domain=almo_scf_env%cpu_of_domain, &
    1457             :                         domain_map=almo_scf_env%domain_map(ispin), &
    1458             :                         assume_t0_q0x=assume_t0_q0x, &
    1459             :                         penalty_occ_vol=penalty_occ_vol, &
    1460             :                         penalty_occ_vol_prefactor=penalty_occ_vol_g_prefactor(ispin), &
    1461             :                         eps_filter=almo_scf_env%eps_filter, &
    1462             :                         neg_thr=optimizer%neglect_threshold, &
    1463             :                         spin_factor=spin_factor, &
    1464             :                         skip_inversion=.FALSE., &
    1465          18 :                         special_case=my_special_case)
    1466             :                   END IF
    1467             :                   ! remove bad modes from the gradient
    1468             :                   CALL apply_domain_operators( &
    1469             :                      matrix_in=grad(ispin), &
    1470             :                      matrix_out=grad(ispin), &
    1471             :                      operator1=almo_scf_env%domain_s_inv(:, ispin), &
    1472             :                      operator2=bad_modes_projector_down(:, ispin), &
    1473             :                      dpattern=quench_t(ispin), &
    1474             :                      map=almo_scf_env%domain_map(ispin), &
    1475             :                      node_of_domain=almo_scf_env%cpu_of_domain, &
    1476             :                      my_action=1, &
    1477         460 :                      filter_eps=almo_scf_env%eps_filter)
    1478             : 
    1479             :                END DO ! ispin
    1480             : 
    1481             :             END IF ! blissful neglect
    1482             : 
    1483             :             ! check convergence and other exit criteria
    1484        2096 :             DO ispin = 1, nspins
    1485        2096 :                grad_norm_spin(ispin) = dbcsr_maxabs(grad(ispin))
    1486             :             END DO ! ispin
    1487        3144 :             grad_norm = MAXVAL(grad_norm_spin)
    1488             : 
    1489        1048 :             converged = (grad_norm .LE. optimizer%eps_error)
    1490        1048 :             IF (converged .OR. (iteration .GE. max_iter)) THEN
    1491          92 :                prepare_to_exit = .TRUE.
    1492             :             END IF
    1493             :             ! if early stopping is on do at least one iteration
    1494        1048 :             IF (optimizer%early_stopping_on .AND. just_started) &
    1495           0 :                prepare_to_exit = .FALSE.
    1496             : 
    1497             :             IF (grad_norm .LT. almo_scf_env%eps_prev_guess) &
    1498        1048 :                use_guess = .TRUE.
    1499             : 
    1500             :             ! it is not time to exit just yet
    1501        1048 :             IF (.NOT. prepare_to_exit) THEN
    1502             : 
    1503             :                ! check the gradient along the step direction
    1504             :                ! and decide whether to switch to the line-search mode
    1505             :                ! do not do this in the first iteration
    1506         956 :                IF (iteration .NE. 0) THEN
    1507             : 
    1508         864 :                   IF (fixed_line_search_niter .EQ. 0) THEN
    1509             : 
    1510             :                      ! enforce at least one line search
    1511             :                      ! without even checking the error
    1512         864 :                      IF (.NOT. line_search) THEN
    1513             : 
    1514         422 :                         line_search = .TRUE.
    1515         422 :                         line_search_iteration = line_search_iteration + 1
    1516             : 
    1517             :                      ELSE
    1518             : 
    1519             :                         ! check the line-search error and decide whether to
    1520             :                         ! change the direction
    1521             :                         line_search_error = 0.0_dp
    1522             :                         denom = 0.0_dp
    1523             :                         denom2 = 0.0_dp
    1524             : 
    1525         884 :                         DO ispin = 1, nspins
    1526             : 
    1527         442 :                            CALL dbcsr_dot(grad(ispin), step(ispin), tempreal)
    1528         442 :                            line_search_error = line_search_error + tempreal
    1529         442 :                            CALL dbcsr_dot(grad(ispin), grad(ispin), tempreal)
    1530         442 :                            denom = denom + tempreal
    1531         442 :                            CALL dbcsr_dot(step(ispin), step(ispin), tempreal)
    1532         884 :                            denom2 = denom2 + tempreal
    1533             : 
    1534             :                         END DO ! ispin
    1535             : 
    1536             :                         ! cosine of the angle between the step and grad
    1537             :                         ! (must be close to zero at convergence)
    1538         442 :                         line_search_error = line_search_error/SQRT(denom)/SQRT(denom2)
    1539             : 
    1540         442 :                         IF (ABS(line_search_error) .GT. optimizer%lin_search_eps_error) THEN
    1541          40 :                            line_search = .TRUE.
    1542          40 :                            line_search_iteration = line_search_iteration + 1
    1543             :                         ELSE
    1544         402 :                            line_search = .FALSE.
    1545         402 :                            line_search_iteration = 0
    1546         402 :                            IF (grad_norm .LT. eps_skip_gradients) THEN
    1547           0 :                               fixed_line_search_niter = ABS(almo_scf_env%integer04)
    1548             :                            END IF
    1549             :                         END IF
    1550             : 
    1551             :                      END IF
    1552             : 
    1553             :                   ELSE ! decision for fixed_line_search_niter
    1554             : 
    1555           0 :                      IF (.NOT. line_search) THEN
    1556           0 :                         line_search = .TRUE.
    1557           0 :                         line_search_iteration = line_search_iteration + 1
    1558             :                      ELSE
    1559           0 :                         IF (line_search_iteration .EQ. fixed_line_search_niter) THEN
    1560           0 :                            line_search = .FALSE.
    1561             :                            line_search_iteration = 0
    1562           0 :                            line_search_iteration = line_search_iteration + 1
    1563             :                         END IF
    1564             :                      END IF
    1565             : 
    1566             :                   END IF ! fixed_line_search_niter fork
    1567             : 
    1568             :                END IF ! iteration.ne.0
    1569             : 
    1570         956 :                IF (line_search) THEN
    1571         462 :                   energy_diff = 0.0_dp
    1572             :                ELSE
    1573         494 :                   energy_diff = energy_new - energy_old
    1574         494 :                   energy_old = energy_new
    1575             :                END IF
    1576             : 
    1577             :                ! update the step direction
    1578         956 :                IF (.NOT. line_search) THEN
    1579             : 
    1580             :                   !IF (unit_nr>0) THEN
    1581             :                   !   WRITE(unit_nr,*) "....updating step direction...."
    1582             :                   !ENDIF
    1583             : 
    1584         988 :                   cg_iteration = cg_iteration + 1
    1585             : 
    1586             :                   ! save the previous step
    1587         988 :                   DO ispin = 1, nspins
    1588         988 :                      CALL dbcsr_copy(prev_step(ispin), step(ispin))
    1589             :                   END DO ! ispin
    1590             : 
    1591             :                   ! compute the new step (apply preconditioner if available)
    1592           0 :                   SELECT CASE (prec_type)
    1593             :                   CASE (xalmo_prec_full)
    1594             : 
    1595             :                      ! solving approximate Newton eq in the full (linearized) space
    1596             :                      CALL newton_grad_to_step( &
    1597             :                         optimizer=almo_scf_env%opt_xalmo_newton_pcg_solver, &
    1598             :                         m_grad=grad(:), &
    1599             :                         m_delta=step(:), &
    1600             :                         m_s=almo_scf_env%matrix_s(:), &
    1601             :                         m_ks=almo_scf_env%matrix_ks(:), &
    1602             :                         m_siginv=almo_scf_env%matrix_sigma_inv(:), &
    1603             :                         m_quench_t=quench_t(:), &
    1604             :                         m_FTsiginv=FTsiginv(:), &
    1605             :                         m_siginvTFTsiginv=siginvTFTsiginv(:), &
    1606             :                         m_ST=ST(:), &
    1607             :                         m_t=matrix_t_out(:), &
    1608             :                         m_sig_sqrti_ii=m_sig_sqrti_ii(:), &
    1609             :                         domain_s_inv=almo_scf_env%domain_s_inv(:, :), &
    1610             :                         domain_r_down=domain_r_down(:, :), &
    1611             :                         domain_map=almo_scf_env%domain_map(:), &
    1612             :                         cpu_of_domain=almo_scf_env%cpu_of_domain, &
    1613             :                         nocc_of_domain=almo_scf_env%nocc_of_domain(:, :), &
    1614             :                         para_env=almo_scf_env%para_env, &
    1615             :                         blacs_env=almo_scf_env%blacs_env, &
    1616             :                         eps_filter=almo_scf_env%eps_filter, &
    1617             :                         optimize_theta=optimize_theta, &
    1618             :                         penalty_occ_vol=penalty_occ_vol, &
    1619             :                         normalize_orbitals=normalize_orbitals, &
    1620             :                         penalty_occ_vol_prefactor=penalty_occ_vol_g_prefactor(:), &
    1621             :                         penalty_occ_vol_pf2=penalty_occ_vol_h_prefactor(:), &
    1622             :                         special_case=my_special_case &
    1623           0 :                         )
    1624             : 
    1625             :                   CASE (xalmo_prec_domain)
    1626             : 
    1627             :                      ! compute and invert preconditioner?
    1628         494 :                      IF (.NOT. blissful_neglect .AND. &
    1629             :                          ((just_started .AND. perturbation_only) .OR. &
    1630             :                           (iteration .EQ. 0 .AND. (.NOT. perturbation_only))) &
    1631             :                          ) THEN
    1632             : 
    1633             :                         ! computing preconditioner
    1634         148 :                         DO ispin = 1, nspins
    1635             :                            CALL compute_preconditioner( &
    1636             :                               domain_prec_out=almo_scf_env%domain_preconditioner(:, ispin), &
    1637             :                               m_prec_out=prec_vv(ispin), &
    1638             :                               m_ks=almo_scf_env%matrix_ks(ispin), &
    1639             :                               m_s=almo_scf_env%matrix_s(1), &
    1640             :                               m_siginv=almo_scf_env%matrix_sigma_inv(ispin), &
    1641             :                               m_quench_t=quench_t(ispin), &
    1642             :                               m_FTsiginv=FTsiginv(ispin), &
    1643             :                               m_siginvTFTsiginv=siginvTFTsiginv(ispin), &
    1644             :                               m_ST=ST(ispin), &
    1645             :                               para_env=almo_scf_env%para_env, &
    1646             :                               blacs_env=almo_scf_env%blacs_env, &
    1647             :                               nocc_of_domain=almo_scf_env%nocc_of_domain(:, ispin), &
    1648             :                               domain_s_inv=almo_scf_env%domain_s_inv(:, ispin), &
    1649             :                               domain_r_down=domain_r_down(:, ispin), &
    1650             :                               cpu_of_domain=almo_scf_env%cpu_of_domain, &
    1651             :                               domain_map=almo_scf_env%domain_map(ispin), &
    1652             :                               assume_t0_q0x=assume_t0_q0x, &
    1653             :                               penalty_occ_vol=penalty_occ_vol, &
    1654             :                               penalty_occ_vol_prefactor=penalty_occ_vol_g_prefactor(ispin), &
    1655             :                               eps_filter=almo_scf_env%eps_filter, &
    1656             :                               neg_thr=0.5_dp, &
    1657             :                               spin_factor=spin_factor, &
    1658             :                               skip_inversion=.FALSE., &
    1659         568 :                               special_case=my_special_case)
    1660             :                         END DO ! ispin
    1661             :                      END IF ! compute_prec
    1662             : 
    1663             :                      !IF (unit_nr>0) THEN
    1664             :                      !   WRITE(unit_nr,*) "....applying precomputed preconditioner...."
    1665             :                      !ENDIF
    1666             : 
    1667         494 :                      IF (my_special_case .EQ. xalmo_case_block_diag .OR. &
    1668             :                          my_special_case .EQ. xalmo_case_fully_deloc) THEN
    1669             : 
    1670         488 :                         DO ispin = 1, nspins
    1671             : 
    1672             :                            CALL dbcsr_multiply("N", "N", -1.0_dp, &
    1673             :                                                prec_vv(ispin), &
    1674             :                                                grad(ispin), &
    1675             :                                                0.0_dp, step(ispin), &
    1676         488 :                                                filter_eps=almo_scf_env%eps_filter)
    1677             : 
    1678             :                         END DO ! ispin
    1679             : 
    1680             :                      ELSE
    1681             : 
    1682             :                         !!! RZK-warning Currently for non-theta only
    1683         250 :                         IF (optimize_theta) THEN
    1684           0 :                            CPABORT("theta is NYI")
    1685             :                         END IF
    1686             : 
    1687         500 :                         DO ispin = 1, nspins
    1688             : 
    1689             :                            CALL apply_domain_operators( &
    1690             :                               matrix_in=grad(ispin), &
    1691             :                               matrix_out=step(ispin), &
    1692             :                               operator1=almo_scf_env%domain_preconditioner(:, ispin), &
    1693             :                               dpattern=quench_t(ispin), &
    1694             :                               map=almo_scf_env%domain_map(ispin), &
    1695             :                               node_of_domain=almo_scf_env%cpu_of_domain, &
    1696             :                               my_action=0, &
    1697         250 :                               filter_eps=almo_scf_env%eps_filter)
    1698         500 :                            CALL dbcsr_scale(step(ispin), -1.0_dp)
    1699             : 
    1700             :                            !CALL dbcsr_copy(m_tmp_no_3,&
    1701             :                            !        quench_t(ispin))
    1702             :                            !CALL inverse_of_elements(m_tmp_no_3)
    1703             :                            !CALL dbcsr_copy(m_tmp_no_2,step)
    1704             :                            !CALL dbcsr_hadamard_product(&
    1705             :                            !        m_tmp_no_2,&
    1706             :                            !        m_tmp_no_3,&
    1707             :                            !        step)
    1708             :                            !CALL dbcsr_copy(m_tmp_no_3,quench_t(ispin))
    1709             : 
    1710             :                         END DO ! ispin
    1711             : 
    1712             :                      END IF ! special case
    1713             : 
    1714             :                   CASE (xalmo_prec_zero)
    1715             : 
    1716             :                      ! no preconditioner
    1717         494 :                      DO ispin = 1, nspins
    1718             : 
    1719           0 :                         CALL dbcsr_copy(step(ispin), grad(ispin))
    1720           0 :                         CALL dbcsr_scale(step(ispin), -1.0_dp)
    1721             : 
    1722             :                      END DO ! ispin
    1723             : 
    1724             :                   END SELECT ! preconditioner type fork
    1725             : 
    1726             :                   ! check whether we need to reset conjugate directions
    1727         494 :                   IF (iteration .EQ. 0) THEN
    1728          92 :                      reset_conjugator = .TRUE.
    1729             :                   END IF
    1730             : 
    1731             :                   ! compute the conjugation coefficient - beta
    1732         494 :                   IF (.NOT. reset_conjugator) THEN
    1733             : 
    1734             :                      CALL compute_cg_beta( &
    1735             :                         beta=beta, &
    1736             :                         reset_conjugator=reset_conjugator, &
    1737             :                         conjugator=optimizer%conjugator, &
    1738             :                         grad=grad(:), &
    1739             :                         prev_grad=prev_grad(:), &
    1740             :                         step=step(:), &
    1741             :                         prev_step=prev_step(:), &
    1742             :                         prev_minus_prec_grad=prev_minus_prec_grad(:) &
    1743         402 :                         )
    1744             : 
    1745             :                   END IF
    1746             : 
    1747         494 :                   IF (reset_conjugator) THEN
    1748             : 
    1749          92 :                      beta = 0.0_dp
    1750          92 :                      IF (unit_nr > 0 .AND. (.NOT. just_started)) THEN
    1751           3 :                         WRITE (unit_nr, '(T2,A35)') "Re-setting conjugator to zero"
    1752             :                      END IF
    1753          92 :                      reset_conjugator = .FALSE.
    1754             : 
    1755             :                   END IF
    1756             : 
    1757             :                   ! save the preconditioned gradient (useful for beta)
    1758         988 :                   DO ispin = 1, nspins
    1759             : 
    1760         494 :                      CALL dbcsr_copy(prev_minus_prec_grad(ispin), step(ispin))
    1761             : 
    1762             :                      !IF (unit_nr>0) THEN
    1763             :                      !   WRITE(unit_nr,*) "....final beta....", beta
    1764             :                      !ENDIF
    1765             : 
    1766             :                      ! conjugate the step direction
    1767         988 :                      CALL dbcsr_add(step(ispin), prev_step(ispin), 1.0_dp, beta)
    1768             : 
    1769             :                   END DO ! ispin
    1770             : 
    1771             :                END IF ! update the step direction
    1772             : 
    1773             :                ! estimate the step size
    1774         956 :                IF (.NOT. line_search) THEN
    1775             :                   ! we just changed the direction and
    1776             :                   ! we have only E and grad from the current step
    1777             :                   ! it is not enouhg to compute step_size - just guess it
    1778         494 :                   e0 = energy_new
    1779         494 :                   g0 = 0.0_dp
    1780         988 :                   DO ispin = 1, nspins
    1781         494 :                      CALL dbcsr_dot(grad(ispin), step(ispin), tempreal)
    1782         988 :                      g0 = g0 + tempreal
    1783             :                   END DO ! ispin
    1784         494 :                   IF (iteration .EQ. 0) THEN
    1785          92 :                      step_size = optimizer%lin_search_step_size_guess
    1786             :                   ELSE
    1787         402 :                      IF (next_step_size_guess .LE. 0.0_dp) THEN
    1788           2 :                         step_size = optimizer%lin_search_step_size_guess
    1789             :                      ELSE
    1790             :                         ! take the last value
    1791         400 :                         step_size = next_step_size_guess*1.05_dp
    1792             :                      END IF
    1793             :                   END IF
    1794             :                   !IF (unit_nr > 0) THEN
    1795             :                   !   WRITE (unit_nr, '(A2,3F12.5)') &
    1796             :                   !      "EG", e0, g0, step_size
    1797             :                   !ENDIF
    1798         494 :                   next_step_size_guess = step_size
    1799             :                ELSE
    1800         462 :                   IF (fixed_line_search_niter .EQ. 0) THEN
    1801         462 :                      e1 = energy_new
    1802         462 :                      g1 = 0.0_dp
    1803         924 :                      DO ispin = 1, nspins
    1804         462 :                         CALL dbcsr_dot(grad(ispin), step(ispin), tempreal)
    1805         924 :                         g1 = g1 + tempreal
    1806             :                      END DO ! ispin
    1807             :                      ! we have accumulated some points along this direction
    1808             :                      ! use only the most recent g0 (quadratic approximation)
    1809         462 :                      appr_sec_der = (g1 - g0)/step_size
    1810             :                      !IF (unit_nr > 0) THEN
    1811             :                      !   WRITE (unit_nr, '(A2,7F12.5)') &
    1812             :                      !      "EG", e0, e1, g0, g1, appr_sec_der, step_size, -g1/appr_sec_der
    1813             :                      !ENDIF
    1814         462 :                      step_size = -g1/appr_sec_der
    1815         462 :                      e0 = e1
    1816         462 :                      g0 = g1
    1817             :                   ELSE
    1818             :                      ! use e0, g0 and e1 to compute g1 and make a step
    1819             :                      ! if the next iteration is also line_search
    1820             :                      ! use e1 and the calculated g1 as e0 and g0
    1821           0 :                      e1 = energy_new
    1822           0 :                      appr_sec_der = 2.0*((e1 - e0)/step_size - g0)/step_size
    1823           0 :                      g1 = appr_sec_der*step_size + g0
    1824             :                      !IF (unit_nr > 0) THEN
    1825             :                      !   WRITE (unit_nr, '(A2,7F12.5)') &
    1826             :                      !      "EG", e0, e1, g0, g1, appr_sec_der, step_size, -g1/appr_sec_der
    1827             :                      !ENDIF
    1828             :                      !appr_sec_der=(g1-g0)/step_size
    1829           0 :                      step_size = -g1/appr_sec_der
    1830           0 :                      e0 = e1
    1831           0 :                      g0 = g1
    1832             :                   END IF
    1833         462 :                   next_step_size_guess = next_step_size_guess + step_size
    1834             :                END IF
    1835             : 
    1836             :                ! update theta
    1837        1912 :                DO ispin = 1, nspins
    1838        1912 :                   CALL dbcsr_add(m_theta(ispin), step(ispin), 1.0_dp, step_size)
    1839             :                END DO ! ispin
    1840             : 
    1841             :             END IF ! not.prepare_to_exit
    1842             : 
    1843        1048 :             IF (line_search) THEN
    1844         482 :                iter_type = "LS"
    1845             :             ELSE
    1846         566 :                iter_type = "CG"
    1847             :             END IF
    1848             : 
    1849        1048 :             t2 = m_walltime()
    1850        1048 :             IF (unit_nr > 0) THEN
    1851         524 :                iter_type = TRIM("ALMO SCF "//iter_type)
    1852             :                WRITE (unit_nr, '(T2,A13,I6,F23.10,E14.5,F14.9,F9.2)') &
    1853         524 :                   iter_type, iteration, &
    1854         524 :                   energy_new, energy_diff, grad_norm, &
    1855        1048 :                   t2 - t1
    1856         524 :                IF (penalty_occ_local .OR. penalty_occ_vol) THEN
    1857             :                   WRITE (unit_nr, '(T2,A25,F23.10)') &
    1858           0 :                      "Energy component:", (energy_new - penalty_func_new - localization_obj_function)
    1859             :                END IF
    1860         524 :                IF (penalty_occ_local) THEN
    1861             :                   WRITE (unit_nr, '(T2,A25,F23.10)') &
    1862           0 :                      "Localization component:", localization_obj_function
    1863             :                END IF
    1864         524 :                IF (penalty_occ_vol) THEN
    1865             :                   WRITE (unit_nr, '(T2,A25,F23.10)') &
    1866           0 :                      "Penalty component:", penalty_func_new
    1867             :                END IF
    1868             :             END IF
    1869             : 
    1870        1048 :             IF (my_special_case .EQ. xalmo_case_block_diag) THEN
    1871          46 :                IF (penalty_occ_vol) THEN
    1872           0 :                   almo_scf_env%almo_scf_energy = energy_new - penalty_func_new - localization_obj_function
    1873             :                ELSE
    1874          46 :                   almo_scf_env%almo_scf_energy = energy_new - localization_obj_function
    1875             :                END IF
    1876             :             END IF
    1877             : 
    1878        1048 :             t1 = m_walltime()
    1879             : 
    1880        1048 :             iteration = iteration + 1
    1881        1048 :             IF (prepare_to_exit) EXIT
    1882             : 
    1883             :          END DO ! inner SCF loop
    1884             : 
    1885          92 :          IF (converged .OR. (outer_iteration .GE. outer_max_iter)) THEN
    1886          86 :             outer_prepare_to_exit = .TRUE.
    1887             :          END IF
    1888             : 
    1889          92 :          outer_iteration = outer_iteration + 1
    1890          92 :          IF (outer_prepare_to_exit) EXIT
    1891             : 
    1892             :       END DO ! outer SCF loop
    1893             : 
    1894         172 :       DO ispin = 1, nspins
    1895          86 :          IF (converged .AND. almo_mathematica) THEN
    1896             :             CPWARN_IF(ispin .GT. 1, "Mathematica files will be overwritten")
    1897             :             CALL print_mathematica_matrix(matrix_t_out(ispin), "matrixTf.dat")
    1898             :          END IF
    1899             :       END DO ! ispin
    1900             : 
    1901             :       ! post SCF-loop calculations
    1902          86 :       IF (converged) THEN
    1903             : 
    1904             :          CALL wrap_up_xalmo_scf( &
    1905             :             qs_env=qs_env, &
    1906             :             almo_scf_env=almo_scf_env, &
    1907             :             perturbation_in=perturbation_only, &
    1908             :             m_xalmo_in=matrix_t_out, &
    1909             :             m_quench_in=quench_t, &
    1910          86 :             energy_inout=energy_new)
    1911             : 
    1912             :       END IF ! if converged
    1913             : 
    1914         172 :       DO ispin = 1, nspins
    1915          86 :          CALL dbcsr_release(prec_vv(ispin))
    1916          86 :          CALL dbcsr_release(STsiginv_0(ispin))
    1917          86 :          CALL dbcsr_release(ST(ispin))
    1918          86 :          CALL dbcsr_release(FTsiginv(ispin))
    1919          86 :          CALL dbcsr_release(siginvTFTsiginv(ispin))
    1920          86 :          CALL dbcsr_release(prev_grad(ispin))
    1921          86 :          CALL dbcsr_release(prev_step(ispin))
    1922          86 :          CALL dbcsr_release(grad(ispin))
    1923          86 :          CALL dbcsr_release(step(ispin))
    1924          86 :          CALL dbcsr_release(prev_minus_prec_grad(ispin))
    1925          86 :          CALL dbcsr_release(m_theta(ispin))
    1926          86 :          CALL dbcsr_release(m_t_in_local(ispin))
    1927          86 :          CALL dbcsr_release(m_sig_sqrti_ii(ispin))
    1928          86 :          CALL release_submatrices(domain_r_down(:, ispin))
    1929          86 :          CALL release_submatrices(bad_modes_projector_down(:, ispin))
    1930          86 :          CALL dbcsr_release(tempNOcc(ispin))
    1931          86 :          CALL dbcsr_release(tempNOcc_1(ispin))
    1932         172 :          CALL dbcsr_release(tempOccOcc(ispin))
    1933             :       END DO ! ispin
    1934             : 
    1935          86 :       DEALLOCATE (tempNOcc)
    1936          86 :       DEALLOCATE (tempNOcc_1)
    1937          86 :       DEALLOCATE (tempOccOcc)
    1938          86 :       DEALLOCATE (prec_vv)
    1939          86 :       DEALLOCATE (siginvTFTsiginv)
    1940          86 :       DEALLOCATE (STsiginv_0)
    1941          86 :       DEALLOCATE (FTsiginv)
    1942          86 :       DEALLOCATE (ST)
    1943          86 :       DEALLOCATE (prev_grad)
    1944          86 :       DEALLOCATE (grad)
    1945          86 :       DEALLOCATE (prev_step)
    1946          86 :       DEALLOCATE (step)
    1947          86 :       DEALLOCATE (prev_minus_prec_grad)
    1948          86 :       DEALLOCATE (m_sig_sqrti_ii)
    1949             : 
    1950         684 :       DEALLOCATE (domain_r_down)
    1951         684 :       DEALLOCATE (bad_modes_projector_down)
    1952             : 
    1953          86 :       DEALLOCATE (penalty_occ_vol_g_prefactor)
    1954          86 :       DEALLOCATE (penalty_occ_vol_h_prefactor)
    1955          86 :       DEALLOCATE (grad_norm_spin)
    1956          86 :       DEALLOCATE (nocc)
    1957             : 
    1958          86 :       DEALLOCATE (m_theta, m_t_in_local)
    1959          86 :       IF (penalty_occ_local) THEN
    1960           0 :          DO idim0 = 1, dim_op
    1961           0 :             DO reim = 1, SIZE(op_sm_set_qs, 1)
    1962           0 :                DEALLOCATE (op_sm_set_qs(reim, idim0)%matrix)
    1963           0 :                DEALLOCATE (op_sm_set_almo(reim, idim0)%matrix)
    1964             :             END DO
    1965             :          END DO
    1966           0 :          DEALLOCATE (op_sm_set_qs)
    1967           0 :          DEALLOCATE (op_sm_set_almo)
    1968           0 :          DEALLOCATE (weights)
    1969             :       END IF
    1970             : 
    1971          86 :       IF (.NOT. converged .AND. .NOT. optimizer%early_stopping_on) THEN
    1972           0 :          CPABORT("Optimization not converged! ")
    1973             :       END IF
    1974             : 
    1975          86 :       CALL timestop(handle)
    1976             : 
    1977         172 :    END SUBROUTINE almo_scf_xalmo_pcg
    1978             : 
    1979             : ! **************************************************************************************************
    1980             : !> \brief Optimization of NLMOs using PCG minimizers
    1981             : !> \param qs_env ...
    1982             : !> \param optimizer   controls the optimization algorithm
    1983             : !> \param matrix_s - AO overlap (NAOs x NAOs)
    1984             : !> \param matrix_mo_in - initial MOs (NAOs x NMOs)
    1985             : !> \param matrix_mo_out - final MOs (NAOs x NMOs)
    1986             : !> \param template_matrix_sigma - template (NMOs x NMOs)
    1987             : !> \param overlap_determinant - the determinant of the MOs overlap
    1988             : !> \param mat_distr_aos - info on the distribution of AOs
    1989             : !> \param virtuals ...
    1990             : !> \param eps_filter ...
    1991             : !> \par History
    1992             : !>       2018.10 created [Rustam Z Khaliullin]
    1993             : !> \author Rustam Z Khaliullin
    1994             : ! **************************************************************************************************
    1995           8 :    SUBROUTINE almo_scf_construct_nlmos(qs_env, optimizer, &
    1996             :                                        matrix_s, matrix_mo_in, matrix_mo_out, &
    1997             :                                        template_matrix_sigma, overlap_determinant, &
    1998             :                                        mat_distr_aos, virtuals, eps_filter)
    1999             :       TYPE(qs_environment_type), POINTER                 :: qs_env
    2000             :       TYPE(optimizer_options_type), INTENT(INOUT)        :: optimizer
    2001             :       TYPE(dbcsr_type), INTENT(IN)                       :: matrix_s
    2002             :       TYPE(dbcsr_type), ALLOCATABLE, DIMENSION(:), &
    2003             :          INTENT(INOUT)                                   :: matrix_mo_in, matrix_mo_out
    2004             :       TYPE(dbcsr_type), ALLOCATABLE, DIMENSION(:), &
    2005             :          INTENT(IN)                                      :: template_matrix_sigma
    2006             :       REAL(KIND=dp), INTENT(INOUT)                       :: overlap_determinant
    2007             :       INTEGER, INTENT(IN)                                :: mat_distr_aos
    2008             :       LOGICAL, INTENT(IN)                                :: virtuals
    2009             :       REAL(KIND=dp), INTENT(IN)                          :: eps_filter
    2010             : 
    2011             :       CHARACTER(len=*), PARAMETER :: routineN = 'almo_scf_construct_nlmos'
    2012             : 
    2013             :       CHARACTER(LEN=30)                                  :: iter_type, print_string
    2014             :       INTEGER :: cg_iteration, dim_op, handle, iatom, idim0, isgf, ispin, iteration, &
    2015             :          line_search_iteration, linear_search_type, max_iter, natom, ncol, nspins, &
    2016             :          outer_iteration, outer_max_iter, prec_type, reim, unit_nr
    2017          16 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: first_sgf, last_sgf, nocc, nsgf
    2018             :       LOGICAL                                            :: converged, d_bfgs, just_started, l_bfgs, &
    2019             :                                                             line_search, outer_prepare_to_exit, &
    2020             :                                                             prepare_to_exit, reset_conjugator
    2021             :       REAL(KIND=dp) :: appr_sec_der, beta, bfgs_rho, bfgs_sum, denom, denom2, e0, e1, g0, g0sign, &
    2022             :          g1, g1sign, grad_norm, line_search_error, localization_obj_function, &
    2023             :          localization_obj_function_ispin, next_step_size_guess, obj_function_ispin, objf_diff, &
    2024             :          objf_new, objf_old, penalty_amplitude, penalty_func_ispin, penalty_func_new, spin_factor, &
    2025             :          step_size, t1, t2, tempreal
    2026           8 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: diagonal, grad_norm_spin, &
    2027           8 :                                                             penalty_vol_prefactor, &
    2028           8 :                                                             suggested_vol_penalty, weights
    2029             :       TYPE(cell_type), POINTER                           :: cell
    2030             :       TYPE(cp_logger_type), POINTER                      :: logger
    2031           8 :       TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: qs_matrix_s
    2032           8 :       TYPE(dbcsr_p_type), DIMENSION(:, :), POINTER       :: op_sm_set_almo, op_sm_set_qs
    2033           8 :       TYPE(dbcsr_type), ALLOCATABLE, DIMENSION(:) :: approx_inv_hessian, bfgs_s, bfgs_y, grad, &
    2034           8 :          m_S0, m_sig_sqrti_ii, m_siginv, m_sigma, m_t_mo_local, m_theta, m_theta_normalized, &
    2035           8 :          prev_grad, prev_m_theta, prev_minus_prec_grad, prev_step, step, tempNOcc1, tempOccOcc1, &
    2036           8 :          tempOccOcc2, tempOccOcc3
    2037           8 :       TYPE(dbcsr_type), ALLOCATABLE, DIMENSION(:, :, :)  :: m_B0
    2038          24 :       TYPE(lbfgs_history_type)                           :: nlmo_lbfgs_history
    2039             :       TYPE(mp_comm_type)                                 :: group
    2040           8 :       TYPE(particle_type), DIMENSION(:), POINTER         :: particle_set
    2041           8 :       TYPE(qs_kind_type), DIMENSION(:), POINTER          :: qs_kind_set
    2042             : 
    2043           8 :       CALL timeset(routineN, handle)
    2044             : 
    2045             :       ! get a useful output_unit
    2046           8 :       logger => cp_get_default_logger()
    2047           8 :       IF (logger%para_env%is_source()) THEN
    2048           4 :          unit_nr = cp_logger_get_default_unit_nr(logger, local=.TRUE.)
    2049             :       ELSE
    2050             :          unit_nr = -1
    2051             :       END IF
    2052             : 
    2053           8 :       nspins = SIZE(matrix_mo_in)
    2054             : 
    2055           8 :       IF (unit_nr > 0) THEN
    2056           4 :          WRITE (unit_nr, *)
    2057           4 :          IF (.NOT. virtuals) THEN
    2058           4 :             WRITE (unit_nr, '(T2,A,A,A)') REPEAT("-", 24), &
    2059           8 :                " Optimization of occupied NLMOs ", REPEAT("-", 23)
    2060             :          ELSE
    2061           0 :             WRITE (unit_nr, '(T2,A,A,A)') REPEAT("-", 24), &
    2062           0 :                " Optimization of virtual NLMOs ", REPEAT("-", 24)
    2063             :          END IF
    2064           4 :          WRITE (unit_nr, *)
    2065           4 :          WRITE (unit_nr, '(T2,A13,A6,A23,A14,A14,A9)') "Method", "Iter", &
    2066           8 :             "Objective Function", "Change", "Convergence", "Time"
    2067           4 :          WRITE (unit_nr, '(T2,A)') REPEAT("-", 79)
    2068             :       END IF
    2069             : 
    2070           8 :       NULLIFY (particle_set)
    2071             : 
    2072             :       CALL get_qs_env(qs_env=qs_env, &
    2073             :                       matrix_s=qs_matrix_s, &
    2074             :                       cell=cell, &
    2075             :                       particle_set=particle_set, &
    2076           8 :                       qs_kind_set=qs_kind_set)
    2077             : 
    2078           8 :       natom = SIZE(particle_set, 1)
    2079          24 :       ALLOCATE (first_sgf(natom))
    2080          16 :       ALLOCATE (last_sgf(natom))
    2081          16 :       ALLOCATE (nsgf(natom))
    2082             :       !   construction of
    2083             :       CALL get_particle_set(particle_set, qs_kind_set, &
    2084           8 :                             first_sgf=first_sgf, last_sgf=last_sgf, nsgf=nsgf)
    2085             : 
    2086             :       ! m_theta contains a set of variational parameters
    2087             :       ! that define one-electron orbitals
    2088          32 :       ALLOCATE (m_theta(nspins))
    2089          16 :       DO ispin = 1, nspins
    2090             :          CALL dbcsr_create(m_theta(ispin), &
    2091             :                            template=template_matrix_sigma(ispin), &
    2092           8 :                            matrix_type=dbcsr_type_no_symmetry)
    2093             :          ! create initial guess for the main variable - identity matrix
    2094           8 :          CALL dbcsr_set(m_theta(ispin), 0.0_dp)
    2095          16 :          CALL dbcsr_add_on_diag(m_theta(ispin), 1.0_dp)
    2096             :       END DO
    2097             : 
    2098           8 :       SELECT CASE (optimizer%opt_penalty%operator_type)
    2099             :       CASE (op_loc_berry)
    2100             : 
    2101           0 :          IF (cell%orthorhombic) THEN
    2102           0 :             dim_op = 3
    2103             :          ELSE
    2104           0 :             dim_op = 6
    2105             :          END IF
    2106           0 :          ALLOCATE (weights(6))
    2107           0 :          weights = 0.0_dp
    2108           0 :          CALL initialize_weights(cell, weights)
    2109           0 :          ALLOCATE (op_sm_set_qs(2, dim_op))
    2110           0 :          ALLOCATE (op_sm_set_almo(2, dim_op))
    2111             :          ! allocate space for T0^t.B.T0
    2112           0 :          ALLOCATE (m_B0(2, dim_op, nspins))
    2113           0 :          DO idim0 = 1, dim_op
    2114           0 :             DO reim = 1, SIZE(op_sm_set_qs, 1)
    2115           0 :                NULLIFY (op_sm_set_qs(reim, idim0)%matrix, op_sm_set_almo(reim, idim0)%matrix)
    2116           0 :                ALLOCATE (op_sm_set_qs(reim, idim0)%matrix)
    2117           0 :                ALLOCATE (op_sm_set_almo(reim, idim0)%matrix)
    2118             :                CALL dbcsr_copy(op_sm_set_qs(reim, idim0)%matrix, qs_matrix_s(1)%matrix, &
    2119           0 :                              name="almo_scf_env%op_sm_"//TRIM(ADJUSTL(cp_to_string(reim)))//"-"//TRIM(ADJUSTL(cp_to_string(idim0))))
    2120           0 :                CALL dbcsr_set(op_sm_set_qs(reim, idim0)%matrix, 0.0_dp)
    2121             :                CALL dbcsr_copy(op_sm_set_almo(reim, idim0)%matrix, matrix_s, &
    2122           0 :                              name="almo_scf_env%op_sm_"//TRIM(ADJUSTL(cp_to_string(reim)))//"-"//TRIM(ADJUSTL(cp_to_string(idim0))))
    2123           0 :                CALL dbcsr_set(op_sm_set_almo(reim, idim0)%matrix, 0.0_dp)
    2124           0 :                DO ispin = 1, nspins
    2125             :                   CALL dbcsr_create(m_B0(reim, idim0, ispin), &
    2126             :                                     template=m_theta(ispin), &
    2127           0 :                                     matrix_type=dbcsr_type_no_symmetry)
    2128           0 :                   CALL dbcsr_set(m_B0(reim, idim0, ispin), 0.0_dp)
    2129             :                END DO
    2130             :             END DO
    2131             :          END DO
    2132             : 
    2133           0 :          CALL compute_berry_operator(qs_env, cell, op_sm_set_qs, dim_op)
    2134             : 
    2135             :       CASE (op_loc_pipek)
    2136             : 
    2137           8 :          dim_op = natom
    2138          24 :          ALLOCATE (weights(dim_op))
    2139          80 :          weights = 1.0_dp
    2140             : 
    2141         184 :          ALLOCATE (m_B0(1, dim_op, nspins))
    2142             :          !m_B0 first dim is 1 now!
    2143          88 :          DO idim0 = 1, dim_op
    2144         152 :             DO reim = 1, 1 !SIZE(op_sm_set_qs, 1)
    2145         216 :                DO ispin = 1, nspins
    2146             :                   CALL dbcsr_create(m_B0(reim, idim0, ispin), &
    2147             :                                     template=m_theta(ispin), &
    2148          72 :                                     matrix_type=dbcsr_type_no_symmetry)
    2149         144 :                   CALL dbcsr_set(m_B0(reim, idim0, ispin), 0.0_dp)
    2150             :                END DO
    2151             :             END DO
    2152             :          END DO
    2153             : 
    2154             :       END SELECT
    2155             : 
    2156             :       ! penalty amplitude adjusts the strenght of volume conservation
    2157           8 :       penalty_amplitude = optimizer%opt_penalty%penalty_strength
    2158             :       !penalty_occ_vol = ( optimizer%opt_penalty%occ_vol_method .NE. penalty_type_none )
    2159             :       !penalty_local = ( optimizer%opt_penalty%occ_loc_method .NE. penalty_type_none )
    2160             : 
    2161             :       ! preconditioner control
    2162           8 :       prec_type = optimizer%preconditioner
    2163             : 
    2164             :       ! use diagonal BFGS if preconditioner is set
    2165           8 :       d_bfgs = .FALSE.
    2166           8 :       l_bfgs = .FALSE.
    2167           8 :       IF (prec_type .NE. xalmo_prec_zero) l_bfgs = .TRUE.
    2168           8 :       IF (l_bfgs .AND. (optimizer%conjugator .NE. cg_zero)) THEN
    2169           0 :          CPABORT("Cannot use conjugators with BFGS")
    2170             :       END IF
    2171           8 :       IF (l_bfgs) THEN
    2172           8 :          CALL lbfgs_create(nlmo_lbfgs_history, nspins, nstore=10)
    2173             :       END IF
    2174             : 
    2175             :       IF (nspins == 1) THEN
    2176             :          spin_factor = 2.0_dp
    2177             :       ELSE
    2178             :          spin_factor = 1.0_dp
    2179             :       END IF
    2180             : 
    2181          24 :       ALLOCATE (grad_norm_spin(nspins))
    2182          24 :       ALLOCATE (nocc(nspins))
    2183          16 :       ALLOCATE (penalty_vol_prefactor(nspins))
    2184          16 :       ALLOCATE (suggested_vol_penalty(nspins))
    2185             : 
    2186             :       ! create a local copy of matrix_mo_in because
    2187             :       ! matrix_mo_in and matrix_mo_out can be the same matrix
    2188             :       ! we need to make sure data in matrix_mo_in is intact
    2189             :       ! after we start writing to matrix_mo_out
    2190          24 :       ALLOCATE (m_t_mo_local(nspins))
    2191          16 :       DO ispin = 1, nspins
    2192             :          CALL dbcsr_create(m_t_mo_local(ispin), &
    2193             :                            template=matrix_mo_in(ispin), &
    2194           8 :                            matrix_type=dbcsr_type_no_symmetry)
    2195          16 :          CALL dbcsr_copy(m_t_mo_local(ispin), matrix_mo_in(ispin))
    2196             :       END DO
    2197             : 
    2198          24 :       ALLOCATE (approx_inv_hessian(nspins))
    2199          24 :       ALLOCATE (m_theta_normalized(nspins))
    2200          32 :       ALLOCATE (prev_m_theta(nspins))
    2201          24 :       ALLOCATE (m_S0(nspins))
    2202          24 :       ALLOCATE (prev_grad(nspins))
    2203          24 :       ALLOCATE (grad(nspins))
    2204          24 :       ALLOCATE (prev_step(nspins))
    2205          24 :       ALLOCATE (step(nspins))
    2206          24 :       ALLOCATE (prev_minus_prec_grad(nspins))
    2207          24 :       ALLOCATE (m_sig_sqrti_ii(nspins))
    2208          24 :       ALLOCATE (m_sigma(nspins))
    2209          24 :       ALLOCATE (m_siginv(nspins))
    2210          32 :       ALLOCATE (tempNOcc1(nspins))
    2211          24 :       ALLOCATE (tempOccOcc1(nspins))
    2212          24 :       ALLOCATE (tempOccOcc2(nspins))
    2213          24 :       ALLOCATE (tempOccOcc3(nspins))
    2214          24 :       ALLOCATE (bfgs_y(nspins))
    2215          24 :       ALLOCATE (bfgs_s(nspins))
    2216             : 
    2217          16 :       DO ispin = 1, nspins
    2218             : 
    2219             :          ! init temporary storage
    2220             :          CALL dbcsr_create(tempNOcc1(ispin), &
    2221             :                            template=matrix_mo_out(ispin), &
    2222           8 :                            matrix_type=dbcsr_type_no_symmetry)
    2223             :          CALL dbcsr_create(approx_inv_hessian(ispin), &
    2224             :                            template=m_theta(ispin), &
    2225           8 :                            matrix_type=dbcsr_type_no_symmetry)
    2226             :          CALL dbcsr_create(m_theta_normalized(ispin), &
    2227             :                            template=m_theta(ispin), &
    2228           8 :                            matrix_type=dbcsr_type_no_symmetry)
    2229             :          CALL dbcsr_create(prev_m_theta(ispin), &
    2230             :                            template=m_theta(ispin), &
    2231           8 :                            matrix_type=dbcsr_type_no_symmetry)
    2232             :          CALL dbcsr_create(m_S0(ispin), &
    2233             :                            template=m_theta(ispin), &
    2234           8 :                            matrix_type=dbcsr_type_no_symmetry)
    2235             :          CALL dbcsr_create(prev_grad(ispin), &
    2236             :                            template=m_theta(ispin), &
    2237           8 :                            matrix_type=dbcsr_type_no_symmetry)
    2238             :          CALL dbcsr_create(grad(ispin), &
    2239             :                            template=m_theta(ispin), &
    2240           8 :                            matrix_type=dbcsr_type_no_symmetry)
    2241             :          CALL dbcsr_create(prev_step(ispin), &
    2242             :                            template=m_theta(ispin), &
    2243           8 :                            matrix_type=dbcsr_type_no_symmetry)
    2244             :          CALL dbcsr_create(step(ispin), &
    2245             :                            template=m_theta(ispin), &
    2246           8 :                            matrix_type=dbcsr_type_no_symmetry)
    2247             :          CALL dbcsr_create(prev_minus_prec_grad(ispin), &
    2248             :                            template=m_theta(ispin), &
    2249           8 :                            matrix_type=dbcsr_type_no_symmetry)
    2250             :          CALL dbcsr_create(m_sig_sqrti_ii(ispin), &
    2251             :                            template=m_theta(ispin), &
    2252           8 :                            matrix_type=dbcsr_type_no_symmetry)
    2253             :          CALL dbcsr_create(m_sigma(ispin), &
    2254             :                            template=m_theta(ispin), &
    2255           8 :                            matrix_type=dbcsr_type_no_symmetry)
    2256             :          CALL dbcsr_create(m_siginv(ispin), &
    2257             :                            template=m_theta(ispin), &
    2258           8 :                            matrix_type=dbcsr_type_no_symmetry)
    2259             :          CALL dbcsr_create(tempOccOcc1(ispin), &
    2260             :                            template=m_theta(ispin), &
    2261           8 :                            matrix_type=dbcsr_type_no_symmetry)
    2262             :          CALL dbcsr_create(tempOccOcc2(ispin), &
    2263             :                            template=m_theta(ispin), &
    2264           8 :                            matrix_type=dbcsr_type_no_symmetry)
    2265             :          CALL dbcsr_create(tempOccOcc3(ispin), &
    2266             :                            template=m_theta(ispin), &
    2267           8 :                            matrix_type=dbcsr_type_no_symmetry)
    2268             :          CALL dbcsr_create(bfgs_s(ispin), &
    2269             :                            template=m_theta(ispin), &
    2270           8 :                            matrix_type=dbcsr_type_no_symmetry)
    2271             :          CALL dbcsr_create(bfgs_y(ispin), &
    2272             :                            template=m_theta(ispin), &
    2273           8 :                            matrix_type=dbcsr_type_no_symmetry)
    2274             : 
    2275           8 :          CALL dbcsr_set(step(ispin), 0.0_dp)
    2276           8 :          CALL dbcsr_set(prev_step(ispin), 0.0_dp)
    2277             : 
    2278             :          CALL dbcsr_get_info(template_matrix_sigma(ispin), &
    2279           8 :                              nfullrows_total=nocc(ispin))
    2280             : 
    2281           8 :          penalty_vol_prefactor(ispin) = -penalty_amplitude !KEEP: * spin_factor * nocc(ispin)
    2282             : 
    2283             :          ! compute m_S0=T0^t.S.T0
    2284             :          CALL dbcsr_multiply("N", "N", 1.0_dp, &
    2285             :                              matrix_s, &
    2286             :                              m_t_mo_local(ispin), &
    2287             :                              0.0_dp, tempNOcc1(ispin), &
    2288           8 :                              filter_eps=eps_filter)
    2289             :          CALL dbcsr_multiply("T", "N", 1.0_dp, &
    2290             :                              m_t_mo_local(ispin), &
    2291             :                              tempNOcc1(ispin), &
    2292             :                              0.0_dp, m_S0(ispin), &
    2293           8 :                              filter_eps=eps_filter)
    2294             : 
    2295           8 :          SELECT CASE (optimizer%opt_penalty%operator_type)
    2296             : 
    2297             :          CASE (op_loc_berry)
    2298             : 
    2299             :             ! compute m_B0=T0^t.B.T0
    2300           0 :             DO idim0 = 1, SIZE(op_sm_set_qs, 2) ! this loop is over miller ind
    2301             : 
    2302           0 :                DO reim = 1, SIZE(op_sm_set_qs, 1) ! this loop is over Re/Im
    2303             : 
    2304             :                   CALL matrix_qs_to_almo(op_sm_set_qs(reim, idim0)%matrix, &
    2305           0 :                                          op_sm_set_almo(reim, idim0)%matrix, mat_distr_aos)
    2306             : 
    2307             :                   CALL dbcsr_multiply("N", "N", 1.0_dp, &
    2308             :                                       op_sm_set_almo(reim, idim0)%matrix, &
    2309             :                                       m_t_mo_local(ispin), &
    2310             :                                       0.0_dp, tempNOcc1(ispin), &
    2311           0 :                                       filter_eps=eps_filter)
    2312             : 
    2313             :                   CALL dbcsr_multiply("T", "N", 1.0_dp, &
    2314             :                                       m_t_mo_local(ispin), &
    2315             :                                       tempNOcc1(ispin), &
    2316             :                                       0.0_dp, m_B0(reim, idim0, ispin), &
    2317           0 :                                       filter_eps=eps_filter)
    2318             : 
    2319           0 :                   DEALLOCATE (op_sm_set_qs(reim, idim0)%matrix)
    2320           0 :                   DEALLOCATE (op_sm_set_almo(reim, idim0)%matrix)
    2321             : 
    2322             :                END DO
    2323             : 
    2324             :             END DO ! end loop over idim0
    2325             : 
    2326             :          CASE (op_loc_pipek)
    2327             : 
    2328             :             ! compute m_B0=T0^t.B.T0
    2329          80 :             DO iatom = 1, natom ! this loop is over "miller" ind
    2330             : 
    2331          72 :                isgf = first_sgf(iatom)
    2332          72 :                ncol = nsgf(iatom)
    2333             : 
    2334             :                CALL dbcsr_multiply("N", "N", 1.0_dp, &
    2335             :                                    matrix_s, &
    2336             :                                    m_t_mo_local(ispin), &
    2337             :                                    0.0_dp, tempNOcc1(ispin), &
    2338          72 :                                    filter_eps=eps_filter)
    2339             : 
    2340             :                CALL dbcsr_multiply("T", "N", 0.5_dp, &
    2341             :                                    m_t_mo_local(ispin), &
    2342             :                                    tempNOcc1(ispin), &
    2343             :                                    0.0_dp, m_B0(1, iatom, ispin), &
    2344             :                                    first_k=isgf, last_k=isgf + ncol - 1, &
    2345          72 :                                    filter_eps=eps_filter)
    2346             : 
    2347             :                CALL dbcsr_multiply("N", "N", 1.0_dp, &
    2348             :                                    matrix_s, &
    2349             :                                    m_t_mo_local(ispin), &
    2350             :                                    0.0_dp, tempNOcc1(ispin), &
    2351             :                                    first_k=isgf, last_k=isgf + ncol - 1, &
    2352          72 :                                    filter_eps=eps_filter)
    2353             : 
    2354             :                CALL dbcsr_multiply("T", "N", 0.5_dp, &
    2355             :                                    m_t_mo_local(ispin), &
    2356             :                                    tempNOcc1(ispin), &
    2357             :                                    1.0_dp, m_B0(1, iatom, ispin), &
    2358          80 :                                    filter_eps=eps_filter)
    2359             : 
    2360             :             END DO ! end loop over iatom
    2361             : 
    2362             :          END SELECT
    2363             : 
    2364             :       END DO ! ispin
    2365             : 
    2366           8 :       IF (optimizer%opt_penalty%operator_type .EQ. op_loc_berry) THEN
    2367           0 :          DO idim0 = 1, SIZE(op_sm_set_qs, 2) ! this loop is over miller ind
    2368           0 :             DO reim = 1, SIZE(op_sm_set_qs, 1) ! this loop is over Re/Im
    2369           0 :                DEALLOCATE (op_sm_set_qs(reim, idim0)%matrix)
    2370           0 :                DEALLOCATE (op_sm_set_almo(reim, idim0)%matrix)
    2371             :             END DO
    2372             :          END DO
    2373           0 :          DEALLOCATE (op_sm_set_qs, op_sm_set_almo)
    2374             :       END IF
    2375             : 
    2376             :       ! start the outer SCF loop
    2377           8 :       outer_max_iter = optimizer%max_iter_outer_loop
    2378           8 :       outer_prepare_to_exit = .FALSE.
    2379           8 :       outer_iteration = 0
    2380           8 :       grad_norm = 0.0_dp
    2381           8 :       penalty_func_new = 0.0_dp
    2382           8 :       linear_search_type = 1 ! safe restart, no quadratic assumption, takes more steps
    2383             :       localization_obj_function = 0.0_dp
    2384             :       penalty_func_new = 0.0_dp
    2385             : 
    2386             :       DO
    2387             : 
    2388             :          ! start the inner SCF loop
    2389           8 :          max_iter = optimizer%max_iter
    2390           8 :          prepare_to_exit = .FALSE.
    2391           8 :          line_search = .FALSE.
    2392           8 :          converged = .FALSE.
    2393           8 :          iteration = 0
    2394           8 :          cg_iteration = 0
    2395           8 :          line_search_iteration = 0
    2396           8 :          obj_function_ispin = 0.0_dp
    2397           8 :          objf_new = 0.0_dp
    2398           8 :          objf_old = 0.0_dp
    2399           8 :          objf_diff = 0.0_dp
    2400           8 :          line_search_error = 0.0_dp
    2401           8 :          t1 = m_walltime()
    2402           8 :          next_step_size_guess = 0.0_dp
    2403             : 
    2404             :          DO
    2405             : 
    2406          82 :             just_started = (iteration .EQ. 0) .AND. (outer_iteration .EQ. 0)
    2407             : 
    2408         164 :             DO ispin = 1, nspins
    2409             : 
    2410          82 :                CALL dbcsr_get_info(m_sig_sqrti_ii(ispin), group=group)
    2411             : 
    2412             :                ! compute diagonal (a^t.sigma0.a)^(-1/2)
    2413             :                CALL dbcsr_multiply("N", "N", 1.0_dp, &
    2414             :                                    m_S0(ispin), m_theta(ispin), 0.0_dp, &
    2415             :                                    tempOccOcc1(ispin), &
    2416          82 :                                    filter_eps=eps_filter)
    2417          82 :                CALL dbcsr_set(m_sig_sqrti_ii(ispin), 0.0_dp)
    2418          82 :                CALL dbcsr_add_on_diag(m_sig_sqrti_ii(ispin), 1.0_dp)
    2419             :                CALL dbcsr_multiply("T", "N", 1.0_dp, &
    2420             :                                    m_theta(ispin), tempOccOcc1(ispin), 0.0_dp, &
    2421             :                                    m_sig_sqrti_ii(ispin), &
    2422          82 :                                    retain_sparsity=.TRUE.)
    2423         246 :                ALLOCATE (diagonal(nocc(ispin)))
    2424          82 :                CALL dbcsr_get_diag(m_sig_sqrti_ii(ispin), diagonal)
    2425          82 :                CALL group%sum(diagonal)
    2426             :                ! TODO: works for zero diagonal elements?
    2427        1368 :                diagonal(:) = 1.0_dp/SQRT(diagonal(:))
    2428          82 :                CALL dbcsr_set(m_sig_sqrti_ii(ispin), 0.0_dp)
    2429          82 :                CALL dbcsr_set_diag(m_sig_sqrti_ii(ispin), diagonal)
    2430          82 :                DEALLOCATE (diagonal)
    2431             : 
    2432             :                CALL dbcsr_multiply("N", "N", 1.0_dp, &
    2433             :                                    m_theta(ispin), &
    2434             :                                    m_sig_sqrti_ii(ispin), &
    2435             :                                    0.0_dp, m_theta_normalized(ispin), &
    2436          82 :                                    filter_eps=eps_filter)
    2437             : 
    2438             :                ! compute new orbitals
    2439             :                CALL dbcsr_multiply("N", "N", 1.0_dp, &
    2440             :                                    m_t_mo_local(ispin), &
    2441             :                                    m_theta_normalized(ispin), &
    2442             :                                    0.0_dp, matrix_mo_out(ispin), &
    2443         246 :                                    filter_eps=eps_filter)
    2444             : 
    2445             :             END DO
    2446             : 
    2447             :             ! compute objective function
    2448          82 :             localization_obj_function = 0.0_dp
    2449          82 :             penalty_func_new = 0.0_dp
    2450         164 :             DO ispin = 1, nspins
    2451             : 
    2452             :                CALL compute_obj_nlmos( &
    2453             :                   !obj_function_ispin=obj_function_ispin, &
    2454             :                   localization_obj_function_ispin=localization_obj_function_ispin, &
    2455             :                   penalty_func_ispin=penalty_func_ispin, &
    2456             :                   overlap_determinant=overlap_determinant, &
    2457             :                   m_sigma=m_sigma(ispin), &
    2458             :                   nocc=nocc(ispin), &
    2459             :                   m_B0=m_B0(:, :, ispin), &
    2460             :                   m_theta_normalized=m_theta_normalized(ispin), &
    2461             :                   template_matrix_mo=matrix_mo_out(ispin), &
    2462             :                   weights=weights, &
    2463             :                   m_S0=m_S0(ispin), &
    2464             :                   just_started=just_started, &
    2465             :                   penalty_vol_prefactor=penalty_vol_prefactor(ispin), &
    2466             :                   penalty_amplitude=penalty_amplitude, &
    2467          82 :                   eps_filter=eps_filter)
    2468             : 
    2469          82 :                localization_obj_function = localization_obj_function + localization_obj_function_ispin
    2470         164 :                penalty_func_new = penalty_func_new + penalty_func_ispin
    2471             : 
    2472             :             END DO ! ispin
    2473          82 :             objf_new = penalty_func_new + localization_obj_function
    2474             : 
    2475         164 :             DO ispin = 1, nspins
    2476             :                ! save the previous gradient to compute beta
    2477             :                ! do it only if the previous grad was computed
    2478             :                ! for .NOT.line_search
    2479         164 :                IF (line_search_iteration .EQ. 0 .AND. iteration .NE. 0) THEN
    2480          30 :                   CALL dbcsr_copy(prev_grad(ispin), grad(ispin))
    2481             :                END IF
    2482             : 
    2483             :             END DO ! ispin
    2484             : 
    2485             :             ! compute the gradient
    2486         164 :             DO ispin = 1, nspins
    2487             : 
    2488             :                CALL invert_Hotelling( &
    2489             :                   matrix_inverse=m_siginv(ispin), &
    2490             :                   matrix=m_sigma(ispin), &
    2491             :                   threshold=eps_filter*10.0_dp, &
    2492             :                   filter_eps=eps_filter, &
    2493          82 :                   silent=.FALSE.)
    2494             : 
    2495             :                CALL compute_gradient_nlmos( &
    2496             :                   m_grad_out=grad(ispin), &
    2497             :                   m_B0=m_B0(:, :, ispin), &
    2498             :                   weights=weights, &
    2499             :                   m_S0=m_S0(ispin), &
    2500             :                   m_theta_normalized=m_theta_normalized(ispin), &
    2501             :                   m_siginv=m_siginv(ispin), &
    2502             :                   m_sig_sqrti_ii=m_sig_sqrti_ii(ispin), &
    2503             :                   penalty_vol_prefactor=penalty_vol_prefactor(ispin), &
    2504             :                   eps_filter=eps_filter, &
    2505         164 :                   suggested_vol_penalty=suggested_vol_penalty(ispin))
    2506             : 
    2507             :             END DO ! ispin
    2508             : 
    2509             :             ! check convergence and other exit criteria
    2510         164 :             DO ispin = 1, nspins
    2511         164 :                grad_norm_spin(ispin) = dbcsr_maxabs(grad(ispin))
    2512             :             END DO ! ispin
    2513         246 :             grad_norm = MAXVAL(grad_norm_spin)
    2514             : 
    2515          82 :             converged = (grad_norm .LE. optimizer%eps_error)
    2516          82 :             IF (converged .OR. (iteration .GE. max_iter)) THEN
    2517             :                prepare_to_exit = .TRUE.
    2518             :             END IF
    2519             : 
    2520             :             ! it is not time to exit just yet
    2521          74 :             IF (.NOT. prepare_to_exit) THEN
    2522             : 
    2523             :                ! check the gradient along the step direction
    2524             :                ! and decide whether to switch to the line-search mode
    2525             :                ! do not do this in the first iteration
    2526          74 :                IF (iteration .NE. 0) THEN
    2527             : 
    2528             :                   ! enforce at least one line search
    2529             :                   ! without even checking the error
    2530          68 :                   IF (.NOT. line_search) THEN
    2531             : 
    2532          30 :                      line_search = .TRUE.
    2533          30 :                      line_search_iteration = line_search_iteration + 1
    2534             : 
    2535             :                   ELSE
    2536             : 
    2537             :                      ! check the line-search error and decide whether to
    2538             :                      ! change the direction
    2539             :                      line_search_error = 0.0_dp
    2540             :                      denom = 0.0_dp
    2541             :                      denom2 = 0.0_dp
    2542             : 
    2543          76 :                      DO ispin = 1, nspins
    2544             : 
    2545          38 :                         CALL dbcsr_dot(grad(ispin), step(ispin), tempreal)
    2546          38 :                         line_search_error = line_search_error + tempreal
    2547          38 :                         CALL dbcsr_dot(grad(ispin), grad(ispin), tempreal)
    2548          38 :                         denom = denom + tempreal
    2549          38 :                         CALL dbcsr_dot(step(ispin), step(ispin), tempreal)
    2550          76 :                         denom2 = denom2 + tempreal
    2551             : 
    2552             :                      END DO ! ispin
    2553             : 
    2554             :                      ! cosine of the angle between the step and grad
    2555             :                      ! (must be close to zero at convergence)
    2556          38 :                      line_search_error = line_search_error/SQRT(denom)/SQRT(denom2)
    2557             : 
    2558          38 :                      IF (ABS(line_search_error) .GT. optimizer%lin_search_eps_error) THEN
    2559          14 :                         line_search = .TRUE.
    2560          14 :                         line_search_iteration = line_search_iteration + 1
    2561             :                      ELSE
    2562             :                         line_search = .FALSE.
    2563             :                         line_search_iteration = 0
    2564             :                      END IF
    2565             : 
    2566             :                   END IF
    2567             : 
    2568             :                END IF ! iteration.ne.0
    2569             : 
    2570           6 :                IF (line_search) THEN
    2571          44 :                   objf_diff = 0.0_dp
    2572             :                ELSE
    2573          30 :                   objf_diff = objf_new - objf_old
    2574          30 :                   objf_old = objf_new
    2575             :                END IF
    2576             : 
    2577             :                ! update the step direction
    2578          74 :                IF (.NOT. line_search) THEN
    2579             : 
    2580          60 :                   cg_iteration = cg_iteration + 1
    2581             : 
    2582             :                   ! save the previous step
    2583          60 :                   DO ispin = 1, nspins
    2584          60 :                      CALL dbcsr_copy(prev_step(ispin), step(ispin))
    2585             :                   END DO ! ispin
    2586             : 
    2587             :                   ! compute the new step:
    2588             :                   ! if available use second derivative info - bfgs, hessian, preconditioner
    2589          30 :                   IF (prec_type .EQ. xalmo_prec_zero) THEN ! no second derivatives
    2590             : 
    2591             :                      ! no preconditioner
    2592           0 :                      DO ispin = 1, nspins
    2593             : 
    2594           0 :                         CALL dbcsr_copy(step(ispin), grad(ispin))
    2595           0 :                         CALL dbcsr_scale(step(ispin), -1.0_dp)
    2596             : 
    2597             :                      END DO ! ispin
    2598             : 
    2599             :                   ELSE ! use second derivatives
    2600             : 
    2601             :                      ! compute and invert hessian/precond?
    2602          30 :                      IF (iteration .EQ. 0) THEN
    2603             : 
    2604             :                         IF (d_bfgs) THEN
    2605             : 
    2606             :                            ! create matrix filled with 1.0 here
    2607             :                            CALL fill_matrix_with_ones(approx_inv_hessian(1))
    2608             :                            IF (nspins .GT. 1) THEN
    2609             :                               DO ispin = 2, nspins
    2610             :                                  CALL dbcsr_copy(approx_inv_hessian(ispin), approx_inv_hessian(1))
    2611             :                               END DO
    2612             :                            END IF
    2613             : 
    2614           6 :                         ELSE IF (l_bfgs) THEN
    2615             : 
    2616           6 :                            CALL lbfgs_seed(nlmo_lbfgs_history, m_theta, grad)
    2617          12 :                            DO ispin = 1, nspins
    2618           6 :                               CALL dbcsr_copy(step(ispin), grad(ispin))
    2619          12 :                               CALL dbcsr_scale(step(ispin), -1.0_dp)
    2620             :                            END DO ! ispin
    2621             : 
    2622             :                         ELSE
    2623             : 
    2624             :                            ! computing preconditioner
    2625           0 :                            DO ispin = 1, nspins
    2626             : 
    2627             :                               ! TODO: write preconditioner code later
    2628             :                               ! For now, create matrix filled with 1.0 here
    2629           0 :                               CALL fill_matrix_with_ones(approx_inv_hessian(ispin))
    2630             :                               !CALL compute_preconditioner(&
    2631             :                               !       m_prec_out=approx_hessian(ispin),&
    2632             :                               !       m_ks=almo_scf_env%matrix_ks(ispin),&
    2633             :                               !       m_s=matrix_s,&
    2634             :                               !       m_siginv=almo_scf_env%template_matrix_sigma(ispin),&
    2635             :                               !       m_quench_t=quench_t(ispin),&
    2636             :                               !       m_FTsiginv=FTsiginv(ispin),&
    2637             :                               !       m_siginvTFTsiginv=siginvTFTsiginv(ispin),&
    2638             :                               !       m_ST=ST(ispin),&
    2639             :                               !       para_env=almo_scf_env%para_env,&
    2640             :                               !       blacs_env=almo_scf_env%blacs_env,&
    2641             :                               !       nocc_of_domain=almo_scf_env%nocc_of_domain(:,ispin),&
    2642             :                               !       domain_s_inv=almo_scf_env%domain_s_inv(:,ispin),&
    2643             :                               !       domain_r_down=domain_r_down(:,ispin),&
    2644             :                               !       cpu_of_domain=almo_scf_env%cpu_of_domain,&
    2645             :                               !       domain_map=almo_scf_env%domain_map(ispin),&
    2646             :                               !       assume_t0_q0x=assume_t0_q0x,&
    2647             :                               !       penalty_occ_vol=penalty_occ_vol,&
    2648             :                               !       penalty_occ_vol_prefactor=penalty_occ_vol_g_prefactor(ispin),&
    2649             :                               !       eps_filter=eps_filter,&
    2650             :                               !       neg_thr=0.5_dp,&
    2651             :                               !       spin_factor=spin_factor,&
    2652             :                               !       special_case=my_special_case)
    2653             :                               !CALL invert hessian
    2654             :                            END DO ! ispin
    2655             : 
    2656             :                         END IF
    2657             : 
    2658             :                      ELSE ! not iteration zero
    2659             : 
    2660             :                         ! update approx inverse hessian
    2661             :                         IF (d_bfgs) THEN ! diagonal BFGS
    2662             : 
    2663             :                            DO ispin = 1, nspins
    2664             : 
    2665             :                               ! compute s and y
    2666             :                               CALL dbcsr_copy(bfgs_y(ispin), grad(ispin))
    2667             :                               CALL dbcsr_add(bfgs_y(ispin), prev_grad(ispin), 1.0_dp, -1.0_dp)
    2668             :                               CALL dbcsr_copy(bfgs_s(ispin), m_theta(ispin))
    2669             :                               CALL dbcsr_add(bfgs_s(ispin), prev_m_theta(ispin), 1.0_dp, -1.0_dp)
    2670             : 
    2671             :                               ! compute rho
    2672             :                               CALL dbcsr_dot(grad(ispin), step(ispin), bfgs_rho)
    2673             :                               bfgs_rho = 1.0_dp/bfgs_rho
    2674             : 
    2675             :                               ! compute the sum of the squared elements of bfgs_y
    2676             :                               CALL dbcsr_dot(bfgs_y(ispin), bfgs_y(ispin), bfgs_sum)
    2677             : 
    2678             :                               ! first term: start collecting new inv hessian in this temp matrix
    2679             :                               CALL dbcsr_copy(tempOccOcc2(ispin), approx_inv_hessian(ispin))
    2680             : 
    2681             :                               ! second term: + rho * s * s
    2682             :                               CALL dbcsr_hadamard_product(bfgs_s(ispin), bfgs_s(ispin), tempOccOcc1(ispin))
    2683             :                               CALL dbcsr_add(tempOccOcc2(ispin), tempOccOcc1(ispin), 1.0_dp, bfgs_rho)
    2684             : 
    2685             :                               ! third term: + rho^2 * s * s * H * sum_(y * y)
    2686             :                               CALL dbcsr_hadamard_product(tempOccOcc1(ispin), &
    2687             :                                                           approx_inv_hessian(ispin), tempOccOcc3(ispin))
    2688             :                               CALL dbcsr_add(tempOccOcc2(ispin), tempOccOcc3(ispin), &
    2689             :                                              1.0_dp, bfgs_rho*bfgs_rho*bfgs_sum)
    2690             : 
    2691             :                               ! fourth term: - 2 * rho * s * y * H
    2692             :                               CALL dbcsr_hadamard_product(bfgs_y(ispin), &
    2693             :                                                           approx_inv_hessian(ispin), tempOccOcc1(ispin))
    2694             :                               CALL dbcsr_hadamard_product(bfgs_s(ispin), tempOccOcc1(ispin), tempOccOcc3(ispin))
    2695             :                               CALL dbcsr_add(tempOccOcc2(ispin), tempOccOcc3(ispin), &
    2696             :                                              1.0_dp, -2.0_dp*bfgs_rho)
    2697             : 
    2698             :                               CALL dbcsr_copy(approx_inv_hessian(ispin), tempOccOcc2(ispin))
    2699             : 
    2700             :                            END DO
    2701             : 
    2702          24 :                         ELSE IF (l_bfgs) THEN
    2703             : 
    2704          24 :                            CALL lbfgs_get_direction(nlmo_lbfgs_history, m_theta, grad, step)
    2705             : 
    2706             :                         END IF ! which method?
    2707             : 
    2708             :                      END IF ! compute approximate inverse hessian
    2709             : 
    2710          30 :                      IF (.NOT. l_bfgs) THEN
    2711             : 
    2712           0 :                         DO ispin = 1, nspins
    2713             : 
    2714             :                            CALL dbcsr_hadamard_product(approx_inv_hessian(ispin), &
    2715           0 :                                                        grad(ispin), step(ispin))
    2716           0 :                            CALL dbcsr_scale(step(ispin), -1.0_dp)
    2717             : 
    2718             :                         END DO ! ispin
    2719             : 
    2720             :                      END IF
    2721             : 
    2722             :                   END IF ! second derivative type fork
    2723             : 
    2724             :                   ! check whether we need to reset conjugate directions
    2725          30 :                   IF (iteration .EQ. 0) THEN
    2726           6 :                      reset_conjugator = .TRUE.
    2727             :                   END IF
    2728             : 
    2729             :                   ! compute the conjugation coefficient - beta
    2730          30 :                   IF (.NOT. reset_conjugator) THEN
    2731             :                      CALL compute_cg_beta( &
    2732             :                         beta=beta, &
    2733             :                         reset_conjugator=reset_conjugator, &
    2734             :                         conjugator=optimizer%conjugator, &
    2735             :                         grad=grad(:), &
    2736             :                         prev_grad=prev_grad(:), &
    2737             :                         step=step(:), &
    2738             :                         prev_step=prev_step(:), &
    2739             :                         prev_minus_prec_grad=prev_minus_prec_grad(:) &
    2740          24 :                         )
    2741             : 
    2742             :                   END IF
    2743             : 
    2744          30 :                   IF (reset_conjugator) THEN
    2745             : 
    2746           6 :                      beta = 0.0_dp
    2747           6 :                      IF (unit_nr > 0 .AND. (.NOT. just_started)) THEN
    2748           0 :                         WRITE (unit_nr, '(T2,A35)') "Re-setting conjugator to zero"
    2749             :                      END IF
    2750           6 :                      reset_conjugator = .FALSE.
    2751             : 
    2752             :                   END IF
    2753             : 
    2754             :                   ! save the preconditioned gradient (useful for beta)
    2755          60 :                   DO ispin = 1, nspins
    2756             : 
    2757          30 :                      CALL dbcsr_copy(prev_minus_prec_grad(ispin), step(ispin))
    2758             : 
    2759             :                      ! conjugate the step direction
    2760          60 :                      CALL dbcsr_add(step(ispin), prev_step(ispin), 1.0_dp, beta)
    2761             : 
    2762             :                   END DO ! ispin
    2763             : 
    2764             :                END IF ! update the step direction
    2765             : 
    2766             :                ! estimate the step size
    2767          74 :                IF (.NOT. line_search) THEN
    2768             :                   ! we just changed the direction and
    2769             :                   ! we have only E and grad from the current step
    2770             :                   ! it is not enough to compute step_size - just guess it
    2771          30 :                   e0 = objf_new
    2772          30 :                   g0 = 0.0_dp
    2773          60 :                   DO ispin = 1, nspins
    2774          30 :                      CALL dbcsr_dot(grad(ispin), step(ispin), tempreal)
    2775          60 :                      g0 = g0 + tempreal
    2776             :                   END DO ! ispin
    2777             :                   g0sign = SIGN(1.0_dp, g0) ! sign of g0
    2778             :                   IF (linear_search_type .EQ. 1) THEN ! this is quadratic LS
    2779          30 :                      IF (iteration .EQ. 0) THEN
    2780           6 :                         step_size = optimizer%lin_search_step_size_guess
    2781             :                      ELSE
    2782          24 :                         IF (next_step_size_guess .LE. 0.0_dp) THEN
    2783           0 :                            step_size = optimizer%lin_search_step_size_guess
    2784             :                         ELSE
    2785             :                            ! take the last value
    2786          24 :                            step_size = optimizer%lin_search_step_size_guess
    2787             :                            !step_size = next_step_size_guess*1.05_dp
    2788             :                         END IF
    2789             :                      END IF
    2790             :                   ELSE IF (linear_search_type .EQ. 2) THEN ! this is cautious LS
    2791             :                      ! this LS type is designed not to trust quadratic appr
    2792             :                      ! so it always restarts from a safe step size
    2793             :                      step_size = optimizer%lin_search_step_size_guess
    2794             :                   END IF
    2795          30 :                   IF (unit_nr > 0) THEN
    2796          15 :                      WRITE (unit_nr, '(T21,3A19)') "Line position", "Line grad", "Next line step"
    2797          15 :                      WRITE (unit_nr, '(T2,A19,3F19.5)') "Line search", 0.0_dp, g0, step_size
    2798             :                   END IF
    2799          30 :                   next_step_size_guess = step_size
    2800             :                ELSE ! this is not the first line search
    2801          44 :                   e1 = objf_new
    2802          44 :                   g1 = 0.0_dp
    2803          88 :                   DO ispin = 1, nspins
    2804          44 :                      CALL dbcsr_dot(grad(ispin), step(ispin), tempreal)
    2805          88 :                      g1 = g1 + tempreal
    2806             :                   END DO ! ispin
    2807          44 :                   g1sign = SIGN(1.0_dp, g1) ! sign of g1
    2808             :                   IF (linear_search_type .EQ. 1) THEN
    2809             :                      ! we have accumulated some points along this direction
    2810             :                      ! use only the most recent g0 (quadratic approximation)
    2811          44 :                      appr_sec_der = (g1 - g0)/step_size
    2812             :                      !IF (unit_nr > 0) THEN
    2813             :                      !   WRITE (unit_nr, '(A2,7F12.5)') &
    2814             :                      !      "DT", e0, e1, g0, g1, appr_sec_der, step_size, -g1/appr_sec_der
    2815             :                      !ENDIF
    2816          44 :                      step_size = -g1/appr_sec_der
    2817             :                   ELSE IF (linear_search_type .EQ. 2) THEN
    2818             :                      ! alternative method for finding step size
    2819             :                      ! do not use quadratic approximation, only gradient signs
    2820             :                      IF (g1sign .NE. g0sign) THEN
    2821             :                         step_size = -step_size/2.0; 
    2822             :                      ELSE
    2823             :                         step_size = step_size*1.5; 
    2824             :                      END IF
    2825             :                   END IF
    2826             :                   ! end alternative LS types
    2827          44 :                   IF (unit_nr > 0) THEN
    2828          22 :                      WRITE (unit_nr, '(T21,3A19)') "Line position", "Line grad", "Next line step"
    2829          22 :                      WRITE (unit_nr, '(T2,A19,3F19.5)') "Line search", next_step_size_guess, g1, step_size
    2830             :                   END IF
    2831          44 :                   e0 = e1
    2832          44 :                   g0 = g1
    2833             :                   g0sign = g1sign
    2834          44 :                   next_step_size_guess = next_step_size_guess + step_size
    2835             :                END IF
    2836             : 
    2837             :                ! update theta
    2838         148 :                DO ispin = 1, nspins
    2839          74 :                   IF (.NOT. line_search) THEN ! we prepared to perform the first line search
    2840             :                      ! "previous" refers to the previous CG step, not the previous LS step
    2841          30 :                      CALL dbcsr_copy(prev_m_theta(ispin), m_theta(ispin))
    2842             :                   END IF
    2843         148 :                   CALL dbcsr_add(m_theta(ispin), step(ispin), 1.0_dp, step_size)
    2844             :                END DO ! ispin
    2845             : 
    2846             :             END IF ! not.prepare_to_exit
    2847             : 
    2848          82 :             IF (line_search) THEN
    2849          50 :                iter_type = "LS"
    2850             :             ELSE
    2851          32 :                iter_type = "CG"
    2852             :             END IF
    2853             : 
    2854          82 :             t2 = m_walltime()
    2855          82 :             IF (unit_nr > 0) THEN
    2856          41 :                iter_type = TRIM("NLMO OPT "//iter_type)
    2857             :                WRITE (unit_nr, '(T2,A13,I6,F23.10,E14.5,F14.9,F9.2)') &
    2858          41 :                   iter_type, iteration, &
    2859          41 :                   objf_new, objf_diff, grad_norm, &
    2860          82 :                   t2 - t1
    2861             :                WRITE (unit_nr, '(T2,A19,F23.10)') &
    2862          41 :                   "Localization:", localization_obj_function
    2863             :                WRITE (unit_nr, '(T2,A19,F23.10)') &
    2864          41 :                   "Orthogonalization:", penalty_func_new
    2865             :             END IF
    2866          82 :             t1 = m_walltime()
    2867             : 
    2868          82 :             iteration = iteration + 1
    2869          82 :             IF (prepare_to_exit) EXIT
    2870             : 
    2871             :          END DO ! inner loop
    2872             : 
    2873           8 :          IF (converged .OR. (outer_iteration .GE. outer_max_iter)) THEN
    2874           8 :             outer_prepare_to_exit = .TRUE.
    2875             :          END IF
    2876             : 
    2877           8 :          outer_iteration = outer_iteration + 1
    2878           8 :          IF (outer_prepare_to_exit) EXIT
    2879             : 
    2880             :       END DO ! outer loop
    2881             : 
    2882             :       ! return the optimal determinant penalty
    2883           8 :       optimizer%opt_penalty%penalty_strength = 0.0_dp
    2884          16 :       DO ispin = 1, nspins
    2885             :          optimizer%opt_penalty%penalty_strength = optimizer%opt_penalty%penalty_strength + &
    2886          16 :                                                   (-1.0_dp)*penalty_vol_prefactor(ispin)
    2887             :       END DO
    2888           8 :       optimizer%opt_penalty%penalty_strength = optimizer%opt_penalty%penalty_strength/nspins
    2889             : 
    2890           8 :       IF (converged) THEN
    2891           8 :          iter_type = "Final"
    2892             :       ELSE
    2893           0 :          iter_type = "Unconverged"
    2894             :       END IF
    2895             : 
    2896           8 :       IF (unit_nr > 0) THEN
    2897           4 :          WRITE (unit_nr, '()')
    2898           4 :          print_string = TRIM(iter_type)//" localization:"
    2899             :          WRITE (unit_nr, '(T2,A29,F30.10)') &
    2900           4 :             print_string, localization_obj_function
    2901           4 :          print_string = TRIM(iter_type)//" determinant:"
    2902             :          WRITE (unit_nr, '(T2,A29,F30.10)') &
    2903           4 :             print_string, overlap_determinant
    2904           4 :          print_string = TRIM(iter_type)//" penalty strength:"
    2905             :          WRITE (unit_nr, '(T2,A29,F30.10)') &
    2906           4 :             print_string, optimizer%opt_penalty%penalty_strength
    2907             :       END IF
    2908             : 
    2909             :       ! clean up
    2910           8 :       IF (l_bfgs) THEN
    2911           8 :          CALL lbfgs_release(nlmo_lbfgs_history)
    2912             :       END IF
    2913          16 :       DO ispin = 1, nspins
    2914          80 :          DO idim0 = 1, SIZE(m_B0, 2)
    2915         152 :             DO reim = 1, SIZE(m_B0, 1)
    2916         144 :                CALL dbcsr_release(m_B0(reim, idim0, ispin))
    2917             :             END DO
    2918             :          END DO
    2919           8 :          CALL dbcsr_release(m_theta(ispin))
    2920           8 :          CALL dbcsr_release(m_t_mo_local(ispin))
    2921           8 :          CALL dbcsr_release(tempNOcc1(ispin))
    2922           8 :          CALL dbcsr_release(approx_inv_hessian(ispin))
    2923           8 :          CALL dbcsr_release(prev_m_theta(ispin))
    2924           8 :          CALL dbcsr_release(m_theta_normalized(ispin))
    2925           8 :          CALL dbcsr_release(m_S0(ispin))
    2926           8 :          CALL dbcsr_release(prev_grad(ispin))
    2927           8 :          CALL dbcsr_release(grad(ispin))
    2928           8 :          CALL dbcsr_release(prev_step(ispin))
    2929           8 :          CALL dbcsr_release(step(ispin))
    2930           8 :          CALL dbcsr_release(prev_minus_prec_grad(ispin))
    2931           8 :          CALL dbcsr_release(m_sig_sqrti_ii(ispin))
    2932           8 :          CALL dbcsr_release(m_sigma(ispin))
    2933           8 :          CALL dbcsr_release(m_siginv(ispin))
    2934           8 :          CALL dbcsr_release(tempOccOcc1(ispin))
    2935           8 :          CALL dbcsr_release(tempOccOcc2(ispin))
    2936           8 :          CALL dbcsr_release(tempOccOcc3(ispin))
    2937           8 :          CALL dbcsr_release(bfgs_y(ispin))
    2938          16 :          CALL dbcsr_release(bfgs_s(ispin))
    2939             :       END DO ! ispin
    2940             : 
    2941           8 :       DEALLOCATE (grad_norm_spin)
    2942           8 :       DEALLOCATE (nocc)
    2943           8 :       DEALLOCATE (penalty_vol_prefactor)
    2944           8 :       DEALLOCATE (suggested_vol_penalty)
    2945             : 
    2946           8 :       DEALLOCATE (approx_inv_hessian)
    2947           8 :       DEALLOCATE (prev_m_theta)
    2948           8 :       DEALLOCATE (m_theta_normalized)
    2949           8 :       DEALLOCATE (m_S0)
    2950           8 :       DEALLOCATE (prev_grad)
    2951           8 :       DEALLOCATE (grad)
    2952           8 :       DEALLOCATE (prev_step)
    2953           8 :       DEALLOCATE (step)
    2954           8 :       DEALLOCATE (prev_minus_prec_grad)
    2955           8 :       DEALLOCATE (m_sig_sqrti_ii)
    2956           8 :       DEALLOCATE (m_sigma)
    2957           8 :       DEALLOCATE (m_siginv)
    2958           8 :       DEALLOCATE (tempNOcc1)
    2959           8 :       DEALLOCATE (tempOccOcc1)
    2960           8 :       DEALLOCATE (tempOccOcc2)
    2961           8 :       DEALLOCATE (tempOccOcc3)
    2962           8 :       DEALLOCATE (bfgs_y)
    2963           8 :       DEALLOCATE (bfgs_s)
    2964             : 
    2965           8 :       DEALLOCATE (m_theta, m_t_mo_local)
    2966           8 :       DEALLOCATE (m_B0)
    2967           8 :       DEALLOCATE (weights)
    2968           8 :       DEALLOCATE (first_sgf, last_sgf, nsgf)
    2969             : 
    2970           8 :       IF (.NOT. converged) THEN
    2971           0 :          CPABORT("Optimization not converged! ")
    2972             :       END IF
    2973             : 
    2974           8 :       CALL timestop(handle)
    2975             : 
    2976          24 :    END SUBROUTINE almo_scf_construct_nlmos
    2977             : 
    2978             : ! **************************************************************************************************
    2979             : !> \brief Analysis of the orbitals
    2980             : !> \param detailed_analysis ...
    2981             : !> \param eps_filter ...
    2982             : !> \param m_T_in ...
    2983             : !> \param m_T0_in ...
    2984             : !> \param m_siginv_in ...
    2985             : !> \param m_siginv0_in ...
    2986             : !> \param m_S_in ...
    2987             : !> \param m_KS0_in ...
    2988             : !> \param m_quench_t_in ...
    2989             : !> \param energy_out ...
    2990             : !> \param m_eda_out ...
    2991             : !> \param m_cta_out ...
    2992             : !> \par History
    2993             : !>       2017.07 created [Rustam Z Khaliullin]
    2994             : !> \author Rustam Z Khaliullin
    2995             : ! **************************************************************************************************
    2996          24 :    SUBROUTINE xalmo_analysis(detailed_analysis, eps_filter, m_T_in, m_T0_in, &
    2997          24 :                              m_siginv_in, m_siginv0_in, m_S_in, m_KS0_in, m_quench_t_in, energy_out, &
    2998          24 :                              m_eda_out, m_cta_out)
    2999             : 
    3000             :       LOGICAL, INTENT(IN)                                :: detailed_analysis
    3001             :       REAL(KIND=dp), INTENT(IN)                          :: eps_filter
    3002             :       TYPE(dbcsr_type), DIMENSION(:), INTENT(IN)         :: m_T_in, m_T0_in, m_siginv_in, &
    3003             :                                                             m_siginv0_in, m_S_in, m_KS0_in, &
    3004             :                                                             m_quench_t_in
    3005             :       REAL(KIND=dp), INTENT(INOUT)                       :: energy_out
    3006             :       TYPE(dbcsr_type), DIMENSION(:), INTENT(INOUT)      :: m_eda_out, m_cta_out
    3007             : 
    3008             :       CHARACTER(len=*), PARAMETER                        :: routineN = 'xalmo_analysis'
    3009             : 
    3010             :       INTEGER                                            :: handle, ispin, nspins
    3011             :       REAL(KIND=dp)                                      :: energy_ispin, spin_factor
    3012             :       TYPE(dbcsr_type)                                   :: FTsiginv0, Fvo0, m_X, siginvTFTsiginv0, &
    3013             :                                                             ST0
    3014             : 
    3015          24 :       CALL timeset(routineN, handle)
    3016             : 
    3017          24 :       nspins = SIZE(m_T_in)
    3018             : 
    3019          24 :       IF (nspins == 1) THEN
    3020          24 :          spin_factor = 2.0_dp
    3021             :       ELSE
    3022           0 :          spin_factor = 1.0_dp
    3023             :       END IF
    3024             : 
    3025          24 :       energy_out = 0.0_dp
    3026          48 :       DO ispin = 1, nspins
    3027             : 
    3028             :          ! create temporary matrices
    3029             :          CALL dbcsr_create(Fvo0, &
    3030             :                            template=m_T_in(ispin), &
    3031          24 :                            matrix_type=dbcsr_type_no_symmetry)
    3032             :          CALL dbcsr_create(FTsiginv0, &
    3033             :                            template=m_T_in(ispin), &
    3034          24 :                            matrix_type=dbcsr_type_no_symmetry)
    3035             :          CALL dbcsr_create(ST0, &
    3036             :                            template=m_T_in(ispin), &
    3037          24 :                            matrix_type=dbcsr_type_no_symmetry)
    3038             :          CALL dbcsr_create(m_X, &
    3039             :                            template=m_T_in(ispin), &
    3040          24 :                            matrix_type=dbcsr_type_no_symmetry)
    3041             :          CALL dbcsr_create(siginvTFTsiginv0, &
    3042             :                            template=m_siginv0_in(ispin), &
    3043          24 :                            matrix_type=dbcsr_type_no_symmetry)
    3044             : 
    3045             :          ! compute F_{virt,occ} for the zero-delocalization state
    3046             :          CALL compute_frequently_used_matrices( &
    3047             :             filter_eps=eps_filter, &
    3048             :             m_T_in=m_T0_in(ispin), &
    3049             :             m_siginv_in=m_siginv0_in(ispin), &
    3050             :             m_S_in=m_S_in(1), &
    3051             :             m_F_in=m_KS0_in(ispin), &
    3052             :             m_FTsiginv_out=FTsiginv0, &
    3053             :             m_siginvTFTsiginv_out=siginvTFTsiginv0, &
    3054          24 :             m_ST_out=ST0)
    3055          24 :          CALL dbcsr_copy(Fvo0, m_quench_t_in(ispin))
    3056          24 :          CALL dbcsr_copy(Fvo0, FTsiginv0, keep_sparsity=.TRUE.)
    3057             :          CALL dbcsr_multiply("N", "N", -1.0_dp, &
    3058             :                              ST0, &
    3059             :                              siginvTFTsiginv0, &
    3060             :                              1.0_dp, Fvo0, &
    3061          24 :                              retain_sparsity=.TRUE.)
    3062             : 
    3063             :          ! get single excitation amplitudes
    3064          24 :          CALL dbcsr_copy(m_X, m_T0_in(ispin))
    3065          24 :          CALL dbcsr_add(m_X, m_T_in(ispin), -1.0_dp, 1.0_dp)
    3066             : 
    3067          24 :          CALL dbcsr_dot(m_X, Fvo0, energy_ispin)
    3068          24 :          energy_out = energy_out + energy_ispin*spin_factor
    3069             : 
    3070          24 :          IF (detailed_analysis) THEN
    3071             : 
    3072           2 :             CALL dbcsr_hadamard_product(m_X, Fvo0, m_eda_out(ispin))
    3073           2 :             CALL dbcsr_scale(m_eda_out(ispin), spin_factor)
    3074           2 :             CALL dbcsr_filter(m_eda_out(ispin), eps_filter)
    3075             : 
    3076             :             ! first, compute [QR'R]_mu^i = [(S-SRS).X.siginv']_mu^i
    3077             :             ! a. FTsiginv0 = S.T0*siginv0
    3078             :             CALL dbcsr_multiply("N", "N", 1.0_dp, &
    3079             :                                 ST0, &
    3080             :                                 m_siginv0_in(ispin), &
    3081             :                                 0.0_dp, FTsiginv0, &
    3082           2 :                                 filter_eps=eps_filter)
    3083             :             ! c. tmp1(use ST0) = S.X
    3084             :             CALL dbcsr_multiply("N", "N", 1.0_dp, &
    3085             :                                 m_S_in(1), &
    3086             :                                 m_X, &
    3087             :                                 0.0_dp, ST0, &
    3088           2 :                                 filter_eps=eps_filter)
    3089             :             ! d. tmp2 = tr(T0).tmp1 = tr(T0).S.X
    3090             :             CALL dbcsr_multiply("T", "N", 1.0_dp, &
    3091             :                                 m_T0_in(ispin), &
    3092             :                                 ST0, &
    3093             :                                 0.0_dp, siginvTFTsiginv0, &
    3094           2 :                                 filter_eps=eps_filter)
    3095             :             ! e. tmp1 = tmp1 - tmp3.tmp2 = S.X - S.T0.siginv0*tr(T0).S.X
    3096             :             !         = (1-S.R0).S.X
    3097             :             CALL dbcsr_multiply("N", "N", -1.0_dp, &
    3098             :                                 FTsiginv0, &
    3099             :                                 siginvTFTsiginv0, &
    3100             :                                 1.0_dp, ST0, &
    3101           2 :                                 filter_eps=eps_filter)
    3102             :             ! f. tmp2(use FTsiginv0) = tmp1*siginv
    3103             :             CALL dbcsr_multiply("N", "N", 1.0_dp, &
    3104             :                                 ST0, &
    3105             :                                 m_siginv_in(ispin), &
    3106             :                                 0.0_dp, FTsiginv0, &
    3107           2 :                                 filter_eps=eps_filter)
    3108             :             ! second, compute traces of blocks [RR'Q]^x_y * [X]^y_x
    3109             :             CALL dbcsr_hadamard_product(m_X, &
    3110           2 :                                         FTsiginv0, m_cta_out(ispin))
    3111           2 :             CALL dbcsr_scale(m_cta_out(ispin), spin_factor)
    3112           2 :             CALL dbcsr_filter(m_cta_out(ispin), eps_filter)
    3113             : 
    3114             :          END IF ! do ALMO EDA/CTA
    3115             : 
    3116          24 :          CALL dbcsr_release(Fvo0)
    3117          24 :          CALL dbcsr_release(FTsiginv0)
    3118          24 :          CALL dbcsr_release(ST0)
    3119          24 :          CALL dbcsr_release(m_X)
    3120          48 :          CALL dbcsr_release(siginvTFTsiginv0)
    3121             : 
    3122             :       END DO ! ispin
    3123             : 
    3124          24 :       CALL timestop(handle)
    3125             : 
    3126          24 :    END SUBROUTINE xalmo_analysis
    3127             : 
    3128             : ! **************************************************************************************************
    3129             : !> \brief Compute matrices that are used often in various parts of the
    3130             : !>        optimization procedure
    3131             : !> \param filter_eps ...
    3132             : !> \param m_T_in ...
    3133             : !> \param m_siginv_in ...
    3134             : !> \param m_S_in ...
    3135             : !> \param m_F_in ...
    3136             : !> \param m_FTsiginv_out ...
    3137             : !> \param m_siginvTFTsiginv_out ...
    3138             : !> \param m_ST_out ...
    3139             : !> \par History
    3140             : !>       2016.12 created [Rustam Z Khaliullin]
    3141             : !> \author Rustam Z Khaliullin
    3142             : ! **************************************************************************************************
    3143        1498 :    SUBROUTINE compute_frequently_used_matrices(filter_eps, &
    3144             :                                                m_T_in, m_siginv_in, m_S_in, m_F_in, m_FTsiginv_out, &
    3145             :                                                m_siginvTFTsiginv_out, m_ST_out)
    3146             : 
    3147             :       REAL(KIND=dp), INTENT(IN)                          :: filter_eps
    3148             :       TYPE(dbcsr_type), INTENT(IN)                       :: m_T_in, m_siginv_in, m_S_in, m_F_in
    3149             :       TYPE(dbcsr_type), INTENT(INOUT)                    :: m_FTsiginv_out, m_siginvTFTsiginv_out, &
    3150             :                                                             m_ST_out
    3151             : 
    3152             :       CHARACTER(len=*), PARAMETER :: routineN = 'compute_frequently_used_matrices'
    3153             : 
    3154             :       INTEGER                                            :: handle
    3155             :       TYPE(dbcsr_type)                                   :: m_tmp_no_1, m_tmp_oo_1
    3156             : 
    3157        1498 :       CALL timeset(routineN, handle)
    3158             : 
    3159             :       CALL dbcsr_create(m_tmp_no_1, &
    3160             :                         template=m_T_in, &
    3161        1498 :                         matrix_type=dbcsr_type_no_symmetry)
    3162             :       CALL dbcsr_create(m_tmp_oo_1, &
    3163             :                         template=m_siginv_in, &
    3164        1498 :                         matrix_type=dbcsr_type_no_symmetry)
    3165             : 
    3166             :       CALL dbcsr_multiply("N", "N", 1.0_dp, &
    3167             :                           m_F_in, &
    3168             :                           m_T_in, &
    3169             :                           0.0_dp, m_tmp_no_1, &
    3170        1498 :                           filter_eps=filter_eps)
    3171             : 
    3172             :       CALL dbcsr_multiply("N", "N", 1.0_dp, &
    3173             :                           m_tmp_no_1, &
    3174             :                           m_siginv_in, &
    3175             :                           0.0_dp, m_FTsiginv_out, &
    3176        1498 :                           filter_eps=filter_eps)
    3177             : 
    3178             :       CALL dbcsr_multiply("T", "N", 1.0_dp, &
    3179             :                           m_T_in, &
    3180             :                           m_FTsiginv_out, &
    3181             :                           0.0_dp, m_tmp_oo_1, &
    3182        1498 :                           filter_eps=filter_eps)
    3183             : 
    3184             :       CALL dbcsr_multiply("N", "N", 1.0_dp, &
    3185             :                           m_siginv_in, &
    3186             :                           m_tmp_oo_1, &
    3187             :                           0.0_dp, m_siginvTFTsiginv_out, &
    3188        1498 :                           filter_eps=filter_eps)
    3189             : 
    3190             :       CALL dbcsr_multiply("N", "N", 1.0_dp, &
    3191             :                           m_S_in, &
    3192             :                           m_T_in, &
    3193             :                           0.0_dp, m_ST_out, &
    3194        1498 :                           filter_eps=filter_eps)
    3195             : 
    3196        1498 :       CALL dbcsr_release(m_tmp_no_1)
    3197        1498 :       CALL dbcsr_release(m_tmp_oo_1)
    3198             : 
    3199        1498 :       CALL timestop(handle)
    3200             : 
    3201        1498 :    END SUBROUTINE compute_frequently_used_matrices
    3202             : 
    3203             : ! **************************************************************************************************
    3204             : !> \brief Split the matrix of virtual orbitals into two:
    3205             : !>        retained orbs and discarded
    3206             : !> \param almo_scf_env ...
    3207             : !> \par History
    3208             : !>       2011.09 created [Rustam Z Khaliullin]
    3209             : !> \author Rustam Z Khaliullin
    3210             : ! **************************************************************************************************
    3211           0 :    SUBROUTINE split_v_blk(almo_scf_env)
    3212             : 
    3213             :       TYPE(almo_scf_env_type), INTENT(INOUT)             :: almo_scf_env
    3214             : 
    3215             :       CHARACTER(len=*), PARAMETER                        :: routineN = 'split_v_blk'
    3216             : 
    3217             :       INTEGER                                            :: discarded_v, handle, iblock_col, &
    3218             :                                                             iblock_col_size, iblock_row, &
    3219             :                                                             iblock_row_size, ispin, retained_v
    3220           0 :       REAL(kind=dp), DIMENSION(:, :), POINTER            :: data_p, p_new_block
    3221             :       TYPE(dbcsr_iterator_type)                          :: iter
    3222             : 
    3223           0 :       CALL timeset(routineN, handle)
    3224             : 
    3225           0 :       DO ispin = 1, almo_scf_env%nspins
    3226             : 
    3227             :          CALL dbcsr_work_create(almo_scf_env%matrix_v_blk(ispin), &
    3228           0 :                                 work_mutable=.TRUE.)
    3229             :          CALL dbcsr_work_create(almo_scf_env%matrix_v_disc_blk(ispin), &
    3230           0 :                                 work_mutable=.TRUE.)
    3231             : 
    3232           0 :          CALL dbcsr_iterator_start(iter, almo_scf_env%matrix_v_full_blk(ispin))
    3233             : 
    3234           0 :          DO WHILE (dbcsr_iterator_blocks_left(iter))
    3235             : 
    3236             :             CALL dbcsr_iterator_next_block(iter, iblock_row, iblock_col, data_p, &
    3237           0 :                                            row_size=iblock_row_size, col_size=iblock_col_size)
    3238             : 
    3239           0 :             IF (iblock_row .NE. iblock_col) THEN
    3240           0 :                CPABORT("off-diagonal block found")
    3241             :             END IF
    3242             : 
    3243           0 :             retained_v = almo_scf_env%nvirt_of_domain(iblock_col, ispin)
    3244           0 :             discarded_v = almo_scf_env%nvirt_disc_of_domain(iblock_col, ispin)
    3245           0 :             CPASSERT(retained_v .GT. 0)
    3246           0 :             CPASSERT(discarded_v .GT. 0)
    3247             : 
    3248           0 :             NULLIFY (p_new_block)
    3249             :             CALL dbcsr_reserve_block2d(almo_scf_env%matrix_v_disc_blk(ispin), &
    3250           0 :                                        iblock_row, iblock_col, p_new_block)
    3251           0 :             CPASSERT(ASSOCIATED(p_new_block))
    3252           0 :             CPASSERT(retained_v + discarded_v .EQ. iblock_col_size)
    3253           0 :             p_new_block(:, :) = data_p(:, (retained_v + 1):iblock_col_size)
    3254             : 
    3255           0 :             NULLIFY (p_new_block)
    3256             :             CALL dbcsr_reserve_block2d(almo_scf_env%matrix_v_blk(ispin), &
    3257           0 :                                        iblock_row, iblock_col, p_new_block)
    3258           0 :             CPASSERT(ASSOCIATED(p_new_block))
    3259           0 :             p_new_block(:, :) = data_p(:, 1:retained_v)
    3260             : 
    3261             :          END DO ! iterator
    3262           0 :          CALL dbcsr_iterator_stop(iter)
    3263             : 
    3264           0 :          CALL dbcsr_finalize(almo_scf_env%matrix_v_blk(ispin))
    3265           0 :          CALL dbcsr_finalize(almo_scf_env%matrix_v_disc_blk(ispin))
    3266             : 
    3267             :       END DO ! ispin
    3268             : 
    3269           0 :       CALL timestop(handle)
    3270             : 
    3271           0 :    END SUBROUTINE split_v_blk
    3272             : 
    3273             : ! **************************************************************************************************
    3274             : !> \brief various methods for calculating the Harris-Foulkes correction
    3275             : !> \param almo_scf_env ...
    3276             : !> \par History
    3277             : !>       2011.06 created [Rustam Z Khaliullin]
    3278             : !> \author Rustam Z Khaliullin
    3279             : ! **************************************************************************************************
    3280           0 :    SUBROUTINE harris_foulkes_correction(almo_scf_env)
    3281             : 
    3282             :       TYPE(almo_scf_env_type), INTENT(INOUT)             :: almo_scf_env
    3283             : 
    3284             :       CHARACTER(len=*), PARAMETER :: routineN = 'harris_foulkes_correction'
    3285             :       INTEGER, PARAMETER                                 :: cayley_transform = 1, dm_ls_step = 2
    3286             : 
    3287             :       INTEGER :: algorithm_id, handle, handle1, handle2, handle3, handle4, handle5, handle6, &
    3288             :          handle7, handle8, ispin, iteration, n, nmins, nspin, opt_k_max_iter, &
    3289             :          outer_opt_k_iteration, outer_opt_k_max_iter, unit_nr
    3290             :       INTEGER, DIMENSION(1)                              :: fake, nelectron_spin_real
    3291             :       LOGICAL :: converged, line_search, md_in_k_space, outer_opt_k_prepare_to_exit, &
    3292             :          prepare_to_exit, reset_conjugator, reset_step_size, use_cubic_approximation, &
    3293             :          use_quadratic_approximation
    3294             :       REAL(KIND=dp) :: aa, bb, beta, conjugacy_error, conjugacy_error_threshold, &
    3295             :          delta_obj_function, denom, energy_correction_final, frob_matrix, frob_matrix_base, fun0, &
    3296             :          fun1, gfun0, gfun1, grad_norm, grad_norm_frob, kappa, kin_energy, line_search_error, &
    3297             :          line_search_error_threshold, num_threshold, numer, obj_function, quadratic_approx_error, &
    3298             :          quadratic_approx_error_threshold, safety_multiplier, spin_factor, step_size, &
    3299             :          step_size_quadratic_approx, step_size_quadratic_approx2, t1, t1a, t1cholesky, t2, t2a, &
    3300             :          t2cholesky, tau, time_step, x_opt_eps_adaptive, x_opt_eps_adaptive_factor
    3301             :       REAL(KIND=dp), DIMENSION(1)                        :: local_mu
    3302             :       REAL(KIND=dp), DIMENSION(2)                        :: energy_correction
    3303             :       REAL(KIND=dp), DIMENSION(3)                        :: minima
    3304             :       TYPE(cp_logger_type), POINTER                      :: logger
    3305             :       TYPE(ct_step_env_type)                             :: ct_step_env
    3306             :       TYPE(dbcsr_type) :: grad, k_vd_index_down, k_vr_index_down, matrix_k_central, matrix_tmp1, &
    3307             :          matrix_tmp2, prec, prev_grad, prev_minus_prec_grad, prev_step, sigma_oo_curr, &
    3308             :          sigma_oo_curr_inv, sigma_vv_sqrt, sigma_vv_sqrt_guess, sigma_vv_sqrt_inv, &
    3309             :          sigma_vv_sqrt_inv_guess, step, t_curr, tmp1_n_vr, tmp2_n_o, tmp3_vd_vr, tmp4_o_vr, &
    3310             :          tmp_k_blk, vd_fixed, vd_index_sqrt, vd_index_sqrt_inv, velocity, vr_fixed, vr_index_sqrt, &
    3311             :          vr_index_sqrt_inv
    3312           0 :       TYPE(dbcsr_type), ALLOCATABLE, DIMENSION(:)        :: matrix_p_almo_scf_converged
    3313             : 
    3314           0 :       CALL timeset(routineN, handle)
    3315             : 
    3316             :       ! get a useful output_unit
    3317           0 :       logger => cp_get_default_logger()
    3318           0 :       IF (logger%para_env%is_source()) THEN
    3319           0 :          unit_nr = cp_logger_get_default_unit_nr(logger, local=.TRUE.)
    3320             :       ELSE
    3321           0 :          unit_nr = -1
    3322             :       END IF
    3323             : 
    3324           0 :       nspin = almo_scf_env%nspins
    3325           0 :       energy_correction_final = 0.0_dp
    3326           0 :       IF (nspin .EQ. 1) THEN
    3327           0 :          spin_factor = 2.0_dp
    3328             :       ELSE
    3329           0 :          spin_factor = 1.0_dp
    3330             :       END IF
    3331             : 
    3332           0 :       IF (almo_scf_env%deloc_use_occ_orbs) THEN
    3333             :          algorithm_id = cayley_transform
    3334             :       ELSE
    3335           0 :          algorithm_id = dm_ls_step
    3336             :       END IF
    3337             : 
    3338           0 :       t1 = m_walltime()
    3339             : 
    3340           0 :       SELECT CASE (algorithm_id)
    3341             :       CASE (cayley_transform)
    3342             : 
    3343             :          ! rescale density matrix by spin factor
    3344             :          ! so the orbitals and density are consistent with each other
    3345           0 :          IF (almo_scf_env%nspins == 1) THEN
    3346           0 :             CALL dbcsr_scale(almo_scf_env%matrix_p(1), 1.0_dp/spin_factor)
    3347             :          END IF
    3348             : 
    3349             :          ! transform matrix_t not matrix_t_blk (we might need ALMOs later)
    3350           0 :          DO ispin = 1, nspin
    3351             : 
    3352             :             CALL dbcsr_copy(almo_scf_env%matrix_t(ispin), &
    3353           0 :                             almo_scf_env%matrix_t_blk(ispin))
    3354             : 
    3355             :             ! obtain orthogonalization matrices for ALMOs
    3356             :             ! RZK-warning - remove this sqrt(sigma) and inv(sqrt(sigma))
    3357             :             ! ideally ALMO scf should use sigma and sigma_inv in
    3358             :             ! the tensor_up_down representation
    3359             : 
    3360           0 :             IF (unit_nr > 0) THEN
    3361           0 :                WRITE (unit_nr, *) "sqrt and inv(sqrt) of MO overlap matrix"
    3362             :             END IF
    3363             :             CALL dbcsr_create(almo_scf_env%matrix_sigma_sqrt(ispin), &
    3364             :                               template=almo_scf_env%matrix_sigma(ispin), &
    3365           0 :                               matrix_type=dbcsr_type_no_symmetry)
    3366             :             CALL dbcsr_create(almo_scf_env%matrix_sigma_sqrt_inv(ispin), &
    3367             :                               template=almo_scf_env%matrix_sigma(ispin), &
    3368           0 :                               matrix_type=dbcsr_type_no_symmetry)
    3369             : 
    3370             :             CALL matrix_sqrt_Newton_Schulz(almo_scf_env%matrix_sigma_sqrt(ispin), &
    3371             :                                            almo_scf_env%matrix_sigma_sqrt_inv(ispin), &
    3372             :                                            almo_scf_env%matrix_sigma(ispin), &
    3373             :                                            threshold=almo_scf_env%eps_filter, &
    3374             :                                            order=almo_scf_env%order_lanczos, &
    3375             :                                            eps_lanczos=almo_scf_env%eps_lanczos, &
    3376           0 :                                            max_iter_lanczos=almo_scf_env%max_iter_lanczos)
    3377             : 
    3378           0 :             IF (safe_mode) THEN
    3379             :                CALL dbcsr_create(matrix_tmp1, template=almo_scf_env%matrix_sigma(ispin), &
    3380             :                                  matrix_type=dbcsr_type_no_symmetry)
    3381             :                CALL dbcsr_create(matrix_tmp2, template=almo_scf_env%matrix_sigma(ispin), &
    3382             :                                  matrix_type=dbcsr_type_no_symmetry)
    3383             : 
    3384             :                CALL dbcsr_multiply("N", "N", 1.0_dp, almo_scf_env%matrix_sigma_sqrt_inv(ispin), &
    3385             :                                    almo_scf_env%matrix_sigma(ispin), &
    3386             :                                    0.0_dp, matrix_tmp1, filter_eps=almo_scf_env%eps_filter)
    3387             :                CALL dbcsr_multiply("N", "N", 1.0_dp, matrix_tmp1, &
    3388             :                                    almo_scf_env%matrix_sigma_sqrt_inv(ispin), &
    3389             :                                    0.0_dp, matrix_tmp2, filter_eps=almo_scf_env%eps_filter)
    3390             : 
    3391             :                frob_matrix_base = dbcsr_frobenius_norm(matrix_tmp2)
    3392             :                CALL dbcsr_add_on_diag(matrix_tmp2, -1.0_dp)
    3393             :                frob_matrix = dbcsr_frobenius_norm(matrix_tmp2)
    3394             :                IF (unit_nr > 0) THEN
    3395             :                   WRITE (unit_nr, *) "Error for (inv(sqrt(SIG))*SIG*inv(sqrt(SIG))-I)", frob_matrix/frob_matrix_base
    3396             :                END IF
    3397             : 
    3398             :                CALL dbcsr_release(matrix_tmp1)
    3399             :                CALL dbcsr_release(matrix_tmp2)
    3400             :             END IF
    3401             :          END DO
    3402             : 
    3403           0 :          IF (almo_scf_env%almo_update_algorithm .EQ. almo_scf_diag) THEN
    3404             : 
    3405           0 :             DO ispin = 1, nspin
    3406             : 
    3407           0 :                t1a = m_walltime()
    3408             : 
    3409           0 :                line_search_error_threshold = almo_scf_env%real01
    3410           0 :                conjugacy_error_threshold = almo_scf_env%real02
    3411           0 :                quadratic_approx_error_threshold = almo_scf_env%real03
    3412           0 :                x_opt_eps_adaptive_factor = almo_scf_env%real04
    3413             : 
    3414             :                !! the outer loop for k optimization
    3415           0 :                outer_opt_k_max_iter = almo_scf_env%opt_k_outer_max_iter
    3416           0 :                outer_opt_k_prepare_to_exit = .FALSE.
    3417           0 :                outer_opt_k_iteration = 0
    3418           0 :                grad_norm = 0.0_dp
    3419           0 :                grad_norm_frob = 0.0_dp
    3420           0 :                CALL dbcsr_set(almo_scf_env%matrix_x(ispin), 0.0_dp)
    3421           0 :                IF (almo_scf_env%deloc_truncate_virt .EQ. virt_full) outer_opt_k_max_iter = 0
    3422             : 
    3423           0 :                DO
    3424             : 
    3425             :                   ! obtain proper retained virtuals (1-R)|ALMO_vr>
    3426             :                   CALL apply_projector(psi_in=almo_scf_env%matrix_v_blk(ispin), &
    3427             :                                        psi_out=almo_scf_env%matrix_v(ispin), &
    3428             :                                        psi_projector=almo_scf_env%matrix_t_blk(ispin), &
    3429             :                                        metric=almo_scf_env%matrix_s(1), &
    3430             :                                        project_out=.TRUE., &
    3431             :                                        psi_projector_orthogonal=.FALSE., &
    3432             :                                        proj_in_template=almo_scf_env%matrix_ov(ispin), &
    3433             :                                        eps_filter=almo_scf_env%eps_filter, &
    3434           0 :                                        sig_inv_projector=almo_scf_env%matrix_sigma_inv(ispin))
    3435             :                   !sig_inv_template=almo_scf_env%matrix_sigma_inv(ispin),&
    3436             : 
    3437             :                   ! save initial retained virtuals
    3438             :                   CALL dbcsr_create(vr_fixed, &
    3439           0 :                                     template=almo_scf_env%matrix_v(ispin))
    3440           0 :                   CALL dbcsr_copy(vr_fixed, almo_scf_env%matrix_v(ispin))
    3441             : 
    3442             :                   ! init matrices common for optimized and non-optimized virts
    3443             :                   CALL dbcsr_create(sigma_vv_sqrt, &
    3444             :                                     template=almo_scf_env%matrix_sigma_vv(ispin), &
    3445           0 :                                     matrix_type=dbcsr_type_no_symmetry)
    3446             :                   CALL dbcsr_create(sigma_vv_sqrt_inv, &
    3447             :                                     template=almo_scf_env%matrix_sigma_vv(ispin), &
    3448           0 :                                     matrix_type=dbcsr_type_no_symmetry)
    3449             :                   CALL dbcsr_create(sigma_vv_sqrt_inv_guess, &
    3450             :                                     template=almo_scf_env%matrix_sigma_vv(ispin), &
    3451           0 :                                     matrix_type=dbcsr_type_no_symmetry)
    3452             :                   CALL dbcsr_create(sigma_vv_sqrt_guess, &
    3453             :                                     template=almo_scf_env%matrix_sigma_vv(ispin), &
    3454           0 :                                     matrix_type=dbcsr_type_no_symmetry)
    3455           0 :                   CALL dbcsr_set(sigma_vv_sqrt_guess, 0.0_dp)
    3456           0 :                   CALL dbcsr_add_on_diag(sigma_vv_sqrt_guess, 1.0_dp)
    3457           0 :                   CALL dbcsr_filter(sigma_vv_sqrt_guess, almo_scf_env%eps_filter)
    3458           0 :                   CALL dbcsr_set(sigma_vv_sqrt_inv_guess, 0.0_dp)
    3459           0 :                   CALL dbcsr_add_on_diag(sigma_vv_sqrt_inv_guess, 1.0_dp)
    3460           0 :                   CALL dbcsr_filter(sigma_vv_sqrt_inv_guess, almo_scf_env%eps_filter)
    3461             : 
    3462             :                   ! do things required to optimize virtuals
    3463           0 :                   IF (almo_scf_env%deloc_truncate_virt .NE. virt_full) THEN
    3464             : 
    3465             :                      ! project retained virtuals out of discarded block-by-block
    3466             :                      ! (1-Q^VR_ALMO)|ALMO_vd>
    3467             :                      ! this is probably not necessary, do it just to be safe
    3468             :                      !CALL apply_projector(psi_in=almo_scf_env%matrix_v_disc_blk(ispin),&
    3469             :                      !        psi_out=almo_scf_env%matrix_v_disc(ispin),&
    3470             :                      !        psi_projector=almo_scf_env%matrix_v_blk(ispin),&
    3471             :                      !        metric=almo_scf_env%matrix_s_blk(1),&
    3472             :                      !        project_out=.TRUE.,&
    3473             :                      !        psi_projector_orthogonal=.FALSE.,&
    3474             :                      !        proj_in_template=almo_scf_env%matrix_k_tr(ispin),&
    3475             :                      !        eps_filter=almo_scf_env%eps_filter,&
    3476             :                      !        sig_inv_template=almo_scf_env%matrix_sigma_vv(ispin))
    3477             :                      !CALL dbcsr_copy(almo_scf_env%matrix_v_disc_blk(ispin),&
    3478             :                      !        almo_scf_env%matrix_v_disc(ispin))
    3479             : 
    3480             :                      ! construct discarded virtuals (1-R)|ALMO_vd>
    3481             :                      CALL apply_projector(psi_in=almo_scf_env%matrix_v_disc_blk(ispin), &
    3482             :                                           psi_out=almo_scf_env%matrix_v_disc(ispin), &
    3483             :                                           psi_projector=almo_scf_env%matrix_t_blk(ispin), &
    3484             :                                           metric=almo_scf_env%matrix_s(1), &
    3485             :                                           project_out=.TRUE., &
    3486             :                                           psi_projector_orthogonal=.FALSE., &
    3487             :                                           proj_in_template=almo_scf_env%matrix_ov_disc(ispin), &
    3488             :                                           eps_filter=almo_scf_env%eps_filter, &
    3489           0 :                                           sig_inv_projector=almo_scf_env%matrix_sigma_inv(ispin))
    3490             :                      !sig_inv_template=almo_scf_env%matrix_sigma_inv(ispin),&
    3491             : 
    3492             :                      ! save initial discarded
    3493             :                      CALL dbcsr_create(vd_fixed, &
    3494           0 :                                        template=almo_scf_env%matrix_v_disc(ispin))
    3495           0 :                      CALL dbcsr_copy(vd_fixed, almo_scf_env%matrix_v_disc(ispin))
    3496             : 
    3497             :                      !! create the down metric in the retained k-subspace
    3498             :                      CALL dbcsr_create(k_vr_index_down, &
    3499             :                                        template=almo_scf_env%matrix_sigma_vv_blk(ispin), &
    3500           0 :                                        matrix_type=dbcsr_type_no_symmetry)
    3501             :                      !CALL dbcsr_copy(k_vr_index_down,&
    3502             :                      !        almo_scf_env%matrix_sigma_vv_blk(ispin))
    3503             : 
    3504             :                      !CALL get_overlap(bra=almo_scf_env%matrix_v_blk(ispin),&
    3505             :                      !        ket=almo_scf_env%matrix_v_blk(ispin),&
    3506             :                      !        overlap=k_vr_index_down,&
    3507             :                      !        metric=almo_scf_env%matrix_s_blk(1),&
    3508             :                      !        retain_overlap_sparsity=.FALSE.,&
    3509             :                      !        eps_filter=almo_scf_env%eps_filter)
    3510             : 
    3511             :                      !! create the up metric in the discarded k-subspace
    3512             :                      CALL dbcsr_create(k_vd_index_down, &
    3513             :                                        template=almo_scf_env%matrix_vv_disc_blk(ispin), &
    3514           0 :                                        matrix_type=dbcsr_type_no_symmetry)
    3515             :                      !CALL dbcsr_init(k_vd_index_up)
    3516             :                      !CALL dbcsr_create(k_vd_index_up,&
    3517             :                      !        template=almo_scf_env%matrix_vv_disc_blk(ispin),&
    3518             :                      !        matrix_type=dbcsr_type_no_symmetry)
    3519             :                      !CALL dbcsr_copy(k_vd_index_down,&
    3520             :                      !        almo_scf_env%matrix_vv_disc_blk(ispin))
    3521             : 
    3522             :                      !CALL get_overlap(bra=almo_scf_env%matrix_v_disc_blk(ispin),&
    3523             :                      !        ket=almo_scf_env%matrix_v_disc_blk(ispin),&
    3524             :                      !        overlap=k_vd_index_down,&
    3525             :                      !        metric=almo_scf_env%matrix_s_blk(1),&
    3526             :                      !        retain_overlap_sparsity=.FALSE.,&
    3527             :                      !        eps_filter=almo_scf_env%eps_filter)
    3528             : 
    3529             :                      !IF (unit_nr>0) THEN
    3530             :                      !   WRITE(unit_nr,*) "Inverting blocked overlap matrix of discarded virtuals"
    3531             :                      !ENDIF
    3532             :                      !CALL invert_Hotelling(k_vd_index_up,&
    3533             :                      !        k_vd_index_down,&
    3534             :                      !        almo_scf_env%eps_filter)
    3535             :                      !IF (safe_mode) THEN
    3536             :                      !   CALL dbcsr_init(matrix_tmp1)
    3537             :                      !   CALL dbcsr_create(matrix_tmp1,template=k_vd_index_down,&
    3538             :                      !                        matrix_type=dbcsr_type_no_symmetry)
    3539             :                      !   CALL dbcsr_multiply("N","N",1.0_dp,k_vd_index_up,&
    3540             :                      !                          k_vd_index_down,&
    3541             :                      !                          0.0_dp, matrix_tmp1,&
    3542             :                      !                          filter_eps=almo_scf_env%eps_filter)
    3543             :                      !   frob_matrix_base=dbcsr_frobenius_norm(matrix_tmp1)
    3544             :                      !   CALL dbcsr_add_on_diag(matrix_tmp1,-1.0_dp)
    3545             :                      !   frob_matrix=dbcsr_frobenius_norm(matrix_tmp1)
    3546             :                      !   IF (unit_nr>0) THEN
    3547             :                      !      WRITE(unit_nr,*) "Error for (inv(SIG)*SIG-I)",&
    3548             :                      !            frob_matrix/frob_matrix_base
    3549             :                      !   ENDIF
    3550             :                      !   CALL dbcsr_release(matrix_tmp1)
    3551             :                      !ENDIF
    3552             : 
    3553             :                      ! init matrices necessary for optimization of truncated virts
    3554             :                      ! init blocked gradient before setting K to zero
    3555             :                      ! otherwise the block structure might be lost
    3556             :                      CALL dbcsr_create(grad, &
    3557           0 :                                        template=almo_scf_env%matrix_k_blk(ispin))
    3558           0 :                      CALL dbcsr_copy(grad, almo_scf_env%matrix_k_blk(ispin))
    3559             : 
    3560             :                      ! init MD in the k-space
    3561           0 :                      md_in_k_space = almo_scf_env%logical01
    3562           0 :                      IF (md_in_k_space) THEN
    3563             :                         CALL dbcsr_create(velocity, &
    3564           0 :                                           template=almo_scf_env%matrix_k_blk(ispin))
    3565           0 :                         CALL dbcsr_copy(velocity, almo_scf_env%matrix_k_blk(ispin))
    3566           0 :                         CALL dbcsr_set(velocity, 0.0_dp)
    3567           0 :                         time_step = almo_scf_env%opt_k_trial_step_size
    3568             :                      END IF
    3569             : 
    3570             :                      CALL dbcsr_create(prev_step, &
    3571           0 :                                        template=almo_scf_env%matrix_k_blk(ispin))
    3572             : 
    3573             :                      CALL dbcsr_create(prev_minus_prec_grad, &
    3574           0 :                                        template=almo_scf_env%matrix_k_blk(ispin))
    3575             : 
    3576             :                      ! initialize diagonal blocks of the preconditioner to 1.0_dp
    3577             :                      CALL dbcsr_create(prec, &
    3578           0 :                                        template=almo_scf_env%matrix_k_blk(ispin))
    3579           0 :                      CALL dbcsr_copy(prec, almo_scf_env%matrix_k_blk(ispin))
    3580           0 :                      CALL dbcsr_set(prec, 1.0_dp)
    3581             : 
    3582             :                      ! generate initial K (extrapolate if previous values are available)
    3583           0 :                      CALL dbcsr_set(almo_scf_env%matrix_k_blk(ispin), 0.0_dp)
    3584             :                      ! matrix_k_central stores current k because matrix_k_blk is updated
    3585             :                      ! during linear search
    3586             :                      CALL dbcsr_create(matrix_k_central, &
    3587           0 :                                        template=almo_scf_env%matrix_k_blk(ispin))
    3588             :                      CALL dbcsr_copy(matrix_k_central, &
    3589           0 :                                      almo_scf_env%matrix_k_blk(ispin))
    3590             :                      CALL dbcsr_create(tmp_k_blk, &
    3591           0 :                                        template=almo_scf_env%matrix_k_blk(ispin))
    3592             :                      CALL dbcsr_create(step, &
    3593           0 :                                        template=almo_scf_env%matrix_k_blk(ispin))
    3594           0 :                      CALL dbcsr_set(step, 0.0_dp)
    3595             :                      CALL dbcsr_create(t_curr, &
    3596           0 :                                        template=almo_scf_env%matrix_t(ispin))
    3597             :                      CALL dbcsr_create(sigma_oo_curr, &
    3598             :                                        template=almo_scf_env%matrix_sigma(ispin), &
    3599           0 :                                        matrix_type=dbcsr_type_no_symmetry)
    3600             :                      CALL dbcsr_create(sigma_oo_curr_inv, &
    3601             :                                        template=almo_scf_env%matrix_sigma(ispin), &
    3602           0 :                                        matrix_type=dbcsr_type_no_symmetry)
    3603             :                      CALL dbcsr_create(tmp1_n_vr, &
    3604           0 :                                        template=almo_scf_env%matrix_v(ispin))
    3605             :                      CALL dbcsr_create(tmp3_vd_vr, &
    3606           0 :                                        template=almo_scf_env%matrix_k_blk(ispin))
    3607             :                      CALL dbcsr_create(tmp2_n_o, &
    3608           0 :                                        template=almo_scf_env%matrix_t(ispin))
    3609             :                      CALL dbcsr_create(tmp4_o_vr, &
    3610           0 :                                        template=almo_scf_env%matrix_ov(ispin))
    3611             :                      CALL dbcsr_create(prev_grad, &
    3612           0 :                                        template=almo_scf_env%matrix_k_blk(ispin))
    3613           0 :                      CALL dbcsr_set(prev_grad, 0.0_dp)
    3614             : 
    3615             :                      !CALL dbcsr_init(sigma_oo_guess)
    3616             :                      !CALL dbcsr_create(sigma_oo_guess,&
    3617             :                      !        template=almo_scf_env%matrix_sigma(ispin),&
    3618             :                      !        matrix_type=dbcsr_type_no_symmetry)
    3619             :                      !CALL dbcsr_set(sigma_oo_guess,0.0_dp)
    3620             :                      !CALL dbcsr_add_on_diag(sigma_oo_guess,1.0_dp)
    3621             :                      !CALL dbcsr_filter(sigma_oo_guess,almo_scf_env%eps_filter)
    3622             :                      !CALL dbcsr_print(sigma_oo_guess)
    3623             : 
    3624             :                   END IF ! done constructing discarded virtuals
    3625             : 
    3626             :                   ! init variables
    3627           0 :                   opt_k_max_iter = almo_scf_env%opt_k_max_iter
    3628           0 :                   iteration = 0
    3629           0 :                   converged = .FALSE.
    3630           0 :                   prepare_to_exit = .FALSE.
    3631           0 :                   beta = 0.0_dp
    3632           0 :                   line_search = .FALSE.
    3633           0 :                   obj_function = 0.0_dp
    3634           0 :                   conjugacy_error = 0.0_dp
    3635           0 :                   line_search_error = 0.0_dp
    3636           0 :                   fun0 = 0.0_dp
    3637           0 :                   fun1 = 0.0_dp
    3638           0 :                   gfun0 = 0.0_dp
    3639           0 :                   gfun1 = 0.0_dp
    3640           0 :                   step_size_quadratic_approx = 0.0_dp
    3641           0 :                   reset_step_size = .TRUE.
    3642           0 :                   IF (almo_scf_env%deloc_truncate_virt .EQ. virt_full) opt_k_max_iter = 0
    3643             : 
    3644             :                   ! start cg iterations to optimize matrix_k_blk
    3645           0 :                   DO
    3646             : 
    3647           0 :                      CALL timeset('k_opt_vr', handle1)
    3648             : 
    3649           0 :                      IF (almo_scf_env%deloc_truncate_virt .NE. virt_full) THEN
    3650             : 
    3651             :                         ! construct k-excited virtuals
    3652             :                         CALL dbcsr_multiply("N", "N", 1.0_dp, vd_fixed, &
    3653             :                                             almo_scf_env%matrix_k_blk(ispin), &
    3654             :                                             0.0_dp, almo_scf_env%matrix_v(ispin), &
    3655           0 :                                             filter_eps=almo_scf_env%eps_filter)
    3656             :                         CALL dbcsr_add(almo_scf_env%matrix_v(ispin), vr_fixed, &
    3657           0 :                                        +1.0_dp, +1.0_dp)
    3658             :                      END IF
    3659             : 
    3660             :                      ! decompose the overlap matrix of the current retained orbitals
    3661             :                      !IF (unit_nr>0) THEN
    3662             :                      !   WRITE(unit_nr,*) "decompose the active VV overlap matrix"
    3663             :                      !ENDIF
    3664             :                      CALL get_overlap(bra=almo_scf_env%matrix_v(ispin), &
    3665             :                                       ket=almo_scf_env%matrix_v(ispin), &
    3666             :                                       overlap=almo_scf_env%matrix_sigma_vv(ispin), &
    3667             :                                       metric=almo_scf_env%matrix_s(1), &
    3668             :                                       retain_overlap_sparsity=.FALSE., &
    3669           0 :                                       eps_filter=almo_scf_env%eps_filter)
    3670             :                      ! use either cholesky or sqrt
    3671             :                      !! RZK-warning: strangely, cholesky does not work with k-optimization
    3672           0 :                      IF (almo_scf_env%deloc_truncate_virt .EQ. virt_full) THEN
    3673           0 :                         CALL timeset('cholesky', handle2)
    3674           0 :                         t1cholesky = m_walltime()
    3675             : 
    3676             :                         ! re-create sigma_vv_sqrt because desymmetrize is buggy -
    3677             :                         ! it will create multiple copies of blocks
    3678             :                         CALL dbcsr_create(sigma_vv_sqrt, &
    3679             :                                           template=almo_scf_env%matrix_sigma_vv(ispin), &
    3680           0 :                                           matrix_type=dbcsr_type_no_symmetry)
    3681             :                         CALL dbcsr_desymmetrize(almo_scf_env%matrix_sigma_vv(ispin), &
    3682           0 :                                                 sigma_vv_sqrt)
    3683             :                         CALL cp_dbcsr_cholesky_decompose(sigma_vv_sqrt, &
    3684             :                                                          para_env=almo_scf_env%para_env, &
    3685           0 :                                                          blacs_env=almo_scf_env%blacs_env)
    3686           0 :                         CALL make_triu(sigma_vv_sqrt)
    3687           0 :                         CALL dbcsr_filter(sigma_vv_sqrt, almo_scf_env%eps_filter)
    3688             :                         ! apply SOLVE to compute U^(-1) : U*U^(-1)=I
    3689           0 :                         CALL dbcsr_get_info(sigma_vv_sqrt, nfullrows_total=n)
    3690             :                         CALL dbcsr_create(matrix_tmp1, template=almo_scf_env%matrix_sigma_vv(ispin), &
    3691           0 :                                           matrix_type=dbcsr_type_no_symmetry)
    3692           0 :                         CALL dbcsr_set(matrix_tmp1, 0.0_dp)
    3693           0 :                         CALL dbcsr_add_on_diag(matrix_tmp1, 1.0_dp)
    3694             :                         CALL cp_dbcsr_cholesky_restore(matrix_tmp1, n, sigma_vv_sqrt, &
    3695             :                                                        sigma_vv_sqrt_inv, op="SOLVE", pos="RIGHT", &
    3696             :                                                        para_env=almo_scf_env%para_env, &
    3697           0 :                                                        blacs_env=almo_scf_env%blacs_env)
    3698           0 :                         CALL dbcsr_filter(sigma_vv_sqrt_inv, almo_scf_env%eps_filter)
    3699           0 :                         CALL dbcsr_release(matrix_tmp1)
    3700             :                         IF (safe_mode) THEN
    3701             :                            CALL dbcsr_create(matrix_tmp1, template=almo_scf_env%matrix_sigma_vv(ispin), &
    3702             :                                              matrix_type=dbcsr_type_no_symmetry)
    3703             :                            CALL dbcsr_desymmetrize(almo_scf_env%matrix_sigma_vv(ispin), &
    3704             :                                                    matrix_tmp1)
    3705             :                            CALL dbcsr_multiply("T", "N", 1.0_dp, sigma_vv_sqrt, &
    3706             :                                                sigma_vv_sqrt, &
    3707             :                                                -1.0_dp, matrix_tmp1, filter_eps=almo_scf_env%eps_filter)
    3708             :                            frob_matrix = dbcsr_frobenius_norm(matrix_tmp1)
    3709             :                            CALL dbcsr_add_on_diag(matrix_tmp1, 1.0_dp)
    3710             :                            frob_matrix_base = dbcsr_frobenius_norm(matrix_tmp1)
    3711             :                            IF (unit_nr > 0) THEN
    3712             :                               WRITE (unit_nr, *) "Error for ( U^T * U - Sig )", &
    3713             :                                  frob_matrix/frob_matrix_base
    3714             :                            END IF
    3715             :                            CALL dbcsr_multiply("N", "N", 1.0_dp, sigma_vv_sqrt_inv, &
    3716             :                                                sigma_vv_sqrt, &
    3717             :                                                0.0_dp, matrix_tmp1, filter_eps=almo_scf_env%eps_filter)
    3718             :                            frob_matrix_base = dbcsr_frobenius_norm(matrix_tmp1)
    3719             :                            CALL dbcsr_add_on_diag(matrix_tmp1, -1.0_dp)
    3720             :                            frob_matrix = dbcsr_frobenius_norm(matrix_tmp1)
    3721             :                            IF (unit_nr > 0) THEN
    3722             :                               WRITE (unit_nr, *) "Error for ( inv(U) * U - I )", &
    3723             :                                  frob_matrix/frob_matrix_base
    3724             :                            END IF
    3725             :                            CALL dbcsr_release(matrix_tmp1)
    3726             :                         END IF ! safe_mode
    3727           0 :                         t2cholesky = m_walltime()
    3728           0 :                         IF (unit_nr > 0) THEN
    3729           0 :                            WRITE (unit_nr, *) "Cholesky+inverse wall-time: ", t2cholesky - t1cholesky
    3730             :                         END IF
    3731           0 :                         CALL timestop(handle2)
    3732             :                      ELSE
    3733             :                         CALL matrix_sqrt_Newton_Schulz(sigma_vv_sqrt, &
    3734             :                                                        sigma_vv_sqrt_inv, &
    3735             :                                                        almo_scf_env%matrix_sigma_vv(ispin), &
    3736             :                                                        !matrix_sqrt_inv_guess=sigma_vv_sqrt_inv_guess,&
    3737             :                                                        !matrix_sqrt_guess=sigma_vv_sqrt_guess,&
    3738             :                                                        threshold=almo_scf_env%eps_filter, &
    3739             :                                                        order=almo_scf_env%order_lanczos, &
    3740             :                                                        eps_lanczos=almo_scf_env%eps_lanczos, &
    3741           0 :                                                        max_iter_lanczos=almo_scf_env%max_iter_lanczos)
    3742           0 :                         CALL dbcsr_copy(sigma_vv_sqrt_inv_guess, sigma_vv_sqrt_inv)
    3743           0 :                         CALL dbcsr_copy(sigma_vv_sqrt_guess, sigma_vv_sqrt)
    3744             :                         IF (safe_mode) THEN
    3745             :                            CALL dbcsr_create(matrix_tmp1, template=almo_scf_env%matrix_sigma_vv(ispin), &
    3746             :                                              matrix_type=dbcsr_type_no_symmetry)
    3747             :                            CALL dbcsr_create(matrix_tmp2, template=almo_scf_env%matrix_sigma_vv(ispin), &
    3748             :                                              matrix_type=dbcsr_type_no_symmetry)
    3749             : 
    3750             :                            CALL dbcsr_multiply("N", "N", 1.0_dp, sigma_vv_sqrt_inv, &
    3751             :                                                almo_scf_env%matrix_sigma_vv(ispin), &
    3752             :                                                0.0_dp, matrix_tmp1, filter_eps=almo_scf_env%eps_filter)
    3753             :                            CALL dbcsr_multiply("N", "N", 1.0_dp, matrix_tmp1, &
    3754             :                                                sigma_vv_sqrt_inv, &
    3755             :                                                0.0_dp, matrix_tmp2, filter_eps=almo_scf_env%eps_filter)
    3756             : 
    3757             :                            frob_matrix_base = dbcsr_frobenius_norm(matrix_tmp2)
    3758             :                            CALL dbcsr_add_on_diag(matrix_tmp2, -1.0_dp)
    3759             :                            frob_matrix = dbcsr_frobenius_norm(matrix_tmp2)
    3760             :                            IF (unit_nr > 0) THEN
    3761             :                               WRITE (unit_nr, *) "Error for (inv(sqrt(SIGVV))*SIGVV*inv(sqrt(SIGVV))-I)", &
    3762             :                                  frob_matrix/frob_matrix_base
    3763             :                            END IF
    3764             : 
    3765             :                            CALL dbcsr_release(matrix_tmp1)
    3766             :                            CALL dbcsr_release(matrix_tmp2)
    3767             :                         END IF
    3768             :                      END IF
    3769           0 :                      CALL timestop(handle1)
    3770             : 
    3771             :                      ! compute excitation amplitudes (to the current set of retained virtuals)
    3772             :                      ! set convergence criterion for x-optimization
    3773           0 :                      IF ((iteration .EQ. 0) .AND. (.NOT. line_search) .AND. &
    3774             :                          (outer_opt_k_iteration .EQ. 0)) THEN
    3775             :                         x_opt_eps_adaptive = &
    3776           0 :                            almo_scf_env%deloc_cayley_eps_convergence
    3777             :                      ELSE
    3778             :                         x_opt_eps_adaptive = &
    3779             :                            MAX(ABS(almo_scf_env%deloc_cayley_eps_convergence), &
    3780           0 :                                ABS(x_opt_eps_adaptive_factor*grad_norm))
    3781             :                      END IF
    3782           0 :                      CALL ct_step_env_init(ct_step_env)
    3783             :                      CALL ct_step_env_set(ct_step_env, &
    3784             :                                           para_env=almo_scf_env%para_env, &
    3785             :                                           blacs_env=almo_scf_env%blacs_env, &
    3786             :                                           use_occ_orbs=.TRUE., &
    3787             :                                           use_virt_orbs=.TRUE., &
    3788             :                                           occ_orbs_orthogonal=.FALSE., &
    3789             :                                           virt_orbs_orthogonal=.FALSE., &
    3790             :                                           pp_preconditioner_full=almo_scf_env%deloc_cayley_occ_precond, &
    3791             :                                           qq_preconditioner_full=almo_scf_env%deloc_cayley_vir_precond, &
    3792             :                                           tensor_type=almo_scf_env%deloc_cayley_tensor_type, &
    3793             :                                           neglect_quadratic_term=almo_scf_env%deloc_cayley_linear, &
    3794             :                                           conjugator=almo_scf_env%deloc_cayley_conjugator, &
    3795             :                                           max_iter=almo_scf_env%deloc_cayley_max_iter, &
    3796             :                                           calculate_energy_corr=.TRUE., &
    3797             :                                           update_p=.FALSE., &
    3798             :                                           update_q=.FALSE., &
    3799             :                                           eps_convergence=x_opt_eps_adaptive, &
    3800             :                                           eps_filter=almo_scf_env%eps_filter, &
    3801             :                                           !nspins=1,&
    3802             :                                           q_index_up=sigma_vv_sqrt_inv, &
    3803             :                                           q_index_down=sigma_vv_sqrt, &
    3804             :                                           p_index_up=almo_scf_env%matrix_sigma_sqrt_inv(ispin), &
    3805             :                                           p_index_down=almo_scf_env%matrix_sigma_sqrt(ispin), &
    3806             :                                           matrix_ks=almo_scf_env%matrix_ks_0deloc(ispin), &
    3807             :                                           matrix_t=almo_scf_env%matrix_t(ispin), &
    3808             :                                           matrix_qp_template=almo_scf_env%matrix_vo(ispin), &
    3809             :                                           matrix_pq_template=almo_scf_env%matrix_ov(ispin), &
    3810             :                                           matrix_v=almo_scf_env%matrix_v(ispin), &
    3811           0 :                                           matrix_x_guess=almo_scf_env%matrix_x(ispin))
    3812             :                      ! perform calculations
    3813           0 :                      CALL ct_step_execute(ct_step_env)
    3814             :                      ! get the energy correction
    3815             :                      CALL ct_step_env_get(ct_step_env, &
    3816             :                                           energy_correction=energy_correction(ispin), &
    3817           0 :                                           copy_matrix_x=almo_scf_env%matrix_x(ispin))
    3818           0 :                      CALL ct_step_env_clean(ct_step_env)
    3819             :                      ! RZK-warning matrix_x is being transformed
    3820             :                      ! back and forth between orth and up_down representations
    3821           0 :                      energy_correction(1) = energy_correction(1)*spin_factor
    3822             : 
    3823           0 :                      IF (opt_k_max_iter .NE. 0) THEN
    3824             : 
    3825           0 :                         CALL timeset('k_opt_t_curr', handle3)
    3826             : 
    3827             :                         ! construct current occupied orbitals T_blk + V_r*X
    3828             :                         CALL dbcsr_multiply("N", "N", 1.0_dp, &
    3829             :                                             almo_scf_env%matrix_v(ispin), &
    3830             :                                             almo_scf_env%matrix_x(ispin), &
    3831             :                                             0.0_dp, t_curr, &
    3832           0 :                                             filter_eps=almo_scf_env%eps_filter)
    3833             :                         CALL dbcsr_add(t_curr, almo_scf_env%matrix_t_blk(ispin), &
    3834           0 :                                        +1.0_dp, +1.0_dp)
    3835             : 
    3836             :                         ! calculate current occupied overlap
    3837             :                         !IF (unit_nr>0) THEN
    3838             :                         !   WRITE(unit_nr,*) "Inverting current occ overlap matrix"
    3839             :                         !ENDIF
    3840             :                         CALL get_overlap(bra=t_curr, &
    3841             :                                          ket=t_curr, &
    3842             :                                          overlap=sigma_oo_curr, &
    3843             :                                          metric=almo_scf_env%matrix_s(1), &
    3844             :                                          retain_overlap_sparsity=.FALSE., &
    3845           0 :                                          eps_filter=almo_scf_env%eps_filter)
    3846           0 :                         IF (iteration .EQ. 0) THEN
    3847             :                            CALL invert_Hotelling(sigma_oo_curr_inv, &
    3848             :                                                  sigma_oo_curr, &
    3849             :                                                  threshold=almo_scf_env%eps_filter, &
    3850           0 :                                                  use_inv_as_guess=.FALSE.)
    3851             :                         ELSE
    3852             :                            CALL invert_Hotelling(sigma_oo_curr_inv, &
    3853             :                                                  sigma_oo_curr, &
    3854             :                                                  threshold=almo_scf_env%eps_filter, &
    3855           0 :                                                  use_inv_as_guess=.TRUE.)
    3856             :                            !CALL dbcsr_copy(sigma_oo_guess,sigma_oo_curr_inv)
    3857             :                         END IF
    3858             :                         IF (safe_mode) THEN
    3859             :                            CALL dbcsr_create(matrix_tmp1, template=sigma_oo_curr, &
    3860             :                                              matrix_type=dbcsr_type_no_symmetry)
    3861             :                            CALL dbcsr_multiply("N", "N", 1.0_dp, sigma_oo_curr, &
    3862             :                                                sigma_oo_curr_inv, &
    3863             :                                                0.0_dp, matrix_tmp1, &
    3864             :                                                filter_eps=almo_scf_env%eps_filter)
    3865             :                            frob_matrix_base = dbcsr_frobenius_norm(matrix_tmp1)
    3866             :                            CALL dbcsr_add_on_diag(matrix_tmp1, -1.0_dp)
    3867             :                            frob_matrix = dbcsr_frobenius_norm(matrix_tmp1)
    3868             :                            !CALL dbcsr_filter(matrix_tmp1,almo_scf_env%eps_filter)
    3869             :                            !CALL dbcsr_print(matrix_tmp1)
    3870             :                            IF (unit_nr > 0) THEN
    3871             :                               WRITE (unit_nr, *) "Error for (SIG*inv(SIG)-I)", &
    3872             :                                  frob_matrix/frob_matrix_base, frob_matrix_base
    3873             :                            END IF
    3874             :                            CALL dbcsr_release(matrix_tmp1)
    3875             :                         END IF
    3876             :                         IF (safe_mode) THEN
    3877             :                            CALL dbcsr_create(matrix_tmp1, template=sigma_oo_curr, &
    3878             :                                              matrix_type=dbcsr_type_no_symmetry)
    3879             :                            CALL dbcsr_multiply("N", "N", 1.0_dp, sigma_oo_curr_inv, &
    3880             :                                                sigma_oo_curr, &
    3881             :                                                0.0_dp, matrix_tmp1, &
    3882             :                                                filter_eps=almo_scf_env%eps_filter)
    3883             :                            frob_matrix_base = dbcsr_frobenius_norm(matrix_tmp1)
    3884             :                            CALL dbcsr_add_on_diag(matrix_tmp1, -1.0_dp)
    3885             :                            frob_matrix = dbcsr_frobenius_norm(matrix_tmp1)
    3886             :                            !CALL dbcsr_filter(matrix_tmp1,almo_scf_env%eps_filter)
    3887             :                            !CALL dbcsr_print(matrix_tmp1)
    3888             :                            IF (unit_nr > 0) THEN
    3889             :                               WRITE (unit_nr, *) "Error for (inv(SIG)*SIG-I)", &
    3890             :                                  frob_matrix/frob_matrix_base, frob_matrix_base
    3891             :                            END IF
    3892             :                            CALL dbcsr_release(matrix_tmp1)
    3893             :                         END IF
    3894             : 
    3895           0 :                         CALL timestop(handle3)
    3896           0 :                         CALL timeset('k_opt_vd', handle4)
    3897             : 
    3898             :                         ! construct current discarded virtuals:
    3899             :                         ! (1-R_curr)(1-Q^VR_curr)|ALMO_vd_basis> =
    3900             :                         ! = (1-Q^VR_curr)|ALMO_vd_basis>
    3901             :                         ! use sigma_vv_sqrt to store the inverse of the overlap
    3902             :                         ! sigma_vv_inv is computed from sqrt/cholesky
    3903             :                         CALL dbcsr_multiply("N", "T", 1.0_dp, &
    3904             :                                             sigma_vv_sqrt_inv, &
    3905             :                                             sigma_vv_sqrt_inv, &
    3906             :                                             0.0_dp, sigma_vv_sqrt, &
    3907           0 :                                             filter_eps=almo_scf_env%eps_filter)
    3908             :                         CALL apply_projector(psi_in=almo_scf_env%matrix_v_disc_blk(ispin), &
    3909             :                                              psi_out=almo_scf_env%matrix_v_disc(ispin), &
    3910             :                                              psi_projector=almo_scf_env%matrix_v(ispin), &
    3911             :                                              metric=almo_scf_env%matrix_s(1), &
    3912             :                                              project_out=.FALSE., &
    3913             :                                              psi_projector_orthogonal=.FALSE., &
    3914             :                                              proj_in_template=almo_scf_env%matrix_k_tr(ispin), &
    3915             :                                              eps_filter=almo_scf_env%eps_filter, &
    3916           0 :                                              sig_inv_projector=sigma_vv_sqrt)
    3917             :                         !sig_inv_template=almo_scf_env%matrix_sigma_vv(ispin),&
    3918             :                         CALL dbcsr_add(almo_scf_env%matrix_v_disc(ispin), &
    3919           0 :                                        vd_fixed, -1.0_dp, +1.0_dp)
    3920             : 
    3921           0 :                         CALL timestop(handle4)
    3922           0 :                         CALL timeset('k_opt_grad', handle5)
    3923             : 
    3924             :                         ! evaluate the gradient from the assembled components
    3925             :                         ! grad_xx = c0 [ (Vd_curr^tr)*F*T_curr*sigma_oo_curr_inv*(X^tr)]_xx
    3926             :                         ! save previous gradient to calculate conjugation coef
    3927           0 :                         IF (line_search) THEN
    3928           0 :                            CALL dbcsr_copy(prev_grad, grad)
    3929             :                         END IF
    3930             :                         CALL dbcsr_multiply("N", "N", 1.0_dp, &
    3931             :                                             almo_scf_env%matrix_ks_0deloc(ispin), &
    3932             :                                             t_curr, &
    3933             :                                             0.0_dp, tmp2_n_o, &
    3934           0 :                                             filter_eps=almo_scf_env%eps_filter)
    3935             :                         CALL dbcsr_multiply("N", "T", 1.0_dp, &
    3936             :                                             sigma_oo_curr_inv, &
    3937             :                                             almo_scf_env%matrix_x(ispin), &
    3938             :                                             0.0_dp, tmp4_o_vr, &
    3939           0 :                                             filter_eps=almo_scf_env%eps_filter)
    3940             :                         CALL dbcsr_multiply("N", "N", 1.0_dp, &
    3941             :                                             tmp2_n_o, &
    3942             :                                             tmp4_o_vr, &
    3943             :                                             0.0_dp, tmp1_n_vr, &
    3944           0 :                                             filter_eps=almo_scf_env%eps_filter)
    3945             :                         CALL dbcsr_multiply("T", "N", 2.0_dp*spin_factor, &
    3946             :                                             almo_scf_env%matrix_v_disc(ispin), &
    3947             :                                             tmp1_n_vr, &
    3948             :                                             0.0_dp, grad, &
    3949           0 :                                             retain_sparsity=.TRUE.)
    3950             :                         !filter_eps=almo_scf_env%eps_filter,&
    3951             :                         ! keep tmp2_n_o for the next step
    3952             :                         ! keep tmp4_o_vr for the preconditioner
    3953             : 
    3954             :                         ! check convergence and other exit criteria
    3955           0 :                         grad_norm_frob = dbcsr_frobenius_norm(grad)
    3956           0 :                         grad_norm = dbcsr_maxabs(grad)
    3957           0 :                         converged = (grad_norm .LT. almo_scf_env%opt_k_eps_convergence)
    3958           0 :                         IF (converged .OR. (iteration .GE. opt_k_max_iter)) THEN
    3959           0 :                            prepare_to_exit = .TRUE.
    3960             :                         END IF
    3961           0 :                         CALL timestop(handle5)
    3962             : 
    3963           0 :                         IF (.NOT. prepare_to_exit) THEN
    3964             : 
    3965           0 :                            CALL timeset('k_opt_energy', handle6)
    3966             : 
    3967             :                            ! compute "energy" c0*Tr[sig_inv_oo*t*F*t]
    3968             :                            CALL dbcsr_multiply("T", "N", spin_factor, &
    3969             :                                                t_curr, &
    3970             :                                                tmp2_n_o, &
    3971             :                                                0.0_dp, sigma_oo_curr, &
    3972           0 :                                                filter_eps=almo_scf_env%eps_filter)
    3973             :                            delta_obj_function = fun0
    3974           0 :                            CALL dbcsr_dot(sigma_oo_curr_inv, sigma_oo_curr, obj_function)
    3975           0 :                            delta_obj_function = obj_function - delta_obj_function
    3976           0 :                            IF (line_search) THEN
    3977             :                               fun1 = obj_function
    3978             :                            ELSE
    3979           0 :                               fun0 = obj_function
    3980             :                            END IF
    3981             : 
    3982           0 :                            CALL timestop(handle6)
    3983             : 
    3984             :                            ! update the step direction
    3985           0 :                            IF (.NOT. line_search) THEN
    3986             : 
    3987           0 :                               CALL timeset('k_opt_step', handle7)
    3988             : 
    3989           0 :                               IF ((.NOT. md_in_k_space) .AND. &
    3990             :                                   (iteration .GE. MAX(0, almo_scf_env%opt_k_prec_iter_start) .AND. &
    3991             :                                    MOD(iteration - almo_scf_env%opt_k_prec_iter_start, &
    3992             :                                        almo_scf_env%opt_k_prec_iter_freq) .EQ. 0)) THEN
    3993             : 
    3994             :                                  !IF ((iteration.eq.0).AND.(.NOT.md_in_k_space)) THEN
    3995             : 
    3996             :                                  ! compute the preconditioner
    3997           0 :                                  IF (unit_nr > 0) THEN
    3998           0 :                                     WRITE (unit_nr, *) "Computing preconditioner"
    3999             :                                  END IF
    4000             :                                  !CALL opt_k_create_preconditioner(prec,&
    4001             :                                  !        almo_scf_env%matrix_v_disc(ispin),&
    4002             :                                  !        almo_scf_env%matrix_ks_0deloc(ispin),&
    4003             :                                  !        almo_scf_env%matrix_x(ispin),&
    4004             :                                  !        tmp4_o_vr,&
    4005             :                                  !        almo_scf_env%matrix_s(1),&
    4006             :                                  !        grad,&
    4007             :                                  !        !almo_scf_env%matrix_v_disc_blk(ispin),&
    4008             :                                  !        vd_fixed,&
    4009             :                                  !        t_curr,&
    4010             :                                  !        k_vd_index_up,&
    4011             :                                  !        k_vr_index_down,&
    4012             :                                  !        tmp1_n_vr,&
    4013             :                                  !        spin_factor,&
    4014             :                                  !        almo_scf_env%eps_filter)
    4015             :                                  CALL opt_k_create_preconditioner_blk(almo_scf_env, &
    4016             :                                                                       almo_scf_env%matrix_v_disc(ispin), &
    4017             :                                                                       tmp4_o_vr, &
    4018             :                                                                       t_curr, &
    4019             :                                                                       ispin, &
    4020           0 :                                                                       spin_factor)
    4021             : 
    4022             :                               END IF
    4023             : 
    4024             :                               ! save the previous step
    4025           0 :                               CALL dbcsr_copy(prev_step, step)
    4026             : 
    4027             :                               ! compute the new step
    4028             :                               CALL opt_k_apply_preconditioner_blk(almo_scf_env, &
    4029           0 :                                                                   step, grad, ispin)
    4030             :                               !CALL dbcsr_hadamard_product(prec,grad,step)
    4031           0 :                               CALL dbcsr_scale(step, -1.0_dp)
    4032             : 
    4033             :                               ! check whether we need to reset conjugate directions
    4034           0 :                               reset_conjugator = .FALSE.
    4035             :                               ! first check if manual reset is active
    4036           0 :                               IF (iteration .LT. MAX(almo_scf_env%opt_k_conj_iter_start, 1) .OR. &
    4037             :                                   MOD(iteration - almo_scf_env%opt_k_conj_iter_start, &
    4038             :                                       almo_scf_env%opt_k_conj_iter_freq) .EQ. 0) THEN
    4039             : 
    4040             :                                  reset_conjugator = .TRUE.
    4041             : 
    4042             :                               ELSE
    4043             : 
    4044             :                                  ! check for the errors in the cg algorithm
    4045             :                                  !CALL dbcsr_hadamard_product(prec,prev_grad,tmp_k_blk)
    4046             :                                  !CALL dbcsr_dot(grad,tmp_k_blk,numer)
    4047             :                                  !CALL dbcsr_dot(prev_grad,tmp_k_blk,denom)
    4048           0 :                                  CALL dbcsr_dot(grad, prev_minus_prec_grad, numer)
    4049           0 :                                  CALL dbcsr_dot(prev_grad, prev_minus_prec_grad, denom)
    4050           0 :                                  conjugacy_error = numer/denom
    4051             : 
    4052           0 :                                  IF (conjugacy_error .GT. MIN(0.5_dp, conjugacy_error_threshold)) THEN
    4053           0 :                                     reset_conjugator = .TRUE.
    4054           0 :                                     IF (unit_nr > 0) THEN
    4055           0 :                                        WRITE (unit_nr, *) "Lack of progress, conjugacy error is ", conjugacy_error
    4056             :                                     END IF
    4057             :                                  END IF
    4058             : 
    4059             :                                  ! check the gradient along the previous direction
    4060           0 :                                  IF ((iteration .NE. 0) .AND. (.NOT. reset_conjugator)) THEN
    4061           0 :                                     CALL dbcsr_dot(grad, prev_step, numer)
    4062           0 :                                     CALL dbcsr_dot(prev_grad, prev_step, denom)
    4063           0 :                                     line_search_error = numer/denom
    4064           0 :                                     IF (line_search_error .GT. line_search_error_threshold) THEN
    4065           0 :                                        reset_conjugator = .TRUE.
    4066           0 :                                        IF (unit_nr > 0) THEN
    4067           0 :                                           WRITE (unit_nr, *) "Bad line search, line search error is ", line_search_error
    4068             :                                        END IF
    4069             :                                     END IF
    4070             :                                  END IF
    4071             : 
    4072             :                               END IF
    4073             : 
    4074             :                               ! compute the conjugation coefficient - beta
    4075           0 :                               IF (.NOT. reset_conjugator) THEN
    4076             : 
    4077           0 :                                  SELECT CASE (almo_scf_env%opt_k_conjugator)
    4078             :                                  CASE (cg_hestenes_stiefel)
    4079           0 :                                     CALL dbcsr_copy(tmp_k_blk, grad)
    4080           0 :                                     CALL dbcsr_add(tmp_k_blk, prev_grad, 1.0_dp, -1.0_dp)
    4081           0 :                                     CALL dbcsr_dot(tmp_k_blk, step, numer)
    4082           0 :                                     CALL dbcsr_dot(tmp_k_blk, prev_step, denom)
    4083           0 :                                     beta = -1.0_dp*numer/denom
    4084             :                                  CASE (cg_fletcher_reeves)
    4085             :                                     !CALL dbcsr_hadamard_product(prec,prev_grad,tmp_k_blk)
    4086             :                                     !CALL dbcsr_dot(prev_grad,tmp_k_blk,denom)
    4087             :                                     !CALL dbcsr_hadamard_product(prec,grad,tmp_k_blk)
    4088             :                                     !CALL dbcsr_dot(grad,tmp_k_blk,numer)
    4089             :                                     !beta=numer/denom
    4090           0 :                                     CALL dbcsr_dot(grad, step, numer)
    4091           0 :                                     CALL dbcsr_dot(prev_grad, prev_minus_prec_grad, denom)
    4092           0 :                                     beta = numer/denom
    4093             :                                  CASE (cg_polak_ribiere)
    4094             :                                     !CALL dbcsr_hadamard_product(prec,prev_grad,tmp_k_blk)
    4095             :                                     !CALL dbcsr_dot(prev_grad,tmp_k_blk,denom)
    4096             :                                     !CALL dbcsr_add(prev_grad,grad,-1.0_dp,1.0_dp)
    4097             :                                     !CALL dbcsr_hadamard_product(prec,prev_grad,tmp_k_blk)
    4098             :                                     !CALL dbcsr_dot(tmp_k_blk,grad,numer)
    4099           0 :                                     CALL dbcsr_dot(prev_grad, prev_minus_prec_grad, denom)
    4100           0 :                                     CALL dbcsr_copy(tmp_k_blk, grad)
    4101           0 :                                     CALL dbcsr_add(tmp_k_blk, prev_grad, 1.0_dp, -1.0_dp)
    4102           0 :                                     CALL dbcsr_dot(tmp_k_blk, step, numer)
    4103           0 :                                     beta = numer/denom
    4104             :                                  CASE (cg_fletcher)
    4105             :                                     !CALL dbcsr_hadamard_product(prec,grad,tmp_k_blk)
    4106             :                                     !CALL dbcsr_dot(grad,tmp_k_blk,numer)
    4107             :                                     !CALL dbcsr_dot(prev_grad,prev_step,denom)
    4108             :                                     !beta=-1.0_dp*numer/denom
    4109           0 :                                     CALL dbcsr_dot(grad, step, numer)
    4110           0 :                                     CALL dbcsr_dot(prev_grad, prev_step, denom)
    4111           0 :                                     beta = numer/denom
    4112             :                                  CASE (cg_liu_storey)
    4113           0 :                                     CALL dbcsr_dot(prev_grad, prev_step, denom)
    4114             :                                     !CALL dbcsr_add(prev_grad,grad,-1.0_dp,1.0_dp)
    4115             :                                     !CALL dbcsr_hadamard_product(prec,prev_grad,tmp_k_blk)
    4116             :                                     !CALL dbcsr_dot(tmp_k_blk,grad,numer)
    4117           0 :                                     CALL dbcsr_copy(tmp_k_blk, grad)
    4118           0 :                                     CALL dbcsr_add(tmp_k_blk, prev_grad, 1.0_dp, -1.0_dp)
    4119           0 :                                     CALL dbcsr_dot(tmp_k_blk, step, numer)
    4120           0 :                                     beta = numer/denom
    4121             :                                  CASE (cg_dai_yuan)
    4122             :                                     !CALL dbcsr_hadamard_product(prec,grad,tmp_k_blk)
    4123             :                                     !CALL dbcsr_dot(grad,tmp_k_blk,numer)
    4124             :                                     !CALL dbcsr_add(prev_grad,grad,-1.0_dp,1.0_dp)
    4125             :                                     !CALL dbcsr_dot(prev_grad,prev_step,denom)
    4126             :                                     !beta=numer/denom
    4127           0 :                                     CALL dbcsr_dot(grad, step, numer)
    4128           0 :                                     CALL dbcsr_copy(tmp_k_blk, grad)
    4129           0 :                                     CALL dbcsr_add(tmp_k_blk, prev_grad, 1.0_dp, -1.0_dp)
    4130           0 :                                     CALL dbcsr_dot(tmp_k_blk, prev_step, denom)
    4131           0 :                                     beta = -1.0_dp*numer/denom
    4132             :                                  CASE (cg_hager_zhang)
    4133             :                                     !CALL dbcsr_add(prev_grad,grad,-1.0_dp,1.0_dp)
    4134             :                                     !CALL dbcsr_dot(prev_grad,prev_step,denom)
    4135             :                                     !CALL dbcsr_hadamard_product(prec,prev_grad,tmp_k_blk)
    4136             :                                     !CALL dbcsr_dot(tmp_k_blk,prev_grad,numer)
    4137             :                                     !kappa=2.0_dp*numer/denom
    4138             :                                     !CALL dbcsr_dot(tmp_k_blk,grad,numer)
    4139             :                                     !tau=numer/denom
    4140             :                                     !CALL dbcsr_dot(prev_step,grad,numer)
    4141             :                                     !beta=tau-kappa*numer/denom
    4142           0 :                                     CALL dbcsr_copy(tmp_k_blk, grad)
    4143           0 :                                     CALL dbcsr_add(tmp_k_blk, prev_grad, 1.0_dp, -1.0_dp)
    4144           0 :                                     CALL dbcsr_dot(tmp_k_blk, prev_step, denom)
    4145           0 :                                     CALL dbcsr_dot(tmp_k_blk, prev_minus_prec_grad, numer)
    4146           0 :                                     kappa = -2.0_dp*numer/denom
    4147           0 :                                     CALL dbcsr_dot(tmp_k_blk, step, numer)
    4148           0 :                                     tau = -1.0_dp*numer/denom
    4149           0 :                                     CALL dbcsr_dot(prev_step, grad, numer)
    4150           0 :                                     beta = tau - kappa*numer/denom
    4151             :                                  CASE (cg_zero)
    4152           0 :                                     beta = 0.0_dp
    4153             :                                  CASE DEFAULT
    4154           0 :                                     CPABORT("illegal conjugator")
    4155             :                                  END SELECT
    4156             : 
    4157           0 :                                  IF (beta .LT. 0.0_dp) THEN
    4158           0 :                                     IF (unit_nr > 0) THEN
    4159           0 :                                        WRITE (unit_nr, *) "Beta is negative, ", beta
    4160             :                                     END IF
    4161             :                                     reset_conjugator = .TRUE.
    4162             :                                  END IF
    4163             : 
    4164             :                               END IF
    4165             : 
    4166           0 :                               IF (md_in_k_space) THEN
    4167             :                                  reset_conjugator = .TRUE.
    4168             :                               END IF
    4169             : 
    4170           0 :                               IF (reset_conjugator) THEN
    4171             : 
    4172           0 :                                  beta = 0.0_dp
    4173             :                                  !reset_step_size=.TRUE.
    4174             : 
    4175           0 :                                  IF (unit_nr > 0) THEN
    4176           0 :                                     WRITE (unit_nr, *) "(Re)-setting conjugator to zero"
    4177             :                                  END IF
    4178             : 
    4179             :                               END IF
    4180             : 
    4181             :                               ! save the preconditioned gradient
    4182           0 :                               CALL dbcsr_copy(prev_minus_prec_grad, step)
    4183             : 
    4184             :                               ! conjugate the step direction
    4185           0 :                               CALL dbcsr_add(step, prev_step, 1.0_dp, beta)
    4186             : 
    4187           0 :                               CALL timestop(handle7)
    4188             : 
    4189             :                               ! update the step direction
    4190             :                            ELSE ! step update
    4191           0 :                               conjugacy_error = 0.0_dp
    4192             :                            END IF
    4193             : 
    4194             :                            ! compute the gradient with respect to the step size in the curr direction
    4195             :                            IF (line_search) THEN
    4196           0 :                               CALL dbcsr_dot(grad, step, gfun1)
    4197           0 :                               line_search_error = gfun1/gfun0
    4198             :                            ELSE
    4199           0 :                               CALL dbcsr_dot(grad, step, gfun0)
    4200             :                            END IF
    4201             : 
    4202             :                            ! make a step - update k
    4203           0 :                            IF (line_search) THEN
    4204             : 
    4205             :                               ! check if the trial step provides enough numerical accuracy
    4206           0 :                               safety_multiplier = 1.0E+1_dp ! must be more than one
    4207             :                               num_threshold = MAX(EPSILON(1.0_dp), &
    4208           0 :                                                   safety_multiplier*(almo_scf_env%eps_filter**2)*almo_scf_env%ndomains)
    4209           0 :                               IF (ABS(fun1 - fun0 - gfun0*step_size) .LT. num_threshold) THEN
    4210           0 :                                  IF (unit_nr > 0) THEN
    4211             :                                     WRITE (unit_nr, '(T3,A,1X,E17.7)') &
    4212           0 :                                        "Numerical accuracy is too low to observe non-linear behavior", &
    4213           0 :                                        ABS(fun1 - fun0 - gfun0*step_size)
    4214           0 :                                     WRITE (unit_nr, '(T3,A,1X,E17.7,A,1X,E12.3)') "Error computing ", &
    4215           0 :                                        ABS(gfun0), &
    4216           0 :                                        " is smaller than the threshold", num_threshold
    4217             :                                  END IF
    4218           0 :                                  CPABORT("")
    4219             :                               END IF
    4220           0 :                               IF (ABS(gfun0) .LT. num_threshold) THEN
    4221           0 :                                  IF (unit_nr > 0) THEN
    4222           0 :                                     WRITE (unit_nr, '(T3,A,1X,E17.7,A,1X,E12.3)') "Linear gradient", &
    4223           0 :                                        ABS(gfun0), &
    4224           0 :                                        " is smaller than the threshold", num_threshold
    4225             :                                  END IF
    4226           0 :                                  CPABORT("")
    4227             :                               END IF
    4228             : 
    4229           0 :                               use_quadratic_approximation = .TRUE.
    4230           0 :                               use_cubic_approximation = .FALSE.
    4231             : 
    4232             :                               ! find the minimum assuming quadratic form
    4233             :                               ! use f0, f1, g0
    4234           0 :                               step_size_quadratic_approx = -(gfun0*step_size*step_size)/(2.0_dp*(fun1 - fun0 - gfun0*step_size))
    4235             :                               ! use f0, f1, g1
    4236           0 :                              step_size_quadratic_approx2 = -(fun1 - fun0 - step_size*gfun1/2.0_dp)/(gfun1 - (fun1 - fun0)/step_size)
    4237             : 
    4238           0 :                               IF ((step_size_quadratic_approx .LT. 0.0_dp) .AND. &
    4239             :                                   (step_size_quadratic_approx2 .LT. 0.0_dp)) THEN
    4240           0 :                                  IF (unit_nr > 0) THEN
    4241             :                                     WRITE (unit_nr, '(T3,A,1X,E17.7,1X,E17.7,1X,A)') &
    4242           0 :                                        "Quadratic approximation gives negative steps", &
    4243           0 :                                        step_size_quadratic_approx, step_size_quadratic_approx2, &
    4244           0 :                                        "trying cubic..."
    4245             :                                  END IF
    4246             :                                  use_cubic_approximation = .TRUE.
    4247             :                                  use_quadratic_approximation = .FALSE.
    4248             :                               ELSE
    4249           0 :                                  IF (step_size_quadratic_approx .LT. 0.0_dp) THEN
    4250           0 :                                     step_size_quadratic_approx = step_size_quadratic_approx2
    4251             :                                  END IF
    4252           0 :                                  IF (step_size_quadratic_approx2 .LT. 0.0_dp) THEN
    4253           0 :                                     step_size_quadratic_approx2 = step_size_quadratic_approx
    4254             :                                  END IF
    4255             :                               END IF
    4256             : 
    4257             :                               ! check accuracy of the quadratic approximation
    4258             :                               IF (use_quadratic_approximation) THEN
    4259             :                                  quadratic_approx_error = ABS(step_size_quadratic_approx - &
    4260           0 :                                                               step_size_quadratic_approx2)/step_size_quadratic_approx
    4261           0 :                                  IF (quadratic_approx_error .GT. quadratic_approx_error_threshold) THEN
    4262           0 :                                     IF (unit_nr > 0) THEN
    4263           0 :                                        WRITE (unit_nr, '(T3,A,1X,E17.7,1X,E17.7,1X,A)') "Quadratic approximation is poor", &
    4264           0 :                                           step_size_quadratic_approx, step_size_quadratic_approx2, &
    4265           0 :                                           "Try cubic approximation"
    4266             :                                     END IF
    4267             :                                     use_cubic_approximation = .TRUE.
    4268             :                                     use_quadratic_approximation = .FALSE.
    4269             :                                  END IF
    4270             :                               END IF
    4271             : 
    4272             :                               ! check if numerics is fine enough to capture the cubic form
    4273           0 :                               IF (use_cubic_approximation) THEN
    4274             : 
    4275             :                                  ! if quadratic approximation is not accurate enough
    4276             :                                  ! try to find the minimum assuming cubic form
    4277             :                                  ! aa*x**3 + bb*x**2 + cc*x + dd = f(x)
    4278           0 :                                  bb = (-step_size*gfun1 + 3.0_dp*(fun1 - fun0) - 2.0_dp*step_size*gfun0)/(step_size*step_size)
    4279           0 :                                  aa = (gfun1 - 2.0_dp*step_size*bb - gfun0)/(3.0_dp*step_size*step_size)
    4280             : 
    4281           0 :                                  IF (ABS(gfun1 - 2.0_dp*step_size*bb - gfun0) .LT. num_threshold) THEN
    4282           0 :                                     IF (unit_nr > 0) THEN
    4283             :                                        WRITE (unit_nr, '(T3,A,1X,E17.7)') &
    4284           0 :                                           "Numerical accuracy is too low to observe cubic behavior", &
    4285           0 :                                           ABS(gfun1 - 2.0_dp*step_size*bb - gfun0)
    4286             :                                     END IF
    4287             :                                     use_cubic_approximation = .FALSE.
    4288             :                                     use_quadratic_approximation = .TRUE.
    4289             :                                  END IF
    4290           0 :                                  IF (ABS(gfun1) .LT. num_threshold) THEN
    4291           0 :                                     IF (unit_nr > 0) THEN
    4292           0 :                                        WRITE (unit_nr, '(T3,A,1X,E17.7,A,1X,E12.3)') "Linear gradient", &
    4293           0 :                                           ABS(gfun1), &
    4294           0 :                                           " is smaller than the threshold", num_threshold
    4295             :                                     END IF
    4296             :                                     use_cubic_approximation = .FALSE.
    4297             :                                     use_quadratic_approximation = .TRUE.
    4298             :                                  END IF
    4299             :                               END IF
    4300             : 
    4301             :                               ! find the step assuming cubic approximation
    4302           0 :                               IF (use_cubic_approximation) THEN
    4303             :                                  ! to obtain the minimum of the cubic function solve the quadratic equation
    4304             :                                  ! 0.0*x**3 + 3.0*aa*x**2 + 2.0*bb*x + cc = 0
    4305           0 :                                  CALL analytic_line_search(0.0_dp, 3.0_dp*aa, 2.0_dp*bb, gfun0, minima, nmins)
    4306           0 :                                  IF (nmins .LT. 1) THEN
    4307           0 :                                     IF (unit_nr > 0) THEN
    4308             :                                        WRITE (unit_nr, '(T3,A)') &
    4309           0 :                                           "Cubic approximation gives zero soultions! Use quadratic approximation"
    4310             :                                     END IF
    4311             :                                     use_quadratic_approximation = .TRUE.
    4312             :                                     use_cubic_approximation = .TRUE.
    4313             :                                  ELSE
    4314           0 :                                     step_size = minima(1)
    4315           0 :                                     IF (nmins .GT. 1) THEN
    4316           0 :                                        IF (unit_nr > 0) THEN
    4317             :                                           WRITE (unit_nr, '(T3,A)') &
    4318           0 :                                              "More than one solution found! Use quadratic approximation"
    4319             :                                        END IF
    4320             :                                        use_quadratic_approximation = .TRUE.
    4321           0 :                                        use_cubic_approximation = .TRUE.
    4322             :                                     END IF
    4323             :                                  END IF
    4324             :                               END IF
    4325             : 
    4326           0 :                               IF (use_quadratic_approximation) THEN ! use quadratic approximation
    4327           0 :                                  IF (unit_nr > 0) THEN
    4328           0 :                                     WRITE (unit_nr, '(T3,A)') "Use quadratic approximation"
    4329             :                                  END IF
    4330           0 :                                  step_size = (step_size_quadratic_approx + step_size_quadratic_approx2)*0.5_dp
    4331             :                               END IF
    4332             : 
    4333             :                               ! one more check on the step size
    4334           0 :                               IF (step_size .LT. 0.0_dp) THEN
    4335           0 :                                  CPABORT("Negative step proposed")
    4336             :                               END IF
    4337             : 
    4338             :                               CALL dbcsr_copy(almo_scf_env%matrix_k_blk(ispin), &
    4339           0 :                                               matrix_k_central)
    4340             :                               CALL dbcsr_add(almo_scf_env%matrix_k_blk(ispin), &
    4341           0 :                                              step, 1.0_dp, step_size)
    4342             :                               CALL dbcsr_copy(matrix_k_central, &
    4343           0 :                                               almo_scf_env%matrix_k_blk(ispin))
    4344           0 :                               line_search = .FALSE.
    4345             : 
    4346             :                            ELSE
    4347             : 
    4348           0 :                               IF (md_in_k_space) THEN
    4349             : 
    4350             :                                  ! update velocities v(i) = v(i-1) + 0.5*dT*(a(i-1) + a(i))
    4351           0 :                                  IF (iteration .NE. 0) THEN
    4352             :                                     CALL dbcsr_add(velocity, &
    4353           0 :                                                    step, 1.0_dp, 0.5_dp*time_step)
    4354             :                                     CALL dbcsr_add(velocity, &
    4355           0 :                                                    prev_step, 1.0_dp, 0.5_dp*time_step)
    4356             :                                  END IF
    4357           0 :                                  kin_energy = dbcsr_frobenius_norm(velocity)
    4358           0 :                                  kin_energy = 0.5_dp*kin_energy*kin_energy
    4359             : 
    4360             :                                  ! update positions k(i) = k(i-1) + dT*v(i-1) + 0.5*dT*dT*a(i-1)
    4361             :                                  CALL dbcsr_add(almo_scf_env%matrix_k_blk(ispin), &
    4362           0 :                                                 velocity, 1.0_dp, time_step)
    4363             :                                  CALL dbcsr_add(almo_scf_env%matrix_k_blk(ispin), &
    4364           0 :                                                 step, 1.0_dp, 0.5_dp*time_step*time_step)
    4365             : 
    4366             :                               ELSE
    4367             : 
    4368           0 :                                  IF (reset_step_size) THEN
    4369           0 :                                     step_size = almo_scf_env%opt_k_trial_step_size
    4370           0 :                                     reset_step_size = .FALSE.
    4371             :                                  ELSE
    4372           0 :                                     step_size = step_size*almo_scf_env%opt_k_trial_step_size_multiplier
    4373             :                                  END IF
    4374             :                                  CALL dbcsr_copy(almo_scf_env%matrix_k_blk(ispin), &
    4375           0 :                                                  matrix_k_central)
    4376             :                                  CALL dbcsr_add(almo_scf_env%matrix_k_blk(ispin), &
    4377           0 :                                                 step, 1.0_dp, step_size)
    4378           0 :                                  line_search = .TRUE.
    4379             :                               END IF
    4380             : 
    4381             :                            END IF
    4382             : 
    4383             :                         END IF ! .NOT.prepare_to_exit
    4384             : 
    4385             :                         ! print the status of the optimization
    4386           0 :                         t2a = m_walltime()
    4387           0 :                         IF (unit_nr > 0) THEN
    4388           0 :                            IF (md_in_k_space) THEN
    4389             :                               WRITE (unit_nr, '(T6,A,1X,I5,1X,E12.3,E16.7,F15.9,F15.9,F15.9,E12.3,F15.9,F15.9,F8.3)') &
    4390           0 :                                  "K iter CG", iteration, time_step, time_step*iteration, &
    4391           0 :                                  energy_correction(ispin), obj_function, delta_obj_function, grad_norm, &
    4392           0 :                                  kin_energy, kin_energy + obj_function, beta
    4393             :                            ELSE
    4394           0 :                               IF (line_search .OR. prepare_to_exit) THEN
    4395             :                                  WRITE (unit_nr, '(T6,A,1X,I3,1X,E12.3,F16.10,F16.10,E12.3,E12.3,E12.3,F8.3,F8.3,F10.3)') &
    4396           0 :                                     "K iter CG", iteration, step_size, &
    4397           0 :                                     energy_correction(ispin), delta_obj_function, grad_norm, &
    4398           0 :                                     gfun0, line_search_error, beta, conjugacy_error, t2a - t1a
    4399             :                                  !(flop1+flop2)/(1.0E6_dp*(t2-t1))
    4400             :                               ELSE
    4401             :                                  WRITE (unit_nr, '(T6,A,1X,I3,1X,E12.3,F16.10,F16.10,E12.3,E12.3,E12.3,F8.3,F8.3,F10.3)') &
    4402           0 :                                     "K iter LS", iteration, step_size, &
    4403           0 :                                     energy_correction(ispin), delta_obj_function, grad_norm, &
    4404           0 :                                     gfun1, line_search_error, beta, conjugacy_error, t2a - t1a
    4405             :                                  !(flop1+flop2)/(1.0E6_dp*(t2-t1))
    4406             :                               END IF
    4407             :                            END IF
    4408           0 :                            CALL m_flush(unit_nr)
    4409             :                         END IF
    4410           0 :                         t1a = m_walltime()
    4411             : 
    4412             :                      ELSE ! opt_k_max_iter .eq. 0
    4413             :                         prepare_to_exit = .TRUE.
    4414             :                      END IF ! opt_k_max_iter .ne. 0
    4415             : 
    4416           0 :                      IF (.NOT. line_search) iteration = iteration + 1
    4417             : 
    4418           0 :                      IF (prepare_to_exit) EXIT
    4419             : 
    4420             :                   END DO ! end iterations on K
    4421             : 
    4422           0 :                   IF (converged .OR. (outer_opt_k_iteration .GE. outer_opt_k_max_iter)) THEN
    4423           0 :                      outer_opt_k_prepare_to_exit = .TRUE.
    4424             :                   END IF
    4425             : 
    4426           0 :                   IF (almo_scf_env%deloc_truncate_virt .NE. virt_full) THEN
    4427             : 
    4428           0 :                      IF (unit_nr > 0) THEN
    4429           0 :                         WRITE (unit_nr, *) "Updating ALMO virtuals"
    4430             :                      END IF
    4431             : 
    4432           0 :                      CALL timeset('k_opt_v0_update', handle8)
    4433             : 
    4434             :                      ! update retained ALMO virtuals to restart the cg iterations
    4435             :                      CALL dbcsr_multiply("N", "N", 1.0_dp, &
    4436             :                                          almo_scf_env%matrix_v_disc_blk(ispin), &
    4437             :                                          almo_scf_env%matrix_k_blk(ispin), &
    4438             :                                          0.0_dp, vr_fixed, &
    4439           0 :                                          filter_eps=almo_scf_env%eps_filter)
    4440             :                      CALL dbcsr_add(vr_fixed, almo_scf_env%matrix_v_blk(ispin), &
    4441           0 :                                     +1.0_dp, +1.0_dp)
    4442             : 
    4443             :                      ! update discarded ALMO virtuals to restart the cg iterations
    4444             :                      CALL dbcsr_multiply("N", "T", 1.0_dp, &
    4445             :                                          almo_scf_env%matrix_v_blk(ispin), &
    4446             :                                          almo_scf_env%matrix_k_blk(ispin), &
    4447             :                                          0.0_dp, vd_fixed, &
    4448           0 :                                          filter_eps=almo_scf_env%eps_filter)
    4449             :                      CALL dbcsr_add(vd_fixed, almo_scf_env%matrix_v_disc_blk(ispin), &
    4450           0 :                                     -1.0_dp, +1.0_dp)
    4451             : 
    4452             :                      ! orthogonalize new orbitals on fragments
    4453             :                      CALL get_overlap(bra=vr_fixed, &
    4454             :                                       ket=vr_fixed, &
    4455             :                                       overlap=k_vr_index_down, &
    4456             :                                       metric=almo_scf_env%matrix_s_blk(1), &
    4457             :                                       retain_overlap_sparsity=.FALSE., &
    4458           0 :                                       eps_filter=almo_scf_env%eps_filter)
    4459             :                      CALL dbcsr_create(vr_index_sqrt_inv, template=k_vr_index_down, &
    4460           0 :                                        matrix_type=dbcsr_type_no_symmetry)
    4461             :                      CALL dbcsr_create(vr_index_sqrt, template=k_vr_index_down, &
    4462           0 :                                        matrix_type=dbcsr_type_no_symmetry)
    4463             :                      CALL matrix_sqrt_Newton_Schulz(vr_index_sqrt, &
    4464             :                                                     vr_index_sqrt_inv, &
    4465             :                                                     k_vr_index_down, &
    4466             :                                                     threshold=almo_scf_env%eps_filter, &
    4467             :                                                     order=almo_scf_env%order_lanczos, &
    4468             :                                                     eps_lanczos=almo_scf_env%eps_lanczos, &
    4469           0 :                                                     max_iter_lanczos=almo_scf_env%max_iter_lanczos)
    4470             :                      IF (safe_mode) THEN
    4471             :                         CALL dbcsr_create(matrix_tmp1, template=k_vr_index_down, &
    4472             :                                           matrix_type=dbcsr_type_no_symmetry)
    4473             :                         CALL dbcsr_create(matrix_tmp2, template=k_vr_index_down, &
    4474             :                                           matrix_type=dbcsr_type_no_symmetry)
    4475             : 
    4476             :                         CALL dbcsr_multiply("N", "N", 1.0_dp, vr_index_sqrt_inv, &
    4477             :                                             k_vr_index_down, &
    4478             :                                             0.0_dp, matrix_tmp1, filter_eps=almo_scf_env%eps_filter)
    4479             :                         CALL dbcsr_multiply("N", "N", 1.0_dp, matrix_tmp1, &
    4480             :                                             vr_index_sqrt_inv, &
    4481             :                                             0.0_dp, matrix_tmp2, filter_eps=almo_scf_env%eps_filter)
    4482             : 
    4483             :                         frob_matrix_base = dbcsr_frobenius_norm(matrix_tmp2)
    4484             :                         CALL dbcsr_add_on_diag(matrix_tmp2, -1.0_dp)
    4485             :                         frob_matrix = dbcsr_frobenius_norm(matrix_tmp2)
    4486             :                         IF (unit_nr > 0) THEN
    4487             :                            WRITE (unit_nr, *) "Error for (inv(sqrt(SIGVV))*SIGVV*inv(sqrt(SIGVV))-I)", &
    4488             :                               frob_matrix/frob_matrix_base
    4489             :                         END IF
    4490             : 
    4491             :                         CALL dbcsr_release(matrix_tmp1)
    4492             :                         CALL dbcsr_release(matrix_tmp2)
    4493             :                      END IF
    4494             :                      CALL dbcsr_multiply("N", "N", 1.0_dp, &
    4495             :                                          vr_fixed, &
    4496             :                                          vr_index_sqrt_inv, &
    4497             :                                          0.0_dp, almo_scf_env%matrix_v_blk(ispin), &
    4498           0 :                                          filter_eps=almo_scf_env%eps_filter)
    4499             : 
    4500             :                      CALL get_overlap(bra=vd_fixed, &
    4501             :                                       ket=vd_fixed, &
    4502             :                                       overlap=k_vd_index_down, &
    4503             :                                       metric=almo_scf_env%matrix_s_blk(1), &
    4504             :                                       retain_overlap_sparsity=.FALSE., &
    4505           0 :                                       eps_filter=almo_scf_env%eps_filter)
    4506             :                      CALL dbcsr_create(vd_index_sqrt_inv, template=k_vd_index_down, &
    4507           0 :                                        matrix_type=dbcsr_type_no_symmetry)
    4508             :                      CALL dbcsr_create(vd_index_sqrt, template=k_vd_index_down, &
    4509           0 :                                        matrix_type=dbcsr_type_no_symmetry)
    4510             :                      CALL matrix_sqrt_Newton_Schulz(vd_index_sqrt, &
    4511             :                                                     vd_index_sqrt_inv, &
    4512             :                                                     k_vd_index_down, &
    4513             :                                                     threshold=almo_scf_env%eps_filter, &
    4514             :                                                     order=almo_scf_env%order_lanczos, &
    4515             :                                                     eps_lanczos=almo_scf_env%eps_lanczos, &
    4516           0 :                                                     max_iter_lanczos=almo_scf_env%max_iter_lanczos)
    4517             :                      IF (safe_mode) THEN
    4518             :                         CALL dbcsr_create(matrix_tmp1, template=k_vd_index_down, &
    4519             :                                           matrix_type=dbcsr_type_no_symmetry)
    4520             :                         CALL dbcsr_create(matrix_tmp2, template=k_vd_index_down, &
    4521             :                                           matrix_type=dbcsr_type_no_symmetry)
    4522             : 
    4523             :                         CALL dbcsr_multiply("N", "N", 1.0_dp, vd_index_sqrt_inv, &
    4524             :                                             k_vd_index_down, &
    4525             :                                             0.0_dp, matrix_tmp1, filter_eps=almo_scf_env%eps_filter)
    4526             :                         CALL dbcsr_multiply("N", "N", 1.0_dp, matrix_tmp1, &
    4527             :                                             vd_index_sqrt_inv, &
    4528             :                                             0.0_dp, matrix_tmp2, filter_eps=almo_scf_env%eps_filter)
    4529             : 
    4530             :                         frob_matrix_base = dbcsr_frobenius_norm(matrix_tmp2)
    4531             :                         CALL dbcsr_add_on_diag(matrix_tmp2, -1.0_dp)
    4532             :                         frob_matrix = dbcsr_frobenius_norm(matrix_tmp2)
    4533             :                         IF (unit_nr > 0) THEN
    4534             :                            WRITE (unit_nr, *) "Error for (inv(sqrt(SIGVV))*SIGVV*inv(sqrt(SIGVV))-I)", &
    4535             :                               frob_matrix/frob_matrix_base
    4536             :                         END IF
    4537             : 
    4538             :                         CALL dbcsr_release(matrix_tmp1)
    4539             :                         CALL dbcsr_release(matrix_tmp2)
    4540             :                      END IF
    4541             :                      CALL dbcsr_multiply("N", "N", 1.0_dp, &
    4542             :                                          vd_fixed, &
    4543             :                                          vd_index_sqrt_inv, &
    4544             :                                          0.0_dp, almo_scf_env%matrix_v_disc_blk(ispin), &
    4545           0 :                                          filter_eps=almo_scf_env%eps_filter)
    4546             : 
    4547           0 :                      CALL dbcsr_release(vr_index_sqrt_inv)
    4548           0 :                      CALL dbcsr_release(vr_index_sqrt)
    4549           0 :                      CALL dbcsr_release(vd_index_sqrt_inv)
    4550           0 :                      CALL dbcsr_release(vd_index_sqrt)
    4551             : 
    4552           0 :                      CALL timestop(handle8)
    4553             : 
    4554             :                   END IF ! ne.virt_full
    4555             : 
    4556             :                   ! RZK-warning released outside the outer loop
    4557           0 :                   CALL dbcsr_release(sigma_vv_sqrt)
    4558           0 :                   CALL dbcsr_release(sigma_vv_sqrt_inv)
    4559           0 :                   IF (almo_scf_env%deloc_truncate_virt .NE. virt_full) THEN
    4560           0 :                      CALL dbcsr_release(k_vr_index_down)
    4561           0 :                      CALL dbcsr_release(k_vd_index_down)
    4562             :                      !CALL dbcsr_release(k_vd_index_up)
    4563           0 :                      CALL dbcsr_release(matrix_k_central)
    4564           0 :                      CALL dbcsr_release(vr_fixed)
    4565           0 :                      CALL dbcsr_release(vd_fixed)
    4566           0 :                      CALL dbcsr_release(grad)
    4567           0 :                      CALL dbcsr_release(prec)
    4568           0 :                      CALL dbcsr_release(prev_grad)
    4569           0 :                      CALL dbcsr_release(tmp3_vd_vr)
    4570           0 :                      CALL dbcsr_release(tmp1_n_vr)
    4571           0 :                      CALL dbcsr_release(tmp_k_blk)
    4572           0 :                      CALL dbcsr_release(t_curr)
    4573           0 :                      CALL dbcsr_release(sigma_oo_curr)
    4574           0 :                      CALL dbcsr_release(sigma_oo_curr_inv)
    4575           0 :                      CALL dbcsr_release(step)
    4576           0 :                      CALL dbcsr_release(tmp2_n_o)
    4577           0 :                      CALL dbcsr_release(tmp4_o_vr)
    4578           0 :                      CALL dbcsr_release(prev_step)
    4579           0 :                      CALL dbcsr_release(prev_minus_prec_grad)
    4580           0 :                      IF (md_in_k_space) THEN
    4581           0 :                         CALL dbcsr_release(velocity)
    4582             :                      END IF
    4583             : 
    4584             :                   END IF
    4585             : 
    4586           0 :                   outer_opt_k_iteration = outer_opt_k_iteration + 1
    4587           0 :                   IF (outer_opt_k_prepare_to_exit) EXIT
    4588             : 
    4589             :                END DO ! outer loop for k
    4590             : 
    4591             :             END DO ! ispin
    4592             : 
    4593             :             ! RZK-warning update mo orbitals
    4594             : 
    4595             :          ELSE ! virtual orbitals might not be available use projected AOs
    4596             : 
    4597             :             ! compute sqrt(S) and inv(sqrt(S))
    4598             :             ! RZK-warning - remove this sqrt(S) and inv(sqrt(S))
    4599             :             ! ideally ALMO scf should use sigma and sigma_inv in
    4600             :             ! the tensor_up_down representation
    4601           0 :             IF (.NOT. almo_scf_env%s_sqrt_done) THEN
    4602             : 
    4603           0 :                IF (unit_nr > 0) THEN
    4604           0 :                   WRITE (unit_nr, *) "sqrt and inv(sqrt) of AO overlap matrix"
    4605             :                END IF
    4606             :                CALL dbcsr_create(almo_scf_env%matrix_s_sqrt(1), &
    4607             :                                  template=almo_scf_env%matrix_s(1), &
    4608           0 :                                  matrix_type=dbcsr_type_no_symmetry)
    4609             :                CALL dbcsr_create(almo_scf_env%matrix_s_sqrt_inv(1), &
    4610             :                                  template=almo_scf_env%matrix_s(1), &
    4611           0 :                                  matrix_type=dbcsr_type_no_symmetry)
    4612             : 
    4613             :                CALL matrix_sqrt_Newton_Schulz(almo_scf_env%matrix_s_sqrt(1), &
    4614             :                                               almo_scf_env%matrix_s_sqrt_inv(1), &
    4615             :                                               almo_scf_env%matrix_s(1), &
    4616             :                                               threshold=almo_scf_env%eps_filter, &
    4617             :                                               order=almo_scf_env%order_lanczos, &
    4618             :                                               eps_lanczos=almo_scf_env%eps_lanczos, &
    4619           0 :                                               max_iter_lanczos=almo_scf_env%max_iter_lanczos)
    4620             : 
    4621             :                IF (safe_mode) THEN
    4622             :                   CALL dbcsr_create(matrix_tmp1, template=almo_scf_env%matrix_s(1), &
    4623             :                                     matrix_type=dbcsr_type_no_symmetry)
    4624             :                   CALL dbcsr_create(matrix_tmp2, template=almo_scf_env%matrix_s(1), &
    4625             :                                     matrix_type=dbcsr_type_no_symmetry)
    4626             : 
    4627             :                   CALL dbcsr_multiply("N", "N", 1.0_dp, almo_scf_env%matrix_s_sqrt_inv(1), &
    4628             :                                       almo_scf_env%matrix_s(1), &
    4629             :                                       0.0_dp, matrix_tmp1, filter_eps=almo_scf_env%eps_filter)
    4630             :                   CALL dbcsr_multiply("N", "N", 1.0_dp, matrix_tmp1, almo_scf_env%matrix_s_sqrt_inv(1), &
    4631             :                                       0.0_dp, matrix_tmp2, filter_eps=almo_scf_env%eps_filter)
    4632             : 
    4633             :                   frob_matrix_base = dbcsr_frobenius_norm(matrix_tmp2)
    4634             :                   CALL dbcsr_add_on_diag(matrix_tmp2, -1.0_dp)
    4635             :                   frob_matrix = dbcsr_frobenius_norm(matrix_tmp2)
    4636             :                   IF (unit_nr > 0) THEN
    4637             :                      WRITE (unit_nr, *) "Error for (inv(sqrt(S))*S*inv(sqrt(S))-I)", frob_matrix/frob_matrix_base
    4638             :                   END IF
    4639             : 
    4640             :                   CALL dbcsr_release(matrix_tmp1)
    4641             :                   CALL dbcsr_release(matrix_tmp2)
    4642             :                END IF
    4643             : 
    4644           0 :                almo_scf_env%s_sqrt_done = .TRUE.
    4645             : 
    4646             :             END IF
    4647             : 
    4648           0 :             DO ispin = 1, nspin
    4649             : 
    4650           0 :                CALL ct_step_env_init(ct_step_env)
    4651             :                CALL ct_step_env_set(ct_step_env, &
    4652             :                                     para_env=almo_scf_env%para_env, &
    4653             :                                     blacs_env=almo_scf_env%blacs_env, &
    4654             :                                     use_occ_orbs=.TRUE., &
    4655             :                                     use_virt_orbs=almo_scf_env%deloc_cayley_use_virt_orbs, &
    4656             :                                     occ_orbs_orthogonal=.FALSE., &
    4657             :                                     virt_orbs_orthogonal=almo_scf_env%orthogonal_basis, &
    4658             :                                     tensor_type=almo_scf_env%deloc_cayley_tensor_type, &
    4659             :                                     neglect_quadratic_term=almo_scf_env%deloc_cayley_linear, &
    4660             :                                     calculate_energy_corr=.TRUE., &
    4661             :                                     update_p=.TRUE., &
    4662             :                                     update_q=.FALSE., &
    4663             :                                     pp_preconditioner_full=almo_scf_env%deloc_cayley_occ_precond, &
    4664             :                                     qq_preconditioner_full=almo_scf_env%deloc_cayley_vir_precond, &
    4665             :                                     eps_convergence=almo_scf_env%deloc_cayley_eps_convergence, &
    4666             :                                     eps_filter=almo_scf_env%eps_filter, &
    4667             :                                     !nspins=almo_scf_env%nspins,&
    4668             :                                     q_index_up=almo_scf_env%matrix_s_sqrt_inv(1), &
    4669             :                                     q_index_down=almo_scf_env%matrix_s_sqrt(1), &
    4670             :                                     p_index_up=almo_scf_env%matrix_sigma_sqrt_inv(ispin), &
    4671             :                                     p_index_down=almo_scf_env%matrix_sigma_sqrt(ispin), &
    4672             :                                     matrix_ks=almo_scf_env%matrix_ks_0deloc(ispin), &
    4673             :                                     matrix_p=almo_scf_env%matrix_p(ispin), &
    4674             :                                     matrix_qp_template=almo_scf_env%matrix_t(ispin), &
    4675             :                                     matrix_pq_template=almo_scf_env%matrix_t_tr(ispin), &
    4676             :                                     matrix_t=almo_scf_env%matrix_t(ispin), &
    4677             :                                     conjugator=almo_scf_env%deloc_cayley_conjugator, &
    4678           0 :                                     max_iter=almo_scf_env%deloc_cayley_max_iter)
    4679             : 
    4680             :                ! perform calculations
    4681           0 :                CALL ct_step_execute(ct_step_env)
    4682             : 
    4683             :                ! for now we do not need the new set of orbitals
    4684             :                ! just get the energy correction
    4685             :                CALL ct_step_env_get(ct_step_env, &
    4686           0 :                                     energy_correction=energy_correction(ispin))
    4687             :                !copy_da_energy_matrix=matrix_eda(ispin),&
    4688             :                !copy_da_charge_matrix=matrix_cta(ispin),&
    4689             : 
    4690           0 :                CALL ct_step_env_clean(ct_step_env)
    4691             : 
    4692             :             END DO
    4693             : 
    4694           0 :             energy_correction(1) = energy_correction(1)*spin_factor
    4695             : 
    4696             :          END IF
    4697             : 
    4698             :          ! print the energy correction and exit
    4699           0 :          DO ispin = 1, nspin
    4700             : 
    4701           0 :             IF (unit_nr > 0) THEN
    4702           0 :                WRITE (unit_nr, *)
    4703           0 :                WRITE (unit_nr, '(T2,A,I6,F20.9)') "ECORR", ispin, &
    4704           0 :                   energy_correction(ispin)
    4705           0 :                WRITE (unit_nr, *)
    4706             :             END IF
    4707           0 :             energy_correction_final = energy_correction_final + energy_correction(ispin)
    4708             : 
    4709             :             !!! print out the results of decomposition analysis
    4710             :             !!IF (unit_nr>0) THEN
    4711             :             !!   WRITE(unit_nr,*)
    4712             :             !!   WRITE(unit_nr,'(T2,A)') "ENERGY DECOMPOSITION"
    4713             :             !!ENDIF
    4714             :             !!CALL dbcsr_print_block_sum(eda_matrix(ispin))
    4715             :             !!IF (unit_nr>0) THEN
    4716             :             !!   WRITE(unit_nr,*)
    4717             :             !!   WRITE(unit_nr,'(T2,A)') "CHARGE DECOMPOSITION"
    4718             :             !!ENDIF
    4719             :             !!CALL dbcsr_print_block_sum(cta_matrix(ispin))
    4720             : 
    4721             :             ! obtain density matrix from updated MOs
    4722             :             ! RZK-later sigma and sigma_inv are lost here
    4723             :             CALL almo_scf_t_to_proj(t=almo_scf_env%matrix_t(ispin), &
    4724             :                                     p=almo_scf_env%matrix_p(ispin), &
    4725             :                                     eps_filter=almo_scf_env%eps_filter, &
    4726             :                                     orthog_orbs=.FALSE., &
    4727             :                                     nocc_of_domain=almo_scf_env%nocc_of_domain(:, ispin), &
    4728             :                                     s=almo_scf_env%matrix_s(1), &
    4729             :                                     sigma=almo_scf_env%matrix_sigma(ispin), &
    4730             :                                     sigma_inv=almo_scf_env%matrix_sigma_inv(ispin), &
    4731             :                                     !use_guess=use_guess, &
    4732             :                                     algorithm=almo_scf_env%sigma_inv_algorithm, &
    4733             :                                     inverse_accelerator=almo_scf_env%order_lanczos, &
    4734             :                                     inv_eps_factor=almo_scf_env%matrix_iter_eps_error_factor, &
    4735             :                                     eps_lanczos=almo_scf_env%eps_lanczos, &
    4736             :                                     max_iter_lanczos=almo_scf_env%max_iter_lanczos, &
    4737             :                                     para_env=almo_scf_env%para_env, &
    4738           0 :                                     blacs_env=almo_scf_env%blacs_env)
    4739             : 
    4740           0 :             IF (almo_scf_env%nspins == 1) &
    4741             :                CALL dbcsr_scale(almo_scf_env%matrix_p(ispin), &
    4742           0 :                                 spin_factor)
    4743             : 
    4744             :          END DO
    4745             : 
    4746             :       CASE (dm_ls_step)
    4747             : 
    4748             :          ! compute the inverse of S
    4749           0 :          IF (.NOT. almo_scf_env%s_inv_done) THEN
    4750           0 :             IF (unit_nr > 0) THEN
    4751           0 :                WRITE (unit_nr, *) "Inverting AO overlap matrix"
    4752             :             END IF
    4753             :             CALL dbcsr_create(almo_scf_env%matrix_s_inv(1), &
    4754             :                               template=almo_scf_env%matrix_s(1), &
    4755           0 :                               matrix_type=dbcsr_type_no_symmetry)
    4756           0 :             IF (.NOT. almo_scf_env%s_sqrt_done) THEN
    4757             :                CALL invert_Hotelling(almo_scf_env%matrix_s_inv(1), &
    4758             :                                      almo_scf_env%matrix_s(1), &
    4759           0 :                                      threshold=almo_scf_env%eps_filter)
    4760             :             ELSE
    4761             :                CALL dbcsr_multiply("N", "N", 1.0_dp, almo_scf_env%matrix_s_sqrt_inv(1), &
    4762             :                                    almo_scf_env%matrix_s_sqrt_inv(1), &
    4763             :                                    0.0_dp, almo_scf_env%matrix_s_inv(1), &
    4764           0 :                                    filter_eps=almo_scf_env%eps_filter)
    4765             :             END IF
    4766             : 
    4767             :             IF (safe_mode) THEN
    4768             :                CALL dbcsr_create(matrix_tmp1, template=almo_scf_env%matrix_s(1), &
    4769             :                                  matrix_type=dbcsr_type_no_symmetry)
    4770             :                CALL dbcsr_multiply("N", "N", 1.0_dp, almo_scf_env%matrix_s_inv(1), &
    4771             :                                    almo_scf_env%matrix_s(1), &
    4772             :                                    0.0_dp, matrix_tmp1, &
    4773             :                                    filter_eps=almo_scf_env%eps_filter)
    4774             :                frob_matrix_base = dbcsr_frobenius_norm(matrix_tmp1)
    4775             :                CALL dbcsr_add_on_diag(matrix_tmp1, -1.0_dp)
    4776             :                frob_matrix = dbcsr_frobenius_norm(matrix_tmp1)
    4777             :                IF (unit_nr > 0) THEN
    4778             :                   WRITE (unit_nr, *) "Error for (inv(S)*S-I)", &
    4779             :                      frob_matrix/frob_matrix_base
    4780             :                END IF
    4781             :                CALL dbcsr_release(matrix_tmp1)
    4782             :             END IF
    4783             : 
    4784           0 :             almo_scf_env%s_inv_done = .TRUE.
    4785             : 
    4786             :          END IF
    4787             : 
    4788           0 :          DO ispin = 1, nspin
    4789             :             ! RZK-warning the preconditioner is very important
    4790             :             !       IF (.FALSE.) THEN
    4791             :             !           CALL apply_matrix_preconditioner(almo_scf_env%matrix_ks(ispin),&
    4792             :             !                   "forward",almo_scf_env%matrix_s_blk_sqrt(1),&
    4793             :             !                   almo_scf_env%matrix_s_blk_sqrt_inv(1))
    4794             :             !       ENDIF
    4795             :             !CALL dbcsr_filter(almo_scf_env%matrix_ks(ispin),&
    4796             :             !         almo_scf_env%eps_filter)
    4797             :          END DO
    4798             : 
    4799           0 :          ALLOCATE (matrix_p_almo_scf_converged(nspin))
    4800           0 :          DO ispin = 1, nspin
    4801             :             CALL dbcsr_create(matrix_p_almo_scf_converged(ispin), &
    4802           0 :                               template=almo_scf_env%matrix_p(ispin))
    4803             :             CALL dbcsr_copy(matrix_p_almo_scf_converged(ispin), &
    4804           0 :                             almo_scf_env%matrix_p(ispin))
    4805             :          END DO
    4806             : 
    4807             :          ! update the density matrix
    4808           0 :          DO ispin = 1, nspin
    4809             : 
    4810           0 :             nelectron_spin_real(1) = almo_scf_env%nelectrons_spin(ispin)
    4811           0 :             IF (almo_scf_env%nspins == 1) &
    4812           0 :                nelectron_spin_real(1) = nelectron_spin_real(1)/2
    4813             : 
    4814           0 :             local_mu(1) = SUM(almo_scf_env%mu_of_domain(:, ispin))/almo_scf_env%ndomains
    4815           0 :             fake(1) = 123523
    4816             : 
    4817             :             ! RZK UPDATE! the update algorithm is removed because
    4818             :             ! RZK UPDATE! it requires updating core LS_SCF routines
    4819             :             ! RZK UPDATE! (the code exists in the CVS version)
    4820           0 :             CPABORT("CVS only: density_matrix_sign has not been updated in SVN")
    4821             :             ! RZK UPDATE!CALL density_matrix_sign(almo_scf_env%matrix_p(ispin),&
    4822             :             ! RZK UPDATE!                     local_mu,&
    4823             :             ! RZK UPDATE!                     almo_scf_env%fixed_mu,&
    4824             :             ! RZK UPDATE!                     almo_scf_env%matrix_ks_0deloc(ispin),&
    4825             :             ! RZK UPDATE!                     almo_scf_env%matrix_s(1), &
    4826             :             ! RZK UPDATE!                     almo_scf_env%matrix_s_inv(1), &
    4827             :             ! RZK UPDATE!                     nelectron_spin_real,&
    4828             :             ! RZK UPDATE!                     almo_scf_env%eps_filter,&
    4829             :             ! RZK UPDATE!                     fake)
    4830             :             ! RZK UPDATE!
    4831           0 :             almo_scf_env%mu = local_mu(1)
    4832             : 
    4833             :             !IF (almo_scf_env%has_s_preconditioner) THEN
    4834             :             !    CALL apply_matrix_preconditioner(&
    4835             :             !             almo_scf_env%matrix_p_blk(ispin),&
    4836             :             !             "forward",almo_scf_env%matrix_s_blk_sqrt(1),&
    4837             :             !             almo_scf_env%matrix_s_blk_sqrt_inv(1))
    4838             :             !ENDIF
    4839             :             !CALL dbcsr_filter(almo_scf_env%matrix_p(ispin),&
    4840             :             !        almo_scf_env%eps_filter)
    4841             : 
    4842           0 :             IF (almo_scf_env%nspins == 1) &
    4843             :                CALL dbcsr_scale(almo_scf_env%matrix_p(ispin), &
    4844           0 :                                 spin_factor)
    4845             : 
    4846             :             !CALL dbcsr_dot(almo_scf_env%matrix_ks_0deloc(ispin),&
    4847             :             !  almo_scf_env%matrix_p(ispin),&
    4848             :             !  energy_correction(ispin))
    4849             :             !IF (unit_nr>0) THEN
    4850             :             !   WRITE(unit_nr,*)
    4851             :             !   WRITE(unit_nr,'(T2,A,I6,F20.9)') "EFAKE",ispin,&
    4852             :             !           energy_correction(ispin)
    4853             :             !   WRITE(unit_nr,*)
    4854             :             !ENDIF
    4855             :             CALL dbcsr_add(matrix_p_almo_scf_converged(ispin), &
    4856           0 :                            almo_scf_env%matrix_p(ispin), -1.0_dp, 1.0_dp)
    4857             :             CALL dbcsr_dot(almo_scf_env%matrix_ks_0deloc(ispin), &
    4858             :                            matrix_p_almo_scf_converged(ispin), &
    4859           0 :                            energy_correction(ispin))
    4860             : 
    4861           0 :             energy_correction_final = energy_correction_final + energy_correction(ispin)
    4862             : 
    4863           0 :             IF (unit_nr > 0) THEN
    4864           0 :                WRITE (unit_nr, *)
    4865           0 :                WRITE (unit_nr, '(T2,A,I6,F20.9)') "ECORR", ispin, &
    4866           0 :                   energy_correction(ispin)
    4867           0 :                WRITE (unit_nr, *)
    4868             :             END IF
    4869             : 
    4870             :          END DO
    4871             : 
    4872           0 :          DO ispin = 1, nspin
    4873           0 :             CALL dbcsr_release(matrix_p_almo_scf_converged(ispin))
    4874             :          END DO
    4875           0 :          DEALLOCATE (matrix_p_almo_scf_converged)
    4876             : 
    4877             :       END SELECT ! algorithm selection
    4878             : 
    4879           0 :       t2 = m_walltime()
    4880             : 
    4881           0 :       IF (unit_nr > 0) THEN
    4882           0 :          WRITE (unit_nr, *)
    4883           0 :          WRITE (unit_nr, '(T2,A,F18.9,F18.9,F18.9,F12.6)') "ETOT", &
    4884           0 :             almo_scf_env%almo_scf_energy, &
    4885           0 :             energy_correction_final, &
    4886           0 :             almo_scf_env%almo_scf_energy + energy_correction_final, &
    4887           0 :             t2 - t1
    4888           0 :          WRITE (unit_nr, *)
    4889             :       END IF
    4890             : 
    4891           0 :       CALL timestop(handle)
    4892             : 
    4893           0 :    END SUBROUTINE harris_foulkes_correction
    4894             : 
    4895             : ! **************************************************************************************************
    4896             : !> \brief triu of a dbcsr matrix
    4897             : !> \param matrix ...
    4898             : ! **************************************************************************************************
    4899           0 :    SUBROUTINE make_triu(matrix)
    4900             :       TYPE(dbcsr_type), INTENT(INOUT)                    :: matrix
    4901             : 
    4902             :       CHARACTER(len=*), PARAMETER                        :: routineN = 'make_triu'
    4903             : 
    4904             :       INTEGER                                            :: col, handle, i, j, row
    4905           0 :       REAL(dp), DIMENSION(:, :), POINTER                 :: block
    4906             :       TYPE(dbcsr_iterator_type)                          :: iter
    4907             : 
    4908           0 :       CALL timeset(routineN, handle)
    4909             : 
    4910           0 :       CALL dbcsr_iterator_start(iter, matrix)
    4911           0 :       DO WHILE (dbcsr_iterator_blocks_left(iter))
    4912           0 :          CALL dbcsr_iterator_next_block(iter, row, col, block)
    4913           0 :          IF (row > col) block(:, :) = 0.0_dp
    4914           0 :          IF (row == col) THEN
    4915           0 :             DO j = 1, SIZE(block, 2)
    4916           0 :             DO i = j + 1, SIZE(block, 1)
    4917           0 :                block(i, j) = 0.0_dp
    4918             :             END DO
    4919             :             END DO
    4920             :          END IF
    4921             :       END DO
    4922           0 :       CALL dbcsr_iterator_stop(iter)
    4923           0 :       CALL dbcsr_filter(matrix, eps=0.0_dp)
    4924             : 
    4925           0 :       CALL timestop(handle)
    4926           0 :    END SUBROUTINE make_triu
    4927             : 
    4928             : ! **************************************************************************************************
    4929             : !> \brief Computes a diagonal preconditioner for the cg optimization of k matrix
    4930             : !> \param prec ...
    4931             : !> \param vd_prop ...
    4932             : !> \param f ...
    4933             : !> \param x ...
    4934             : !> \param oo_inv_x_tr ...
    4935             : !> \param s ...
    4936             : !> \param grad ...
    4937             : !> \param vd_blk ...
    4938             : !> \param t ...
    4939             : !> \param template_vd_vd_blk ...
    4940             : !> \param template_vr_vr_blk ...
    4941             : !> \param template_n_vr ...
    4942             : !> \param spin_factor ...
    4943             : !> \param eps_filter ...
    4944             : !> \par History
    4945             : !>       2011.09 created [Rustam Z Khaliullin]
    4946             : !> \author Rustam Z Khaliullin
    4947             : ! **************************************************************************************************
    4948           0 :    SUBROUTINE opt_k_create_preconditioner(prec, vd_prop, f, x, oo_inv_x_tr, s, grad, &
    4949             :                                           vd_blk, t, template_vd_vd_blk, template_vr_vr_blk, template_n_vr, &
    4950             :                                           spin_factor, eps_filter)
    4951             : 
    4952             :       TYPE(dbcsr_type), INTENT(INOUT)                    :: prec
    4953             :       TYPE(dbcsr_type), INTENT(IN)                       :: vd_prop, f, x, oo_inv_x_tr, s
    4954             :       TYPE(dbcsr_type), INTENT(INOUT)                    :: grad
    4955             :       TYPE(dbcsr_type), INTENT(IN)                       :: vd_blk, t, template_vd_vd_blk, &
    4956             :                                                             template_vr_vr_blk, template_n_vr
    4957             :       REAL(KIND=dp), INTENT(IN)                          :: spin_factor, eps_filter
    4958             : 
    4959             :       CHARACTER(len=*), PARAMETER :: routineN = 'opt_k_create_preconditioner'
    4960             : 
    4961             :       INTEGER                                            :: handle, p_nrows, q_nrows
    4962           0 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: p_diagonal, q_diagonal
    4963             :       TYPE(dbcsr_type)                                   :: pp_diag, qq_diag, t1, t2, tmp, &
    4964             :                                                             tmp1_n_vr, tmp2_n_vr, tmp_n_vd, &
    4965             :                                                             tmp_vd_vd_blk, tmp_vr_vr_blk
    4966             : 
    4967             : ! init diag blocks outside
    4968             : ! init diag blocks otside
    4969             : !INTEGER                                  :: iblock_row, iblock_col,&
    4970             : !                                            nblkrows_tot, nblkcols_tot
    4971             : !REAL(KIND=dp), DIMENSION(:, :), POINTER  :: p_new_block
    4972             : !INTEGER                                  :: mynode, hold, row, col
    4973             : 
    4974           0 :       CALL timeset(routineN, handle)
    4975             : 
    4976             :       ! initialize a matrix to 1.0
    4977           0 :       CALL dbcsr_create(tmp, template=prec)
    4978             :       ! in order to use dbcsr_set matrix blocks must exist
    4979           0 :       CALL dbcsr_copy(tmp, prec)
    4980           0 :       CALL dbcsr_set(tmp, 1.0_dp)
    4981             : 
    4982             :       ! compute qq = (Vd^tr)*F*Vd
    4983           0 :       CALL dbcsr_create(tmp_n_vd, template=vd_prop)
    4984             :       CALL dbcsr_multiply("N", "N", 1.0_dp, f, vd_prop, &
    4985           0 :                           0.0_dp, tmp_n_vd, filter_eps=eps_filter)
    4986             :       CALL dbcsr_create(tmp_vd_vd_blk, &
    4987           0 :                         template=template_vd_vd_blk)
    4988           0 :       CALL dbcsr_copy(tmp_vd_vd_blk, template_vd_vd_blk)
    4989             :       CALL dbcsr_multiply("T", "N", 1.0_dp, vd_prop, tmp_n_vd, &
    4990             :                           0.0_dp, tmp_vd_vd_blk, &
    4991             :                           retain_sparsity=.TRUE., &
    4992           0 :                           filter_eps=eps_filter)
    4993             :       ! copy diagonal elements of the result into rows of a matrix
    4994           0 :       CALL dbcsr_get_info(tmp_vd_vd_blk, nfullrows_total=q_nrows)
    4995           0 :       ALLOCATE (q_diagonal(q_nrows))
    4996           0 :       CALL dbcsr_get_diag(tmp_vd_vd_blk, q_diagonal)
    4997             :       CALL dbcsr_create(qq_diag, &
    4998           0 :                         template=template_vd_vd_blk)
    4999           0 :       CALL dbcsr_add_on_diag(qq_diag, 1.0_dp)
    5000           0 :       CALL dbcsr_set_diag(qq_diag, q_diagonal)
    5001           0 :       CALL dbcsr_create(t1, template=prec)
    5002             :       CALL dbcsr_multiply("N", "N", 1.0_dp, qq_diag, tmp, &
    5003           0 :                           0.0_dp, t1, filter_eps=eps_filter)
    5004             : 
    5005             :       ! compute pp = X*sigma_oo_inv*X^tr
    5006           0 :       CALL dbcsr_create(tmp_vr_vr_blk, template=template_vr_vr_blk)
    5007           0 :       CALL dbcsr_copy(tmp_vr_vr_blk, template_vr_vr_blk)
    5008             :       CALL dbcsr_multiply("N", "N", 1.0_dp, x, oo_inv_x_tr, &
    5009             :                           0.0_dp, tmp_vr_vr_blk, &
    5010             :                           retain_sparsity=.TRUE., &
    5011           0 :                           filter_eps=eps_filter)
    5012             :       ! copy diagonal elements of the result into cols of a matrix
    5013           0 :       CALL dbcsr_get_info(tmp_vr_vr_blk, nfullrows_total=p_nrows)
    5014           0 :       ALLOCATE (p_diagonal(p_nrows))
    5015           0 :       CALL dbcsr_get_diag(tmp_vr_vr_blk, p_diagonal)
    5016           0 :       CALL dbcsr_create(pp_diag, template=template_vr_vr_blk)
    5017           0 :       CALL dbcsr_add_on_diag(pp_diag, 1.0_dp)
    5018           0 :       CALL dbcsr_set_diag(pp_diag, p_diagonal)
    5019           0 :       CALL dbcsr_set(tmp, 1.0_dp)
    5020           0 :       CALL dbcsr_create(t2, template=prec)
    5021             :       CALL dbcsr_multiply("N", "N", 1.0_dp, tmp, pp_diag, &
    5022           0 :                           0.0_dp, t2, filter_eps=eps_filter)
    5023             : 
    5024           0 :       CALL dbcsr_hadamard_product(t1, t2, prec)
    5025             : 
    5026             :       ! compute qq = (Vd^tr)*S*Vd
    5027             :       CALL dbcsr_multiply("N", "N", 1.0_dp, s, vd_prop, &
    5028           0 :                           0.0_dp, tmp_n_vd, filter_eps=eps_filter)
    5029             :       CALL dbcsr_multiply("T", "N", 1.0_dp, vd_prop, tmp_n_vd, &
    5030             :                           0.0_dp, tmp_vd_vd_blk, &
    5031             :                           retain_sparsity=.TRUE., &
    5032           0 :                           filter_eps=eps_filter)
    5033             :       ! copy diagonal elements of the result into rows of a matrix
    5034           0 :       CALL dbcsr_get_diag(tmp_vd_vd_blk, q_diagonal)
    5035           0 :       CALL dbcsr_add_on_diag(qq_diag, 1.0_dp)
    5036           0 :       CALL dbcsr_set_diag(qq_diag, q_diagonal)
    5037           0 :       CALL dbcsr_set(tmp, 1.0_dp)
    5038             :       CALL dbcsr_multiply("N", "N", 1.0_dp, qq_diag, tmp, &
    5039           0 :                           0.0_dp, t1, filter_eps=eps_filter)
    5040             : 
    5041             :       ! compute pp = X*sig_oo_inv*(T^tr)*F*T*sig_oo_inv*(X^tr)
    5042           0 :       CALL dbcsr_create(tmp1_n_vr, template=template_n_vr)
    5043           0 :       CALL dbcsr_create(tmp2_n_vr, template=template_n_vr)
    5044             :       CALL dbcsr_multiply("N", "N", 1.0_dp, t, oo_inv_x_tr, &
    5045           0 :                           0.0_dp, tmp1_n_vr, filter_eps=eps_filter)
    5046             :       CALL dbcsr_multiply("N", "N", 1.0_dp, f, tmp1_n_vr, &
    5047           0 :                           0.0_dp, tmp2_n_vr, filter_eps=eps_filter)
    5048             :       CALL dbcsr_multiply("T", "N", 1.0_dp, tmp1_n_vr, tmp2_n_vr, &
    5049             :                           0.0_dp, tmp_vr_vr_blk, &
    5050             :                           retain_sparsity=.TRUE., &
    5051           0 :                           filter_eps=eps_filter)
    5052             :       ! copy diagonal elements of the result into cols of a matrix
    5053           0 :       CALL dbcsr_get_diag(tmp_vr_vr_blk, p_diagonal)
    5054           0 :       CALL dbcsr_add_on_diag(pp_diag, 1.0_dp)
    5055           0 :       CALL dbcsr_set_diag(pp_diag, p_diagonal)
    5056           0 :       CALL dbcsr_set(tmp, 1.0_dp)
    5057             :       CALL dbcsr_multiply("N", "N", 1.0_dp, tmp, pp_diag, &
    5058           0 :                           0.0_dp, t2, filter_eps=eps_filter)
    5059             : 
    5060           0 :       CALL dbcsr_hadamard_product(t1, t2, tmp)
    5061           0 :       CALL dbcsr_add(prec, tmp, 1.0_dp, -1.0_dp)
    5062           0 :       CALL dbcsr_scale(prec, 2.0_dp*spin_factor)
    5063             : 
    5064             :       ! compute qp = X*sig_oo_inv*(T^tr)*S*Vd
    5065             :       CALL dbcsr_multiply("N", "N", 1.0_dp, s, vd_blk, &
    5066           0 :                           0.0_dp, tmp_n_vd, filter_eps=eps_filter)
    5067             :       CALL dbcsr_multiply("T", "N", 1.0_dp, tmp_n_vd, tmp1_n_vr, &
    5068             :                           0.0_dp, tmp, retain_sparsity=.TRUE., &
    5069           0 :                           filter_eps=eps_filter)
    5070           0 :       CALL dbcsr_hadamard_product(grad, tmp, t1)
    5071             :       ! gradient already contains 2.0*spin_factor
    5072           0 :       CALL dbcsr_scale(t1, -2.0_dp)
    5073             : 
    5074           0 :       CALL dbcsr_add(prec, t1, 1.0_dp, 1.0_dp)
    5075             : 
    5076           0 :       CALL inverse_of_elements(prec)
    5077           0 :       CALL dbcsr_filter(prec, eps_filter)
    5078             : 
    5079           0 :       DEALLOCATE (q_diagonal)
    5080           0 :       DEALLOCATE (p_diagonal)
    5081           0 :       CALL dbcsr_release(tmp)
    5082           0 :       CALL dbcsr_release(qq_diag)
    5083           0 :       CALL dbcsr_release(t1)
    5084           0 :       CALL dbcsr_release(pp_diag)
    5085           0 :       CALL dbcsr_release(t2)
    5086           0 :       CALL dbcsr_release(tmp_n_vd)
    5087           0 :       CALL dbcsr_release(tmp_vd_vd_blk)
    5088           0 :       CALL dbcsr_release(tmp_vr_vr_blk)
    5089           0 :       CALL dbcsr_release(tmp1_n_vr)
    5090           0 :       CALL dbcsr_release(tmp2_n_vr)
    5091             : 
    5092           0 :       CALL timestop(handle)
    5093             : 
    5094           0 :    END SUBROUTINE opt_k_create_preconditioner
    5095             : 
    5096             : ! **************************************************************************************************
    5097             : !> \brief Computes a block-diagonal preconditioner for the optimization of
    5098             : !>        k matrix
    5099             : !> \param almo_scf_env ...
    5100             : !> \param vd_prop ...
    5101             : !> \param oo_inv_x_tr ...
    5102             : !> \param t_curr ...
    5103             : !> \param ispin ...
    5104             : !> \param spin_factor ...
    5105             : !> \par History
    5106             : !>       2011.10 created [Rustam Z Khaliullin]
    5107             : !> \author Rustam Z Khaliullin
    5108             : ! **************************************************************************************************
    5109           0 :    SUBROUTINE opt_k_create_preconditioner_blk(almo_scf_env, vd_prop, oo_inv_x_tr, &
    5110             :                                               t_curr, ispin, spin_factor)
    5111             : 
    5112             :       TYPE(almo_scf_env_type), INTENT(INOUT)             :: almo_scf_env
    5113             :       TYPE(dbcsr_type), INTENT(IN)                       :: vd_prop, oo_inv_x_tr, t_curr
    5114             :       INTEGER, INTENT(IN)                                :: ispin
    5115             :       REAL(KIND=dp), INTENT(IN)                          :: spin_factor
    5116             : 
    5117             :       CHARACTER(len=*), PARAMETER :: routineN = 'opt_k_create_preconditioner_blk'
    5118             : 
    5119             :       INTEGER                                            :: handle
    5120             :       REAL(KIND=dp)                                      :: eps_filter
    5121             :       TYPE(dbcsr_type)                                   :: opt_k_e_dd, opt_k_e_rr, s_dd_sqrt, &
    5122             :                                                             s_rr_sqrt, t1, tmp, tmp1_n_vr, &
    5123             :                                                             tmp2_n_vr, tmp_n_vd, tmp_vd_vd_blk, &
    5124             :                                                             tmp_vr_vr_blk
    5125             : 
    5126             : ! matrices that has been computed outside the routine already
    5127             : 
    5128           0 :       CALL timeset(routineN, handle)
    5129             : 
    5130           0 :       eps_filter = almo_scf_env%eps_filter
    5131             : 
    5132             :       ! compute S_qq = (Vd^tr)*S*Vd
    5133           0 :       CALL dbcsr_create(tmp_n_vd, template=almo_scf_env%matrix_v_disc(ispin))
    5134             :       CALL dbcsr_create(tmp_vd_vd_blk, &
    5135             :                         template=almo_scf_env%matrix_vv_disc_blk(ispin), &
    5136           0 :                         matrix_type=dbcsr_type_no_symmetry)
    5137             :       CALL dbcsr_multiply("N", "N", 1.0_dp, &
    5138             :                           almo_scf_env%matrix_s(1), &
    5139             :                           vd_prop, &
    5140           0 :                           0.0_dp, tmp_n_vd, filter_eps=eps_filter)
    5141             :       CALL dbcsr_copy(tmp_vd_vd_blk, &
    5142           0 :                       almo_scf_env%matrix_vv_disc_blk(ispin))
    5143             :       CALL dbcsr_multiply("T", "N", 1.0_dp, vd_prop, tmp_n_vd, &
    5144             :                           0.0_dp, tmp_vd_vd_blk, &
    5145           0 :                           retain_sparsity=.TRUE.)
    5146             : 
    5147             :       CALL dbcsr_create(s_dd_sqrt, &
    5148             :                         template=almo_scf_env%matrix_vv_disc_blk(ispin), &
    5149           0 :                         matrix_type=dbcsr_type_no_symmetry)
    5150             :       CALL matrix_sqrt_Newton_Schulz(s_dd_sqrt, &
    5151             :                                      almo_scf_env%opt_k_t_dd(ispin), &
    5152             :                                      tmp_vd_vd_blk, &
    5153             :                                      threshold=eps_filter, &
    5154             :                                      order=almo_scf_env%order_lanczos, &
    5155             :                                      eps_lanczos=almo_scf_env%eps_lanczos, &
    5156           0 :                                      max_iter_lanczos=almo_scf_env%max_iter_lanczos)
    5157             : 
    5158             :       ! compute F_qq = (Vd^tr)*F*Vd
    5159             :       CALL dbcsr_multiply("N", "N", 1.0_dp, &
    5160             :                           almo_scf_env%matrix_ks_0deloc(ispin), &
    5161             :                           vd_prop, &
    5162           0 :                           0.0_dp, tmp_n_vd, filter_eps=eps_filter)
    5163             :       CALL dbcsr_copy(tmp_vd_vd_blk, &
    5164           0 :                       almo_scf_env%matrix_vv_disc_blk(ispin))
    5165             :       CALL dbcsr_multiply("T", "N", 1.0_dp, vd_prop, tmp_n_vd, &
    5166             :                           0.0_dp, tmp_vd_vd_blk, &
    5167           0 :                           retain_sparsity=.TRUE.)
    5168           0 :       CALL dbcsr_release(tmp_n_vd)
    5169             : 
    5170             :       ! bring to the blocked-orthogonalized basis
    5171             :       CALL dbcsr_multiply("N", "N", 1.0_dp, &
    5172             :                           tmp_vd_vd_blk, &
    5173             :                           almo_scf_env%opt_k_t_dd(ispin), &
    5174           0 :                           0.0_dp, s_dd_sqrt, filter_eps=eps_filter)
    5175             :       CALL dbcsr_multiply("N", "N", 1.0_dp, &
    5176             :                           almo_scf_env%opt_k_t_dd(ispin), &
    5177             :                           s_dd_sqrt, &
    5178           0 :                           0.0_dp, tmp_vd_vd_blk, filter_eps=eps_filter)
    5179             : 
    5180             :       ! diagonalize the matrix
    5181             :       CALL dbcsr_create(opt_k_e_dd, &
    5182           0 :                         template=almo_scf_env%matrix_vv_disc_blk(ispin))
    5183           0 :       CALL dbcsr_release(s_dd_sqrt)
    5184             :       CALL dbcsr_create(s_dd_sqrt, &
    5185             :                         template=almo_scf_env%matrix_vv_disc_blk(ispin), &
    5186           0 :                         matrix_type=dbcsr_type_no_symmetry)
    5187             :       CALL diagonalize_diagonal_blocks(tmp_vd_vd_blk, &
    5188             :                                        s_dd_sqrt, &
    5189           0 :                                        opt_k_e_dd)
    5190             : 
    5191             :       ! obtain the transformation matrix in the discarded subspace
    5192             :       ! T = S^{-1/2}.U
    5193             :       CALL dbcsr_copy(tmp_vd_vd_blk, &
    5194           0 :                       almo_scf_env%opt_k_t_dd(ispin))
    5195             :       CALL dbcsr_multiply("N", "N", 1.0_dp, &
    5196             :                           tmp_vd_vd_blk, &
    5197             :                           s_dd_sqrt, &
    5198             :                           0.0_dp, almo_scf_env%opt_k_t_dd(ispin), &
    5199           0 :                           filter_eps=eps_filter)
    5200           0 :       CALL dbcsr_release(s_dd_sqrt)
    5201           0 :       CALL dbcsr_release(tmp_vd_vd_blk)
    5202             : 
    5203             :       ! copy diagonal elements of the result into rows of a matrix
    5204             :       CALL dbcsr_create(tmp, &
    5205           0 :                         template=almo_scf_env%matrix_k_blk_ones(ispin))
    5206             :       CALL dbcsr_copy(tmp, &
    5207           0 :                       almo_scf_env%matrix_k_blk_ones(ispin))
    5208             :       CALL dbcsr_create(t1, &
    5209           0 :                         template=almo_scf_env%matrix_k_blk_ones(ispin))
    5210             :       CALL dbcsr_multiply("N", "N", 1.0_dp, &
    5211             :                           opt_k_e_dd, tmp, &
    5212           0 :                           0.0_dp, t1, filter_eps=eps_filter)
    5213           0 :       CALL dbcsr_release(opt_k_e_dd)
    5214             : 
    5215             :       ! compute S_pp = X*sigma_oo_inv*X^tr
    5216             :       CALL dbcsr_create(tmp_vr_vr_blk, &
    5217             :                         template=almo_scf_env%matrix_sigma_vv_blk(ispin), &
    5218           0 :                         matrix_type=dbcsr_type_no_symmetry)
    5219             :       CALL dbcsr_copy(tmp_vr_vr_blk, &
    5220           0 :                       almo_scf_env%matrix_sigma_vv_blk(ispin))
    5221             :       CALL dbcsr_multiply("N", "N", 1.0_dp, &
    5222             :                           almo_scf_env%matrix_x(ispin), &
    5223             :                           oo_inv_x_tr, &
    5224             :                           0.0_dp, tmp_vr_vr_blk, &
    5225           0 :                           retain_sparsity=.TRUE.)
    5226             : 
    5227             :       ! obtain the orthogonalization matrix
    5228             :       CALL dbcsr_create(s_rr_sqrt, &
    5229             :                         template=almo_scf_env%matrix_sigma_vv_blk(ispin), &
    5230           0 :                         matrix_type=dbcsr_type_no_symmetry)
    5231             :       CALL matrix_sqrt_Newton_Schulz(s_rr_sqrt, &
    5232             :                                      almo_scf_env%opt_k_t_rr(ispin), &
    5233             :                                      tmp_vr_vr_blk, &
    5234             :                                      threshold=eps_filter, &
    5235             :                                      order=almo_scf_env%order_lanczos, &
    5236             :                                      eps_lanczos=almo_scf_env%eps_lanczos, &
    5237           0 :                                      max_iter_lanczos=almo_scf_env%max_iter_lanczos)
    5238             : 
    5239             :       ! compute F_pp = X*sig_oo_inv*(T^tr)*F*T*sig_oo_inv*(X^tr)
    5240             :       CALL dbcsr_create(tmp1_n_vr, &
    5241           0 :                         template=almo_scf_env%matrix_v(ispin))
    5242             :       CALL dbcsr_create(tmp2_n_vr, &
    5243           0 :                         template=almo_scf_env%matrix_v(ispin))
    5244             :       CALL dbcsr_multiply("N", "N", 1.0_dp, t_curr, oo_inv_x_tr, &
    5245           0 :                           0.0_dp, tmp1_n_vr, filter_eps=eps_filter)
    5246             :       CALL dbcsr_multiply("N", "N", 1.0_dp, &
    5247             :                           almo_scf_env%matrix_ks_0deloc(ispin), &
    5248             :                           tmp1_n_vr, &
    5249           0 :                           0.0_dp, tmp2_n_vr, filter_eps=eps_filter)
    5250             :       CALL dbcsr_multiply("T", "N", 1.0_dp, tmp1_n_vr, tmp2_n_vr, &
    5251             :                           0.0_dp, tmp_vr_vr_blk, &
    5252           0 :                           retain_sparsity=.TRUE.)
    5253           0 :       CALL dbcsr_release(tmp1_n_vr)
    5254           0 :       CALL dbcsr_release(tmp2_n_vr)
    5255             : 
    5256             :       ! bring to the blocked-orthogonalized basis
    5257             :       CALL dbcsr_multiply("N", "N", 1.0_dp, &
    5258             :                           tmp_vr_vr_blk, &
    5259             :                           almo_scf_env%opt_k_t_rr(ispin), &
    5260           0 :                           0.0_dp, s_rr_sqrt, filter_eps=eps_filter)
    5261             :       CALL dbcsr_multiply("N", "N", 1.0_dp, &
    5262             :                           almo_scf_env%opt_k_t_rr(ispin), &
    5263             :                           s_rr_sqrt, &
    5264           0 :                           0.0_dp, tmp_vr_vr_blk, filter_eps=eps_filter)
    5265             : 
    5266             :       ! diagonalize the matrix
    5267             :       CALL dbcsr_create(opt_k_e_rr, &
    5268           0 :                         template=almo_scf_env%matrix_sigma_vv_blk(ispin))
    5269           0 :       CALL dbcsr_release(s_rr_sqrt)
    5270             :       CALL dbcsr_create(s_rr_sqrt, &
    5271             :                         template=almo_scf_env%matrix_sigma_vv_blk(ispin), &
    5272           0 :                         matrix_type=dbcsr_type_no_symmetry)
    5273             :       CALL diagonalize_diagonal_blocks(tmp_vr_vr_blk, &
    5274             :                                        s_rr_sqrt, &
    5275           0 :                                        opt_k_e_rr)
    5276             : 
    5277             :       ! obtain the transformation matrix in the retained subspace
    5278             :       ! T = S^{-1/2}.U
    5279             :       CALL dbcsr_copy(tmp_vr_vr_blk, &
    5280           0 :                       almo_scf_env%opt_k_t_rr(ispin))
    5281             :       CALL dbcsr_multiply("N", "N", 1.0_dp, &
    5282             :                           tmp_vr_vr_blk, &
    5283             :                           s_rr_sqrt, &
    5284             :                           0.0_dp, almo_scf_env%opt_k_t_rr(ispin), &
    5285           0 :                           filter_eps=eps_filter)
    5286           0 :       CALL dbcsr_release(s_rr_sqrt)
    5287           0 :       CALL dbcsr_release(tmp_vr_vr_blk)
    5288             : 
    5289             :       ! copy diagonal elements of the result into cols of a matrix
    5290             :       CALL dbcsr_multiply("N", "N", 1.0_dp, &
    5291             :                           tmp, opt_k_e_rr, &
    5292             :                           0.0_dp, almo_scf_env%opt_k_denom(ispin), &
    5293           0 :                           filter_eps=eps_filter)
    5294           0 :       CALL dbcsr_release(opt_k_e_rr)
    5295           0 :       CALL dbcsr_release(tmp)
    5296             : 
    5297             :       ! form the denominator matrix
    5298             :       CALL dbcsr_add(almo_scf_env%opt_k_denom(ispin), t1, &
    5299           0 :                      -1.0_dp, 1.0_dp)
    5300           0 :       CALL dbcsr_release(t1)
    5301             :       CALL dbcsr_scale(almo_scf_env%opt_k_denom(ispin), &
    5302           0 :                        2.0_dp*spin_factor)
    5303             : 
    5304           0 :       CALL inverse_of_elements(almo_scf_env%opt_k_denom(ispin))
    5305             :       CALL dbcsr_filter(almo_scf_env%opt_k_denom(ispin), &
    5306           0 :                         eps_filter)
    5307             : 
    5308           0 :       CALL timestop(handle)
    5309             : 
    5310           0 :    END SUBROUTINE opt_k_create_preconditioner_blk
    5311             : 
    5312             : ! **************************************************************************************************
    5313             : !> \brief Applies a block-diagonal preconditioner for the optimization of
    5314             : !>        k matrix (preconditioner matrices must be calculated and stored
    5315             : !>        beforehand)
    5316             : !> \param almo_scf_env ...
    5317             : !> \param step ...
    5318             : !> \param grad ...
    5319             : !> \param ispin ...
    5320             : !> \par History
    5321             : !>       2011.10 created [Rustam Z Khaliullin]
    5322             : !> \author Rustam Z Khaliullin
    5323             : ! **************************************************************************************************
    5324           0 :    SUBROUTINE opt_k_apply_preconditioner_blk(almo_scf_env, step, grad, ispin)
    5325             : 
    5326             :       TYPE(almo_scf_env_type), INTENT(INOUT)             :: almo_scf_env
    5327             :       TYPE(dbcsr_type), INTENT(OUT)                      :: step
    5328             :       TYPE(dbcsr_type), INTENT(IN)                       :: grad
    5329             :       INTEGER, INTENT(IN)                                :: ispin
    5330             : 
    5331             :       CHARACTER(len=*), PARAMETER :: routineN = 'opt_k_apply_preconditioner_blk'
    5332             : 
    5333             :       INTEGER                                            :: handle
    5334             :       REAL(KIND=dp)                                      :: eps_filter
    5335             :       TYPE(dbcsr_type)                                   :: tmp_k
    5336             : 
    5337           0 :       CALL timeset(routineN, handle)
    5338             : 
    5339           0 :       eps_filter = almo_scf_env%eps_filter
    5340             : 
    5341           0 :       CALL dbcsr_create(tmp_k, template=almo_scf_env%matrix_k_blk(ispin))
    5342             : 
    5343             :       ! transform gradient to the correct "diagonal" basis
    5344             :       CALL dbcsr_multiply("N", "N", 1.0_dp, &
    5345             :                           grad, almo_scf_env%opt_k_t_rr(ispin), &
    5346           0 :                           0.0_dp, tmp_k, filter_eps=eps_filter)
    5347             :       CALL dbcsr_multiply("T", "N", 1.0_dp, &
    5348             :                           almo_scf_env%opt_k_t_dd(ispin), tmp_k, &
    5349           0 :                           0.0_dp, step, filter_eps=eps_filter)
    5350             : 
    5351             :       ! apply diagonal preconditioner
    5352             :       CALL dbcsr_hadamard_product(step, &
    5353           0 :                                   almo_scf_env%opt_k_denom(ispin), tmp_k)
    5354             : 
    5355             :       ! back-transform the result to the initial basis
    5356             :       CALL dbcsr_multiply("N", "N", 1.0_dp, &
    5357             :                           almo_scf_env%opt_k_t_dd(ispin), tmp_k, &
    5358           0 :                           0.0_dp, step, filter_eps=eps_filter)
    5359             :       CALL dbcsr_multiply("N", "T", 1.0_dp, &
    5360             :                           step, almo_scf_env%opt_k_t_rr(ispin), &
    5361           0 :                           0.0_dp, tmp_k, filter_eps=eps_filter)
    5362             : 
    5363           0 :       CALL dbcsr_copy(step, tmp_k)
    5364             : 
    5365           0 :       CALL dbcsr_release(tmp_k)
    5366             : 
    5367           0 :       CALL timestop(handle)
    5368             : 
    5369           0 :    END SUBROUTINE opt_k_apply_preconditioner_blk
    5370             : 
    5371             : !! **************************************************************************************************
    5372             : !!> \brief Reduce the number of virtual orbitals by rotating them within
    5373             : !!>        a domain. The rotation is such that minimizes the frobenius norm of
    5374             : !!>        the Fov domain-blocks of the discarded virtuals
    5375             : !!> \par History
    5376             : !!>       2011.08 created [Rustam Z Khaliullin]
    5377             : !!> \author Rustam Z Khaliullin
    5378             : !! **************************************************************************************************
    5379             : !  SUBROUTINE truncate_subspace_v_blk(qs_env,almo_scf_env)
    5380             : !
    5381             : !    TYPE(qs_environment_type), POINTER       :: qs_env
    5382             : !    TYPE(almo_scf_env_type)                  :: almo_scf_env
    5383             : !
    5384             : !    CHARACTER(len=*), PARAMETER :: routineN = 'truncate_subspace_v_blk', &
    5385             : !      routineP = moduleN//':'//routineN
    5386             : !
    5387             : !    INTEGER                                  :: handle, ispin, iblock_row, &
    5388             : !                                                iblock_col, iblock_row_size, &
    5389             : !                                                iblock_col_size, retained_v, &
    5390             : !                                                iteration, line_search_step, &
    5391             : !                                                unit_nr, line_search_step_last
    5392             : !    REAL(KIND=dp)                            :: t1, obj_function, grad_norm,&
    5393             : !                                                c0, b0, a0, obj_function_new,&
    5394             : !                                                t2, alpha, ff1, ff2, step1,&
    5395             : !                                                step2,&
    5396             : !                                                frob_matrix_base,&
    5397             : !                                                frob_matrix
    5398             : !    LOGICAL                                  :: safe_mode, converged, &
    5399             : !                                                prepare_to_exit, failure
    5400             : !    TYPE(cp_logger_type), POINTER            :: logger
    5401             : !    TYPE(dbcsr_type)                      :: Fon, Fov, Fov_filtered, &
    5402             : !                                                temp1_oo, temp2_oo, Fov_original, &
    5403             : !                                                temp0_ov, U_blk_tot, U_blk, &
    5404             : !                                                grad_blk, step_blk, matrix_filter, &
    5405             : !                                                v_full_new,v_full_tmp,&
    5406             : !                                                matrix_sigma_vv_full,&
    5407             : !                                                matrix_sigma_vv_full_sqrt,&
    5408             : !                                                matrix_sigma_vv_full_sqrt_inv,&
    5409             : !                                                matrix_tmp1,&
    5410             : !                                                matrix_tmp2
    5411             : !
    5412             : !    REAL(kind=dp), DIMENSION(:, :), POINTER  :: data_p, p_new_block
    5413             : !    TYPE(dbcsr_iterator_type)                  :: iter
    5414             : !
    5415             : !
    5416             : !REAL(kind=dp), DIMENSION(:), ALLOCATABLE     :: eigenvalues, WORK
    5417             : !REAL(kind=dp), DIMENSION(:,:), ALLOCATABLE   :: data_copy, left_vectors, right_vectors
    5418             : !INTEGER                                      :: LWORK, INFO
    5419             : !TYPE(dbcsr_type)                          :: temp_u_v_full_blk
    5420             : !
    5421             : !    CALL timeset(routineN,handle)
    5422             : !
    5423             : !    safe_mode=.TRUE.
    5424             : !
    5425             : !    ! get a useful output_unit
    5426             : !    logger => cp_get_default_logger()
    5427             : !    IF (logger%para_env%is_source()) THEN
    5428             : !       unit_nr=cp_logger_get_default_unit_nr(logger,local=.TRUE.)
    5429             : !    ELSE
    5430             : !       unit_nr=-1
    5431             : !    ENDIF
    5432             : !
    5433             : !    DO ispin=1,almo_scf_env%nspins
    5434             : !
    5435             : !       t1 = m_walltime()
    5436             : !
    5437             : !       !!!!!!!!!!!!!!!!!
    5438             : !       ! 0. Orthogonalize virtuals
    5439             : !       !    Unfortunately, we have to do it in the FULL V subspace :(
    5440             : !
    5441             : !       CALL dbcsr_init(v_full_new)
    5442             : !       CALL dbcsr_create(v_full_new,&
    5443             : !               template=almo_scf_env%matrix_v_full_blk(ispin),&
    5444             : !               matrix_type=dbcsr_type_no_symmetry)
    5445             : !
    5446             : !       ! project the occupied subspace out
    5447             : !       CALL almo_scf_p_out_from_v(almo_scf_env%matrix_v_full_blk(ispin),&
    5448             : !              v_full_new,almo_scf_env%matrix_ov_full(ispin),&
    5449             : !              ispin,almo_scf_env)
    5450             : !
    5451             : !       ! init overlap and its functions
    5452             : !       CALL dbcsr_init(matrix_sigma_vv_full)
    5453             : !       CALL dbcsr_init(matrix_sigma_vv_full_sqrt)
    5454             : !       CALL dbcsr_init(matrix_sigma_vv_full_sqrt_inv)
    5455             : !       CALL dbcsr_create(matrix_sigma_vv_full,&
    5456             : !               template=almo_scf_env%matrix_vv_full_blk(ispin),&
    5457             : !               matrix_type=dbcsr_type_no_symmetry)
    5458             : !       CALL dbcsr_create(matrix_sigma_vv_full_sqrt,&
    5459             : !               template=almo_scf_env%matrix_vv_full_blk(ispin),&
    5460             : !               matrix_type=dbcsr_type_no_symmetry)
    5461             : !       CALL dbcsr_create(matrix_sigma_vv_full_sqrt_inv,&
    5462             : !               template=almo_scf_env%matrix_vv_full_blk(ispin),&
    5463             : !               matrix_type=dbcsr_type_no_symmetry)
    5464             : !
    5465             : !       ! construct VV overlap
    5466             : !       CALL almo_scf_mo_to_sigma(v_full_new,&
    5467             : !               matrix_sigma_vv_full,&
    5468             : !               almo_scf_env%matrix_s(1),&
    5469             : !               almo_scf_env%eps_filter)
    5470             : !
    5471             : !       IF (unit_nr>0) THEN
    5472             : !          WRITE(unit_nr,*) "sqrt and inv(sqrt) of the FULL virtual MO overlap"
    5473             : !       ENDIF
    5474             : !
    5475             : !       ! construct orthogonalization matrices
    5476             : !       CALL matrix_sqrt_Newton_Schulz(matrix_sigma_vv_full_sqrt,&
    5477             : !                                      matrix_sigma_vv_full_sqrt_inv,&
    5478             : !                                      matrix_sigma_vv_full,&
    5479             : !                                      threshold=almo_scf_env%eps_filter,&
    5480             : !                                      order=almo_scf_env%order_lanczos,&
    5481             : !                                      eps_lanczos=almo_scf_env%eps_lanczos,&
    5482             : !                                      max_iter_lanczos=almo_scf_env%max_iter_lanczos)
    5483             : !       IF (safe_mode) THEN
    5484             : !          CALL dbcsr_init(matrix_tmp1)
    5485             : !          CALL dbcsr_create(matrix_tmp1,template=matrix_sigma_vv_full,&
    5486             : !                               matrix_type=dbcsr_type_no_symmetry)
    5487             : !          CALL dbcsr_init(matrix_tmp2)
    5488             : !          CALL dbcsr_create(matrix_tmp2,template=matrix_sigma_vv_full,&
    5489             : !                               matrix_type=dbcsr_type_no_symmetry)
    5490             : !
    5491             : !          CALL dbcsr_multiply("N","N",1.0_dp,matrix_sigma_vv_full_sqrt_inv,&
    5492             : !                                 matrix_sigma_vv_full,&
    5493             : !                                 0.0_dp,matrix_tmp1,filter_eps=almo_scf_env%eps_filter)
    5494             : !          CALL dbcsr_multiply("N","N",1.0_dp,matrix_tmp1,&
    5495             : !                                 matrix_sigma_vv_full_sqrt_inv,&
    5496             : !                                 0.0_dp,matrix_tmp2,filter_eps=almo_scf_env%eps_filter)
    5497             : !
    5498             : !          frob_matrix_base=dbcsr_frobenius_norm(matrix_tmp2)
    5499             : !          CALL dbcsr_add_on_diag(matrix_tmp2,-1.0_dp)
    5500             : !          frob_matrix=dbcsr_frobenius_norm(matrix_tmp2)
    5501             : !          IF (unit_nr>0) THEN
    5502             : !             WRITE(unit_nr,*) "Error for (inv(sqrt(SIGVV))*SIGVV*inv(sqrt(SIGVV))-I)",frob_matrix/frob_matrix_base
    5503             : !          ENDIF
    5504             : !
    5505             : !          CALL dbcsr_release(matrix_tmp1)
    5506             : !          CALL dbcsr_release(matrix_tmp2)
    5507             : !       ENDIF
    5508             : !
    5509             : !       ! discard unnecessary overlap functions
    5510             : !       CALL dbcsr_release(matrix_sigma_vv_full)
    5511             : !       CALL dbcsr_release(matrix_sigma_vv_full_sqrt)
    5512             : !
    5513             : !! this can be re-written because we have (1-P)|v>
    5514             : !
    5515             : !       !!!!!!!!!!!!!!!!!!!
    5516             : !       ! 1. Compute F_ov
    5517             : !       CALL dbcsr_init(Fon)
    5518             : !       CALL dbcsr_create(Fon,&
    5519             : !               template=almo_scf_env%matrix_v_full_blk(ispin))
    5520             : !       CALL dbcsr_init(Fov)
    5521             : !       CALL dbcsr_create(Fov,&
    5522             : !               template=almo_scf_env%matrix_ov_full(ispin))
    5523             : !       CALL dbcsr_init(Fov_filtered)
    5524             : !       CALL dbcsr_create(Fov_filtered,&
    5525             : !               template=almo_scf_env%matrix_ov_full(ispin))
    5526             : !       CALL dbcsr_init(temp1_oo)
    5527             : !       CALL dbcsr_create(temp1_oo,&
    5528             : !               template=almo_scf_env%matrix_sigma(ispin),&
    5529             : !               !matrix_type=dbcsr_type_no_symmetry)
    5530             : !       CALL dbcsr_init(temp2_oo)
    5531             : !       CALL dbcsr_create(temp2_oo,&
    5532             : !               template=almo_scf_env%matrix_sigma(ispin),&
    5533             : !               matrix_type=dbcsr_type_no_symmetry)
    5534             : !
    5535             : !       CALL dbcsr_multiply("T","N",1.0_dp,almo_scf_env%matrix_t_blk(ispin),&
    5536             : !               almo_scf_env%matrix_ks_0deloc(ispin),&
    5537             : !               0.0_dp,Fon,filter_eps=almo_scf_env%eps_filter)
    5538             : !
    5539             : !       CALL dbcsr_multiply("N","N",1.0_dp,Fon,&
    5540             : !               almo_scf_env%matrix_v_full_blk(ispin),&
    5541             : !               0.0_dp,Fov,filter_eps=almo_scf_env%eps_filter)
    5542             : !
    5543             : !       CALL dbcsr_multiply("N","N",1.0_dp,Fon,&
    5544             : !               almo_scf_env%matrix_t_blk(ispin),&
    5545             : !               0.0_dp,temp1_oo,filter_eps=almo_scf_env%eps_filter)
    5546             : !
    5547             : !       CALL dbcsr_multiply("N","N",1.0_dp,temp1_oo,&
    5548             : !               almo_scf_env%matrix_sigma_inv(ispin),&
    5549             : !               0.0_dp,temp2_oo,filter_eps=almo_scf_env%eps_filter)
    5550             : !       CALL dbcsr_release(temp1_oo)
    5551             : !
    5552             : !       CALL dbcsr_multiply("T","N",1.0_dp,almo_scf_env%matrix_t_blk(ispin),&
    5553             : !               almo_scf_env%matrix_s(1),&
    5554             : !               0.0_dp,Fon,filter_eps=almo_scf_env%eps_filter)
    5555             : !
    5556             : !       CALL dbcsr_multiply("N","N",1.0_dp,Fon,&
    5557             : !               almo_scf_env%matrix_v_full_blk(ispin),&
    5558             : !               0.0_dp,Fov_filtered,filter_eps=almo_scf_env%eps_filter)
    5559             : !       CALL dbcsr_release(Fon)
    5560             : !
    5561             : !       CALL dbcsr_multiply("N","N",-1.0_dp,temp2_oo,&
    5562             : !               Fov_filtered,&
    5563             : !               1.0_dp,Fov,filter_eps=almo_scf_env%eps_filter)
    5564             : !       CALL dbcsr_release(temp2_oo)
    5565             : !
    5566             : !       CALL dbcsr_multiply("N","N",1.0_dp,almo_scf_env%matrix_sigma_inv(ispin),&
    5567             : !               Fov,0.0_dp,Fov_filtered,filter_eps=almo_scf_env%eps_filter)
    5568             : !
    5569             : !       CALL dbcsr_multiply("N","N",1.0_dp,Fov_filtered,&
    5570             : !               matrix_sigma_vv_full_sqrt_inv,&
    5571             : !               0.0_dp,Fov,filter_eps=almo_scf_env%eps_filter)
    5572             : !       !CALL dbcsr_copy(Fov,Fov_filtered)
    5573             : !CALL dbcsr_print(Fov)
    5574             : !
    5575             : !       IF (safe_mode) THEN
    5576             : !          CALL dbcsr_init(Fov_original)
    5577             : !          CALL dbcsr_create(Fov_original,template=Fov)
    5578             : !          CALL dbcsr_copy(Fov_original,Fov)
    5579             : !       ENDIF
    5580             : !
    5581             : !!! remove diagonal blocks
    5582             : !!CALL dbcsr_iterator_start(iter,Fov)
    5583             : !!DO WHILE (dbcsr_iterator_blocks_left(iter))
    5584             : !!
    5585             : !!   CALL dbcsr_iterator_next_block(iter,iblock_row,iblock_col,data_p,&
    5586             : !!           row_size=iblock_row_size,col_size=iblock_col_size)
    5587             : !!
    5588             : !!   IF (iblock_row.eq.iblock_col) data_p(:,:)=0.0_dp
    5589             : !!
    5590             : !!ENDDO
    5591             : !!CALL dbcsr_iterator_stop(iter)
    5592             : !!CALL dbcsr_finalize(Fov)
    5593             : !
    5594             : !!! perform svd of blocks
    5595             : !!!!! THIS ROUTINE WORKS ONLY ON ONE CPU AND ONLY FOR 2 MOLECULES !!!
    5596             : !!CALL dbcsr_init(temp_u_v_full_blk)
    5597             : !!CALL dbcsr_create(temp_u_v_full_blk,&
    5598             : !!        template=almo_scf_env%matrix_vv_full_blk(ispin),&
    5599             : !!        matrix_type=dbcsr_type_no_symmetry)
    5600             : !!
    5601             : !!CALL dbcsr_work_create(temp_u_v_full_blk,&
    5602             : !!        work_mutable=.TRUE.)
    5603             : !!CALL dbcsr_iterator_start(iter,Fov)
    5604             : !!DO WHILE (dbcsr_iterator_blocks_left(iter))
    5605             : !!
    5606             : !!   CALL dbcsr_iterator_next_block(iter,iblock_row,iblock_col,data_p,&
    5607             : !!           row_size=iblock_row_size,col_size=iblock_col_size)
    5608             : !!
    5609             : !!   IF (iblock_row.ne.iblock_col) THEN
    5610             : !!
    5611             : !!      ! Prepare data
    5612             : !!      allocate(eigenvalues(min(iblock_row_size,iblock_col_size)))
    5613             : !!      allocate(data_copy(iblock_row_size,iblock_col_size))
    5614             : !!      allocate(left_vectors(iblock_row_size,iblock_row_size))
    5615             : !!      allocate(right_vectors(iblock_col_size,iblock_col_size))
    5616             : !!      data_copy(:,:)=data_p(:,:)
    5617             : !!
    5618             : !!      ! Query the optimal workspace for dgesvd
    5619             : !!      LWORK = -1
    5620             : !!      allocate(WORK(MAX(1,LWORK)))
    5621             : !!      CALL DGESVD('N','A',iblock_row_size,iblock_col_size,data_copy,&
    5622             : !!              iblock_row_size,eigenvalues,left_vectors,iblock_row_size,&
    5623             : !!              right_vectors,iblock_col_size,WORK,LWORK,INFO)
    5624             : !!      LWORK = INT(WORK( 1 ))
    5625             : !!      deallocate(WORK)
    5626             : !!
    5627             : !!      ! Allocate the workspace and perform svd
    5628             : !!      allocate(WORK(MAX(1,LWORK)))
    5629             : !!      CALL DGESVD('N','A',iblock_row_size,iblock_col_size,data_copy,&
    5630             : !!              iblock_row_size,eigenvalues,left_vectors,iblock_row_size,&
    5631             : !!              right_vectors,iblock_col_size,WORK,LWORK,INFO)
    5632             : !!      deallocate(WORK)
    5633             : !!      IF( INFO.NE.0 ) THEN
    5634             : !!         CPABORT("DGESVD failed")
    5635             : !!      END IF
    5636             : !!
    5637             : !!      ! copy right singular vectors into a unitary matrix
    5638             : !!      NULLIFY (p_new_block)
    5639             : !!      CALL dbcsr_reserve_block2d(temp_u_v_full_blk,iblock_col,iblock_col,p_new_block)
    5640             : !!      CPASSERT(ASSOCIATED(p_new_block))
    5641             : !!      p_new_block(:,:) = right_vectors(:,:)
    5642             : !!
    5643             : !!      deallocate(eigenvalues)
    5644             : !!      deallocate(data_copy)
    5645             : !!      deallocate(left_vectors)
    5646             : !!      deallocate(right_vectors)
    5647             : !!
    5648             : !!   ENDIF
    5649             : !!ENDDO
    5650             : !!CALL dbcsr_iterator_stop(iter)
    5651             : !!CALL dbcsr_finalize(temp_u_v_full_blk)
    5652             : !!!CALL dbcsr_print(temp_u_v_full_blk)
    5653             : !!CALL dbcsr_multiply("N","T",1.0_dp,Fov,temp_u_v_full_blk,&
    5654             : !!        0.0_dp,Fov_filtered,filter_eps=almo_scf_env%eps_filter)
    5655             : !!
    5656             : !!CALL dbcsr_copy(Fov,Fov_filtered)
    5657             : !!CALL dbcsr_print(Fov)
    5658             : !
    5659             : !       !!!!!!!!!!!!!!!!!!!
    5660             : !       ! 2. Initialize variables
    5661             : !
    5662             : !       ! temp space
    5663             : !       CALL dbcsr_init(temp0_ov)
    5664             : !       CALL dbcsr_create(temp0_ov,&
    5665             : !               template=almo_scf_env%matrix_ov_full(ispin))
    5666             : !
    5667             : !       ! current unitary matrix
    5668             : !       CALL dbcsr_init(U_blk)
    5669             : !       CALL dbcsr_create(U_blk,&
    5670             : !               template=almo_scf_env%matrix_vv_full_blk(ispin),&
    5671             : !               matrix_type=dbcsr_type_no_symmetry)
    5672             : !
    5673             : !       ! unitary matrix accumulator
    5674             : !       CALL dbcsr_init(U_blk_tot)
    5675             : !       CALL dbcsr_create(U_blk_tot,&
    5676             : !               template=almo_scf_env%matrix_vv_full_blk(ispin),&
    5677             : !               matrix_type=dbcsr_type_no_symmetry)
    5678             : !       CALL dbcsr_add_on_diag(U_blk_tot,1.0_dp)
    5679             : !
    5680             : !!CALL dbcsr_add_on_diag(U_blk,1.0_dp)
    5681             : !!CALL dbcsr_multiply("N","T",1.0_dp,U_blk,temp_u_v_full_blk,&
    5682             : !!        0.0_dp,U_blk_tot,filter_eps=almo_scf_env%eps_filter)
    5683             : !!
    5684             : !!CALL dbcsr_release(temp_u_v_full_blk)
    5685             : !
    5686             : !       ! init gradient
    5687             : !       CALL dbcsr_init(grad_blk)
    5688             : !       CALL dbcsr_create(grad_blk,&
    5689             : !               template=almo_scf_env%matrix_vv_full_blk(ispin),&
    5690             : !               matrix_type=dbcsr_type_no_symmetry)
    5691             : !
    5692             : !       ! init step matrix
    5693             : !       CALL dbcsr_init(step_blk)
    5694             : !       CALL dbcsr_create(step_blk,&
    5695             : !               template=almo_scf_env%matrix_vv_full_blk(ispin),&
    5696             : !               matrix_type=dbcsr_type_no_symmetry)
    5697             : !
    5698             : !       ! "retain discarded" filter (0.0 - retain, 1.0 - discard)
    5699             : !       CALL dbcsr_init(matrix_filter)
    5700             : !       CALL dbcsr_create(matrix_filter,&
    5701             : !               template=almo_scf_env%matrix_ov_full(ispin))
    5702             : !       ! copy Fov into the filter matrix temporarily
    5703             : !       ! so we know which blocks contain significant elements
    5704             : !       CALL dbcsr_copy(matrix_filter,Fov)
    5705             : !
    5706             : !       ! fill out filter elements block-by-block
    5707             : !       CALL dbcsr_iterator_start(iter,matrix_filter)
    5708             : !       DO WHILE (dbcsr_iterator_blocks_left(iter))
    5709             : !
    5710             : !          CALL dbcsr_iterator_next_block(iter,iblock_row,iblock_col,data_p,&
    5711             : !                  row_size=iblock_row_size,col_size=iblock_col_size)
    5712             : !
    5713             : !          retained_v=almo_scf_env%nvirt_of_domain(iblock_col,ispin)
    5714             : !
    5715             : !          data_p(:,1:retained_v)=0.0_dp
    5716             : !          data_p(:,(retained_v+1):iblock_col_size)=1.0_dp
    5717             : !
    5718             : !       ENDDO
    5719             : !       CALL dbcsr_iterator_stop(iter)
    5720             : !       CALL dbcsr_finalize(matrix_filter)
    5721             : !
    5722             : !       ! apply the filter
    5723             : !       CALL dbcsr_hadamard_product(Fov,matrix_filter,Fov_filtered)
    5724             : !
    5725             : !       !!!!!!!!!!!!!!!!!!!!!
    5726             : !       ! 3. start iterative minimization of the elements to be discarded
    5727             : !       iteration=0
    5728             : !       converged=.FALSE.
    5729             : !       prepare_to_exit=.FALSE.
    5730             : !       DO
    5731             : !
    5732             : !          iteration=iteration+1
    5733             : !
    5734             : !          !!!!!!!!!!!!!!!!!!!!!!!!!
    5735             : !          ! 4. compute the gradient
    5736             : !          CALL dbcsr_set(grad_blk,0.0_dp)
    5737             : !          ! create the diagonal blocks only
    5738             : !          CALL dbcsr_add_on_diag(grad_blk,1.0_dp)
    5739             : !
    5740             : !          CALL dbcsr_multiply("T","N",2.0_dp,Fov_filtered,Fov,&
    5741             : !                  0.0_dp,grad_blk,retain_sparsity=.TRUE.,&
    5742             : !                  filter_eps=almo_scf_env%eps_filter)
    5743             : !          CALL dbcsr_multiply("T","N",-2.0_dp,Fov,Fov_filtered,&
    5744             : !                  1.0_dp,grad_blk,retain_sparsity=.TRUE.,&
    5745             : !                  filter_eps=almo_scf_env%eps_filter)
    5746             : !
    5747             : !          !!!!!!!!!!!!!!!!!!!!!!!
    5748             : !          ! 5. check convergence
    5749             : !          obj_function = 0.5_dp*(dbcsr_frobenius_norm(Fov_filtered))**2
    5750             : !          grad_norm = dbcsr_frobenius_norm(grad_blk)
    5751             : !          converged=(grad_norm.lt.almo_scf_env%truncate_v_eps_convergence)
    5752             : !          IF (converged.OR.(iteration.ge.almo_scf_env%truncate_v_max_iter)) THEN
    5753             : !             prepare_to_exit=.TRUE.
    5754             : !          ENDIF
    5755             : !
    5756             : !          IF (.NOT.prepare_to_exit) THEN
    5757             : !
    5758             : !             !!!!!!!!!!!!!!!!!!!!!!!
    5759             : !             ! 6. perform steps in the direction of the gradient
    5760             : !             !    a. first, perform a trial step to "see" the parameters
    5761             : !             !       of the parabola along the gradient:
    5762             : !             !       a0 * x^2 + b0 * x + c0
    5763             : !             !    b. then perform the step to the bottom of the parabola
    5764             : !
    5765             : !             ! get c0
    5766             : !             c0 = obj_function
    5767             : !             ! get b0 <= d_f/d_alpha along grad
    5768             : !             !!!CALL dbcsr_multiply("N","N",4.0_dp,Fov,grad_blk,&
    5769             : !             !!!        0.0_dp,temp0_ov,&
    5770             : !             !!!        filter_eps=almo_scf_env%eps_filter)
    5771             : !             !!!CALL dbcsr_dot(Fov_filtered,temp0_ov,b0)
    5772             : !
    5773             : !             alpha=almo_scf_env%truncate_v_trial_step_size
    5774             : !
    5775             : !             line_search_step_last=3
    5776             : !             DO line_search_step=1,line_search_step_last
    5777             : !                CALL dbcsr_copy(step_blk,grad_blk)
    5778             : !                CALL dbcsr_scale(step_blk,-1.0_dp*alpha)
    5779             : !                CALL generator_to_unitary(step_blk,U_blk,&
    5780             : !                        almo_scf_env%eps_filter)
    5781             : !                CALL dbcsr_multiply("N","N",1.0_dp,Fov,U_blk,0.0_dp,temp0_ov,&
    5782             : !                        filter_eps=almo_scf_env%eps_filter)
    5783             : !                CALL dbcsr_hadamard_product(temp0_ov,matrix_filter,&
    5784             : !                        Fov_filtered)
    5785             : !
    5786             : !                obj_function_new = 0.5_dp*(dbcsr_frobenius_norm(Fov_filtered))**2
    5787             : !                IF (line_search_step.eq.1) THEN
    5788             : !                   ff1 = obj_function_new
    5789             : !                   step1 = alpha
    5790             : !                ELSE IF (line_search_step.eq.2) THEN
    5791             : !                   ff2 = obj_function_new
    5792             : !                   step2 = alpha
    5793             : !                ENDIF
    5794             : !
    5795             : !                IF (unit_nr>0.AND.(line_search_step.ne.line_search_step_last)) THEN
    5796             : !                   WRITE(unit_nr,'(T6,A,1X,I3,1X,F10.3,E12.3,E12.3,E12.3)') &
    5797             : !                         "JOINT_SVD_lin",&
    5798             : !                         iteration,&
    5799             : !                         alpha,&
    5800             : !                         obj_function,&
    5801             : !                         obj_function_new,&
    5802             : !                         obj_function_new-obj_function
    5803             : !                ENDIF
    5804             : !
    5805             : !                IF (line_search_step.eq.1) THEN
    5806             : !                   alpha=2.0_dp*alpha
    5807             : !                ENDIF
    5808             : !                IF (line_search_step.eq.2) THEN
    5809             : !                   a0 = ((ff1-c0)/step1 - (ff2-c0)/step2) / (step1 - step2)
    5810             : !                   b0 = (ff1-c0)/step1 - a0*step1
    5811             : !                   ! step size in to the bottom of "the parabola"
    5812             : !                   alpha=-b0/(2.0_dp*a0)
    5813             : !                   ! update the default step size
    5814             : !                   almo_scf_env%truncate_v_trial_step_size=alpha
    5815             : !                ENDIF
    5816             : !                !!!IF (line_search_step.eq.1) THEN
    5817             : !                !!!   a0 = (obj_function_new - b0 * alpha - c0) / (alpha*alpha)
    5818             : !                !!!   ! step size in to the bottom of "the parabola"
    5819             : !                !!!   alpha=-b0/(2.0_dp*a0)
    5820             : !                !!!   !IF (alpha.gt.10.0_dp) alpha=10.0_dp
    5821             : !                !!!ENDIF
    5822             : !
    5823             : !             ENDDO
    5824             : !
    5825             : !             ! update Fov and U_blk_tot (use grad_blk as tmp storage)
    5826             : !             CALL dbcsr_copy(Fov,temp0_ov)
    5827             : !             CALL dbcsr_multiply("N","N",1.0_dp,U_blk_tot,U_blk,&
    5828             : !                     0.0_dp,grad_blk,&
    5829             : !                     filter_eps=almo_scf_env%eps_filter)
    5830             : !             CALL dbcsr_copy(U_blk_tot,grad_blk)
    5831             : !
    5832             : !          ENDIF
    5833             : !
    5834             : !          t2 = m_walltime()
    5835             : !
    5836             : !          IF (unit_nr>0) THEN
    5837             : !             WRITE(unit_nr,'(T6,A,1X,I3,1X,F10.3,E12.3,E12.3,E12.3,E12.3,F10.3)') &
    5838             : !                   "JOINT_SVD_itr",&
    5839             : !                   iteration,&
    5840             : !                   alpha,&
    5841             : !                   obj_function,&
    5842             : !                   obj_function_new,&
    5843             : !                   obj_function_new-obj_function,&
    5844             : !                   grad_norm,&
    5845             : !                   t2-t1
    5846             : !                   !(flop1+flop2)/(1.0E6_dp*(t2-t1))
    5847             : !             CALL m_flush(unit_nr)
    5848             : !          ENDIF
    5849             : !
    5850             : !          t1 = m_walltime()
    5851             : !
    5852             : !          IF (prepare_to_exit) EXIT
    5853             : !
    5854             : !       ENDDO ! stop iterations
    5855             : !
    5856             : !       IF (safe_mode) THEN
    5857             : !          CALL dbcsr_multiply("N","N",1.0_dp,Fov_original,&
    5858             : !                  U_blk_tot,0.0_dp,temp0_ov,&
    5859             : !                  filter_eps=almo_scf_env%eps_filter)
    5860             : !CALL dbcsr_print(temp0_ov)
    5861             : !          CALL dbcsr_hadamard_product(temp0_ov,matrix_filter,&
    5862             : !                  Fov_filtered)
    5863             : !          obj_function_new = 0.5_dp*(dbcsr_frobenius_norm(Fov_filtered))**2
    5864             : !
    5865             : !          IF (unit_nr>0) THEN
    5866             : !             WRITE(unit_nr,'(T6,A,1X,E12.3)') &
    5867             : !                   "SANITY CHECK:",&
    5868             : !                   obj_function_new
    5869             : !             CALL m_flush(unit_nr)
    5870             : !          ENDIF
    5871             : !
    5872             : !          CALL dbcsr_release(Fov_original)
    5873             : !       ENDIF
    5874             : !
    5875             : !       CALL dbcsr_release(temp0_ov)
    5876             : !       CALL dbcsr_release(U_blk)
    5877             : !       CALL dbcsr_release(grad_blk)
    5878             : !       CALL dbcsr_release(step_blk)
    5879             : !       CALL dbcsr_release(matrix_filter)
    5880             : !       CALL dbcsr_release(Fov)
    5881             : !       CALL dbcsr_release(Fov_filtered)
    5882             : !
    5883             : !       ! compute rotated virtual orbitals
    5884             : !       CALL dbcsr_init(v_full_tmp)
    5885             : !       CALL dbcsr_create(v_full_tmp,&
    5886             : !               template=almo_scf_env%matrix_v_full_blk(ispin),&
    5887             : !               matrix_type=dbcsr_type_no_symmetry)
    5888             : !       CALL dbcsr_multiply("N","N",1.0_dp,&
    5889             : !               v_full_new,&
    5890             : !               matrix_sigma_vv_full_sqrt_inv,0.0_dp,v_full_tmp,&
    5891             : !               filter_eps=almo_scf_env%eps_filter)
    5892             : !       CALL dbcsr_multiply("N","N",1.0_dp,&
    5893             : !               v_full_tmp,&
    5894             : !               U_blk_tot,0.0_dp,v_full_new,&
    5895             : !               filter_eps=almo_scf_env%eps_filter)
    5896             : !
    5897             : !       CALL dbcsr_release(matrix_sigma_vv_full_sqrt_inv)
    5898             : !       CALL dbcsr_release(v_full_tmp)
    5899             : !       CALL dbcsr_release(U_blk_tot)
    5900             : !
    5901             : !!!!! orthogonalized virtuals are not blocked
    5902             : !       ! copy new virtuals into the truncated matrix
    5903             : !       !CALL dbcsr_work_create(almo_scf_env%matrix_v_blk(ispin),&
    5904             : !       CALL dbcsr_work_create(almo_scf_env%matrix_v(ispin),&
    5905             : !               work_mutable=.TRUE.)
    5906             : !       CALL dbcsr_iterator_start(iter,v_full_new)
    5907             : !       DO WHILE (dbcsr_iterator_blocks_left(iter))
    5908             : !
    5909             : !          CALL dbcsr_iterator_next_block(iter,iblock_row,iblock_col,data_p,&
    5910             : !                  row_size=iblock_row_size,col_size=iblock_col_size)
    5911             : !
    5912             : !          retained_v=almo_scf_env%nvirt_of_domain(iblock_col,ispin)
    5913             : !
    5914             : !          NULLIFY (p_new_block)
    5915             : !          !CALL dbcsr_reserve_block2d(almo_scf_env%matrix_v_blk(ispin),&
    5916             : !          CALL dbcsr_reserve_block2d(almo_scf_env%matrix_v(ispin),&
    5917             : !                  iblock_row,iblock_col,p_new_block)
    5918             : !          CPASSERT(ASSOCIATED(p_new_block))
    5919             : !          CPASSERT(retained_v.gt.0)
    5920             : !          p_new_block(:,:) = data_p(:,1:retained_v)
    5921             : !
    5922             : !       ENDDO ! iterator
    5923             : !       CALL dbcsr_iterator_stop(iter)
    5924             : !       !!CALL dbcsr_finalize(almo_scf_env%matrix_v_blk(ispin))
    5925             : !       CALL dbcsr_finalize(almo_scf_env%matrix_v(ispin))
    5926             : !
    5927             : !       CALL dbcsr_release(v_full_new)
    5928             : !
    5929             : !    ENDDO ! ispin
    5930             : !
    5931             : !    CALL timestop(handle)
    5932             : !
    5933             : !  END SUBROUTINE truncate_subspace_v_blk
    5934             : 
    5935             : ! **************************************************************************************************
    5936             : !> \brief Compute the gradient wrt the main variable (e.g. Theta, X)
    5937             : !> \param m_grad_out ...
    5938             : !> \param m_ks ...
    5939             : !> \param m_s ...
    5940             : !> \param m_t ...
    5941             : !> \param m_t0 ...
    5942             : !> \param m_siginv ...
    5943             : !> \param m_quench_t ...
    5944             : !> \param m_FTsiginv ...
    5945             : !> \param m_siginvTFTsiginv ...
    5946             : !> \param m_ST ...
    5947             : !> \param m_STsiginv0 ...
    5948             : !> \param m_theta ...
    5949             : !> \param domain_s_inv ...
    5950             : !> \param domain_r_down ...
    5951             : !> \param cpu_of_domain ...
    5952             : !> \param domain_map ...
    5953             : !> \param assume_t0_q0x ...
    5954             : !> \param optimize_theta ...
    5955             : !> \param normalize_orbitals ...
    5956             : !> \param penalty_occ_vol ...
    5957             : !> \param penalty_occ_local ...
    5958             : !> \param penalty_occ_vol_prefactor ...
    5959             : !> \param envelope_amplitude ...
    5960             : !> \param eps_filter ...
    5961             : !> \param spin_factor ...
    5962             : !> \param special_case ...
    5963             : !> \param m_sig_sqrti_ii ...
    5964             : !> \param op_sm_set ...
    5965             : !> \param weights ...
    5966             : !> \param energy_coeff ...
    5967             : !> \param localiz_coeff ...
    5968             : !> \par History
    5969             : !>       2015.03 created [Rustam Z Khaliullin]
    5970             : !> \author Rustam Z Khaliullin
    5971             : ! **************************************************************************************************
    5972        1474 :    SUBROUTINE compute_gradient(m_grad_out, m_ks, m_s, m_t, m_t0, &
    5973             :                                m_siginv, m_quench_t, m_FTsiginv, m_siginvTFTsiginv, m_ST, m_STsiginv0, &
    5974        1474 :                                m_theta, domain_s_inv, domain_r_down, &
    5975        1474 :                                cpu_of_domain, domain_map, assume_t0_q0x, optimize_theta, &
    5976             :                                normalize_orbitals, penalty_occ_vol, penalty_occ_local, &
    5977             :                                penalty_occ_vol_prefactor, envelope_amplitude, eps_filter, spin_factor, &
    5978        1474 :                                special_case, m_sig_sqrti_ii, op_sm_set, weights, energy_coeff, &
    5979             :                                localiz_coeff)
    5980             : 
    5981             :       TYPE(dbcsr_type), INTENT(INOUT)                    :: m_grad_out, m_ks, m_s, m_t, m_t0, &
    5982             :                                                             m_siginv, m_quench_t, m_FTsiginv, &
    5983             :                                                             m_siginvTFTsiginv, m_ST, m_STsiginv0, &
    5984             :                                                             m_theta
    5985             :       TYPE(domain_submatrix_type), DIMENSION(:), &
    5986             :          INTENT(IN)                                      :: domain_s_inv, domain_r_down
    5987             :       INTEGER, DIMENSION(:), INTENT(IN)                  :: cpu_of_domain
    5988             :       TYPE(domain_map_type), INTENT(IN)                  :: domain_map
    5989             :       LOGICAL, INTENT(IN)                                :: assume_t0_q0x, optimize_theta, &
    5990             :                                                             normalize_orbitals, penalty_occ_vol
    5991             :       LOGICAL, INTENT(IN), OPTIONAL                      :: penalty_occ_local
    5992             :       REAL(KIND=dp), INTENT(IN)                          :: penalty_occ_vol_prefactor, &
    5993             :                                                             envelope_amplitude, eps_filter, &
    5994             :                                                             spin_factor
    5995             :       INTEGER, INTENT(IN)                                :: special_case
    5996             :       TYPE(dbcsr_type), INTENT(IN), OPTIONAL             :: m_sig_sqrti_ii
    5997             :       TYPE(dbcsr_p_type), DIMENSION(:, :), OPTIONAL, &
    5998             :          POINTER                                         :: op_sm_set
    5999             :       REAL(KIND=dp), DIMENSION(:), INTENT(IN), OPTIONAL  :: weights
    6000             :       REAL(KIND=dp), INTENT(IN), OPTIONAL                :: energy_coeff, localiz_coeff
    6001             : 
    6002             :       CHARACTER(len=*), PARAMETER                        :: routineN = 'compute_gradient'
    6003             : 
    6004             :       INTEGER                                            :: dim0, handle, idim0, nao, reim
    6005             :       LOGICAL                                            :: my_penalty_local
    6006             :       REAL(KIND=dp)                                      :: coeff, energy_g_norm, my_energy_coeff, &
    6007             :                                                             my_localiz_coeff, &
    6008             :                                                             penalty_occ_vol_g_norm
    6009        1474 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: tg_diagonal
    6010             :       TYPE(dbcsr_type)                                   :: m_tmp_no_1, m_tmp_no_2, m_tmp_no_3, &
    6011             :                                                             m_tmp_oo_1, m_tmp_oo_2, temp1, temp2, &
    6012             :                                                             tempNOcc1, tempOccOcc1
    6013             : 
    6014        1474 :       CALL timeset(routineN, handle)
    6015             : 
    6016        1474 :       IF (normalize_orbitals .AND. (.NOT. PRESENT(m_sig_sqrti_ii))) THEN
    6017           0 :          CPABORT("Normalization matrix is required")
    6018             :       END IF
    6019             : 
    6020        1474 :       my_penalty_local = .FALSE.
    6021        1474 :       my_localiz_coeff = 1.0_dp
    6022        1474 :       my_energy_coeff = 0.0_dp
    6023        1474 :       IF (PRESENT(localiz_coeff)) THEN
    6024        1048 :          my_localiz_coeff = localiz_coeff
    6025             :       END IF
    6026        1474 :       IF (PRESENT(energy_coeff)) THEN
    6027        1048 :          my_energy_coeff = energy_coeff
    6028             :       END IF
    6029        1474 :       IF (PRESENT(penalty_occ_local)) THEN
    6030        1048 :          my_penalty_local = penalty_occ_local
    6031             :       END IF
    6032             : 
    6033             :       ! use this otherways unused variables
    6034        1474 :       CALL dbcsr_get_info(matrix=m_ks, nfullrows_total=nao)
    6035        1474 :       CALL dbcsr_get_info(matrix=m_s, nfullrows_total=nao)
    6036        1474 :       CALL dbcsr_get_info(matrix=m_t, nfullrows_total=nao)
    6037             : 
    6038             :       CALL dbcsr_create(m_tmp_no_1, &
    6039             :                         template=m_quench_t, &
    6040        1474 :                         matrix_type=dbcsr_type_no_symmetry)
    6041             :       CALL dbcsr_create(m_tmp_no_2, &
    6042             :                         template=m_quench_t, &
    6043        1474 :                         matrix_type=dbcsr_type_no_symmetry)
    6044             :       CALL dbcsr_create(m_tmp_no_3, &
    6045             :                         template=m_quench_t, &
    6046        1474 :                         matrix_type=dbcsr_type_no_symmetry)
    6047             :       CALL dbcsr_create(m_tmp_oo_1, &
    6048             :                         template=m_siginv, &
    6049        1474 :                         matrix_type=dbcsr_type_no_symmetry)
    6050             :       CALL dbcsr_create(m_tmp_oo_2, &
    6051             :                         template=m_siginv, &
    6052        1474 :                         matrix_type=dbcsr_type_no_symmetry)
    6053             :       CALL dbcsr_create(tempNOcc1, &
    6054             :                         template=m_t, &
    6055        1474 :                         matrix_type=dbcsr_type_no_symmetry)
    6056             :       CALL dbcsr_create(tempOccOcc1, &
    6057             :                         template=m_siginv, &
    6058        1474 :                         matrix_type=dbcsr_type_no_symmetry)
    6059             :       CALL dbcsr_create(temp1, &
    6060             :                         template=m_t, &
    6061        1474 :                         matrix_type=dbcsr_type_no_symmetry)
    6062             :       CALL dbcsr_create(temp2, &
    6063             :                         template=m_t, &
    6064        1474 :                         matrix_type=dbcsr_type_no_symmetry)
    6065             : 
    6066             :       ! do d_E/d_T first
    6067             :       !IF (.NOT.PRESENT(m_FTsiginv)) THEN
    6068             :       !   CALL dbcsr_multiply("N","N",1.0_dp,&
    6069             :       !           m_ks,&
    6070             :       !           m_t,&
    6071             :       !           0.0_dp,m_tmp_no_1,&
    6072             :       !           filter_eps=eps_filter)
    6073             :       !   CALL dbcsr_multiply("N","N",1.0_dp,&
    6074             :       !           m_tmp_no_1,&
    6075             :       !           m_siginv,&
    6076             :       !           0.0_dp,m_FTsiginv,&
    6077             :       !           filter_eps=eps_filter)
    6078             :       !ENDIF
    6079             : 
    6080        1474 :       CALL dbcsr_copy(m_tmp_no_2, m_quench_t)
    6081        1474 :       CALL dbcsr_copy(m_tmp_no_2, m_FTsiginv, keep_sparsity=.TRUE.)
    6082             : 
    6083             :       !IF (.NOT.PRESENT(m_siginvTFTsiginv)) THEN
    6084             :       !   CALL dbcsr_multiply("T","N",1.0_dp,&
    6085             :       !           m_t,&
    6086             :       !           m_FTsiginv,&
    6087             :       !           0.0_dp,m_tmp_oo_1,&
    6088             :       !           filter_eps=eps_filter)
    6089             :       !   CALL dbcsr_multiply("N","N",1.0_dp,&
    6090             :       !           m_siginv,&
    6091             :       !           m_tmp_oo_1,&
    6092             :       !           0.0_dp,m_siginvTFTsiginv,&
    6093             :       !           filter_eps=eps_filter)
    6094             :       !ENDIF
    6095             : 
    6096             :       !IF (.NOT.PRESENT(m_ST)) THEN
    6097             :       !   CALL dbcsr_multiply("N","N",1.0_dp,&
    6098             :       !           m_s,&
    6099             :       !           m_t,&
    6100             :       !           0.0_dp,m_ST,&
    6101             :       !           filter_eps=eps_filter)
    6102             :       !ENDIF
    6103             : 
    6104             :       CALL dbcsr_multiply("N", "N", -1.0_dp, &
    6105             :                           m_ST, &
    6106             :                           m_siginvTFTsiginv, &
    6107             :                           1.0_dp, m_tmp_no_2, &
    6108        1474 :                           retain_sparsity=.TRUE.)
    6109        1474 :       CALL dbcsr_scale(m_tmp_no_2, 2.0_dp*spin_factor)
    6110             : 
    6111             :       ! LzL Add gradient for Localization
    6112        1474 :       IF (my_penalty_local) THEN
    6113             : 
    6114           0 :          CALL dbcsr_set(temp2, 0.0_dp) ! accumulate the localization gradient here
    6115             : 
    6116           0 :          DO idim0 = 1, SIZE(op_sm_set, 2) ! this loop is over miller ind
    6117             : 
    6118           0 :             DO reim = 1, SIZE(op_sm_set, 1) ! this loop is over Re/Im
    6119             : 
    6120             :                CALL dbcsr_multiply("N", "N", 1.0_dp, &
    6121             :                                    op_sm_set(reim, idim0)%matrix, &
    6122             :                                    m_t, &
    6123             :                                    0.0_dp, tempNOcc1, &
    6124           0 :                                    filter_eps=eps_filter)
    6125             : 
    6126             :                ! warning - save time by computing only the diagonal elements
    6127             :                CALL dbcsr_multiply("T", "N", 1.0_dp, &
    6128             :                                    m_t, &
    6129             :                                    tempNOcc1, &
    6130             :                                    0.0_dp, tempOccOcc1, &
    6131           0 :                                    filter_eps=eps_filter)
    6132             : 
    6133           0 :                CALL dbcsr_get_info(tempOccOcc1, nfullrows_total=dim0)
    6134           0 :                ALLOCATE (tg_diagonal(dim0))
    6135           0 :                CALL dbcsr_get_diag(tempOccOcc1, tg_diagonal)
    6136           0 :                CALL dbcsr_set(tempOccOcc1, 0.0_dp)
    6137           0 :                CALL dbcsr_set_diag(tempOccOcc1, tg_diagonal)
    6138           0 :                DEALLOCATE (tg_diagonal)
    6139             : 
    6140             :                CALL dbcsr_multiply("N", "N", 1.0_dp, &
    6141             :                                    tempNOcc1, &
    6142             :                                    tempOccOcc1, &
    6143             :                                    0.0_dp, temp1, &
    6144           0 :                                    filter_eps=eps_filter)
    6145             : 
    6146             :             END DO
    6147             : 
    6148             :             SELECT CASE (2) ! allows for selection of different spread functionals
    6149             :             CASE (1) ! functional =  -W_I * log( |z_I|^2 )
    6150           0 :                CPABORT("Localization function is not implemented")
    6151             :                !coeff = -(weights(idim0)/z2(ielem))
    6152             :             CASE (2) ! functional =  W_I * ( 1 - |z_I|^2 )
    6153           0 :                coeff = -weights(idim0)
    6154             :             CASE (3) ! functional =  W_I * ( 1 - |z_I| )
    6155             :                CPABORT("Localization function is not implemented")
    6156             :                !coeff = -(weights(idim0)/(2.0_dp*z2(ielem)))
    6157             :             END SELECT
    6158           0 :             CALL dbcsr_add(temp2, temp1, 1.0_dp, coeff)
    6159             :             !CALL dbcsr_add(grad_loc, temp1, 1.0_dp, 1.0_dp)
    6160             : 
    6161             :          END DO ! end loop over idim0
    6162           0 :          CALL dbcsr_add(m_tmp_no_2, temp2, my_energy_coeff, my_localiz_coeff*4.0_dp)
    6163             :       END IF
    6164             : 
    6165             :       ! add penalty on the occupied volume: det(sigma)
    6166        1474 :       IF (penalty_occ_vol) THEN
    6167             :          !RZK-warning CALL dbcsr_multiply("N","N",&
    6168             :          !RZK-warning         penalty_occ_vol_prefactor,&
    6169             :          !RZK-warning         m_ST,&
    6170             :          !RZK-warning         m_siginv,&
    6171             :          !RZK-warning         1.0_dp,m_tmp_no_2,&
    6172             :          !RZK-warning         retain_sparsity=.TRUE.,&
    6173             :          !RZK-warning         )
    6174           0 :          CALL dbcsr_copy(m_tmp_no_1, m_quench_t)
    6175             :          CALL dbcsr_multiply("N", "N", &
    6176             :                              penalty_occ_vol_prefactor, &
    6177             :                              m_ST, &
    6178             :                              m_siginv, &
    6179             :                              0.0_dp, m_tmp_no_1, &
    6180           0 :                              retain_sparsity=.TRUE.)
    6181             :          ! this norm does not contain the normalization factors
    6182           0 :          penalty_occ_vol_g_norm = dbcsr_maxabs(m_tmp_no_1)
    6183           0 :          energy_g_norm = dbcsr_maxabs(m_tmp_no_2)
    6184             :          !WRITE (*, "(A30,2F20.10)") "Energy/penalty g norms (no norm): ", energy_g_norm, penalty_occ_vol_g_norm
    6185           0 :          CALL dbcsr_add(m_tmp_no_2, m_tmp_no_1, 1.0_dp, 1.0_dp)
    6186             :       END IF
    6187             : 
    6188             :       ! take into account the factor from the normalization constraint
    6189        1474 :       IF (normalize_orbitals) THEN
    6190             : 
    6191             :          ! G = ( G - ST.[tr(T).G]_ii ) . [sig_sqrti]_ii
    6192             :          ! this expression can be simplified to
    6193             :          ! G = ( G - c0*ST ) . [sig_sqrti]_ii
    6194             :          ! where c0 = penalty_occ_vol_prefactor
    6195             :          ! This is because tr(T).G_Energy = 0 and
    6196             :          !                 tr(T).G_Penalty = c0*I
    6197             : 
    6198             :          !! faster way to take the norm into account (tested for vol penalty olny)
    6199             :          !!CALL dbcsr_copy(m_tmp_no_1, m_quench_t)
    6200             :          !!CALL dbcsr_copy(m_tmp_no_1, m_ST, keep_sparsity=.TRUE.)
    6201             :          !!CALL dbcsr_add(m_tmp_no_2, m_tmp_no_1, 1.0_dp, -penalty_occ_vol_prefactor)
    6202             :          !!CALL dbcsr_copy(m_tmp_no_1, m_quench_t)
    6203             :          !!CALL dbcsr_multiply("N", "N", 1.0_dp, &
    6204             :          !!                    m_tmp_no_2, &
    6205             :          !!                    m_sig_sqrti_ii, &
    6206             :          !!                    0.0_dp, m_tmp_no_1, &
    6207             :          !!                    retain_sparsity=.TRUE.)
    6208             : 
    6209             :          ! slower way of taking the norm into account
    6210           0 :          CALL dbcsr_copy(m_tmp_no_1, m_quench_t)
    6211             :          CALL dbcsr_multiply("N", "N", 1.0_dp, &
    6212             :                              m_tmp_no_2, &
    6213             :                              m_sig_sqrti_ii, &
    6214             :                              0.0_dp, m_tmp_no_1, &
    6215           0 :                              retain_sparsity=.TRUE.)
    6216             : 
    6217             :          ! get [tr(T).G]_ii
    6218           0 :          CALL dbcsr_copy(m_tmp_oo_1, m_sig_sqrti_ii)
    6219             :          CALL dbcsr_multiply("T", "N", 1.0_dp, &
    6220             :                              m_t, &
    6221             :                              m_tmp_no_2, &
    6222             :                              0.0_dp, m_tmp_oo_1, &
    6223           0 :                              retain_sparsity=.TRUE.)
    6224             : 
    6225           0 :          CALL dbcsr_get_info(m_sig_sqrti_ii, nfullrows_total=dim0)
    6226           0 :          ALLOCATE (tg_diagonal(dim0))
    6227           0 :          CALL dbcsr_get_diag(m_tmp_oo_1, tg_diagonal)
    6228           0 :          CALL dbcsr_set(m_tmp_oo_1, 0.0_dp)
    6229           0 :          CALL dbcsr_set_diag(m_tmp_oo_1, tg_diagonal)
    6230           0 :          DEALLOCATE (tg_diagonal)
    6231             : 
    6232             :          CALL dbcsr_multiply("N", "N", 1.0_dp, &
    6233             :                              m_sig_sqrti_ii, &
    6234             :                              m_tmp_oo_1, &
    6235             :                              0.0_dp, m_tmp_oo_2, &
    6236           0 :                              filter_eps=eps_filter)
    6237             :          CALL dbcsr_multiply("N", "N", -1.0_dp, &
    6238             :                              m_ST, &
    6239             :                              m_tmp_oo_2, &
    6240             :                              1.0_dp, m_tmp_no_1, &
    6241           0 :                              retain_sparsity=.TRUE.)
    6242             : 
    6243             :       ELSE
    6244             : 
    6245        1474 :          CALL dbcsr_copy(m_tmp_no_1, m_tmp_no_2)
    6246             : 
    6247             :       END IF ! normalize_orbitals
    6248             : 
    6249             :       ! project out the occupied space from the gradient
    6250        1474 :       IF (assume_t0_q0x) THEN
    6251         466 :          IF (special_case .EQ. xalmo_case_fully_deloc) THEN
    6252         160 :             CALL dbcsr_copy(m_grad_out, m_tmp_no_1)
    6253             :             CALL dbcsr_multiply("T", "N", 1.0_dp, &
    6254             :                                 m_t0, &
    6255             :                                 m_grad_out, &
    6256             :                                 0.0_dp, m_tmp_oo_1, &
    6257         160 :                                 filter_eps=eps_filter)
    6258             :             CALL dbcsr_multiply("N", "N", -1.0_dp, &
    6259             :                                 m_STsiginv0, &
    6260             :                                 m_tmp_oo_1, &
    6261             :                                 1.0_dp, m_grad_out, &
    6262         160 :                                 filter_eps=eps_filter)
    6263         306 :          ELSE IF (special_case .EQ. xalmo_case_block_diag) THEN
    6264           0 :             CPABORT("Cannot project the zero-order space from itself")
    6265             :          ELSE
    6266             :             ! no special case: normal xALMOs
    6267             :             CALL apply_domain_operators( &
    6268             :                matrix_in=m_tmp_no_1, &
    6269             :                matrix_out=m_grad_out, &
    6270             :                operator2=domain_r_down(:), &
    6271             :                operator1=domain_s_inv(:), &
    6272             :                dpattern=m_quench_t, &
    6273             :                map=domain_map, &
    6274             :                node_of_domain=cpu_of_domain, &
    6275             :                my_action=1, &
    6276             :                filter_eps=eps_filter, &
    6277             :                !matrix_trimmer=,&
    6278         306 :                use_trimmer=.FALSE.)
    6279             :          END IF ! my_special_case
    6280         466 :          CALL dbcsr_copy(m_tmp_no_1, m_grad_out)
    6281             :       END IF
    6282             : 
    6283             :       !! check whether the gradient lies entirely in R or Q
    6284             :       !CALL dbcsr_multiply("T","N",1.0_dp,&
    6285             :       !        m_t,&
    6286             :       !        m_tmp_no_1,&
    6287             :       !        0.0_dp,m_tmp_oo_1,&
    6288             :       !        filter_eps=eps_filter,&
    6289             :       !        )
    6290             :       !CALL dbcsr_multiply("N","N",1.0_dp,&
    6291             :       !        m_siginv,&
    6292             :       !        m_tmp_oo_1,&
    6293             :       !        0.0_dp,m_tmp_oo_2,&
    6294             :       !        filter_eps=eps_filter,&
    6295             :       !        )
    6296             :       !CALL dbcsr_copy(m_tmp_no_2,m_tmp_no_1)
    6297             :       !CALL dbcsr_multiply("N","N",-1.0_dp,&
    6298             :       !        m_ST,&
    6299             :       !        m_tmp_oo_2,&
    6300             :       !        1.0_dp,m_tmp_no_2,&
    6301             :       !        retain_sparsity=.TRUE.,&
    6302             :       !        )
    6303             :       !penalty_occ_vol_g_norm = dbcsr_maxabs(m_tmp_no_2)
    6304             :       !WRITE(*,"(A50,2F20.10)") "Virtual-space projection of the gradient", penalty_occ_vol_g_norm
    6305             :       !CALL dbcsr_add(m_tmp_no_2,m_tmp_no_1,1.0_dp,-1.0_dp)
    6306             :       !penalty_occ_vol_g_norm = dbcsr_maxabs(m_tmp_no_2)
    6307             :       !WRITE(*,"(A50,2F20.10)") "Occupied-space projection of the gradient", penalty_occ_vol_g_norm
    6308             :       !penalty_occ_vol_g_norm = dbcsr_maxabs(m_tmp_no_1)
    6309             :       !WRITE(*,"(A50,2F20.10)") "Full gradient", penalty_occ_vol_g_norm
    6310             : 
    6311             :       ! transform d_E/d_T to d_E/d_theta
    6312        1474 :       IF (optimize_theta) THEN
    6313           0 :          CALL dbcsr_copy(m_tmp_no_2, m_theta)
    6314           0 :          CALL dtanh_of_elements(m_tmp_no_2, alpha=1.0_dp/envelope_amplitude)
    6315           0 :          CALL dbcsr_scale(m_tmp_no_2, envelope_amplitude)
    6316           0 :          CALL dbcsr_set(m_tmp_no_3, 0.0_dp)
    6317           0 :          CALL dbcsr_filter(m_tmp_no_3, eps_filter)
    6318             :          CALL dbcsr_hadamard_product(m_tmp_no_1, &
    6319             :                                      m_tmp_no_2, &
    6320           0 :                                      m_tmp_no_3)
    6321             :          CALL dbcsr_hadamard_product(m_tmp_no_3, &
    6322             :                                      m_quench_t, &
    6323           0 :                                      m_grad_out)
    6324             :       ELSE ! simply copy
    6325             :          CALL dbcsr_hadamard_product(m_tmp_no_1, &
    6326             :                                      m_quench_t, &
    6327        1474 :                                      m_grad_out)
    6328             :       END IF
    6329        1474 :       CALL dbcsr_filter(m_grad_out, eps_filter)
    6330             : 
    6331        1474 :       CALL dbcsr_release(m_tmp_no_1)
    6332        1474 :       CALL dbcsr_release(m_tmp_no_2)
    6333        1474 :       CALL dbcsr_release(m_tmp_no_3)
    6334        1474 :       CALL dbcsr_release(m_tmp_oo_1)
    6335        1474 :       CALL dbcsr_release(m_tmp_oo_2)
    6336        1474 :       CALL dbcsr_release(tempNOcc1)
    6337        1474 :       CALL dbcsr_release(tempOccOcc1)
    6338        1474 :       CALL dbcsr_release(temp1)
    6339        1474 :       CALL dbcsr_release(temp2)
    6340             : 
    6341        1474 :       CALL timestop(handle)
    6342             : 
    6343        2948 :    END SUBROUTINE compute_gradient
    6344             : 
    6345             : ! **************************************************************************************************
    6346             : !> \brief Serial code that prints matrices readable by Mathematica
    6347             : !> \param matrix - matrix to print
    6348             : !> \param filename ...
    6349             : !> \par History
    6350             : !>       2015.05 created [Rustam Z. Khaliullin]
    6351             : !> \author Rustam Z. Khaliullin
    6352             : ! **************************************************************************************************
    6353           0 :    SUBROUTINE print_mathematica_matrix(matrix, filename)
    6354             : 
    6355             :       TYPE(dbcsr_type), INTENT(IN)                       :: matrix
    6356             :       CHARACTER(len=*), INTENT(IN)                       :: filename
    6357             : 
    6358             :       CHARACTER(len=*), PARAMETER :: routineN = 'print_mathematica_matrix'
    6359             : 
    6360             :       CHARACTER(LEN=20)                                  :: formatstr, Scols
    6361             :       INTEGER                                            :: col, fiunit, handle, hori_offset, jj, &
    6362             :                                                             nblkcols_tot, nblkrows_tot, Ncols, &
    6363             :                                                             ncores, Nrows, row, unit_nr, &
    6364             :                                                             vert_offset
    6365           0 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: ao_block_sizes, mo_block_sizes
    6366           0 :       INTEGER, DIMENSION(:), POINTER                     :: ao_blk_sizes, mo_blk_sizes
    6367             :       LOGICAL                                            :: found
    6368           0 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :)        :: H
    6369           0 :       REAL(KIND=dp), DIMENSION(:, :), POINTER            :: block_p
    6370             :       TYPE(cp_logger_type), POINTER                      :: logger
    6371             :       TYPE(dbcsr_distribution_type)                      :: dist
    6372             :       TYPE(dbcsr_type)                                   :: matrix_asym
    6373             : 
    6374           0 :       CALL timeset(routineN, handle)
    6375             : 
    6376             :       ! get a useful output_unit
    6377           0 :       logger => cp_get_default_logger()
    6378           0 :       IF (logger%para_env%is_source()) THEN
    6379           0 :          unit_nr = cp_logger_get_default_unit_nr(logger, local=.TRUE.)
    6380             :       ELSE
    6381             :          unit_nr = -1
    6382             :       END IF
    6383             : 
    6384             :       ! serial code only
    6385           0 :       CALL dbcsr_get_info(matrix, distribution=dist)
    6386           0 :       CALL dbcsr_distribution_get(dist, numnodes=ncores)
    6387           0 :       IF (ncores .GT. 1) THEN
    6388           0 :          CPABORT("mathematica files: serial code only")
    6389             :       END IF
    6390             : 
    6391           0 :       nblkrows_tot = dbcsr_nblkrows_total(matrix)
    6392           0 :       nblkcols_tot = dbcsr_nblkcols_total(matrix)
    6393           0 :       CPASSERT(nblkrows_tot == nblkcols_tot)
    6394           0 :       CALL dbcsr_get_info(matrix, row_blk_size=ao_blk_sizes)
    6395           0 :       CALL dbcsr_get_info(matrix, col_blk_size=mo_blk_sizes)
    6396           0 :       ALLOCATE (mo_block_sizes(nblkcols_tot), ao_block_sizes(nblkcols_tot))
    6397           0 :       mo_block_sizes(:) = mo_blk_sizes(:)
    6398           0 :       ao_block_sizes(:) = ao_blk_sizes(:)
    6399             : 
    6400             :       CALL dbcsr_create(matrix_asym, &
    6401             :                         template=matrix, &
    6402           0 :                         matrix_type=dbcsr_type_no_symmetry)
    6403           0 :       CALL dbcsr_desymmetrize(matrix, matrix_asym)
    6404             : 
    6405           0 :       Ncols = SUM(mo_block_sizes)
    6406           0 :       Nrows = SUM(ao_block_sizes)
    6407           0 :       ALLOCATE (H(Nrows, Ncols))
    6408           0 :       H(:, :) = 0.0_dp
    6409             : 
    6410           0 :       hori_offset = 0
    6411           0 :       DO col = 1, nblkcols_tot
    6412             : 
    6413           0 :          vert_offset = 0
    6414           0 :          DO row = 1, nblkrows_tot
    6415             : 
    6416           0 :             CALL dbcsr_get_block_p(matrix_asym, row, col, block_p, found)
    6417           0 :             IF (found) THEN
    6418             : 
    6419             :                H(vert_offset + 1:vert_offset + ao_block_sizes(row), &
    6420             :                  hori_offset + 1:hori_offset + mo_block_sizes(col)) &
    6421           0 :                   = block_p(:, :)
    6422             : 
    6423             :             END IF
    6424             : 
    6425           0 :             vert_offset = vert_offset + ao_block_sizes(row)
    6426             : 
    6427             :          END DO
    6428             : 
    6429           0 :          hori_offset = hori_offset + mo_block_sizes(col)
    6430             : 
    6431             :       END DO ! loop over electron blocks
    6432             : 
    6433           0 :       CALL dbcsr_release(matrix_asym)
    6434             : 
    6435           0 :       IF (unit_nr > 0) THEN
    6436           0 :          CALL open_file(filename, unit_number=fiunit, file_status='REPLACE')
    6437           0 :          WRITE (Scols, "(I10)") Ncols
    6438           0 :          formatstr = "("//TRIM(Scols)//"E27.17)"
    6439           0 :          DO jj = 1, Nrows
    6440           0 :             WRITE (fiunit, formatstr) H(jj, :)
    6441             :          END DO
    6442           0 :          CALL close_file(fiunit)
    6443             :       END IF
    6444             : 
    6445           0 :       DEALLOCATE (mo_block_sizes)
    6446           0 :       DEALLOCATE (ao_block_sizes)
    6447           0 :       DEALLOCATE (H)
    6448             : 
    6449           0 :       CALL timestop(handle)
    6450             : 
    6451           0 :    END SUBROUTINE print_mathematica_matrix
    6452             : 
    6453             : ! **************************************************************************************************
    6454             : !> \brief Compute the objective functional of NLMOs
    6455             : !> \param localization_obj_function_ispin ...
    6456             : !> \param penalty_func_ispin ...
    6457             : !> \param penalty_vol_prefactor ...
    6458             : !> \param overlap_determinant ...
    6459             : !> \param m_sigma ...
    6460             : !> \param nocc ...
    6461             : !> \param m_B0 ...
    6462             : !> \param m_theta_normalized ...
    6463             : !> \param template_matrix_mo ...
    6464             : !> \param weights ...
    6465             : !> \param m_S0 ...
    6466             : !> \param just_started ...
    6467             : !> \param penalty_amplitude ...
    6468             : !> \param eps_filter ...
    6469             : !> \par History
    6470             : !>       2020.01 created [Ziling Luo]
    6471             : !> \author Ziling Luo
    6472             : ! **************************************************************************************************
    6473          82 :    SUBROUTINE compute_obj_nlmos(localization_obj_function_ispin, penalty_func_ispin, &
    6474          82 :                                 penalty_vol_prefactor, overlap_determinant, m_sigma, nocc, m_B0, &
    6475          82 :                                 m_theta_normalized, template_matrix_mo, weights, m_S0, just_started, &
    6476             :                                 penalty_amplitude, eps_filter)
    6477             : 
    6478             :       REAL(KIND=dp), INTENT(INOUT) :: localization_obj_function_ispin, penalty_func_ispin, &
    6479             :          penalty_vol_prefactor, overlap_determinant
    6480             :       TYPE(dbcsr_type), INTENT(INOUT)                    :: m_sigma
    6481             :       INTEGER, INTENT(IN)                                :: nocc
    6482             :       TYPE(dbcsr_type), DIMENSION(:, :), INTENT(IN)      :: m_B0
    6483             :       TYPE(dbcsr_type), INTENT(IN)                       :: m_theta_normalized, template_matrix_mo
    6484             :       REAL(KIND=dp), DIMENSION(:), INTENT(IN)            :: weights
    6485             :       TYPE(dbcsr_type), INTENT(IN)                       :: m_S0
    6486             :       LOGICAL, INTENT(IN)                                :: just_started
    6487             :       REAL(KIND=dp), INTENT(IN)                          :: penalty_amplitude, eps_filter
    6488             : 
    6489             :       CHARACTER(len=*), PARAMETER                        :: routineN = 'compute_obj_nlmos'
    6490             : 
    6491             :       INTEGER                                            :: handle, idim0, ielem, reim
    6492             :       REAL(KIND=dp)                                      :: det1, fval
    6493          82 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: reim_diag, z2
    6494             :       TYPE(dbcsr_type)                                   :: tempNOcc1, tempOccOcc1, tempOccOcc2
    6495             :       TYPE(mp_comm_type)                                 :: group
    6496             : 
    6497          82 :       CALL timeset(routineN, handle)
    6498             : 
    6499             :       CALL dbcsr_create(tempNOcc1, &
    6500             :                         template=template_matrix_mo, &
    6501          82 :                         matrix_type=dbcsr_type_no_symmetry)
    6502             :       CALL dbcsr_create(tempOccOcc1, &
    6503             :                         template=m_theta_normalized, &
    6504          82 :                         matrix_type=dbcsr_type_no_symmetry)
    6505             :       CALL dbcsr_create(tempOccOcc2, &
    6506             :                         template=m_theta_normalized, &
    6507          82 :                         matrix_type=dbcsr_type_no_symmetry)
    6508             : 
    6509          82 :       localization_obj_function_ispin = 0.0_dp
    6510          82 :       penalty_func_ispin = 0.0_dp
    6511         246 :       ALLOCATE (z2(nocc))
    6512         164 :       ALLOCATE (reim_diag(nocc))
    6513             : 
    6514          82 :       CALL dbcsr_get_info(tempOccOcc2, group=group)
    6515             : 
    6516         842 :       DO idim0 = 1, SIZE(m_B0, 2) ! this loop is over miller ind
    6517             : 
    6518       12608 :          z2(:) = 0.0_dp
    6519             : 
    6520        1520 :          DO reim = 1, SIZE(m_B0, 1) ! this loop is over Re/Im
    6521             : 
    6522             :             CALL dbcsr_multiply("N", "N", 1.0_dp, &
    6523             :                                 m_B0(reim, idim0), &
    6524             :                                 m_theta_normalized, &
    6525             :                                 0.0_dp, tempOccOcc1, &
    6526         760 :                                 filter_eps=eps_filter)
    6527         760 :             CALL dbcsr_set(tempOccOcc2, 0.0_dp)
    6528         760 :             CALL dbcsr_add_on_diag(tempOccOcc2, 1.0_dp)
    6529             :             CALL dbcsr_multiply("T", "N", 1.0_dp, &
    6530             :                                 m_theta_normalized, &
    6531             :                                 tempOccOcc1, &
    6532             :                                 0.0_dp, tempOccOcc2, &
    6533         760 :                                 retain_sparsity=.TRUE.)
    6534             : 
    6535       12608 :             reim_diag = 0.0_dp
    6536         760 :             CALL dbcsr_get_diag(tempOccOcc2, reim_diag)
    6537         760 :             CALL group%sum(reim_diag)
    6538       13368 :             z2(:) = z2(:) + reim_diag(:)*reim_diag(:)
    6539             : 
    6540             :          END DO
    6541             : 
    6542       12690 :          DO ielem = 1, nocc
    6543             :             SELECT CASE (2) ! allows for selection of different spread functionals
    6544             :             CASE (1) ! functional =  -W_I * log( |z_I|^2 )
    6545       11848 :                fval = -weights(idim0)*LOG(ABS(z2(ielem)))
    6546             :             CASE (2) ! functional =  W_I * ( 1 - |z_I|^2 )
    6547       11848 :                fval = weights(idim0) - weights(idim0)*ABS(z2(ielem))
    6548             :             CASE (3) ! functional =  W_I * ( 1 - |z_I| )
    6549             :                fval = weights(idim0) - weights(idim0)*SQRT(ABS(z2(ielem)))
    6550             :             END SELECT
    6551       12608 :             localization_obj_function_ispin = localization_obj_function_ispin + fval
    6552             :          END DO
    6553             : 
    6554             :       END DO ! end loop over idim0
    6555             : 
    6556          82 :       DEALLOCATE (z2)
    6557          82 :       DEALLOCATE (reim_diag)
    6558             : 
    6559             :       CALL dbcsr_multiply("N", "N", 1.0_dp, &
    6560             :                           m_S0, &
    6561             :                           m_theta_normalized, &
    6562             :                           0.0_dp, tempOccOcc1, &
    6563          82 :                           filter_eps=eps_filter)
    6564             :       ! compute current sigma
    6565             :       CALL dbcsr_multiply("T", "N", 1.0_dp, &
    6566             :                           m_theta_normalized, &
    6567             :                           tempOccOcc1, &
    6568             :                           0.0_dp, m_sigma, &
    6569          82 :                           filter_eps=eps_filter)
    6570             : 
    6571             :       CALL determinant(m_sigma, det1, &
    6572          82 :                        eps_filter)
    6573             :       ! save the current determinant
    6574          82 :       overlap_determinant = det1
    6575             : 
    6576          82 :       IF (just_started .AND. penalty_amplitude .LT. 0.0_dp) THEN
    6577           4 :          penalty_vol_prefactor = -(-penalty_amplitude)*localization_obj_function_ispin
    6578             :       END IF
    6579          82 :       penalty_func_ispin = penalty_func_ispin + penalty_vol_prefactor*LOG(det1)
    6580             : 
    6581          82 :       CALL dbcsr_release(tempNOcc1)
    6582          82 :       CALL dbcsr_release(tempOccOcc1)
    6583          82 :       CALL dbcsr_release(tempOccOcc2)
    6584             : 
    6585          82 :       CALL timestop(handle)
    6586             : 
    6587         164 :    END SUBROUTINE compute_obj_nlmos
    6588             : 
    6589             : ! **************************************************************************************************
    6590             : !> \brief Compute the gradient wrt the main variable
    6591             : !> \param m_grad_out ...
    6592             : !> \param m_B0 ...
    6593             : !> \param weights ...
    6594             : !> \param m_S0 ...
    6595             : !> \param m_theta_normalized ...
    6596             : !> \param m_siginv ...
    6597             : !> \param m_sig_sqrti_ii ...
    6598             : !> \param penalty_vol_prefactor ...
    6599             : !> \param eps_filter ...
    6600             : !> \param suggested_vol_penalty ...
    6601             : !> \par History
    6602             : !>       2018.10 created [Ziling Luo]
    6603             : !> \author Ziling Luo
    6604             : ! **************************************************************************************************
    6605          82 :    SUBROUTINE compute_gradient_nlmos(m_grad_out, m_B0, weights, &
    6606             :                                      m_S0, m_theta_normalized, m_siginv, m_sig_sqrti_ii, &
    6607             :                                      penalty_vol_prefactor, eps_filter, suggested_vol_penalty)
    6608             : 
    6609             :       TYPE(dbcsr_type), INTENT(INOUT)                    :: m_grad_out
    6610             :       TYPE(dbcsr_type), DIMENSION(:, :), INTENT(IN)      :: m_B0
    6611             :       REAL(KIND=dp), DIMENSION(:), INTENT(IN)            :: weights
    6612             :       TYPE(dbcsr_type), INTENT(IN)                       :: m_S0, m_theta_normalized, m_siginv, &
    6613             :                                                             m_sig_sqrti_ii
    6614             :       REAL(KIND=dp), INTENT(IN)                          :: penalty_vol_prefactor, eps_filter
    6615             :       REAL(KIND=dp), INTENT(INOUT)                       :: suggested_vol_penalty
    6616             : 
    6617             :       CHARACTER(len=*), PARAMETER :: routineN = 'compute_gradient_nlmos'
    6618             : 
    6619             :       INTEGER                                            :: dim0, handle, idim0, reim
    6620             :       REAL(KIND=dp)                                      :: norm_loc, norm_vol
    6621             :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: tg_diagonal, z2
    6622             :       TYPE(dbcsr_type)                                   :: m_temp_oo_1, m_temp_oo_2, m_temp_oo_3, &
    6623             :                                                             m_temp_oo_4
    6624             : 
    6625          82 :       CALL timeset(routineN, handle)
    6626             : 
    6627             :       CALL dbcsr_create(m_temp_oo_1, &
    6628             :                         template=m_theta_normalized, &
    6629          82 :                         matrix_type=dbcsr_type_no_symmetry)
    6630             :       CALL dbcsr_create(m_temp_oo_2, &
    6631             :                         template=m_theta_normalized, &
    6632          82 :                         matrix_type=dbcsr_type_no_symmetry)
    6633             :       CALL dbcsr_create(m_temp_oo_3, &
    6634             :                         template=m_theta_normalized, &
    6635          82 :                         matrix_type=dbcsr_type_no_symmetry)
    6636             :       CALL dbcsr_create(m_temp_oo_4, &
    6637             :                         template=m_theta_normalized, &
    6638          82 :                         matrix_type=dbcsr_type_no_symmetry)
    6639             : 
    6640          82 :       CALL dbcsr_get_info(m_siginv, nfullrows_total=dim0)
    6641         246 :       ALLOCATE (tg_diagonal(dim0))
    6642         164 :       ALLOCATE (z2(dim0))
    6643          82 :       CALL dbcsr_set(m_temp_oo_1, 0.0_dp) ! accumulate the gradient wrt a_norm here
    6644             : 
    6645             :       ! do d_Omega/d_a_normalized first
    6646         842 :       DO idim0 = 1, SIZE(m_B0, 2) ! this loop is over miller ind
    6647             : 
    6648       12608 :          z2(:) = 0.0_dp
    6649         760 :          CALL dbcsr_set(m_temp_oo_2, 0.0_dp) ! accumulate index gradient here
    6650        1520 :          DO reim = 1, SIZE(m_B0, 1) ! this loop is over Re/Im
    6651             : 
    6652             :             CALL dbcsr_multiply("N", "N", 1.0_dp, &
    6653             :                                 m_B0(reim, idim0), &
    6654             :                                 m_theta_normalized, &
    6655             :                                 0.0_dp, m_temp_oo_3, &
    6656         760 :                                 filter_eps=eps_filter)
    6657             : 
    6658             :             ! result contain Re/Im part of Z for the current Miller index
    6659             :             ! warning - save time by computing only the diagonal elements
    6660             :             CALL dbcsr_multiply("T", "N", 1.0_dp, &
    6661             :                                 m_theta_normalized, &
    6662             :                                 m_temp_oo_3, &
    6663             :                                 0.0_dp, m_temp_oo_4, &
    6664         760 :                                 filter_eps=eps_filter)
    6665             : 
    6666       12608 :             tg_diagonal(:) = 0.0_dp
    6667         760 :             CALL dbcsr_get_diag(m_temp_oo_4, tg_diagonal)
    6668         760 :             CALL dbcsr_set(m_temp_oo_4, 0.0_dp)
    6669         760 :             CALL dbcsr_set_diag(m_temp_oo_4, tg_diagonal)
    6670             :             !CALL para_group%sum(tg_diagonal)
    6671       12608 :             z2(:) = z2(:) + tg_diagonal(:)*tg_diagonal(:)
    6672             : 
    6673             :             CALL dbcsr_multiply("N", "N", 1.0_dp, &
    6674             :                                 m_temp_oo_3, &
    6675             :                                 m_temp_oo_4, &
    6676             :                                 1.0_dp, m_temp_oo_2, &
    6677        1520 :                                 filter_eps=eps_filter)
    6678             : 
    6679             :          END DO
    6680             : 
    6681             :          ! TODO: because some elements are zeros on some MPI tasks the
    6682             :          ! gradient evaluation will fail for CASE 1 and 3
    6683             :          SELECT CASE (2) ! allows for selection of different spread functionals
    6684             :          CASE (1) ! functional =  -W_I * log( |z_I|^2 )
    6685             :             z2(:) = -weights(idim0)/z2(:)
    6686             :          CASE (2) ! functional =  W_I * ( 1 - |z_I|^2 )
    6687       12608 :             z2(:) = -weights(idim0)
    6688             :          CASE (3) ! functional =  W_I * ( 1 - |z_I| )
    6689             :             z2(:) = -weights(idim0)/(2*SQRT(z2(:)))
    6690             :          END SELECT
    6691         760 :          CALL dbcsr_set(m_temp_oo_3, 0.0_dp)
    6692         760 :          CALL dbcsr_set_diag(m_temp_oo_3, z2)
    6693             :          ! TODO: print this matrix to make sure its block structure is fine
    6694             :          ! and there are no unecessary elements
    6695             : 
    6696             :          CALL dbcsr_multiply("N", "N", 4.0_dp, &
    6697             :                              m_temp_oo_2, &
    6698             :                              m_temp_oo_3, &
    6699             :                              1.0_dp, m_temp_oo_1, &
    6700         842 :                              filter_eps=eps_filter)
    6701             : 
    6702             :       END DO ! end loop over idim0
    6703          82 :       DEALLOCATE (z2)
    6704             : 
    6705             :       ! sigma0.a_norm is necessary for the volume penalty and normalization
    6706             :       CALL dbcsr_multiply("N", "N", &
    6707             :                           1.0_dp, &
    6708             :                           m_S0, &
    6709             :                           m_theta_normalized, &
    6710             :                           0.0_dp, m_temp_oo_2, &
    6711          82 :                           filter_eps=eps_filter)
    6712             : 
    6713             :       ! add gradient of the penalty functional log[det(sigma)]
    6714             :       ! G = 2*prefactor*sigma0.a_norm.sigma_inv
    6715             :       CALL dbcsr_multiply("N", "N", &
    6716             :                           1.0_dp, &
    6717             :                           m_temp_oo_2, &
    6718             :                           m_siginv, &
    6719             :                           0.0_dp, m_temp_oo_3, &
    6720          82 :                           filter_eps=eps_filter)
    6721          82 :       norm_vol = dbcsr_maxabs(m_temp_oo_3)
    6722          82 :       norm_loc = dbcsr_maxabs(m_temp_oo_1)
    6723          82 :       suggested_vol_penalty = norm_loc/norm_vol
    6724             :       CALL dbcsr_add(m_temp_oo_1, m_temp_oo_3, &
    6725          82 :                      1.0_dp, 2.0_dp*penalty_vol_prefactor)
    6726             : 
    6727             :       ! take into account the factor from the normalization constraint
    6728             :       ! G = ( G - sigma0.a_norm.[tr(a_norm).G]_ii ) . [sig_sqrti]_ii
    6729             :       ! 1. get G.[sig_sqrti]_ii
    6730             :       CALL dbcsr_multiply("N", "N", 1.0_dp, &
    6731             :                           m_temp_oo_1, &
    6732             :                           m_sig_sqrti_ii, &
    6733             :                           0.0_dp, m_grad_out, &
    6734          82 :                           filter_eps=eps_filter)
    6735             : 
    6736             :       ! 2. get [tr(a_norm).G]_ii
    6737             :       ! it is possible to save time by computing only the diagonal elements
    6738             :       CALL dbcsr_multiply("T", "N", 1.0_dp, &
    6739             :                           m_theta_normalized, &
    6740             :                           m_temp_oo_1, &
    6741             :                           0.0_dp, m_temp_oo_3, &
    6742          82 :                           filter_eps=eps_filter)
    6743          82 :       CALL dbcsr_get_diag(m_temp_oo_3, tg_diagonal)
    6744          82 :       CALL dbcsr_set(m_temp_oo_3, 0.0_dp)
    6745          82 :       CALL dbcsr_set_diag(m_temp_oo_3, tg_diagonal)
    6746             : 
    6747             :       ! 3. [X]_ii . [sig_sqrti]_ii
    6748             :       ! it is possible to save time by computing only the diagonal elements
    6749             :       CALL dbcsr_multiply("N", "N", 1.0_dp, &
    6750             :                           m_sig_sqrti_ii, &
    6751             :                           m_temp_oo_3, &
    6752             :                           0.0_dp, m_temp_oo_1, &
    6753          82 :                           filter_eps=eps_filter)
    6754             :       ! 4. (sigma0*a_norm) .[X]_ii
    6755             :       CALL dbcsr_multiply("N", "N", -1.0_dp, &
    6756             :                           m_temp_oo_2, &
    6757             :                           m_temp_oo_1, &
    6758             :                           1.0_dp, m_grad_out, &
    6759          82 :                           filter_eps=eps_filter)
    6760             : 
    6761          82 :       DEALLOCATE (tg_diagonal)
    6762          82 :       CALL dbcsr_release(m_temp_oo_1)
    6763          82 :       CALL dbcsr_release(m_temp_oo_2)
    6764          82 :       CALL dbcsr_release(m_temp_oo_3)
    6765          82 :       CALL dbcsr_release(m_temp_oo_4)
    6766             : 
    6767          82 :       CALL timestop(handle)
    6768             : 
    6769         164 :    END SUBROUTINE compute_gradient_nlmos
    6770             : 
    6771             : ! **************************************************************************************************
    6772             : !> \brief Compute MO coeffs from the main optimized variable (e.g. Theta, X)
    6773             : !> \param m_var_in ...
    6774             : !> \param m_t_out ...
    6775             : !> \param m_quench_t ...
    6776             : !> \param m_t0 ...
    6777             : !> \param m_oo_template ...
    6778             : !> \param m_STsiginv0 ...
    6779             : !> \param m_s ...
    6780             : !> \param m_sig_sqrti_ii_out ...
    6781             : !> \param domain_r_down ...
    6782             : !> \param domain_s_inv ...
    6783             : !> \param domain_map ...
    6784             : !> \param cpu_of_domain ...
    6785             : !> \param assume_t0_q0x ...
    6786             : !> \param just_started ...
    6787             : !> \param optimize_theta ...
    6788             : !> \param normalize_orbitals ...
    6789             : !> \param envelope_amplitude ...
    6790             : !> \param eps_filter ...
    6791             : !> \param special_case ...
    6792             : !> \param nocc_of_domain ...
    6793             : !> \param order_lanczos ...
    6794             : !> \param eps_lanczos ...
    6795             : !> \param max_iter_lanczos ...
    6796             : !> \par History
    6797             : !>       2015.03 created [Rustam Z Khaliullin]
    6798             : !> \author Rustam Z Khaliullin
    6799             : ! **************************************************************************************************
    6800        2948 :    SUBROUTINE compute_xalmos_from_main_var(m_var_in, m_t_out, m_quench_t, &
    6801        1474 :                                            m_t0, m_oo_template, m_STsiginv0, m_s, m_sig_sqrti_ii_out, domain_r_down, &
    6802        1474 :                                            domain_s_inv, domain_map, cpu_of_domain, assume_t0_q0x, just_started, &
    6803             :                                            optimize_theta, normalize_orbitals, envelope_amplitude, eps_filter, &
    6804        1474 :                                            special_case, nocc_of_domain, order_lanczos, eps_lanczos, max_iter_lanczos)
    6805             : 
    6806             :       TYPE(dbcsr_type), INTENT(IN)                       :: m_var_in
    6807             :       TYPE(dbcsr_type), INTENT(INOUT)                    :: m_t_out, m_quench_t, m_t0, &
    6808             :                                                             m_oo_template, m_STsiginv0, m_s, &
    6809             :                                                             m_sig_sqrti_ii_out
    6810             :       TYPE(domain_submatrix_type), DIMENSION(:), &
    6811             :          INTENT(IN)                                      :: domain_r_down, domain_s_inv
    6812             :       TYPE(domain_map_type), INTENT(IN)                  :: domain_map
    6813             :       INTEGER, DIMENSION(:), INTENT(IN)                  :: cpu_of_domain
    6814             :       LOGICAL, INTENT(IN)                                :: assume_t0_q0x, just_started, &
    6815             :                                                             optimize_theta, normalize_orbitals
    6816             :       REAL(KIND=dp), INTENT(IN)                          :: envelope_amplitude, eps_filter
    6817             :       INTEGER, INTENT(IN)                                :: special_case
    6818             :       INTEGER, DIMENSION(:), INTENT(IN)                  :: nocc_of_domain
    6819             :       INTEGER, INTENT(IN)                                :: order_lanczos
    6820             :       REAL(KIND=dp), INTENT(IN)                          :: eps_lanczos
    6821             :       INTEGER, INTENT(IN)                                :: max_iter_lanczos
    6822             : 
    6823             :       CHARACTER(len=*), PARAMETER :: routineN = 'compute_xalmos_from_main_var'
    6824             : 
    6825             :       INTEGER                                            :: handle, unit_nr
    6826             :       REAL(KIND=dp)                                      :: t_norm
    6827             :       TYPE(cp_logger_type), POINTER                      :: logger
    6828             :       TYPE(dbcsr_type)                                   :: m_tmp_no_1, m_tmp_oo_1
    6829             : 
    6830        1474 :       CALL timeset(routineN, handle)
    6831             : 
    6832             :       ! get a useful output_unit
    6833        1474 :       logger => cp_get_default_logger()
    6834        1474 :       IF (logger%para_env%is_source()) THEN
    6835         737 :          unit_nr = cp_logger_get_default_unit_nr(logger, local=.TRUE.)
    6836             :       ELSE
    6837             :          unit_nr = -1
    6838             :       END IF
    6839             : 
    6840             :       CALL dbcsr_create(m_tmp_no_1, &
    6841             :                         template=m_quench_t, &
    6842        1474 :                         matrix_type=dbcsr_type_no_symmetry)
    6843             :       CALL dbcsr_create(m_tmp_oo_1, &
    6844             :                         template=m_oo_template, &
    6845        1474 :                         matrix_type=dbcsr_type_no_symmetry)
    6846             : 
    6847        1474 :       CALL dbcsr_copy(m_tmp_no_1, m_var_in)
    6848        1474 :       IF (optimize_theta) THEN
    6849             :          ! check that all MO coefficients of the guess are less
    6850             :          ! than the maximum allowed amplitude
    6851           0 :          t_norm = dbcsr_maxabs(m_tmp_no_1)
    6852           0 :          IF (unit_nr > 0) THEN
    6853           0 :             WRITE (unit_nr, *) "Maximum norm of the initial guess: ", t_norm
    6854           0 :             WRITE (unit_nr, *) "Maximum allowed amplitude: ", &
    6855           0 :                envelope_amplitude
    6856             :          END IF
    6857           0 :          IF (t_norm .GT. envelope_amplitude .AND. just_started) THEN
    6858           0 :             CPABORT("Max norm of the initial guess is too large")
    6859             :          END IF
    6860             :          ! use artanh to tame MOs
    6861           0 :          CALL tanh_of_elements(m_tmp_no_1, alpha=1.0_dp/envelope_amplitude)
    6862           0 :          CALL dbcsr_scale(m_tmp_no_1, envelope_amplitude)
    6863             :       END IF
    6864             :       CALL dbcsr_hadamard_product(m_tmp_no_1, m_quench_t, &
    6865        1474 :                                   m_t_out)
    6866             : 
    6867             :       ! project out R_0
    6868        1474 :       IF (assume_t0_q0x) THEN
    6869         466 :          IF (special_case .EQ. xalmo_case_fully_deloc) THEN
    6870             :             CALL dbcsr_multiply("T", "N", 1.0_dp, &
    6871             :                                 m_STsiginv0, &
    6872             :                                 m_t_out, &
    6873             :                                 0.0_dp, m_tmp_oo_1, &
    6874         160 :                                 filter_eps=eps_filter)
    6875             :             CALL dbcsr_multiply("N", "N", -1.0_dp, &
    6876             :                                 m_t0, &
    6877             :                                 m_tmp_oo_1, &
    6878             :                                 1.0_dp, m_t_out, &
    6879         160 :                                 filter_eps=eps_filter)
    6880         306 :          ELSE IF (special_case .EQ. xalmo_case_block_diag) THEN
    6881           0 :             CPABORT("cannot use projector with block-daigonal ALMOs")
    6882             :          ELSE
    6883             :             ! no special case
    6884             :             CALL apply_domain_operators( &
    6885             :                matrix_in=m_t_out, &
    6886             :                matrix_out=m_tmp_no_1, &
    6887             :                operator1=domain_r_down, &
    6888             :                operator2=domain_s_inv, &
    6889             :                dpattern=m_quench_t, &
    6890             :                map=domain_map, &
    6891             :                node_of_domain=cpu_of_domain, &
    6892             :                my_action=1, &
    6893             :                filter_eps=eps_filter, &
    6894         306 :                use_trimmer=.FALSE.)
    6895             :             CALL dbcsr_copy(m_t_out, &
    6896         306 :                             m_tmp_no_1)
    6897             :          END IF ! special case
    6898             :          CALL dbcsr_add(m_t_out, &
    6899         466 :                         m_t0, 1.0_dp, 1.0_dp)
    6900             :       END IF
    6901             : 
    6902        1474 :       IF (normalize_orbitals) THEN
    6903             :          CALL orthogonalize_mos( &
    6904             :             ket=m_t_out, &
    6905             :             overlap=m_tmp_oo_1, &
    6906             :             metric=m_s, &
    6907             :             retain_locality=.TRUE., &
    6908             :             only_normalize=.TRUE., &
    6909             :             nocc_of_domain=nocc_of_domain(:), &
    6910             :             eps_filter=eps_filter, &
    6911             :             order_lanczos=order_lanczos, &
    6912             :             eps_lanczos=eps_lanczos, &
    6913             :             max_iter_lanczos=max_iter_lanczos, &
    6914           0 :             overlap_sqrti=m_sig_sqrti_ii_out)
    6915             :       END IF
    6916             : 
    6917        1474 :       CALL dbcsr_filter(m_t_out, eps_filter)
    6918             : 
    6919        1474 :       CALL dbcsr_release(m_tmp_no_1)
    6920        1474 :       CALL dbcsr_release(m_tmp_oo_1)
    6921             : 
    6922        1474 :       CALL timestop(handle)
    6923             : 
    6924        1474 :    END SUBROUTINE compute_xalmos_from_main_var
    6925             : 
    6926             : ! **************************************************************************************************
    6927             : !> \brief Compute the preconditioner matrices and invert them if necessary
    6928             : !> \param domain_prec_out ...
    6929             : !> \param m_prec_out ...
    6930             : !> \param m_ks ...
    6931             : !> \param m_s ...
    6932             : !> \param m_siginv ...
    6933             : !> \param m_quench_t ...
    6934             : !> \param m_FTsiginv ...
    6935             : !> \param m_siginvTFTsiginv ...
    6936             : !> \param m_ST ...
    6937             : !> \param m_STsiginv_out ...
    6938             : !> \param m_s_vv_out ...
    6939             : !> \param m_f_vv_out ...
    6940             : !> \param para_env ...
    6941             : !> \param blacs_env ...
    6942             : !> \param nocc_of_domain ...
    6943             : !> \param domain_s_inv ...
    6944             : !> \param domain_s_inv_half ...
    6945             : !> \param domain_s_half ...
    6946             : !> \param domain_r_down ...
    6947             : !> \param cpu_of_domain ...
    6948             : !> \param domain_map ...
    6949             : !> \param assume_t0_q0x ...
    6950             : !> \param penalty_occ_vol ...
    6951             : !> \param penalty_occ_vol_prefactor ...
    6952             : !> \param eps_filter ...
    6953             : !> \param neg_thr ...
    6954             : !> \param spin_factor ...
    6955             : !> \param special_case ...
    6956             : !> \param bad_modes_projector_down_out ...
    6957             : !> \param skip_inversion ...
    6958             : !> \par History
    6959             : !>       2015.03 created [Rustam Z Khaliullin]
    6960             : !> \author Rustam Z Khaliullin
    6961             : ! **************************************************************************************************
    6962        1500 :    SUBROUTINE compute_preconditioner(domain_prec_out, m_prec_out, m_ks, m_s, &
    6963             :                                      m_siginv, m_quench_t, m_FTsiginv, m_siginvTFTsiginv, m_ST, &
    6964             :                                      m_STsiginv_out, m_s_vv_out, m_f_vv_out, para_env, &
    6965        1000 :                                      blacs_env, nocc_of_domain, domain_s_inv, domain_s_inv_half, domain_s_half, &
    6966         500 :                                      domain_r_down, cpu_of_domain, &
    6967             :                                      domain_map, assume_t0_q0x, penalty_occ_vol, penalty_occ_vol_prefactor, &
    6968         500 :                                      eps_filter, neg_thr, spin_factor, special_case, bad_modes_projector_down_out, &
    6969             :                                      skip_inversion)
    6970             : 
    6971             :       TYPE(domain_submatrix_type), DIMENSION(:), &
    6972             :          INTENT(INOUT)                                   :: domain_prec_out
    6973             :       TYPE(dbcsr_type), INTENT(INOUT)                    :: m_prec_out, m_ks, m_s
    6974             :       TYPE(dbcsr_type), INTENT(IN)                       :: m_siginv, m_quench_t, m_FTsiginv, &
    6975             :                                                             m_siginvTFTsiginv, m_ST
    6976             :       TYPE(dbcsr_type), INTENT(INOUT), OPTIONAL          :: m_STsiginv_out, m_s_vv_out, m_f_vv_out
    6977             :       TYPE(mp_para_env_type), POINTER                    :: para_env
    6978             :       TYPE(cp_blacs_env_type), POINTER                   :: blacs_env
    6979             :       INTEGER, DIMENSION(:), INTENT(IN)                  :: nocc_of_domain
    6980             :       TYPE(domain_submatrix_type), DIMENSION(:), &
    6981             :          INTENT(IN)                                      :: domain_s_inv
    6982             :       TYPE(domain_submatrix_type), DIMENSION(:), &
    6983             :          INTENT(IN), OPTIONAL                            :: domain_s_inv_half, domain_s_half
    6984             :       TYPE(domain_submatrix_type), DIMENSION(:), &
    6985             :          INTENT(IN)                                      :: domain_r_down
    6986             :       INTEGER, DIMENSION(:), INTENT(IN)                  :: cpu_of_domain
    6987             :       TYPE(domain_map_type), INTENT(IN)                  :: domain_map
    6988             :       LOGICAL, INTENT(IN)                                :: assume_t0_q0x, penalty_occ_vol
    6989             :       REAL(KIND=dp), INTENT(IN)                          :: penalty_occ_vol_prefactor, eps_filter, &
    6990             :                                                             neg_thr, spin_factor
    6991             :       INTEGER, INTENT(IN)                                :: special_case
    6992             :       TYPE(domain_submatrix_type), DIMENSION(:), &
    6993             :          INTENT(INOUT), OPTIONAL                         :: bad_modes_projector_down_out
    6994             :       LOGICAL, INTENT(IN)                                :: skip_inversion
    6995             : 
    6996             :       CHARACTER(len=*), PARAMETER :: routineN = 'compute_preconditioner'
    6997             : 
    6998             :       INTEGER                                            :: handle, ndim, precond_domain_projector
    6999         500 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: nn_diagonal
    7000             :       TYPE(dbcsr_type)                                   :: m_tmp_nn_1, m_tmp_no_3
    7001             : 
    7002         500 :       CALL timeset(routineN, handle)
    7003             : 
    7004             :       CALL dbcsr_create(m_tmp_nn_1, &
    7005             :                         template=m_s, &
    7006         500 :                         matrix_type=dbcsr_type_no_symmetry)
    7007             :       CALL dbcsr_create(m_tmp_no_3, &
    7008             :                         template=m_quench_t, &
    7009         500 :                         matrix_type=dbcsr_type_no_symmetry)
    7010             : 
    7011             :       ! calculate (1-R)F(1-R) and S-SRS
    7012             :       ! RZK-warning take advantage: some elements will be removed by the quencher
    7013             :       ! RZK-warning S operations can be performed outside the spin loop to save time
    7014             :       ! IT IS REQUIRED THAT PRECONDITIONER DOES NOT BREAK THE LOCALITY!!!!
    7015             :       ! RZK-warning: further optimization is ABSOLUTELY NECESSARY
    7016             : 
    7017             :       ! First S-SRS
    7018             :       CALL dbcsr_multiply("N", "N", 1.0_dp, &
    7019             :                           m_ST, &
    7020             :                           m_siginv, &
    7021             :                           0.0_dp, m_tmp_no_3, &
    7022         500 :                           filter_eps=eps_filter)
    7023         500 :       CALL dbcsr_desymmetrize(m_s, m_tmp_nn_1)
    7024             :       ! return STsiginv if necessary
    7025         500 :       IF (PRESENT(m_STsiginv_out)) THEN
    7026           0 :          CALL dbcsr_copy(m_STsiginv_out, m_tmp_no_3)
    7027             :       END IF
    7028         500 :       IF (special_case .EQ. xalmo_case_fully_deloc) THEN
    7029             :          ! use S instead of S-SRS
    7030             :       ELSE
    7031             :          CALL dbcsr_multiply("N", "T", -1.0_dp, &
    7032             :                              m_ST, &
    7033             :                              m_tmp_no_3, &
    7034             :                              1.0_dp, m_tmp_nn_1, &
    7035         456 :                              filter_eps=eps_filter)
    7036             :       END IF
    7037             :       ! return S_vv = (S or S-SRS) if necessary
    7038         500 :       IF (PRESENT(m_s_vv_out)) THEN
    7039           0 :          CALL dbcsr_copy(m_s_vv_out, m_tmp_nn_1)
    7040             :       END IF
    7041             : 
    7042             :       ! Second (1-R)F(1-R)
    7043             :       ! re-create matrix because desymmetrize is buggy -
    7044             :       ! it will create multiple copies of blocks
    7045         500 :       CALL dbcsr_desymmetrize(m_ks, m_prec_out)
    7046             :       CALL dbcsr_multiply("N", "T", -1.0_dp, &
    7047             :                           m_FTsiginv, &
    7048             :                           m_ST, &
    7049             :                           1.0_dp, m_prec_out, &
    7050         500 :                           filter_eps=eps_filter)
    7051             :       CALL dbcsr_multiply("N", "T", -1.0_dp, &
    7052             :                           m_ST, &
    7053             :                           m_FTsiginv, &
    7054             :                           1.0_dp, m_prec_out, &
    7055         500 :                           filter_eps=eps_filter)
    7056             :       CALL dbcsr_multiply("N", "N", 1.0_dp, &
    7057             :                           m_ST, &
    7058             :                           m_siginvTFTsiginv, &
    7059             :                           0.0_dp, m_tmp_no_3, &
    7060         500 :                           filter_eps=eps_filter)
    7061             :       CALL dbcsr_multiply("N", "T", 1.0_dp, &
    7062             :                           m_tmp_no_3, &
    7063             :                           m_ST, &
    7064             :                           1.0_dp, m_prec_out, &
    7065         500 :                           filter_eps=eps_filter)
    7066             :       ! return F_vv = (I-SR)F(I-RS) if necessary
    7067         500 :       IF (PRESENT(m_f_vv_out)) THEN
    7068           0 :          CALL dbcsr_copy(m_f_vv_out, m_prec_out)
    7069             :       END IF
    7070             : 
    7071             : #if 0
    7072             : !penalty_only=.TRUE.
    7073             :       WRITE (unit_nr, *) "prefactor0:", penalty_occ_vol_prefactor
    7074             :       !IF (penalty_occ_vol) THEN
    7075             :       CALL dbcsr_desymmetrize(m_s, &
    7076             :                               m_prec_out)
    7077             :       !CALL dbcsr_scale(m_prec_out,-penalty_occ_vol_prefactor)
    7078             :       !ENDIF
    7079             : #else
    7080             :       ! sum up the F_vv and S_vv terms
    7081             :       CALL dbcsr_add(m_prec_out, m_tmp_nn_1, &
    7082         500 :                      1.0_dp, 1.0_dp)
    7083             :       ! Scale to obtain unit step length
    7084         500 :       CALL dbcsr_scale(m_prec_out, 2.0_dp*spin_factor)
    7085             : 
    7086             :       ! add the contribution from the penalty on the occupied volume
    7087         500 :       IF (penalty_occ_vol) THEN
    7088             :          CALL dbcsr_add(m_prec_out, m_tmp_nn_1, &
    7089           0 :                         1.0_dp, penalty_occ_vol_prefactor)
    7090             :       END IF
    7091             : #endif
    7092             : 
    7093         500 :       CALL dbcsr_copy(m_tmp_nn_1, m_prec_out)
    7094             : 
    7095             :       ! invert using various algorithms
    7096         500 :       IF (special_case .EQ. xalmo_case_block_diag) THEN ! non-overlapping diagonal blocks
    7097             : 
    7098          96 :          IF (skip_inversion) THEN
    7099             : 
    7100             :             ! impose block-diagonal structure
    7101          92 :             CALL dbcsr_get_info(m_s, nfullrows_total=ndim)
    7102         276 :             ALLOCATE (nn_diagonal(ndim))
    7103          92 :             CALL dbcsr_get_diag(m_s, nn_diagonal)
    7104          92 :             CALL dbcsr_set(m_prec_out, 0.0_dp)
    7105          92 :             CALL dbcsr_set_diag(m_prec_out, nn_diagonal)
    7106          92 :             CALL dbcsr_filter(m_prec_out, eps_filter)
    7107          92 :             DEALLOCATE (nn_diagonal)
    7108             : 
    7109         184 :             CALL dbcsr_copy(m_prec_out, m_tmp_nn_1, keep_sparsity=.TRUE.)
    7110             : 
    7111             :          ELSE
    7112             : 
    7113             :             CALL pseudo_invert_diagonal_blk( &
    7114             :                matrix_in=m_tmp_nn_1, &
    7115             :                matrix_out=m_prec_out, &
    7116             :                nocc=nocc_of_domain(:) &
    7117           4 :                )
    7118             : 
    7119             :          END IF
    7120             : 
    7121         404 :       ELSE IF (special_case .EQ. xalmo_case_fully_deloc) THEN ! the entire system is a block
    7122             : 
    7123          44 :          IF (skip_inversion) THEN
    7124           0 :             CALL dbcsr_copy(m_prec_out, m_tmp_nn_1)
    7125             :          ELSE
    7126             : 
    7127             :             ! invert using cholesky (works with S matrix, will not work with S-SRS matrix)
    7128             :             CALL cp_dbcsr_cholesky_decompose(m_prec_out, &
    7129             :                                              para_env=para_env, &
    7130          44 :                                              blacs_env=blacs_env)
    7131             :             CALL cp_dbcsr_cholesky_invert(m_prec_out, &
    7132             :                                           para_env=para_env, &
    7133             :                                           blacs_env=blacs_env, &
    7134          44 :                                           uplo_to_full=.TRUE.)
    7135             :          END IF !skip_inversion
    7136             : 
    7137          44 :          CALL dbcsr_filter(m_prec_out, eps_filter)
    7138             : 
    7139             :       ELSE
    7140             : 
    7141             :          !!! use a true domain preconditioner with overlapping domains
    7142         360 :          IF (assume_t0_q0x) THEN
    7143          26 :             precond_domain_projector = -1
    7144             :          ELSE
    7145         334 :             precond_domain_projector = 0
    7146             :          END IF
    7147             :          !! RZK-warning: use PRESENT to make two nearly-identical calls
    7148             :          !! this is done because intel compiler does not seem to conform
    7149             :          !! to the FORTRAN standard for passing through optional arguments
    7150         360 :          IF (PRESENT(bad_modes_projector_down_out)) THEN
    7151             :             CALL construct_domain_preconditioner( &
    7152             :                matrix_main=m_tmp_nn_1, &
    7153             :                subm_s_inv=domain_s_inv(:), &
    7154             :                subm_s_inv_half=domain_s_inv_half(:), &
    7155             :                subm_s_half=domain_s_half(:), &
    7156             :                subm_r_down=domain_r_down(:), &
    7157             :                matrix_trimmer=m_quench_t, &
    7158             :                dpattern=m_quench_t, &
    7159             :                map=domain_map, &
    7160             :                node_of_domain=cpu_of_domain, &
    7161             :                preconditioner=domain_prec_out(:), &
    7162             :                use_trimmer=.FALSE., &
    7163             :                bad_modes_projector_down=bad_modes_projector_down_out(:), &
    7164             :                eps_zero_eigenvalues=neg_thr, &
    7165             :                my_action=precond_domain_projector, &
    7166             :                skip_inversion=skip_inversion &
    7167          18 :                )
    7168             :          ELSE
    7169             :             CALL construct_domain_preconditioner( &
    7170             :                matrix_main=m_tmp_nn_1, &
    7171             :                subm_s_inv=domain_s_inv(:), &
    7172             :                subm_r_down=domain_r_down(:), &
    7173             :                matrix_trimmer=m_quench_t, &
    7174             :                dpattern=m_quench_t, &
    7175             :                map=domain_map, &
    7176             :                node_of_domain=cpu_of_domain, &
    7177             :                preconditioner=domain_prec_out(:), &
    7178             :                use_trimmer=.FALSE., &
    7179             :                !eps_zero_eigenvalues=neg_thr,&
    7180             :                my_action=precond_domain_projector, &
    7181             :                skip_inversion=skip_inversion &
    7182         342 :                )
    7183             :          END IF
    7184             : 
    7185             :       END IF ! special_case
    7186             : 
    7187             :       ! invert using cholesky (works with S matrix, will not work with S-SRS matrix)
    7188             :       !!!CALL cp_dbcsr_cholesky_decompose(prec_vv,&
    7189             :       !!!        para_env=almo_scf_env%para_env,&
    7190             :       !!!        blacs_env=almo_scf_env%blacs_env)
    7191             :       !!!CALL cp_dbcsr_cholesky_invert(prec_vv,&
    7192             :       !!!        para_env=almo_scf_env%para_env,&
    7193             :       !!!        blacs_env=almo_scf_env%blacs_env,&
    7194             :       !!!        uplo_to_full=.TRUE.)
    7195             :       !!!CALL dbcsr_filter(prec_vv,&
    7196             :       !!!        almo_scf_env%eps_filter)
    7197             :       !!!
    7198             : 
    7199             :       ! re-create the matrix because desymmetrize is buggy -
    7200             :       ! it will create multiple copies of blocks
    7201             :       !!!DESYM!CALL dbcsr_create(prec_vv,&
    7202             :       !!!DESYM!        template=almo_scf_env%matrix_s(1),&
    7203             :       !!!DESYM!        matrix_type=dbcsr_type_no_symmetry)
    7204             :       !!!DESYM!CALL dbcsr_desymmetrize(almo_scf_env%matrix_s(1),&
    7205             :       !!!DESYM!        prec_vv)
    7206             :       !CALL dbcsr_multiply("N","N",1.0_dp,&
    7207             :       !        almo_scf_env%matrix_s(1),&
    7208             :       !        matrix_t_out(ispin),&
    7209             :       !        0.0_dp,m_tmp_no_1,&
    7210             :       !        filter_eps=almo_scf_env%eps_filter)
    7211             :       !CALL dbcsr_multiply("N","N",1.0_dp,&
    7212             :       !        m_tmp_no_1,&
    7213             :       !        almo_scf_env%matrix_sigma_inv(ispin),&
    7214             :       !        0.0_dp,m_tmp_no_3,&
    7215             :       !        filter_eps=almo_scf_env%eps_filter)
    7216             :       !CALL dbcsr_multiply("N","T",-1.0_dp,&
    7217             :       !        m_tmp_no_3,&
    7218             :       !        m_tmp_no_1,&
    7219             :       !        1.0_dp,prec_vv,&
    7220             :       !        filter_eps=almo_scf_env%eps_filter)
    7221             :       !CALL dbcsr_add_on_diag(prec_vv,&
    7222             :       !        prec_sf_mixing_s)
    7223             : 
    7224             :       !CALL dbcsr_create(prec_oo,&
    7225             :       !        template=almo_scf_env%matrix_sigma(ispin),&
    7226             :       !        matrix_type=dbcsr_type_no_symmetry)
    7227             :       !CALL dbcsr_desymmetrize(almo_scf_env%matrix_sigma(ispin),&
    7228             :       !        matrix_type=dbcsr_type_no_symmetry)
    7229             :       !CALL dbcsr_desymmetrize(almo_scf_env%matrix_sigma(ispin),&
    7230             :       !        prec_oo)
    7231             :       !CALL dbcsr_filter(prec_oo,&
    7232             :       !        almo_scf_env%eps_filter)
    7233             : 
    7234             :       !! invert using cholesky
    7235             :       !CALL dbcsr_create(prec_oo_inv,&
    7236             :       !        template=prec_oo,&
    7237             :       !        matrix_type=dbcsr_type_no_symmetry)
    7238             :       !CALL dbcsr_desymmetrize(prec_oo,&
    7239             :       !        prec_oo_inv)
    7240             :       !CALL cp_dbcsr_cholesky_decompose(prec_oo_inv,&
    7241             :       !        para_env=almo_scf_env%para_env,&
    7242             :       !        blacs_env=almo_scf_env%blacs_env)
    7243             :       !CALL cp_dbcsr_cholesky_invert(prec_oo_inv,&
    7244             :       !        para_env=almo_scf_env%para_env,&
    7245             :       !        blacs_env=almo_scf_env%blacs_env,&
    7246             :       !        uplo_to_full=.TRUE.)
    7247             : 
    7248         500 :       CALL dbcsr_release(m_tmp_nn_1)
    7249         500 :       CALL dbcsr_release(m_tmp_no_3)
    7250             : 
    7251         500 :       CALL timestop(handle)
    7252             : 
    7253        1000 :    END SUBROUTINE compute_preconditioner
    7254             : 
    7255             : ! **************************************************************************************************
    7256             : !> \brief Compute beta for conjugate gradient algorithms
    7257             : !> \param beta ...
    7258             : !> \param numer ...
    7259             : !> \param denom ...
    7260             : !> \param reset_conjugator ...
    7261             : !> \param conjugator ...
    7262             : !> \param grad ...
    7263             : !> \param prev_grad ...
    7264             : !> \param step ...
    7265             : !> \param prev_step ...
    7266             : !> \param prev_minus_prec_grad ...
    7267             : !> \par History
    7268             : !>       2015.04 created [Rustam Z Khaliullin]
    7269             : !> \author Rustam Z Khaliullin
    7270             : ! **************************************************************************************************
    7271        1016 :    SUBROUTINE compute_cg_beta(beta, numer, denom, reset_conjugator, conjugator, &
    7272         508 :                               grad, prev_grad, step, prev_step, prev_minus_prec_grad)
    7273             : 
    7274             :       REAL(KIND=dp), INTENT(INOUT)                       :: beta
    7275             :       REAL(KIND=dp), INTENT(INOUT), OPTIONAL             :: numer, denom
    7276             :       LOGICAL, INTENT(INOUT)                             :: reset_conjugator
    7277             :       INTEGER, INTENT(IN)                                :: conjugator
    7278             :       TYPE(dbcsr_type), DIMENSION(:), INTENT(INOUT)      :: grad, prev_grad, step, prev_step
    7279             :       TYPE(dbcsr_type), DIMENSION(:), INTENT(INOUT), &
    7280             :          OPTIONAL                                        :: prev_minus_prec_grad
    7281             : 
    7282             :       CHARACTER(len=*), PARAMETER                        :: routineN = 'compute_cg_beta'
    7283             : 
    7284             :       INTEGER                                            :: handle, i, nsize, unit_nr
    7285             :       REAL(KIND=dp)                                      :: den, kappa, my_denom, my_numer, &
    7286             :                                                             my_numer2, my_numer3, num, num2, num3, &
    7287             :                                                             tau
    7288             :       TYPE(cp_logger_type), POINTER                      :: logger
    7289             :       TYPE(dbcsr_type)                                   :: m_tmp_no_1
    7290             : 
    7291         508 :       CALL timeset(routineN, handle)
    7292             : 
    7293             :       ! get a useful output_unit
    7294         508 :       logger => cp_get_default_logger()
    7295         508 :       IF (logger%para_env%is_source()) THEN
    7296         254 :          unit_nr = cp_logger_get_default_unit_nr(logger, local=.TRUE.)
    7297             :       ELSE
    7298             :          unit_nr = -1
    7299             :       END IF
    7300             : 
    7301         508 :       IF (.NOT. PRESENT(prev_minus_prec_grad)) THEN
    7302             :          IF (conjugator .EQ. cg_fletcher_reeves .OR. &
    7303          82 :              conjugator .EQ. cg_polak_ribiere .OR. &
    7304             :              conjugator .EQ. cg_hager_zhang) THEN
    7305           0 :             CPABORT("conjugator needs more input")
    7306             :          END IF
    7307             :       END IF
    7308             : 
    7309             :       ! return num denom so beta can be calculated spin-by-spin
    7310         508 :       IF (PRESENT(numer) .OR. PRESENT(denom)) THEN
    7311             :          IF (conjugator .EQ. cg_hestenes_stiefel .OR. &
    7312           0 :              conjugator .EQ. cg_dai_yuan .OR. &
    7313             :              conjugator .EQ. cg_hager_zhang) THEN
    7314           0 :             CPABORT("cannot return numer/denom")
    7315             :          END IF
    7316             :       END IF
    7317             : 
    7318         508 :       nsize = SIZE(grad)
    7319             : 
    7320         508 :       my_numer = 0.0_dp
    7321         508 :       my_numer2 = 0.0_dp
    7322         508 :       my_numer3 = 0.0_dp
    7323         508 :       my_denom = 0.0_dp
    7324             : 
    7325        1016 :       DO i = 1, nsize
    7326             : 
    7327             :          CALL dbcsr_create(m_tmp_no_1, &
    7328             :                            template=grad(i), &
    7329         508 :                            matrix_type=dbcsr_type_no_symmetry)
    7330             : 
    7331         570 :          SELECT CASE (conjugator)
    7332             :          CASE (cg_hestenes_stiefel)
    7333          62 :             CALL dbcsr_copy(m_tmp_no_1, grad(i))
    7334             :             CALL dbcsr_add(m_tmp_no_1, prev_grad(i), &
    7335          62 :                            1.0_dp, -1.0_dp)
    7336          62 :             CALL dbcsr_dot(m_tmp_no_1, step(i), num)
    7337          62 :             CALL dbcsr_dot(m_tmp_no_1, prev_step(i), den)
    7338             :          CASE (cg_fletcher_reeves)
    7339          94 :             CALL dbcsr_dot(grad(i), step(i), num)
    7340          94 :             CALL dbcsr_dot(prev_grad(i), prev_minus_prec_grad(i), den)
    7341             :          CASE (cg_polak_ribiere)
    7342          30 :             CALL dbcsr_dot(prev_grad(i), prev_minus_prec_grad(i), den)
    7343          30 :             CALL dbcsr_copy(m_tmp_no_1, grad(i))
    7344          30 :             CALL dbcsr_add(m_tmp_no_1, prev_grad(i), 1.0_dp, -1.0_dp)
    7345          30 :             CALL dbcsr_dot(m_tmp_no_1, step(i), num)
    7346             :          CASE (cg_fletcher)
    7347         172 :             CALL dbcsr_dot(grad(i), step(i), num)
    7348         172 :             CALL dbcsr_dot(prev_grad(i), prev_step(i), den)
    7349             :          CASE (cg_liu_storey)
    7350          20 :             CALL dbcsr_dot(prev_grad(i), prev_step(i), den)
    7351          20 :             CALL dbcsr_copy(m_tmp_no_1, grad(i))
    7352          20 :             CALL dbcsr_add(m_tmp_no_1, prev_grad(i), 1.0_dp, -1.0_dp)
    7353          20 :             CALL dbcsr_dot(m_tmp_no_1, step(i), num)
    7354             :          CASE (cg_dai_yuan)
    7355          34 :             CALL dbcsr_dot(grad(i), step(i), num)
    7356          34 :             CALL dbcsr_copy(m_tmp_no_1, grad(i))
    7357          34 :             CALL dbcsr_add(m_tmp_no_1, prev_grad(i), 1.0_dp, -1.0_dp)
    7358          34 :             CALL dbcsr_dot(m_tmp_no_1, prev_step(i), den)
    7359             :          CASE (cg_hager_zhang)
    7360          72 :             CALL dbcsr_copy(m_tmp_no_1, grad(i))
    7361          72 :             CALL dbcsr_add(m_tmp_no_1, prev_grad(i), 1.0_dp, -1.0_dp)
    7362          72 :             CALL dbcsr_dot(m_tmp_no_1, prev_step(i), den)
    7363          72 :             CALL dbcsr_dot(m_tmp_no_1, prev_minus_prec_grad(i), num)
    7364          72 :             CALL dbcsr_dot(m_tmp_no_1, step(i), num2)
    7365          72 :             CALL dbcsr_dot(prev_step(i), grad(i), num3)
    7366          72 :             my_numer2 = my_numer2 + num2
    7367          72 :             my_numer3 = my_numer3 + num3
    7368             :          CASE (cg_zero)
    7369          24 :             num = 0.0_dp
    7370          24 :             den = 1.0_dp
    7371             :          CASE DEFAULT
    7372         508 :             CPABORT("illegal conjugator")
    7373             :          END SELECT
    7374         508 :          my_numer = my_numer + num
    7375         508 :          my_denom = my_denom + den
    7376             : 
    7377        1016 :          CALL dbcsr_release(m_tmp_no_1)
    7378             : 
    7379             :       END DO ! i - nsize
    7380             : 
    7381        1016 :       DO i = 1, nsize
    7382             : 
    7383         508 :          SELECT CASE (conjugator)
    7384             :          CASE (cg_hestenes_stiefel, cg_dai_yuan)
    7385          96 :             beta = -1.0_dp*my_numer/my_denom
    7386             :          CASE (cg_fletcher_reeves, cg_polak_ribiere, cg_fletcher, cg_liu_storey)
    7387         316 :             beta = my_numer/my_denom
    7388             :          CASE (cg_hager_zhang)
    7389          72 :             kappa = -2.0_dp*my_numer/my_denom
    7390          72 :             tau = -1.0_dp*my_numer2/my_denom
    7391          72 :             beta = tau - kappa*my_numer3/my_denom
    7392             :          CASE (cg_zero)
    7393          24 :             beta = 0.0_dp
    7394             :          CASE DEFAULT
    7395         508 :             CPABORT("illegal conjugator")
    7396             :          END SELECT
    7397             : 
    7398             :       END DO ! i - nsize
    7399             : 
    7400         508 :       IF (beta .LT. 0.0_dp) THEN
    7401           0 :          IF (unit_nr > 0) THEN
    7402           0 :             WRITE (unit_nr, *) " Resetting conjugator because beta is negative: ", beta
    7403             :          END IF
    7404           0 :          reset_conjugator = .TRUE.
    7405             :       END IF
    7406             : 
    7407         508 :       IF (PRESENT(numer)) THEN
    7408           0 :          numer = my_numer
    7409             :       END IF
    7410         508 :       IF (PRESENT(denom)) THEN
    7411           0 :          denom = my_denom
    7412             :       END IF
    7413             : 
    7414         508 :       CALL timestop(handle)
    7415             : 
    7416         508 :    END SUBROUTINE compute_cg_beta
    7417             : 
    7418             : ! **************************************************************************************************
    7419             : !> \brief computes the step matrix from the gradient and Hessian using the Newton-Raphson method
    7420             : !> \param optimizer ...
    7421             : !> \param m_grad ...
    7422             : !> \param m_delta ...
    7423             : !> \param m_s ...
    7424             : !> \param m_ks ...
    7425             : !> \param m_siginv ...
    7426             : !> \param m_quench_t ...
    7427             : !> \param m_FTsiginv ...
    7428             : !> \param m_siginvTFTsiginv ...
    7429             : !> \param m_ST ...
    7430             : !> \param m_t ...
    7431             : !> \param m_sig_sqrti_ii ...
    7432             : !> \param domain_s_inv ...
    7433             : !> \param domain_r_down ...
    7434             : !> \param domain_map ...
    7435             : !> \param cpu_of_domain ...
    7436             : !> \param nocc_of_domain ...
    7437             : !> \param para_env ...
    7438             : !> \param blacs_env ...
    7439             : !> \param eps_filter ...
    7440             : !> \param optimize_theta ...
    7441             : !> \param penalty_occ_vol ...
    7442             : !> \param normalize_orbitals ...
    7443             : !> \param penalty_occ_vol_prefactor ...
    7444             : !> \param penalty_occ_vol_pf2 ...
    7445             : !> \param special_case ...
    7446             : !> \par History
    7447             : !>       2015.04 created [Rustam Z. Khaliullin]
    7448             : !> \author Rustam Z. Khaliullin
    7449             : ! **************************************************************************************************
    7450           0 :    SUBROUTINE newton_grad_to_step(optimizer, m_grad, m_delta, m_s, m_ks, &
    7451           0 :                                   m_siginv, m_quench_t, m_FTsiginv, m_siginvTFTsiginv, m_ST, m_t, &
    7452           0 :                                   m_sig_sqrti_ii, domain_s_inv, domain_r_down, domain_map, cpu_of_domain, &
    7453           0 :                                   nocc_of_domain, para_env, blacs_env, eps_filter, optimize_theta, &
    7454           0 :                                   penalty_occ_vol, normalize_orbitals, penalty_occ_vol_prefactor, &
    7455           0 :                                   penalty_occ_vol_pf2, special_case)
    7456             : 
    7457             :       TYPE(optimizer_options_type), INTENT(IN)           :: optimizer
    7458             :       TYPE(dbcsr_type), DIMENSION(:), INTENT(IN)         :: m_grad
    7459             :       TYPE(dbcsr_type), DIMENSION(:), INTENT(INOUT)      :: m_delta, m_s, m_ks, m_siginv, m_quench_t
    7460             :       TYPE(dbcsr_type), DIMENSION(:), INTENT(IN)         :: m_FTsiginv, m_siginvTFTsiginv, m_ST, &
    7461             :                                                             m_t, m_sig_sqrti_ii
    7462             :       TYPE(domain_submatrix_type), DIMENSION(:, :), &
    7463             :          INTENT(IN)                                      :: domain_s_inv, domain_r_down
    7464             :       TYPE(domain_map_type), DIMENSION(:), INTENT(IN)    :: domain_map
    7465             :       INTEGER, DIMENSION(:), INTENT(IN)                  :: cpu_of_domain
    7466             :       INTEGER, DIMENSION(:, :), INTENT(IN)               :: nocc_of_domain
    7467             :       TYPE(mp_para_env_type), POINTER                    :: para_env
    7468             :       TYPE(cp_blacs_env_type), POINTER                   :: blacs_env
    7469             :       REAL(KIND=dp), INTENT(IN)                          :: eps_filter
    7470             :       LOGICAL, INTENT(IN)                                :: optimize_theta, penalty_occ_vol, &
    7471             :                                                             normalize_orbitals
    7472             :       REAL(KIND=dp), DIMENSION(:), INTENT(IN)            :: penalty_occ_vol_prefactor, &
    7473             :                                                             penalty_occ_vol_pf2
    7474             :       INTEGER, INTENT(IN)                                :: special_case
    7475             : 
    7476             :       CHARACTER(len=*), PARAMETER :: routineN = 'newton_grad_to_step'
    7477             : 
    7478             :       CHARACTER(LEN=20)                                  :: iter_type
    7479             :       INTEGER                                            :: handle, ispin, iteration, max_iter, &
    7480             :                                                             ndomains, nspins, outer_iteration, &
    7481             :                                                             outer_max_iter, unit_nr
    7482             :       LOGICAL :: converged, do_exact_inversion, outer_prepare_to_exit, prepare_to_exit, &
    7483             :          reset_conjugator, use_preconditioner
    7484             :       REAL(KIND=dp)                                      :: alpha, beta, denom, denom_ispin, &
    7485             :                                                             eps_error_target, numer, numer_ispin, &
    7486             :                                                             residue_norm, spin_factor, t1, t2
    7487           0 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: residue_max_norm
    7488             :       TYPE(cp_logger_type), POINTER                      :: logger
    7489             :       TYPE(dbcsr_type)                                   :: m_tmp_oo_1, m_tmp_oo_2
    7490           0 :       TYPE(dbcsr_type), ALLOCATABLE, DIMENSION(:)        :: m_f_vo, m_f_vv, m_Hstep, m_prec, &
    7491           0 :                                                             m_residue, m_residue_prev, m_s_vv, &
    7492           0 :                                                             m_step, m_STsiginv, m_zet, m_zet_prev
    7493             :       TYPE(domain_submatrix_type), ALLOCATABLE, &
    7494           0 :          DIMENSION(:, :)                                 :: domain_prec
    7495             : 
    7496           0 :       CALL timeset(routineN, handle)
    7497             : 
    7498             :       ! get a useful output_unit
    7499           0 :       logger => cp_get_default_logger()
    7500           0 :       IF (logger%para_env%is_source()) THEN
    7501           0 :          unit_nr = cp_logger_get_default_unit_nr(logger, local=.TRUE.)
    7502             :       ELSE
    7503             :          unit_nr = -1
    7504             :       END IF
    7505             : 
    7506             :       !!! Currently for non-theta only
    7507           0 :       IF (optimize_theta) THEN
    7508           0 :          CPABORT("theta is NYI")
    7509             :       END IF
    7510             : 
    7511             :       ! set optimizer options
    7512           0 :       use_preconditioner = (optimizer%preconditioner .NE. xalmo_prec_zero)
    7513           0 :       outer_max_iter = optimizer%max_iter_outer_loop
    7514           0 :       max_iter = optimizer%max_iter
    7515           0 :       eps_error_target = optimizer%eps_error
    7516             : 
    7517             :       ! set key dimensions
    7518           0 :       nspins = SIZE(m_ks)
    7519           0 :       ndomains = SIZE(domain_s_inv, 1)
    7520             : 
    7521           0 :       IF (nspins == 1) THEN
    7522           0 :          spin_factor = 2.0_dp
    7523             :       ELSE
    7524           0 :          spin_factor = 1.0_dp
    7525             :       END IF
    7526             : 
    7527           0 :       ALLOCATE (domain_prec(ndomains, nspins))
    7528           0 :       CALL init_submatrices(domain_prec)
    7529             : 
    7530             :       ! allocate matrices
    7531           0 :       ALLOCATE (m_residue(nspins))
    7532           0 :       ALLOCATE (m_residue_prev(nspins))
    7533           0 :       ALLOCATE (m_step(nspins))
    7534           0 :       ALLOCATE (m_zet(nspins))
    7535           0 :       ALLOCATE (m_zet_prev(nspins))
    7536           0 :       ALLOCATE (m_Hstep(nspins))
    7537           0 :       ALLOCATE (m_prec(nspins))
    7538           0 :       ALLOCATE (m_s_vv(nspins))
    7539           0 :       ALLOCATE (m_f_vv(nspins))
    7540           0 :       ALLOCATE (m_f_vo(nspins))
    7541           0 :       ALLOCATE (m_STsiginv(nspins))
    7542             : 
    7543           0 :       ALLOCATE (residue_max_norm(nspins))
    7544             : 
    7545             :       ! initiate objects before iterations
    7546           0 :       DO ispin = 1, nspins
    7547             : 
    7548             :          ! init matrices
    7549             :          CALL dbcsr_create(m_residue(ispin), &
    7550             :                            template=m_quench_t(ispin), &
    7551           0 :                            matrix_type=dbcsr_type_no_symmetry)
    7552             :          CALL dbcsr_create(m_residue_prev(ispin), &
    7553             :                            template=m_quench_t(ispin), &
    7554           0 :                            matrix_type=dbcsr_type_no_symmetry)
    7555             :          CALL dbcsr_create(m_step(ispin), &
    7556             :                            template=m_quench_t(ispin), &
    7557           0 :                            matrix_type=dbcsr_type_no_symmetry)
    7558             :          CALL dbcsr_create(m_zet_prev(ispin), &
    7559             :                            template=m_quench_t(ispin), &
    7560           0 :                            matrix_type=dbcsr_type_no_symmetry)
    7561             :          CALL dbcsr_create(m_zet(ispin), &
    7562             :                            template=m_quench_t(ispin), &
    7563           0 :                            matrix_type=dbcsr_type_no_symmetry)
    7564             :          CALL dbcsr_create(m_Hstep(ispin), &
    7565             :                            template=m_quench_t(ispin), &
    7566           0 :                            matrix_type=dbcsr_type_no_symmetry)
    7567             :          CALL dbcsr_create(m_f_vo(ispin), &
    7568             :                            template=m_quench_t(ispin), &
    7569           0 :                            matrix_type=dbcsr_type_no_symmetry)
    7570             :          CALL dbcsr_create(m_STsiginv(ispin), &
    7571             :                            template=m_quench_t(ispin), &
    7572           0 :                            matrix_type=dbcsr_type_no_symmetry)
    7573             :          CALL dbcsr_create(m_f_vv(ispin), &
    7574             :                            template=m_ks(ispin), &
    7575           0 :                            matrix_type=dbcsr_type_no_symmetry)
    7576             :          CALL dbcsr_create(m_s_vv(ispin), &
    7577             :                            template=m_s(1), &
    7578           0 :                            matrix_type=dbcsr_type_no_symmetry)
    7579             :          CALL dbcsr_create(m_prec(ispin), &
    7580             :                            template=m_ks(ispin), &
    7581           0 :                            matrix_type=dbcsr_type_no_symmetry)
    7582             : 
    7583             :          ! compute the full "gradient" - it is necessary to
    7584             :          ! evaluate Hessian.X
    7585           0 :          CALL dbcsr_copy(m_f_vo(ispin), m_FTsiginv(ispin))
    7586             :          CALL dbcsr_multiply("N", "N", -1.0_dp, &
    7587             :                              m_ST(ispin), &
    7588             :                              m_siginvTFTsiginv(ispin), &
    7589             :                              1.0_dp, m_f_vo(ispin), &
    7590           0 :                              filter_eps=eps_filter)
    7591             : 
    7592             : ! RZK-warning
    7593             : ! compute preconditioner even if we do not use it
    7594             : ! this is for debugging because compute_preconditioner includes
    7595             : ! computing F_vv and S_vv necessary for
    7596             : !       IF ( use_preconditioner ) THEN
    7597             : 
    7598             : ! domain_s_inv and domain_r_down are never used with assume_t0_q0x=FALSE
    7599             :          CALL compute_preconditioner( &
    7600             :             domain_prec_out=domain_prec(:, ispin), &
    7601             :             m_prec_out=m_prec(ispin), &
    7602             :             m_ks=m_ks(ispin), &
    7603             :             m_s=m_s(1), &
    7604             :             m_siginv=m_siginv(ispin), &
    7605             :             m_quench_t=m_quench_t(ispin), &
    7606             :             m_FTsiginv=m_FTsiginv(ispin), &
    7607             :             m_siginvTFTsiginv=m_siginvTFTsiginv(ispin), &
    7608             :             m_ST=m_ST(ispin), &
    7609             :             m_STsiginv_out=m_STsiginv(ispin), &
    7610             :             m_s_vv_out=m_s_vv(ispin), &
    7611             :             m_f_vv_out=m_f_vv(ispin), &
    7612             :             para_env=para_env, &
    7613             :             blacs_env=blacs_env, &
    7614             :             nocc_of_domain=nocc_of_domain(:, ispin), &
    7615             :             domain_s_inv=domain_s_inv(:, ispin), &
    7616             :             domain_r_down=domain_r_down(:, ispin), &
    7617             :             cpu_of_domain=cpu_of_domain(:), &
    7618             :             domain_map=domain_map(ispin), &
    7619             :             assume_t0_q0x=.FALSE., &
    7620             :             penalty_occ_vol=penalty_occ_vol, &
    7621             :             penalty_occ_vol_prefactor=penalty_occ_vol_prefactor(ispin), &
    7622             :             eps_filter=eps_filter, &
    7623             :             neg_thr=0.5_dp, &
    7624             :             spin_factor=spin_factor, &
    7625             :             special_case=special_case, &
    7626             :             skip_inversion=.FALSE. &
    7627           0 :             )
    7628             : 
    7629             : !       ENDIF ! use_preconditioner
    7630             : 
    7631             :          ! initial guess
    7632           0 :          CALL dbcsr_copy(m_delta(ispin), m_quench_t(ispin))
    7633             :          ! in order to use dbcsr_set matrix blocks must exist
    7634           0 :          CALL dbcsr_set(m_delta(ispin), 0.0_dp)
    7635           0 :          CALL dbcsr_copy(m_residue(ispin), m_grad(ispin))
    7636           0 :          CALL dbcsr_scale(m_residue(ispin), -1.0_dp)
    7637             : 
    7638           0 :          do_exact_inversion = .FALSE.
    7639             :          IF (do_exact_inversion) THEN
    7640             : 
    7641             :             ! copy grad to m_step temporarily
    7642             :             ! use m_step as input to the inversion routine
    7643             :             CALL dbcsr_copy(m_step(ispin), m_grad(ispin))
    7644             : 
    7645             :             ! expensive "exact" inversion of the "nearly-exact" Hessian
    7646             :             ! hopefully returns Z=-H^(-1).G
    7647             :             CALL hessian_diag_apply( &
    7648             :                matrix_grad=m_step(ispin), &
    7649             :                matrix_step=m_zet(ispin), &
    7650             :                matrix_S_ao=m_s_vv(ispin), &
    7651             :                matrix_F_ao=m_f_vv(ispin), &
    7652             :                !matrix_S_ao=m_s(ispin),&
    7653             :                !matrix_F_ao=m_ks(ispin),&
    7654             :                matrix_S_mo=m_siginv(ispin), &
    7655             :                matrix_F_mo=m_siginvTFTsiginv(ispin), &
    7656             :                matrix_S_vo=m_STsiginv(ispin), &
    7657             :                matrix_F_vo=m_f_vo(ispin), &
    7658             :                quench_t=m_quench_t(ispin), &
    7659             :                spin_factor=spin_factor, &
    7660             :                eps_zero=eps_filter*10.0_dp, &
    7661             :                penalty_occ_vol=penalty_occ_vol, &
    7662             :                penalty_occ_vol_prefactor=penalty_occ_vol_prefactor(ispin), &
    7663             :                penalty_occ_vol_pf2=penalty_occ_vol_pf2(ispin), &
    7664             :                m_s=m_s(1), &
    7665             :                para_env=para_env, &
    7666             :                blacs_env=blacs_env &
    7667             :                )
    7668             :             ! correct solution by the spin factor
    7669             :             !CALL dbcsr_scale(m_zet(ispin),1.0_dp/(2.0_dp*spin_factor))
    7670             : 
    7671             :          ELSE ! use PCG to solve H.D=-G
    7672             : 
    7673           0 :             IF (use_preconditioner) THEN
    7674             : 
    7675           0 :                IF (special_case .EQ. xalmo_case_block_diag .OR. &
    7676             :                    special_case .EQ. xalmo_case_fully_deloc) THEN
    7677             : 
    7678             :                   CALL dbcsr_multiply("N", "N", 1.0_dp, &
    7679             :                                       m_prec(ispin), &
    7680             :                                       m_residue(ispin), &
    7681             :                                       0.0_dp, m_zet(ispin), &
    7682           0 :                                       filter_eps=eps_filter)
    7683             : 
    7684             :                ELSE
    7685             : 
    7686             :                   CALL apply_domain_operators( &
    7687             :                      matrix_in=m_residue(ispin), &
    7688             :                      matrix_out=m_zet(ispin), &
    7689             :                      operator1=domain_prec(:, ispin), &
    7690             :                      dpattern=m_quench_t(ispin), &
    7691             :                      map=domain_map(ispin), &
    7692             :                      node_of_domain=cpu_of_domain(:), &
    7693             :                      my_action=0, &
    7694             :                      filter_eps=eps_filter &
    7695             :                      !matrix_trimmer=,&
    7696             :                      !use_trimmer=.FALSE.,&
    7697           0 :                      )
    7698             : 
    7699             :                END IF ! special_case
    7700             : 
    7701             :             ELSE ! do not use preconditioner
    7702             : 
    7703           0 :                CALL dbcsr_copy(m_zet(ispin), m_residue(ispin))
    7704             : 
    7705             :             END IF ! use_preconditioner
    7706             : 
    7707             :          END IF ! do_exact_inversion
    7708             : 
    7709           0 :          CALL dbcsr_copy(m_step(ispin), m_zet(ispin))
    7710             : 
    7711             :       END DO !ispin
    7712             : 
    7713             :       ! start the outer SCF loop
    7714           0 :       outer_prepare_to_exit = .FALSE.
    7715           0 :       outer_iteration = 0
    7716           0 :       residue_norm = 0.0_dp
    7717             : 
    7718             :       DO
    7719             : 
    7720             :          ! start the inner SCF loop
    7721           0 :          prepare_to_exit = .FALSE.
    7722           0 :          converged = .FALSE.
    7723           0 :          iteration = 0
    7724           0 :          t1 = m_walltime()
    7725             : 
    7726             :          DO
    7727             : 
    7728             :             ! apply hessian to the step matrix
    7729             :             CALL apply_hessian( &
    7730             :                m_x_in=m_step, &
    7731             :                m_x_out=m_Hstep, &
    7732             :                m_ks=m_ks, &
    7733             :                m_s=m_s, &
    7734             :                m_siginv=m_siginv, &
    7735             :                m_quench_t=m_quench_t, &
    7736             :                m_FTsiginv=m_FTsiginv, &
    7737             :                m_siginvTFTsiginv=m_siginvTFTsiginv, &
    7738             :                m_ST=m_ST, &
    7739             :                m_STsiginv=m_STsiginv, &
    7740             :                m_s_vv=m_s_vv, &
    7741             :                m_ks_vv=m_f_vv, &
    7742             :                !m_s_vv=m_s,&
    7743             :                !m_ks_vv=m_ks,&
    7744             :                m_g_full=m_f_vo, &
    7745             :                m_t=m_t, &
    7746             :                m_sig_sqrti_ii=m_sig_sqrti_ii, &
    7747             :                penalty_occ_vol=penalty_occ_vol, &
    7748             :                normalize_orbitals=normalize_orbitals, &
    7749             :                penalty_occ_vol_prefactor=penalty_occ_vol_prefactor, &
    7750             :                eps_filter=eps_filter, &
    7751             :                path_num=hessian_path_reuse &
    7752           0 :                )
    7753             : 
    7754             :             ! alpha is computed outside the spin loop
    7755           0 :             numer = 0.0_dp
    7756           0 :             denom = 0.0_dp
    7757           0 :             DO ispin = 1, nspins
    7758             : 
    7759           0 :                CALL dbcsr_dot(m_residue(ispin), m_zet(ispin), numer_ispin)
    7760           0 :                CALL dbcsr_dot(m_step(ispin), m_Hstep(ispin), denom_ispin)
    7761             : 
    7762           0 :                numer = numer + numer_ispin
    7763           0 :                denom = denom + denom_ispin
    7764             : 
    7765             :             END DO !ispin
    7766             : 
    7767           0 :             alpha = numer/denom
    7768             : 
    7769           0 :             DO ispin = 1, nspins
    7770             : 
    7771             :                ! update the variable
    7772           0 :                CALL dbcsr_add(m_delta(ispin), m_step(ispin), 1.0_dp, alpha)
    7773           0 :                CALL dbcsr_copy(m_residue_prev(ispin), m_residue(ispin))
    7774             :                CALL dbcsr_add(m_residue(ispin), m_Hstep(ispin), &
    7775           0 :                               1.0_dp, -1.0_dp*alpha)
    7776           0 :                residue_max_norm(ispin) = dbcsr_maxabs(m_residue(ispin))
    7777             : 
    7778             :             END DO ! ispin
    7779             : 
    7780             :             ! check convergence and other exit criteria
    7781           0 :             residue_norm = MAXVAL(residue_max_norm)
    7782           0 :             converged = (residue_norm .LT. eps_error_target)
    7783           0 :             IF (converged .OR. (iteration .GE. max_iter)) THEN
    7784             :                prepare_to_exit = .TRUE.
    7785             :             END IF
    7786             : 
    7787           0 :             IF (.NOT. prepare_to_exit) THEN
    7788             : 
    7789           0 :                DO ispin = 1, nspins
    7790             : 
    7791             :                   ! save current z before the update
    7792           0 :                   CALL dbcsr_copy(m_zet_prev(ispin), m_zet(ispin))
    7793             : 
    7794             :                   ! compute the new step (apply preconditioner if available)
    7795           0 :                   IF (use_preconditioner) THEN
    7796             : 
    7797             :                      !IF (unit_nr>0) THEN
    7798             :                      !   WRITE(unit_nr,*) "....applying preconditioner...."
    7799             :                      !ENDIF
    7800             : 
    7801           0 :                      IF (special_case .EQ. xalmo_case_block_diag .OR. &
    7802             :                          special_case .EQ. xalmo_case_fully_deloc) THEN
    7803             : 
    7804             :                         CALL dbcsr_multiply("N", "N", 1.0_dp, &
    7805             :                                             m_prec(ispin), &
    7806             :                                             m_residue(ispin), &
    7807             :                                             0.0_dp, m_zet(ispin), &
    7808           0 :                                             filter_eps=eps_filter)
    7809             : 
    7810             :                      ELSE
    7811             : 
    7812             :                         CALL apply_domain_operators( &
    7813             :                            matrix_in=m_residue(ispin), &
    7814             :                            matrix_out=m_zet(ispin), &
    7815             :                            operator1=domain_prec(:, ispin), &
    7816             :                            dpattern=m_quench_t(ispin), &
    7817             :                            map=domain_map(ispin), &
    7818             :                            node_of_domain=cpu_of_domain(:), &
    7819             :                            my_action=0, &
    7820             :                            filter_eps=eps_filter &
    7821             :                            !matrix_trimmer=,&
    7822             :                            !use_trimmer=.FALSE.,&
    7823           0 :                            )
    7824             : 
    7825             :                      END IF ! special case
    7826             : 
    7827             :                   ELSE
    7828             : 
    7829           0 :                      CALL dbcsr_copy(m_zet(ispin), m_residue(ispin))
    7830             : 
    7831             :                   END IF
    7832             : 
    7833             :                END DO !ispin
    7834             : 
    7835             :                ! compute the conjugation coefficient - beta
    7836             :                CALL compute_cg_beta( &
    7837             :                   beta=beta, &
    7838             :                   reset_conjugator=reset_conjugator, &
    7839             :                   conjugator=cg_fletcher, &
    7840             :                   grad=m_residue, &
    7841             :                   prev_grad=m_residue_prev, &
    7842             :                   step=m_zet, &
    7843           0 :                   prev_step=m_zet_prev)
    7844             : 
    7845           0 :                DO ispin = 1, nspins
    7846             : 
    7847             :                   ! conjugate the step direction
    7848           0 :                   CALL dbcsr_add(m_step(ispin), m_zet(ispin), beta, 1.0_dp)
    7849             : 
    7850             :                END DO !ispin
    7851             : 
    7852             :             END IF ! not.prepare_to_exit
    7853             : 
    7854           0 :             t2 = m_walltime()
    7855           0 :             IF (unit_nr > 0) THEN
    7856             :                !iter_type=TRIM("ALMO SCF "//iter_type)
    7857           0 :                iter_type = TRIM("NR STEP")
    7858             :                WRITE (unit_nr, '(T6,A9,I6,F14.5,F14.5,F15.10,F9.2)') &
    7859           0 :                   iter_type, iteration, &
    7860           0 :                   alpha, beta, residue_norm, &
    7861           0 :                   t2 - t1
    7862             :             END IF
    7863           0 :             t1 = m_walltime()
    7864             : 
    7865           0 :             iteration = iteration + 1
    7866           0 :             IF (prepare_to_exit) EXIT
    7867             : 
    7868             :          END DO ! inner loop
    7869             : 
    7870           0 :          IF (converged .OR. (outer_iteration .GE. outer_max_iter)) THEN
    7871           0 :             outer_prepare_to_exit = .TRUE.
    7872             :          END IF
    7873             : 
    7874           0 :          outer_iteration = outer_iteration + 1
    7875           0 :          IF (outer_prepare_to_exit) EXIT
    7876             : 
    7877             :       END DO ! outer loop
    7878             : 
    7879             : ! is not necessary if penalty_occ_vol_pf2=0.0
    7880             : #if 0
    7881             : 
    7882             :       IF (penalty_occ_vol) THEN
    7883             : 
    7884             :          DO ispin = 1, nspins
    7885             : 
    7886             :             CALL dbcsr_copy(m_zet(ispin), m_grad(ispin))
    7887             :             CALL dbcsr_dot(m_delta(ispin), m_zet(ispin), alpha)
    7888             :             WRITE (unit_nr, *) "trace(grad.delta): ", alpha
    7889             :             alpha = -1.0_dp/(penalty_occ_vol_pf2(ispin)*alpha - 1.0_dp)
    7890             :             WRITE (unit_nr, *) "correction alpha: ", alpha
    7891             :             CALL dbcsr_scale(m_delta(ispin), alpha)
    7892             : 
    7893             :          END DO
    7894             : 
    7895             :       END IF
    7896             : 
    7897             : #endif
    7898             : 
    7899           0 :       DO ispin = 1, nspins
    7900             : 
    7901             :          ! check whether the step lies entirely in R or Q
    7902             :          CALL dbcsr_create(m_tmp_oo_1, &
    7903             :                            template=m_siginv(ispin), &
    7904           0 :                            matrix_type=dbcsr_type_no_symmetry)
    7905             :          CALL dbcsr_create(m_tmp_oo_2, &
    7906             :                            template=m_siginv(ispin), &
    7907           0 :                            matrix_type=dbcsr_type_no_symmetry)
    7908             :          CALL dbcsr_multiply("T", "N", 1.0_dp, &
    7909             :                              m_ST(ispin), &
    7910             :                              m_delta(ispin), &
    7911             :                              0.0_dp, m_tmp_oo_1, &
    7912           0 :                              filter_eps=eps_filter)
    7913             :          CALL dbcsr_multiply("N", "N", 1.0_dp, &
    7914             :                              m_siginv(ispin), &
    7915             :                              m_tmp_oo_1, &
    7916             :                              0.0_dp, m_tmp_oo_2, &
    7917           0 :                              filter_eps=eps_filter)
    7918           0 :          CALL dbcsr_copy(m_zet(ispin), m_quench_t(ispin))
    7919             :          CALL dbcsr_multiply("N", "N", 1.0_dp, &
    7920             :                              m_t(ispin), &
    7921             :                              m_tmp_oo_2, &
    7922             :                              0.0_dp, m_zet(ispin), &
    7923           0 :                              retain_sparsity=.TRUE.)
    7924           0 :          alpha = dbcsr_maxabs(m_zet(ispin))
    7925           0 :          WRITE (unit_nr, "(A50,2F20.10)") "Occupied-space projection of the step", alpha
    7926           0 :          CALL dbcsr_add(m_zet(ispin), m_delta(ispin), -1.0_dp, 1.0_dp)
    7927           0 :          alpha = dbcsr_maxabs(m_zet(ispin))
    7928           0 :          WRITE (unit_nr, "(A50,2F20.10)") "Virtual-space projection of the step", alpha
    7929           0 :          alpha = dbcsr_maxabs(m_delta(ispin))
    7930           0 :          WRITE (unit_nr, "(A50,2F20.10)") "Full step", alpha
    7931           0 :          CALL dbcsr_release(m_tmp_oo_1)
    7932           0 :          CALL dbcsr_release(m_tmp_oo_2)
    7933             : 
    7934             :       END DO
    7935             : 
    7936             :       ! clean up
    7937           0 :       DO ispin = 1, nspins
    7938           0 :          CALL release_submatrices(domain_prec(:, ispin))
    7939           0 :          CALL dbcsr_release(m_residue(ispin))
    7940           0 :          CALL dbcsr_release(m_residue_prev(ispin))
    7941           0 :          CALL dbcsr_release(m_step(ispin))
    7942           0 :          CALL dbcsr_release(m_zet(ispin))
    7943           0 :          CALL dbcsr_release(m_zet_prev(ispin))
    7944           0 :          CALL dbcsr_release(m_Hstep(ispin))
    7945           0 :          CALL dbcsr_release(m_f_vo(ispin))
    7946           0 :          CALL dbcsr_release(m_f_vv(ispin))
    7947           0 :          CALL dbcsr_release(m_s_vv(ispin))
    7948           0 :          CALL dbcsr_release(m_prec(ispin))
    7949           0 :          CALL dbcsr_release(m_STsiginv(ispin))
    7950             :       END DO !ispin
    7951           0 :       DEALLOCATE (domain_prec)
    7952           0 :       DEALLOCATE (m_residue)
    7953           0 :       DEALLOCATE (m_residue_prev)
    7954           0 :       DEALLOCATE (m_step)
    7955           0 :       DEALLOCATE (m_zet)
    7956           0 :       DEALLOCATE (m_zet_prev)
    7957           0 :       DEALLOCATE (m_prec)
    7958           0 :       DEALLOCATE (m_Hstep)
    7959           0 :       DEALLOCATE (m_s_vv)
    7960           0 :       DEALLOCATE (m_f_vv)
    7961           0 :       DEALLOCATE (m_f_vo)
    7962           0 :       DEALLOCATE (m_STsiginv)
    7963           0 :       DEALLOCATE (residue_max_norm)
    7964             : 
    7965           0 :       IF (.NOT. converged) THEN
    7966           0 :          CPABORT("Optimization not converged!")
    7967             :       END IF
    7968             : 
    7969             :       ! check that the step satisfies H.step=-grad
    7970             : 
    7971           0 :       CALL timestop(handle)
    7972             : 
    7973           0 :    END SUBROUTINE newton_grad_to_step
    7974             : 
    7975             : ! *****************************************************************************
    7976             : !> \brief Computes Hessian.X
    7977             : !> \param m_x_in ...
    7978             : !> \param m_x_out ...
    7979             : !> \param m_ks ...
    7980             : !> \param m_s ...
    7981             : !> \param m_siginv ...
    7982             : !> \param m_quench_t ...
    7983             : !> \param m_FTsiginv ...
    7984             : !> \param m_siginvTFTsiginv ...
    7985             : !> \param m_ST ...
    7986             : !> \param m_STsiginv ...
    7987             : !> \param m_s_vv ...
    7988             : !> \param m_ks_vv ...
    7989             : !> \param m_g_full ...
    7990             : !> \param m_t ...
    7991             : !> \param m_sig_sqrti_ii ...
    7992             : !> \param penalty_occ_vol ...
    7993             : !> \param normalize_orbitals ...
    7994             : !> \param penalty_occ_vol_prefactor ...
    7995             : !> \param eps_filter ...
    7996             : !> \param path_num ...
    7997             : !> \par History
    7998             : !>       2015.04 created [Rustam Z Khaliullin]
    7999             : !> \author Rustam Z Khaliullin
    8000             : ! **************************************************************************************************
    8001           0 :    SUBROUTINE apply_hessian(m_x_in, m_x_out, m_ks, m_s, m_siginv, &
    8002           0 :                             m_quench_t, m_FTsiginv, m_siginvTFTsiginv, m_ST, m_STsiginv, m_s_vv, &
    8003           0 :                             m_ks_vv, m_g_full, m_t, m_sig_sqrti_ii, penalty_occ_vol, &
    8004           0 :                             normalize_orbitals, penalty_occ_vol_prefactor, eps_filter, path_num)
    8005             : 
    8006             :       TYPE(dbcsr_type), DIMENSION(:), INTENT(INOUT)      :: m_x_in, m_x_out, m_ks, m_s
    8007             :       TYPE(dbcsr_type), DIMENSION(:), INTENT(IN)         :: m_siginv, m_quench_t, m_FTsiginv, &
    8008             :                                                             m_siginvTFTsiginv, m_ST, m_STsiginv
    8009             :       TYPE(dbcsr_type), DIMENSION(:), INTENT(INOUT)      :: m_s_vv, m_ks_vv, m_g_full
    8010             :       TYPE(dbcsr_type), DIMENSION(:), INTENT(IN)         :: m_t, m_sig_sqrti_ii
    8011             :       LOGICAL, INTENT(IN)                                :: penalty_occ_vol, normalize_orbitals
    8012             :       REAL(KIND=dp), DIMENSION(:), INTENT(IN)            :: penalty_occ_vol_prefactor
    8013             :       REAL(KIND=dp), INTENT(IN)                          :: eps_filter
    8014             :       INTEGER, INTENT(IN)                                :: path_num
    8015             : 
    8016             :       CHARACTER(len=*), PARAMETER                        :: routineN = 'apply_hessian'
    8017             : 
    8018             :       INTEGER                                            :: dim0, handle, ispin, nspins
    8019             :       REAL(KIND=dp)                                      :: penalty_prefactor_local, spin_factor
    8020           0 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: tg_diagonal
    8021             :       TYPE(dbcsr_type)                                   :: m_tmp_no_1, m_tmp_no_2, m_tmp_oo_1, &
    8022             :                                                             m_tmp_x_in
    8023             : 
    8024           0 :       CALL timeset(routineN, handle)
    8025             : 
    8026             :       !JHU: test and use for unused debug variables
    8027           0 :       IF (penalty_occ_vol) penalty_prefactor_local = 1._dp
    8028           0 :       CPASSERT(SIZE(m_STsiginv) >= 0)
    8029           0 :       CPASSERT(SIZE(m_siginvTFTsiginv) >= 0)
    8030           0 :       CPASSERT(SIZE(m_s) >= 0)
    8031           0 :       CPASSERT(SIZE(m_g_full) >= 0)
    8032           0 :       CPASSERT(SIZE(m_FTsiginv) >= 0)
    8033             :       MARK_USED(m_siginvTFTsiginv)
    8034             :       MARK_USED(m_STsiginv)
    8035             :       MARK_USED(m_FTsiginv)
    8036             :       MARK_USED(m_g_full)
    8037             :       MARK_USED(m_s)
    8038             : 
    8039           0 :       nspins = SIZE(m_ks)
    8040             : 
    8041           0 :       IF (nspins .EQ. 1) THEN
    8042             :          spin_factor = 2.0_dp
    8043             :       ELSE
    8044           0 :          spin_factor = 1.0_dp
    8045             :       END IF
    8046             : 
    8047           0 :       DO ispin = 1, nspins
    8048             : 
    8049           0 :          penalty_prefactor_local = penalty_occ_vol_prefactor(ispin)/(2.0_dp*spin_factor)
    8050             : 
    8051             :          CALL dbcsr_create(m_tmp_oo_1, &
    8052             :                            template=m_siginv(ispin), &
    8053           0 :                            matrix_type=dbcsr_type_no_symmetry)
    8054             :          CALL dbcsr_create(m_tmp_no_1, &
    8055             :                            template=m_quench_t(ispin), &
    8056           0 :                            matrix_type=dbcsr_type_no_symmetry)
    8057             :          CALL dbcsr_create(m_tmp_no_2, &
    8058             :                            template=m_quench_t(ispin), &
    8059           0 :                            matrix_type=dbcsr_type_no_symmetry)
    8060             :          CALL dbcsr_create(m_tmp_x_in, &
    8061             :                            template=m_quench_t(ispin), &
    8062           0 :                            matrix_type=dbcsr_type_no_symmetry)
    8063             : 
    8064             :          ! transform the input X to take into account the normalization constraint
    8065           0 :          IF (normalize_orbitals) THEN
    8066             : 
    8067             :             ! H.D = ( (H.D) - ST.[tr(T).(H.D)]_ii ) . [sig_sqrti]_ii
    8068             : 
    8069             :             ! get [tr(T).HD]_ii
    8070           0 :             CALL dbcsr_copy(m_tmp_oo_1, m_sig_sqrti_ii(ispin))
    8071             :             CALL dbcsr_multiply("T", "N", 1.0_dp, &
    8072             :                                 m_x_in(ispin), &
    8073             :                                 m_ST(ispin), &
    8074             :                                 0.0_dp, m_tmp_oo_1, &
    8075           0 :                                 retain_sparsity=.TRUE.)
    8076           0 :             CALL dbcsr_get_info(m_sig_sqrti_ii(ispin), nfullrows_total=dim0)
    8077           0 :             ALLOCATE (tg_diagonal(dim0))
    8078           0 :             CALL dbcsr_get_diag(m_tmp_oo_1, tg_diagonal)
    8079           0 :             CALL dbcsr_set(m_tmp_oo_1, 0.0_dp)
    8080           0 :             CALL dbcsr_set_diag(m_tmp_oo_1, tg_diagonal)
    8081           0 :             DEALLOCATE (tg_diagonal)
    8082             : 
    8083           0 :             CALL dbcsr_copy(m_tmp_no_1, m_x_in(ispin))
    8084             :             CALL dbcsr_multiply("N", "N", -1.0_dp, &
    8085             :                                 m_t(ispin), &
    8086             :                                 m_tmp_oo_1, &
    8087             :                                 1.0_dp, m_tmp_no_1, &
    8088           0 :                                 filter_eps=eps_filter)
    8089             :             CALL dbcsr_multiply("N", "N", 1.0_dp, &
    8090             :                                 m_tmp_no_1, &
    8091             :                                 m_sig_sqrti_ii(ispin), &
    8092             :                                 0.0_dp, m_tmp_x_in, &
    8093           0 :                                 filter_eps=eps_filter)
    8094             : 
    8095             :          ELSE
    8096             : 
    8097           0 :             CALL dbcsr_copy(m_tmp_x_in, m_x_in(ispin))
    8098             : 
    8099             :          END IF ! normalize_orbitals
    8100             : 
    8101           0 :          IF (path_num .EQ. hessian_path_reuse) THEN
    8102             : 
    8103             :             ! apply pre-computed F_vv and S_vv to X
    8104             : 
    8105             : #if 0
    8106             : ! RZK-warning: negative sign at penalty_prefactor_local is that
    8107             : ! magical fix for the negative definite problem
    8108             : ! (since penalty_prefactor_local<0 the coeff before S_vv must
    8109             : ! be multiplied by -1 to take the step in the right direction)
    8110             : !CALL dbcsr_multiply("N","N",-4.0_dp*penalty_prefactor_local,&
    8111             : !        m_s_vv(ispin),&
    8112             : !        m_tmp_x_in,&
    8113             : !        0.0_dp,m_tmp_no_1,&
    8114             : !        filter_eps=eps_filter)
    8115             : !CALL dbcsr_copy(m_x_out(ispin),m_quench_t(ispin))
    8116             : !CALL dbcsr_multiply("N","N",1.0_dp,&
    8117             : !        m_tmp_no_1,&
    8118             : !        m_siginv(ispin),&
    8119             : !        0.0_dp,m_x_out(ispin),&
    8120             : !        retain_sparsity=.TRUE.)
    8121             : 
    8122             :             CALL dbcsr_multiply("N", "N", 1.0_dp, &
    8123             :                                 m_s(1), &
    8124             :                                 m_tmp_x_in, &
    8125             :                                 0.0_dp, m_tmp_no_1, &
    8126             :                                 filter_eps=eps_filter)
    8127             :             CALL dbcsr_copy(m_x_out(ispin), m_quench_t(ispin))
    8128             :             CALL dbcsr_multiply("N", "N", 1.0_dp, &
    8129             :                                 m_tmp_no_1, &
    8130             :                                 m_siginv(ispin), &
    8131             :                                 0.0_dp, m_x_out(ispin), &
    8132             :                                 retain_sparsity=.TRUE.)
    8133             : 
    8134             : !CALL dbcsr_copy(m_x_out(ispin),m_quench_t(ispin))
    8135             : !CALL dbcsr_multiply("N","N",1.0_dp,&
    8136             : !        m_s(1),&
    8137             : !        m_tmp_x_in,&
    8138             : !        0.0_dp,m_x_out(ispin),&
    8139             : !        retain_sparsity=.TRUE.)
    8140             : 
    8141             : #else
    8142             : 
    8143             :             ! debugging: only vv matrices, oo matrices are kronecker
    8144           0 :             CALL dbcsr_copy(m_x_out(ispin), m_quench_t(ispin))
    8145             :             CALL dbcsr_multiply("N", "N", 1.0_dp, &
    8146             :                                 m_ks_vv(ispin), &
    8147             :                                 m_tmp_x_in, &
    8148             :                                 0.0_dp, m_x_out(ispin), &
    8149           0 :                                 retain_sparsity=.TRUE.)
    8150             : 
    8151           0 :             CALL dbcsr_copy(m_tmp_no_2, m_quench_t(ispin))
    8152             :             CALL dbcsr_multiply("N", "N", 1.0_dp, &
    8153             :                                 m_s_vv(ispin), &
    8154             :                                 m_tmp_x_in, &
    8155             :                                 0.0_dp, m_tmp_no_2, &
    8156           0 :                                 retain_sparsity=.TRUE.)
    8157             :             CALL dbcsr_add(m_x_out(ispin), m_tmp_no_2, &
    8158           0 :                            1.0_dp, -4.0_dp*penalty_prefactor_local + 1.0_dp)
    8159             : #endif
    8160             : 
    8161             : !          ! F_vv.X.S_oo
    8162             : !          CALL dbcsr_multiply("N","N",1.0_dp,&
    8163             : !                  m_ks_vv(ispin),&
    8164             : !                  m_tmp_x_in,&
    8165             : !                  0.0_dp,m_tmp_no_1,&
    8166             : !                  filter_eps=eps_filter,&
    8167             : !                  )
    8168             : !          CALL dbcsr_copy(m_x_out(ispin),m_quench_t(ispin))
    8169             : !          CALL dbcsr_multiply("N","N",1.0_dp,&
    8170             : !                  m_tmp_no_1,&
    8171             : !                  m_siginv(ispin),&
    8172             : !                  0.0_dp,m_x_out(ispin),&
    8173             : !                  retain_sparsity=.TRUE.,&
    8174             : !                  )
    8175             : !
    8176             : !          ! S_vv.X.F_oo
    8177             : !          CALL dbcsr_multiply("N","N",1.0_dp,&
    8178             : !                  m_s_vv(ispin),&
    8179             : !                  m_tmp_x_in,&
    8180             : !                  0.0_dp,m_tmp_no_1,&
    8181             : !                  filter_eps=eps_filter,&
    8182             : !                  )
    8183             : !          CALL dbcsr_copy(m_tmp_no_2,m_quench_t(ispin))
    8184             : !          CALL dbcsr_multiply("N","N",1.0_dp,&
    8185             : !                  m_tmp_no_1,&
    8186             : !                  m_siginvTFTsiginv(ispin),&
    8187             : !                  0.0_dp,m_tmp_no_2,&
    8188             : !                  retain_sparsity=.TRUE.,&
    8189             : !                  )
    8190             : !          CALL dbcsr_add(m_x_out(ispin),m_tmp_no_2,&
    8191             : !               1.0_dp,-1.0_dp)
    8192             : !! we have to add occ voll penalty here (the Svv termi (i.e. both Svv.D.Soo)
    8193             : !!  and STsiginv terms)
    8194             : !
    8195             : !         ! S_vo.X^t.F_vo
    8196             : !          CALL dbcsr_multiply("T","N",1.0_dp,&
    8197             : !                  m_tmp_x_in,&
    8198             : !                  m_g_full(ispin),&
    8199             : !                  0.0_dp,m_tmp_oo_1,&
    8200             : !                  filter_eps=eps_filter,&
    8201             : !                  )
    8202             : !          CALL dbcsr_copy(m_tmp_no_2,m_quench_t(ispin))
    8203             : !          CALL dbcsr_multiply("N","N",1.0_dp,&
    8204             : !                  m_STsiginv(ispin),&
    8205             : !                  m_tmp_oo_1,&
    8206             : !                  0.0_dp,m_tmp_no_2,&
    8207             : !                  retain_sparsity=.TRUE.,&
    8208             : !                  )
    8209             : !          CALL dbcsr_add(m_x_out(ispin),m_tmp_no_2,&
    8210             : !                  1.0_dp,-1.0_dp)
    8211             : !
    8212             : !          ! S_vo.X^t.F_vo
    8213             : !          CALL dbcsr_multiply("T","N",1.0_dp,&
    8214             : !                  m_tmp_x_in,&
    8215             : !                  m_STsiginv(ispin),&
    8216             : !                  0.0_dp,m_tmp_oo_1,&
    8217             : !                  filter_eps=eps_filter,&
    8218             : !                  )
    8219             : !          CALL dbcsr_copy(m_tmp_no_2,m_quench_t(ispin))
    8220             : !          CALL dbcsr_multiply("N","N",1.0_dp,&
    8221             : !                  m_g_full(ispin),&
    8222             : !                  m_tmp_oo_1,&
    8223             : !                  0.0_dp,m_tmp_no_2,&
    8224             : !                  retain_sparsity=.TRUE.,&
    8225             : !                  )
    8226             : !          CALL dbcsr_add(m_x_out(ispin),m_tmp_no_2,&
    8227             : !                  1.0_dp,-1.0_dp)
    8228             : 
    8229           0 :          ELSE IF (path_num .EQ. hessian_path_assemble) THEN
    8230             : 
    8231             :             ! compute F_vv.X and S_vv.X directly
    8232             :             ! this path will be advantageous if the number
    8233             :             ! of PCG iterations is small
    8234           0 :             CPABORT("path is NYI")
    8235             : 
    8236             :          ELSE
    8237           0 :             CPABORT("illegal path")
    8238             :          END IF ! path
    8239             : 
    8240             :          ! transform the output to take into account the normalization constraint
    8241           0 :          IF (normalize_orbitals) THEN
    8242             : 
    8243             :             ! H.D = ( (H.D) - ST.[tr(T).(H.D)]_ii ) . [sig_sqrti]_ii
    8244             : 
    8245             :             ! get [tr(T).HD]_ii
    8246           0 :             CALL dbcsr_copy(m_tmp_oo_1, m_sig_sqrti_ii(ispin))
    8247             :             CALL dbcsr_multiply("T", "N", 1.0_dp, &
    8248             :                                 m_t(ispin), &
    8249             :                                 m_x_out(ispin), &
    8250             :                                 0.0_dp, m_tmp_oo_1, &
    8251           0 :                                 retain_sparsity=.TRUE.)
    8252           0 :             CALL dbcsr_get_info(m_sig_sqrti_ii(ispin), nfullrows_total=dim0)
    8253           0 :             ALLOCATE (tg_diagonal(dim0))
    8254           0 :             CALL dbcsr_get_diag(m_tmp_oo_1, tg_diagonal)
    8255           0 :             CALL dbcsr_set(m_tmp_oo_1, 0.0_dp)
    8256           0 :             CALL dbcsr_set_diag(m_tmp_oo_1, tg_diagonal)
    8257           0 :             DEALLOCATE (tg_diagonal)
    8258             : 
    8259             :             CALL dbcsr_multiply("N", "N", -1.0_dp, &
    8260             :                                 m_ST(ispin), &
    8261             :                                 m_tmp_oo_1, &
    8262             :                                 1.0_dp, m_x_out(ispin), &
    8263           0 :                                 retain_sparsity=.TRUE.)
    8264           0 :             CALL dbcsr_copy(m_tmp_no_1, m_x_out(ispin))
    8265             :             CALL dbcsr_multiply("N", "N", 1.0_dp, &
    8266             :                                 m_tmp_no_1, &
    8267             :                                 m_sig_sqrti_ii(ispin), &
    8268             :                                 0.0_dp, m_x_out(ispin), &
    8269           0 :                                 retain_sparsity=.TRUE.)
    8270             : 
    8271             :          END IF ! normalize_orbitals
    8272             : 
    8273             :          CALL dbcsr_scale(m_x_out(ispin), &
    8274           0 :                           2.0_dp*spin_factor)
    8275             : 
    8276           0 :          CALL dbcsr_release(m_tmp_oo_1)
    8277           0 :          CALL dbcsr_release(m_tmp_no_1)
    8278           0 :          CALL dbcsr_release(m_tmp_no_2)
    8279           0 :          CALL dbcsr_release(m_tmp_x_in)
    8280             : 
    8281             :       END DO !ispin
    8282             : 
    8283             :       ! there is one more part of the hessian that comes
    8284             :       ! from T-dependence of the KS matrix
    8285             :       ! it is neglected here
    8286             : 
    8287           0 :       CALL timestop(handle)
    8288             : 
    8289           0 :    END SUBROUTINE apply_hessian
    8290             : 
    8291             : ! *****************************************************************************
    8292             : !> \brief Serial code that constructs an approximate Hessian
    8293             : !> \param matrix_grad ...
    8294             : !> \param matrix_step ...
    8295             : !> \param matrix_S_ao ...
    8296             : !> \param matrix_F_ao ...
    8297             : !> \param matrix_S_mo ...
    8298             : !> \param matrix_F_mo ...
    8299             : !> \param matrix_S_vo ...
    8300             : !> \param matrix_F_vo ...
    8301             : !> \param quench_t ...
    8302             : !> \param penalty_occ_vol ...
    8303             : !> \param penalty_occ_vol_prefactor ...
    8304             : !> \param penalty_occ_vol_pf2 ...
    8305             : !> \param spin_factor ...
    8306             : !> \param eps_zero ...
    8307             : !> \param m_s ...
    8308             : !> \param para_env ...
    8309             : !> \param blacs_env ...
    8310             : !> \par History
    8311             : !>       2012.02 created [Rustam Z. Khaliullin]
    8312             : !> \author Rustam Z. Khaliullin
    8313             : ! **************************************************************************************************
    8314           0 :    SUBROUTINE hessian_diag_apply(matrix_grad, matrix_step, matrix_S_ao, &
    8315             :                                  matrix_F_ao, matrix_S_mo, matrix_F_mo, matrix_S_vo, matrix_F_vo, quench_t, &
    8316             :                                  penalty_occ_vol, penalty_occ_vol_prefactor, penalty_occ_vol_pf2, &
    8317             :                                  spin_factor, eps_zero, m_s, para_env, blacs_env)
    8318             : 
    8319             :       TYPE(dbcsr_type), INTENT(INOUT)                    :: matrix_grad, matrix_step, matrix_S_ao, &
    8320             :                                                             matrix_F_ao, matrix_S_mo
    8321             :       TYPE(dbcsr_type), INTENT(IN)                       :: matrix_F_mo
    8322             :       TYPE(dbcsr_type), INTENT(INOUT)                    :: matrix_S_vo, matrix_F_vo, quench_t
    8323             :       LOGICAL, INTENT(IN)                                :: penalty_occ_vol
    8324             :       REAL(KIND=dp), INTENT(IN)                          :: penalty_occ_vol_prefactor, &
    8325             :                                                             penalty_occ_vol_pf2, spin_factor, &
    8326             :                                                             eps_zero
    8327             :       TYPE(dbcsr_type), INTENT(IN)                       :: m_s
    8328             :       TYPE(mp_para_env_type), POINTER                    :: para_env
    8329             :       TYPE(cp_blacs_env_type), POINTER                   :: blacs_env
    8330             : 
    8331             :       CHARACTER(len=*), PARAMETER :: routineN = 'hessian_diag_apply'
    8332             : 
    8333             :       INTEGER :: ao_hori_offset, ao_vert_offset, block_col, block_row, col, H_size, handle, ii, &
    8334             :          INFO, jj, lev1_hori_offset, lev1_vert_offset, lev2_hori_offset, lev2_vert_offset, LWORK, &
    8335             :          nblkcols_tot, nblkrows_tot, ncores, orb_i, orb_j, row, unit_nr, zero_neg_eiv
    8336           0 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: ao_block_sizes, ao_domain_sizes, &
    8337           0 :                                                             mo_block_sizes
    8338           0 :       INTEGER, DIMENSION(:), POINTER                     :: ao_blk_sizes, mo_blk_sizes
    8339             :       LOGICAL                                            :: found, found_col, found_row
    8340             :       REAL(KIND=dp)                                      :: penalty_prefactor_local, test_error
    8341           0 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: eigenvalues, Grad_vec, Step_vec, tmp, &
    8342           0 :                                                             tmpr, work
    8343           0 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :)        :: F_ao_block, F_mo_block, H, Hinv, &
    8344           0 :                                                             S_ao_block, S_mo_block, test, test2
    8345           0 :       REAL(KIND=dp), DIMENSION(:, :), POINTER            :: block_p, p_new_block
    8346             :       TYPE(cp_logger_type), POINTER                      :: logger
    8347             :       TYPE(dbcsr_distribution_type)                      :: main_dist
    8348             :       TYPE(dbcsr_type)                                   :: matrix_F_ao_sym, matrix_F_mo_sym, &
    8349             :                                                             matrix_S_ao_sym, matrix_S_mo_sym
    8350             : 
    8351           0 :       CALL timeset(routineN, handle)
    8352             : 
    8353             :       ! get a useful output_unit
    8354           0 :       logger => cp_get_default_logger()
    8355           0 :       IF (logger%para_env%is_source()) THEN
    8356           0 :          unit_nr = cp_logger_get_default_unit_nr(logger, local=.TRUE.)
    8357             :       ELSE
    8358             :          unit_nr = -1
    8359             :       END IF
    8360             : 
    8361             :       !JHU use and test for unused debug variables
    8362           0 :       CPASSERT(ASSOCIATED(blacs_env))
    8363           0 :       CPASSERT(ASSOCIATED(para_env))
    8364             :       MARK_USED(blacs_env)
    8365             :       MARK_USED(para_env)
    8366             : 
    8367           0 :       CALL dbcsr_get_info(m_s, row_blk_size=ao_blk_sizes)
    8368           0 :       CALL dbcsr_get_info(matrix_S_vo, row_blk_size=ao_blk_sizes)
    8369           0 :       CALL dbcsr_get_info(matrix_F_vo, row_blk_size=ao_blk_sizes)
    8370             : 
    8371             :       ! serial code only
    8372           0 :       CALL dbcsr_get_info(matrix=matrix_S_ao, distribution=main_dist)
    8373           0 :       CALL dbcsr_distribution_get(main_dist, numnodes=ncores)
    8374           0 :       IF (ncores .GT. 1) THEN
    8375           0 :          CPABORT("serial code only")
    8376             :       END IF
    8377             : 
    8378           0 :       nblkrows_tot = dbcsr_nblkrows_total(quench_t)
    8379           0 :       nblkcols_tot = dbcsr_nblkcols_total(quench_t)
    8380           0 :       CPASSERT(nblkrows_tot == nblkcols_tot)
    8381           0 :       CALL dbcsr_get_info(quench_t, row_blk_size=ao_blk_sizes)
    8382           0 :       CALL dbcsr_get_info(quench_t, col_blk_size=mo_blk_sizes)
    8383           0 :       ALLOCATE (mo_block_sizes(nblkcols_tot), ao_block_sizes(nblkcols_tot))
    8384           0 :       ALLOCATE (ao_domain_sizes(nblkcols_tot))
    8385           0 :       mo_block_sizes(:) = mo_blk_sizes(:)
    8386           0 :       ao_block_sizes(:) = ao_blk_sizes(:)
    8387           0 :       ao_domain_sizes(:) = 0
    8388             : 
    8389             :       CALL dbcsr_create(matrix_S_ao_sym, &
    8390             :                         template=matrix_S_ao, &
    8391           0 :                         matrix_type=dbcsr_type_no_symmetry)
    8392           0 :       CALL dbcsr_desymmetrize(matrix_S_ao, matrix_S_ao_sym)
    8393           0 :       CALL dbcsr_scale(matrix_S_ao_sym, 2.0_dp*spin_factor)
    8394             : 
    8395             :       CALL dbcsr_create(matrix_F_ao_sym, &
    8396             :                         template=matrix_F_ao, &
    8397           0 :                         matrix_type=dbcsr_type_no_symmetry)
    8398           0 :       CALL dbcsr_desymmetrize(matrix_F_ao, matrix_F_ao_sym)
    8399           0 :       CALL dbcsr_scale(matrix_F_ao_sym, 2.0_dp*spin_factor)
    8400             : 
    8401             :       CALL dbcsr_create(matrix_S_mo_sym, &
    8402             :                         template=matrix_S_mo, &
    8403           0 :                         matrix_type=dbcsr_type_no_symmetry)
    8404           0 :       CALL dbcsr_desymmetrize(matrix_S_mo, matrix_S_mo_sym)
    8405             : 
    8406             :       CALL dbcsr_create(matrix_F_mo_sym, &
    8407             :                         template=matrix_F_mo, &
    8408           0 :                         matrix_type=dbcsr_type_no_symmetry)
    8409           0 :       CALL dbcsr_desymmetrize(matrix_F_mo, matrix_F_mo_sym)
    8410             : 
    8411           0 :       IF (penalty_occ_vol) THEN
    8412           0 :          penalty_prefactor_local = penalty_occ_vol_prefactor/(2.0_dp*spin_factor)
    8413             :       ELSE
    8414           0 :          penalty_prefactor_local = 0.0_dp
    8415             :       END IF
    8416             : 
    8417           0 :       WRITE (unit_nr, *) "penalty_prefactor_local: ", penalty_prefactor_local
    8418           0 :       WRITE (unit_nr, *) "penalty_prefactor_2: ", penalty_occ_vol_pf2
    8419             : 
    8420             :       !CALL dbcsr_print(matrix_grad)
    8421             :       !CALL dbcsr_print(matrix_F_ao_sym)
    8422             :       !CALL dbcsr_print(matrix_S_ao_sym)
    8423             :       !CALL dbcsr_print(matrix_F_mo_sym)
    8424             :       !CALL dbcsr_print(matrix_S_mo_sym)
    8425             : 
    8426             :       ! loop over domains to find the size of the Hessian
    8427           0 :       H_size = 0
    8428           0 :       DO col = 1, nblkcols_tot
    8429             : 
    8430             :          ! find sizes of AO submatrices
    8431           0 :          DO row = 1, nblkrows_tot
    8432             : 
    8433             :             CALL dbcsr_get_block_p(quench_t, &
    8434           0 :                                    row, col, block_p, found)
    8435           0 :             IF (found) THEN
    8436           0 :                ao_domain_sizes(col) = ao_domain_sizes(col) + ao_blk_sizes(row)
    8437             :             END IF
    8438             : 
    8439             :          END DO
    8440             : 
    8441           0 :          H_size = H_size + ao_domain_sizes(col)*mo_block_sizes(col)
    8442             : 
    8443             :       END DO
    8444             : 
    8445           0 :       ALLOCATE (H(H_size, H_size))
    8446           0 :       H(:, :) = 0.0_dp
    8447             : 
    8448             :       ! fill the Hessian matrix
    8449           0 :       lev1_vert_offset = 0
    8450             :       ! loop over all pairs of fragments
    8451           0 :       DO row = 1, nblkcols_tot
    8452             : 
    8453           0 :          lev1_hori_offset = 0
    8454           0 :          DO col = 1, nblkcols_tot
    8455             : 
    8456             :             ! prepare blocks for the current row-column fragment pair
    8457           0 :             ALLOCATE (F_ao_block(ao_domain_sizes(row), ao_domain_sizes(col)))
    8458           0 :             ALLOCATE (S_ao_block(ao_domain_sizes(row), ao_domain_sizes(col)))
    8459           0 :             ALLOCATE (F_mo_block(mo_block_sizes(row), mo_block_sizes(col)))
    8460           0 :             ALLOCATE (S_mo_block(mo_block_sizes(row), mo_block_sizes(col)))
    8461             : 
    8462           0 :             F_ao_block(:, :) = 0.0_dp
    8463           0 :             S_ao_block(:, :) = 0.0_dp
    8464           0 :             F_mo_block(:, :) = 0.0_dp
    8465           0 :             S_mo_block(:, :) = 0.0_dp
    8466             : 
    8467             :             ! fill AO submatrices
    8468             :             ! loop over all blocks of the AO dbcsr matrix
    8469           0 :             ao_vert_offset = 0
    8470           0 :             DO block_row = 1, nblkcols_tot
    8471             : 
    8472             :                CALL dbcsr_get_block_p(quench_t, &
    8473           0 :                                       block_row, row, block_p, found_row)
    8474           0 :                IF (found_row) THEN
    8475             : 
    8476           0 :                   ao_hori_offset = 0
    8477           0 :                   DO block_col = 1, nblkcols_tot
    8478             : 
    8479             :                      CALL dbcsr_get_block_p(quench_t, &
    8480           0 :                                             block_col, col, block_p, found_col)
    8481           0 :                      IF (found_col) THEN
    8482             : 
    8483             :                         CALL dbcsr_get_block_p(matrix_F_ao_sym, &
    8484           0 :                                                block_row, block_col, block_p, found)
    8485           0 :                         IF (found) THEN
    8486             :                            ! copy the block into the submatrix
    8487             :                            F_ao_block(ao_vert_offset + 1:ao_vert_offset + ao_block_sizes(block_row), &
    8488             :                                       ao_hori_offset + 1:ao_hori_offset + ao_block_sizes(block_col)) &
    8489           0 :                               = block_p(:, :)
    8490             :                         END IF
    8491             : 
    8492             :                         CALL dbcsr_get_block_p(matrix_S_ao_sym, &
    8493           0 :                                                block_row, block_col, block_p, found)
    8494           0 :                         IF (found) THEN
    8495             :                            ! copy the block into the submatrix
    8496             :                            S_ao_block(ao_vert_offset + 1:ao_vert_offset + ao_block_sizes(block_row), &
    8497             :                                       ao_hori_offset + 1:ao_hori_offset + ao_block_sizes(block_col)) &
    8498           0 :                               = block_p(:, :)
    8499             :                         END IF
    8500             : 
    8501           0 :                         ao_hori_offset = ao_hori_offset + ao_block_sizes(block_col)
    8502             : 
    8503             :                      END IF
    8504             : 
    8505             :                   END DO
    8506             : 
    8507           0 :                   ao_vert_offset = ao_vert_offset + ao_block_sizes(block_row)
    8508             : 
    8509             :                END IF
    8510             : 
    8511             :             END DO
    8512             : 
    8513             :             ! fill MO submatrices
    8514           0 :             CALL dbcsr_get_block_p(matrix_F_mo_sym, row, col, block_p, found)
    8515           0 :             IF (found) THEN
    8516             :                ! copy the block into the submatrix
    8517           0 :                F_mo_block(1:mo_block_sizes(row), 1:mo_block_sizes(col)) = block_p(:, :)
    8518             :             END IF
    8519           0 :             CALL dbcsr_get_block_p(matrix_S_mo_sym, row, col, block_p, found)
    8520           0 :             IF (found) THEN
    8521             :                ! copy the block into the submatrix
    8522           0 :                S_mo_block(1:mo_block_sizes(row), 1:mo_block_sizes(col)) = block_p(:, :)
    8523             :             END IF
    8524             : 
    8525             :             !WRITE(*,*) "F_AO_BLOCK", row, col, ao_domain_sizes(row), ao_domain_sizes(col)
    8526             :             !DO ii=1,ao_domain_sizes(row)
    8527             :             !  WRITE(*,'(100F13.9)') F_ao_block(ii,:)
    8528             :             !ENDDO
    8529             :             !WRITE(*,*) "S_AO_BLOCK", row, col
    8530             :             !DO ii=1,ao_domain_sizes(row)
    8531             :             !  WRITE(*,'(100F13.9)') S_ao_block(ii,:)
    8532             :             !ENDDO
    8533             :             !WRITE(*,*) "F_MO_BLOCK", row, col
    8534             :             !DO ii=1,mo_block_sizes(row)
    8535             :             !  WRITE(*,'(100F13.9)') F_mo_block(ii,:)
    8536             :             !ENDDO
    8537             :             !WRITE(*,*) "S_MO_BLOCK", row, col, mo_block_sizes(row), mo_block_sizes(col)
    8538             :             !DO ii=1,mo_block_sizes(row)
    8539             :             !  WRITE(*,'(100F13.9)') S_mo_block(ii,:)
    8540             :             !ENDDO
    8541             : 
    8542             :             ! construct tensor products for the current row-column fragment pair
    8543             :             lev2_vert_offset = 0
    8544           0 :             DO orb_j = 1, mo_block_sizes(row)
    8545             : 
    8546             :                lev2_hori_offset = 0
    8547           0 :                DO orb_i = 1, mo_block_sizes(col)
    8548           0 :                   IF (orb_i .EQ. orb_j .AND. row .EQ. col) THEN
    8549             :                      H(lev1_vert_offset + lev2_vert_offset + 1:lev1_vert_offset + lev2_vert_offset + ao_domain_sizes(row), &
    8550             :                        lev1_hori_offset + lev2_hori_offset + 1:lev1_hori_offset + lev2_hori_offset + ao_domain_sizes(col)) &
    8551             :                         != -penalty_prefactor_local*S_ao_block(:,:)
    8552           0 :                         = F_ao_block(:, :) + S_ao_block(:, :)
    8553             : !=S_ao_block(:,:)
    8554             : !RZK-warning               =F_ao_block(:,:)+( 1.0_dp + penalty_prefactor_local )*S_ao_block(:,:)
    8555             : !               =S_mo_block(orb_j,orb_i)*F_ao_block(:,:)&
    8556             : !               -F_mo_block(orb_j,orb_i)*S_ao_block(:,:)&
    8557             : !               +penalty_prefactor_local*S_mo_block(orb_j,orb_i)*S_ao_block(:,:)
    8558             :                   END IF
    8559             :                   !WRITE(*,*) row, col, orb_j, orb_i, lev1_vert_offset+lev2_vert_offset+1, ao_domain_sizes(row),&
    8560             :                   !   lev1_hori_offset+lev2_hori_offset+1, ao_domain_sizes(col), S_mo_block(orb_j,orb_i)
    8561             : 
    8562           0 :                   lev2_hori_offset = lev2_hori_offset + ao_domain_sizes(col)
    8563             : 
    8564             :                END DO
    8565             : 
    8566           0 :                lev2_vert_offset = lev2_vert_offset + ao_domain_sizes(row)
    8567             : 
    8568             :             END DO
    8569             : 
    8570           0 :             lev1_hori_offset = lev1_hori_offset + ao_domain_sizes(col)*mo_block_sizes(col)
    8571             : 
    8572           0 :             DEALLOCATE (F_ao_block)
    8573           0 :             DEALLOCATE (S_ao_block)
    8574           0 :             DEALLOCATE (F_mo_block)
    8575           0 :             DEALLOCATE (S_mo_block)
    8576             : 
    8577             :          END DO ! col fragment
    8578             : 
    8579           0 :          lev1_vert_offset = lev1_vert_offset + ao_domain_sizes(row)*mo_block_sizes(row)
    8580             : 
    8581             :       END DO ! row fragment
    8582             : 
    8583           0 :       CALL dbcsr_release(matrix_S_ao_sym)
    8584           0 :       CALL dbcsr_release(matrix_F_ao_sym)
    8585           0 :       CALL dbcsr_release(matrix_S_mo_sym)
    8586           0 :       CALL dbcsr_release(matrix_F_mo_sym)
    8587             : 
    8588             : !!    ! Two more terms of the Hessian: S_vo.D.F_vo and F_vo.D.S_vo
    8589             : !!    ! It seems that these terms break positive definite property of the Hessian
    8590             : !!    ALLOCATE(H1(H_size,H_size))
    8591             : !!    ALLOCATE(H2(H_size,H_size))
    8592             : !!    H1=0.0_dp
    8593             : !!    H2=0.0_dp
    8594             : !!    DO row = 1, nblkcols_tot
    8595             : !!
    8596             : !!       lev1_hori_offset=0
    8597             : !!       DO col = 1, nblkcols_tot
    8598             : !!
    8599             : !!          CALL dbcsr_get_block_p(matrix_F_vo,&
    8600             : !!                  row, col, block_p, found)
    8601             : !!          CALL dbcsr_get_block_p(matrix_S_vo,&
    8602             : !!                  row, col, block_p2, found2)
    8603             : !!
    8604             : !!          lev1_vert_offset=0
    8605             : !!          DO block_col = 1, nblkcols_tot
    8606             : !!
    8607             : !!             CALL dbcsr_get_block_p(quench_t,&
    8608             : !!                     row, block_col, p_new_block, found_row)
    8609             : !!
    8610             : !!             IF (found_row) THEN
    8611             : !!
    8612             : !!                ! determine offset in this short loop
    8613             : !!                lev2_vert_offset=0
    8614             : !!                DO block_row=1,row-1
    8615             : !!                   CALL dbcsr_get_block_p(quench_t,&
    8616             : !!                           block_row, block_col, p_new_block, found_col)
    8617             : !!                   IF (found_col) lev2_vert_offset=lev2_vert_offset+ao_block_sizes(block_row)
    8618             : !!                ENDDO
    8619             : !!                !!!!!!!! short loop
    8620             : !!
    8621             : !!                ! over all electrons of the block
    8622             : !!                DO orb_i=1, mo_block_sizes(col)
    8623             : !!
    8624             : !!                   ! into all possible locations
    8625             : !!                   DO orb_j=1, mo_block_sizes(block_col)
    8626             : !!
    8627             : !!                      ! column is copied several times
    8628             : !!                      DO copy=1, ao_domain_sizes(col)
    8629             : !!
    8630             : !!                         IF (found) THEN
    8631             : !!
    8632             : !!                            !WRITE(*,*) row, col, block_col, orb_i, orb_j, copy,&
    8633             : !!                            ! lev1_vert_offset+(orb_j-1)*ao_domain_sizes(block_col)+lev2_vert_offset+1,&
    8634             : !!                            ! lev1_hori_offset+(orb_i-1)*ao_domain_sizes(col)+copy
    8635             : !!
    8636             : !!                            H1( lev1_vert_offset+(orb_j-1)*ao_domain_sizes(block_col)+lev2_vert_offset+1:&
    8637             : !!                                lev1_vert_offset+(orb_j-1)*ao_domain_sizes(block_col)+lev2_vert_offset+ao_block_sizes(row),&
    8638             : !!                                lev1_hori_offset+(orb_i-1)*ao_domain_sizes(col)+copy )&
    8639             : !!                              =block_p(:,orb_i)
    8640             : !!
    8641             : !!                         ENDIF ! found block in the data matrix
    8642             : !!
    8643             : !!                         IF (found2) THEN
    8644             : !!
    8645             : !!                            H2( lev1_vert_offset+(orb_j-1)*ao_domain_sizes(block_col)+lev2_vert_offset+1:&
    8646             : !!                                lev1_vert_offset+(orb_j-1)*ao_domain_sizes(block_col)+lev2_vert_offset+ao_block_sizes(row),&
    8647             : !!                                lev1_hori_offset+(orb_i-1)*ao_domain_sizes(col)+copy )&
    8648             : !!                              =block_p2(:,orb_i)
    8649             : !!
    8650             : !!                         ENDIF ! found block in the data matrix
    8651             : !!
    8652             : !!                      ENDDO
    8653             : !!
    8654             : !!                   ENDDO
    8655             : !!
    8656             : !!                ENDDO
    8657             : !!
    8658             : !!                !lev2_vert_offset=lev2_vert_offset+ao_block_sizes(row)
    8659             : !!
    8660             : !!             ENDIF ! found block in the quench matrix
    8661             : !!
    8662             : !!             lev1_vert_offset=lev1_vert_offset+&
    8663             : !!                ao_domain_sizes(block_col)*mo_block_sizes(block_col)
    8664             : !!
    8665             : !!          ENDDO
    8666             : !!
    8667             : !!          lev1_hori_offset=lev1_hori_offset+&
    8668             : !!             ao_domain_sizes(col)*mo_block_sizes(col)
    8669             : !!
    8670             : !!       ENDDO
    8671             : !!
    8672             : !!       !lev2_vert_offset=lev2_vert_offset+ao_block_sizes(row)
    8673             : !!
    8674             : !!    ENDDO
    8675             : !!    H1(:,:)=H1(:,:)*2.0_dp*spin_factor
    8676             : !!    !!!WRITE(*,*) "F_vo"
    8677             : !!    !!!DO ii=1,H_size
    8678             : !!    !!! WRITE(*,'(100F13.9)') H1(ii,:)
    8679             : !!    !!!ENDDO
    8680             : !!    !!!WRITE(*,*) "S_vo"
    8681             : !!    !!!DO ii=1,H_size
    8682             : !!    !!! WRITE(*,'(100F13.9)') H2(ii,:)
    8683             : !!    !!!ENDDO
    8684             : !!    !!!!! add terms to the hessian
    8685             : !!    DO ii=1,H_size
    8686             : !!       DO jj=1,H_size
    8687             : !!! add penalty_occ_vol term
    8688             : !!          H(ii,jj)=H(ii,jj)-H1(ii,jj)*H2(jj,ii)-H1(jj,ii)*H2(ii,jj)
    8689             : !!       ENDDO
    8690             : !!    ENDDO
    8691             : !!    DEALLOCATE(H1)
    8692             : !!    DEALLOCATE(H2)
    8693             : 
    8694             : !!    ! S_vo.S_vo diagonal component due to determiant constraint
    8695             : !!    ! use grad vector temporarily
    8696             : !!    IF (penalty_occ_vol) THEN
    8697             : !!       ALLOCATE(Grad_vec(H_size))
    8698             : !!       Grad_vec(:)=0.0_dp
    8699             : !!       lev1_vert_offset=0
    8700             : !!       ! loop over all electron blocks
    8701             : !!       DO col = 1, nblkcols_tot
    8702             : !!
    8703             : !!          ! loop over AO-rows of the dbcsr matrix
    8704             : !!          lev2_vert_offset=0
    8705             : !!          DO row = 1, nblkrows_tot
    8706             : !!
    8707             : !!             CALL dbcsr_get_block_p(quench_t,&
    8708             : !!                     row, col, block_p, found_row)
    8709             : !!             IF (found_row) THEN
    8710             : !!
    8711             : !!                CALL dbcsr_get_block_p(matrix_S_vo,&
    8712             : !!                        row, col, block_p, found)
    8713             : !!                IF (found) THEN
    8714             : !!                   ! copy the data into the vector, column by column
    8715             : !!                   DO orb_i=1, mo_block_sizes(col)
    8716             : !!                      Grad_vec(lev1_vert_offset+ao_domain_sizes(col)*(orb_i-1)+lev2_vert_offset+1:&
    8717             : !!                               lev1_vert_offset+ao_domain_sizes(col)*(orb_i-1)+lev2_vert_offset+ao_block_sizes(row))&
    8718             : !!                               =block_p(:,orb_i)
    8719             : !!                   ENDDO
    8720             : !!
    8721             : !!                ENDIF
    8722             : !!
    8723             : !!                lev2_vert_offset=lev2_vert_offset+ao_block_sizes(row)
    8724             : !!
    8725             : !!             ENDIF
    8726             : !!
    8727             : !!          ENDDO
    8728             : !!
    8729             : !!          lev1_vert_offset=lev1_vert_offset+ao_domain_sizes(col)*mo_block_sizes(col)
    8730             : !!
    8731             : !!       ENDDO ! loop over electron blocks
    8732             : !!       ! update H now
    8733             : !!       DO ii=1,H_size
    8734             : !!          DO jj=1,H_size
    8735             : !!             H(ii,jj)=H(ii,jj)+penalty_occ_vol_prefactor*&
    8736             : !!                      penalty_occ_vol_pf2*Grad_vec(ii)*Grad_vec(jj)
    8737             : !!          ENDDO
    8738             : !!       ENDDO
    8739             : !!       DEALLOCATE(Grad_vec)
    8740             : !!    ENDIF ! penalty_occ_vol
    8741             : 
    8742             : !S-1.G ! invert S using cholesky
    8743             : !S-1.G CALL dbcsr_create(m_prec_out,&
    8744             : !S-1.G         template=m_s,&
    8745             : !S-1.G         matrix_type=dbcsr_type_no_symmetry)
    8746             : !S-1.G CALL dbcsr_copy(m_prec_out,m_s)
    8747             : !S-1.G CALL dbcsr_cholesky_decompose(m_prec_out,&
    8748             : !S-1.G         para_env=para_env,&
    8749             : !S-1.G         blacs_env=blacs_env)
    8750             : !S-1.G CALL dbcsr_cholesky_invert(m_prec_out,&
    8751             : !S-1.G         para_env=para_env,&
    8752             : !S-1.G         blacs_env=blacs_env,&
    8753             : !S-1.G         uplo_to_full=.TRUE.)
    8754             : !S-1.G CALL dbcsr_multiply("N","N",1.0_dp,&
    8755             : !S-1.G         m_prec_out,&
    8756             : !S-1.G         matrix_grad,&
    8757             : !S-1.G         0.0_dp,matrix_step,&
    8758             : !S-1.G         filter_eps=1.0E-10_dp)
    8759             : !S-1.G !CALL dbcsr_release(m_prec_out)
    8760             : !S-1.G ALLOCATE(test3(H_size))
    8761             : 
    8762             :       ! convert gradient from the dbcsr matrix to the vector form
    8763           0 :       ALLOCATE (Grad_vec(H_size))
    8764           0 :       Grad_vec(:) = 0.0_dp
    8765           0 :       lev1_vert_offset = 0
    8766             :       ! loop over all electron blocks
    8767           0 :       DO col = 1, nblkcols_tot
    8768             : 
    8769             :          ! loop over AO-rows of the dbcsr matrix
    8770           0 :          lev2_vert_offset = 0
    8771           0 :          DO row = 1, nblkrows_tot
    8772             : 
    8773             :             CALL dbcsr_get_block_p(quench_t, &
    8774           0 :                                    row, col, block_p, found_row)
    8775           0 :             IF (found_row) THEN
    8776             : 
    8777             :                CALL dbcsr_get_block_p(matrix_grad, &
    8778           0 :                                       row, col, block_p, found)
    8779           0 :                IF (found) THEN
    8780             :                   ! copy the data into the vector, column by column
    8781           0 :                   DO orb_i = 1, mo_block_sizes(col)
    8782             :                      Grad_vec(lev1_vert_offset + ao_domain_sizes(col)*(orb_i - 1) + lev2_vert_offset + 1: &
    8783             :                               lev1_vert_offset + ao_domain_sizes(col)*(orb_i - 1) + lev2_vert_offset + ao_block_sizes(row)) &
    8784           0 :                         = block_p(:, orb_i)
    8785             : !WRITE(*,*) "GRAD: ", row, col, orb_i, lev1_vert_offset+ao_domain_sizes(col)*(orb_i-1)+lev2_vert_offset+1, ao_block_sizes(row)
    8786             :                   END DO
    8787             : 
    8788             :                END IF
    8789             : 
    8790             : !S-1.G CALL dbcsr_get_block_p(matrix_step,&
    8791             : !S-1.G         row, col, block_p, found)
    8792             : !S-1.G IF (found) THEN
    8793             : !S-1.G    ! copy the data into the vector, column by column
    8794             : !S-1.G    DO orb_i=1, mo_block_sizes(col)
    8795             : !S-1.G       test3(lev1_vert_offset+ao_domain_sizes(col)*(orb_i-1)+lev2_vert_offset+1:&
    8796             : !S-1.G                lev1_vert_offset+ao_domain_sizes(col)*(orb_i-1)+lev2_vert_offset+ao_block_sizes(row))&
    8797             : !S-1.G                =block_p(:,orb_i)
    8798             : !S-1.G    ENDDO
    8799             : !S-1.G ENDIF
    8800             : 
    8801           0 :                lev2_vert_offset = lev2_vert_offset + ao_block_sizes(row)
    8802             : 
    8803             :             END IF
    8804             : 
    8805             :          END DO
    8806             : 
    8807           0 :          lev1_vert_offset = lev1_vert_offset + ao_domain_sizes(col)*mo_block_sizes(col)
    8808             : 
    8809             :       END DO ! loop over electron blocks
    8810             : 
    8811             :       !WRITE(*,*) "HESSIAN"
    8812             :       !DO ii=1,H_size
    8813             :       ! WRITE(*,*) ii
    8814             :       ! WRITE(*,'(20F14.10)') H(ii,:)
    8815             :       !ENDDO
    8816             : 
    8817             :       ! invert the Hessian
    8818           0 :       INFO = 0
    8819           0 :       ALLOCATE (Hinv(H_size, H_size))
    8820           0 :       Hinv(:, :) = H(:, :)
    8821             : 
    8822             :       ! before inverting diagonalize
    8823           0 :       ALLOCATE (eigenvalues(H_size))
    8824             :       ! Query the optimal workspace for dsyev
    8825           0 :       LWORK = -1
    8826           0 :       ALLOCATE (WORK(MAX(1, LWORK)))
    8827           0 :       CALL dsyev('V', 'L', H_size, Hinv, H_size, eigenvalues, WORK, LWORK, INFO)
    8828           0 :       LWORK = INT(WORK(1))
    8829           0 :       DEALLOCATE (WORK)
    8830             :       ! Allocate the workspace and solve the eigenproblem
    8831           0 :       ALLOCATE (WORK(MAX(1, LWORK)))
    8832           0 :       CALL dsyev('V', 'L', H_size, Hinv, H_size, eigenvalues, WORK, LWORK, INFO)
    8833           0 :       IF (INFO .NE. 0) THEN
    8834           0 :          WRITE (unit_nr, *) 'DSYEV ERROR MESSAGE: ', INFO
    8835           0 :          CPABORT("DSYEV failed")
    8836             :       END IF
    8837           0 :       DEALLOCATE (WORK)
    8838             : 
    8839             :       ! compute grad vector in the basis of Hessian eigenvectors
    8840           0 :       ALLOCATE (Step_vec(H_size))
    8841             :       ! Step_vec contains Grad_vec here
    8842           0 :       Step_vec(:) = MATMUL(TRANSPOSE(Hinv), Grad_vec)
    8843             : 
    8844             :       ! compute U.tr(U)-1 = error
    8845             :       !ALLOCATE(test(H_size,H_size))
    8846             :       !test(:,:)=MATMUL(TRANSPOSE(Hinv),Hinv)
    8847             :       !DO ii=1,H_size
    8848             :       !   test(ii,ii)=test(ii,ii)-1.0_dp
    8849             :       !ENDDO
    8850             :       !test_error=0.0_dp
    8851             :       !DO ii=1,H_size
    8852             :       !   DO jj=1,H_size
    8853             :       !      test_error=test_error+test(jj,ii)*test(jj,ii)
    8854             :       !   ENDDO
    8855             :       !ENDDO
    8856             :       !WRITE(*,*) "U.tr(U)-1 error: ", SQRT(test_error)
    8857             :       !DEALLOCATE(test)
    8858             : 
    8859             :       ! invert eigenvalues and use eigenvectors to compute the Hessian inverse
    8860             :       ! project out zero-eigenvalue directions
    8861           0 :       ALLOCATE (test(H_size, H_size))
    8862           0 :       zero_neg_eiv = 0
    8863           0 :       DO jj = 1, H_size
    8864           0 :          WRITE (unit_nr, "(I10,F20.10,F20.10)") jj, eigenvalues(jj), Step_vec(jj)
    8865           0 :          IF (eigenvalues(jj) .GT. eps_zero) THEN
    8866           0 :             test(jj, :) = Hinv(:, jj)/eigenvalues(jj)
    8867             :          ELSE
    8868           0 :             test(jj, :) = Hinv(:, jj)*0.0_dp
    8869           0 :             zero_neg_eiv = zero_neg_eiv + 1
    8870             :          END IF
    8871             :       END DO
    8872           0 :       WRITE (unit_nr, *) 'ZERO OR NEGATIVE EIGENVALUES: ', zero_neg_eiv
    8873           0 :       DEALLOCATE (Step_vec)
    8874             : 
    8875           0 :       ALLOCATE (test2(H_size, H_size))
    8876           0 :       test2(:, :) = MATMUL(Hinv, test)
    8877           0 :       Hinv(:, :) = test2(:, :)
    8878           0 :       DEALLOCATE (test, test2)
    8879             : 
    8880             :       !! shift to kill singularity
    8881             :       !shift=0.0_dp
    8882             :       !IF (eigenvalues(1).lt.0.0_dp) THEN
    8883             :       !   CPABORT("Negative eigenvalue(s)")
    8884             :       !   shift=abs(eigenvalues(1))
    8885             :       !   WRITE(*,*) "Lowest eigenvalue: ", eigenvalues(1)
    8886             :       !ENDIF
    8887             :       !DO ii=1, H_size
    8888             :       !   IF (eigenvalues(ii).gt.eps_zero) THEN
    8889             :       !      shift=shift+min(1.0_dp,eigenvalues(ii))*1.0E-4_dp
    8890             :       !      EXIT
    8891             :       !   ENDIF
    8892             :       !ENDDO
    8893             :       !WRITE(*,*) "Hessian shift: ", shift
    8894             :       !DO ii=1, H_size
    8895             :       !   H(ii,ii)=H(ii,ii)+shift
    8896             :       !ENDDO
    8897             :       !! end shift
    8898             : 
    8899           0 :       DEALLOCATE (eigenvalues)
    8900             : 
    8901             : !!!!    Hinv=H
    8902             : !!!!    INFO=0
    8903             : !!!!    CALL dpotrf('L', H_size, Hinv, H_size, INFO )
    8904             : !!!!    IF( INFO.NE.0 ) THEN
    8905             : !!!!       WRITE(*,*) 'DPOTRF ERROR MESSAGE: ', INFO
    8906             : !!!!       CPABORT("DPOTRF failed")
    8907             : !!!!    END IF
    8908             : !!!!    CALL dpotri('L', H_size, Hinv, H_size, INFO )
    8909             : !!!!    IF( INFO.NE.0 ) THEN
    8910             : !!!!       WRITE(*,*) 'DPOTRI ERROR MESSAGE: ', INFO
    8911             : !!!!       CPABORT("DPOTRI failed")
    8912             : !!!!    END IF
    8913             : !!!!    ! complete the matrix
    8914             : !!!!    DO ii=1,H_size
    8915             : !!!!       DO jj=ii+1,H_size
    8916             : !!!!          Hinv(ii,jj)=Hinv(jj,ii)
    8917             : !!!!       ENDDO
    8918             : !!!!    ENDDO
    8919             : 
    8920             :       ! compute the inversion error
    8921           0 :       ALLOCATE (test(H_size, H_size))
    8922           0 :       test(:, :) = MATMUL(Hinv, H)
    8923           0 :       DO ii = 1, H_size
    8924           0 :          test(ii, ii) = test(ii, ii) - 1.0_dp
    8925             :       END DO
    8926           0 :       test_error = 0.0_dp
    8927           0 :       DO ii = 1, H_size
    8928           0 :          DO jj = 1, H_size
    8929           0 :             test_error = test_error + test(jj, ii)*test(jj, ii)
    8930             :          END DO
    8931             :       END DO
    8932           0 :       WRITE (unit_nr, *) "Hessian inversion error: ", SQRT(test_error)
    8933           0 :       DEALLOCATE (test)
    8934             : 
    8935             :       ! prepare the output vector
    8936           0 :       ALLOCATE (Step_vec(H_size))
    8937           0 :       ALLOCATE (tmp(H_size))
    8938           0 :       tmp(:) = MATMUL(Hinv, Grad_vec)
    8939             :       !tmp(:)=MATMUL(Hinv,test3)
    8940           0 :       Step_vec(:) = -1.0_dp*tmp(:)
    8941             : 
    8942           0 :       ALLOCATE (tmpr(H_size))
    8943           0 :       tmpr(:) = MATMUL(H, Step_vec)
    8944           0 :       tmp(:) = tmpr(:) + Grad_vec(:)
    8945           0 :       DEALLOCATE (tmpr)
    8946           0 :       WRITE (unit_nr, *) "NEWTOV step error: ", MAXVAL(ABS(tmp))
    8947             : 
    8948           0 :       DEALLOCATE (tmp)
    8949             : 
    8950           0 :       DEALLOCATE (H)
    8951           0 :       DEALLOCATE (Hinv)
    8952           0 :       DEALLOCATE (Grad_vec)
    8953             : 
    8954             : !S-1.G DEALLOCATE(test3)
    8955             : 
    8956             :       ! copy the step from the vector into the dbcsr matrix
    8957             : 
    8958             :       ! re-create the step matrix to remove all blocks
    8959             :       CALL dbcsr_create(matrix_step, &
    8960             :                         template=matrix_grad, &
    8961           0 :                         matrix_type=dbcsr_type_no_symmetry)
    8962           0 :       CALL dbcsr_work_create(matrix_step, work_mutable=.TRUE.)
    8963             : 
    8964           0 :       lev1_vert_offset = 0
    8965             :       ! loop over all electron blocks
    8966           0 :       DO col = 1, nblkcols_tot
    8967             : 
    8968             :          ! loop over AO-rows of the dbcsr matrix
    8969           0 :          lev2_vert_offset = 0
    8970           0 :          DO row = 1, nblkrows_tot
    8971             : 
    8972             :             CALL dbcsr_get_block_p(quench_t, &
    8973           0 :                                    row, col, block_p, found_row)
    8974           0 :             IF (found_row) THEN
    8975             : 
    8976           0 :                NULLIFY (p_new_block)
    8977           0 :                CALL dbcsr_reserve_block2d(matrix_step, row, col, p_new_block)
    8978           0 :                CPASSERT(ASSOCIATED(p_new_block))
    8979             :                ! copy the data column by column
    8980           0 :                DO orb_i = 1, mo_block_sizes(col)
    8981             :                   p_new_block(:, orb_i) = &
    8982             :                      Step_vec(lev1_vert_offset + ao_domain_sizes(col)*(orb_i - 1) + lev2_vert_offset + 1: &
    8983           0 :                               lev1_vert_offset + ao_domain_sizes(col)*(orb_i - 1) + lev2_vert_offset + ao_block_sizes(row))
    8984             :                END DO
    8985             : 
    8986           0 :                lev2_vert_offset = lev2_vert_offset + ao_block_sizes(row)
    8987             : 
    8988             :             END IF
    8989             : 
    8990             :          END DO
    8991             : 
    8992           0 :          lev1_vert_offset = lev1_vert_offset + ao_domain_sizes(col)*mo_block_sizes(col)
    8993             : 
    8994             :       END DO ! loop over electron blocks
    8995             : 
    8996           0 :       DEALLOCATE (Step_vec)
    8997             : 
    8998           0 :       CALL dbcsr_finalize(matrix_step)
    8999             : 
    9000             : !S-1.G CALL dbcsr_create(m_tmp_no_1,&
    9001             : !S-1.G         template=matrix_step,&
    9002             : !S-1.G         matrix_type=dbcsr_type_no_symmetry)
    9003             : !S-1.G CALL dbcsr_multiply("N","N",1.0_dp,&
    9004             : !S-1.G         m_prec_out,&
    9005             : !S-1.G         matrix_step,&
    9006             : !S-1.G         0.0_dp,m_tmp_no_1,&
    9007             : !S-1.G         filter_eps=1.0E-10_dp,&
    9008             : !S-1.G         )
    9009             : !S-1.G CALL dbcsr_copy(matrix_step,m_tmp_no_1)
    9010             : !S-1.G CALL dbcsr_release(m_tmp_no_1)
    9011             : !S-1.G CALL dbcsr_release(m_prec_out)
    9012             : 
    9013           0 :       DEALLOCATE (mo_block_sizes, ao_block_sizes)
    9014           0 :       DEALLOCATE (ao_domain_sizes)
    9015             : 
    9016             :       CALL dbcsr_create(matrix_S_ao_sym, &
    9017             :                         template=quench_t, &
    9018           0 :                         matrix_type=dbcsr_type_no_symmetry)
    9019           0 :       CALL dbcsr_copy(matrix_S_ao_sym, quench_t)
    9020             :       CALL dbcsr_multiply("N", "N", 1.0_dp, &
    9021             :                           matrix_F_ao, &
    9022             :                           matrix_step, &
    9023             :                           0.0_dp, matrix_S_ao_sym, &
    9024           0 :                           retain_sparsity=.TRUE.)
    9025             :       CALL dbcsr_create(matrix_F_ao_sym, &
    9026             :                         template=quench_t, &
    9027           0 :                         matrix_type=dbcsr_type_no_symmetry)
    9028           0 :       CALL dbcsr_copy(matrix_F_ao_sym, quench_t)
    9029             :       CALL dbcsr_multiply("N", "N", 1.0_dp, &
    9030             :                           matrix_S_ao, &
    9031             :                           matrix_step, &
    9032             :                           0.0_dp, matrix_F_ao_sym, &
    9033           0 :                           retain_sparsity=.TRUE.)
    9034             :       CALL dbcsr_add(matrix_S_ao_sym, matrix_F_ao_sym, &
    9035           0 :                      1.0_dp, 1.0_dp)
    9036           0 :       CALL dbcsr_scale(matrix_S_ao_sym, 2.0_dp*spin_factor)
    9037             :       CALL dbcsr_add(matrix_S_ao_sym, matrix_grad, &
    9038           0 :                      1.0_dp, 1.0_dp)
    9039           0 :       test_error = dbcsr_maxabs(matrix_S_ao_sym)
    9040           0 :       WRITE (unit_nr, *) "NEWTOL step error: ", test_error
    9041           0 :       CALL dbcsr_release(matrix_S_ao_sym)
    9042           0 :       CALL dbcsr_release(matrix_F_ao_sym)
    9043             : 
    9044           0 :       CALL timestop(handle)
    9045             : 
    9046           0 :    END SUBROUTINE hessian_diag_apply
    9047             : 
    9048             : ! **************************************************************************************************
    9049             : !> \brief Optimization of ALMOs using trust region minimizers
    9050             : !> \param qs_env ...
    9051             : !> \param almo_scf_env ...
    9052             : !> \param optimizer   controls the optimization algorithm
    9053             : !> \param quench_t ...
    9054             : !> \param matrix_t_in ...
    9055             : !> \param matrix_t_out ...
    9056             : !> \param perturbation_only - perturbative (do not update Hamiltonian)
    9057             : !> \param special_case   to reduce the overhead special cases are implemented:
    9058             : !>                       xalmo_case_normal - no special case (i.e. xALMOs)
    9059             : !>                       xalmo_case_block_diag
    9060             : !>                       xalmo_case_fully_deloc
    9061             : !> \par History
    9062             : !>       2020.01 created [Rustam Z Khaliullin]
    9063             : !> \author Rustam Z Khaliullin
    9064             : ! **************************************************************************************************
    9065          18 :    SUBROUTINE almo_scf_xalmo_trustr(qs_env, almo_scf_env, optimizer, quench_t, &
    9066             :                                     matrix_t_in, matrix_t_out, perturbation_only, &
    9067             :                                     special_case)
    9068             : 
    9069             :       TYPE(qs_environment_type), POINTER                 :: qs_env
    9070             :       TYPE(almo_scf_env_type), INTENT(INOUT)             :: almo_scf_env
    9071             :       TYPE(optimizer_options_type), INTENT(IN)           :: optimizer
    9072             :       TYPE(dbcsr_type), ALLOCATABLE, DIMENSION(:)        :: quench_t, matrix_t_in, matrix_t_out
    9073             :       LOGICAL, INTENT(IN)                                :: perturbation_only
    9074             :       INTEGER, INTENT(IN), OPTIONAL                      :: special_case
    9075             : 
    9076             :       CHARACTER(len=*), PARAMETER :: routineN = 'almo_scf_xalmo_trustr'
    9077             : 
    9078             :       INTEGER :: handle, ispin, iteration, iteration_type_to_report, my_special_case, ndomains, &
    9079             :          nspins, outer_iteration, prec_type, unit_nr
    9080          18 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: nocc
    9081             :       LOGICAL :: assume_t0_q0x, border_reached, inner_loop_success, normalize_orbitals, &
    9082             :          optimize_theta, penalty_occ_vol, reset_conjugator, same_position, scf_converged
    9083             :       REAL(kind=dp) :: beta, energy_start, energy_trial, eta, expected_reduction, &
    9084             :          fake_step_size_to_report, grad_norm_ratio, grad_norm_ref, loss_change_to_report, &
    9085             :          loss_start, loss_trial, model_grad_norm, penalty_amplitude, penalty_start, penalty_trial, &
    9086             :          radius_current, radius_max, real_temp, rho, spin_factor, step_norm, step_size, t1, &
    9087             :          t1outer, t2, t2outer, y_scalar
    9088          18 :       REAL(kind=dp), ALLOCATABLE, DIMENSION(:)           :: grad_norm_spin, &
    9089          18 :                                                             penalty_occ_vol_g_prefactor, &
    9090          18 :                                                             penalty_occ_vol_h_prefactor
    9091             :       TYPE(cp_logger_type), POINTER                      :: logger
    9092             :       TYPE(dbcsr_type)                                   :: m_s_inv
    9093          18 :       TYPE(dbcsr_type), ALLOCATABLE, DIMENSION(:) :: FTsiginv, grad, m_model_Bd, m_model_d, &
    9094          18 :          m_model_hessian, m_model_hessian_inv, m_model_r, m_model_r_prev, m_model_rt, &
    9095          18 :          m_model_rt_prev, m_sig_sqrti_ii, m_theta, m_theta_trial, prev_step, siginvTFTsiginv, ST, &
    9096          18 :          step, STsiginv_0
    9097             :       TYPE(domain_submatrix_type), ALLOCATABLE, &
    9098          18 :          DIMENSION(:, :)                                 :: domain_model_hessian_inv, domain_r_down
    9099             : 
    9100             :       ! RZK-warning: number of temporary storage matrices can be reduced
    9101          18 :       CALL timeset(routineN, handle)
    9102             : 
    9103          18 :       t1outer = m_walltime()
    9104             : 
    9105          18 :       my_special_case = xalmo_case_normal
    9106          18 :       IF (PRESENT(special_case)) my_special_case = special_case
    9107             : 
    9108             :       ! get a useful output_unit
    9109          18 :       logger => cp_get_default_logger()
    9110          18 :       IF (logger%para_env%is_source()) THEN
    9111           9 :          unit_nr = cp_logger_get_default_unit_nr(logger, local=.TRUE.)
    9112             :       ELSE
    9113           9 :          unit_nr = -1
    9114             :       END IF
    9115             : 
    9116             :       ! Trust radius code is written to obviate the need in projected orbitals
    9117          18 :       assume_t0_q0x = .FALSE.
    9118             :       ! Smoothing of the orbitals have not been implemented
    9119          18 :       optimize_theta = .FALSE.
    9120             : 
    9121          18 :       nspins = almo_scf_env%nspins
    9122          18 :       IF (nspins == 1) THEN
    9123          18 :          spin_factor = 2.0_dp
    9124             :       ELSE
    9125           0 :          spin_factor = 1.0_dp
    9126             :       END IF
    9127             : 
    9128          18 :       IF (unit_nr > 0) THEN
    9129           9 :          WRITE (unit_nr, *)
    9130           1 :          SELECT CASE (my_special_case)
    9131             :          CASE (xalmo_case_block_diag)
    9132           1 :             WRITE (unit_nr, '(T2,A,A,A)') REPEAT("-", 20), &
    9133           2 :                " Optimization of block-diagonal ALMOs ", REPEAT("-", 21)
    9134             :          CASE (xalmo_case_fully_deloc)
    9135           0 :             WRITE (unit_nr, '(T2,A,A,A)') REPEAT("-", 20), &
    9136           0 :                " Optimization of fully delocalized MOs ", REPEAT("-", 20)
    9137             :          CASE (xalmo_case_normal)
    9138           8 :             WRITE (unit_nr, '(T2,A,A,A)') REPEAT("-", 27), &
    9139          17 :                " Optimization of XALMOs ", REPEAT("-", 28)
    9140             :          END SELECT
    9141           9 :          WRITE (unit_nr, *)
    9142             :          CALL trust_r_report(unit_nr, &
    9143             :                              iter_type=0, & ! print header, all values are ignored
    9144             :                              iteration=0, &
    9145             :                              radius=0.0_dp, &
    9146             :                              loss=0.0_dp, &
    9147             :                              delta_loss=0.0_dp, &
    9148             :                              grad_norm=0.0_dp, &
    9149             :                              predicted_reduction=0.0_dp, &
    9150             :                              rho=0.0_dp, &
    9151             :                              new=.TRUE., &
    9152           9 :                              time=0.0_dp)
    9153           9 :          WRITE (unit_nr, '(T2,A)') REPEAT("-", 79)
    9154             :       END IF
    9155             : 
    9156             :       ! penalty amplitude adjusts the strength of volume conservation
    9157          18 :       penalty_occ_vol = .FALSE.
    9158             :       !(almo_scf_env%penalty%occ_vol_method .NE. almo_occ_vol_penalty_none .AND. &
    9159             :       !                   my_special_case .EQ. xalmo_case_fully_deloc)
    9160          18 :       normalize_orbitals = penalty_occ_vol
    9161          18 :       penalty_amplitude = 0.0_dp !almo_scf_env%penalty%occ_vol_coeff
    9162          54 :       ALLOCATE (penalty_occ_vol_g_prefactor(nspins))
    9163          36 :       ALLOCATE (penalty_occ_vol_h_prefactor(nspins))
    9164          36 :       penalty_occ_vol_g_prefactor(:) = 0.0_dp
    9165          36 :       penalty_occ_vol_h_prefactor(:) = 0.0_dp
    9166             : 
    9167             :       ! here preconditioner is the Hessian of model function
    9168          18 :       prec_type = optimizer%preconditioner
    9169             : 
    9170          36 :       ALLOCATE (grad_norm_spin(nspins))
    9171          54 :       ALLOCATE (nocc(nspins))
    9172             : 
    9173             :       ! m_theta contains a set of variational parameters
    9174             :       ! that define one-electron orbitals (simple, projected, etc.)
    9175          72 :       ALLOCATE (m_theta(nspins))
    9176          36 :       DO ispin = 1, nspins
    9177             :          CALL dbcsr_create(m_theta(ispin), &
    9178             :                            template=matrix_t_out(ispin), &
    9179          36 :                            matrix_type=dbcsr_type_no_symmetry)
    9180             :       END DO
    9181             : 
    9182             :       ! create initial guess from the initial orbitals
    9183             :       CALL xalmo_initial_guess(m_guess=m_theta, &
    9184             :                                m_t_in=matrix_t_in, &
    9185             :                                m_t0=almo_scf_env%matrix_t_blk, &
    9186             :                                m_quench_t=quench_t, &
    9187             :                                m_overlap=almo_scf_env%matrix_s(1), &
    9188             :                                m_sigma_tmpl=almo_scf_env%matrix_sigma_inv, &
    9189             :                                nspins=nspins, &
    9190             :                                xalmo_history=almo_scf_env%xalmo_history, &
    9191             :                                assume_t0_q0x=assume_t0_q0x, &
    9192             :                                optimize_theta=optimize_theta, &
    9193             :                                envelope_amplitude=almo_scf_env%envelope_amplitude, &
    9194             :                                eps_filter=almo_scf_env%eps_filter, &
    9195             :                                order_lanczos=almo_scf_env%order_lanczos, &
    9196             :                                eps_lanczos=almo_scf_env%eps_lanczos, &
    9197             :                                max_iter_lanczos=almo_scf_env%max_iter_lanczos, &
    9198          18 :                                nocc_of_domain=almo_scf_env%nocc_of_domain)
    9199             : 
    9200          18 :       ndomains = almo_scf_env%ndomains
    9201         218 :       ALLOCATE (domain_r_down(ndomains, nspins))
    9202          18 :       CALL init_submatrices(domain_r_down)
    9203         200 :       ALLOCATE (domain_model_hessian_inv(ndomains, nspins))
    9204          18 :       CALL init_submatrices(domain_model_hessian_inv)
    9205             : 
    9206          54 :       ALLOCATE (m_model_hessian(nspins))
    9207          54 :       ALLOCATE (m_model_hessian_inv(nspins))
    9208          54 :       ALLOCATE (siginvTFTsiginv(nspins))
    9209          54 :       ALLOCATE (STsiginv_0(nspins))
    9210          54 :       ALLOCATE (FTsiginv(nspins))
    9211          54 :       ALLOCATE (ST(nspins))
    9212          54 :       ALLOCATE (grad(nspins))
    9213          72 :       ALLOCATE (prev_step(nspins))
    9214          54 :       ALLOCATE (step(nspins))
    9215          54 :       ALLOCATE (m_sig_sqrti_ii(nspins))
    9216          54 :       ALLOCATE (m_model_r(nspins))
    9217          54 :       ALLOCATE (m_model_rt(nspins))
    9218          54 :       ALLOCATE (m_model_d(nspins))
    9219          54 :       ALLOCATE (m_model_Bd(nspins))
    9220          54 :       ALLOCATE (m_model_r_prev(nspins))
    9221          54 :       ALLOCATE (m_model_rt_prev(nspins))
    9222          54 :       ALLOCATE (m_theta_trial(nspins))
    9223             : 
    9224          36 :       DO ispin = 1, nspins
    9225             : 
    9226             :          ! init temporary storage
    9227             :          CALL dbcsr_create(m_model_hessian_inv(ispin), &
    9228             :                            template=almo_scf_env%matrix_ks(ispin), &
    9229          18 :                            matrix_type=dbcsr_type_no_symmetry)
    9230             :          CALL dbcsr_create(m_model_hessian(ispin), &
    9231             :                            template=almo_scf_env%matrix_ks(ispin), &
    9232          18 :                            matrix_type=dbcsr_type_no_symmetry)
    9233             :          CALL dbcsr_create(siginvTFTsiginv(ispin), &
    9234             :                            template=almo_scf_env%matrix_sigma(ispin), &
    9235          18 :                            matrix_type=dbcsr_type_no_symmetry)
    9236             :          CALL dbcsr_create(STsiginv_0(ispin), &
    9237             :                            template=matrix_t_out(ispin), &
    9238          18 :                            matrix_type=dbcsr_type_no_symmetry)
    9239             :          CALL dbcsr_create(FTsiginv(ispin), &
    9240             :                            template=matrix_t_out(ispin), &
    9241          18 :                            matrix_type=dbcsr_type_no_symmetry)
    9242             :          CALL dbcsr_create(ST(ispin), &
    9243             :                            template=matrix_t_out(ispin), &
    9244          18 :                            matrix_type=dbcsr_type_no_symmetry)
    9245             :          CALL dbcsr_create(grad(ispin), &
    9246             :                            template=matrix_t_out(ispin), &
    9247          18 :                            matrix_type=dbcsr_type_no_symmetry)
    9248             :          CALL dbcsr_create(prev_step(ispin), &
    9249             :                            template=matrix_t_out(ispin), &
    9250          18 :                            matrix_type=dbcsr_type_no_symmetry)
    9251             :          CALL dbcsr_create(step(ispin), &
    9252             :                            template=matrix_t_out(ispin), &
    9253          18 :                            matrix_type=dbcsr_type_no_symmetry)
    9254             :          CALL dbcsr_create(m_sig_sqrti_ii(ispin), &
    9255             :                            template=almo_scf_env%matrix_sigma_inv(ispin), &
    9256          18 :                            matrix_type=dbcsr_type_no_symmetry)
    9257             :          CALL dbcsr_create(m_model_r(ispin), &
    9258             :                            template=matrix_t_out(ispin), &
    9259          18 :                            matrix_type=dbcsr_type_no_symmetry)
    9260             :          CALL dbcsr_create(m_model_rt(ispin), &
    9261             :                            template=matrix_t_out(ispin), &
    9262          18 :                            matrix_type=dbcsr_type_no_symmetry)
    9263             :          CALL dbcsr_create(m_model_d(ispin), &
    9264             :                            template=matrix_t_out(ispin), &
    9265          18 :                            matrix_type=dbcsr_type_no_symmetry)
    9266             :          CALL dbcsr_create(m_model_Bd(ispin), &
    9267             :                            template=matrix_t_out(ispin), &
    9268          18 :                            matrix_type=dbcsr_type_no_symmetry)
    9269             :          CALL dbcsr_create(m_model_r_prev(ispin), &
    9270             :                            template=matrix_t_out(ispin), &
    9271          18 :                            matrix_type=dbcsr_type_no_symmetry)
    9272             :          CALL dbcsr_create(m_model_rt_prev(ispin), &
    9273             :                            template=matrix_t_out(ispin), &
    9274          18 :                            matrix_type=dbcsr_type_no_symmetry)
    9275             :          CALL dbcsr_create(m_theta_trial(ispin), &
    9276             :                            template=matrix_t_out(ispin), &
    9277          18 :                            matrix_type=dbcsr_type_no_symmetry)
    9278             : 
    9279          18 :          CALL dbcsr_set(step(ispin), 0.0_dp)
    9280          18 :          CALL dbcsr_set(prev_step(ispin), 0.0_dp)
    9281             : 
    9282             :          CALL dbcsr_get_info(almo_scf_env%matrix_sigma_inv(ispin), &
    9283          18 :                              nfullrows_total=nocc(ispin))
    9284             : 
    9285             :          ! invert S domains if necessary
    9286             :          ! Note: domains for alpha and beta electrons might be different
    9287             :          ! that is why the inversion of the AO overlap is inside the spin loop
    9288          36 :          IF (my_special_case .EQ. xalmo_case_normal) THEN
    9289             : 
    9290             :             CALL construct_domain_s_inv( &
    9291             :                matrix_s=almo_scf_env%matrix_s(1), &
    9292             :                subm_s_inv=almo_scf_env%domain_s_inv(:, ispin), &
    9293             :                dpattern=quench_t(ispin), &
    9294             :                map=almo_scf_env%domain_map(ispin), &
    9295          16 :                node_of_domain=almo_scf_env%cpu_of_domain)
    9296             : 
    9297             :          END IF
    9298             : 
    9299             :       END DO ! ispin
    9300             : 
    9301             :       ! invert metric for special case where metric is spin independent
    9302          18 :       IF (my_special_case .EQ. xalmo_case_block_diag) THEN
    9303             : 
    9304             :          CALL dbcsr_create(m_s_inv, &
    9305             :                            template=almo_scf_env%matrix_s(1), &
    9306           2 :                            matrix_type=dbcsr_type_no_symmetry)
    9307             :          CALL invert_Hotelling(m_s_inv, &
    9308             :                                almo_scf_env%matrix_s_blk(1), &
    9309             :                                threshold=almo_scf_env%eps_filter, &
    9310           2 :                                filter_eps=almo_scf_env%eps_filter)
    9311             : 
    9312          16 :       ELSE IF (my_special_case .EQ. xalmo_case_fully_deloc) THEN
    9313             : 
    9314             :          ! invert S using cholesky
    9315             :          CALL dbcsr_create(m_s_inv, &
    9316             :                            template=almo_scf_env%matrix_s(1), &
    9317           0 :                            matrix_type=dbcsr_type_no_symmetry)
    9318           0 :          CALL dbcsr_desymmetrize(almo_scf_env%matrix_s(1), m_s_inv)
    9319             :          CALL cp_dbcsr_cholesky_decompose(m_s_inv, &
    9320             :                                           para_env=almo_scf_env%para_env, &
    9321           0 :                                           blacs_env=almo_scf_env%blacs_env)
    9322             :          CALL cp_dbcsr_cholesky_invert(m_s_inv, &
    9323             :                                        para_env=almo_scf_env%para_env, &
    9324             :                                        blacs_env=almo_scf_env%blacs_env, &
    9325           0 :                                        uplo_to_full=.TRUE.)
    9326           0 :          CALL dbcsr_filter(m_s_inv, almo_scf_env%eps_filter)
    9327             : 
    9328             :       END IF ! s_inv
    9329             : 
    9330          18 :       radius_max = optimizer%max_trust_radius
    9331          18 :       radius_current = MIN(optimizer%initial_trust_radius, radius_max)
    9332             :       ! eta must be between 0 and 0.25
    9333          18 :       eta = MIN(MAX(optimizer%rho_do_not_update, 0.0_dp), 0.25_dp)
    9334             :       energy_start = 0.0_dp
    9335          18 :       energy_trial = 0.0_dp
    9336             :       penalty_start = 0.0_dp
    9337          18 :       penalty_trial = 0.0_dp
    9338             :       loss_start = 0.0_dp ! sum of the energy and penalty
    9339          18 :       loss_trial = 0.0_dp
    9340             : 
    9341          18 :       same_position = .FALSE.
    9342             : 
    9343             :       ! compute the energy
    9344             :       CALL main_var_to_xalmos_and_loss_func( &
    9345             :          almo_scf_env=almo_scf_env, &
    9346             :          qs_env=qs_env, &
    9347             :          m_main_var_in=m_theta, &
    9348             :          m_t_out=matrix_t_out, &
    9349             :          m_sig_sqrti_ii_out=m_sig_sqrti_ii, &
    9350             :          energy_out=energy_start, &
    9351             :          penalty_out=penalty_start, &
    9352             :          m_FTsiginv_out=FTsiginv, &
    9353             :          m_siginvTFTsiginv_out=siginvTFTsiginv, &
    9354             :          m_ST_out=ST, &
    9355             :          m_STsiginv0_in=STsiginv_0, &
    9356             :          m_quench_t_in=quench_t, &
    9357             :          domain_r_down_in=domain_r_down, &
    9358             :          assume_t0_q0x=assume_t0_q0x, &
    9359             :          just_started=.TRUE., &
    9360             :          optimize_theta=optimize_theta, &
    9361             :          normalize_orbitals=normalize_orbitals, &
    9362             :          perturbation_only=perturbation_only, &
    9363             :          do_penalty=penalty_occ_vol, &
    9364          18 :          special_case=my_special_case)
    9365          18 :       loss_start = energy_start + penalty_start
    9366          18 :       IF (my_special_case .EQ. xalmo_case_block_diag) THEN
    9367           2 :          almo_scf_env%almo_scf_energy = energy_start
    9368             :       END IF
    9369          36 :       DO ispin = 1, nspins
    9370          36 :          IF (penalty_occ_vol) THEN
    9371             :             penalty_occ_vol_g_prefactor(ispin) = &
    9372           0 :                -2.0_dp*penalty_amplitude*spin_factor*nocc(ispin)
    9373           0 :             penalty_occ_vol_h_prefactor(ispin) = 0.0_dp
    9374             :          END IF
    9375             :       END DO ! ispin
    9376             : 
    9377             :       ! start the outer step-size-adjustment loop
    9378          18 :       scf_converged = .FALSE.
    9379         426 :       adjust_r_loop: DO outer_iteration = 1, optimizer%max_iter_outer_loop
    9380             : 
    9381             :          ! start the inner fixed-radius loop
    9382         426 :          border_reached = .FALSE.
    9383             : 
    9384         852 :          DO ispin = 1, nspins
    9385         426 :             CALL dbcsr_set(step(ispin), 0.0_dp)
    9386         852 :             CALL dbcsr_filter(step(ispin), almo_scf_env%eps_filter)
    9387             :          END DO
    9388             : 
    9389         426 :          IF (.NOT. same_position) THEN
    9390             : 
    9391         852 :             DO ispin = 1, nspins
    9392             : 
    9393             :                IF (unit_nr > 0 .AND. debug_mode) WRITE (unit_nr, *) "...Compute model gradient"
    9394             :                CALL compute_gradient( &
    9395             :                   m_grad_out=grad(ispin), &
    9396             :                   m_ks=almo_scf_env%matrix_ks(ispin), &
    9397             :                   m_s=almo_scf_env%matrix_s(1), &
    9398             :                   m_t=matrix_t_out(ispin), &
    9399             :                   m_t0=almo_scf_env%matrix_t_blk(ispin), &
    9400             :                   m_siginv=almo_scf_env%matrix_sigma_inv(ispin), &
    9401             :                   m_quench_t=quench_t(ispin), &
    9402             :                   m_FTsiginv=FTsiginv(ispin), &
    9403             :                   m_siginvTFTsiginv=siginvTFTsiginv(ispin), &
    9404             :                   m_ST=ST(ispin), &
    9405             :                   m_STsiginv0=STsiginv_0(ispin), &
    9406             :                   m_theta=m_theta(ispin), &
    9407             :                   m_sig_sqrti_ii=m_sig_sqrti_ii(ispin), &
    9408             :                   domain_s_inv=almo_scf_env%domain_s_inv(:, ispin), &
    9409             :                   domain_r_down=domain_r_down(:, ispin), &
    9410             :                   cpu_of_domain=almo_scf_env%cpu_of_domain, &
    9411             :                   domain_map=almo_scf_env%domain_map(ispin), &
    9412             :                   assume_t0_q0x=assume_t0_q0x, &
    9413             :                   optimize_theta=optimize_theta, &
    9414             :                   normalize_orbitals=normalize_orbitals, &
    9415             :                   penalty_occ_vol=penalty_occ_vol, &
    9416             :                   penalty_occ_vol_prefactor=penalty_occ_vol_g_prefactor(ispin), &
    9417             :                   envelope_amplitude=almo_scf_env%envelope_amplitude, &
    9418             :                   eps_filter=almo_scf_env%eps_filter, &
    9419             :                   spin_factor=spin_factor, &
    9420         852 :                   special_case=my_special_case)
    9421             : 
    9422             :             END DO ! ispin
    9423             : 
    9424             :          END IF ! skip_grad
    9425             : 
    9426             :          ! check convergence and other exit criteria
    9427         852 :          DO ispin = 1, nspins
    9428         852 :             grad_norm_spin(ispin) = dbcsr_maxabs(grad(ispin))
    9429             :             !grad_norm_frob = dbcsr_frobenius_norm(grad(ispin)) / &
    9430             :             !                 dbcsr_frobenius_norm(quench_t(ispin))
    9431             :          END DO ! ispin
    9432        1278 :          grad_norm_ref = MAXVAL(grad_norm_spin)
    9433             : 
    9434         426 :          t2outer = m_walltime()
    9435             :          CALL trust_r_report(unit_nr, &
    9436             :                              iter_type=1, & ! only some data is important
    9437             :                              iteration=outer_iteration, &
    9438             :                              loss=loss_start, &
    9439             :                              delta_loss=0.0_dp, &
    9440             :                              grad_norm=grad_norm_ref, &
    9441             :                              predicted_reduction=0.0_dp, &
    9442             :                              rho=0.0_dp, &
    9443             :                              radius=radius_current, &
    9444             :                              new=.NOT. same_position, &
    9445         426 :                              time=t2outer - t1outer)
    9446         426 :          t1outer = m_walltime()
    9447             : 
    9448         426 :          IF (grad_norm_ref .LE. optimizer%eps_error) THEN
    9449          18 :             scf_converged = .TRUE.
    9450          18 :             border_reached = .FALSE.
    9451          18 :             expected_reduction = 0.0_dp
    9452          18 :             IF (.NOT. (optimizer%early_stopping_on .AND. outer_iteration .EQ. 1)) &
    9453             :                EXIT adjust_r_loop
    9454             :          ELSE
    9455             :             scf_converged = .FALSE.
    9456             :          END IF
    9457             : 
    9458         816 :          DO ispin = 1, nspins
    9459             : 
    9460         408 :             CALL dbcsr_copy(m_model_r(ispin), grad(ispin))
    9461         408 :             CALL dbcsr_scale(m_model_r(ispin), -1.0_dp)
    9462             : 
    9463         408 :             IF (my_special_case .EQ. xalmo_case_block_diag .OR. &
    9464             :                 my_special_case .EQ. xalmo_case_fully_deloc) THEN
    9465             : 
    9466             :                IF (unit_nr > 0 .AND. debug_mode) WRITE (unit_nr, *) "...Multiply Sinv.r"
    9467             :                CALL dbcsr_multiply("N", "N", 1.0_dp, &
    9468             :                                    m_s_inv, &
    9469             :                                    m_model_r(ispin), &
    9470             :                                    0.0_dp, m_model_rt(ispin), &
    9471          92 :                                    filter_eps=almo_scf_env%eps_filter)
    9472             : 
    9473         316 :             ELSE IF (my_special_case .EQ. xalmo_case_normal) THEN
    9474             : 
    9475             :                IF (unit_nr > 0 .AND. debug_mode) WRITE (unit_nr, *) "...Multiply Sinv_xx.r"
    9476             :                CALL apply_domain_operators( &
    9477             :                   matrix_in=m_model_r(ispin), &
    9478             :                   matrix_out=m_model_rt(ispin), &
    9479             :                   operator1=almo_scf_env%domain_s_inv(:, ispin), &
    9480             :                   dpattern=quench_t(ispin), &
    9481             :                   map=almo_scf_env%domain_map(ispin), &
    9482             :                   node_of_domain=almo_scf_env%cpu_of_domain, &
    9483             :                   my_action=0, &
    9484         316 :                   filter_eps=almo_scf_env%eps_filter)
    9485             : 
    9486             :             ELSE
    9487           0 :                CPABORT("Unknown XALMO special case")
    9488             :             END IF
    9489             : 
    9490         816 :             CALL dbcsr_copy(m_model_d(ispin), m_model_rt(ispin))
    9491             : 
    9492             :          END DO ! ispin
    9493             : 
    9494             :          ! compute model Hessian
    9495         408 :          IF (.NOT. same_position) THEN
    9496             : 
    9497             :             SELECT CASE (prec_type)
    9498             :             CASE (xalmo_prec_domain)
    9499             : 
    9500             :                IF (unit_nr > 0 .AND. debug_mode) WRITE (unit_nr, *) "...Compute model Hessian"
    9501         816 :                DO ispin = 1, nspins
    9502             :                   CALL compute_preconditioner( &
    9503             :                      domain_prec_out=almo_scf_env%domain_preconditioner(:, ispin), &
    9504             :                      m_prec_out=m_model_hessian(ispin), &
    9505             :                      m_ks=almo_scf_env%matrix_ks(ispin), &
    9506             :                      m_s=almo_scf_env%matrix_s(1), &
    9507             :                      m_siginv=almo_scf_env%matrix_sigma_inv(ispin), &
    9508             :                      m_quench_t=quench_t(ispin), &
    9509             :                      m_FTsiginv=FTsiginv(ispin), &
    9510             :                      m_siginvTFTsiginv=siginvTFTsiginv(ispin), &
    9511             :                      m_ST=ST(ispin), &
    9512             :                      para_env=almo_scf_env%para_env, &
    9513             :                      blacs_env=almo_scf_env%blacs_env, &
    9514             :                      nocc_of_domain=almo_scf_env%nocc_of_domain(:, ispin), &
    9515             :                      domain_s_inv=almo_scf_env%domain_s_inv(:, ispin), &
    9516             :                      domain_r_down=domain_r_down(:, ispin), &
    9517             :                      cpu_of_domain=almo_scf_env%cpu_of_domain, &
    9518             :                      domain_map=almo_scf_env%domain_map(ispin), &
    9519             :                      assume_t0_q0x=.FALSE., &
    9520             :                      penalty_occ_vol=penalty_occ_vol, &
    9521             :                      penalty_occ_vol_prefactor=penalty_occ_vol_g_prefactor(ispin), &
    9522             :                      eps_filter=almo_scf_env%eps_filter, &
    9523             :                      neg_thr=0.5_dp, &
    9524             :                      spin_factor=spin_factor, &
    9525             :                      skip_inversion=.TRUE., &
    9526         816 :                      special_case=my_special_case)
    9527             :                END DO ! ispin
    9528             : 
    9529             :             CASE DEFAULT
    9530             : 
    9531         408 :                CPABORT("Unknown preconditioner")
    9532             : 
    9533             :             END SELECT ! preconditioner type fork
    9534             : 
    9535             :          END IF  ! not same position
    9536             : 
    9537             :          ! print the header (argument values are ignored)
    9538             :          CALL fixed_r_report(unit_nr, &
    9539             :                              iter_type=0, &
    9540             :                              iteration=0, &
    9541             :                              step_size=0.0_dp, &
    9542             :                              border_reached=.FALSE., &
    9543             :                              curvature=0.0_dp, &
    9544             :                              grad_norm_ratio=0.0_dp, &
    9545         408 :                              time=0.0_dp)
    9546             : 
    9547             :          IF (unit_nr > 0 .AND. debug_mode) WRITE (unit_nr, *) "...Start inner loop"
    9548             : 
    9549         408 :          t1 = m_walltime()
    9550         408 :          inner_loop_success = .FALSE.
    9551             :          ! trustr_steihaug, trustr_cauchy, trustr_dogleg
    9552         490 :          fixed_r_loop: DO iteration = 1, optimizer%max_iter
    9553             : 
    9554             :             ! Step 2. Get curvature. If negative, step to the border
    9555         490 :             y_scalar = 0.0_dp
    9556         980 :             DO ispin = 1, nspins
    9557             : 
    9558             :                ! Get B.d
    9559         490 :                IF (my_special_case .EQ. xalmo_case_block_diag .OR. &
    9560             :                    my_special_case .EQ. xalmo_case_fully_deloc) THEN
    9561             : 
    9562             :                   CALL dbcsr_multiply("N", "N", 1.0_dp, &
    9563             :                                       m_model_hessian(ispin), &
    9564             :                                       m_model_d(ispin), &
    9565             :                                       0.0_dp, m_model_Bd(ispin), &
    9566          92 :                                       filter_eps=almo_scf_env%eps_filter)
    9567             : 
    9568             :                ELSE
    9569             : 
    9570             :                   CALL apply_domain_operators( &
    9571             :                      matrix_in=m_model_d(ispin), &
    9572             :                      matrix_out=m_model_Bd(ispin), &
    9573             :                      operator1=almo_scf_env%domain_preconditioner(:, ispin), &
    9574             :                      dpattern=quench_t(ispin), &
    9575             :                      map=almo_scf_env%domain_map(ispin), &
    9576             :                      node_of_domain=almo_scf_env%cpu_of_domain, &
    9577             :                      my_action=0, &
    9578         398 :                      filter_eps=almo_scf_env%eps_filter)
    9579             : 
    9580             :                END IF ! special case
    9581             : 
    9582             :                ! Get y=d^T.B.d
    9583         490 :                CALL dbcsr_dot(m_model_d(ispin), m_model_Bd(ispin), real_temp)
    9584         980 :                y_scalar = y_scalar + real_temp
    9585             : 
    9586             :             END DO ! ispin
    9587             :             IF (unit_nr > 0 .AND. debug_mode) WRITE (unit_nr, *) "...Curvature: ", y_scalar
    9588             : 
    9589             :             ! step to the border
    9590         490 :             IF (y_scalar .LT. 0.0_dp) THEN
    9591             : 
    9592             :                CALL step_size_to_border( &
    9593             :                   step_size_out=step_size, &
    9594             :                   metric_in=almo_scf_env%matrix_s, &
    9595             :                   position_in=step, &
    9596             :                   direction_in=m_model_d, &
    9597             :                   trust_radius_in=radius_current, &
    9598             :                   quench_t_in=quench_t, &
    9599             :                   eps_filter_in=almo_scf_env%eps_filter &
    9600           0 :                   )
    9601             : 
    9602           0 :                DO ispin = 1, nspins
    9603           0 :                   CALL dbcsr_add(step(ispin), m_model_d(ispin), 1.0_dp, step_size)
    9604             :                END DO
    9605             : 
    9606           0 :                border_reached = .TRUE.
    9607           0 :                inner_loop_success = .TRUE.
    9608             : 
    9609             :                CALL predicted_reduction( &
    9610             :                   reduction_out=expected_reduction, &
    9611             :                   grad_in=grad, &
    9612             :                   step_in=step, &
    9613             :                   hess_in=m_model_hessian, &
    9614             :                   hess_submatrix_in=almo_scf_env%domain_preconditioner, &
    9615             :                   quench_t_in=quench_t, &
    9616             :                   special_case=my_special_case, &
    9617             :                   eps_filter=almo_scf_env%eps_filter, &
    9618             :                   domain_map=almo_scf_env%domain_map, &
    9619             :                   cpu_of_domain=almo_scf_env%cpu_of_domain &
    9620           0 :                   )
    9621             : 
    9622           0 :                t2 = m_walltime()
    9623             :                CALL fixed_r_report(unit_nr, &
    9624             :                                    iter_type=2, &
    9625             :                                    iteration=iteration, &
    9626             :                                    step_size=step_size, &
    9627             :                                    border_reached=border_reached, &
    9628             :                                    curvature=y_scalar, &
    9629             :                                    grad_norm_ratio=expected_reduction, &
    9630           0 :                                    time=t2 - t1)
    9631             : 
    9632             :                EXIT fixed_r_loop ! the inner loop
    9633             : 
    9634             :             END IF ! y is negative
    9635             : 
    9636             :             ! Step 3. Compute the step size along the direction
    9637         490 :             step_size = 0.0_dp
    9638         980 :             DO ispin = 1, nspins
    9639         490 :                CALL dbcsr_dot(m_model_r(ispin), m_model_rt(ispin), real_temp)
    9640         980 :                step_size = step_size + real_temp
    9641             :             END DO ! ispin
    9642         490 :             step_size = step_size/y_scalar
    9643             :             IF (unit_nr > 0 .AND. debug_mode) WRITE (unit_nr, *) "...Proposed step size: ", step_size
    9644             : 
    9645             :             ! Update the step matrix
    9646         980 :             DO ispin = 1, nspins
    9647         490 :                CALL dbcsr_copy(prev_step(ispin), step(ispin))
    9648         980 :                CALL dbcsr_add(step(ispin), m_model_d(ispin), 1.0_dp, step_size)
    9649             :             END DO
    9650             : 
    9651             :             ! Compute step norm
    9652             :             CALL contravariant_matrix_norm( &
    9653             :                norm_out=step_norm, &
    9654             :                matrix_in=step, &
    9655             :                metric_in=almo_scf_env%matrix_s, &
    9656             :                quench_t_in=quench_t, &
    9657             :                eps_filter_in=almo_scf_env%eps_filter &
    9658         490 :                )
    9659             :             IF (unit_nr > 0 .AND. debug_mode) WRITE (unit_nr, *) "...Step norm: ", step_norm
    9660             : 
    9661             :             ! Do not step beyond the trust radius
    9662         490 :             IF (step_norm .GT. radius_current) THEN
    9663             : 
    9664             :                IF (unit_nr > 0 .AND. debug_mode) WRITE (unit_nr, *) "...Norm is too large"
    9665             :                CALL step_size_to_border( &
    9666             :                   step_size_out=step_size, &
    9667             :                   metric_in=almo_scf_env%matrix_s, &
    9668             :                   position_in=prev_step, &
    9669             :                   direction_in=m_model_d, &
    9670             :                   trust_radius_in=radius_current, &
    9671             :                   quench_t_in=quench_t, &
    9672             :                   eps_filter_in=almo_scf_env%eps_filter &
    9673          34 :                   )
    9674             :                IF (unit_nr > 0 .AND. debug_mode) WRITE (unit_nr, *) "...Step size to border: ", step_size
    9675             : 
    9676          68 :                DO ispin = 1, nspins
    9677          34 :                   CALL dbcsr_copy(step(ispin), prev_step(ispin))
    9678          68 :                   CALL dbcsr_add(step(ispin), m_model_d(ispin), 1.0_dp, step_size)
    9679             :                END DO
    9680             : 
    9681             :                IF (debug_mode) THEN
    9682             :                   ! Compute step norm
    9683             :                   IF (unit_nr > 0) WRITE (unit_nr, *) "...Extra norm evaluation"
    9684             :                   CALL contravariant_matrix_norm( &
    9685             :                      norm_out=step_norm, &
    9686             :                      matrix_in=step, &
    9687             :                      metric_in=almo_scf_env%matrix_s, &
    9688             :                      quench_t_in=quench_t, &
    9689             :                      eps_filter_in=almo_scf_env%eps_filter &
    9690             :                      )
    9691             :                   IF (unit_nr > 0) WRITE (unit_nr, *) "...Step norm: ", step_norm
    9692             :                   IF (unit_nr > 0) WRITE (unit_nr, *) "...Current radius: ", radius_current
    9693             :                END IF
    9694             : 
    9695          34 :                border_reached = .TRUE.
    9696          34 :                inner_loop_success = .TRUE.
    9697             : 
    9698             :                CALL predicted_reduction( &
    9699             :                   reduction_out=expected_reduction, &
    9700             :                   grad_in=grad, &
    9701             :                   step_in=step, &
    9702             :                   hess_in=m_model_hessian, &
    9703             :                   hess_submatrix_in=almo_scf_env%domain_preconditioner, &
    9704             :                   quench_t_in=quench_t, &
    9705             :                   special_case=my_special_case, &
    9706             :                   eps_filter=almo_scf_env%eps_filter, &
    9707             :                   domain_map=almo_scf_env%domain_map, &
    9708             :                   cpu_of_domain=almo_scf_env%cpu_of_domain &
    9709          34 :                   )
    9710             : 
    9711          34 :                t2 = m_walltime()
    9712             :                CALL fixed_r_report(unit_nr, &
    9713             :                                    iter_type=3, &
    9714             :                                    iteration=iteration, &
    9715             :                                    step_size=step_size, &
    9716             :                                    border_reached=border_reached, &
    9717             :                                    curvature=y_scalar, &
    9718             :                                    grad_norm_ratio=expected_reduction, &
    9719          34 :                                    time=t2 - t1)
    9720             : 
    9721             :                EXIT fixed_r_loop ! the inner loop
    9722             : 
    9723             :             END IF
    9724             : 
    9725         456 :             IF (optimizer%trustr_algorithm .EQ. trustr_cauchy) THEN
    9726             :                ! trustr_steihaug, trustr_cauchy, trustr_dogleg
    9727             : 
    9728          80 :                border_reached = .FALSE.
    9729          80 :                inner_loop_success = .TRUE.
    9730             : 
    9731             :                CALL predicted_reduction( &
    9732             :                   reduction_out=expected_reduction, &
    9733             :                   grad_in=grad, &
    9734             :                   step_in=step, &
    9735             :                   hess_in=m_model_hessian, &
    9736             :                   hess_submatrix_in=almo_scf_env%domain_preconditioner, &
    9737             :                   quench_t_in=quench_t, &
    9738             :                   special_case=my_special_case, &
    9739             :                   eps_filter=almo_scf_env%eps_filter, &
    9740             :                   domain_map=almo_scf_env%domain_map, &
    9741             :                   cpu_of_domain=almo_scf_env%cpu_of_domain &
    9742          80 :                   )
    9743             : 
    9744          80 :                t2 = m_walltime()
    9745             :                CALL fixed_r_report(unit_nr, &
    9746             :                                    iter_type=5, & ! Cauchy point
    9747             :                                    iteration=iteration, &
    9748             :                                    step_size=step_size, &
    9749             :                                    border_reached=border_reached, &
    9750             :                                    curvature=y_scalar, &
    9751             :                                    grad_norm_ratio=expected_reduction, &
    9752          80 :                                    time=t2 - t1)
    9753             : 
    9754             :                EXIT fixed_r_loop ! the inner loop
    9755             : 
    9756         376 :             ELSE IF (optimizer%trustr_algorithm .EQ. trustr_dogleg) THEN
    9757             : 
    9758             :                ! invert or pseudo-invert B
    9759         268 :                SELECT CASE (prec_type)
    9760             :                CASE (xalmo_prec_domain)
    9761             : 
    9762             :                   IF (unit_nr > 0 .AND. debug_mode) WRITE (unit_nr, *) "...Pseudo-invert model Hessian"
    9763         268 :                   IF (special_case .EQ. xalmo_case_block_diag) THEN ! non-overlapping diagonal blocks
    9764             : 
    9765         156 :                      DO ispin = 1, nspins
    9766             :                         CALL pseudo_invert_diagonal_blk( &
    9767             :                            matrix_in=m_model_hessian(ispin), &
    9768             :                            matrix_out=m_model_hessian_inv(ispin), &
    9769             :                            nocc=almo_scf_env%nocc_of_domain(:, ispin) &
    9770         156 :                            )
    9771             :                      END DO
    9772             : 
    9773         190 :                   ELSE IF (special_case .EQ. xalmo_case_fully_deloc) THEN ! the entire system is a block
    9774             : 
    9775             :                      ! invert using cholesky decomposition
    9776           0 :                      DO ispin = 1, nspins
    9777             :                         CALL dbcsr_copy(m_model_hessian_inv(ispin), &
    9778           0 :                                         m_model_hessian(ispin))
    9779             :                         CALL cp_dbcsr_cholesky_decompose(m_model_hessian_inv(ispin), &
    9780             :                                                          para_env=almo_scf_env%para_env, &
    9781           0 :                                                          blacs_env=almo_scf_env%blacs_env)
    9782             :                         CALL cp_dbcsr_cholesky_invert(m_model_hessian_inv(ispin), &
    9783             :                                                       para_env=almo_scf_env%para_env, &
    9784             :                                                       blacs_env=almo_scf_env%blacs_env, &
    9785           0 :                                                       uplo_to_full=.TRUE.)
    9786             :                         CALL dbcsr_filter(m_model_hessian_inv(ispin), &
    9787           0 :                                           almo_scf_env%eps_filter)
    9788             :                      END DO
    9789             : 
    9790             :                   ELSE
    9791             : 
    9792         380 :                      DO ispin = 1, nspins
    9793             :                         CALL construct_domain_preconditioner( &
    9794             :                            matrix_main=m_model_hessian(ispin), &
    9795             :                            subm_s_inv=almo_scf_env%domain_s_inv(:, ispin), &
    9796             :                            subm_r_down=domain_r_down(:, ispin), &
    9797             :                            matrix_trimmer=quench_t(ispin), &
    9798             :                            dpattern=quench_t(ispin), &
    9799             :                            map=almo_scf_env%domain_map(ispin), &
    9800             :                            node_of_domain=almo_scf_env%cpu_of_domain, &
    9801             :                            preconditioner=domain_model_hessian_inv(:, ispin), &
    9802             :                            use_trimmer=.FALSE., &
    9803             :                            my_action=0, & ! do not do domain (1-r0) projection
    9804             :                            skip_inversion=.FALSE. &
    9805         380 :                            )
    9806             :                      END DO
    9807             : 
    9808             :                   END IF ! special_case
    9809             : 
    9810             :                   ! slower but more reliable way to get inverted hessian
    9811             :                   !DO ispin = 1, nspins
    9812             :                   !   CALL compute_preconditioner( &
    9813             :                   !      domain_prec_out=domain_model_hessian_inv(:, ispin), &
    9814             :                   !      m_prec_out=m_model_hessian_inv(ispin), & ! RZK-warning: this one is not inverted if DOMAINs
    9815             :                   !      m_ks=almo_scf_env%matrix_ks(ispin), &
    9816             :                   !      m_s=almo_scf_env%matrix_s(1), &
    9817             :                   !      m_siginv=almo_scf_env%matrix_sigma_inv(ispin), &
    9818             :                   !      m_quench_t=quench_t(ispin), &
    9819             :                   !      m_FTsiginv=FTsiginv(ispin), &
    9820             :                   !      m_siginvTFTsiginv=siginvTFTsiginv(ispin), &
    9821             :                   !      m_ST=ST(ispin), &
    9822             :                   !      para_env=almo_scf_env%para_env, &
    9823             :                   !      blacs_env=almo_scf_env%blacs_env, &
    9824             :                   !      nocc_of_domain=almo_scf_env%nocc_of_domain(:, ispin), &
    9825             :                   !      domain_s_inv=almo_scf_env%domain_s_inv(:, ispin), &
    9826             :                   !      domain_r_down=domain_r_down(:, ispin), &
    9827             :                   !      cpu_of_domain=almo_scf_env%cpu_of_domain, &
    9828             :                   !      domain_map=almo_scf_env%domain_map(ispin), &
    9829             :                   !      assume_t0_q0x=.FALSE., &
    9830             :                   !      penalty_occ_vol=penalty_occ_vol, &
    9831             :                   !      penalty_occ_vol_prefactor=penalty_occ_vol_g_prefactor(ispin), &
    9832             :                   !      eps_filter=almo_scf_env%eps_filter, &
    9833             :                   !      neg_thr=1.0E10_dp, &
    9834             :                   !      spin_factor=spin_factor, &
    9835             :                   !      skip_inversion=.FALSE., &
    9836             :                   !      special_case=my_special_case)
    9837             :                   !ENDDO ! ispin
    9838             : 
    9839             :                CASE DEFAULT
    9840             : 
    9841         268 :                   CPABORT("Unknown preconditioner")
    9842             : 
    9843             :                END SELECT ! preconditioner type fork
    9844             : 
    9845             :                ! get pB = Binv.m_model_r = -Binv.grad
    9846         536 :                DO ispin = 1, nspins
    9847             : 
    9848             :                   ! Get B.d
    9849         268 :                   IF (my_special_case .EQ. xalmo_case_block_diag .OR. &
    9850         268 :                       my_special_case .EQ. xalmo_case_fully_deloc) THEN
    9851             : 
    9852             :                      CALL dbcsr_multiply("N", "N", 1.0_dp, &
    9853             :                                          m_model_hessian_inv(ispin), &
    9854             :                                          m_model_r(ispin), &
    9855             :                                          0.0_dp, m_model_Bd(ispin), &
    9856          78 :                                          filter_eps=almo_scf_env%eps_filter)
    9857             : 
    9858             :                   ELSE
    9859             : 
    9860             :                      CALL apply_domain_operators( &
    9861             :                         matrix_in=m_model_r(ispin), &
    9862             :                         matrix_out=m_model_Bd(ispin), &
    9863             :                         operator1=domain_model_hessian_inv(:, ispin), &
    9864             :                         dpattern=quench_t(ispin), &
    9865             :                         map=almo_scf_env%domain_map(ispin), &
    9866             :                         node_of_domain=almo_scf_env%cpu_of_domain, &
    9867             :                         my_action=0, &
    9868         190 :                         filter_eps=almo_scf_env%eps_filter)
    9869             : 
    9870             :                   END IF ! special case
    9871             : 
    9872             :                END DO ! ispin
    9873             : 
    9874             :                ! Compute norm of pB
    9875             :                CALL contravariant_matrix_norm( &
    9876             :                   norm_out=step_norm, &
    9877             :                   matrix_in=m_model_Bd, &
    9878             :                   metric_in=almo_scf_env%matrix_s, &
    9879             :                   quench_t_in=quench_t, &
    9880             :                   eps_filter_in=almo_scf_env%eps_filter &
    9881         268 :                   )
    9882             :                IF (unit_nr > 0 .AND. debug_mode) WRITE (unit_nr, *) "...pB norm: ", step_norm
    9883             : 
    9884             :                ! Do not step beyond the trust radius
    9885         268 :                IF (step_norm .LE. radius_current) THEN
    9886             : 
    9887             :                   IF (unit_nr > 0 .AND. debug_mode) WRITE (unit_nr, *) "...Full dogleg"
    9888             : 
    9889         266 :                   border_reached = .FALSE.
    9890             : 
    9891         532 :                   DO ispin = 1, nspins
    9892         532 :                      CALL dbcsr_copy(step(ispin), m_model_Bd(ispin))
    9893             :                   END DO
    9894             : 
    9895         266 :                   fake_step_size_to_report = 2.0_dp
    9896         266 :                   iteration_type_to_report = 6
    9897             : 
    9898             :                ELSE ! take a shorter dogleg step
    9899             : 
    9900             :                   IF (unit_nr > 0 .AND. debug_mode) WRITE (unit_nr, *) "...pB norm is too large"
    9901             : 
    9902           2 :                   border_reached = .TRUE.
    9903             : 
    9904             :                   ! compute the dogleg vector = pB - pU
    9905             :                   ! this destroys -Binv.grad content
    9906           4 :                   DO ispin = 1, nspins
    9907           4 :                      CALL dbcsr_add(m_model_Bd(ispin), step(ispin), 1.0_dp, -1.0_dp)
    9908             :                   END DO
    9909             : 
    9910             :                   CALL step_size_to_border( &
    9911             :                      step_size_out=step_size, &
    9912             :                      metric_in=almo_scf_env%matrix_s, &
    9913             :                      position_in=step, &
    9914             :                      direction_in=m_model_Bd, &
    9915             :                      trust_radius_in=radius_current, &
    9916             :                      quench_t_in=quench_t, &
    9917             :                      eps_filter_in=almo_scf_env%eps_filter &
    9918           2 :                      )
    9919             :                   IF (unit_nr > 0 .AND. debug_mode) WRITE (unit_nr, *) "...Step size to border: ", step_size
    9920           2 :                   IF (step_size .GT. 1.0_dp .OR. step_size .LT. 0.0_dp) THEN
    9921           0 :                      IF (unit_nr > 0) &
    9922           0 :                         WRITE (unit_nr, *) "Step size (", step_size, ") must lie inside (0,1)"
    9923           0 :                      CPABORT("Wrong dog leg step. We should never end up here.")
    9924             :                   END IF
    9925             : 
    9926           4 :                   DO ispin = 1, nspins
    9927           4 :                      CALL dbcsr_add(step(ispin), m_model_Bd(ispin), 1.0_dp, step_size)
    9928             :                   END DO
    9929             : 
    9930           2 :                   fake_step_size_to_report = 1.0_dp + step_size
    9931           2 :                   iteration_type_to_report = 7
    9932             : 
    9933             :                END IF ! full or partial dogleg?
    9934             : 
    9935             :                IF (debug_mode) THEN
    9936             :                   ! Compute step norm
    9937             :                   IF (unit_nr > 0) WRITE (unit_nr, *) "...Extra norm evaluation"
    9938             :                   CALL contravariant_matrix_norm( &
    9939             :                      norm_out=step_norm, &
    9940             :                      matrix_in=step, &
    9941             :                      metric_in=almo_scf_env%matrix_s, &
    9942             :                      quench_t_in=quench_t, &
    9943             :                      eps_filter_in=almo_scf_env%eps_filter &
    9944             :                      )
    9945             :                   IF (unit_nr > 0) WRITE (unit_nr, *) "...Step norm: ", step_norm
    9946             :                   IF (unit_nr > 0) WRITE (unit_nr, *) "...Current radius: ", radius_current
    9947             :                END IF
    9948             : 
    9949             :                CALL predicted_reduction( &
    9950             :                   reduction_out=expected_reduction, &
    9951             :                   grad_in=grad, &
    9952             :                   step_in=step, &
    9953             :                   hess_in=m_model_hessian, &
    9954             :                   hess_submatrix_in=almo_scf_env%domain_preconditioner, &
    9955             :                   quench_t_in=quench_t, &
    9956             :                   special_case=my_special_case, &
    9957             :                   eps_filter=almo_scf_env%eps_filter, &
    9958             :                   domain_map=almo_scf_env%domain_map, &
    9959             :                   cpu_of_domain=almo_scf_env%cpu_of_domain &
    9960         268 :                   )
    9961             : 
    9962         268 :                inner_loop_success = .TRUE.
    9963             : 
    9964         268 :                t2 = m_walltime()
    9965             :                CALL fixed_r_report(unit_nr, &
    9966             :                                    iter_type=iteration_type_to_report, &
    9967             :                                    iteration=iteration, &
    9968             :                                    step_size=fake_step_size_to_report, &
    9969             :                                    border_reached=border_reached, &
    9970             :                                    curvature=y_scalar, &
    9971             :                                    grad_norm_ratio=expected_reduction, &
    9972         268 :                                    time=t2 - t1)
    9973             : 
    9974             :                EXIT fixed_r_loop ! the inner loop
    9975             : 
    9976             :             END IF ! Non-iterative subproblem methods exit here
    9977             : 
    9978             :             ! Step 4: update model gradient
    9979         216 :             DO ispin = 1, nspins
    9980             :                ! save previous data
    9981         108 :                CALL dbcsr_copy(m_model_r_prev(ispin), m_model_r(ispin))
    9982             :                CALL dbcsr_add(m_model_r(ispin), m_model_Bd(ispin), &
    9983         216 :                               1.0_dp, -step_size)
    9984             :             END DO ! ispin
    9985             : 
    9986             :             ! Model grad norm
    9987         216 :             DO ispin = 1, nspins
    9988         216 :                grad_norm_spin(ispin) = dbcsr_maxabs(m_model_r(ispin))
    9989             :                !grad_norm_frob = dbcsr_frobenius_norm(grad(ispin)) / &
    9990             :                !                 dbcsr_frobenius_norm(quench_t(ispin))
    9991             :             END DO ! ispin
    9992         324 :             model_grad_norm = MAXVAL(grad_norm_spin)
    9993             : 
    9994             :             ! Check norm reduction
    9995         108 :             grad_norm_ratio = model_grad_norm/grad_norm_ref
    9996         108 :             IF (grad_norm_ratio .LT. optimizer%model_grad_norm_ratio) THEN
    9997             : 
    9998          26 :                border_reached = .FALSE.
    9999          26 :                inner_loop_success = .TRUE.
   10000             : 
   10001             :                CALL predicted_reduction( &
   10002             :                   reduction_out=expected_reduction, &
   10003             :                   grad_in=grad, &
   10004             :                   step_in=step, &
   10005             :                   hess_in=m_model_hessian, &
   10006             :                   hess_submatrix_in=almo_scf_env%domain_preconditioner, &
   10007             :                   quench_t_in=quench_t, &
   10008             :                   special_case=my_special_case, &
   10009             :                   eps_filter=almo_scf_env%eps_filter, &
   10010             :                   domain_map=almo_scf_env%domain_map, &
   10011             :                   cpu_of_domain=almo_scf_env%cpu_of_domain &
   10012          26 :                   )
   10013             : 
   10014          26 :                t2 = m_walltime()
   10015             :                CALL fixed_r_report(unit_nr, &
   10016             :                                    iter_type=4, &
   10017             :                                    iteration=iteration, &
   10018             :                                    step_size=step_size, &
   10019             :                                    border_reached=border_reached, &
   10020             :                                    curvature=y_scalar, &
   10021             :                                    grad_norm_ratio=expected_reduction, &
   10022          26 :                                    time=t2 - t1)
   10023             : 
   10024             :                EXIT fixed_r_loop ! the inner loop
   10025             : 
   10026             :             END IF
   10027             : 
   10028             :             ! Step 5: update model direction
   10029         164 :             DO ispin = 1, nspins
   10030             :                ! save previous data
   10031         164 :                CALL dbcsr_copy(m_model_rt_prev(ispin), m_model_rt(ispin))
   10032             :             END DO ! ispin
   10033             : 
   10034         164 :             DO ispin = 1, nspins
   10035             : 
   10036          82 :                IF (my_special_case .EQ. xalmo_case_block_diag .OR. &
   10037          82 :                    my_special_case .EQ. xalmo_case_fully_deloc) THEN
   10038             : 
   10039             :                   CALL dbcsr_multiply("N", "N", 1.0_dp, &
   10040             :                                       m_s_inv, &
   10041             :                                       m_model_r(ispin), &
   10042             :                                       0.0_dp, m_model_rt(ispin), &
   10043           0 :                                       filter_eps=almo_scf_env%eps_filter)
   10044             : 
   10045          82 :                ELSE IF (my_special_case .EQ. xalmo_case_normal) THEN
   10046             : 
   10047             :                   CALL apply_domain_operators( &
   10048             :                      matrix_in=m_model_r(ispin), &
   10049             :                      matrix_out=m_model_rt(ispin), &
   10050             :                      operator1=almo_scf_env%domain_s_inv(:, ispin), &
   10051             :                      dpattern=quench_t(ispin), &
   10052             :                      map=almo_scf_env%domain_map(ispin), &
   10053             :                      node_of_domain=almo_scf_env%cpu_of_domain, &
   10054             :                      my_action=0, &
   10055          82 :                      filter_eps=almo_scf_env%eps_filter)
   10056             : 
   10057             :                END IF
   10058             : 
   10059             :             END DO ! ispin
   10060             : 
   10061             :             CALL compute_cg_beta( &
   10062             :                beta=beta, &
   10063             :                reset_conjugator=reset_conjugator, &
   10064             :                conjugator=optimizer%conjugator, &
   10065             :                grad=m_model_r(:), &
   10066             :                prev_grad=m_model_r_prev(:), &
   10067             :                step=m_model_rt(:), &
   10068             :                prev_step=m_model_rt_prev(:) &
   10069          82 :                )
   10070             : 
   10071         164 :             DO ispin = 1, nspins
   10072             :                ! update direction
   10073         164 :                CALL dbcsr_add(m_model_d(ispin), m_model_rt(ispin), beta, 1.0_dp)
   10074             :             END DO ! ispin
   10075             : 
   10076          82 :             t2 = m_walltime()
   10077             :             CALL fixed_r_report(unit_nr, &
   10078             :                                 iter_type=1, &
   10079             :                                 iteration=iteration, &
   10080             :                                 step_size=step_size, &
   10081             :                                 border_reached=border_reached, &
   10082             :                                 curvature=y_scalar, &
   10083             :                                 grad_norm_ratio=grad_norm_ratio, &
   10084          82 :                                 time=t2 - t1)
   10085          82 :             t1 = m_walltime()
   10086             : 
   10087             :          END DO fixed_r_loop
   10088             :          !!!! done with the inner loop
   10089             :          ! the inner loop must return: step, predicted reduction,
   10090             :          ! whether it reached the border and completed successfully
   10091             : 
   10092             :          IF (.NOT. inner_loop_success) THEN
   10093           0 :             CPABORT("Inner loop did not produce solution")
   10094             :          END IF
   10095             : 
   10096         816 :          DO ispin = 1, nspins
   10097             : 
   10098         408 :             CALL dbcsr_copy(m_theta_trial(ispin), m_theta(ispin))
   10099         816 :             CALL dbcsr_add(m_theta_trial(ispin), step(ispin), 1.0_dp, 1.0_dp)
   10100             : 
   10101             :          END DO ! ispin
   10102             : 
   10103             :          ! compute the energy
   10104             :          !IF (.NOT. same_position) THEN
   10105             :          CALL main_var_to_xalmos_and_loss_func( &
   10106             :             almo_scf_env=almo_scf_env, &
   10107             :             qs_env=qs_env, &
   10108             :             m_main_var_in=m_theta_trial, &
   10109             :             m_t_out=matrix_t_out, &
   10110             :             m_sig_sqrti_ii_out=m_sig_sqrti_ii, &
   10111             :             energy_out=energy_trial, &
   10112             :             penalty_out=penalty_trial, &
   10113             :             m_FTsiginv_out=FTsiginv, &
   10114             :             m_siginvTFTsiginv_out=siginvTFTsiginv, &
   10115             :             m_ST_out=ST, &
   10116             :             m_STsiginv0_in=STsiginv_0, &
   10117             :             m_quench_t_in=quench_t, &
   10118             :             domain_r_down_in=domain_r_down, &
   10119             :             assume_t0_q0x=assume_t0_q0x, &
   10120             :             just_started=.FALSE., &
   10121             :             optimize_theta=optimize_theta, &
   10122             :             normalize_orbitals=normalize_orbitals, &
   10123             :             perturbation_only=perturbation_only, &
   10124             :             do_penalty=penalty_occ_vol, &
   10125         408 :             special_case=my_special_case)
   10126         408 :          loss_trial = energy_trial + penalty_trial
   10127             :          !ENDIF ! not same_position
   10128             : 
   10129         408 :          rho = (loss_trial - loss_start)/expected_reduction
   10130         408 :          loss_change_to_report = loss_trial - loss_start
   10131             : 
   10132         408 :          IF (rho < 0.25_dp) THEN
   10133           0 :             radius_current = 0.25_dp*radius_current
   10134             :          ELSE
   10135         408 :             IF (rho > 0.75_dp .AND. border_reached) THEN
   10136           2 :                radius_current = MIN(2.0_dp*radius_current, radius_max)
   10137             :             END IF
   10138             :          END IF ! radius adjustment
   10139             : 
   10140         408 :          IF (rho > eta) THEN
   10141         816 :             DO ispin = 1, nspins
   10142         816 :                CALL dbcsr_copy(m_theta(ispin), m_theta_trial(ispin))
   10143             :             END DO ! ispin
   10144         408 :             loss_start = loss_trial
   10145         408 :             energy_start = energy_trial
   10146         408 :             penalty_start = penalty_trial
   10147         408 :             same_position = .FALSE.
   10148         408 :             IF (my_special_case .EQ. xalmo_case_block_diag) THEN
   10149          92 :                almo_scf_env%almo_scf_energy = energy_trial
   10150             :             END IF
   10151             :          ELSE
   10152           0 :             same_position = .TRUE.
   10153           0 :             IF (my_special_case .EQ. xalmo_case_block_diag) THEN
   10154           0 :                almo_scf_env%almo_scf_energy = energy_start
   10155             :             END IF
   10156             :          END IF ! finalize step
   10157             : 
   10158         408 :          t2outer = m_walltime()
   10159             :          CALL trust_r_report(unit_nr, &
   10160             :                              iter_type=2, &
   10161             :                              iteration=outer_iteration, &
   10162             :                              loss=loss_trial, &
   10163             :                              delta_loss=loss_change_to_report, &
   10164             :                              grad_norm=0.0_dp, &
   10165             :                              predicted_reduction=expected_reduction, &
   10166             :                              rho=rho, &
   10167             :                              radius=radius_current, &
   10168             :                              new=.NOT. same_position, &
   10169         408 :                              time=t2outer - t1outer)
   10170         426 :          t1outer = m_walltime()
   10171             : 
   10172             :       END DO adjust_r_loop
   10173             : 
   10174             :       ! post SCF-loop calculations
   10175          18 :       IF (scf_converged) THEN
   10176             : 
   10177             :          CALL wrap_up_xalmo_scf( &
   10178             :             qs_env=qs_env, &
   10179             :             almo_scf_env=almo_scf_env, &
   10180             :             perturbation_in=perturbation_only, &
   10181             :             m_xalmo_in=matrix_t_out, &
   10182             :             m_quench_in=quench_t, &
   10183          18 :             energy_inout=energy_start)
   10184             : 
   10185             :       END IF ! if converged
   10186             : 
   10187          36 :       DO ispin = 1, nspins
   10188          18 :          CALL dbcsr_release(m_model_hessian_inv(ispin))
   10189          18 :          CALL dbcsr_release(m_model_hessian(ispin))
   10190          18 :          CALL dbcsr_release(STsiginv_0(ispin))
   10191          18 :          CALL dbcsr_release(ST(ispin))
   10192          18 :          CALL dbcsr_release(FTsiginv(ispin))
   10193          18 :          CALL dbcsr_release(siginvTFTsiginv(ispin))
   10194          18 :          CALL dbcsr_release(prev_step(ispin))
   10195          18 :          CALL dbcsr_release(grad(ispin))
   10196          18 :          CALL dbcsr_release(step(ispin))
   10197          18 :          CALL dbcsr_release(m_theta(ispin))
   10198          18 :          CALL dbcsr_release(m_sig_sqrti_ii(ispin))
   10199          18 :          CALL dbcsr_release(m_model_r(ispin))
   10200          18 :          CALL dbcsr_release(m_model_rt(ispin))
   10201          18 :          CALL dbcsr_release(m_model_d(ispin))
   10202          18 :          CALL dbcsr_release(m_model_Bd(ispin))
   10203          18 :          CALL dbcsr_release(m_model_r_prev(ispin))
   10204          18 :          CALL dbcsr_release(m_model_rt_prev(ispin))
   10205          18 :          CALL dbcsr_release(m_theta_trial(ispin))
   10206          18 :          CALL release_submatrices(domain_r_down(:, ispin))
   10207          36 :          CALL release_submatrices(domain_model_hessian_inv(:, ispin))
   10208             :       END DO ! ispin
   10209             : 
   10210          18 :       IF (my_special_case .EQ. xalmo_case_block_diag .OR. &
   10211             :           my_special_case .EQ. xalmo_case_fully_deloc) THEN
   10212           2 :          CALL dbcsr_release(m_s_inv)
   10213             :       END IF
   10214             : 
   10215          18 :       DEALLOCATE (m_model_hessian)
   10216          18 :       DEALLOCATE (m_model_hessian_inv)
   10217          18 :       DEALLOCATE (siginvTFTsiginv)
   10218          18 :       DEALLOCATE (STsiginv_0)
   10219          18 :       DEALLOCATE (FTsiginv)
   10220          18 :       DEALLOCATE (ST)
   10221          18 :       DEALLOCATE (grad)
   10222          18 :       DEALLOCATE (prev_step)
   10223          18 :       DEALLOCATE (step)
   10224          18 :       DEALLOCATE (m_sig_sqrti_ii)
   10225          18 :       DEALLOCATE (m_model_r)
   10226          18 :       DEALLOCATE (m_model_rt)
   10227          18 :       DEALLOCATE (m_model_d)
   10228          18 :       DEALLOCATE (m_model_Bd)
   10229          18 :       DEALLOCATE (m_model_r_prev)
   10230          18 :       DEALLOCATE (m_model_rt_prev)
   10231          18 :       DEALLOCATE (m_theta_trial)
   10232             : 
   10233         146 :       DEALLOCATE (domain_r_down)
   10234         146 :       DEALLOCATE (domain_model_hessian_inv)
   10235             : 
   10236          18 :       DEALLOCATE (penalty_occ_vol_g_prefactor)
   10237          18 :       DEALLOCATE (penalty_occ_vol_h_prefactor)
   10238          18 :       DEALLOCATE (grad_norm_spin)
   10239          18 :       DEALLOCATE (nocc)
   10240             : 
   10241          18 :       DEALLOCATE (m_theta)
   10242             : 
   10243          18 :       IF (.NOT. scf_converged .AND. .NOT. optimizer%early_stopping_on) THEN
   10244           0 :          CPABORT("Optimization not converged! ")
   10245             :       END IF
   10246             : 
   10247          18 :       CALL timestop(handle)
   10248             : 
   10249          36 :    END SUBROUTINE almo_scf_xalmo_trustr
   10250             : 
   10251             : ! **************************************************************************************************
   10252             : !> \brief Computes molecular orbitals and the objective (loss) function from the main variables
   10253             : !>        Most important input and output variables are given as arguments explicitly.
   10254             : !>        Some variables inside almo_scf_env (KS, DM) and qs_env are also updated but are not
   10255             : !>        listed as arguments for brevity
   10256             : !> \param almo_scf_env ...
   10257             : !> \param qs_env ...
   10258             : !> \param m_main_var_in ...
   10259             : !> \param m_t_out ...
   10260             : !> \param energy_out ...
   10261             : !> \param penalty_out ...
   10262             : !> \param m_sig_sqrti_ii_out ...
   10263             : !> \param m_FTsiginv_out ...
   10264             : !> \param m_siginvTFTsiginv_out ...
   10265             : !> \param m_ST_out ...
   10266             : !> \param m_STsiginv0_in ...
   10267             : !> \param m_quench_t_in ...
   10268             : !> \param domain_r_down_in ...
   10269             : !> \param assume_t0_q0x ...
   10270             : !> \param just_started ...
   10271             : !> \param optimize_theta ...
   10272             : !> \param normalize_orbitals ...
   10273             : !> \param perturbation_only ...
   10274             : !> \param do_penalty ...
   10275             : !> \param special_case ...
   10276             : !> \par History
   10277             : !>       2019.12 created [Rustam Z Khaliullin]
   10278             : !> \author Rustam Z Khaliullin
   10279             : ! **************************************************************************************************
   10280        1474 :    SUBROUTINE main_var_to_xalmos_and_loss_func(almo_scf_env, qs_env, m_main_var_in, &
   10281        1474 :                                                m_t_out, energy_out, penalty_out, m_sig_sqrti_ii_out, m_FTsiginv_out, &
   10282        1474 :                                                m_siginvTFTsiginv_out, m_ST_out, m_STsiginv0_in, m_quench_t_in, domain_r_down_in, &
   10283             :                                                assume_t0_q0x, just_started, optimize_theta, normalize_orbitals, perturbation_only, &
   10284             :                                                do_penalty, special_case)
   10285             : 
   10286             :       TYPE(almo_scf_env_type), INTENT(INOUT)             :: almo_scf_env
   10287             :       TYPE(qs_environment_type), POINTER                 :: qs_env
   10288             :       TYPE(dbcsr_type), DIMENSION(:), INTENT(IN)         :: m_main_var_in
   10289             :       TYPE(dbcsr_type), DIMENSION(:), INTENT(INOUT)      :: m_t_out
   10290             :       REAL(KIND=dp), INTENT(OUT)                         :: energy_out, penalty_out
   10291             :       TYPE(dbcsr_type), DIMENSION(:), INTENT(INOUT)      :: m_sig_sqrti_ii_out, m_FTsiginv_out, &
   10292             :                                                             m_siginvTFTsiginv_out, m_ST_out, &
   10293             :                                                             m_STsiginv0_in, m_quench_t_in
   10294             :       TYPE(domain_submatrix_type), DIMENSION(:, :), &
   10295             :          INTENT(IN)                                      :: domain_r_down_in
   10296             :       LOGICAL, INTENT(IN)                                :: assume_t0_q0x, just_started, &
   10297             :                                                             optimize_theta, normalize_orbitals, &
   10298             :                                                             perturbation_only, do_penalty
   10299             :       INTEGER, INTENT(IN)                                :: special_case
   10300             : 
   10301             :       CHARACTER(len=*), PARAMETER :: routineN = 'main_var_to_xalmos_and_loss_func'
   10302             : 
   10303             :       INTEGER                                            :: handle, ispin, nspins
   10304        1474 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: nocc
   10305             :       REAL(KIND=dp)                                      :: det1, energy_ispin, penalty_amplitude, &
   10306             :                                                             spin_factor
   10307             : 
   10308        1474 :       CALL timeset(routineN, handle)
   10309             : 
   10310        1474 :       energy_out = 0.0_dp
   10311        1474 :       penalty_out = 0.0_dp
   10312             : 
   10313        1474 :       nspins = SIZE(m_main_var_in)
   10314        1474 :       IF (nspins == 1) THEN
   10315        1474 :          spin_factor = 2.0_dp
   10316             :       ELSE
   10317           0 :          spin_factor = 1.0_dp
   10318             :       END IF
   10319             : 
   10320        1474 :       penalty_amplitude = 0.0_dp !almo_scf_env%penalty%occ_vol_coeff
   10321             : 
   10322        4422 :       ALLOCATE (nocc(nspins))
   10323        2948 :       DO ispin = 1, nspins
   10324             :          CALL dbcsr_get_info(almo_scf_env%matrix_sigma_inv(ispin), &
   10325        2948 :                              nfullrows_total=nocc(ispin))
   10326             :       END DO
   10327             : 
   10328        2948 :       DO ispin = 1, nspins
   10329             : 
   10330             :          ! compute MO coefficients from the main variable
   10331             :          CALL compute_xalmos_from_main_var( &
   10332             :             m_var_in=m_main_var_in(ispin), &
   10333             :             m_t_out=m_t_out(ispin), &
   10334             :             m_quench_t=m_quench_t_in(ispin), &
   10335             :             m_t0=almo_scf_env%matrix_t_blk(ispin), &
   10336             :             m_oo_template=almo_scf_env%matrix_sigma_inv(ispin), &
   10337             :             m_STsiginv0=m_STsiginv0_in(ispin), &
   10338             :             m_s=almo_scf_env%matrix_s(1), &
   10339             :             m_sig_sqrti_ii_out=m_sig_sqrti_ii_out(ispin), &
   10340             :             domain_r_down=domain_r_down_in(:, ispin), &
   10341             :             domain_s_inv=almo_scf_env%domain_s_inv(:, ispin), &
   10342             :             domain_map=almo_scf_env%domain_map(ispin), &
   10343             :             cpu_of_domain=almo_scf_env%cpu_of_domain, &
   10344             :             assume_t0_q0x=assume_t0_q0x, &
   10345             :             just_started=just_started, &
   10346             :             optimize_theta=optimize_theta, &
   10347             :             normalize_orbitals=normalize_orbitals, &
   10348             :             envelope_amplitude=almo_scf_env%envelope_amplitude, &
   10349             :             eps_filter=almo_scf_env%eps_filter, &
   10350             :             special_case=special_case, &
   10351             :             nocc_of_domain=almo_scf_env%nocc_of_domain(:, ispin), &
   10352             :             order_lanczos=almo_scf_env%order_lanczos, &
   10353             :             eps_lanczos=almo_scf_env%eps_lanczos, &
   10354        1474 :             max_iter_lanczos=almo_scf_env%max_iter_lanczos)
   10355             : 
   10356             :          ! compute the global projectors (for the density matrix)
   10357             :          CALL almo_scf_t_to_proj( &
   10358             :             t=m_t_out(ispin), &
   10359             :             p=almo_scf_env%matrix_p(ispin), &
   10360             :             eps_filter=almo_scf_env%eps_filter, &
   10361             :             orthog_orbs=.FALSE., &
   10362             :             nocc_of_domain=almo_scf_env%nocc_of_domain(:, ispin), &
   10363             :             s=almo_scf_env%matrix_s(1), &
   10364             :             sigma=almo_scf_env%matrix_sigma(ispin), &
   10365             :             sigma_inv=almo_scf_env%matrix_sigma_inv(ispin), &
   10366             :             use_guess=.FALSE., &
   10367             :             algorithm=almo_scf_env%sigma_inv_algorithm, &
   10368             :             inv_eps_factor=almo_scf_env%matrix_iter_eps_error_factor, &
   10369             :             inverse_accelerator=almo_scf_env%order_lanczos, &
   10370             :             eps_lanczos=almo_scf_env%eps_lanczos, &
   10371             :             max_iter_lanczos=almo_scf_env%max_iter_lanczos, &
   10372             :             para_env=almo_scf_env%para_env, &
   10373        1474 :             blacs_env=almo_scf_env%blacs_env)
   10374             : 
   10375             :          ! compute dm from the projector(s)
   10376             :          CALL dbcsr_scale(almo_scf_env%matrix_p(ispin), &
   10377        2948 :                           spin_factor)
   10378             : 
   10379             :       END DO ! ispin
   10380             : 
   10381             :       ! update the KS matrix and energy if necessary
   10382        1474 :       IF (perturbation_only) THEN
   10383             :          ! note: do not combine the two IF statements
   10384         212 :          IF (just_started) THEN
   10385          48 :             DO ispin = 1, nspins
   10386             :                CALL dbcsr_copy(almo_scf_env%matrix_ks(ispin), &
   10387          48 :                                almo_scf_env%matrix_ks_0deloc(ispin))
   10388             :             END DO
   10389             :          END IF
   10390             :       ELSE
   10391             :          ! the KS matrix is updated outside the spin loop
   10392             :          CALL almo_dm_to_almo_ks(qs_env, &
   10393             :                                  almo_scf_env%matrix_p, &
   10394             :                                  almo_scf_env%matrix_ks, &
   10395             :                                  energy_out, &
   10396             :                                  almo_scf_env%eps_filter, &
   10397        1262 :                                  almo_scf_env%mat_distr_aos)
   10398             :       END IF
   10399             : 
   10400        1474 :       penalty_out = 0.0_dp
   10401        2948 :       DO ispin = 1, nspins
   10402             : 
   10403             :          CALL compute_frequently_used_matrices( &
   10404             :             filter_eps=almo_scf_env%eps_filter, &
   10405             :             m_T_in=m_t_out(ispin), &
   10406             :             m_siginv_in=almo_scf_env%matrix_sigma_inv(ispin), &
   10407             :             m_S_in=almo_scf_env%matrix_s(1), &
   10408             :             m_F_in=almo_scf_env%matrix_ks(ispin), &
   10409             :             m_FTsiginv_out=m_FTsiginv_out(ispin), &
   10410             :             m_siginvTFTsiginv_out=m_siginvTFTsiginv_out(ispin), &
   10411        1474 :             m_ST_out=m_ST_out(ispin))
   10412             : 
   10413        1474 :          IF (perturbation_only) THEN
   10414             :             ! calculate objective function Tr(F_0 R)
   10415         212 :             IF (ispin .EQ. 1) energy_out = 0.0_dp
   10416         212 :             CALL dbcsr_dot(m_t_out(ispin), m_FTsiginv_out(ispin), energy_ispin)
   10417         212 :             energy_out = energy_out + energy_ispin*spin_factor
   10418             :          END IF
   10419             : 
   10420        2948 :          IF (do_penalty) THEN
   10421             : 
   10422             :             CALL determinant(almo_scf_env%matrix_sigma(ispin), det1, &
   10423           0 :                              almo_scf_env%eps_filter)
   10424             :             penalty_out = penalty_out - &
   10425           0 :                           penalty_amplitude*spin_factor*nocc(ispin)*LOG(det1)
   10426             : 
   10427             :          END IF
   10428             : 
   10429             :       END DO ! ispin
   10430             : 
   10431        1474 :       DEALLOCATE (nocc)
   10432             : 
   10433        1474 :       CALL timestop(handle)
   10434             : 
   10435        1474 :    END SUBROUTINE main_var_to_xalmos_and_loss_func
   10436             : 
   10437             : ! **************************************************************************************************
   10438             : !> \brief Computes the step size required to reach the trust-radius border,
   10439             : !>        measured from the origin,
   10440             : !>        given the current position (position) in the direction (direction)
   10441             : !> \param step_size_out ...
   10442             : !> \param metric_in ...
   10443             : !> \param position_in ...
   10444             : !> \param direction_in ...
   10445             : !> \param trust_radius_in ...
   10446             : !> \param quench_t_in ...
   10447             : !> \param eps_filter_in ...
   10448             : !> \par History
   10449             : !>       2019.12 created [Rustam Z Khaliullin]
   10450             : !> \author Rustam Z Khaliullin
   10451             : ! **************************************************************************************************
   10452          36 :    SUBROUTINE step_size_to_border(step_size_out, metric_in, position_in, &
   10453          36 :                                   direction_in, trust_radius_in, quench_t_in, eps_filter_in)
   10454             : 
   10455             :       REAL(KIND=dp), INTENT(INOUT)                       :: step_size_out
   10456             :       TYPE(dbcsr_type), DIMENSION(:), INTENT(IN)         :: metric_in, position_in, direction_in
   10457             :       REAL(KIND=dp), INTENT(IN)                          :: trust_radius_in
   10458             :       TYPE(dbcsr_type), DIMENSION(:), INTENT(IN)         :: quench_t_in
   10459             :       REAL(KIND=dp), INTENT(IN)                          :: eps_filter_in
   10460             : 
   10461             :       INTEGER                                            :: isol, ispin, nsolutions, &
   10462             :                                                             nsolutions_found, nspins
   10463          36 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: nocc
   10464             :       REAL(KIND=dp)                                      :: discrim_sign, discriminant, solution, &
   10465             :                                                             spin_factor, temp_real
   10466             :       REAL(KIND=dp), DIMENSION(3)                        :: coef
   10467          36 :       TYPE(dbcsr_type), ALLOCATABLE, DIMENSION(:)        :: m_temp_no
   10468             : 
   10469          36 :       step_size_out = 0.0_dp
   10470             : 
   10471          36 :       nspins = SIZE(position_in)
   10472          36 :       IF (nspins == 1) THEN
   10473             :          spin_factor = 2.0_dp
   10474             :       ELSE
   10475           0 :          spin_factor = 1.0_dp
   10476             :       END IF
   10477             : 
   10478         108 :       ALLOCATE (nocc(nspins))
   10479         144 :       ALLOCATE (m_temp_no(nspins))
   10480             : 
   10481          36 :       coef(:) = 0.0_dp
   10482          72 :       DO ispin = 1, nspins
   10483             : 
   10484             :          CALL dbcsr_create(m_temp_no(ispin), &
   10485          36 :                            template=direction_in(ispin))
   10486             : 
   10487             :          CALL dbcsr_get_info(direction_in(ispin), &
   10488          36 :                              nfullcols_total=nocc(ispin))
   10489             : 
   10490          36 :          CALL dbcsr_copy(m_temp_no(ispin), quench_t_in(ispin))
   10491             :          CALL dbcsr_multiply("N", "N", 1.0_dp, &
   10492             :                              metric_in(1), &
   10493             :                              position_in(ispin), &
   10494             :                              0.0_dp, m_temp_no(ispin), &
   10495          36 :                              retain_sparsity=.TRUE.)
   10496          36 :          CALL dbcsr_filter(m_temp_no(ispin), eps_filter_in)
   10497          36 :          CALL dbcsr_dot(position_in(ispin), m_temp_no(ispin), temp_real)
   10498          36 :          coef(3) = coef(3) + temp_real/nocc(ispin)
   10499          36 :          CALL dbcsr_dot(direction_in(ispin), m_temp_no(ispin), temp_real)
   10500          36 :          coef(2) = coef(2) + 2.0_dp*temp_real/nocc(ispin)
   10501          36 :          CALL dbcsr_copy(m_temp_no(ispin), quench_t_in(ispin))
   10502             :          CALL dbcsr_multiply("N", "N", 1.0_dp, &
   10503             :                              metric_in(1), &
   10504             :                              direction_in(ispin), &
   10505             :                              0.0_dp, m_temp_no(ispin), &
   10506          36 :                              retain_sparsity=.TRUE.)
   10507          36 :          CALL dbcsr_filter(m_temp_no(ispin), eps_filter_in)
   10508          36 :          CALL dbcsr_dot(direction_in(ispin), m_temp_no(ispin), temp_real)
   10509          36 :          coef(1) = coef(1) + temp_real/nocc(ispin)
   10510             : 
   10511          72 :          CALL dbcsr_release(m_temp_no(ispin))
   10512             : 
   10513             :       END DO !ispin
   10514             : 
   10515          36 :       DEALLOCATE (nocc)
   10516          36 :       DEALLOCATE (m_temp_no)
   10517             : 
   10518         144 :       coef(:) = coef(:)*spin_factor
   10519          36 :       coef(3) = coef(3) - trust_radius_in*trust_radius_in
   10520             : 
   10521             :       ! solve the quadratic equation
   10522          36 :       discriminant = coef(2)*coef(2) - 4.0_dp*coef(1)*coef(3)
   10523          36 :       IF (discriminant .GT. TINY(discriminant)) THEN
   10524             :          nsolutions = 2
   10525           0 :       ELSE IF (discriminant .LT. 0.0_dp) THEN
   10526           0 :          nsolutions = 0
   10527           0 :          CPABORT("Step to border: no solutions")
   10528             :       ELSE
   10529             :          nsolutions = 1
   10530             :       END IF
   10531             : 
   10532          36 :       discrim_sign = 1.0_dp
   10533          36 :       nsolutions_found = 0
   10534         108 :       DO isol = 1, nsolutions
   10535          72 :          solution = (-coef(2) + discrim_sign*SQRT(discriminant))/(2.0_dp*coef(1))
   10536          72 :          IF (solution .GT. 0.0_dp) THEN
   10537          36 :             nsolutions_found = nsolutions_found + 1
   10538          36 :             step_size_out = solution
   10539             :          END IF
   10540         108 :          discrim_sign = -discrim_sign
   10541             :       END DO
   10542             : 
   10543          36 :       IF (nsolutions_found == 0) THEN
   10544           0 :          CPABORT("Step to border: no positive solutions")
   10545          36 :       ELSE IF (nsolutions_found == 2) THEN
   10546           0 :          CPABORT("Two positive border steps possible!")
   10547             :       END IF
   10548             : 
   10549          36 :    END SUBROUTINE step_size_to_border
   10550             : 
   10551             : ! **************************************************************************************************
   10552             : !> \brief Computes a norm of a contravariant NBasis x Occ matrix using proper metric
   10553             : !> \param norm_out ...
   10554             : !> \param matrix_in ...
   10555             : !> \param metric_in ...
   10556             : !> \param quench_t_in ...
   10557             : !> \param eps_filter_in ...
   10558             : !> \par History
   10559             : !>       2019.12 created [Rustam Z Khaliullin]
   10560             : !> \author Rustam Z Khaliullin
   10561             : ! **************************************************************************************************
   10562         758 :    SUBROUTINE contravariant_matrix_norm(norm_out, matrix_in, metric_in, &
   10563         758 :                                         quench_t_in, eps_filter_in)
   10564             : 
   10565             :       REAL(KIND=dp), INTENT(OUT)                         :: norm_out
   10566             :       TYPE(dbcsr_type), DIMENSION(:), INTENT(IN)         :: matrix_in, metric_in, quench_t_in
   10567             :       REAL(KIND=dp), INTENT(IN)                          :: eps_filter_in
   10568             : 
   10569             :       INTEGER                                            :: ispin, nspins
   10570         758 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: nocc
   10571             :       REAL(KIND=dp)                                      :: my_norm, spin_factor, temp_real
   10572         758 :       TYPE(dbcsr_type), ALLOCATABLE, DIMENSION(:)        :: m_temp_no
   10573             : 
   10574             :       ! Frist thing: assign the output value to avoid norms being undefined
   10575         758 :       norm_out = 0.0_dp
   10576             : 
   10577         758 :       nspins = SIZE(matrix_in)
   10578         758 :       IF (nspins == 1) THEN
   10579             :          spin_factor = 2.0_dp
   10580             :       ELSE
   10581           0 :          spin_factor = 1.0_dp
   10582             :       END IF
   10583             : 
   10584        2274 :       ALLOCATE (nocc(nspins))
   10585        3032 :       ALLOCATE (m_temp_no(nspins))
   10586             : 
   10587         758 :       my_norm = 0.0_dp
   10588        1516 :       DO ispin = 1, nspins
   10589             : 
   10590         758 :          CALL dbcsr_create(m_temp_no(ispin), template=matrix_in(ispin))
   10591             : 
   10592             :          CALL dbcsr_get_info(matrix_in(ispin), &
   10593         758 :                              nfullcols_total=nocc(ispin))
   10594             : 
   10595         758 :          CALL dbcsr_copy(m_temp_no(ispin), quench_t_in(ispin))
   10596             :          CALL dbcsr_multiply("N", "N", 1.0_dp, &
   10597             :                              metric_in(1), &
   10598             :                              matrix_in(ispin), &
   10599             :                              0.0_dp, m_temp_no(ispin), &
   10600         758 :                              retain_sparsity=.TRUE.)
   10601         758 :          CALL dbcsr_filter(m_temp_no(ispin), eps_filter_in)
   10602         758 :          CALL dbcsr_dot(matrix_in(ispin), m_temp_no(ispin), temp_real)
   10603             : 
   10604         758 :          my_norm = my_norm + temp_real/nocc(ispin)
   10605             : 
   10606        1516 :          CALL dbcsr_release(m_temp_no(ispin))
   10607             : 
   10608             :       END DO !ispin
   10609             : 
   10610         758 :       DEALLOCATE (nocc)
   10611         758 :       DEALLOCATE (m_temp_no)
   10612             : 
   10613         758 :       my_norm = my_norm*spin_factor
   10614         758 :       norm_out = SQRT(my_norm)
   10615             : 
   10616         758 :    END SUBROUTINE contravariant_matrix_norm
   10617             : 
   10618             : ! **************************************************************************************************
   10619             : !> \brief Loss reduction for a given step is estimated using
   10620             : !>        gradient and hessian
   10621             : !> \param reduction_out ...
   10622             : !> \param grad_in ...
   10623             : !> \param step_in ...
   10624             : !> \param hess_in ...
   10625             : !> \param hess_submatrix_in ...
   10626             : !> \param quench_t_in ...
   10627             : !> \param special_case ...
   10628             : !> \param eps_filter ...
   10629             : !> \param domain_map ...
   10630             : !> \param cpu_of_domain ...
   10631             : !> \par History
   10632             : !>       2019.12 created [Rustam Z Khaliullin]
   10633             : !> \author Rustam Z Khaliullin
   10634             : ! **************************************************************************************************
   10635         408 :    SUBROUTINE predicted_reduction(reduction_out, grad_in, step_in, hess_in, &
   10636         408 :                                   hess_submatrix_in, quench_t_in, special_case, eps_filter, domain_map, &
   10637         408 :                                   cpu_of_domain)
   10638             : 
   10639             :       !RZK-noncritical: can be formulated without submatrices
   10640             :       REAL(KIND=dp), INTENT(INOUT)                       :: reduction_out
   10641             :       TYPE(dbcsr_type), DIMENSION(:), INTENT(INOUT)      :: grad_in, step_in, hess_in
   10642             :       TYPE(domain_submatrix_type), DIMENSION(:, :), &
   10643             :          INTENT(IN)                                      :: hess_submatrix_in
   10644             :       TYPE(dbcsr_type), DIMENSION(:), INTENT(IN)         :: quench_t_in
   10645             :       INTEGER, INTENT(IN)                                :: special_case
   10646             :       REAL(KIND=dp), INTENT(IN)                          :: eps_filter
   10647             :       TYPE(domain_map_type), DIMENSION(:), INTENT(IN)    :: domain_map
   10648             :       INTEGER, DIMENSION(:), INTENT(IN)                  :: cpu_of_domain
   10649             : 
   10650             :       INTEGER                                            :: ispin, nspins
   10651             :       REAL(KIND=dp)                                      :: my_reduction, spin_factor, temp_real
   10652         408 :       TYPE(dbcsr_type), ALLOCATABLE, DIMENSION(:)        :: m_temp_no
   10653             : 
   10654         408 :       reduction_out = 0.0_dp
   10655             : 
   10656         408 :       nspins = SIZE(grad_in)
   10657         408 :       IF (nspins == 1) THEN
   10658             :          spin_factor = 2.0_dp
   10659             :       ELSE
   10660           0 :          spin_factor = 1.0_dp
   10661             :       END IF
   10662             : 
   10663        1632 :       ALLOCATE (m_temp_no(nspins))
   10664             : 
   10665         408 :       my_reduction = 0.0_dp
   10666         816 :       DO ispin = 1, nspins
   10667             : 
   10668         408 :          CALL dbcsr_create(m_temp_no(ispin), template=grad_in(ispin))
   10669             : 
   10670         408 :          CALL dbcsr_dot(step_in(ispin), grad_in(ispin), temp_real)
   10671         408 :          my_reduction = my_reduction + temp_real
   10672             : 
   10673             :          ! Get Hess.step
   10674         408 :          IF (special_case .EQ. xalmo_case_block_diag .OR. &
   10675             :              special_case .EQ. xalmo_case_fully_deloc) THEN
   10676             : 
   10677             :             CALL dbcsr_multiply("N", "N", 1.0_dp, &
   10678             :                                 hess_in(ispin), &
   10679             :                                 step_in(ispin), &
   10680             :                                 0.0_dp, m_temp_no(ispin), &
   10681          92 :                                 filter_eps=eps_filter)
   10682             : 
   10683             :          ELSE
   10684             : 
   10685             :             CALL apply_domain_operators( &
   10686             :                matrix_in=step_in(ispin), &
   10687             :                matrix_out=m_temp_no(ispin), &
   10688             :                operator1=hess_submatrix_in(:, ispin), &
   10689             :                dpattern=quench_t_in(ispin), &
   10690             :                map=domain_map(ispin), &
   10691             :                node_of_domain=cpu_of_domain, &
   10692             :                my_action=0, &
   10693         316 :                filter_eps=eps_filter)
   10694             : 
   10695             :          END IF ! special case
   10696             : 
   10697             :          ! Get y=step^T.Hess.step
   10698         408 :          CALL dbcsr_dot(step_in(ispin), m_temp_no(ispin), temp_real)
   10699         408 :          my_reduction = my_reduction + 0.5_dp*temp_real
   10700             : 
   10701         816 :          CALL dbcsr_release(m_temp_no(ispin))
   10702             : 
   10703             :       END DO ! ispin
   10704             : 
   10705             :       !RZK-critical: do we need to multiply by the spin factor?
   10706         408 :       my_reduction = spin_factor*my_reduction
   10707             : 
   10708         408 :       reduction_out = my_reduction
   10709             : 
   10710         408 :       DEALLOCATE (m_temp_no)
   10711             : 
   10712         408 :    END SUBROUTINE predicted_reduction
   10713             : 
   10714             : ! **************************************************************************************************
   10715             : !> \brief Prints key quantities from the fixed-radius minimizer
   10716             : !> \param unit_nr ...
   10717             : !> \param iter_type ...
   10718             : !> \param iteration ...
   10719             : !> \param step_size ...
   10720             : !> \param border_reached ...
   10721             : !> \param curvature ...
   10722             : !> \param grad_norm_ratio ...
   10723             : !> \param predicted_reduction ...
   10724             : !> \param time ...
   10725             : !> \par History
   10726             : !>       2019.12 created [Rustam Z Khaliullin]
   10727             : !> \author Rustam Z Khaliullin
   10728             : ! **************************************************************************************************
   10729         898 :    SUBROUTINE fixed_r_report(unit_nr, iter_type, iteration, step_size, &
   10730             :                              border_reached, curvature, grad_norm_ratio, predicted_reduction, time)
   10731             : 
   10732             :       INTEGER, INTENT(IN)                                :: unit_nr, iter_type, iteration
   10733             :       REAL(KIND=dp), INTENT(IN)                          :: step_size
   10734             :       LOGICAL, INTENT(IN)                                :: border_reached
   10735             :       REAL(KIND=dp), INTENT(IN)                          :: curvature
   10736             :       REAL(KIND=dp), INTENT(IN), OPTIONAL                :: grad_norm_ratio, predicted_reduction
   10737             :       REAL(KIND=dp), INTENT(IN)                          :: time
   10738             : 
   10739             :       CHARACTER(LEN=20)                                  :: iter_type_str
   10740             :       REAL(KIND=dp)                                      :: loss_or_grad_change
   10741             : 
   10742         898 :       loss_or_grad_change = 0.0_dp
   10743         898 :       IF (PRESENT(grad_norm_ratio)) THEN
   10744         898 :          loss_or_grad_change = grad_norm_ratio
   10745           0 :       ELSE IF (PRESENT(predicted_reduction)) THEN
   10746           0 :          loss_or_grad_change = predicted_reduction
   10747             :       ELSE
   10748           0 :          CPABORT("one argument is missing")
   10749             :       END IF
   10750             : 
   10751        1306 :       SELECT CASE (iter_type)
   10752             :       CASE (0)
   10753         408 :          iter_type_str = TRIM("Ignored")
   10754             :       CASE (1)
   10755          82 :          iter_type_str = TRIM("PCG")
   10756             :       CASE (2)
   10757           0 :          iter_type_str = TRIM("Neg. curvatr.")
   10758             :       CASE (3)
   10759          34 :          iter_type_str = TRIM("Step too long")
   10760             :       CASE (4)
   10761          26 :          iter_type_str = TRIM("Grad. reduced")
   10762             :       CASE (5)
   10763          80 :          iter_type_str = TRIM("Cauchy point")
   10764             :       CASE (6)
   10765         266 :          iter_type_str = TRIM("Full dogleg")
   10766             :       CASE (7)
   10767           2 :          iter_type_str = TRIM("Part. dogleg")
   10768             :       CASE DEFAULT
   10769         898 :          CPABORT("unknown report type")
   10770             :       END SELECT
   10771             : 
   10772         898 :       IF (unit_nr > 0) THEN
   10773             : 
   10774         204 :          SELECT CASE (iter_type)
   10775             :          CASE (0)
   10776             : 
   10777         204 :             WRITE (unit_nr, *)
   10778             :             WRITE (unit_nr, '(T4,A15,A6,A10,A10,A7,A20,A8)') &
   10779         204 :                "Action", &
   10780         204 :                "Iter", &
   10781         204 :                "Curv", &
   10782         204 :                "Step", &
   10783         204 :                "Edge?", &
   10784         204 :                "Grad/o.f. reduc", &
   10785         408 :                "Time"
   10786             : 
   10787             :          CASE DEFAULT
   10788             : 
   10789             :             WRITE (unit_nr, '(T4,A15,I6,F10.5,F10.5,L7,F20.10,F8.2)') &
   10790         245 :                iter_type_str, &
   10791         245 :                iteration, &
   10792         245 :                curvature, step_size, border_reached, &
   10793         245 :                loss_or_grad_change, &
   10794         694 :                time
   10795             : 
   10796             :          END SELECT
   10797             : 
   10798             :          ! epilogue
   10799         204 :          SELECT CASE (iter_type)
   10800             :          CASE (2, 3, 4, 5, 6, 7)
   10801             : 
   10802         449 :             WRITE (unit_nr, *)
   10803             : 
   10804             :          END SELECT
   10805             : 
   10806             :       END IF
   10807             : 
   10808         898 :    END SUBROUTINE fixed_r_report
   10809             : 
   10810             : ! **************************************************************************************************
   10811             : !> \brief Prints key quantities from the loop that tunes trust radius
   10812             : !> \param unit_nr ...
   10813             : !> \param iter_type ...
   10814             : !> \param iteration ...
   10815             : !> \param radius ...
   10816             : !> \param loss ...
   10817             : !> \param delta_loss ...
   10818             : !> \param grad_norm ...
   10819             : !> \param predicted_reduction ...
   10820             : !> \param rho ...
   10821             : !> \param new ...
   10822             : !> \param time ...
   10823             : !> \par History
   10824             : !>       2019.12 created [Rustam Z Khaliullin]
   10825             : !> \author Rustam Z Khaliullin
   10826             : ! **************************************************************************************************
   10827         843 :    SUBROUTINE trust_r_report(unit_nr, iter_type, iteration, radius, &
   10828             :                              loss, delta_loss, grad_norm, predicted_reduction, rho, new, time)
   10829             : 
   10830             :       INTEGER, INTENT(IN)                                :: unit_nr, iter_type, iteration
   10831             :       REAL(KIND=dp), INTENT(IN)                          :: radius, loss, delta_loss, grad_norm, &
   10832             :                                                             predicted_reduction, rho
   10833             :       LOGICAL, INTENT(IN)                                :: new
   10834             :       REAL(KIND=dp), INTENT(IN)                          :: time
   10835             : 
   10836             :       CHARACTER(LEN=20)                                  :: iter_status, iter_type_str
   10837             : 
   10838         852 :       SELECT CASE (iter_type)
   10839             :       CASE (0) ! header
   10840           9 :          iter_type_str = TRIM("Iter")
   10841           9 :          iter_status = TRIM("Stat")
   10842             :       CASE (1) ! first iteration, not all data is available yet
   10843         426 :          iter_type_str = TRIM("TR INI")
   10844         426 :          IF (new) THEN
   10845         426 :             iter_status = "  New" ! new point
   10846             :          ELSE
   10847           0 :             iter_status = " Redo" ! restarted
   10848             :          END IF
   10849             :       CASE (2) ! typical
   10850         408 :          iter_type_str = TRIM("TR FIN")
   10851         408 :          IF (new) THEN
   10852         408 :             iter_status = "  Acc" ! accepted
   10853             :          ELSE
   10854           0 :             iter_status = "  Rej" ! rejected
   10855             :          END IF
   10856             :       CASE DEFAULT
   10857         843 :          CPABORT("unknown report type")
   10858             :       END SELECT
   10859             : 
   10860         843 :       IF (unit_nr > 0) THEN
   10861             : 
   10862           9 :          SELECT CASE (iter_type)
   10863             :          CASE (0)
   10864             : 
   10865             :             WRITE (unit_nr, '(T2,A6,A5,A6,A22,A10,T67,A7,A6)') &
   10866           9 :                "Method", &
   10867           9 :                "Stat", &
   10868           9 :                "Iter", &
   10869           9 :                "Objective Function", &
   10870           9 :                "Conver", &!"Model Change", "Rho", &
   10871           9 :                "Radius", &
   10872          18 :                "Time"
   10873             :             WRITE (unit_nr, '(T41,A10,A10,A6)') &
   10874             :                !"Method", &
   10875             :                !"Iter", &
   10876             :                !"Objective Function", &
   10877           9 :                "Change", "Expct.", "Rho"
   10878             :             !"Radius", &
   10879             :             !"Time"
   10880             : 
   10881             :          CASE (1)
   10882             : 
   10883             :             WRITE (unit_nr, '(T2,A6,A5,I6,F22.10,ES10.2,T67,ES7.0,F6.1)') &
   10884         213 :                iter_type_str, &
   10885         213 :                iter_status, &
   10886         213 :                iteration, &
   10887         213 :                loss, &
   10888         213 :                grad_norm, & ! distinct
   10889         213 :                radius, &
   10890         426 :                time
   10891             : 
   10892             :          CASE (2)
   10893             : 
   10894             :             WRITE (unit_nr, '(T2,A6,A5,I6,F22.10,ES10.2,ES10.2,F6.1,ES7.0,F6.1)') &
   10895         204 :                iter_type_str, &
   10896         204 :                iter_status, &
   10897         204 :                iteration, &
   10898         204 :                loss, &
   10899         204 :                delta_loss, predicted_reduction, rho, & ! distinct
   10900         204 :                radius, &
   10901         630 :                time
   10902             : 
   10903             :          END SELECT
   10904             :       END IF
   10905             : 
   10906         843 :    END SUBROUTINE trust_r_report
   10907             : 
   10908             : ! **************************************************************************************************
   10909             : !> \brief ...
   10910             : !> \param unit_nr ...
   10911             : !> \param ref_energy ...
   10912             : !> \param energy_lowering ...
   10913             : ! **************************************************************************************************
   10914          26 :    SUBROUTINE energy_lowering_report(unit_nr, ref_energy, energy_lowering)
   10915             : 
   10916             :       INTEGER, INTENT(IN)                                :: unit_nr
   10917             :       REAL(KIND=dp), INTENT(IN)                          :: ref_energy, energy_lowering
   10918             : 
   10919             :       ! print out the energy lowering
   10920          26 :       IF (unit_nr > 0) THEN
   10921          13 :          WRITE (unit_nr, *)
   10922          13 :          WRITE (unit_nr, '(T2,A35,F25.10)') "ENERGY OF BLOCK-DIAGONAL ALMOs:", &
   10923          26 :             ref_energy
   10924          13 :          WRITE (unit_nr, '(T2,A35,F25.10)') "ENERGY LOWERING:", &
   10925          26 :             energy_lowering
   10926          13 :          WRITE (unit_nr, '(T2,A35,F25.10)') "CORRECTED ENERGY:", &
   10927          26 :             ref_energy + energy_lowering
   10928          13 :          WRITE (unit_nr, *)
   10929             :       END IF
   10930             : 
   10931          26 :    END SUBROUTINE energy_lowering_report
   10932             : 
   10933             :    ! post SCF-loop calculations
   10934             : ! **************************************************************************************************
   10935             : !> \brief ...
   10936             : !> \param qs_env ...
   10937             : !> \param almo_scf_env ...
   10938             : !> \param perturbation_in ...
   10939             : !> \param m_xalmo_in ...
   10940             : !> \param m_quench_in ...
   10941             : !> \param energy_inout ...
   10942             : ! **************************************************************************************************
   10943         104 :    SUBROUTINE wrap_up_xalmo_scf(qs_env, almo_scf_env, perturbation_in, &
   10944         104 :                                 m_xalmo_in, m_quench_in, energy_inout)
   10945             : 
   10946             :       TYPE(qs_environment_type), POINTER                 :: qs_env
   10947             :       TYPE(almo_scf_env_type), INTENT(INOUT)             :: almo_scf_env
   10948             :       LOGICAL, INTENT(IN)                                :: perturbation_in
   10949             :       TYPE(dbcsr_type), DIMENSION(:), INTENT(IN)         :: m_xalmo_in, m_quench_in
   10950             :       REAL(KIND=dp), INTENT(INOUT)                       :: energy_inout
   10951             : 
   10952             :       CHARACTER(len=*), PARAMETER                        :: routineN = 'wrap_up_xalmo_scf'
   10953             : 
   10954             :       INTEGER                                            :: eda_unit, handle, ispin, nspins, unit_nr
   10955             :       TYPE(cp_logger_type), POINTER                      :: logger
   10956         104 :       TYPE(dbcsr_type), ALLOCATABLE, DIMENSION(:)        :: m_temp_no1, m_temp_no2
   10957             :       TYPE(section_vals_type), POINTER                   :: almo_print_section, input
   10958             : 
   10959         104 :       CALL timeset(routineN, handle)
   10960             : 
   10961             :       ! get a useful output_unit
   10962         104 :       logger => cp_get_default_logger()
   10963         104 :       IF (logger%para_env%is_source()) THEN
   10964          52 :          unit_nr = cp_logger_get_default_unit_nr(logger, local=.TRUE.)
   10965             :       ELSE
   10966          52 :          unit_nr = -1
   10967             :       END IF
   10968             : 
   10969         104 :       nspins = almo_scf_env%nspins
   10970             : 
   10971             :       ! RZK-warning: must obtain MO coefficients from final theta
   10972             : 
   10973         104 :       IF (perturbation_in) THEN
   10974             : 
   10975          96 :          ALLOCATE (m_temp_no1(nspins))
   10976          72 :          ALLOCATE (m_temp_no2(nspins))
   10977             : 
   10978          48 :          DO ispin = 1, nspins
   10979          24 :             CALL dbcsr_create(m_temp_no1(ispin), template=m_xalmo_in(ispin))
   10980          48 :             CALL dbcsr_create(m_temp_no2(ispin), template=m_xalmo_in(ispin))
   10981             :          END DO
   10982             : 
   10983             :          ! return perturbed density to qs_env
   10984             :          CALL almo_dm_to_qs_env(qs_env, almo_scf_env%matrix_p, &
   10985          24 :                                 almo_scf_env%mat_distr_aos)
   10986             : 
   10987             :          ! compute energy correction and perform
   10988             :          ! detailed decomposition analysis (if requested)
   10989             :          ! reuse step and grad matrices to store decomposition results
   10990             :          CALL xalmo_analysis( &
   10991             :             detailed_analysis=almo_scf_env%almo_analysis%do_analysis, &
   10992             :             eps_filter=almo_scf_env%eps_filter, &
   10993             :             m_T_in=m_xalmo_in, &
   10994             :             m_T0_in=almo_scf_env%matrix_t_blk, &
   10995             :             m_siginv_in=almo_scf_env%matrix_sigma_inv, &
   10996             :             m_siginv0_in=almo_scf_env%matrix_sigma_inv_0deloc, &
   10997             :             m_S_in=almo_scf_env%matrix_s, &
   10998             :             m_KS0_in=almo_scf_env%matrix_ks_0deloc, &
   10999             :             m_quench_t_in=m_quench_in, &
   11000             :             energy_out=energy_inout, & ! get energy loewring
   11001             :             m_eda_out=m_temp_no1, &
   11002             :             m_cta_out=m_temp_no2 &
   11003          24 :             )
   11004             : 
   11005          24 :          IF (almo_scf_env%almo_analysis%do_analysis) THEN
   11006             : 
   11007           4 :             DO ispin = 1, nspins
   11008             : 
   11009             :                ! energy decomposition analysis (EDA)
   11010           2 :                IF (unit_nr > 0) THEN
   11011           1 :                   WRITE (unit_nr, '(T2,A)') "DECOMPOSITION OF THE DELOCALIZATION ENERGY"
   11012             :                END IF
   11013             : 
   11014             :                ! open the output file, print and close
   11015           2 :                CALL get_qs_env(qs_env, input=input)
   11016           2 :                almo_print_section => section_vals_get_subs_vals(input, "DFT%ALMO_SCF%ANALYSIS%PRINT")
   11017             :                eda_unit = cp_print_key_unit_nr(logger, almo_print_section, &
   11018           2 :                                                "ALMO_EDA_CT", extension=".dat", local=.TRUE.)
   11019           2 :                CALL dbcsr_print_block_sum(m_temp_no1(ispin), eda_unit)
   11020             :                CALL cp_print_key_finished_output(eda_unit, logger, almo_print_section, &
   11021           2 :                                                  "ALMO_EDA_CT", local=.TRUE.)
   11022             : 
   11023             :                ! charge transfer analysis (CTA)
   11024           2 :                IF (unit_nr > 0) THEN
   11025           1 :                   WRITE (unit_nr, '(T2,A)') "DECOMPOSITION OF CHARGE TRANSFER TERMS"
   11026             :                END IF
   11027             : 
   11028             :                eda_unit = cp_print_key_unit_nr(logger, almo_print_section, &
   11029           2 :                                                "ALMO_CTA", extension=".dat", local=.TRUE.)
   11030           2 :                CALL dbcsr_print_block_sum(m_temp_no2(ispin), eda_unit)
   11031             :                CALL cp_print_key_finished_output(eda_unit, logger, almo_print_section, &
   11032           4 :                                                  "ALMO_CTA", local=.TRUE.)
   11033             : 
   11034             :             END DO ! ispin
   11035             : 
   11036             :          END IF ! do ALMO EDA/CTA
   11037             : 
   11038             :          CALL energy_lowering_report( &
   11039             :             unit_nr=unit_nr, &
   11040             :             ref_energy=almo_scf_env%almo_scf_energy, &
   11041          24 :             energy_lowering=energy_inout)
   11042             :          CALL almo_scf_update_ks_energy(qs_env, &
   11043             :                                         energy=almo_scf_env%almo_scf_energy, &
   11044          24 :                                         energy_singles_corr=energy_inout)
   11045             : 
   11046          48 :          DO ispin = 1, nspins
   11047          24 :             CALL dbcsr_release(m_temp_no1(ispin))
   11048          48 :             CALL dbcsr_release(m_temp_no2(ispin))
   11049             :          END DO
   11050             : 
   11051          24 :          DEALLOCATE (m_temp_no1)
   11052          24 :          DEALLOCATE (m_temp_no2)
   11053             : 
   11054             :       ELSE ! non-perturbative
   11055             : 
   11056             :          CALL almo_scf_update_ks_energy(qs_env, &
   11057          80 :                                         energy=energy_inout)
   11058             : 
   11059             :       END IF ! if perturbation only
   11060             : 
   11061         104 :       CALL timestop(handle)
   11062             : 
   11063         104 :    END SUBROUTINE wrap_up_xalmo_scf
   11064             : 
   11065             : ! **************************************************************************************************
   11066             : !> \brief Computes tanh(alpha*x) of the matrix elements. Fails if |alpha*x| >= 1.
   11067             : !> \param matrix ...
   11068             : !> \param alpha ...
   11069             : !> \author Ole Schuett
   11070             : ! **************************************************************************************************
   11071           0 :    SUBROUTINE tanh_of_elements(matrix, alpha)
   11072             :       TYPE(dbcsr_type), INTENT(INOUT)                    :: matrix
   11073             :       REAL(kind=dp), INTENT(IN)                          :: alpha
   11074             : 
   11075             :       CHARACTER(len=*), PARAMETER                        :: routineN = 'tanh_of_elements'
   11076             : 
   11077             :       INTEGER                                            :: handle
   11078           0 :       REAL(kind=dp), DIMENSION(:, :), POINTER            :: block
   11079             :       TYPE(dbcsr_iterator_type)                          :: iter
   11080             : 
   11081           0 :       CALL timeset(routineN, handle)
   11082           0 :       CALL dbcsr_iterator_start(iter, matrix)
   11083           0 :       DO WHILE (dbcsr_iterator_blocks_left(iter))
   11084           0 :          CALL dbcsr_iterator_next_block(iter, block=block)
   11085           0 :          block = TANH(alpha*block)
   11086             :       END DO
   11087           0 :       CALL dbcsr_iterator_stop(iter)
   11088           0 :       CALL timestop(handle)
   11089             : 
   11090           0 :    END SUBROUTINE tanh_of_elements
   11091             : 
   11092             : ! **************************************************************************************************
   11093             : !> \brief Computes d(tanh(alpha*x)) / dx of the matrix elements. Fails if |alpha*x| >= 1.
   11094             : !> \param matrix ...
   11095             : !> \param alpha ...
   11096             : !> \author Ole Schuett
   11097             : ! **************************************************************************************************
   11098           0 :    SUBROUTINE dtanh_of_elements(matrix, alpha)
   11099             :       TYPE(dbcsr_type), INTENT(INOUT)                    :: matrix
   11100             :       REAL(kind=dp), INTENT(IN)                          :: alpha
   11101             : 
   11102             :       CHARACTER(len=*), PARAMETER                        :: routineN = 'dtanh_of_elements'
   11103             : 
   11104             :       INTEGER                                            :: handle
   11105           0 :       REAL(kind=dp), DIMENSION(:, :), POINTER            :: block
   11106             :       TYPE(dbcsr_iterator_type)                          :: iter
   11107             : 
   11108           0 :       CALL timeset(routineN, handle)
   11109           0 :       CALL dbcsr_iterator_start(iter, matrix)
   11110           0 :       DO WHILE (dbcsr_iterator_blocks_left(iter))
   11111           0 :          CALL dbcsr_iterator_next_block(iter, block=block)
   11112           0 :          block = alpha*(1.0_dp - TANH(block)**2)
   11113             :       END DO
   11114           0 :       CALL dbcsr_iterator_stop(iter)
   11115           0 :       CALL timestop(handle)
   11116             : 
   11117           0 :    END SUBROUTINE dtanh_of_elements
   11118             : 
   11119             : ! **************************************************************************************************
   11120             : !> \brief Computes 1/x of the matrix elements.
   11121             : !> \param matrix ...
   11122             : !> \author Ole Schuett
   11123             : ! **************************************************************************************************
   11124           0 :    SUBROUTINE inverse_of_elements(matrix)
   11125             :       TYPE(dbcsr_type), INTENT(INOUT)                    :: matrix
   11126             : 
   11127             :       CHARACTER(len=*), PARAMETER :: routineN = 'inverse_of_elements'
   11128             : 
   11129             :       INTEGER                                            :: handle
   11130           0 :       REAL(kind=dp), DIMENSION(:, :), POINTER            :: block
   11131             :       TYPE(dbcsr_iterator_type)                          :: iter
   11132             : 
   11133           0 :       CALL timeset(routineN, handle)
   11134           0 :       CALL dbcsr_iterator_start(iter, matrix)
   11135           0 :       DO WHILE (dbcsr_iterator_blocks_left(iter))
   11136           0 :          CALL dbcsr_iterator_next_block(iter, block=block)
   11137           0 :          block = 1.0_dp/block
   11138             :       END DO
   11139           0 :       CALL dbcsr_iterator_stop(iter)
   11140           0 :       CALL timestop(handle)
   11141             : 
   11142           0 :    END SUBROUTINE inverse_of_elements
   11143             : 
   11144             : END MODULE almo_scf_optimizer
   11145             : 

Generated by: LCOV version 1.15