LCOV - CP2K Regtests (git:b4bd748)

LCOV - code coverage report

Current view:	top level - src - mp2_ri_grad.F (source / functions)		Hit	Total	Coverage
Test:	CP2K Regtests (git:b4bd748)	Lines:	607	615	98.7 %
Date:	2025-03-09 07:56:22	Functions:	5	5	100.0 %

          Line data    Source code

       1             : !--------------------------------------------------------------------------------------------------!
       2             : !   CP2K: A general program to perform molecular dynamics simulations                              !
       3             : !   Copyright 2000-2025 CP2K developers group <https://cp2k.org>                                   !
       4             : !                                                                                                  !
       5             : !   SPDX-License-Identifier: GPL-2.0-or-later                                                      !
       6             : !--------------------------------------------------------------------------------------------------!
       7             : 
       8             : ! **************************************************************************************************
       9             : !> \brief Routines to calculate gradients of RI-GPW-MP2 energy using pw
      10             : !> \par History
      11             : !>      10.2013 created [Mauro Del Ben]
      12             : ! **************************************************************************************************
      13             : MODULE mp2_ri_grad
      14             :    USE atomic_kind_types,               ONLY: atomic_kind_type,&
      15             :                                               get_atomic_kind_set
      16             :    USE cell_types,                      ONLY: cell_type
      17             :    USE cp_blacs_env,                    ONLY: cp_blacs_env_type
      18             :    USE cp_control_types,                ONLY: dft_control_type
      19             :    USE cp_dbcsr_api,                    ONLY: &
      20             :         dbcsr_add, dbcsr_copy, dbcsr_create, dbcsr_multiply, dbcsr_p_type, dbcsr_release, &
      21             :         dbcsr_set, dbcsr_transposed, dbcsr_type, dbcsr_type_no_symmetry, dbcsr_type_symmetric
      22             :    USE cp_dbcsr_operations,             ONLY: copy_dbcsr_to_fm,&
      23             :                                               dbcsr_deallocate_matrix_set
      24             :    USE cp_eri_mme_interface,            ONLY: cp_eri_mme_param
      25             :    USE cp_fm_struct,                    ONLY: cp_fm_struct_create,&
      26             :                                               cp_fm_struct_release,&
      27             :                                               cp_fm_struct_type
      28             :    USE cp_fm_types,                     ONLY: cp_fm_create,&
      29             :                                               cp_fm_get_info,&
      30             :                                               cp_fm_release,&
      31             :                                               cp_fm_set_all,&
      32             :                                               cp_fm_to_fm_submat,&
      33             :                                               cp_fm_type
      34             :    USE input_constants,                 ONLY: do_eri_gpw,&
      35             :                                               do_eri_mme,&
      36             :                                               ri_mp2_laplace,&
      37             :                                               ri_mp2_method_gpw,&
      38             :                                               ri_rpa_method_gpw
      39             :    USE kinds,                           ONLY: dp
      40             :    USE libint_2c_3c,                    ONLY: compare_potential_types
      41             :    USE message_passing,                 ONLY: mp_para_env_release,&
      42             :                                               mp_para_env_type,&
      43             :                                               mp_request_null,&
      44             :                                               mp_request_type,&
      45             :                                               mp_waitall
      46             :    USE mp2_eri,                         ONLY: mp2_eri_2c_integrate,&
      47             :                                               mp2_eri_3c_integrate,&
      48             :                                               mp2_eri_deallocate_forces,&
      49             :                                               mp2_eri_force
      50             :    USE mp2_eri_gpw,                     ONLY: cleanup_gpw,&
      51             :                                               integrate_potential_forces_2c,&
      52             :                                               integrate_potential_forces_3c_1c,&
      53             :                                               integrate_potential_forces_3c_2c,&
      54             :                                               prepare_gpw
      55             :    USE mp2_types,                       ONLY: integ_mat_buffer_type,&
      56             :                                               integ_mat_buffer_type_2D,&
      57             :                                               mp2_type
      58             :    USE parallel_gemm_api,               ONLY: parallel_gemm
      59             :    USE particle_types,                  ONLY: particle_type
      60             :    USE pw_env_types,                    ONLY: pw_env_type
      61             :    USE pw_poisson_types,                ONLY: pw_poisson_type
      62             :    USE pw_pool_types,                   ONLY: pw_pool_type
      63             :    USE pw_types,                        ONLY: pw_c1d_gs_type,&
      64             :                                               pw_r3d_rs_type
      65             :    USE qs_environment_types,            ONLY: get_qs_env,&
      66             :                                               qs_environment_type
      67             :    USE qs_force_types,                  ONLY: allocate_qs_force,&
      68             :                                               qs_force_type,&
      69             :                                               zero_qs_force
      70             :    USE qs_kind_types,                   ONLY: qs_kind_type
      71             :    USE qs_ks_types,                     ONLY: qs_ks_env_type
      72             :    USE qs_neighbor_list_types,          ONLY: neighbor_list_set_p_type
      73             :    USE task_list_types,                 ONLY: task_list_type
      74             :    USE util,                            ONLY: get_limit
      75             :    USE virial_types,                    ONLY: virial_type
      76             : #include "./base/base_uses.f90"
      77             : 
      78             :    IMPLICIT NONE
      79             : 
      80             :    PRIVATE
      81             : 
      82             :    CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'mp2_ri_grad'
      83             : 
      84             :    PUBLIC :: calc_ri_mp2_nonsep
      85             : 
      86             : CONTAINS
      87             : 
      88             : ! **************************************************************************************************
      89             : !> \brief Calculate the non-separable part of the gradients and update the
      90             : !>        Lagrangian
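      91             : !> \param qs_env QS environment; force, virial and ks_env are obtained from it
      92             : !> \param mp2_env MP2 environment; supplies the ri_grad input data and receives mp2_force and mp2_virial
      93             : !> \param para_env ...
      94             : !> \param para_env_sub parallel environment used to split dimen_RI and to sum the G_PQ_local arrays
      95             : !> \param cell ...
      96             : !> \param particle_set ...
      97             : !> \param atomic_kind_set ...
      98             : !> \param qs_kind_set ...
      99             : !> \param mo_coeff ...
     100             : !> \param nmo orbital dimension; virtual(:) is computed as nmo - homo(:)
     101             : !> \param homo one entry per spin channel; SIZE(homo) defines nspins
     102             : !> \param dimen_RI first dimension of the G_PQ_local work arrays (RI index)
     103             : !> \param Eigenval ...
     104             : !> \param my_group_L_start first RI index of the local loop over auxiliary functions
     105             : !> \param my_group_L_end last RI index of the local loop over auxiliary functions
     106             : !> \param my_group_L_size number of locally handled RI indices (second dimension of G_PQ_local)
     107             : !> \param sab_orb_sub ...
     108             : !> \param mat_munu AO matrix; also used as template for the locally created AO matrices
     109             : !> \param blacs_env_sub ...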
     110             : !> \author Mauro Del Ben
     111             : ! **************************************************************************************************
     112         264 :    SUBROUTINE calc_ri_mp2_nonsep(qs_env, mp2_env, para_env, para_env_sub, cell, particle_set, &
     113         264 :                                  atomic_kind_set, qs_kind_set, mo_coeff, nmo, homo, dimen_RI, Eigenval, &
     114             :                                  my_group_L_start, my_group_L_end, my_group_L_size, sab_orb_sub, mat_munu, &
     115             :                                  blacs_env_sub)
     116             :       TYPE(qs_environment_type), POINTER                 :: qs_env
     117             :       TYPE(mp2_type)                                     :: mp2_env
     118             :       TYPE(mp_para_env_type), POINTER                    :: para_env, para_env_sub
     119             :       TYPE(cell_type), POINTER                           :: cell
     120             :       TYPE(particle_type), DIMENSION(:), POINTER         :: particle_set
     121             :       TYPE(atomic_kind_type), DIMENSION(:), POINTER      :: atomic_kind_set
     122             :       TYPE(qs_kind_type), DIMENSION(:), POINTER          :: qs_kind_set
     123             :       TYPE(cp_fm_type), DIMENSION(:), INTENT(IN)         :: mo_coeff
     124             :       INTEGER, INTENT(IN)                                :: nmo
     125             :       INTEGER, DIMENSION(:), INTENT(IN)                  :: homo
     126             :       INTEGER, INTENT(IN)                                :: dimen_RI
     127             :       REAL(KIND=dp), DIMENSION(:, :), INTENT(IN)         :: Eigenval
     128             :       INTEGER, INTENT(IN)                                :: my_group_L_start, my_group_L_end, &
     129             :                                                             my_group_L_size
     130             :       TYPE(neighbor_list_set_p_type), DIMENSION(:), &
     131             :          POINTER                                         :: sab_orb_sub
     132             :       TYPE(dbcsr_p_type), INTENT(INOUT)                  :: mat_munu
     133             :       TYPE(cp_blacs_env_type), POINTER                   :: blacs_env_sub
     134             : 
     135             :       CHARACTER(LEN=*), PARAMETER :: routineN = 'calc_ri_mp2_nonsep'
     136             : 
     137             :       INTEGER                                            :: dimen, eri_method, handle, handle2, i, &
     138             :                                                             ikind, ispin, itmp(2), L_counter, LLL, &
     139             :                                                             my_P_end, my_P_size, my_P_start, nspins
     140         264 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: atom_of_kind, kind_of, natom_of_kind, &
     141         264 :                                                             virtual
     142             :       LOGICAL                                            :: alpha_beta, use_virial
     143             :       REAL(KIND=dp)                                      :: cutoff_old, eps_filter, factor, &
     144             :                                                             factor_2c, relative_cutoff_old
     145         264 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: e_cutoff_old
     146         264 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :)        :: G_PQ_local, G_PQ_local_2
     147             :       REAL(KIND=dp), DIMENSION(3, 3)                     :: h_stress, pv_virial
     148         264 :       REAL(KIND=dp), DIMENSION(:, :), POINTER            :: I_tmp2
     149             :       TYPE(cp_eri_mme_param), POINTER                    :: eri_param
     150             :       TYPE(cp_fm_struct_type), POINTER                   :: fm_struct_tmp
     151         264 :       TYPE(cp_fm_type), ALLOCATABLE, DIMENSION(:)        :: L1_mu_i, L2_nu_a
     152             :       TYPE(dbcsr_p_type)                                 :: matrix_P_munu
     153             :       TYPE(dbcsr_p_type), ALLOCATABLE, DIMENSION(:)      :: mo_coeff_o, mo_coeff_v
     154         264 :       TYPE(dbcsr_p_type), ALLOCATABLE, DIMENSION(:, :)   :: G_P_ia
     155         264 :       TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: mat_munu_local, matrix_P_munu_local
     156             :       TYPE(dbcsr_type)                                   :: matrix_P_munu_nosym
     157         264 :       TYPE(dbcsr_type), ALLOCATABLE, DIMENSION(:)        :: Lag_mu_i_1, Lag_nu_a_2, matrix_P_inu
     158             :       TYPE(dft_control_type), POINTER                    :: dft_control
     159         264 :       TYPE(mp2_eri_force), ALLOCATABLE, DIMENSION(:)     :: force_2c, force_2c_RI, force_3c_aux, &
     160         264 :                                                             force_3c_orb_mu, force_3c_orb_nu
     161        1056 :       TYPE(pw_c1d_gs_type)                               :: dvg(3), pot_g, rho_g, rho_g_copy
     162             :       TYPE(pw_env_type), POINTER                         :: pw_env_sub
     163             :       TYPE(pw_poisson_type), POINTER                     :: poisson_env
     164             :       TYPE(pw_pool_type), POINTER                        :: auxbas_pw_pool
     165             :       TYPE(pw_r3d_rs_type)                               :: psi_L, rho_r
     166         264 :       TYPE(qs_force_type), DIMENSION(:), POINTER         :: force, mp2_force
     167             :       TYPE(qs_ks_env_type), POINTER                      :: ks_env
     168             :       TYPE(task_list_type), POINTER                      :: task_list_sub
     169             :       TYPE(virial_type), POINTER                         :: virial
     170             : 
     171         264 :       CALL timeset(routineN, handle)
     172             : 
     173         264 :       eri_method = mp2_env%eri_method
     174         264 :       eri_param => mp2_env%eri_mme_param
     175             : 
     176             :       ! Find out whether we have a closed or open shell
     177         264 :       nspins = SIZE(homo)
     178         264 :       alpha_beta = (nspins == 2)
     179             : 
     180         264 :       dimen = nmo
     181         792 :       ALLOCATE (virtual(nspins))
     182         614 :       virtual(:) = dimen - homo(:)
     183         264 :       eps_filter = mp2_env%mp2_gpw%eps_filter
     184       29636 :       ALLOCATE (mo_coeff_o(nspins), mo_coeff_v(nspins), G_P_ia(nspins, my_group_L_size))
     185         614 :       DO ispin = 1, nspins
     186         350 :          mo_coeff_o(ispin)%matrix => mp2_env%ri_grad%mo_coeff_o(ispin)%matrix
     187         350 :          mo_coeff_v(ispin)%matrix => mp2_env%ri_grad%mo_coeff_v(ispin)%matrix
     188       16122 :          DO LLL = 1, my_group_L_size
     189       15858 :             G_P_ia(ispin, LLL)%matrix => mp2_env%ri_grad%G_P_ia(LLL, ispin)%matrix
     190             :          END DO
     191             :       END DO
     192         264 :       DEALLOCATE (mp2_env%ri_grad%G_P_ia)
     193             : 
     194         264 :       itmp = get_limit(dimen_RI, para_env_sub%num_pe, para_env_sub%mepos)
     195         264 :       my_P_start = itmp(1)
     196         264 :       my_P_end = itmp(2)
     197         264 :       my_P_size = itmp(2) - itmp(1) + 1
     198             : 
     199        1056 :       ALLOCATE (G_PQ_local(dimen_RI, my_group_L_size))
     200      997646 :       G_PQ_local = 0.0_dp
     201      944212 :       G_PQ_local(my_P_start:my_P_end, :) = mp2_env%ri_grad%Gamma_PQ
     202         264 :       DEALLOCATE (mp2_env%ri_grad%Gamma_PQ)
     203      944212 :       G_PQ_local(my_P_start:my_P_end, :) = G_PQ_local(my_P_start:my_P_end, :)/REAL(nspins, dp)
     204         264 :       CALL para_env_sub%sum(G_PQ_local)
     205         264 :       IF (.NOT. compare_potential_types(mp2_env%ri_metric, mp2_env%potential_parameter)) THEN
     206          36 :          ALLOCATE (G_PQ_local_2(dimen_RI, my_group_L_size))
     207       41844 :          G_PQ_local_2 = 0.0_dp
     208       41844 :          G_PQ_local_2(my_P_start:my_P_end, :) = mp2_env%ri_grad%Gamma_PQ_2
     209          12 :          DEALLOCATE (mp2_env%ri_grad%Gamma_PQ_2)
     210       41844 :          G_PQ_local_2(my_P_start:my_P_end, :) = G_PQ_local_2(my_P_start:my_P_end, :)/REAL(nspins, dp)
     211          12 :          CALL para_env_sub%sum(G_PQ_local_2)
     212             :       END IF
     213             : 
     214             :       ! create matrix holding the back transformation (G_P_inu)
     215        1142 :       ALLOCATE (matrix_P_inu(nspins))
     216         614 :       DO ispin = 1, nspins
     217         614 :          CALL dbcsr_create(matrix_P_inu(ispin), template=mo_coeff_o(ispin)%matrix)
     218             :       END DO
     219             : 
     220             :       ! non symmetric matrix
     221             :       CALL dbcsr_create(matrix_P_munu_nosym, template=mat_munu%matrix, &
     222         264 :                         matrix_type=dbcsr_type_no_symmetry)
     223             : 
     224             :       ! create Lagrangian matrices in mixed AO/MO formalism
     225         878 :       ALLOCATE (Lag_mu_i_1(nspins))
     226         614 :       DO ispin = 1, nspins
     227         350 :          CALL dbcsr_create(Lag_mu_i_1(ispin), template=mo_coeff_o(ispin)%matrix)
     228         614 :          CALL dbcsr_set(Lag_mu_i_1(ispin), 0.0_dp)
     229             :       END DO
     230             : 
     231         878 :       ALLOCATE (Lag_nu_a_2(nspins))
     232         614 :       DO ispin = 1, nspins
     233         350 :          CALL dbcsr_create(Lag_nu_a_2(ispin), template=mo_coeff_v(ispin)%matrix)
     234         614 :          CALL dbcsr_set(Lag_nu_a_2(ispin), 0.0_dp)
     235             :       END DO
     236             : 
     237             :       ! get forces
     238         264 :       NULLIFY (force, virial)
     239         264 :       CALL get_qs_env(qs_env=qs_env, force=force, virial=virial)
     240             : 
     241             :       ! check if we want to calculate the virial
     242         264 :       use_virial = virial%pv_availability .AND. (.NOT. virial%pv_numer)
     243             : 
     244         264 :       CALL get_atomic_kind_set(atomic_kind_set=atomic_kind_set, natom_of_kind=natom_of_kind)
     245         264 :       NULLIFY (mp2_force)
     246         264 :       CALL allocate_qs_force(mp2_force, natom_of_kind)
     247         264 :       DEALLOCATE (natom_of_kind)
     248         264 :       CALL zero_qs_force(mp2_force)
     249         264 :       mp2_env%ri_grad%mp2_force => mp2_force
     250             : 
     251         264 :       factor_2c = -4.0_dp
     252         264 :       IF (mp2_env%method == ri_rpa_method_gpw) THEN
     253          20 :          factor_2c = -1.0_dp
     254          20 :          IF (alpha_beta) factor_2c = -2.0_dp
     255             :       END IF
     256             : 
     257             :       ! prepare integral derivatives with mme method
     258         264 :       IF (eri_method .EQ. do_eri_mme) THEN
     259        1184 :          ALLOCATE (matrix_P_munu_local(my_group_L_size))
     260        1158 :          ALLOCATE (mat_munu_local(my_group_L_size))
     261          26 :          L_counter = 0
     262        1132 :          DO LLL = my_group_L_start, my_group_L_end
     263        1106 :             L_counter = L_counter + 1
     264        1106 :             ALLOCATE (mat_munu_local(L_counter)%matrix)
     265             :             CALL dbcsr_create(mat_munu_local(L_counter)%matrix, template=mat_munu%matrix, &
     266        1106 :                               matrix_type=dbcsr_type_symmetric)
     267        1106 :             CALL dbcsr_copy(mat_munu_local(L_counter)%matrix, mat_munu%matrix)
     268        1106 :             CALL dbcsr_set(mat_munu_local(L_counter)%matrix, 0.0_dp)
     269             : 
     270             :             CALL G_P_transform_MO_to_AO(matrix_P_munu_local(L_counter)%matrix, matrix_P_munu_nosym, mat_munu%matrix, &
     271             :                                         G_P_ia(:, L_counter), matrix_P_inu, &
     272        1132 :                                         mo_coeff_v, mo_coeff_o, eps_filter)
     273             :          END DO
     274             : 
     275          78 :          ALLOCATE (I_tmp2(dimen_RI, my_group_L_size))
     276       95790 :          I_tmp2(:, :) = 0.0_dp
     277             :          CALL mp2_eri_2c_integrate(eri_param, mp2_env%potential_parameter, para_env_sub, qs_env, &
     278             :                                    basis_type_a="RI_AUX", basis_type_b="RI_AUX", &
     279             :                                    hab=I_tmp2, first_b=my_group_L_start, last_b=my_group_L_end, &
     280          26 :                                    eri_method=eri_method, pab=G_PQ_local, force_a=force_2c)
     281          26 :          IF (.NOT. compare_potential_types(mp2_env%potential_parameter, mp2_env%ri_metric)) THEN
     282           0 :             I_tmp2(:, :) = 0.0_dp
     283             :             CALL mp2_eri_2c_integrate(eri_param, mp2_env%ri_metric, para_env_sub, qs_env, &
     284             :                                       basis_type_a="RI_AUX", basis_type_b="RI_AUX", &
     285             :                                       hab=I_tmp2, first_b=my_group_L_start, last_b=my_group_L_end, &
     286           0 :                                       eri_method=eri_method, pab=G_PQ_local_2, force_a=force_2c_RI)
     287             :          END IF
     288          26 :          DEALLOCATE (I_tmp2)
     289             : 
     290             :          CALL mp2_eri_3c_integrate(eri_param, mp2_env%ri_metric, para_env_sub, qs_env, &
     291             :                                    first_c=my_group_L_start, last_c=my_group_L_end, mat_ab=mat_munu_local, &
     292             :                                    basis_type_a="ORB", basis_type_b="ORB", basis_type_c="RI_AUX", &
     293             :                                    sab_nl=sab_orb_sub, eri_method=eri_method, &
     294             :                                    pabc=matrix_P_munu_local, &
     295          26 :                                    force_a=force_3c_orb_mu, force_b=force_3c_orb_nu, force_c=force_3c_aux)
     296             : 
     297          26 :          L_counter = 0
     298        1132 :          DO LLL = my_group_L_start, my_group_L_end
     299        1106 :             L_counter = L_counter + 1
     300        2432 :             DO ispin = 1, nspins
     301             :                CALL dbcsr_multiply("N", "T", 1.0_dp, mo_coeff_v(ispin)%matrix, G_P_ia(ispin, L_counter)%matrix, &
     302        2432 :                                    0.0_dp, matrix_P_inu(ispin), filter_eps=eps_filter)
     303             :             END DO
     304             : 
     305             :             ! The matrices of G_P_ia are deallocated here
     306             :             CALL update_lagrangian(mat_munu_local(L_counter)%matrix, matrix_P_inu, Lag_mu_i_1, &
     307             :                                    G_P_ia(:, L_counter), mo_coeff_o, Lag_nu_a_2, &
     308        1132 :                                    eps_filter)
     309             :          END DO
     310             : 
     311          72 :          DO ikind = 1, SIZE(force)
     312             :             mp2_force(ikind)%mp2_non_sep(:, :) = factor_2c*force_2c(ikind)%forces(:, :) + &
     313             :                                                  force_3c_orb_mu(ikind)%forces(:, :) + &
     314             :                                                  force_3c_orb_nu(ikind)%forces(:, :) + &
     315         334 :                                                  force_3c_aux(ikind)%forces(:, :)
     316             : 
     317          72 :             IF (.NOT. compare_potential_types(mp2_env%potential_parameter, mp2_env%ri_metric)) THEN
     318           0 :                mp2_force(ikind)%mp2_non_sep(:, :) = mp2_force(ikind)%mp2_non_sep(:, :) + factor_2c*force_2c_RI(ikind)%forces
     319             :             END IF
     320             :          END DO
     321             : 
     322          26 :          CALL mp2_eri_deallocate_forces(force_2c)
     323          26 :          IF (.NOT. compare_potential_types(mp2_env%potential_parameter, mp2_env%ri_metric)) THEN
     324           0 :             CALL mp2_eri_deallocate_forces(force_2c_RI)
     325             :          END IF
     326          26 :          CALL mp2_eri_deallocate_forces(force_3c_aux)
     327          26 :          CALL mp2_eri_deallocate_forces(force_3c_orb_mu)
     328          26 :          CALL mp2_eri_deallocate_forces(force_3c_orb_nu)
     329          26 :          CALL dbcsr_deallocate_matrix_set(matrix_P_munu_local)
     330          26 :          CALL dbcsr_deallocate_matrix_set(mat_munu_local)
     331             : 
     332         238 :       ELSEIF (eri_method == do_eri_gpw) THEN
     333         238 :          CALL get_qs_env(qs_env, ks_env=ks_env)
     334             : 
     335         238 :          CALL get_atomic_kind_set(atomic_kind_set, kind_of=kind_of, atom_of_kind=atom_of_kind)
     336             : 
     337             :          ! Supporting stuff for GPW
     338             :          CALL prepare_gpw(qs_env, dft_control, e_cutoff_old, cutoff_old, relative_cutoff_old, para_env_sub, pw_env_sub, &
     339         238 :                           auxbas_pw_pool, poisson_env, task_list_sub, rho_r, rho_g, pot_g, psi_L, sab_orb_sub)
     340             : 
     341             :          ! in case virial is required we need auxiliary pw
     342             :          ! for calculate the MP2-volume contribution to the virial
     343             :          ! (hartree potential derivatives)
     344         238 :          IF (use_virial) THEN
     345          50 :             CALL auxbas_pw_pool%create_pw(rho_g_copy)
     346         200 :             DO i = 1, 3
     347         200 :                CALL auxbas_pw_pool%create_pw(dvg(i))
     348             :             END DO
     349             :          END IF
     350             : 
     351             :          ! start main loop over auxiliary basis functions
     352         238 :          CALL timeset(routineN//"_loop", handle2)
     353             : 
     354         238 :          IF (use_virial) h_stress = 0.0_dp
     355             : 
     356         238 :          L_counter = 0
     357       10712 :          DO LLL = my_group_L_start, my_group_L_end
     358       10474 :             L_counter = L_counter + 1
     359             : 
     360             :             CALL G_P_transform_MO_to_AO(matrix_P_munu%matrix, matrix_P_munu_nosym, mat_munu%matrix, &
     361             :                                         G_P_ia(:, L_counter), matrix_P_inu, &
     362       10474 :                                         mo_coeff_v, mo_coeff_o, eps_filter)
     363             : 
     364             :             CALL integrate_potential_forces_2c(rho_r, LLL, rho_g, atomic_kind_set, &
     365             :                                                qs_kind_set, particle_set, cell, pw_env_sub, poisson_env, &
     366             :                                                pot_g, mp2_env%potential_parameter, use_virial, &
     367             :                                                rho_g_copy, dvg, kind_of, atom_of_kind, G_PQ_local(:, L_counter), &
     368       10474 :                                                mp2_force, h_stress, para_env_sub, dft_control, psi_L, factor_2c)
     369             : 
     370       10474 :             IF (.NOT. compare_potential_types(mp2_env%ri_metric, mp2_env%potential_parameter)) THEN
     371             : 
     372             :                CALL integrate_potential_forces_2c(rho_r, LLL, rho_g, atomic_kind_set, &
     373             :                                                   qs_kind_set, particle_set, cell, pw_env_sub, poisson_env, &
     374             :                                                   pot_g, mp2_env%ri_metric, use_virial, &
     375             :                                                   rho_g_copy, dvg, kind_of, atom_of_kind, G_PQ_local_2(:, L_counter), &
     376         498 :                                                   mp2_force, h_stress, para_env_sub, dft_control, psi_L, factor_2c)
     377             :             END IF
     378             : 
     379       36382 :             IF (use_virial) pv_virial = virial%pv_virial
     380             :             CALL integrate_potential_forces_3c_1c(mat_munu, rho_r, matrix_P_munu, qs_env, pw_env_sub, &
     381       10474 :                                                   task_list_sub)
     382       10474 :             IF (use_virial) THEN
     383       28067 :                h_stress = h_stress + (virial%pv_virial - pv_virial)
     384       28067 :                virial%pv_virial = pv_virial
     385             :             END IF
     386             : 
     387             :             ! The matrices of G_P_ia are deallocated here
     388             :             CALL update_lagrangian(mat_munu%matrix, matrix_P_inu, Lag_mu_i_1, &
     389             :                                    G_P_ia(:, L_counter), mo_coeff_o, Lag_nu_a_2, &
     390       10474 :                                    eps_filter)
     391             : 
     392             :             CALL integrate_potential_forces_3c_2c(matrix_P_munu, rho_r, rho_g, task_list_sub, pw_env_sub, &
     393             :                                                   mp2_env%ri_metric, &
     394             :                                                   ks_env, poisson_env, pot_g, use_virial, rho_g_copy, dvg, &
     395             :                                                   h_stress, para_env_sub, kind_of, atom_of_kind, &
     396       10712 :                                                   qs_kind_set, particle_set, cell, LLL, mp2_force, dft_control)
     397             :          END DO
     398             : 
     399         238 :          CALL timestop(handle2)
     400             : 
     401         238 :          DEALLOCATE (kind_of)
     402         238 :          DEALLOCATE (atom_of_kind)
     403             : 
     404         238 :          IF (use_virial) THEN
     405          50 :             CALL auxbas_pw_pool%give_back_pw(rho_g_copy)
     406         200 :             DO i = 1, 3
     407         200 :                CALL auxbas_pw_pool%give_back_pw(dvg(i))
     408             :             END DO
     409             :          END IF
     410             : 
     411             :          CALL cleanup_gpw(qs_env, e_cutoff_old, cutoff_old, relative_cutoff_old, para_env_sub, pw_env_sub, &
     412         238 :                           task_list_sub, auxbas_pw_pool, rho_r, rho_g, pot_g, psi_L)
     413             : 
     414         238 :          CALL dbcsr_release(matrix_P_munu%matrix)
     415         238 :          DEALLOCATE (matrix_P_munu%matrix)
     416             : 
     417             :       END IF
     418             : 
     419         864 :       IF (use_virial) mp2_env%ri_grad%mp2_virial = h_stress
     420             : 
     421         264 :       DEALLOCATE (G_PQ_local)
     422         264 :       IF (.NOT. compare_potential_types(mp2_env%ri_metric, mp2_env%potential_parameter)) DEALLOCATE (G_PQ_local_2)
     423             : 
     424         264 :       CALL dbcsr_release(matrix_P_munu_nosym)
     425             : 
     426         614 :       DO ispin = 1, nspins
     427         614 :          CALL dbcsr_release(matrix_P_inu(ispin))
     428             :       END DO
     429         264 :       DEALLOCATE (matrix_P_inu, G_P_ia)
     430             : 
     431             :       ! move the forces in the correct place
     432         264 :       IF (eri_method .EQ. do_eri_gpw) THEN
     433         704 :          DO ikind = 1, SIZE(mp2_force)
     434        6338 :             mp2_force(ikind)%mp2_non_sep(:, :) = force(ikind)%rho_elec(:, :)
     435        3640 :             force(ikind)%rho_elec(:, :) = 0.0_dp
     436             :          END DO
     437             :       END IF
     438             : 
     439             :       ! Now we move from the local matrices to the global ones
     440             :       ! defined over all MPI tasks
     441             :       ! Start with moving from the DBCSR to FM for the lagrangians
     442             : 
     443        1756 :       ALLOCATE (L1_mu_i(nspins), L2_nu_a(nspins))
     444         614 :       DO ispin = 1, nspins
     445             :          ! Now we move from the local matrices to the global ones
     446             :          ! defined over all MPI tasks
     447             :          ! Start with moving from the DBCSR to FM for the lagrangians
     448         350 :          NULLIFY (fm_struct_tmp)
     449             :          CALL cp_fm_struct_create(fm_struct_tmp, para_env=para_env_sub, context=blacs_env_sub, &
     450         350 :                                   nrow_global=dimen, ncol_global=homo(ispin))
     451         350 :          CALL cp_fm_create(L1_mu_i(ispin), fm_struct_tmp, name="Lag_mu_i")
     452         350 :          CALL cp_fm_struct_release(fm_struct_tmp)
     453         350 :          CALL cp_fm_set_all(L1_mu_i(ispin), 0.0_dp)
     454         350 :          CALL copy_dbcsr_to_fm(matrix=Lag_mu_i_1(ispin), fm=L1_mu_i(ispin))
     455             : 
     456             :          ! release Lag_mu_i_1
     457         350 :          CALL dbcsr_release(Lag_mu_i_1(ispin))
     458             : 
     459         350 :          NULLIFY (fm_struct_tmp)
     460             :          CALL cp_fm_struct_create(fm_struct_tmp, para_env=para_env_sub, context=blacs_env_sub, &
     461         350 :                                   nrow_global=dimen, ncol_global=virtual(ispin))
     462         350 :          CALL cp_fm_create(L2_nu_a(ispin), fm_struct_tmp, name="Lag_nu_a")
     463         350 :          CALL cp_fm_struct_release(fm_struct_tmp)
     464         350 :          CALL cp_fm_set_all(L2_nu_a(ispin), 0.0_dp)
     465         350 :          CALL copy_dbcsr_to_fm(matrix=Lag_nu_a_2(ispin), fm=L2_nu_a(ispin))
     466             : 
     467             :          ! release Lag_nu_a_2
     468         614 :          CALL dbcsr_release(Lag_nu_a_2(ispin))
     469             :       END DO
     470         264 :       DEALLOCATE (Lag_mu_i_1, Lag_nu_a_2)
     471             : 
     472             :       ! Set the factor to multiply P_ij (depends on the open or closed shell)
     473         264 :       factor = 1.0_dp
     474         264 :       IF (alpha_beta) factor = 0.50_dp
     475             : 
     476         614 :       DO ispin = 1, nspins
     477             :          CALL create_W_P(qs_env, mp2_env, mo_coeff(ispin), homo(ispin), virtual(ispin), dimen, para_env, &
     478             :                          para_env_sub, Eigenval(:, ispin), L1_mu_i(ispin), L2_nu_a(ispin), &
     479         614 :                          factor, ispin)
     480             :       END DO
     481         264 :       DEALLOCATE (L1_mu_i, L2_nu_a)
     482             : 
     483         264 :       CALL timestop(handle)
     484             : 
     485         528 :    END SUBROUTINE calc_ri_mp2_nonsep
     486             : 
     487             : ! **************************************************************************************************
     488             : !> \brief Transforms G_P_ia to G_P_munu and symmetrizes the result
     489             : !> \param G_P_munu The container for G_P_munu, will be allocated and created if not associated on entry
     490             : !> \param G_P_munu_nosym work matrix receiving the non-symmetric result of G_P_transform_alpha_beta
     491             : !> \param mat_munu template used to create G_P_munu; overwritten by the keep_sparsity copy of G_P_munu
     492             : !> \param G_P_ia per-spin input matrices, passed on to G_P_transform_alpha_beta
     493             : !> \param G_P_inu per-spin intermediate matrices, passed on to G_P_transform_alpha_beta
     494             : !> \param mo_coeff_v per-spin coefficient matrices (presumably the virtual orbitals, judging by the name)
     495             : !> \param mo_coeff_o per-spin coefficient matrices (presumably the occupied orbitals, judging by the name)
     496             : !> \param eps_filter filter threshold, passed on to G_P_transform_alpha_beta
     497             : ! **************************************************************************************************
     498       11580 :    SUBROUTINE G_P_transform_MO_to_AO(G_P_munu, G_P_munu_nosym, mat_munu, G_P_ia, G_P_inu, &
     499       11580 :                                      mo_coeff_v, mo_coeff_o, eps_filter)
     500             :       TYPE(dbcsr_type), POINTER                          :: G_P_munu
     501             :       TYPE(dbcsr_type), INTENT(INOUT)                    :: G_P_munu_nosym, mat_munu
     502             :       TYPE(dbcsr_p_type), DIMENSION(:), INTENT(IN)       :: G_P_ia
     503             :       TYPE(dbcsr_type), DIMENSION(:), INTENT(INOUT)      :: G_P_inu
     504             :       TYPE(dbcsr_p_type), DIMENSION(:), INTENT(IN)       :: mo_coeff_v, mo_coeff_o
     505             :       REAL(KIND=dp), INTENT(IN)                          :: eps_filter
     506             : 
     507             :       CHARACTER(LEN=*), PARAMETER :: routineN = 'G_P_transform_MO_to_AO'
     508             : 
     509             :       INTEGER                                            :: handle
     510             : 
     511       11580 :       IF (.NOT. ASSOCIATED(G_P_munu)) THEN
     512        1344 :          ALLOCATE (G_P_munu)
     513             :          CALL dbcsr_create(G_P_munu, template=mat_munu, &
     514        1344 :                            matrix_type=dbcsr_type_symmetric)
     515             :       END IF
     516             : 
     517       11580 :       CALL G_P_transform_alpha_beta(G_P_ia, G_P_inu, G_P_munu_nosym, mo_coeff_v, mo_coeff_o, eps_filter)
     518             : 
     519             :       ! symmetrize: store the transpose of G_P_munu_nosym in G_P_munu, then add both with scalars 2.0
     520       11580 :       CALL timeset(routineN//"_symmetrize", handle)
     521       11580 :       CALL dbcsr_set(G_P_munu, 0.0_dp)
     522       11580 :       CALL dbcsr_transposed(G_P_munu, G_P_munu_nosym)
     523             :       CALL dbcsr_add(G_P_munu, G_P_munu_nosym, &
     524       11580 :                      alpha_scalar=2.0_dp, beta_scalar=2.0_dp)
     525             :       ! trick to keep integrate_v_rspace from complaining: copy into mat_munu with keep_sparsity and back
     526       11580 :       CALL dbcsr_copy(mat_munu, G_P_munu, keep_sparsity=.TRUE.)
     527       11580 :       CALL dbcsr_copy(G_P_munu, mat_munu)
     528             : 
     529       11580 :       CALL timestop(handle)
     530             : 
     531       11580 :    END SUBROUTINE G_P_transform_MO_to_AO
     532             : 
     533             : ! **************************************************************************************************
     534             : !> \brief Back-transforms every spin channel of G_P_ia and accumulates the spin-averaged sum in G_P_munu
     535             : !> \param G_P_ia per-spin input matrices; SIZE(G_P_ia) sets the number of spin channels and the factor
     536             : !> \param G_P_inu per-spin half-transformed matrices, overwritten with the result of the first multiply
     537             : !> \param G_P_munu output matrix; zeroed on entry, then accumulated over all spin channels
     538             : !> \param mo_coeff_v per-spin coefficient matrices used in the a->nu back-transformation
     539             : !> \param mo_coeff_o per-spin coefficient matrices used in the i->mu back-transformation
     540             : !> \param eps_filter filter threshold handed to both dbcsr_multiply calls as filter_eps
     541             : ! **************************************************************************************************
     542       11580 :    SUBROUTINE G_P_transform_alpha_beta(G_P_ia, G_P_inu, G_P_munu, mo_coeff_v, mo_coeff_o, eps_filter)
     543             :       TYPE(dbcsr_p_type), DIMENSION(:), INTENT(IN)       :: G_P_ia
     544             :       TYPE(dbcsr_type), DIMENSION(:), INTENT(INOUT)      :: G_P_inu
     545             :       TYPE(dbcsr_type), INTENT(INOUT)                    :: G_P_munu
     546             :       TYPE(dbcsr_p_type), DIMENSION(:), INTENT(IN)       :: mo_coeff_v, mo_coeff_o
     547             :       REAL(KIND=dp), INTENT(IN)                          :: eps_filter
     548             : 
     549             :       CHARACTER(LEN=*), PARAMETER :: routineN = 'G_P_transform_alpha_beta'
     550             : 
     551             :       INTEGER                                            :: handle, ispin
     552             :       REAL(KIND=dp)                                      :: factor
     553             : 
     554       11580 :       CALL timeset(routineN, handle)
     555             : 
     556       11580 :       factor = 1.0_dp/REAL(SIZE(G_P_ia), dp)
     557             : 
     558       11580 :       CALL dbcsr_set(G_P_munu, 0.0_dp)
     559             : 
     560       27088 :       DO ispin = 1, SIZE(G_P_ia)
     561             :          ! first back-transformation a->nu: mo_coeff_v times transposed G_P_ia, overwriting G_P_inu(ispin)
     562             :          CALL dbcsr_multiply("N", "T", 1.0_dp, mo_coeff_v(ispin)%matrix, G_P_ia(ispin)%matrix, &
     563       15508 :                              0.0_dp, G_P_inu(ispin), filter_eps=eps_filter)
     564             : 
     565             :          ! second back-transformation i->mu: G_P_inu times transposed mo_coeff_o, scaled by factor, added to G_P_munu
     566             :          CALL dbcsr_multiply("N", "T", factor, G_P_inu(ispin), mo_coeff_o(ispin)%matrix, &
     567       27088 :                              1.0_dp, G_P_munu, filter_eps=eps_filter)
     568             :       END DO
     569             : 
     570       11580 :       CALL timestop(handle)
     571             : 
     572       11580 :    END SUBROUTINE G_P_transform_alpha_beta
     573             : 
     574             : ! **************************************************************************************************
     575             : !> \brief Accumulates the contributions of mat_munu into both Lagrangians and releases G_P_ia
     576             : !> \param mat_munu AO matrix contracted with matrix_P_inu and with mo_coeff_o
     577             : !> \param matrix_P_inu per-spin half back-transformed Gamma_i_nu on entry; overwritten by mat_munu*mo_coeff_o
     578             : !> \param Lag_mu_i_1 per-spin Lagrangian, incremented by mat_munu*matrix_P_inu
     579             : !> \param G_P_ia per-spin Gamma_i_a matrices; released and deallocated before returning
     580             : !> \param mo_coeff_o per-spin coefficient matrices used to transform the first index of mat_munu
     581             : !> \param Lag_nu_a_2 per-spin Lagrangian, decremented by (mat_munu*mo_coeff_o)*G_P_ia
     582             : !> \param eps_filter filter threshold handed to every dbcsr_multiply call as filter_eps
     583             : ! **************************************************************************************************
     584       11580 :    SUBROUTINE update_lagrangian(mat_munu, matrix_P_inu, Lag_mu_i_1, &
     585       23160 :                                 G_P_ia, mo_coeff_o, Lag_nu_a_2, &
     586             :                                 eps_filter)
     587             :       TYPE(dbcsr_type), INTENT(IN)                       :: mat_munu
     588             :       TYPE(dbcsr_type), DIMENSION(:), INTENT(INOUT)      :: matrix_P_inu, Lag_mu_i_1
     589             :       TYPE(dbcsr_p_type), DIMENSION(:), INTENT(INOUT)    :: G_P_ia
     590             :       TYPE(dbcsr_p_type), DIMENSION(:), INTENT(IN)       :: mo_coeff_o
     591             :       TYPE(dbcsr_type), DIMENSION(:), INTENT(INOUT)      :: Lag_nu_a_2
     592             :       REAL(KIND=dp), INTENT(IN)                          :: eps_filter
     593             : 
     594             :       CHARACTER(LEN=*), PARAMETER                        :: routineN = 'update_lagrangian'
     595             : 
     596             :       INTEGER                                            :: handle, ispin
     597             : 
     598             :       ! update both Lagrangians, one spin channel at a time
     599       11580 :       CALL timeset(routineN, handle)
     600             : 
     601       27088 :       DO ispin = 1, SIZE(G_P_ia)
     602             :          ! first contract mat_munu with the half back-transformed Gamma_i_nu
     603             :          ! in order to update Lag_mu_i_1
     604             :          CALL dbcsr_multiply("N", "N", 1.0_dp, mat_munu, matrix_P_inu(ispin), &
     605       15508 :                              1.0_dp, Lag_mu_i_1(ispin), filter_eps=eps_filter)
     606             : 
     607             :          ! transform the first index of mat_munu and store the result in matrix_P_inu (old content is lost)
     608       15508 :          CALL dbcsr_set(matrix_P_inu(ispin), 0.0_dp)
     609             :          CALL dbcsr_multiply("N", "N", 1.0_dp, mat_munu, mo_coeff_o(ispin)%matrix, &
     610       15508 :                              0.0_dp, matrix_P_inu(ispin), filter_eps=eps_filter)
     611             : 
     612             :          ! contract the transformed matrix_P_inu with the untransformed Gamma_i_a
     613             :          ! in order to update Lag_nu_a_2 (note the -1.0 prefactor)
     614             :          CALL dbcsr_multiply("N", "N", -1.0_dp, matrix_P_inu(ispin), G_P_ia(ispin)%matrix, &
     615       15508 :                              1.0_dp, Lag_nu_a_2(ispin), filter_eps=eps_filter)
     616             : 
     617             :          ! release the current Gamma_P_ia; it is not needed after this point
     618       15508 :          CALL dbcsr_release(G_P_ia(ispin)%matrix)
     619       27088 :          DEALLOCATE (G_P_ia(ispin)%matrix)
     620             :       END DO
     621             : 
     622       11580 :       CALL timestop(handle)
     623             : 
     624       11580 :    END SUBROUTINE update_lagrangian
     625             : 
     626             : ! **************************************************************************************************
     627             : !> \brief ...
     628             : !> \param qs_env ...
     629             : !> \param mp2_env ...
     630             : !> \param mo_coeff ...
     631             : !> \param homo ...
     632             : !> \param virtual ...
     633             : !> \param dimen ...
     634             : !> \param para_env ...
     635             : !> \param para_env_sub ...
     636             : !> \param Eigenval ...
     637             : !> \param L1_mu_i ...
     638             : !> \param L2_nu_a ...
     639             : !> \param factor ...
     640             : !> \param kspin ...
     641             : ! **************************************************************************************************
     642         350 :    SUBROUTINE create_W_P(qs_env, mp2_env, mo_coeff, homo, virtual, dimen, para_env, para_env_sub, &
     643         350 :                          Eigenval, L1_mu_i, L2_nu_a, factor, kspin)
     644             :       TYPE(qs_environment_type), POINTER                 :: qs_env
     645             :       TYPE(mp2_type)                                     :: mp2_env
     646             :       TYPE(cp_fm_type), INTENT(IN)                       :: mo_coeff
     647             :       INTEGER, INTENT(IN)                                :: homo, virtual, dimen
     648             :       TYPE(mp_para_env_type), POINTER                    :: para_env, para_env_sub
     649             :       REAL(KIND=dp), DIMENSION(:), INTENT(IN)            :: Eigenval
     650             :       TYPE(cp_fm_type), INTENT(INOUT)                    :: L1_mu_i, L2_nu_a
     651             :       REAL(KIND=dp), INTENT(IN)                          :: factor
     652             :       INTEGER, INTENT(IN)                                :: kspin
     653             : 
     654             :       CHARACTER(LEN=*), PARAMETER                        :: routineN = 'create_W_P'
     655             : 
     656             :       INTEGER :: color_exchange, dummy_proc, handle, handle2, handle3, i_global, i_local, iiB, &
     657             :          iii, iproc, itmp(2), j_global, j_local, jjB, max_col_size, max_row_size, &
     658             :          my_B_virtual_end, my_B_virtual_start, mypcol, myprow, ncol_local, ncol_local_1i, &
     659             :          ncol_local_2a, npcol, nprow, nrow_local, nrow_local_1i, nrow_local_2a, number_of_rec, &
     660             :          number_of_send, proc_receive, proc_receive_static, proc_send, proc_send_ex, &
     661             :          proc_send_static, proc_send_sub, proc_shift, rec_col_size, rec_counter, rec_row_size, &
     662             :          send_col_size, send_counter, send_pcol, send_prow, send_row_size, size_rec_buffer, &
     663             :          size_send_buffer
     664         350 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: iii_vet, map_rec_size, map_send_size, &
     665         350 :                                                             pos_info, pos_info_ex, proc_2_send_pos
     666         350 :       INTEGER, ALLOCATABLE, DIMENSION(:, :) :: grid_2_mepos, mepos_2_grid, my_col_indeces_info_1i, &
     667         350 :          my_col_indeces_info_2a, my_row_indeces_info_1i, my_row_indeces_info_2a, sizes, sizes_1i, &
     668         350 :          sizes_2a
     669         350 :       INTEGER, ALLOCATABLE, DIMENSION(:, :, :)           :: col_indeces_info_1i, &
     670         350 :                                                             col_indeces_info_2a, &
     671         350 :                                                             row_indeces_info_1i, &
     672         350 :                                                             row_indeces_info_2a
     673         350 :       INTEGER, DIMENSION(:), POINTER                     :: col_indices, col_indices_1i, &
     674         350 :                                                             col_indices_2a, row_indices, &
     675         350 :                                                             row_indices_1i, row_indices_2a
     676         350 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :)        :: ab_rec, ab_send, mat_rec, mat_send
     677             :       TYPE(cp_blacs_env_type), POINTER                   :: blacs_env
     678             :       TYPE(cp_fm_struct_type), POINTER                   :: fm_struct_tmp
     679             :       TYPE(cp_fm_type)                                   :: fm_P_ij, L_mu_q
     680             :       TYPE(integ_mat_buffer_type), ALLOCATABLE, &
     681         350 :          DIMENSION(:)                                    :: buffer_rec, buffer_send
     682             :       TYPE(integ_mat_buffer_type_2D), ALLOCATABLE, &
     683         350 :          DIMENSION(:)                                    :: buffer_cyclic
     684             :       TYPE(mp_para_env_type), POINTER                    :: para_env_exchange
     685         350 :       TYPE(mp_request_type), ALLOCATABLE, DIMENSION(:)   :: req_send
     686             : 
     687         350 :       CALL timeset(routineN, handle)
     688             : 
     689             :       ! create the globally distributed mixed lagrangian
     690         350 :       NULLIFY (blacs_env)
     691         350 :       CALL get_qs_env(qs_env, blacs_env=blacs_env)
     692             : 
     693         350 :       NULLIFY (fm_struct_tmp)
     694             :       CALL cp_fm_struct_create(fm_struct_tmp, para_env=para_env, context=blacs_env, &
     695         350 :                                nrow_global=dimen, ncol_global=dimen)
     696         350 :       CALL cp_fm_create(L_mu_q, fm_struct_tmp, name="Lag_mu_q")
     697         350 :       CALL cp_fm_struct_release(fm_struct_tmp)
     698         350 :       CALL cp_fm_set_all(L_mu_q, 0.0_dp)
     699             : 
     700             :       ! create all information array
     701        1050 :       ALLOCATE (pos_info(0:para_env%num_pe - 1))
     702         350 :       CALL para_env%allgather(para_env_sub%mepos, pos_info)
     703             : 
     704             :       ! get matrix information for the global
     705             :       CALL cp_fm_get_info(matrix=L_mu_q, &
     706             :                           nrow_local=nrow_local, &
     707             :                           ncol_local=ncol_local, &
     708             :                           row_indices=row_indices, &
     709         350 :                           col_indices=col_indices)
     710         350 :       myprow = L_mu_q%matrix_struct%context%mepos(1)
     711         350 :       mypcol = L_mu_q%matrix_struct%context%mepos(2)
     712         350 :       nprow = L_mu_q%matrix_struct%context%num_pe(1)
     713         350 :       npcol = L_mu_q%matrix_struct%context%num_pe(2)
     714             : 
     715        1400 :       ALLOCATE (grid_2_mepos(0:nprow - 1, 0:npcol - 1))
     716        1400 :       grid_2_mepos = 0
     717         350 :       grid_2_mepos(myprow, mypcol) = para_env%mepos
     718         350 :       CALL para_env%sum(grid_2_mepos)
     719             : 
     720             :       ! get matrix information for L1_mu_i
     721             :       CALL cp_fm_get_info(matrix=L1_mu_i, &
     722             :                           nrow_local=nrow_local_1i, &
     723             :                           ncol_local=ncol_local_1i, &
     724             :                           row_indices=row_indices_1i, &
     725         350 :                           col_indices=col_indices_1i)
     726             : 
     727        1050 :       ALLOCATE (sizes_1i(2, 0:para_env_sub%num_pe - 1))
     728        1050 :       CALL para_env_sub%allgather([nrow_local_1i, ncol_local_1i], sizes_1i)
     729             : 
     730             :       ! get matrix information for L2_nu_a
     731             :       CALL cp_fm_get_info(matrix=L2_nu_a, &
     732             :                           nrow_local=nrow_local_2a, &
     733             :                           ncol_local=ncol_local_2a, &
     734             :                           row_indices=row_indices_2a, &
     735         350 :                           col_indices=col_indices_2a)
     736             : 
     737        1050 :       ALLOCATE (sizes_2a(2, 0:para_env_sub%num_pe - 1))
     738        1050 :       CALL para_env_sub%allgather([nrow_local_2a, ncol_local_2a], sizes_2a)
     739             : 
     740             :       ! Here we perform a ring communication scheme that takes into account
     741             :       ! the sub-group distribution of the source matrices.
     742             :       ! As a first step we need to redistribute the data within
     743             :       ! the subgroup.
     744             :       ! In order to do so we have to allocate the structure
     745             :       ! that will hold the local data involved in communication; this
     746             :       ! structure will be the same for processes in different subgroups
     747             :       ! sharing the same position in the subgroup.
     748             :       ! -1) create the exchange para_env
     749         350 :       color_exchange = para_env_sub%mepos
     750         350 :       ALLOCATE (para_env_exchange)
     751         350 :       CALL para_env_exchange%from_split(para_env, color_exchange)
     752        1050 :       ALLOCATE (pos_info_ex(0:para_env%num_pe - 1))
     753         350 :       CALL para_env%allgather(para_env_exchange%mepos, pos_info_ex)
     754        1050 :       ALLOCATE (sizes(2, 0:para_env_exchange%num_pe - 1))
     755        1050 :       CALL para_env_exchange%allgather([nrow_local, ncol_local], sizes)
     756             : 
     757             :       ! 0) store some info about indices of the fm matrices (subgroup)
     758         350 :       CALL timeset(routineN//"_inx", handle2)
     759             :       ! matrix L1_mu_i
     760         716 :       max_row_size = MAXVAL(sizes_1i(1, :))
     761         716 :       max_col_size = MAXVAL(sizes_1i(2, :))
     762        1400 :       ALLOCATE (row_indeces_info_1i(2, max_row_size, 0:para_env_sub%num_pe - 1))
     763        1400 :       ALLOCATE (col_indeces_info_1i(2, max_col_size, 0:para_env_sub%num_pe - 1))
     764        1050 :       ALLOCATE (my_row_indeces_info_1i(2, max_row_size))
     765        1050 :       ALLOCATE (my_col_indeces_info_1i(2, max_col_size))
     766       20582 :       row_indeces_info_1i = 0
     767        4742 :       col_indeces_info_1i = 0
     768         350 :       dummy_proc = 0
     769             :       ! row
     770        6762 :       DO iiB = 1, nrow_local_1i
     771        6412 :          i_global = row_indices_1i(iiB)
     772        6412 :          send_prow = L_mu_q%matrix_struct%g2p_row(i_global)
     773        6412 :          i_local = L_mu_q%matrix_struct%g2l_row(i_global)
     774        6412 :          my_row_indeces_info_1i(1, iiB) = send_prow
     775        6762 :          my_row_indeces_info_1i(2, iiB) = i_local
     776             :       END DO
     777             :       ! col
     778        1620 :       DO jjB = 1, ncol_local_1i
     779        1270 :          j_global = col_indices_1i(jjB)
     780        1270 :          send_pcol = L_mu_q%matrix_struct%g2p_col(j_global)
     781        1270 :          j_local = L_mu_q%matrix_struct%g2l_col(j_global)
     782        1270 :          my_col_indeces_info_1i(1, jjB) = send_pcol
     783        1620 :          my_col_indeces_info_1i(2, jjB) = j_local
     784             :       END DO
     785         350 :       CALL para_env_sub%allgather(my_row_indeces_info_1i, row_indeces_info_1i)
     786         350 :       CALL para_env_sub%allgather(my_col_indeces_info_1i, col_indeces_info_1i)
     787         350 :       DEALLOCATE (my_row_indeces_info_1i, my_col_indeces_info_1i)
     788             : 
     789             :       ! matrix L2_nu_a
     790         716 :       max_row_size = MAXVAL(sizes_2a(1, :))
     791         716 :       max_col_size = MAXVAL(sizes_2a(2, :))
     792        1400 :       ALLOCATE (row_indeces_info_2a(2, max_row_size, 0:para_env_sub%num_pe - 1))
     793        1400 :       ALLOCATE (col_indeces_info_2a(2, max_col_size, 0:para_env_sub%num_pe - 1))
     794        1050 :       ALLOCATE (my_row_indeces_info_2a(2, max_row_size))
     795        1050 :       ALLOCATE (my_col_indeces_info_2a(2, max_col_size))
     796       20582 :       row_indeces_info_2a = 0
     797       17636 :       col_indeces_info_2a = 0
     798             :       ! row
     799        6762 :       DO iiB = 1, nrow_local_2a
     800        6412 :          i_global = row_indices_2a(iiB)
     801        6412 :          send_prow = L_mu_q%matrix_struct%g2p_row(i_global)
     802        6412 :          i_local = L_mu_q%matrix_struct%g2l_row(i_global)
     803        6412 :          my_row_indeces_info_2a(1, iiB) = send_prow
     804        6762 :          my_row_indeces_info_2a(2, iiB) = i_local
     805             :       END DO
     806             :       ! col
     807        5682 :       DO jjB = 1, ncol_local_2a
     808        5332 :          j_global = col_indices_2a(jjB) + homo
     809        5332 :          send_pcol = L_mu_q%matrix_struct%g2p_col(j_global)
     810        5332 :          j_local = L_mu_q%matrix_struct%g2l_col(j_global)
     811        5332 :          my_col_indeces_info_2a(1, jjB) = send_pcol
     812        5682 :          my_col_indeces_info_2a(2, jjB) = j_local
     813             :       END DO
     814         350 :       CALL para_env_sub%allgather(my_row_indeces_info_2a, row_indeces_info_2a)
     815         350 :       CALL para_env_sub%allgather(my_col_indeces_info_2a, col_indeces_info_2a)
     816         350 :       DEALLOCATE (my_row_indeces_info_2a, my_col_indeces_info_2a)
     817         350 :       CALL timestop(handle2)
     818             : 
     819             :       ! 1) define the map for sending data in the subgroup starting with L1_mu_i
     820         350 :       CALL timeset(routineN//"_subinfo", handle2)
     821        1050 :       ALLOCATE (map_send_size(0:para_env_sub%num_pe - 1))
     822         716 :       map_send_size = 0
     823        1620 :       DO jjB = 1, ncol_local_1i
     824        1270 :          send_pcol = col_indeces_info_1i(1, jjB, para_env_sub%mepos)
     825       25460 :          DO iiB = 1, nrow_local_1i
     826       23840 :             send_prow = row_indeces_info_1i(1, iiB, para_env_sub%mepos)
     827       23840 :             proc_send = grid_2_mepos(send_prow, send_pcol)
     828       23840 :             proc_send_sub = pos_info(proc_send)
     829       25110 :             map_send_size(proc_send_sub) = map_send_size(proc_send_sub) + 1
     830             :          END DO
     831             :       END DO
     832             :       ! and the same for L2_nu_a
     833        5682 :       DO jjB = 1, ncol_local_2a
     834        5332 :          send_pcol = col_indeces_info_2a(1, jjB, para_env_sub%mepos)
     835      123674 :          DO iiB = 1, nrow_local_2a
     836      117992 :             send_prow = row_indeces_info_2a(1, iiB, para_env_sub%mepos)
     837      117992 :             proc_send = grid_2_mepos(send_prow, send_pcol)
     838      117992 :             proc_send_sub = pos_info(proc_send)
     839      123324 :             map_send_size(proc_send_sub) = map_send_size(proc_send_sub) + 1
     840             :          END DO
     841             :       END DO
     842             :       ! and exchange data in order to create map_rec_size
     843        1050 :       ALLOCATE (map_rec_size(0:para_env_sub%num_pe - 1))
     844         716 :       map_rec_size = 0
     845         350 :       CALL para_env_sub%alltoall(map_send_size, map_rec_size, 1)
     846         350 :       CALL timestop(handle2)
     847             : 
     848             :       ! 2) reorder data in sending buffer
     849         350 :       CALL timeset(routineN//"_sub_Bsend", handle2)
     850             :       ! count the number of messages (include myself)
     851         350 :       number_of_send = 0
     852         716 :       DO proc_shift = 0, para_env_sub%num_pe - 1
     853         366 :          proc_send = MODULO(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
     854         716 :          IF (map_send_size(proc_send) > 0) THEN
     855         350 :             number_of_send = number_of_send + 1
     856             :          END IF
     857             :       END DO
     858             :       ! allocate the structure that will hold the messages to be sent
     859        1400 :       ALLOCATE (buffer_send(number_of_send))
     860         350 :       send_counter = 0
     861        1050 :       ALLOCATE (proc_2_send_pos(0:para_env_sub%num_pe - 1))
     862         716 :       proc_2_send_pos = 0
     863         716 :       DO proc_shift = 0, para_env_sub%num_pe - 1
     864         366 :          proc_send = MODULO(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
     865         366 :          size_send_buffer = map_send_size(proc_send)
     866         716 :          IF (map_send_size(proc_send) > 0) THEN
     867         350 :             send_counter = send_counter + 1
     868             :             ! allocate the sending buffer (msg)
     869        1050 :             ALLOCATE (buffer_send(send_counter)%msg(size_send_buffer))
     870      142182 :             buffer_send(send_counter)%msg = 0.0_dp
     871         350 :             buffer_send(send_counter)%proc = proc_send
     872         350 :             proc_2_send_pos(proc_send) = send_counter
     873             :          END IF
     874             :       END DO
     875             :       ! loop over the locally held data and fill the buffer_send;
     876             :       ! for doing that we need an array that keeps track of the
     877             :       ! sequential increase of the index for each message
     878        1050 :       ALLOCATE (iii_vet(number_of_send))
     879         700 :       iii_vet = 0
     880        1620 :       DO jjB = 1, ncol_local_1i
     881        1270 :          send_pcol = col_indeces_info_1i(1, jjB, para_env_sub%mepos)
     882       25460 :          DO iiB = 1, nrow_local_1i
     883       23840 :             send_prow = row_indeces_info_1i(1, iiB, para_env_sub%mepos)
     884       23840 :             proc_send = grid_2_mepos(send_prow, send_pcol)
     885       23840 :             proc_send_sub = pos_info(proc_send)
     886       23840 :             send_counter = proc_2_send_pos(proc_send_sub)
     887       23840 :             iii_vet(send_counter) = iii_vet(send_counter) + 1
     888       23840 :             iii = iii_vet(send_counter)
     889       25110 :             buffer_send(send_counter)%msg(iii) = L1_mu_i%local_data(iiB, jjB)
     890             :          END DO
     891             :       END DO
     892             :       ! release the local data of L1_mu_i
     893         350 :       DEALLOCATE (L1_mu_i%local_data)
     894             :       ! and the same for L2_nu_a
     895        5682 :       DO jjB = 1, ncol_local_2a
     896        5332 :          send_pcol = col_indeces_info_2a(1, jjB, para_env_sub%mepos)
     897      123674 :          DO iiB = 1, nrow_local_2a
     898      117992 :             send_prow = row_indeces_info_2a(1, iiB, para_env_sub%mepos)
     899      117992 :             proc_send = grid_2_mepos(send_prow, send_pcol)
     900      117992 :             proc_send_sub = pos_info(proc_send)
     901      117992 :             send_counter = proc_2_send_pos(proc_send_sub)
     902      117992 :             iii_vet(send_counter) = iii_vet(send_counter) + 1
     903      117992 :             iii = iii_vet(send_counter)
     904      123324 :             buffer_send(send_counter)%msg(iii) = L2_nu_a%local_data(iiB, jjB)
     905             :          END DO
     906             :       END DO
     907         350 :       DEALLOCATE (L2_nu_a%local_data)
     908         350 :       DEALLOCATE (proc_2_send_pos)
     909         350 :       DEALLOCATE (iii_vet)
     910         350 :       CALL timestop(handle2)
     911             : 
     912             :       ! 3) create the buffer for receive, post the message with irecv
     913             :       !    and send the messages non-blocking
     914         350 :       CALL timeset(routineN//"_sub_isendrecv", handle2)
     915             :       ! count the number of messages to be received
     916         350 :       number_of_rec = 0
     917         716 :       DO proc_shift = 0, para_env_sub%num_pe - 1
     918         366 :          proc_receive = MODULO(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
     919         716 :          IF (map_rec_size(proc_receive) > 0) THEN
     920         350 :             number_of_rec = number_of_rec + 1
     921             :          END IF
     922             :       END DO
     923        1400 :       ALLOCATE (buffer_rec(number_of_rec))
     924         350 :       rec_counter = 0
     925         716 :       DO proc_shift = 0, para_env_sub%num_pe - 1
     926         366 :          proc_receive = MODULO(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
     927         366 :          size_rec_buffer = map_rec_size(proc_receive)
     928         716 :          IF (map_rec_size(proc_receive) > 0) THEN
     929         350 :             rec_counter = rec_counter + 1
     930             :             ! prepare the buffer for receive
     931        1050 :             ALLOCATE (buffer_rec(rec_counter)%msg(size_rec_buffer))
     932      142182 :             buffer_rec(rec_counter)%msg = 0.0_dp
     933         350 :             buffer_rec(rec_counter)%proc = proc_receive
     934             :             ! post the message to be received (no need to send to myself)
     935         350 :             IF (proc_receive /= para_env_sub%mepos) THEN
     936             :                CALL para_env_sub%irecv(buffer_rec(rec_counter)%msg, proc_receive, &
     937           0 :                                        buffer_rec(rec_counter)%msg_req)
     938             :             END IF
     939             :          END IF
     940             :       END DO
     941             :       ! send messages
     942        1400 :       ALLOCATE (req_send(number_of_send))
     943         700 :       req_send = mp_request_null
     944         350 :       send_counter = 0
     945         716 :       DO proc_shift = 0, para_env_sub%num_pe - 1
     946         366 :          proc_send = MODULO(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
     947         716 :          IF (map_send_size(proc_send) > 0) THEN
     948         350 :             send_counter = send_counter + 1
     949         350 :             IF (proc_send == para_env_sub%mepos) THEN
     950      142182 :                buffer_rec(send_counter)%msg(:) = buffer_send(send_counter)%msg
     951             :             ELSE
     952             :                CALL para_env_sub%isend(buffer_send(send_counter)%msg, proc_send, &
     953           0 :                                        buffer_send(send_counter)%msg_req)
     954           0 :                req_send(send_counter) = buffer_send(send_counter)%msg_req
     955             :             END IF
     956             :          END IF
     957             :       END DO
     958         350 :       DEALLOCATE (map_send_size)
     959         350 :       CALL timestop(handle2)
     960             : 
     961             :       ! 4) (if memory is a problem we should move this part after point 5)
     962             :       !    Here we create the new buffer for cyclic(ring) communication and
     963             :       !    we fill it with the data received from the other members of the
     964             :       !    subgroup
     965         350 :       CALL timeset(routineN//"_Bcyclic", handle2)
     966             :       ! first allocate the new structure
     967        1734 :       ALLOCATE (buffer_cyclic(0:para_env_exchange%num_pe - 1))
     968        1034 :       DO iproc = 0, para_env_exchange%num_pe - 1
     969         684 :          rec_row_size = sizes(1, iproc)
     970         684 :          rec_col_size = sizes(2, iproc)
     971        2736 :          ALLOCATE (buffer_cyclic(iproc)%msg(rec_row_size, rec_col_size))
     972      155690 :          buffer_cyclic(iproc)%msg = 0.0_dp
     973             :       END DO
     974             :       ! now collect data from the other members of the subgroup and fill
     975             :       ! buffer_cyclic
     976         350 :       rec_counter = 0
     977         716 :       DO proc_shift = 0, para_env_sub%num_pe - 1
     978         366 :          proc_receive = MODULO(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
     979         366 :          size_rec_buffer = map_rec_size(proc_receive)
     980         716 :          IF (map_rec_size(proc_receive) > 0) THEN
     981         350 :             rec_counter = rec_counter + 1
     982             : 
     983             :             ! wait for the message
     984         350 :             IF (proc_receive /= para_env_sub%mepos) CALL buffer_rec(rec_counter)%msg_req%wait()
     985             : 
     986         350 :             CALL timeset(routineN//"_fill", handle3)
     987         350 :             iii = 0
     988        1620 :             DO jjB = 1, sizes_1i(2, proc_receive)
     989        1270 :                send_pcol = col_indeces_info_1i(1, jjB, proc_receive)
     990        1270 :                j_local = col_indeces_info_1i(2, jjB, proc_receive)
     991       25460 :                DO iiB = 1, sizes_1i(1, proc_receive)
     992       23840 :                   send_prow = row_indeces_info_1i(1, iiB, proc_receive)
     993       23840 :                   proc_send = grid_2_mepos(send_prow, send_pcol)
     994       23840 :                   proc_send_sub = pos_info(proc_send)
     995       23840 :                   IF (proc_send_sub /= para_env_sub%mepos) CYCLE
     996       23840 :                   iii = iii + 1
     997       23840 :                   i_local = row_indeces_info_1i(2, iiB, proc_receive)
     998       23840 :                   proc_send_ex = pos_info_ex(proc_send)
     999       25110 :                   buffer_cyclic(proc_send_ex)%msg(i_local, j_local) = buffer_rec(rec_counter)%msg(iii)
    1000             :                END DO
    1001             :             END DO
    1002             :             ! and the same for L2_nu_a
    1003        5682 :             DO jjB = 1, sizes_2a(2, proc_receive)
    1004        5332 :                send_pcol = col_indeces_info_2a(1, jjB, proc_receive)
    1005        5332 :                j_local = col_indeces_info_2a(2, jjB, proc_receive)
    1006      123674 :                DO iiB = 1, sizes_2a(1, proc_receive)
    1007      117992 :                   send_prow = row_indeces_info_2a(1, iiB, proc_receive)
    1008      117992 :                   proc_send = grid_2_mepos(send_prow, send_pcol)
    1009      117992 :                   proc_send_sub = pos_info(proc_send)
    1010      117992 :                   IF (proc_send_sub /= para_env_sub%mepos) CYCLE
    1011      117992 :                   iii = iii + 1
    1012      117992 :                   i_local = row_indeces_info_2a(2, iiB, proc_receive)
    1013      117992 :                   proc_send_ex = pos_info_ex(proc_send)
    1014      123324 :                   buffer_cyclic(proc_send_ex)%msg(i_local, j_local) = buffer_rec(rec_counter)%msg(iii)
    1015             :                END DO
    1016             :             END DO
    1017         350 :             CALL timestop(handle3)
    1018             : 
    1019             :             ! deallocate the received message
    1020         350 :             DEALLOCATE (buffer_rec(rec_counter)%msg)
    1021             :          END IF
    1022             :       END DO
    1023         350 :       DEALLOCATE (row_indeces_info_1i)
    1024         350 :       DEALLOCATE (col_indeces_info_1i)
    1025         350 :       DEALLOCATE (row_indeces_info_2a)
    1026         350 :       DEALLOCATE (col_indeces_info_2a)
    1027         700 :       DEALLOCATE (buffer_rec)
    1028         350 :       DEALLOCATE (map_rec_size)
    1029         350 :       CALL timestop(handle2)
    1030             : 
    1031             :       ! 5)  Wait for all messages to be sent in the subgroup
    1032         350 :       CALL timeset(routineN//"_sub_waitall", handle2)
    1033         350 :       CALL mp_waitall(req_send(:))
    1034         700 :       DO send_counter = 1, number_of_send
    1035         700 :          DEALLOCATE (buffer_send(send_counter)%msg)
    1036             :       END DO
    1037         700 :       DEALLOCATE (buffer_send)
    1038         350 :       DEALLOCATE (req_send)
    1039         350 :       CALL timestop(handle2)
    1040             : 
    1041             :       ! 6) Start with ring communication
    1042         350 :       CALL timeset(routineN//"_ring", handle2)
    1043         350 :       proc_send_static = MODULO(para_env_exchange%mepos + 1, para_env_exchange%num_pe)
    1044         350 :       proc_receive_static = MODULO(para_env_exchange%mepos - 1, para_env_exchange%num_pe)
    1045        1034 :       max_row_size = MAXVAL(sizes(1, :))
    1046        1034 :       max_col_size = MAXVAL(sizes(2, :))
    1047        1400 :       ALLOCATE (mat_send(max_row_size, max_col_size))
    1048        1050 :       ALLOCATE (mat_rec(max_row_size, max_col_size))
    1049       85950 :       mat_send = 0.0_dp
    1050       80131 :       mat_send(1:nrow_local, 1:ncol_local) = buffer_cyclic(para_env_exchange%mepos)%msg(:, :)
    1051         350 :       DEALLOCATE (buffer_cyclic(para_env_exchange%mepos)%msg)
    1052         684 :       DO proc_shift = 1, para_env_exchange%num_pe - 1
    1053         334 :          proc_receive = MODULO(para_env_exchange%mepos - proc_shift, para_env_exchange%num_pe)
    1054             : 
    1055         334 :          rec_row_size = sizes(1, proc_receive)
    1056         334 :          rec_col_size = sizes(2, proc_receive)
    1057             : 
    1058       81028 :          mat_rec = 0.0_dp
    1059             :          CALL para_env_exchange%sendrecv(mat_send, proc_send_static, &
    1060         334 :                                          mat_rec, proc_receive_static)
    1061             : 
    1062       81028 :          mat_send = 0.0_dp
    1063             :          mat_send(1:rec_row_size, 1:rec_col_size) = mat_rec(1:rec_row_size, 1:rec_col_size) + &
    1064       75209 :                                                     buffer_cyclic(proc_receive)%msg(:, :)
    1065             : 
    1066         684 :          DEALLOCATE (buffer_cyclic(proc_receive)%msg)
    1067             :       END DO
    1068             :       ! and finally
    1069             :       CALL para_env_exchange%sendrecv(mat_send, proc_send_static, &
    1070         350 :                                       mat_rec, proc_receive_static)
    1071       80131 :       L_mu_q%local_data(1:nrow_local, 1:ncol_local) = mat_rec(1:nrow_local, 1:ncol_local)
    1072        1384 :       DEALLOCATE (buffer_cyclic)
    1073         350 :       DEALLOCATE (mat_send)
    1074         350 :       DEALLOCATE (mat_rec)
    1075         350 :       CALL timestop(handle2)
    1076             : 
    1077             :       ! release para_env_exchange
    1078         350 :       CALL mp_para_env_release(para_env_exchange)
    1079             : 
    1080         350 :       CALL cp_fm_release(L1_mu_i)
    1081         350 :       CALL cp_fm_release(L2_nu_a)
    1082         350 :       DEALLOCATE (pos_info_ex)
    1083         350 :       DEALLOCATE (grid_2_mepos)
    1084         350 :       DEALLOCATE (sizes)
    1085         350 :       DEALLOCATE (sizes_1i)
    1086         350 :       DEALLOCATE (sizes_2a)
    1087             : 
    1088             :       ! update the P_ij block of P_MP2 with the
    1089             :       ! non-singular ij pairs
    1090         350 :       CALL timeset(routineN//"_Pij", handle2)
    1091         350 :       NULLIFY (fm_struct_tmp)
    1092             :       CALL cp_fm_struct_create(fm_struct_tmp, para_env=para_env, context=blacs_env, &
    1093         350 :                                nrow_global=homo, ncol_global=homo)
    1094         350 :       CALL cp_fm_create(fm_P_ij, fm_struct_tmp, name="fm_P_ij")
    1095         350 :       CALL cp_fm_struct_release(fm_struct_tmp)
    1096         350 :       CALL cp_fm_set_all(fm_P_ij, 0.0_dp)
    1097             : 
    1098             :       ! we have it, update P_ij local
    1099             :       CALL cp_fm_get_info(matrix=fm_P_ij, &
    1100             :                           nrow_local=nrow_local, &
    1101             :                           ncol_local=ncol_local, &
    1102             :                           row_indices=row_indices, &
    1103         350 :                           col_indices=col_indices)
    1104             : 
    1105         350 :       IF (.NOT. mp2_env%method == ri_rpa_method_gpw) THEN
    1106             :          CALL parallel_gemm('T', 'N', homo, homo, dimen, 1.0_dp, &
    1107             :                             mo_coeff, L_mu_q, 0.0_dp, fm_P_ij, &
    1108             :                             a_first_col=1, &
    1109             :                             a_first_row=1, &
    1110             :                             b_first_col=1, &
    1111             :                             b_first_row=1, &
    1112             :                             c_first_col=1, &
    1113         326 :                             c_first_row=1)
    1114             :          CALL parallel_gemm('T', 'N', homo, homo, dimen, -2.0_dp, &
    1115             :                             L_mu_q, mo_coeff, 2.0_dp, fm_P_ij, &
    1116             :                             a_first_col=1, &
    1117             :                             a_first_row=1, &
    1118             :                             b_first_col=1, &
    1119             :                             b_first_row=1, &
    1120             :                             c_first_col=1, &
    1121         326 :                             c_first_row=1)
    1122             : 
    1123        1504 :          DO jjB = 1, ncol_local
    1124        1178 :             j_global = col_indices(jjB)
    1125        3761 :             DO iiB = 1, nrow_local
    1126        2257 :                i_global = row_indices(iiB)
    1127             :                ! diagonal elements and nearly degenerate ij pairs already updated
    1128        3435 :                IF (ABS(Eigenval(j_global) - Eigenval(i_global)) < mp2_env%ri_grad%eps_canonical) THEN
    1129         671 :                   fm_P_ij%local_data(iiB, jjB) = mp2_env%ri_grad%P_ij(kspin)%array(i_global, j_global)
    1130             :                ELSE
    1131             :                   fm_P_ij%local_data(iiB, jjB) = &
    1132        1586 :                      factor*fm_P_ij%local_data(iiB, jjB)/(Eigenval(j_global) - Eigenval(i_global))
    1133             :                END IF
    1134             :             END DO
    1135             :          END DO
    1136             :       ELSE
    1137         116 :          DO jjB = 1, ncol_local
    1138          92 :             j_global = col_indices(jjB)
    1139         294 :             DO iiB = 1, nrow_local
    1140         178 :                i_global = row_indices(iiB)
    1141         270 :                fm_P_ij%local_data(iiB, jjB) = mp2_env%ri_grad%P_ij(kspin)%array(i_global, j_global)
    1142             :             END DO
    1143             :          END DO
    1144             :       END IF
    1145             :       ! deallocate the local P_ij
    1146         350 :       DEALLOCATE (mp2_env%ri_grad%P_ij(kspin)%array)
    1147         350 :       CALL timestop(handle2)
    1148             : 
    1149             :       ! Now create and fill the P matrix (MO)
    1150             :       ! FOR NOW WE ASSUME P_ab AND P_ij ARE REPLICATED OVER EACH MPI RANK
    1151         350 :       IF (.NOT. ALLOCATED(mp2_env%ri_grad%P_mo)) THEN
    1152        1142 :          ALLOCATE (mp2_env%ri_grad%P_mo(SIZE(mp2_env%ri_grad%mo_coeff_o)))
    1153             :       END IF
    1154             : 
    1155         350 :       CALL timeset(routineN//"_PMO", handle2)
    1156         350 :       NULLIFY (fm_struct_tmp)
    1157             :       CALL cp_fm_struct_create(fm_struct_tmp, para_env=para_env, context=blacs_env, &
    1158         350 :                                nrow_global=dimen, ncol_global=dimen)
    1159         350 :       CALL cp_fm_create(mp2_env%ri_grad%P_mo(kspin), fm_struct_tmp, name="P_MP2_MO")
    1160         350 :       CALL cp_fm_set_all(mp2_env%ri_grad%P_mo(kspin), 0.0_dp)
    1161             : 
    1162             :       ! start with the (easy) occ-occ block and locally held P_ab elements
    1163         350 :       itmp = get_limit(virtual, para_env_sub%num_pe, para_env_sub%mepos)
    1164         350 :       my_B_virtual_start = itmp(1)
    1165         350 :       my_B_virtual_end = itmp(2)
    1166             : 
    1167             :       ! Fill occ-occ block
    1168         350 :       CALL cp_fm_to_fm_submat(fm_P_ij, mp2_env%ri_grad%P_mo(kspin), homo, homo, 1, 1, 1, 1)
    1169         350 :       CALL cp_fm_release(fm_P_ij)
    1170             : 
    1171             :       CALL cp_fm_get_info(mp2_env%ri_grad%P_mo(kspin), &
    1172             :                           nrow_local=nrow_local, &
    1173             :                           ncol_local=ncol_local, &
    1174             :                           row_indices=row_indices, &
    1175         350 :                           col_indices=col_indices)
    1176             : 
    1177         350 :       IF (mp2_env%method == ri_mp2_laplace) THEN
    1178             :          CALL parallel_gemm('T', 'N', virtual, virtual, dimen, 1.0_dp, &
    1179             :                             mo_coeff, L_mu_q, 0.0_dp, mp2_env%ri_grad%P_mo(kspin), &
    1180             :                             a_first_col=homo + 1, &
    1181             :                             a_first_row=1, &
    1182             :                             b_first_col=homo + 1, &
    1183             :                             b_first_row=1, &
    1184             :                             c_first_col=homo + 1, &
    1185          24 :                             c_first_row=homo + 1)
    1186             :          CALL parallel_gemm('T', 'N', virtual, virtual, dimen, -2.0_dp, &
    1187             :                             L_mu_q, mo_coeff, 2.0_dp, mp2_env%ri_grad%P_mo(kspin), &
    1188             :                             a_first_col=homo + 1, &
    1189             :                             a_first_row=1, &
    1190             :                             b_first_col=homo + 1, &
    1191             :                             b_first_row=1, &
    1192             :                             c_first_col=homo + 1, &
    1193          24 :                             c_first_row=homo + 1)
    1194             :       END IF
    1195             : 
    1196         350 :       IF (mp2_env%method == ri_mp2_method_gpw .OR. mp2_env%method == ri_rpa_method_gpw) THEN
    1197             :          ! With MP2 and RPA, we have already calculated the density matrix elements
    1198        6336 :          DO jjB = 1, ncol_local
    1199        6010 :             j_global = col_indices(jjB)
    1200        6010 :             IF (j_global <= homo) CYCLE
    1201       59745 :             DO iiB = 1, nrow_local
    1202       54587 :                i_global = row_indices(iiB)
    1203       60597 :                IF (my_B_virtual_start <= i_global - homo .AND. i_global - homo <= my_B_virtual_end) THEN
    1204             :                   mp2_env%ri_grad%P_mo(kspin)%local_data(iiB, jjB) = &
    1205       45209 :                      mp2_env%ri_grad%P_ab(kspin)%array(i_global - homo - my_B_virtual_start + 1, j_global - homo)
    1206             :                END IF
    1207             :             END DO
    1208             :          END DO
    1209          24 :       ELSE IF (mp2_env%method == ri_mp2_laplace) THEN
    1210             :          ! With Laplace-SOS-MP2, we still have to calculate the matrix elements of the non-degenerate pairs
    1211         616 :          DO jjB = 1, ncol_local
    1212         592 :             j_global = col_indices(jjB)
    1213         592 :             IF (j_global <= homo) CYCLE
    1214        6764 :             DO iiB = 1, nrow_local
    1215        6240 :                i_global = row_indices(iiB)
    1216        6832 :                IF (ABS(Eigenval(i_global) - Eigenval(j_global)) < mp2_env%ri_grad%eps_canonical) THEN
    1217         565 :                   IF (my_B_virtual_start <= i_global - homo .AND. i_global - homo <= my_B_virtual_end) THEN
    1218             :                      mp2_env%ri_grad%P_mo(kspin)%local_data(iiB, jjB) = &
    1219         552 :                         mp2_env%ri_grad%P_ab(kspin)%array(i_global - homo - my_B_virtual_start + 1, j_global - homo)
    1220             :                   ELSE
    1221          13 :                      mp2_env%ri_grad%P_mo(kspin)%local_data(iiB, jjB) = 0.0_dp
    1222             :                   END IF
    1223             :                ELSE
    1224             :                   mp2_env%ri_grad%P_mo(kspin)%local_data(iiB, jjB) = &
    1225             :                      factor*mp2_env%ri_grad%P_mo(kspin)%local_data(iiB, jjB)/ &
    1226        5675 :                      (Eigenval(i_global) - Eigenval(j_global))
    1227             :                END IF
    1228             :             END DO
    1229             :          END DO
    1230             :       ELSE
    1231           0 :          CPABORT("Calculation of virt-virt block of density matrix is dealt with elsewhere!")
    1232             :       END IF
    1233             : 
    1234             :       ! send around the sub_group the local data and check if we
    1235             :       ! have to update our block with external elements
    1236        1050 :       ALLOCATE (mepos_2_grid(2, 0:para_env_sub%num_pe - 1))
    1237        1050 :       CALL para_env_sub%allgather([myprow, mypcol], mepos_2_grid)
    1238             : 
    1239        1050 :       ALLOCATE (sizes(2, 0:para_env_sub%num_pe - 1))
    1240        1050 :       CALL para_env_sub%allgather([nrow_local, ncol_local], sizes)
    1241             : 
    1242        1400 :       ALLOCATE (ab_rec(nrow_local, ncol_local))
    1243         366 :       DO proc_shift = 1, para_env_sub%num_pe - 1
    1244          16 :          proc_send = MODULO(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
    1245          16 :          proc_receive = MODULO(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
    1246             : 
    1247          16 :          send_prow = mepos_2_grid(1, proc_send)
    1248          16 :          send_pcol = mepos_2_grid(2, proc_send)
    1249             : 
    1250          16 :          send_row_size = sizes(1, proc_send)
    1251          16 :          send_col_size = sizes(2, proc_send)
    1252             : 
    1253          64 :          ALLOCATE (ab_send(send_row_size, send_col_size))
    1254        4922 :          ab_send = 0.0_dp
    1255             : 
    1256             :          ! first loop over row since in this way we can cycle
    1257         206 :          DO iiB = 1, send_row_size
    1258         190 :             i_global = mp2_env%ri_grad%P_mo(kspin)%matrix_struct%l2g_row(iiB, send_prow)
    1259         190 :             IF (i_global <= homo) CYCLE
    1260         154 :             i_global = i_global - homo
    1261         154 :             IF (.NOT. (my_B_virtual_start <= i_global .AND. i_global <= my_B_virtual_end)) CYCLE
    1262         655 :             DO jjB = 1, send_col_size
    1263         614 :                j_global = mp2_env%ri_grad%P_mo(kspin)%matrix_struct%l2g_col(jjB, send_pcol)
    1264         614 :                IF (j_global <= homo) CYCLE
    1265         487 :                j_global = j_global - homo
    1266         804 :                ab_send(iiB, jjB) = mp2_env%ri_grad%P_ab(kspin)%array(i_global - my_B_virtual_start + 1, j_global)
    1267             :             END DO
    1268             :          END DO
    1269             : 
    1270        4922 :          ab_rec = 0.0_dp
    1271             :          CALL para_env_sub%sendrecv(ab_send, proc_send, &
    1272          16 :                                     ab_rec, proc_receive)
    1273             :          mp2_env%ri_grad%P_mo(kspin)%local_data(1:nrow_local, 1:ncol_local) = &
    1274             :             mp2_env%ri_grad%P_mo(kspin)%local_data(1:nrow_local, 1:ncol_local) + &
    1275        4922 :             ab_rec(1:nrow_local, 1:ncol_local)
    1276             : 
    1277         366 :          DEALLOCATE (ab_send)
    1278             :       END DO
    1279         350 :       DEALLOCATE (ab_rec)
    1280         350 :       DEALLOCATE (mepos_2_grid)
    1281         350 :       DEALLOCATE (sizes)
    1282             : 
    1283             :       ! deallocate the local P_ab
    1284         350 :       DEALLOCATE (mp2_env%ri_grad%P_ab(kspin)%array)
    1285         350 :       CALL timestop(handle2)
    1286             : 
    1287             :       ! create also W_MP2_MO
    1288         350 :       CALL timeset(routineN//"_WMO", handle2)
    1289         350 :       IF (.NOT. ALLOCATED(mp2_env%ri_grad%W_mo)) THEN
    1290        1142 :          ALLOCATE (mp2_env%ri_grad%W_mo(SIZE(mp2_env%ri_grad%mo_coeff_o)))
    1291             :       END IF
    1292             : 
    1293         350 :       CALL cp_fm_create(mp2_env%ri_grad%W_mo(kspin), fm_struct_tmp, name="W_MP2_MO")
    1294         350 :       CALL cp_fm_struct_release(fm_struct_tmp)
    1295             : 
    1296             :       ! all block
    1297             :       CALL parallel_gemm('T', 'N', dimen, dimen, dimen, 2.0_dp*factor, &
    1298             :                          L_mu_q, mo_coeff, 0.0_dp, mp2_env%ri_grad%W_mo(kspin), &
    1299             :                          a_first_col=1, &
    1300             :                          a_first_row=1, &
    1301             :                          b_first_col=1, &
    1302             :                          b_first_row=1, &
    1303             :                          c_first_col=1, &
    1304         350 :                          c_first_row=1)
    1305             : 
    1306             :       ! occ-occ block
    1307             :       CALL parallel_gemm('T', 'N', homo, homo, dimen, -2.0_dp*factor, &
    1308             :                          L_mu_q, mo_coeff, 0.0_dp, mp2_env%ri_grad%W_mo(kspin), &
    1309             :                          a_first_col=1, &
    1310             :                          a_first_row=1, &
    1311             :                          b_first_col=1, &
    1312             :                          b_first_row=1, &
    1313             :                          c_first_col=1, &
    1314         350 :                          c_first_row=1)
    1315             : 
    1316             :       ! occ-virt block
    1317             :       CALL parallel_gemm('T', 'N', homo, virtual, dimen, 2.0_dp*factor, &
    1318             :                          mo_coeff, L_mu_q, 0.0_dp, mp2_env%ri_grad%W_mo(kspin), &
    1319             :                          a_first_col=1, &
    1320             :                          a_first_row=1, &
    1321             :                          b_first_col=homo + 1, &
    1322             :                          b_first_row=1, &
    1323             :                          c_first_col=homo + 1, &
    1324         350 :                          c_first_row=1)
    1325         350 :       CALL timestop(handle2)
    1326             : 
    1327             :       ! Calculate occ-virt block of the lagrangian in MO
    1328         350 :       CALL timeset(routineN//"_Ljb", handle2)
    1329         350 :       IF (.NOT. ALLOCATED(mp2_env%ri_grad%L_jb)) THEN
    1330        1142 :          ALLOCATE (mp2_env%ri_grad%L_jb(SIZE(mp2_env%ri_grad%mo_coeff_o)))
    1331             :       END IF
    1332             : 
    1333             :       CALL cp_fm_struct_create(fm_struct_tmp, para_env=para_env, context=blacs_env, &
    1334         350 :                                nrow_global=homo, ncol_global=virtual)
    1335         350 :       CALL cp_fm_create(mp2_env%ri_grad%L_jb(kspin), fm_struct_tmp, name="fm_L_jb")
    1336         350 :       CALL cp_fm_struct_release(fm_struct_tmp)
    1337             : 
    1338             :       ! first Virtual
    1339             :       CALL parallel_gemm('T', 'N', homo, virtual, dimen, 2.0_dp*factor, &
    1340             :                          L_mu_q, mo_coeff, 0.0_dp, mp2_env%ri_grad%L_jb(kspin), &
    1341             :                          a_first_col=1, &
    1342             :                          a_first_row=1, &
    1343             :                          b_first_col=homo + 1, &
    1344             :                          b_first_row=1, &
    1345             :                          c_first_col=1, &
    1346         350 :                          c_first_row=1)
    1347             :       ! then occupied
    1348             :       CALL parallel_gemm('T', 'N', homo, virtual, dimen, 2.0_dp*factor, &
    1349             :                          mo_coeff, L_mu_q, 1.0_dp, mp2_env%ri_grad%L_jb(kspin), &
    1350             :                          a_first_col=1, &
    1351             :                          a_first_row=1, &
    1352             :                          b_first_col=homo + 1, &
    1353             :                          b_first_row=1, &
    1354             :                          c_first_col=1, &
    1355         350 :                          c_first_row=1)
    1356             : 
    1357             :       ! finally release L_mu_q
    1358         350 :       CALL cp_fm_release(L_mu_q)
    1359         350 :       CALL timestop(handle2)
    1360             : 
    1361             :       ! here we should be done; next comes CPHF
    1362             : 
    1363         350 :       DEALLOCATE (pos_info)
    1364             : 
    1365         350 :       CALL timestop(handle)
    1366             : 
    1367        7000 :    END SUBROUTINE create_W_P
    1368             : 
    1369             : END MODULE mp2_ri_grad

Generated by: LCOV version 1.15