LCOV - code coverage report
Current view: top level - src - mp2_ri_grad.F (source / functions) Hit Total Coverage
Test: CP2K Regtests (git:b8e0b09) Lines: 607 615 98.7 %
Date: 2024-08-31 06:31:37 Functions: 5 5 100.0 %

          Line data    Source code
       1             : !--------------------------------------------------------------------------------------------------!
       2             : !   CP2K: A general program to perform molecular dynamics simulations                              !
       3             : !   Copyright 2000-2024 CP2K developers group <https://cp2k.org>                                   !
       4             : !                                                                                                  !
       5             : !   SPDX-License-Identifier: GPL-2.0-or-later                                                      !
       6             : !--------------------------------------------------------------------------------------------------!
       7             : 
       8             : ! **************************************************************************************************
       9             : !> \brief Routines to calculate gradients of RI-GPW-MP2 energy using pw
      10             : !> \par History
      11             : !>      10.2013 created [Mauro Del Ben]
      12             : ! **************************************************************************************************
      13             : MODULE mp2_ri_grad
      14             :    USE atomic_kind_types,               ONLY: atomic_kind_type,&
      15             :                                               get_atomic_kind_set
      16             :    USE cell_types,                      ONLY: cell_type
      17             :    USE cp_blacs_env,                    ONLY: cp_blacs_env_type
      18             :    USE cp_control_types,                ONLY: dft_control_type
      19             :    USE cp_dbcsr_api,                    ONLY: &
      20             :         dbcsr_add, dbcsr_copy, dbcsr_create, dbcsr_multiply, dbcsr_p_type, dbcsr_release, &
      21             :         dbcsr_set, dbcsr_transposed, dbcsr_type, dbcsr_type_no_symmetry, dbcsr_type_symmetric
      22             :    USE cp_dbcsr_operations,             ONLY: copy_dbcsr_to_fm,&
      23             :                                               dbcsr_deallocate_matrix_set
      24             :    USE cp_eri_mme_interface,            ONLY: cp_eri_mme_param
      25             :    USE cp_fm_struct,                    ONLY: cp_fm_struct_create,&
      26             :                                               cp_fm_struct_release,&
      27             :                                               cp_fm_struct_type
      28             :    USE cp_fm_types,                     ONLY: cp_fm_create,&
      29             :                                               cp_fm_get_info,&
      30             :                                               cp_fm_release,&
      31             :                                               cp_fm_set_all,&
      32             :                                               cp_fm_to_fm_submat,&
      33             :                                               cp_fm_type
      34             :    USE input_constants,                 ONLY: do_eri_gpw,&
      35             :                                               do_eri_mme,&
      36             :                                               ri_mp2_laplace,&
      37             :                                               ri_mp2_method_gpw,&
      38             :                                               ri_rpa_method_gpw
      39             :    USE kinds,                           ONLY: dp
      40             :    USE libint_2c_3c,                    ONLY: compare_potential_types
      41             :    USE message_passing,                 ONLY: mp_para_env_release,&
      42             :                                               mp_para_env_type,&
      43             :                                               mp_request_null,&
      44             :                                               mp_request_type,&
      45             :                                               mp_waitall
      46             :    USE mp2_eri,                         ONLY: mp2_eri_2c_integrate,&
      47             :                                               mp2_eri_3c_integrate,&
      48             :                                               mp2_eri_deallocate_forces,&
      49             :                                               mp2_eri_force
      50             :    USE mp2_eri_gpw,                     ONLY: cleanup_gpw,&
      51             :                                               integrate_potential_forces_2c,&
      52             :                                               integrate_potential_forces_3c_1c,&
      53             :                                               integrate_potential_forces_3c_2c,&
      54             :                                               prepare_gpw
      55             :    USE mp2_types,                       ONLY: integ_mat_buffer_type,&
      56             :                                               integ_mat_buffer_type_2D,&
      57             :                                               mp2_type
      58             :    USE parallel_gemm_api,               ONLY: parallel_gemm
      59             :    USE particle_types,                  ONLY: particle_type
      60             :    USE pw_env_types,                    ONLY: pw_env_type
      61             :    USE pw_poisson_types,                ONLY: pw_poisson_type
      62             :    USE pw_pool_types,                   ONLY: pw_pool_type
      63             :    USE pw_types,                        ONLY: pw_c1d_gs_type,&
      64             :                                               pw_r3d_rs_type
      65             :    USE qs_environment_types,            ONLY: get_qs_env,&
      66             :                                               qs_environment_type
      67             :    USE qs_force_types,                  ONLY: allocate_qs_force,&
      68             :                                               qs_force_type,&
      69             :                                               zero_qs_force
      70             :    USE qs_kind_types,                   ONLY: qs_kind_type
      71             :    USE qs_ks_types,                     ONLY: qs_ks_env_type
      72             :    USE qs_neighbor_list_types,          ONLY: neighbor_list_set_p_type
      73             :    USE task_list_types,                 ONLY: task_list_type
      74             :    USE util,                            ONLY: get_limit
      75             :    USE virial_types,                    ONLY: virial_type
      76             : #include "./base/base_uses.f90"
      77             : 
      78             :    IMPLICIT NONE
      79             : 
      80             :    PRIVATE
      81             : 
      82             :    CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'mp2_ri_grad'
      83             : 
      84             :    PUBLIC :: calc_ri_mp2_nonsep
      85             : 
      86             : CONTAINS
      87             : 
      88             : ! **************************************************************************************************
      89             : !> \brief Calculate the non-separable part of the gradients and update the
      90             : !>        Lagrangian
      91             : !> \param qs_env ...
      92             : !> \param mp2_env ...
      93             : !> \param para_env ...
      94             : !> \param para_env_sub ...
      95             : !> \param cell ...
      96             : !> \param particle_set ...
      97             : !> \param atomic_kind_set ...
      98             : !> \param qs_kind_set ...
      99             : !> \param mo_coeff ...
     100             : !> \param nmo ...
     101             : !> \param homo ...
     102             : !> \param dimen_RI ...
     103             : !> \param Eigenval ...
     104             : !> \param my_group_L_start ...
     105             : !> \param my_group_L_end ...
     106             : !> \param my_group_L_size ...
     107             : !> \param sab_orb_sub ...
     108             : !> \param mat_munu ...
     109             : !> \param blacs_env_sub ...
     110             : !> \author Mauro Del Ben
     111             : ! **************************************************************************************************
     112         260 :    SUBROUTINE calc_ri_mp2_nonsep(qs_env, mp2_env, para_env, para_env_sub, cell, particle_set, &
     113         260 :                                  atomic_kind_set, qs_kind_set, mo_coeff, nmo, homo, dimen_RI, Eigenval, &
     114             :                                  my_group_L_start, my_group_L_end, my_group_L_size, sab_orb_sub, mat_munu, &
     115             :                                  blacs_env_sub)
     116             :       TYPE(qs_environment_type), POINTER                 :: qs_env
     117             :       TYPE(mp2_type)                                     :: mp2_env
     118             :       TYPE(mp_para_env_type), POINTER                    :: para_env, para_env_sub
     119             :       TYPE(cell_type), POINTER                           :: cell
     120             :       TYPE(particle_type), DIMENSION(:), POINTER         :: particle_set
     121             :       TYPE(atomic_kind_type), DIMENSION(:), POINTER      :: atomic_kind_set
     122             :       TYPE(qs_kind_type), DIMENSION(:), POINTER          :: qs_kind_set
     123             :       TYPE(cp_fm_type), DIMENSION(:), INTENT(IN)         :: mo_coeff
     124             :       INTEGER, INTENT(IN)                                :: nmo
     125             :       INTEGER, DIMENSION(:), INTENT(IN)                  :: homo
     126             :       INTEGER, INTENT(IN)                                :: dimen_RI
     127             :       REAL(KIND=dp), DIMENSION(:, :), INTENT(IN)         :: Eigenval
     128             :       INTEGER, INTENT(IN)                                :: my_group_L_start, my_group_L_end, &
     129             :                                                             my_group_L_size
     130             :       TYPE(neighbor_list_set_p_type), DIMENSION(:), &
     131             :          POINTER                                         :: sab_orb_sub
     132             :       TYPE(dbcsr_p_type), INTENT(INOUT)                  :: mat_munu
     133             :       TYPE(cp_blacs_env_type), POINTER                   :: blacs_env_sub
     134             : 
     135             :       CHARACTER(LEN=*), PARAMETER :: routineN = 'calc_ri_mp2_nonsep'
     136             : 
     137             :       INTEGER                                            :: dimen, eri_method, handle, handle2, i, &
     138             :                                                             ikind, ispin, itmp(2), L_counter, LLL, &
     139             :                                                             my_P_end, my_P_size, my_P_start, nspins
     140         260 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: atom_of_kind, kind_of, natom_of_kind, &
     141         260 :                                                             virtual
     142             :       LOGICAL                                            :: alpha_beta, use_virial
     143             :       REAL(KIND=dp)                                      :: cutoff_old, eps_filter, factor, &
     144             :                                                             factor_2c, relative_cutoff_old
     145         260 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: e_cutoff_old
     146         260 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :)        :: G_PQ_local, G_PQ_local_2
     147             :       REAL(KIND=dp), DIMENSION(3, 3)                     :: h_stress, pv_virial
     148         260 :       REAL(KIND=dp), DIMENSION(:, :), POINTER            :: I_tmp2
     149             :       TYPE(cp_eri_mme_param), POINTER                    :: eri_param
     150             :       TYPE(cp_fm_struct_type), POINTER                   :: fm_struct_tmp
     151         260 :       TYPE(cp_fm_type), ALLOCATABLE, DIMENSION(:)        :: L1_mu_i, L2_nu_a
     152             :       TYPE(dbcsr_p_type)                                 :: matrix_P_munu
     153             :       TYPE(dbcsr_p_type), ALLOCATABLE, DIMENSION(:)      :: mo_coeff_o, mo_coeff_v
     154         260 :       TYPE(dbcsr_p_type), ALLOCATABLE, DIMENSION(:, :)   :: G_P_ia
     155         260 :       TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: mat_munu_local, matrix_P_munu_local
     156             :       TYPE(dbcsr_type)                                   :: matrix_P_munu_nosym
     157         260 :       TYPE(dbcsr_type), ALLOCATABLE, DIMENSION(:)        :: Lag_mu_i_1, Lag_nu_a_2, matrix_P_inu
     158             :       TYPE(dft_control_type), POINTER                    :: dft_control
     159         260 :       TYPE(mp2_eri_force), ALLOCATABLE, DIMENSION(:)     :: force_2c, force_2c_RI, force_3c_aux, &
     160         260 :                                                             force_3c_orb_mu, force_3c_orb_nu
     161        1040 :       TYPE(pw_c1d_gs_type)                               :: dvg(3), pot_g, rho_g, rho_g_copy
     162             :       TYPE(pw_env_type), POINTER                         :: pw_env_sub
     163             :       TYPE(pw_poisson_type), POINTER                     :: poisson_env
     164             :       TYPE(pw_pool_type), POINTER                        :: auxbas_pw_pool
     165             :       TYPE(pw_r3d_rs_type)                               :: psi_L, rho_r
     166         260 :       TYPE(qs_force_type), DIMENSION(:), POINTER         :: force, mp2_force
     167             :       TYPE(qs_ks_env_type), POINTER                      :: ks_env
     168             :       TYPE(task_list_type), POINTER                      :: task_list_sub
     169             :       TYPE(virial_type), POINTER                         :: virial
     170             : 
     171         260 :       CALL timeset(routineN, handle)
     172             : 
     173         260 :       eri_method = mp2_env%eri_method
     174         260 :       eri_param => mp2_env%eri_mme_param
     175             : 
     176             :       ! Find out whether we have a closed or open shell
     177         260 :       nspins = SIZE(homo)
     178         260 :       alpha_beta = (nspins == 2)
     179             : 
     180         260 :       dimen = nmo
     181         780 :       ALLOCATE (virtual(nspins))
     182         604 :       virtual(:) = dimen - homo(:)
     183         260 :       eps_filter = mp2_env%mp2_gpw%eps_filter
     184       29139 :       ALLOCATE (mo_coeff_o(nspins), mo_coeff_v(nspins), G_P_ia(nspins, my_group_L_size))
     185         604 :       DO ispin = 1, nspins
     186         344 :          mo_coeff_o(ispin)%matrix => mp2_env%ri_grad%mo_coeff_o(ispin)%matrix
     187         344 :          mo_coeff_v(ispin)%matrix => mp2_env%ri_grad%mo_coeff_v(ispin)%matrix
     188       15835 :          DO LLL = 1, my_group_L_size
     189       15575 :             G_P_ia(ispin, LLL)%matrix => mp2_env%ri_grad%G_P_ia(LLL, ispin)%matrix
     190             :          END DO
     191             :       END DO
     192         260 :       DEALLOCATE (mp2_env%ri_grad%G_P_ia)
     193             : 
     194         260 :       itmp = get_limit(dimen_RI, para_env_sub%num_pe, para_env_sub%mepos)
     195         260 :       my_P_start = itmp(1)
     196         260 :       my_P_end = itmp(2)
     197         260 :       my_P_size = itmp(2) - itmp(1) + 1
     198             : 
     199        1040 :       ALLOCATE (G_PQ_local(dimen_RI, my_group_L_size))
     200      981164 :       G_PQ_local = 0.0_dp
     201      927730 :       G_PQ_local(my_P_start:my_P_end, :) = mp2_env%ri_grad%Gamma_PQ
     202         260 :       DEALLOCATE (mp2_env%ri_grad%Gamma_PQ)
     203      927730 :       G_PQ_local(my_P_start:my_P_end, :) = G_PQ_local(my_P_start:my_P_end, :)/REAL(nspins, dp)
     204         260 :       CALL para_env_sub%sum(G_PQ_local)
     205         260 :       IF (.NOT. compare_potential_types(mp2_env%ri_metric, mp2_env%potential_parameter)) THEN
     206          36 :          ALLOCATE (G_PQ_local_2(dimen_RI, my_group_L_size))
     207       41844 :          G_PQ_local_2 = 0.0_dp
     208       41844 :          G_PQ_local_2(my_P_start:my_P_end, :) = mp2_env%ri_grad%Gamma_PQ_2
     209          12 :          DEALLOCATE (mp2_env%ri_grad%Gamma_PQ_2)
     210       41844 :          G_PQ_local_2(my_P_start:my_P_end, :) = G_PQ_local_2(my_P_start:my_P_end, :)/REAL(nspins, dp)
     211          12 :          CALL para_env_sub%sum(G_PQ_local_2)
     212             :       END IF
     213             : 
     214             :       ! create matrix holding the back transformation (G_P_inu)
     215        1124 :       ALLOCATE (matrix_P_inu(nspins))
     216         604 :       DO ispin = 1, nspins
     217         604 :          CALL dbcsr_create(matrix_P_inu(ispin), template=mo_coeff_o(ispin)%matrix)
     218             :       END DO
     219             : 
     220             :       ! non symmetric matrix
     221             :       CALL dbcsr_create(matrix_P_munu_nosym, template=mat_munu%matrix, &
     222         260 :                         matrix_type=dbcsr_type_no_symmetry)
     223             : 
     224             :       ! create Lagrangian matrices in mixed AO/MO formalism
     225         864 :       ALLOCATE (Lag_mu_i_1(nspins))
     226         604 :       DO ispin = 1, nspins
     227         344 :          CALL dbcsr_create(Lag_mu_i_1(ispin), template=mo_coeff_o(ispin)%matrix)
     228         604 :          CALL dbcsr_set(Lag_mu_i_1(ispin), 0.0_dp)
     229             :       END DO
     230             : 
     231         864 :       ALLOCATE (Lag_nu_a_2(nspins))
     232         604 :       DO ispin = 1, nspins
     233         344 :          CALL dbcsr_create(Lag_nu_a_2(ispin), template=mo_coeff_v(ispin)%matrix)
     234         604 :          CALL dbcsr_set(Lag_nu_a_2(ispin), 0.0_dp)
     235             :       END DO
     236             : 
     237             :       ! get forces
     238         260 :       NULLIFY (force, virial)
     239         260 :       CALL get_qs_env(qs_env=qs_env, force=force, virial=virial)
     240             : 
     241             :       ! check if we want to calculate the virial
     242         260 :       use_virial = virial%pv_availability .AND. (.NOT. virial%pv_numer)
     243             : 
     244         260 :       CALL get_atomic_kind_set(atomic_kind_set=atomic_kind_set, natom_of_kind=natom_of_kind)
     245         260 :       NULLIFY (mp2_force)
     246         260 :       CALL allocate_qs_force(mp2_force, natom_of_kind)
     247         260 :       DEALLOCATE (natom_of_kind)
     248         260 :       CALL zero_qs_force(mp2_force)
     249         260 :       mp2_env%ri_grad%mp2_force => mp2_force
     250             : 
     251         260 :       factor_2c = -4.0_dp
     252         260 :       IF (mp2_env%method == ri_rpa_method_gpw) THEN
     253          20 :          factor_2c = -1.0_dp
     254          20 :          IF (alpha_beta) factor_2c = -2.0_dp
     255             :       END IF
     256             : 
     257             :       ! prepare integral derivatives with mme method
     258         260 :       IF (eri_method .EQ. do_eri_mme) THEN
     259        1184 :          ALLOCATE (matrix_P_munu_local(my_group_L_size))
     260        1158 :          ALLOCATE (mat_munu_local(my_group_L_size))
     261          26 :          L_counter = 0
     262        1132 :          DO LLL = my_group_L_start, my_group_L_end
     263        1106 :             L_counter = L_counter + 1
     264        1106 :             ALLOCATE (mat_munu_local(L_counter)%matrix)
     265             :             CALL dbcsr_create(mat_munu_local(L_counter)%matrix, template=mat_munu%matrix, &
     266        1106 :                               matrix_type=dbcsr_type_symmetric)
     267        1106 :             CALL dbcsr_copy(mat_munu_local(L_counter)%matrix, mat_munu%matrix)
     268        1106 :             CALL dbcsr_set(mat_munu_local(L_counter)%matrix, 0.0_dp)
     269             : 
     270             :             CALL G_P_transform_MO_to_AO(matrix_P_munu_local(L_counter)%matrix, matrix_P_munu_nosym, mat_munu%matrix, &
     271             :                                         G_P_ia(:, L_counter), matrix_P_inu, &
     272        1132 :                                         mo_coeff_v, mo_coeff_o, eps_filter)
     273             :          END DO
     274             : 
     275          78 :          ALLOCATE (I_tmp2(dimen_RI, my_group_L_size))
     276       95790 :          I_tmp2(:, :) = 0.0_dp
     277             :          CALL mp2_eri_2c_integrate(eri_param, mp2_env%potential_parameter, para_env_sub, qs_env, &
     278             :                                    basis_type_a="RI_AUX", basis_type_b="RI_AUX", &
     279             :                                    hab=I_tmp2, first_b=my_group_L_start, last_b=my_group_L_end, &
     280          26 :                                    eri_method=eri_method, pab=G_PQ_local, force_a=force_2c)
     281          26 :          IF (.NOT. compare_potential_types(mp2_env%potential_parameter, mp2_env%ri_metric)) THEN
     282           0 :             I_tmp2(:, :) = 0.0_dp
     283             :             CALL mp2_eri_2c_integrate(eri_param, mp2_env%ri_metric, para_env_sub, qs_env, &
     284             :                                       basis_type_a="RI_AUX", basis_type_b="RI_AUX", &
     285             :                                       hab=I_tmp2, first_b=my_group_L_start, last_b=my_group_L_end, &
     286           0 :                                       eri_method=eri_method, pab=G_PQ_local_2, force_a=force_2c_RI)
     287             :          END IF
     288          26 :          DEALLOCATE (I_tmp2)
     289             : 
     290             :          CALL mp2_eri_3c_integrate(eri_param, mp2_env%ri_metric, para_env_sub, qs_env, &
     291             :                                    first_c=my_group_L_start, last_c=my_group_L_end, mat_ab=mat_munu_local, &
     292             :                                    basis_type_a="ORB", basis_type_b="ORB", basis_type_c="RI_AUX", &
     293             :                                    sab_nl=sab_orb_sub, eri_method=eri_method, &
     294             :                                    pabc=matrix_P_munu_local, &
     295          26 :                                    force_a=force_3c_orb_mu, force_b=force_3c_orb_nu, force_c=force_3c_aux)
     296             : 
     297          26 :          L_counter = 0
     298        1132 :          DO LLL = my_group_L_start, my_group_L_end
     299        1106 :             L_counter = L_counter + 1
     300        2432 :             DO ispin = 1, nspins
     301             :                CALL dbcsr_multiply("N", "T", 1.0_dp, mo_coeff_v(ispin)%matrix, G_P_ia(ispin, L_counter)%matrix, &
     302        2432 :                                    0.0_dp, matrix_P_inu(ispin), filter_eps=eps_filter)
     303             :             END DO
     304             : 
     305             :             ! The matrices of G_P_ia are deallocated here
     306             :             CALL update_lagrangian(mat_munu_local(L_counter)%matrix, matrix_P_inu, Lag_mu_i_1, &
     307             :                                    G_P_ia(:, L_counter), mo_coeff_o, Lag_nu_a_2, &
     308        1132 :                                    eps_filter)
     309             :          END DO
     310             : 
     311          72 :          DO ikind = 1, SIZE(force)
     312             :             mp2_force(ikind)%mp2_non_sep(:, :) = factor_2c*force_2c(ikind)%forces(:, :) + &
     313             :                                                  force_3c_orb_mu(ikind)%forces(:, :) + &
     314             :                                                  force_3c_orb_nu(ikind)%forces(:, :) + &
     315         334 :                                                  force_3c_aux(ikind)%forces(:, :)
     316             : 
     317          72 :             IF (.NOT. compare_potential_types(mp2_env%potential_parameter, mp2_env%ri_metric)) THEN
     318           0 :                mp2_force(ikind)%mp2_non_sep(:, :) = mp2_force(ikind)%mp2_non_sep(:, :) + factor_2c*force_2c_RI(ikind)%forces
     319             :             END IF
     320             :          END DO
     321             : 
     322          26 :          CALL mp2_eri_deallocate_forces(force_2c)
     323          26 :          IF (.NOT. compare_potential_types(mp2_env%potential_parameter, mp2_env%ri_metric)) THEN
     324           0 :             CALL mp2_eri_deallocate_forces(force_2c_RI)
     325             :          END IF
     326          26 :          CALL mp2_eri_deallocate_forces(force_3c_aux)
     327          26 :          CALL mp2_eri_deallocate_forces(force_3c_orb_mu)
     328          26 :          CALL mp2_eri_deallocate_forces(force_3c_orb_nu)
     329          26 :          CALL dbcsr_deallocate_matrix_set(matrix_P_munu_local)
     330          26 :          CALL dbcsr_deallocate_matrix_set(mat_munu_local)
     331             : 
     332         234 :       ELSEIF (eri_method == do_eri_gpw) THEN
     333         234 :          CALL get_qs_env(qs_env, ks_env=ks_env)
     334             : 
     335         234 :          CALL get_atomic_kind_set(atomic_kind_set, kind_of=kind_of, atom_of_kind=atom_of_kind)
     336             : 
     337             :          ! Supporting stuff for GPW
     338             :          CALL prepare_gpw(qs_env, dft_control, e_cutoff_old, cutoff_old, relative_cutoff_old, para_env_sub, pw_env_sub, &
     339         234 :                           auxbas_pw_pool, poisson_env, task_list_sub, rho_r, rho_g, pot_g, psi_L, sab_orb_sub)
     340             : 
     341             :          ! in case virial is required we need auxiliary pw
     342             :          ! for calculate the MP2-volume contribution to the virial
     343             :          ! (hartree potential derivatives)
     344         234 :          IF (use_virial) THEN
     345          46 :             CALL auxbas_pw_pool%create_pw(rho_g_copy)
     346         184 :             DO i = 1, 3
     347         184 :                CALL auxbas_pw_pool%create_pw(dvg(i))
     348             :             END DO
     349             :          END IF
     350             : 
     351             :          ! start main loop over auxiliary basis functions
     352         234 :          CALL timeset(routineN//"_loop", handle2)
     353             : 
     354         234 :          IF (use_virial) h_stress = 0.0_dp
     355             : 
     356         234 :          L_counter = 0
     357       10528 :          DO LLL = my_group_L_start, my_group_L_end
     358       10294 :             L_counter = L_counter + 1
     359             : 
     360             :             CALL G_P_transform_MO_to_AO(matrix_P_munu%matrix, matrix_P_munu_nosym, mat_munu%matrix, &
     361             :                                         G_P_ia(:, L_counter), matrix_P_inu, &
     362       10294 :                                         mo_coeff_v, mo_coeff_o, eps_filter)
     363             : 
     364             :             CALL integrate_potential_forces_2c(rho_r, LLL, rho_g, atomic_kind_set, &
     365             :                                                qs_kind_set, particle_set, cell, pw_env_sub, poisson_env, &
     366             :                                                pot_g, mp2_env%potential_parameter, use_virial, &
     367             :                                                rho_g_copy, dvg, kind_of, atom_of_kind, G_PQ_local(:, L_counter), &
     368       10294 :                                                mp2_force, h_stress, para_env_sub, dft_control, psi_L, factor_2c)
     369             : 
     370       10294 :             IF (.NOT. compare_potential_types(mp2_env%ri_metric, mp2_env%potential_parameter)) THEN
     371             : 
     372             :                CALL integrate_potential_forces_2c(rho_r, LLL, rho_g, atomic_kind_set, &
     373             :                                                   qs_kind_set, particle_set, cell, pw_env_sub, poisson_env, &
     374             :                                                   pot_g, mp2_env%ri_metric, use_virial, &
     375             :                                                   rho_g_copy, dvg, kind_of, atom_of_kind, G_PQ_local_2(:, L_counter), &
     376         498 :                                                   mp2_force, h_stress, para_env_sub, dft_control, psi_L, factor_2c)
     377             :             END IF
     378             : 
     379       34042 :             IF (use_virial) pv_virial = virial%pv_virial
     380             :             CALL integrate_potential_forces_3c_1c(mat_munu, rho_r, matrix_P_munu, qs_env, pw_env_sub, &
     381       10294 :                                                   task_list_sub)
     382       10294 :             IF (use_virial) THEN
     383       25727 :                h_stress = h_stress + (virial%pv_virial - pv_virial)
     384       25727 :                virial%pv_virial = pv_virial
     385             :             END IF
     386             : 
     387             :             ! The matrices of G_P_ia are deallocated here
     388             :             CALL update_lagrangian(mat_munu%matrix, matrix_P_inu, Lag_mu_i_1, &
     389             :                                    G_P_ia(:, L_counter), mo_coeff_o, Lag_nu_a_2, &
     390       10294 :                                    eps_filter)
     391             : 
     392             :             CALL integrate_potential_forces_3c_2c(matrix_P_munu, rho_r, rho_g, task_list_sub, pw_env_sub, &
     393             :                                                   mp2_env%ri_metric, &
     394             :                                                   ks_env, poisson_env, pot_g, use_virial, rho_g_copy, dvg, &
     395             :                                                   h_stress, para_env_sub, kind_of, atom_of_kind, &
     396       10528 :                                                   qs_kind_set, particle_set, cell, LLL, mp2_force, dft_control)
     397             :          END DO
     398             : 
     399         234 :          CALL timestop(handle2)
     400             : 
     401         234 :          DEALLOCATE (kind_of)
     402         234 :          DEALLOCATE (atom_of_kind)
     403             : 
     404         234 :          IF (use_virial) THEN
     405          46 :             CALL auxbas_pw_pool%give_back_pw(rho_g_copy)
     406         184 :             DO i = 1, 3
     407         184 :                CALL auxbas_pw_pool%give_back_pw(dvg(i))
     408             :             END DO
     409             :          END IF
     410             : 
     411             :          CALL cleanup_gpw(qs_env, e_cutoff_old, cutoff_old, relative_cutoff_old, para_env_sub, pw_env_sub, &
     412         234 :                           task_list_sub, auxbas_pw_pool, rho_r, rho_g, pot_g, psi_L)
     413             : 
     414         234 :          CALL dbcsr_release(matrix_P_munu%matrix)
     415         234 :          DEALLOCATE (matrix_P_munu%matrix)
     416             : 
     417             :       END IF
     418             : 
     419         812 :       IF (use_virial) mp2_env%ri_grad%mp2_virial = h_stress
     420             : 
     421         260 :       DEALLOCATE (G_PQ_local)
     422         260 :       IF (.NOT. compare_potential_types(mp2_env%ri_metric, mp2_env%potential_parameter)) DEALLOCATE (G_PQ_local_2)
     423             : 
     424         260 :       CALL dbcsr_release(matrix_P_munu_nosym)
     425             : 
     426         604 :       DO ispin = 1, nspins
     427         604 :          CALL dbcsr_release(matrix_P_inu(ispin))
     428             :       END DO
     429         260 :       DEALLOCATE (matrix_P_inu, G_P_ia)
     430             : 
     431             :       ! move the forces in the correct place
     432         260 :       IF (eri_method .EQ. do_eri_gpw) THEN
     433         692 :          DO ikind = 1, SIZE(mp2_force)
     434        6218 :             mp2_force(ikind)%mp2_non_sep(:, :) = force(ikind)%rho_elec(:, :)
     435        3572 :             force(ikind)%rho_elec(:, :) = 0.0_dp
     436             :          END DO
     437             :       END IF
     438             : 
     439             :       ! Now we move from the local matrices to the global ones
     440             :       ! defined over all MPI tasks
     441             :       ! Start with moving from the DBCSR to FM for the lagrangians
     442             : 
     443        1728 :       ALLOCATE (L1_mu_i(nspins), L2_nu_a(nspins))
     444         604 :       DO ispin = 1, nspins
     445             :          ! Now we move from the local matrices to the global ones
     446             :          ! defined over all MPI tasks
     447             :          ! Start with moving from the DBCSR to FM for the lagrangians
     448         344 :          NULLIFY (fm_struct_tmp)
     449             :          CALL cp_fm_struct_create(fm_struct_tmp, para_env=para_env_sub, context=blacs_env_sub, &
     450         344 :                                   nrow_global=dimen, ncol_global=homo(ispin))
     451         344 :          CALL cp_fm_create(L1_mu_i(ispin), fm_struct_tmp, name="Lag_mu_i")
     452         344 :          CALL cp_fm_struct_release(fm_struct_tmp)
     453         344 :          CALL cp_fm_set_all(L1_mu_i(ispin), 0.0_dp)
     454         344 :          CALL copy_dbcsr_to_fm(matrix=Lag_mu_i_1(ispin), fm=L1_mu_i(ispin))
     455             : 
     456             :          ! release Lag_mu_i_1
     457         344 :          CALL dbcsr_release(Lag_mu_i_1(ispin))
     458             : 
     459         344 :          NULLIFY (fm_struct_tmp)
     460             :          CALL cp_fm_struct_create(fm_struct_tmp, para_env=para_env_sub, context=blacs_env_sub, &
     461         344 :                                   nrow_global=dimen, ncol_global=virtual(ispin))
     462         344 :          CALL cp_fm_create(L2_nu_a(ispin), fm_struct_tmp, name="Lag_nu_a")
     463         344 :          CALL cp_fm_struct_release(fm_struct_tmp)
     464         344 :          CALL cp_fm_set_all(L2_nu_a(ispin), 0.0_dp)
     465         344 :          CALL copy_dbcsr_to_fm(matrix=Lag_nu_a_2(ispin), fm=L2_nu_a(ispin))
     466             : 
     467             :          ! release Lag_nu_a_2
     468         604 :          CALL dbcsr_release(Lag_nu_a_2(ispin))
     469             :       END DO
     470         260 :       DEALLOCATE (Lag_mu_i_1, Lag_nu_a_2)
     471             : 
     472             :       ! Set the factor to multiply P_ij (depends on the open or closed shell)
     473         260 :       factor = 1.0_dp
     474         260 :       IF (alpha_beta) factor = 0.50_dp
     475             : 
     476         604 :       DO ispin = 1, nspins
     477             :          CALL create_W_P(qs_env, mp2_env, mo_coeff(ispin), homo(ispin), virtual(ispin), dimen, para_env, &
     478             :                          para_env_sub, Eigenval(:, ispin), L1_mu_i(ispin), L2_nu_a(ispin), &
     479         604 :                          factor, ispin)
     480             :       END DO
     481         260 :       DEALLOCATE (L1_mu_i, L2_nu_a)
     482             : 
     483         260 :       CALL timestop(handle)
     484             : 
     485         520 :    END SUBROUTINE calc_ri_mp2_nonsep
     486             : 
     487             : ! **************************************************************************************************
     488             : !> \brief Transforms G_P_ia to G_P_munu
     489             : !> \param G_P_munu The container for G_P_munu, will be allocated and created if not allocated on entry
     490             : !> \param G_P_munu_nosym ...
     491             : !> \param mat_munu ...
     492             : !> \param G_P_ia ...
     493             : !> \param G_P_inu ...
     494             : !> \param mo_coeff_v ...
     495             : !> \param mo_coeff_o ...
     496             : !> \param eps_filter ...
     497             : ! **************************************************************************************************
     498       11400 :    SUBROUTINE G_P_transform_MO_to_AO(G_P_munu, G_P_munu_nosym, mat_munu, G_P_ia, G_P_inu, &
     499       11400 :                                      mo_coeff_v, mo_coeff_o, eps_filter)
     500             :       TYPE(dbcsr_type), POINTER                          :: G_P_munu
     501             :       TYPE(dbcsr_type), INTENT(INOUT)                    :: G_P_munu_nosym, mat_munu
     502             :       TYPE(dbcsr_p_type), DIMENSION(:), INTENT(IN)       :: G_P_ia
     503             :       TYPE(dbcsr_type), DIMENSION(:), INTENT(INOUT)      :: G_P_inu
     504             :       TYPE(dbcsr_p_type), DIMENSION(:), INTENT(IN)       :: mo_coeff_v, mo_coeff_o
     505             :       REAL(KIND=dp), INTENT(IN)                          :: eps_filter
     506             : 
     507             :       CHARACTER(LEN=*), PARAMETER :: routineN = 'G_P_transform_MO_to_AO'
     508             : 
     509             :       INTEGER                                            :: handle
     510             : 
     511       11400 :       IF (.NOT. ASSOCIATED(G_P_munu)) THEN
     512        1340 :          ALLOCATE (G_P_munu)
     513             :          CALL dbcsr_create(G_P_munu, template=mat_munu, &
     514        1340 :                            matrix_type=dbcsr_type_symmetric)
     515             :       END IF
     516             : 
     517       11400 :       CALL G_P_transform_alpha_beta(G_P_ia, G_P_inu, G_P_munu_nosym, mo_coeff_v, mo_coeff_o, eps_filter)
     518             : 
     519             :       ! symmetrize
     520       11400 :       CALL timeset(routineN//"_symmetrize", handle)
     521       11400 :       CALL dbcsr_set(G_P_munu, 0.0_dp)
     522       11400 :       CALL dbcsr_transposed(G_P_munu, G_P_munu_nosym)
     523             :       CALL dbcsr_add(G_P_munu, G_P_munu_nosym, &
     524       11400 :                      alpha_scalar=2.0_dp, beta_scalar=2.0_dp)
     525             :       ! this is a trick to avoid that integrate_v_rspace starts to cry
     526       11400 :       CALL dbcsr_copy(mat_munu, G_P_munu, keep_sparsity=.TRUE.)
     527       11400 :       CALL dbcsr_copy(G_P_munu, mat_munu)
     528             : 
     529       11400 :       CALL timestop(handle)
     530             : 
     531       11400 :    END SUBROUTINE G_P_transform_MO_to_AO
     532             : 
     533             : ! **************************************************************************************************
     534             : !> \brief ...
     535             : !> \param G_P_ia ...
     536             : !> \param G_P_inu ...
     537             : !> \param G_P_munu ...
     538             : !> \param mo_coeff_v ...
     539             : !> \param mo_coeff_o ...
     540             : !> \param eps_filter ...
     541             : ! **************************************************************************************************
     542       11400 :    SUBROUTINE G_P_transform_alpha_beta(G_P_ia, G_P_inu, G_P_munu, mo_coeff_v, mo_coeff_o, eps_filter)
     543             :       TYPE(dbcsr_p_type), DIMENSION(:), INTENT(IN)       :: G_P_ia
     544             :       TYPE(dbcsr_type), DIMENSION(:), INTENT(INOUT)      :: G_P_inu
     545             :       TYPE(dbcsr_type), INTENT(INOUT)                    :: G_P_munu
     546             :       TYPE(dbcsr_p_type), DIMENSION(:), INTENT(IN)       :: mo_coeff_v, mo_coeff_o
     547             :       REAL(KIND=dp), INTENT(IN)                          :: eps_filter
     548             : 
     549             :       CHARACTER(LEN=*), PARAMETER :: routineN = 'G_P_transform_alpha_beta'
     550             : 
     551             :       INTEGER                                            :: handle, ispin
     552             :       REAL(KIND=dp)                                      :: factor
     553             : 
     554       11400 :       CALL timeset(routineN, handle)
     555             : 
     556       11400 :       factor = 1.0_dp/REAL(SIZE(G_P_ia), dp)
     557             : 
     558       11400 :       CALL dbcsr_set(G_P_munu, 0.0_dp)
     559             : 
     560       26631 :       DO ispin = 1, SIZE(G_P_ia)
     561             :          ! first back-transformation a->nu
     562             :          CALL dbcsr_multiply("N", "T", 1.0_dp, mo_coeff_v(ispin)%matrix, G_P_ia(ispin)%matrix, &
     563       15231 :                              0.0_dp, G_P_inu(ispin), filter_eps=eps_filter)
     564             : 
     565             :          ! second back-transformation i->mu
     566             :          CALL dbcsr_multiply("N", "T", factor, G_P_inu(ispin), mo_coeff_o(ispin)%matrix, &
     567       26631 :                              1.0_dp, G_P_munu, filter_eps=eps_filter)
     568             :       END DO
     569             : 
     570       11400 :       CALL timestop(handle)
     571             : 
     572       11400 :    END SUBROUTINE G_P_transform_alpha_beta
     573             : 
     574             : ! **************************************************************************************************
     575             : !> \brief ...
     576             : !> \param mat_munu ...
     577             : !> \param matrix_P_inu ...
     578             : !> \param Lag_mu_i_1 ...
     579             : !> \param G_P_ia ...
     580             : !> \param mo_coeff_o ...
     581             : !> \param Lag_nu_a_2 ...
     582             : !> \param eps_filter ...
     583             : ! **************************************************************************************************
     584       11400 :    SUBROUTINE update_lagrangian(mat_munu, matrix_P_inu, Lag_mu_i_1, &
     585       22800 :                                 G_P_ia, mo_coeff_o, Lag_nu_a_2, &
     586             :                                 eps_filter)
     587             :       TYPE(dbcsr_type), INTENT(IN)                       :: mat_munu
     588             :       TYPE(dbcsr_type), DIMENSION(:), INTENT(INOUT)      :: matrix_P_inu, Lag_mu_i_1
     589             :       TYPE(dbcsr_p_type), DIMENSION(:), INTENT(INOUT)    :: G_P_ia
     590             :       TYPE(dbcsr_p_type), DIMENSION(:), INTENT(IN)       :: mo_coeff_o
     591             :       TYPE(dbcsr_type), DIMENSION(:), INTENT(INOUT)      :: Lag_nu_a_2
     592             :       REAL(KIND=dp), INTENT(IN)                          :: eps_filter
     593             : 
     594             :       CHARACTER(LEN=*), PARAMETER                        :: routineN = 'update_lagrangian'
     595             : 
     596             :       INTEGER                                            :: handle, ispin
     597             : 
     598             :       ! update lagrangian
     599       11400 :       CALL timeset(routineN, handle)
     600             : 
     601       26631 :       DO ispin = 1, SIZE(G_P_ia)
     602             :          ! first contract mat_munu with the half back transformed Gamma_i_nu
     603             :          ! in order to update Lag_mu_i_1
     604             :          CALL dbcsr_multiply("N", "N", 1.0_dp, mat_munu, matrix_P_inu(ispin), &
     605       15231 :                              1.0_dp, Lag_mu_i_1(ispin), filter_eps=eps_filter)
     606             : 
     607             :          ! transform first index of mat_munu and store the result into matrix_P_inu
     608       15231 :          CALL dbcsr_set(matrix_P_inu(ispin), 0.0_dp)
     609             :          CALL dbcsr_multiply("N", "N", 1.0_dp, mat_munu, mo_coeff_o(ispin)%matrix, &
     610       15231 :                              0.0_dp, matrix_P_inu(ispin), filter_eps=eps_filter)
     611             : 
     612             :          ! contract the transformend matrix_P_inu with the untransformend Gamma_i_a
     613             :          ! in order to update Lag_nu_a_2
     614             :          CALL dbcsr_multiply("N", "N", -1.0_dp, matrix_P_inu(ispin), G_P_ia(ispin)%matrix, &
     615       15231 :                              1.0_dp, Lag_nu_a_2(ispin), filter_eps=eps_filter)
     616             : 
     617             :          ! release the actual gamma_P_ia
     618       15231 :          CALL dbcsr_release(G_P_ia(ispin)%matrix)
     619       26631 :          DEALLOCATE (G_P_ia(ispin)%matrix)
     620             :       END DO
     621             : 
     622       11400 :       CALL timestop(handle)
     623             : 
     624       11400 :    END SUBROUTINE update_lagrangian
     625             : 
     626             : ! **************************************************************************************************
     627             : !> \brief ...
     628             : !> \param qs_env ...
     629             : !> \param mp2_env ...
     630             : !> \param mo_coeff ...
     631             : !> \param homo ...
     632             : !> \param virtual ...
     633             : !> \param dimen ...
     634             : !> \param para_env ...
     635             : !> \param para_env_sub ...
     636             : !> \param Eigenval ...
     637             : !> \param L1_mu_i ...
     638             : !> \param L2_nu_a ...
     639             : !> \param factor ...
     640             : !> \param kspin ...
     641             : ! **************************************************************************************************
     642         344 :    SUBROUTINE create_W_P(qs_env, mp2_env, mo_coeff, homo, virtual, dimen, para_env, para_env_sub, &
     643         344 :                          Eigenval, L1_mu_i, L2_nu_a, factor, kspin)
     644             :       TYPE(qs_environment_type), POINTER                 :: qs_env
     645             :       TYPE(mp2_type)                                     :: mp2_env
     646             :       TYPE(cp_fm_type), INTENT(IN)                       :: mo_coeff
     647             :       INTEGER, INTENT(IN)                                :: homo, virtual, dimen
     648             :       TYPE(mp_para_env_type), POINTER                    :: para_env, para_env_sub
     649             :       REAL(KIND=dp), DIMENSION(:), INTENT(IN)            :: Eigenval
     650             :       TYPE(cp_fm_type), INTENT(INOUT)                    :: L1_mu_i, L2_nu_a
     651             :       REAL(KIND=dp), INTENT(IN)                          :: factor
     652             :       INTEGER, INTENT(IN)                                :: kspin
     653             : 
     654             :       CHARACTER(LEN=*), PARAMETER                        :: routineN = 'create_W_P'
     655             : 
     656             :       INTEGER :: color_exchange, dummy_proc, handle, handle2, handle3, i_global, i_local, iiB, &
     657             :          iii, iproc, itmp(2), j_global, j_local, jjB, max_col_size, max_row_size, &
     658             :          my_B_virtual_end, my_B_virtual_start, mypcol, myprow, ncol_local, ncol_local_1i, &
     659             :          ncol_local_2a, npcol, nprow, nrow_local, nrow_local_1i, nrow_local_2a, number_of_rec, &
     660             :          number_of_send, proc_receive, proc_receive_static, proc_send, proc_send_ex, &
     661             :          proc_send_static, proc_send_sub, proc_shift, rec_col_size, rec_counter, rec_row_size, &
     662             :          send_col_size, send_counter, send_pcol, send_prow, send_row_size, size_rec_buffer, &
     663             :          size_send_buffer
     664         344 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: iii_vet, map_rec_size, map_send_size, &
     665         344 :                                                             pos_info, pos_info_ex, proc_2_send_pos
     666         344 :       INTEGER, ALLOCATABLE, DIMENSION(:, :) :: grid_2_mepos, mepos_2_grid, my_col_indeces_info_1i, &
     667         344 :          my_col_indeces_info_2a, my_row_indeces_info_1i, my_row_indeces_info_2a, sizes, sizes_1i, &
     668         344 :          sizes_2a
     669         344 :       INTEGER, ALLOCATABLE, DIMENSION(:, :, :)           :: col_indeces_info_1i, &
     670         344 :                                                             col_indeces_info_2a, &
     671         344 :                                                             row_indeces_info_1i, &
     672         344 :                                                             row_indeces_info_2a
     673         344 :       INTEGER, DIMENSION(:), POINTER                     :: col_indices, col_indices_1i, &
     674         344 :                                                             col_indices_2a, row_indices, &
     675         344 :                                                             row_indices_1i, row_indices_2a
     676         344 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :)        :: ab_rec, ab_send, mat_rec, mat_send
     677             :       TYPE(cp_blacs_env_type), POINTER                   :: blacs_env
     678             :       TYPE(cp_fm_struct_type), POINTER                   :: fm_struct_tmp
     679             :       TYPE(cp_fm_type)                                   :: fm_P_ij, L_mu_q
     680             :       TYPE(integ_mat_buffer_type), ALLOCATABLE, &
     681         344 :          DIMENSION(:)                                    :: buffer_rec, buffer_send
     682             :       TYPE(integ_mat_buffer_type_2D), ALLOCATABLE, &
     683         344 :          DIMENSION(:)                                    :: buffer_cyclic
     684             :       TYPE(mp_para_env_type), POINTER                    :: para_env_exchange
     685         344 :       TYPE(mp_request_type), ALLOCATABLE, DIMENSION(:)   :: req_send
     686             : 
     687         344 :       CALL timeset(routineN, handle)
     688             : 
     689             :       ! create the globally distributed mixed lagrangian
     690         344 :       NULLIFY (blacs_env)
     691         344 :       CALL get_qs_env(qs_env, blacs_env=blacs_env)
     692             : 
     693         344 :       NULLIFY (fm_struct_tmp)
     694             :       CALL cp_fm_struct_create(fm_struct_tmp, para_env=para_env, context=blacs_env, &
     695         344 :                                nrow_global=dimen, ncol_global=dimen)
     696         344 :       CALL cp_fm_create(L_mu_q, fm_struct_tmp, name="Lag_mu_q")
     697         344 :       CALL cp_fm_struct_release(fm_struct_tmp)
     698         344 :       CALL cp_fm_set_all(L_mu_q, 0.0_dp)
     699             : 
     700             :       ! create all information array
     701        1032 :       ALLOCATE (pos_info(0:para_env%num_pe - 1))
     702         344 :       CALL para_env%allgather(para_env_sub%mepos, pos_info)
     703             : 
     704             :       ! get matrix information for the global
     705             :       CALL cp_fm_get_info(matrix=L_mu_q, &
     706             :                           nrow_local=nrow_local, &
     707             :                           ncol_local=ncol_local, &
     708             :                           row_indices=row_indices, &
     709         344 :                           col_indices=col_indices)
     710         344 :       myprow = L_mu_q%matrix_struct%context%mepos(1)
     711         344 :       mypcol = L_mu_q%matrix_struct%context%mepos(2)
     712         344 :       nprow = L_mu_q%matrix_struct%context%num_pe(1)
     713         344 :       npcol = L_mu_q%matrix_struct%context%num_pe(2)
     714             : 
     715        1376 :       ALLOCATE (grid_2_mepos(0:nprow - 1, 0:npcol - 1))
     716        1376 :       grid_2_mepos = 0
     717         344 :       grid_2_mepos(myprow, mypcol) = para_env%mepos
     718         344 :       CALL para_env%sum(grid_2_mepos)
     719             : 
     720             :       ! get matrix information for L1_mu_i
     721             :       CALL cp_fm_get_info(matrix=L1_mu_i, &
     722             :                           nrow_local=nrow_local_1i, &
     723             :                           ncol_local=ncol_local_1i, &
     724             :                           row_indices=row_indices_1i, &
     725         344 :                           col_indices=col_indices_1i)
     726             : 
     727        1032 :       ALLOCATE (sizes_1i(2, 0:para_env_sub%num_pe - 1))
     728        1032 :       CALL para_env_sub%allgather([nrow_local_1i, ncol_local_1i], sizes_1i)
     729             : 
     730             :       ! get matrix information for L2_nu_a
     731             :       CALL cp_fm_get_info(matrix=L2_nu_a, &
     732             :                           nrow_local=nrow_local_2a, &
     733             :                           ncol_local=ncol_local_2a, &
     734             :                           row_indices=row_indices_2a, &
     735         344 :                           col_indices=col_indices_2a)
     736             : 
     737        1032 :       ALLOCATE (sizes_2a(2, 0:para_env_sub%num_pe - 1))
     738        1032 :       CALL para_env_sub%allgather([nrow_local_2a, ncol_local_2a], sizes_2a)
     739             : 
     740             :       ! Here we perform a ring communication scheme taking into account
     741             :       ! for the sub-group distribution of the source matrices.
     742             :       ! as a first step we need to redistribute the data within
     743             :       ! the subgroup.
     744             :       ! In order to do so we have to allocate the structure
     745             :       ! that will hold the local data involved in communication, this
     746             :       ! structure will be the same for processes in different subgroups
     747             :       ! sharing the same position in the subgroup.
     748             :       ! -1) create the exchange para_env
     749         344 :       color_exchange = para_env_sub%mepos
     750         344 :       ALLOCATE (para_env_exchange)
     751         344 :       CALL para_env_exchange%from_split(para_env, color_exchange)
     752        1032 :       ALLOCATE (pos_info_ex(0:para_env%num_pe - 1))
     753         344 :       CALL para_env%allgather(para_env_exchange%mepos, pos_info_ex)
     754        1032 :       ALLOCATE (sizes(2, 0:para_env_exchange%num_pe - 1))
     755        1032 :       CALL para_env_exchange%allgather([nrow_local, ncol_local], sizes)
     756             : 
     757             :       ! 0) store some info about indeces of the fm matrices (subgroup)
     758         344 :       CALL timeset(routineN//"_inx", handle2)
     759             :       ! matrix L1_mu_i
     760         704 :       max_row_size = MAXVAL(sizes_1i(1, :))
     761         704 :       max_col_size = MAXVAL(sizes_1i(2, :))
     762        1376 :       ALLOCATE (row_indeces_info_1i(2, max_row_size, 0:para_env_sub%num_pe - 1))
     763        1376 :       ALLOCATE (col_indeces_info_1i(2, max_col_size, 0:para_env_sub%num_pe - 1))
     764        1032 :       ALLOCATE (my_row_indeces_info_1i(2, max_row_size))
     765        1032 :       ALLOCATE (my_col_indeces_info_1i(2, max_col_size))
     766       20426 :       row_indeces_info_1i = 0
     767        4664 :       col_indeces_info_1i = 0
     768         344 :       dummy_proc = 0
     769             :       ! row
     770        6716 :       DO iiB = 1, nrow_local_1i
     771        6372 :          i_global = row_indices_1i(iiB)
     772        6372 :          send_prow = L_mu_q%matrix_struct%g2p_row(i_global)
     773        6372 :          i_local = L_mu_q%matrix_struct%g2l_row(i_global)
     774        6372 :          my_row_indeces_info_1i(1, iiB) = send_prow
     775        6716 :          my_row_indeces_info_1i(2, iiB) = i_local
     776             :       END DO
     777             :       ! col
     778        1592 :       DO jjB = 1, ncol_local_1i
     779        1248 :          j_global = col_indices_1i(jjB)
     780        1248 :          send_pcol = L_mu_q%matrix_struct%g2p_col(j_global)
     781        1248 :          j_local = L_mu_q%matrix_struct%g2l_col(j_global)
     782        1248 :          my_col_indeces_info_1i(1, jjB) = send_pcol
     783        1592 :          my_col_indeces_info_1i(2, jjB) = j_local
     784             :       END DO
     785         344 :       CALL para_env_sub%allgather(my_row_indeces_info_1i, row_indeces_info_1i)
     786         344 :       CALL para_env_sub%allgather(my_col_indeces_info_1i, col_indeces_info_1i)
     787         344 :       DEALLOCATE (my_row_indeces_info_1i, my_col_indeces_info_1i)
     788             : 
     789             :       ! matrix L2_nu_a
     790         704 :       max_row_size = MAXVAL(sizes_2a(1, :))
     791         704 :       max_col_size = MAXVAL(sizes_2a(2, :))
     792        1376 :       ALLOCATE (row_indeces_info_2a(2, max_row_size, 0:para_env_sub%num_pe - 1))
     793        1376 :       ALLOCATE (col_indeces_info_2a(2, max_col_size, 0:para_env_sub%num_pe - 1))
     794        1032 :       ALLOCATE (my_row_indeces_info_2a(2, max_row_size))
     795        1032 :       ALLOCATE (my_col_indeces_info_2a(2, max_col_size))
     796       20426 :       row_indeces_info_2a = 0
     797       17570 :       col_indeces_info_2a = 0
     798             :       ! row
     799        6716 :       DO iiB = 1, nrow_local_2a
     800        6372 :          i_global = row_indices_2a(iiB)
     801        6372 :          send_prow = L_mu_q%matrix_struct%g2p_row(i_global)
     802        6372 :          i_local = L_mu_q%matrix_struct%g2l_row(i_global)
     803        6372 :          my_row_indeces_info_2a(1, iiB) = send_prow
     804        6716 :          my_row_indeces_info_2a(2, iiB) = i_local
     805             :       END DO
     806             :       ! col
     807        5658 :       DO jjB = 1, ncol_local_2a
     808        5314 :          j_global = col_indices_2a(jjB) + homo
     809        5314 :          send_pcol = L_mu_q%matrix_struct%g2p_col(j_global)
     810        5314 :          j_local = L_mu_q%matrix_struct%g2l_col(j_global)
     811        5314 :          my_col_indeces_info_2a(1, jjB) = send_pcol
     812        5658 :          my_col_indeces_info_2a(2, jjB) = j_local
     813             :       END DO
     814         344 :       CALL para_env_sub%allgather(my_row_indeces_info_2a, row_indeces_info_2a)
     815         344 :       CALL para_env_sub%allgather(my_col_indeces_info_2a, col_indeces_info_2a)
     816         344 :       DEALLOCATE (my_row_indeces_info_2a, my_col_indeces_info_2a)
     817         344 :       CALL timestop(handle2)
     818             : 
     819             :       ! 1) define the map for sending data in the subgroup starting with L1_mu_i
     820         344 :       CALL timeset(routineN//"_subinfo", handle2)
     821        1032 :       ALLOCATE (map_send_size(0:para_env_sub%num_pe - 1))
     822         704 :       map_send_size = 0
     823        1592 :       DO jjB = 1, ncol_local_1i
     824        1248 :          send_pcol = col_indeces_info_1i(1, jjB, para_env_sub%mepos)
     825       25286 :          DO iiB = 1, nrow_local_1i
     826       23694 :             send_prow = row_indeces_info_1i(1, iiB, para_env_sub%mepos)
     827       23694 :             proc_send = grid_2_mepos(send_prow, send_pcol)
     828       23694 :             proc_send_sub = pos_info(proc_send)
     829       24942 :             map_send_size(proc_send_sub) = map_send_size(proc_send_sub) + 1
     830             :          END DO
     831             :       END DO
     832             :       ! and the same for L2_nu_a
     833        5658 :       DO jjB = 1, ncol_local_2a
     834        5314 :          send_pcol = col_indeces_info_2a(1, jjB, para_env_sub%mepos)
     835      123528 :          DO iiB = 1, nrow_local_2a
     836      117870 :             send_prow = row_indeces_info_2a(1, iiB, para_env_sub%mepos)
     837      117870 :             proc_send = grid_2_mepos(send_prow, send_pcol)
     838      117870 :             proc_send_sub = pos_info(proc_send)
     839      123184 :             map_send_size(proc_send_sub) = map_send_size(proc_send_sub) + 1
     840             :          END DO
     841             :       END DO
     842             :       ! and exchange data in order to create map_rec_size
     843        1032 :       ALLOCATE (map_rec_size(0:para_env_sub%num_pe - 1))
     844         704 :       map_rec_size = 0
     845         344 :       CALL para_env_sub%alltoall(map_send_size, map_rec_size, 1)
     846         344 :       CALL timestop(handle2)
     847             : 
     848             :       ! 2) reorder data in sending buffer
     849         344 :       CALL timeset(routineN//"_sub_Bsend", handle2)
     850             :       ! count the number of messages (include myself)
     851         344 :       number_of_send = 0
     852         704 :       DO proc_shift = 0, para_env_sub%num_pe - 1
     853         360 :          proc_send = MODULO(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
     854         704 :          IF (map_send_size(proc_send) > 0) THEN
     855         344 :             number_of_send = number_of_send + 1
     856             :          END IF
     857             :       END DO
     858             :       ! allocate the structure that will hold the messages to be sent
     859        1376 :       ALLOCATE (buffer_send(number_of_send))
     860         344 :       send_counter = 0
     861        1032 :       ALLOCATE (proc_2_send_pos(0:para_env_sub%num_pe - 1))
     862         704 :       proc_2_send_pos = 0
     863         704 :       DO proc_shift = 0, para_env_sub%num_pe - 1
     864         360 :          proc_send = MODULO(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
     865         360 :          size_send_buffer = map_send_size(proc_send)
     866         704 :          IF (map_send_size(proc_send) > 0) THEN
     867         344 :             send_counter = send_counter + 1
     868             :             ! allocate the sending buffer (msg)
     869        1032 :             ALLOCATE (buffer_send(send_counter)%msg(size_send_buffer))
     870      141908 :             buffer_send(send_counter)%msg = 0.0_dp
     871         344 :             buffer_send(send_counter)%proc = proc_send
     872         344 :             proc_2_send_pos(proc_send) = send_counter
     873             :          END IF
     874             :       END DO
     875             :       ! loop over the locally held data and fill the buffer_send
     876             :       ! for doing that we need an array that keep track if the
     877             :       ! sequential increase of the index for each message
     878        1032 :       ALLOCATE (iii_vet(number_of_send))
     879         688 :       iii_vet = 0
     880        1592 :       DO jjB = 1, ncol_local_1i
     881        1248 :          send_pcol = col_indeces_info_1i(1, jjB, para_env_sub%mepos)
     882       25286 :          DO iiB = 1, nrow_local_1i
     883       23694 :             send_prow = row_indeces_info_1i(1, iiB, para_env_sub%mepos)
     884       23694 :             proc_send = grid_2_mepos(send_prow, send_pcol)
     885       23694 :             proc_send_sub = pos_info(proc_send)
     886       23694 :             send_counter = proc_2_send_pos(proc_send_sub)
     887       23694 :             iii_vet(send_counter) = iii_vet(send_counter) + 1
     888       23694 :             iii = iii_vet(send_counter)
     889       24942 :             buffer_send(send_counter)%msg(iii) = L1_mu_i%local_data(iiB, jjB)
     890             :          END DO
     891             :       END DO
     892             :       ! release the local data of L1_mu_i
     893         344 :       DEALLOCATE (L1_mu_i%local_data)
     894             :       ! and the same for L2_nu_a
     895        5658 :       DO jjB = 1, ncol_local_2a
     896        5314 :          send_pcol = col_indeces_info_2a(1, jjB, para_env_sub%mepos)
     897      123528 :          DO iiB = 1, nrow_local_2a
     898      117870 :             send_prow = row_indeces_info_2a(1, iiB, para_env_sub%mepos)
     899      117870 :             proc_send = grid_2_mepos(send_prow, send_pcol)
     900      117870 :             proc_send_sub = pos_info(proc_send)
     901      117870 :             send_counter = proc_2_send_pos(proc_send_sub)
     902      117870 :             iii_vet(send_counter) = iii_vet(send_counter) + 1
     903      117870 :             iii = iii_vet(send_counter)
     904      123184 :             buffer_send(send_counter)%msg(iii) = L2_nu_a%local_data(iiB, jjB)
     905             :          END DO
     906             :       END DO
     907         344 :       DEALLOCATE (L2_nu_a%local_data)
     908         344 :       DEALLOCATE (proc_2_send_pos)
     909         344 :       DEALLOCATE (iii_vet)
     910         344 :       CALL timestop(handle2)
     911             : 
     912             :       ! 3) create the buffer for receive, post the message with irecv
     913             :       !    and send the messages non-blocking
     914         344 :       CALL timeset(routineN//"_sub_isendrecv", handle2)
     915             :       ! count the number of messages to be received
     916         344 :       number_of_rec = 0
     917         704 :       DO proc_shift = 0, para_env_sub%num_pe - 1
     918         360 :          proc_receive = MODULO(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
     919         704 :          IF (map_rec_size(proc_receive) > 0) THEN
     920         344 :             number_of_rec = number_of_rec + 1
     921             :          END IF
     922             :       END DO
     923        1376 :       ALLOCATE (buffer_rec(number_of_rec))
     924         344 :       rec_counter = 0
     925         704 :       DO proc_shift = 0, para_env_sub%num_pe - 1
     926         360 :          proc_receive = MODULO(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
     927         360 :          size_rec_buffer = map_rec_size(proc_receive)
     928         704 :          IF (map_rec_size(proc_receive) > 0) THEN
     929         344 :             rec_counter = rec_counter + 1
     930             :             ! prepare the buffer for receive
     931        1032 :             ALLOCATE (buffer_rec(rec_counter)%msg(size_rec_buffer))
     932      141908 :             buffer_rec(rec_counter)%msg = 0.0_dp
     933         344 :             buffer_rec(rec_counter)%proc = proc_receive
     934             :             ! post the message to be received (not need to send to myself)
     935         344 :             IF (proc_receive /= para_env_sub%mepos) THEN
     936             :                CALL para_env_sub%irecv(buffer_rec(rec_counter)%msg, proc_receive, &
     937           0 :                                        buffer_rec(rec_counter)%msg_req)
     938             :             END IF
     939             :          END IF
     940             :       END DO
     941             :       ! send messages
     942        1376 :       ALLOCATE (req_send(number_of_send))
     943         688 :       req_send = mp_request_null
     944         344 :       send_counter = 0
     945         704 :       DO proc_shift = 0, para_env_sub%num_pe - 1
     946         360 :          proc_send = MODULO(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
     947         704 :          IF (map_send_size(proc_send) > 0) THEN
     948         344 :             send_counter = send_counter + 1
     949         344 :             IF (proc_send == para_env_sub%mepos) THEN
     950      141908 :                buffer_rec(send_counter)%msg(:) = buffer_send(send_counter)%msg
     951             :             ELSE
     952             :                CALL para_env_sub%isend(buffer_send(send_counter)%msg, proc_send, &
     953           0 :                                        buffer_send(send_counter)%msg_req)
     954           0 :                req_send(send_counter) = buffer_send(send_counter)%msg_req
     955             :             END IF
     956             :          END IF
     957             :       END DO
     958         344 :       DEALLOCATE (map_send_size)
     959         344 :       CALL timestop(handle2)
     960             : 
     961             :       ! 4) (if memory is a problem we should move this part after point 5)
     962             :       !    Here we create the new buffer for cyclic(ring) communication and
     963             :       !    we fill it with the data received from the other member of the
     964             :       !    subgroup
     965         344 :       CALL timeset(routineN//"_Bcyclic", handle2)
     966             :       ! first allocata new structure
     967        1704 :       ALLOCATE (buffer_cyclic(0:para_env_exchange%num_pe - 1))
     968        1016 :       DO iproc = 0, para_env_exchange%num_pe - 1
     969         672 :          rec_row_size = sizes(1, iproc)
     970         672 :          rec_col_size = sizes(2, iproc)
     971        2688 :          ALLOCATE (buffer_cyclic(iproc)%msg(rec_row_size, rec_col_size))
     972      155324 :          buffer_cyclic(iproc)%msg = 0.0_dp
     973             :       END DO
     974             :       ! now collect data from other member of the subgroup and fill
     975             :       ! buffer_cyclic
     976         344 :       rec_counter = 0
     977         704 :       DO proc_shift = 0, para_env_sub%num_pe - 1
     978         360 :          proc_receive = MODULO(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
     979         360 :          size_rec_buffer = map_rec_size(proc_receive)
     980         704 :          IF (map_rec_size(proc_receive) > 0) THEN
     981         344 :             rec_counter = rec_counter + 1
     982             : 
     983             :             ! wait for the message
     984         344 :             IF (proc_receive /= para_env_sub%mepos) CALL buffer_rec(rec_counter)%msg_req%wait()
     985             : 
     986         344 :             CALL timeset(routineN//"_fill", handle3)
     987         344 :             iii = 0
     988        1592 :             DO jjB = 1, sizes_1i(2, proc_receive)
     989        1248 :                send_pcol = col_indeces_info_1i(1, jjB, proc_receive)
     990        1248 :                j_local = col_indeces_info_1i(2, jjB, proc_receive)
     991       25286 :                DO iiB = 1, sizes_1i(1, proc_receive)
     992       23694 :                   send_prow = row_indeces_info_1i(1, iiB, proc_receive)
     993       23694 :                   proc_send = grid_2_mepos(send_prow, send_pcol)
     994       23694 :                   proc_send_sub = pos_info(proc_send)
     995       23694 :                   IF (proc_send_sub /= para_env_sub%mepos) CYCLE
     996       23694 :                   iii = iii + 1
     997       23694 :                   i_local = row_indeces_info_1i(2, iiB, proc_receive)
     998       23694 :                   proc_send_ex = pos_info_ex(proc_send)
     999       24942 :                   buffer_cyclic(proc_send_ex)%msg(i_local, j_local) = buffer_rec(rec_counter)%msg(iii)
    1000             :                END DO
    1001             :             END DO
    1002             :             ! and the same for L2_nu_a
    1003        5658 :             DO jjB = 1, sizes_2a(2, proc_receive)
    1004        5314 :                send_pcol = col_indeces_info_2a(1, jjB, proc_receive)
    1005        5314 :                j_local = col_indeces_info_2a(2, jjB, proc_receive)
    1006      123528 :                DO iiB = 1, sizes_2a(1, proc_receive)
    1007      117870 :                   send_prow = row_indeces_info_2a(1, iiB, proc_receive)
    1008      117870 :                   proc_send = grid_2_mepos(send_prow, send_pcol)
    1009      117870 :                   proc_send_sub = pos_info(proc_send)
    1010      117870 :                   IF (proc_send_sub /= para_env_sub%mepos) CYCLE
    1011      117870 :                   iii = iii + 1
    1012      117870 :                   i_local = row_indeces_info_2a(2, iiB, proc_receive)
    1013      117870 :                   proc_send_ex = pos_info_ex(proc_send)
    1014      123184 :                   buffer_cyclic(proc_send_ex)%msg(i_local, j_local) = buffer_rec(rec_counter)%msg(iii)
    1015             :                END DO
    1016             :             END DO
    1017         344 :             CALL timestop(handle3)
    1018             : 
    1019             :             ! deallocate the received message
    1020         344 :             DEALLOCATE (buffer_rec(rec_counter)%msg)
    1021             :          END IF
    1022             :       END DO
    1023         344 :       DEALLOCATE (row_indeces_info_1i)
    1024         344 :       DEALLOCATE (col_indeces_info_1i)
    1025         344 :       DEALLOCATE (row_indeces_info_2a)
    1026         344 :       DEALLOCATE (col_indeces_info_2a)
    1027         688 :       DEALLOCATE (buffer_rec)
    1028         344 :       DEALLOCATE (map_rec_size)
    1029         344 :       CALL timestop(handle2)
    1030             : 
    1031             :       ! 5)  Wait for all messeges to be sent in the subgroup
    1032         344 :       CALL timeset(routineN//"_sub_waitall", handle2)
    1033         344 :       CALL mp_waitall(req_send(:))
    1034         688 :       DO send_counter = 1, number_of_send
    1035         688 :          DEALLOCATE (buffer_send(send_counter)%msg)
    1036             :       END DO
    1037         688 :       DEALLOCATE (buffer_send)
    1038         344 :       DEALLOCATE (req_send)
    1039         344 :       CALL timestop(handle2)
    1040             : 
    1041             :       ! 6) Start with ring communication
    1042         344 :       CALL timeset(routineN//"_ring", handle2)
    1043         344 :       proc_send_static = MODULO(para_env_exchange%mepos + 1, para_env_exchange%num_pe)
    1044         344 :       proc_receive_static = MODULO(para_env_exchange%mepos - 1, para_env_exchange%num_pe)
    1045        1016 :       max_row_size = MAXVAL(sizes(1, :))
    1046        1016 :       max_col_size = MAXVAL(sizes(2, :))
    1047        1376 :       ALLOCATE (mat_send(max_row_size, max_col_size))
    1048        1032 :       ALLOCATE (mat_rec(max_row_size, max_col_size))
    1049       81732 :       mat_send = 0.0_dp
    1050       79951 :       mat_send(1:nrow_local, 1:ncol_local) = buffer_cyclic(para_env_exchange%mepos)%msg(:, :)
    1051         344 :       DEALLOCATE (buffer_cyclic(para_env_exchange%mepos)%msg)
    1052         672 :       DO proc_shift = 1, para_env_exchange%num_pe - 1
    1053         328 :          proc_receive = MODULO(para_env_exchange%mepos - proc_shift, para_env_exchange%num_pe)
    1054             : 
    1055         328 :          rec_row_size = sizes(1, proc_receive)
    1056         328 :          rec_col_size = sizes(2, proc_receive)
    1057             : 
    1058       76810 :          mat_rec = 0.0_dp
    1059             :          CALL para_env_exchange%sendrecv(mat_send, proc_send_static, &
    1060         328 :                                          mat_rec, proc_receive_static)
    1061             : 
    1062       76810 :          mat_send = 0.0_dp
    1063             :          mat_send(1:rec_row_size, 1:rec_col_size) = mat_rec(1:rec_row_size, 1:rec_col_size) + &
    1064       75029 :                                                     buffer_cyclic(proc_receive)%msg(:, :)
    1065             : 
    1066         672 :          DEALLOCATE (buffer_cyclic(proc_receive)%msg)
    1067             :       END DO
    1068             :       ! and finally
    1069             :       CALL para_env_exchange%sendrecv(mat_send, proc_send_static, &
    1070         344 :                                       mat_rec, proc_receive_static)
    1071       79951 :       L_mu_q%local_data(1:nrow_local, 1:ncol_local) = mat_rec(1:nrow_local, 1:ncol_local)
    1072        1360 :       DEALLOCATE (buffer_cyclic)
    1073         344 :       DEALLOCATE (mat_send)
    1074         344 :       DEALLOCATE (mat_rec)
    1075         344 :       CALL timestop(handle2)
    1076             : 
    1077             :       ! release para_env_exchange
    1078         344 :       CALL mp_para_env_release(para_env_exchange)
    1079             : 
    1080         344 :       CALL cp_fm_release(L1_mu_i)
    1081         344 :       CALL cp_fm_release(L2_nu_a)
    1082         344 :       DEALLOCATE (pos_info_ex)
    1083         344 :       DEALLOCATE (grid_2_mepos)
    1084         344 :       DEALLOCATE (sizes)
    1085         344 :       DEALLOCATE (sizes_1i)
    1086         344 :       DEALLOCATE (sizes_2a)
    1087             : 
    1088             :       ! update the P_ij block of P_MP2 with the
    1089             :       ! non-singular ij pairs
    1090         344 :       CALL timeset(routineN//"_Pij", handle2)
    1091         344 :       NULLIFY (fm_struct_tmp)
    1092             :       CALL cp_fm_struct_create(fm_struct_tmp, para_env=para_env, context=blacs_env, &
    1093         344 :                                nrow_global=homo, ncol_global=homo)
    1094         344 :       CALL cp_fm_create(fm_P_ij, fm_struct_tmp, name="fm_P_ij")
    1095         344 :       CALL cp_fm_struct_release(fm_struct_tmp)
    1096         344 :       CALL cp_fm_set_all(fm_P_ij, 0.0_dp)
    1097             : 
    1098             :       ! we have it, update P_ij local
    1099             :       CALL cp_fm_get_info(matrix=fm_P_ij, &
    1100             :                           nrow_local=nrow_local, &
    1101             :                           ncol_local=ncol_local, &
    1102             :                           row_indices=row_indices, &
    1103         344 :                           col_indices=col_indices)
    1104             : 
    1105         344 :       IF (.NOT. mp2_env%method == ri_rpa_method_gpw) THEN
    1106             :          CALL parallel_gemm('T', 'N', homo, homo, dimen, 1.0_dp, &
    1107             :                             mo_coeff, L_mu_q, 0.0_dp, fm_P_ij, &
    1108             :                             a_first_col=1, &
    1109             :                             a_first_row=1, &
    1110             :                             b_first_col=1, &
    1111             :                             b_first_row=1, &
    1112             :                             c_first_col=1, &
    1113         320 :                             c_first_row=1)
    1114             :          CALL parallel_gemm('T', 'N', homo, homo, dimen, -2.0_dp, &
    1115             :                             L_mu_q, mo_coeff, 2.0_dp, fm_P_ij, &
    1116             :                             a_first_col=1, &
    1117             :                             a_first_row=1, &
    1118             :                             b_first_col=1, &
    1119             :                             b_first_row=1, &
    1120             :                             c_first_col=1, &
    1121         320 :                             c_first_row=1)
    1122             : 
    1123        1476 :          DO jjB = 1, ncol_local
    1124        1156 :             j_global = col_indices(jjB)
    1125        3692 :             DO iiB = 1, nrow_local
    1126        2216 :                i_global = row_indices(iiB)
    1127             :                ! diagonal elements and nearly degenerate ij pairs already updated
    1128        3372 :                IF (ABS(Eigenval(j_global) - Eigenval(i_global)) < mp2_env%ri_grad%eps_canonical) THEN
    1129         656 :                   fm_P_ij%local_data(iiB, jjB) = mp2_env%ri_grad%P_ij(kspin)%array(i_global, j_global)
    1130             :                ELSE
    1131             :                   fm_P_ij%local_data(iiB, jjB) = &
    1132        1560 :                      factor*fm_P_ij%local_data(iiB, jjB)/(Eigenval(j_global) - Eigenval(i_global))
    1133             :                END IF
    1134             :             END DO
    1135             :          END DO
    1136             :       ELSE
    1137         116 :          DO jjB = 1, ncol_local
    1138          92 :             j_global = col_indices(jjB)
    1139         294 :             DO iiB = 1, nrow_local
    1140         178 :                i_global = row_indices(iiB)
    1141         270 :                fm_P_ij%local_data(iiB, jjB) = mp2_env%ri_grad%P_ij(kspin)%array(i_global, j_global)
    1142             :             END DO
    1143             :          END DO
    1144             :       END IF
    1145             :       ! deallocate the local P_ij
    1146         344 :       DEALLOCATE (mp2_env%ri_grad%P_ij(kspin)%array)
    1147         344 :       CALL timestop(handle2)
    1148             : 
    1149             :       ! Now create and fill the P matrix (MO)
    1150             :       ! FOR NOW WE ASSUME P_ab AND P_ij ARE REPLICATED OVER EACH MPI RANK
    1151         344 :       IF (.NOT. ALLOCATED(mp2_env%ri_grad%P_mo)) THEN
    1152        1124 :          ALLOCATE (mp2_env%ri_grad%P_mo(SIZE(mp2_env%ri_grad%mo_coeff_o)))
    1153             :       END IF
    1154             : 
    1155         344 :       CALL timeset(routineN//"_PMO", handle2)
    1156         344 :       NULLIFY (fm_struct_tmp)
    1157             :       CALL cp_fm_struct_create(fm_struct_tmp, para_env=para_env, context=blacs_env, &
    1158         344 :                                nrow_global=dimen, ncol_global=dimen)
    1159         344 :       CALL cp_fm_create(mp2_env%ri_grad%P_mo(kspin), fm_struct_tmp, name="P_MP2_MO")
    1160         344 :       CALL cp_fm_set_all(mp2_env%ri_grad%P_mo(kspin), 0.0_dp)
    1161             : 
    1162             :       ! start with the (easy) occ-occ block and locally held P_ab elements
    1163         344 :       itmp = get_limit(virtual, para_env_sub%num_pe, para_env_sub%mepos)
    1164         344 :       my_B_virtual_start = itmp(1)
    1165         344 :       my_B_virtual_end = itmp(2)
    1166             : 
    1167             :       ! Fill occ-occ block
    1168         344 :       CALL cp_fm_to_fm_submat(fm_P_ij, mp2_env%ri_grad%P_mo(kspin), homo, homo, 1, 1, 1, 1)
    1169         344 :       CALL cp_fm_release(fm_P_ij)
    1170             : 
    1171             :       CALL cp_fm_get_info(mp2_env%ri_grad%P_mo(kspin), &
    1172             :                           nrow_local=nrow_local, &
    1173             :                           ncol_local=ncol_local, &
    1174             :                           row_indices=row_indices, &
    1175         344 :                           col_indices=col_indices)
    1176             : 
    1177         344 :       IF (mp2_env%method == ri_mp2_laplace) THEN
    1178             :          CALL parallel_gemm('T', 'N', virtual, virtual, dimen, 1.0_dp, &
    1179             :                             mo_coeff, L_mu_q, 0.0_dp, mp2_env%ri_grad%P_mo(kspin), &
    1180             :                             a_first_col=homo + 1, &
    1181             :                             a_first_row=1, &
    1182             :                             b_first_col=homo + 1, &
    1183             :                             b_first_row=1, &
    1184             :                             c_first_col=homo + 1, &
    1185          24 :                             c_first_row=homo + 1)
    1186             :          CALL parallel_gemm('T', 'N', virtual, virtual, dimen, -2.0_dp, &
    1187             :                             L_mu_q, mo_coeff, 2.0_dp, mp2_env%ri_grad%P_mo(kspin), &
    1188             :                             a_first_col=homo + 1, &
    1189             :                             a_first_row=1, &
    1190             :                             b_first_col=homo + 1, &
    1191             :                             b_first_row=1, &
    1192             :                             c_first_col=homo + 1, &
    1193          24 :                             c_first_row=homo + 1)
    1194             :       END IF
    1195             : 
    1196         344 :       IF (mp2_env%method == ri_mp2_method_gpw .OR. mp2_env%method == ri_rpa_method_gpw) THEN
    1197             :          ! With MP2 and RPA, we have already calculated the density matrix elements
    1198        6290 :          DO jjB = 1, ncol_local
    1199        5970 :             j_global = col_indices(jjB)
    1200        5970 :             IF (j_global <= homo) CYCLE
    1201       59660 :             DO iiB = 1, nrow_local
    1202       54526 :                i_global = row_indices(iiB)
    1203       60496 :                IF (my_B_virtual_start <= i_global - homo .AND. i_global - homo <= my_B_virtual_end) THEN
    1204             :                   mp2_env%ri_grad%P_mo(kspin)%local_data(iiB, jjB) = &
    1205       45300 :                      mp2_env%ri_grad%P_ab(kspin)%array(i_global - homo - my_B_virtual_start + 1, j_global - homo)
    1206             :                END IF
    1207             :             END DO
    1208             :          END DO
    1209          24 :       ELSE IF (mp2_env%method == ri_mp2_laplace) THEN
    1210             :          ! With Laplace-SOS-MP2, we still have to calculate the matrix elements of the non-degenerate pairs
    1211         616 :          DO jjB = 1, ncol_local
    1212         592 :             j_global = col_indices(jjB)
    1213         592 :             IF (j_global <= homo) CYCLE
    1214        6764 :             DO iiB = 1, nrow_local
    1215        6240 :                i_global = row_indices(iiB)
    1216        6832 :                IF (ABS(Eigenval(i_global) - Eigenval(j_global)) < mp2_env%ri_grad%eps_canonical) THEN
    1217         565 :                   IF (my_B_virtual_start <= i_global - homo .AND. i_global - homo <= my_B_virtual_end) THEN
    1218             :                      mp2_env%ri_grad%P_mo(kspin)%local_data(iiB, jjB) = &
    1219         552 :                         mp2_env%ri_grad%P_ab(kspin)%array(i_global - homo - my_B_virtual_start + 1, j_global - homo)
    1220             :                   ELSE
    1221          13 :                      mp2_env%ri_grad%P_mo(kspin)%local_data(iiB, jjB) = 0.0_dp
    1222             :                   END IF
    1223             :                ELSE
    1224             :                   mp2_env%ri_grad%P_mo(kspin)%local_data(iiB, jjB) = &
    1225             :                      factor*mp2_env%ri_grad%P_mo(kspin)%local_data(iiB, jjB)/ &
    1226        5675 :                      (Eigenval(i_global) - Eigenval(j_global))
    1227             :                END IF
    1228             :             END DO
    1229             :          END DO
    1230             :       ELSE
    1231           0 :          CPABORT("Calculation of virt-virt block of density matrix is dealt with elsewhere!")
    1232             :       END IF
    1233             : 
    1234             :       ! send around the sub_group the local data and check if we
    1235             :       ! have to update our block with external elements
    1236        1032 :       ALLOCATE (mepos_2_grid(2, 0:para_env_sub%num_pe - 1))
    1237        1032 :       CALL para_env_sub%allgather([myprow, mypcol], mepos_2_grid)
    1238             : 
    1239        1032 :       ALLOCATE (sizes(2, 0:para_env_sub%num_pe - 1))
    1240        1032 :       CALL para_env_sub%allgather([nrow_local, ncol_local], sizes)
    1241             : 
    1242        1376 :       ALLOCATE (ab_rec(nrow_local, ncol_local))
    1243         360 :       DO proc_shift = 1, para_env_sub%num_pe - 1
    1244          16 :          proc_send = MODULO(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
    1245          16 :          proc_receive = MODULO(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
    1246             : 
    1247          16 :          send_prow = mepos_2_grid(1, proc_send)
    1248          16 :          send_pcol = mepos_2_grid(2, proc_send)
    1249             : 
    1250          16 :          send_row_size = sizes(1, proc_send)
    1251          16 :          send_col_size = sizes(2, proc_send)
    1252             : 
    1253          64 :          ALLOCATE (ab_send(send_row_size, send_col_size))
    1254        4922 :          ab_send = 0.0_dp
    1255             : 
    1256             :          ! first loop over row since in this way we can cycle
    1257         206 :          DO iiB = 1, send_row_size
    1258         190 :             i_global = mp2_env%ri_grad%P_mo(kspin)%matrix_struct%l2g_row(iiB, send_prow)
    1259         190 :             IF (i_global <= homo) CYCLE
    1260         154 :             i_global = i_global - homo
    1261         154 :             IF (.NOT. (my_B_virtual_start <= i_global .AND. i_global <= my_B_virtual_end)) CYCLE
    1262         493 :             DO jjB = 1, send_col_size
    1263         458 :                j_global = mp2_env%ri_grad%P_mo(kspin)%matrix_struct%l2g_col(jjB, send_pcol)
    1264         458 :                IF (j_global <= homo) CYCLE
    1265         367 :                j_global = j_global - homo
    1266         648 :                ab_send(iiB, jjB) = mp2_env%ri_grad%P_ab(kspin)%array(i_global - my_B_virtual_start + 1, j_global)
    1267             :             END DO
    1268             :          END DO
    1269             : 
    1270        4922 :          ab_rec = 0.0_dp
    1271             :          CALL para_env_sub%sendrecv(ab_send, proc_send, &
    1272          16 :                                     ab_rec, proc_receive)
    1273             :          mp2_env%ri_grad%P_mo(kspin)%local_data(1:nrow_local, 1:ncol_local) = &
    1274             :             mp2_env%ri_grad%P_mo(kspin)%local_data(1:nrow_local, 1:ncol_local) + &
    1275        4922 :             ab_rec(1:nrow_local, 1:ncol_local)
    1276             : 
    1277         360 :          DEALLOCATE (ab_send)
    1278             :       END DO
    1279         344 :       DEALLOCATE (ab_rec)
    1280         344 :       DEALLOCATE (mepos_2_grid)
    1281         344 :       DEALLOCATE (sizes)
    1282             : 
    1283             :       ! deallocate the local P_ab
    1284         344 :       DEALLOCATE (mp2_env%ri_grad%P_ab(kspin)%array)
    1285         344 :       CALL timestop(handle2)
    1286             : 
    1287             :       ! create also W_MP2_MO
    1288         344 :       CALL timeset(routineN//"_WMO", handle2)
    1289         344 :       IF (.NOT. ALLOCATED(mp2_env%ri_grad%W_mo)) THEN
    1290        1124 :          ALLOCATE (mp2_env%ri_grad%W_mo(SIZE(mp2_env%ri_grad%mo_coeff_o)))
    1291             :       END IF
    1292             : 
    1293         344 :       CALL cp_fm_create(mp2_env%ri_grad%W_mo(kspin), fm_struct_tmp, name="W_MP2_MO")
    1294         344 :       CALL cp_fm_struct_release(fm_struct_tmp)
    1295             : 
    1296             :       ! all block
    1297             :       CALL parallel_gemm('T', 'N', dimen, dimen, dimen, 2.0_dp*factor, &
    1298             :                          L_mu_q, mo_coeff, 0.0_dp, mp2_env%ri_grad%W_mo(kspin), &
    1299             :                          a_first_col=1, &
    1300             :                          a_first_row=1, &
    1301             :                          b_first_col=1, &
    1302             :                          b_first_row=1, &
    1303             :                          c_first_col=1, &
    1304         344 :                          c_first_row=1)
    1305             : 
    1306             :       ! occ-occ block
    1307             :       CALL parallel_gemm('T', 'N', homo, homo, dimen, -2.0_dp*factor, &
    1308             :                          L_mu_q, mo_coeff, 0.0_dp, mp2_env%ri_grad%W_mo(kspin), &
    1309             :                          a_first_col=1, &
    1310             :                          a_first_row=1, &
    1311             :                          b_first_col=1, &
    1312             :                          b_first_row=1, &
    1313             :                          c_first_col=1, &
    1314         344 :                          c_first_row=1)
    1315             : 
    1316             :       ! occ-virt block
    1317             :       CALL parallel_gemm('T', 'N', homo, virtual, dimen, 2.0_dp*factor, &
    1318             :                          mo_coeff, L_mu_q, 0.0_dp, mp2_env%ri_grad%W_mo(kspin), &
    1319             :                          a_first_col=1, &
    1320             :                          a_first_row=1, &
    1321             :                          b_first_col=homo + 1, &
    1322             :                          b_first_row=1, &
    1323             :                          c_first_col=homo + 1, &
    1324         344 :                          c_first_row=1)
    1325         344 :       CALL timestop(handle2)
    1326             : 
    1327             :       ! Calculate occ-virt block of the lagrangian in MO
    1328         344 :       CALL timeset(routineN//"_Ljb", handle2)
    1329         344 :       IF (.NOT. ALLOCATED(mp2_env%ri_grad%L_jb)) THEN
    1330        1124 :          ALLOCATE (mp2_env%ri_grad%L_jb(SIZE(mp2_env%ri_grad%mo_coeff_o)))
    1331             :       END IF
    1332             : 
    1333             :       CALL cp_fm_struct_create(fm_struct_tmp, para_env=para_env, context=blacs_env, &
    1334         344 :                                nrow_global=homo, ncol_global=virtual)
    1335         344 :       CALL cp_fm_create(mp2_env%ri_grad%L_jb(kspin), fm_struct_tmp, name="fm_L_jb")
    1336         344 :       CALL cp_fm_struct_release(fm_struct_tmp)
    1337             : 
    1338             :       ! first Virtual
    1339             :       CALL parallel_gemm('T', 'N', homo, virtual, dimen, 2.0_dp*factor, &
    1340             :                          L_mu_q, mo_coeff, 0.0_dp, mp2_env%ri_grad%L_jb(kspin), &
    1341             :                          a_first_col=1, &
    1342             :                          a_first_row=1, &
    1343             :                          b_first_col=homo + 1, &
    1344             :                          b_first_row=1, &
    1345             :                          c_first_col=1, &
    1346         344 :                          c_first_row=1)
    1347             :       ! then occupied
    1348             :       CALL parallel_gemm('T', 'N', homo, virtual, dimen, 2.0_dp*factor, &
    1349             :                          mo_coeff, L_mu_q, 1.0_dp, mp2_env%ri_grad%L_jb(kspin), &
    1350             :                          a_first_col=1, &
    1351             :                          a_first_row=1, &
    1352             :                          b_first_col=homo + 1, &
    1353             :                          b_first_row=1, &
    1354             :                          c_first_col=1, &
    1355         344 :                          c_first_row=1)
    1356             : 
    1357             :       ! finally release L_mu_q
    1358         344 :       CALL cp_fm_release(L_mu_q)
    1359         344 :       CALL timestop(handle2)
    1360             : 
    1361             :       ! here we should be done next CPHF
    1362             : 
    1363         344 :       DEALLOCATE (pos_info)
    1364             : 
    1365         344 :       CALL timestop(handle)
    1366             : 
    1367        6880 :    END SUBROUTINE create_W_P
    1368             : 
    1369             : END MODULE mp2_ri_grad

Generated by: LCOV version 1.15