LCOV - code coverage report
Current view: top level - src/pw - ps_wavelet_base.F (source / functions) Hit Total Coverage
Test: CP2K Regtests (git:262480d) Lines: 741 772 96.0 %
Date: 2024-11-22 07:00:40 Functions: 25 26 96.2 %

          Line data    Source code
       1             : !--------------------------------------------------------------------------------------------------!
       2             : !   CP2K: A general program to perform molecular dynamics simulations                              !
       3             : !   Copyright 2000-2024 CP2K developers group <https://cp2k.org>                                   !
       4             : !                                                                                                  !
       5             : !   SPDX-License-Identifier: GPL-2.0-or-later                                                      !
       6             : !--------------------------------------------------------------------------------------------------!
       7             : 
       8             : ! **************************************************************************************************
       9             : !> \brief Creates the wavelet kernel for the wavelet based poisson solver.
      10             : !> \author Florian Schiffmann (09.2007,fschiff)
      11             : ! **************************************************************************************************
      12             : MODULE ps_wavelet_base
      13             : 
      14             :    USE kinds,                           ONLY: dp
      15             :    USE message_passing,                 ONLY: mp_comm_type
      16             :    USE ps_wavelet_fft3d,                ONLY: ctrig,&
      17             :                                               ctrig_length,&
      18             :                                               fftstp
      19             : #include "../base/base_uses.f90"
      20             : 
      21             :    IMPLICIT NONE
      22             : 
      23             :    PRIVATE
      24             : 
      25             :    CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'ps_wavelet_base'
      26             : 
      27             :    PUBLIC :: scramble_unpack, p_poissonsolver, s_poissonsolver, f_poissonsolver
      28             : 
      29             : CONTAINS
      30             : 
      31             : ! **************************************************************************************************
      32             : !> \brief ...
      33             : !> \param n1 ...
      34             : !> \param n2 ...
      35             : !> \param n3 ...
      36             : !> \param nd1 ...
      37             : !> \param nd2 ...
      38             : !> \param nd3 ...
      39             : !> \param md1 ...
      40             : !> \param md2 ...
      41             : !> \param md3 ...
      42             : !> \param nproc ...
      43             : !> \param iproc ...
      44             : !> \param zf ...
      45             : !> \param scal ...
      46             : !> \param hx ...
      47             : !> \param hy ...
      48             : !> \param hz ...
      49             : !> \param mpi_group ...
      50             : ! **************************************************************************************************
      51       16561 :    SUBROUTINE P_PoissonSolver(n1, n2, n3, nd1, nd2, nd3, md1, md2, md3, nproc, iproc, zf &
      52             :                               , scal, hx, hy, hz, mpi_group)
      53             :       INTEGER, INTENT(in)                                :: n1, n2, n3, nd1, nd2, nd3, md1, md2, &
      54             :                                                             md3, nproc, iproc
      55             :       REAL(KIND=dp), DIMENSION(md1, md3, md2/nproc), &
      56             :          INTENT(inout)                                   :: zf
      57             :       REAL(KIND=dp), INTENT(in)                          :: scal, hx, hy, hz
      58             : 
      59             :       CLASS(mp_comm_type), INTENT(in)                     :: mpi_group
      60             : 
      61             :       INTEGER, PARAMETER                                 :: ncache_optimal = 8*1024
      62             : 
      63             :       INTEGER                                            :: i, i1, i3, ic1, ic2, ic3, inzee, j, j2, &
      64             :                                                             J2stb, J2stf, j3, Jp2stb, Jp2stf, lot, &
      65             :                                                             lzt, ma, mb, ncache, nfft
      66       16561 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: after1, after2, after3, before1, &
      67       16561 :                                                             before2, before3, now1, now2, now3
      68       16561 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :)        :: btrig1, btrig2, btrig3, ftrig1, ftrig2, &
      69       16561 :                                                             ftrig3
      70       16561 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :)     :: zt, zw
      71             :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :, :)  :: zmpi2
      72             :       REAL(KIND=dp), ALLOCATABLE, &
      73       16561 :          DIMENSION(:, :, :, :, :)                        :: zmpi1
      74             : 
      75       16561 :       IF (nd1 .LT. n1/2 + 1) CPABORT("Parallel convolution:ERROR:nd1")
      76       16561 :       IF (nd2 .LT. n2/2 + 1) CPABORT("Parallel convolution:ERROR:nd2")
      77       16561 :       IF (nd3 .LT. n3/2 + 1) CPABORT("Parallel convolution:ERROR:nd3")
      78       16561 :       IF (md1 .LT. n1) CPABORT("Parallel convolution:ERROR:md1")
      79       16561 :       IF (md2 .LT. n2) CPABORT("Parallel convolution:ERROR:md2")
      80       16561 :       IF (md3 .LT. n3) CPABORT("Parallel convolution:ERROR:md3")
      81       16561 :       IF (MOD(nd3, nproc) .NE. 0) CPABORT("Parallel convolution:ERROR:nd3")
      82       16561 :       IF (MOD(md2, nproc) .NE. 0) CPABORT("Parallel convolution:ERROR:md2")
      83             : 
      84             :       !defining work arrays dimensions
      85       16561 :       ncache = ncache_optimal
      86       16561 :       IF (ncache <= MAX(n1, n2, n3)*4) ncache = MAX(n1, n2, n3)*4
      87             : 
      88       16561 :       lzt = n2
      89       16561 :       IF (MOD(n2, 2) .EQ. 0) lzt = lzt + 1
      90       16561 :       IF (MOD(n2, 4) .EQ. 0) lzt = lzt + 1 !maybe this is useless
      91             : 
      92             :       !Allocations
      93       16561 :       ALLOCATE (btrig1(2, ctrig_length))
      94       16561 :       ALLOCATE (ftrig1(2, ctrig_length))
      95       16561 :       ALLOCATE (after1(7))
      96       16561 :       ALLOCATE (now1(7))
      97       16561 :       ALLOCATE (before1(7))
      98       16561 :       ALLOCATE (btrig2(2, ctrig_length))
      99       16561 :       ALLOCATE (ftrig2(2, ctrig_length))
     100       16561 :       ALLOCATE (after2(7))
     101       16561 :       ALLOCATE (now2(7))
     102       16561 :       ALLOCATE (before2(7))
     103       16561 :       ALLOCATE (btrig3(2, ctrig_length))
     104       16561 :       ALLOCATE (ftrig3(2, ctrig_length))
     105       16561 :       ALLOCATE (after3(7))
     106       16561 :       ALLOCATE (now3(7))
     107       16561 :       ALLOCATE (before3(7))
     108       66244 :       ALLOCATE (zw(2, ncache/4, 2))
     109       66244 :       ALLOCATE (zt(2, lzt, n1))
     110       82805 :       ALLOCATE (zmpi2(2, n1, md2/nproc, nd3))
     111       40101 :       IF (nproc .GT. 1) ALLOCATE (zmpi1(2, n1, md2/nproc, nd3/nproc, nproc))
     112             : 
     113             :       !calculating the FFT work arrays (beware on the HalFFT in n3 dimension)
     114       16561 :       CALL ctrig(n3, btrig3, after3, before3, now3, 1, ic3)
     115       16561 :       CALL ctrig(n1, btrig1, after1, before1, now1, 1, ic1)
     116       16561 :       CALL ctrig(n2, btrig2, after2, before2, now2, 1, ic2)
     117      360028 :       DO j = 1, n1
     118      343467 :          ftrig1(1, j) = btrig1(1, j)
     119      360028 :          ftrig1(2, j) = -btrig1(2, j)
     120             :       END DO
     121      360028 :       DO j = 1, n2
     122      343467 :          ftrig2(1, j) = btrig2(1, j)
     123      360028 :          ftrig2(2, j) = -btrig2(2, j)
     124             :       END DO
     125      360028 :       DO j = 1, n3
     126      343467 :          ftrig3(1, j) = btrig3(1, j)
     127      360028 :          ftrig3(2, j) = -btrig3(2, j)
     128             :       END DO
     129             : 
     130             :       ! transform along z axis
     131       16561 :       lot = ncache/(4*n3)
     132       16561 :       IF (lot .LT. 1) THEN
     133             :          WRITE (*, *) &
     134             :             'convolxc_off:ncache has to be enlarged to be able to hold at'// &
     135             :             'least one 1-d FFT of this size even though this will'// &
     136           0 :             'reduce the performance for shorter transform lengths'
     137           0 :          CPABORT("")
     138             :       END IF
     139      306338 :       DO j2 = 1, md2/nproc
     140             :          !this condition ensures that we manage only the interesting part for the FFT
     141      306338 :          IF (iproc*(md2/nproc) + j2 .LE. n2) THEN
     142      587324 :             DO i1 = 1, n1, lot
     143      298171 :                ma = i1
     144      298171 :                mb = MIN(i1 + (lot - 1), n1)
     145      298171 :                nfft = mb - ma + 1
     146             :                !inserting real data into complex array of half length
     147      298171 :                CALL P_fill_upcorn(md1, md3, lot, nfft, n3, zf(i1, 1, j2), zw(1, 1, 1))
     148             : 
     149             :                !performing FFT
     150             :                !input: I1,I3,J2,(Jp2)
     151      298171 :                inzee = 1
     152      931908 :                DO i = 1, ic3
     153             :                   CALL fftstp(lot, nfft, n3, lot, n3, zw(1, 1, inzee), zw(1, 1, 3 - inzee), &
     154      633737 :                               btrig3, after3(i), now3(i), before3(i), 1)
     155      931908 :                   inzee = 3 - inzee
     156             :                END DO
     157             : 
     158             :                !output: I1,i3,J2,(Jp2)
     159             :                !exchanging components
     160             :                !input: I1,i3,J2,(Jp2)
     161      587324 :                CALL scramble_P(i1, j2, lot, nfft, n1, n3, md2, nproc, nd3, zw(1, 1, inzee), zmpi2)
     162             :                !output: I1,J2,i3,(Jp2)
     163             :             END DO
     164             :          END IF
     165             :       END DO
     166             : 
     167             :       !Interprocessor data transposition
     168             :       !input: I1,J2,j3,jp3,(Jp2)
     169       16561 :       IF (nproc .GT. 1) THEN
     170             :          !communication scheduling
     171        4708 :          CALL mpi_group%alltoall(zmpi2, zmpi1, 2*n1*(md2/nproc)*(nd3/nproc))
     172             :       END IF
     173             :       !output: I1,J2,j3,Jp2,(jp3)
     174             : 
     175             :       !now each process perform complete convolution of its planes
     176      174643 :       DO j3 = 1, nd3/nproc
     177             :          !this condition ensures that we manage only the interesting part for the FFT
     178      174643 :          IF (iproc*(nd3/nproc) + j3 .LE. n3/2 + 1) THEN
     179      157193 :             Jp2stb = 1
     180      157193 :             J2stb = 1
     181      157193 :             Jp2stf = 1
     182      157193 :             J2stf = 1
     183             : 
     184             :             ! transform along x axis
     185      157193 :             lot = ncache/(4*n1)
     186      157193 :             IF (lot .LT. 1) THEN
     187             :                WRITE (*, *) &
     188             :                   'convolxc_off:ncache has to be enlarged to be able to hold at'// &
     189             :                   'least one 1-d FFT of this size even though this will'// &
     190           0 :                   'reduce the performance for shorter transform lengths'
     191           0 :                CPABORT("")
     192             :             END IF
     193             : 
     194      319062 :             DO j = 1, n2, lot
     195      161869 :                ma = j
     196      161869 :                mb = MIN(j + (lot - 1), n2)
     197      161869 :                nfft = mb - ma + 1
     198             : 
     199             :                !reverse index ordering, leaving the planes to be transformed at the end
     200             :                !input: I1,J2,j3,Jp2,(jp3)
     201      161869 :                IF (nproc .EQ. 1) THEN
     202      130318 :                   CALL P_mpiswitch_upcorn(j3, nfft, Jp2stb, J2stb, lot, n1, md2, nd3, nproc, zmpi2, zw(1, 1, 1))
     203             :                ELSE
     204       31551 :                   CALL P_mpiswitch_upcorn(j3, nfft, Jp2stb, J2stb, lot, n1, md2, nd3, nproc, zmpi1, zw(1, 1, 1))
     205             :                END IF
     206             :                !output: J2,Jp2,I1,j3,(jp3)
     207             : 
     208             :                !performing FFT
     209             :                !input: I2,I1,j3,(jp3)
     210      161869 :                inzee = 1
     211      343294 :                DO i = 1, ic1 - 1
     212             :                   CALL fftstp(lot, nfft, n1, lot, n1, zw(1, 1, inzee), zw(1, 1, 3 - inzee), &
     213      181425 :                               btrig1, after1(i), now1(i), before1(i), 1)
     214      343294 :                   inzee = 3 - inzee
     215             :                END DO
     216             : 
     217             :                !storing the last step into zt array
     218      161869 :                i = ic1
     219             :                CALL fftstp(lot, nfft, n1, lzt, n1, zw(1, 1, inzee), zt(1, j, 1), &
     220      319062 :                            btrig1, after1(i), now1(i), before1(i), 1)
     221             :                !output: I2,i1,j3,(jp3)
     222             :             END DO
     223             : 
     224             :             !transform along y axis
     225      157193 :             lot = ncache/(4*n2)
     226      157193 :             IF (lot .LT. 1) THEN
     227             :                WRITE (*, *) &
     228             :                   'convolxc_off:ncache has to be enlarged to be able to hold at'// &
     229             :                   'least one 1-d FFT of this size even though this will'// &
     230           0 :                   'reduce the performance for shorter transform lengths'
     231           0 :                CPABORT("")
     232             :             END IF
     233             : 
     234      319062 :             DO j = 1, n1, lot
     235      161869 :                ma = j
     236      161869 :                mb = MIN(j + (lot - 1), n1)
     237      161869 :                nfft = mb - ma + 1
     238             : 
     239             :                !reverse ordering
     240             :                !input: I2,i1,j3,(jp3)
     241      161869 :                CALL P_switch_upcorn(nfft, n2, lot, n1, lzt, zt(1, 1, j), zw(1, 1, 1))
     242             :                !output: i1,I2,j3,(jp3)
     243             : 
     244             :                !performing FFT
     245             :                !input: i1,I2,j3,(jp3)
     246      161869 :                inzee = 1
     247      505163 :                DO i = 1, ic2
     248             :                   CALL fftstp(lot, nfft, n2, lot, n2, zw(1, 1, inzee), zw(1, 1, 3 - inzee), &
     249      343294 :                               btrig2, after2(i), now2(i), before2(i), 1)
     250      505163 :                   inzee = 3 - inzee
     251             :                END DO
     252             :                !output: i1,i2,j3,(jp3)
     253             : 
     254             :                !Multiply with kernel in fourier space
     255      161869 :                i3 = iproc*(nd3/nproc) + j3
     256      161869 :                CALL P_multkernel(n1, n2, n3, lot, nfft, j, i3, zw(1, 1, inzee), hx, hy, hz)
     257             : 
     258             :                !TRANSFORM BACK IN REAL SPACE
     259             : 
     260             :                !transform along y axis
     261             :                !input: i1,i2,j3,(jp3)
     262      505163 :                DO i = 1, ic2
     263             :                   CALL fftstp(lot, nfft, n2, lot, n2, zw(1, 1, inzee), zw(1, 1, 3 - inzee), &
     264      343294 :                               ftrig2, after2(i), now2(i), before2(i), -1)
     265      505163 :                   inzee = 3 - inzee
     266             :                END DO
     267             : 
     268             :                !reverse ordering
     269             :                !input: i1,I2,j3,(jp3)
     270      319062 :                CALL P_unswitch_downcorn(nfft, n2, lot, n1, lzt, zw(1, 1, inzee), zt(1, 1, j))
     271             :                !output: I2,i1,j3,(jp3)
     272             :             END DO
     273             : 
     274             :             !transform along x axis
     275             :             !input: I2,i1,j3,(jp3)
     276      157193 :             lot = ncache/(4*n1)
     277      319062 :             DO j = 1, n2, lot
     278      161869 :                ma = j
     279      161869 :                mb = MIN(j + (lot - 1), n2)
     280      161869 :                nfft = mb - ma + 1
     281             : 
     282             :                !performing FFT
     283      161869 :                i = 1
     284             :                CALL fftstp(lzt, nfft, n1, lot, n1, zt(1, j, 1), zw(1, 1, 1), &
     285      161869 :                            ftrig1, after1(i), now1(i), before1(i), -1)
     286             : 
     287      161869 :                inzee = 1
     288      343294 :                DO i = 2, ic1
     289             :                   CALL fftstp(lot, nfft, n1, lot, n1, zw(1, 1, inzee), zw(1, 1, 3 - inzee), &
     290      181425 :                               ftrig1, after1(i), now1(i), before1(i), -1)
     291      343294 :                   inzee = 3 - inzee
     292             :                END DO
     293             :                !output: I2,I1,j3,(jp3)
     294             : 
     295             :                !reverse ordering
     296             :                !input: J2,Jp2,I1,j3,(jp3)
     297      319062 :                IF (nproc .EQ. 1) THEN
     298      130318 :                   CALL P_unmpiswitch_downcorn(j3, nfft, Jp2stf, J2stf, lot, n1, md2, nd3, nproc, zw(1, 1, inzee), zmpi2)
     299             :                ELSE
     300       31551 :                   CALL P_unmpiswitch_downcorn(j3, nfft, Jp2stf, J2stf, lot, n1, md2, nd3, nproc, zw(1, 1, inzee), zmpi1)
     301             :                END IF
     302             :                ! output: I1,J2,j3,Jp2,(jp3)
     303             :             END DO
     304             :          END IF
     305             :       END DO
     306             : 
     307             :       !Interprocessor data transposition
     308             :       !input: I1,J2,j3,Jp2,(jp3)
     309       16561 :       IF (nproc .GT. 1) THEN
     310             :          !communication scheduling
     311        4708 :          CALL mpi_group%alltoall(zmpi1, zmpi2, 2*n1*(md2/nproc)*(nd3/nproc))
     312             :       END IF
     313             :       !output: I1,J2,j3,jp3,(Jp2)
     314             :       !transform along z axis
     315             :       !input: I1,J2,i3,(Jp2)
     316       16561 :       lot = ncache/(4*n3)
     317      306338 :       DO j2 = 1, md2/nproc
     318             :          !this condition ensures that we manage only the interesting part for the FFT
     319      306338 :          IF (iproc*(md2/nproc) + j2 .LE. n2) THEN
     320      587324 :             DO i1 = 1, n1, lot
     321      298171 :                ma = i1
     322      298171 :                mb = MIN(i1 + (lot - 1), n1)
     323      298171 :                nfft = mb - ma + 1
     324             : 
     325             :                !reverse ordering
     326             :                !input: I1,J2,i3,(Jp2)
     327      298171 :                CALL unscramble_P(i1, j2, lot, nfft, n1, n3, md2, nproc, nd3, zmpi2, zw(1, 1, 1))
     328             :                !output: I1,i3,J2,(Jp2)
     329             : 
     330             :                !performing FFT
     331             :                !input: I1,i3,J2,(Jp2)
     332      298171 :                inzee = 1
     333      931908 :                DO i = 1, ic3
     334             :                   CALL fftstp(lot, nfft, n3, lot, n3, zw(1, 1, inzee), zw(1, 1, 3 - inzee), &
     335      633737 :                               ftrig3, after3(i), now3(i), before3(i), -1)
     336      931908 :                   inzee = 3 - inzee
     337             :                END DO
     338             :                !output: I1,I3,J2,(Jp2)
     339             : 
     340             :                !rebuild the output array
     341      587324 :                CALL P_unfill_downcorn(md1, md3, lot, nfft, n3, zw(1, 1, inzee), zf(i1, 1, j2), scal)
     342             : 
     343             :             END DO
     344             :          END IF
     345             :       END DO
     346             : 
     347             :       !De-allocations
     348       16561 :       DEALLOCATE (btrig1)
     349       16561 :       DEALLOCATE (ftrig1)
     350       16561 :       DEALLOCATE (after1)
     351       16561 :       DEALLOCATE (now1)
     352       16561 :       DEALLOCATE (before1)
     353       16561 :       DEALLOCATE (btrig2)
     354       16561 :       DEALLOCATE (ftrig2)
     355       16561 :       DEALLOCATE (after2)
     356       16561 :       DEALLOCATE (now2)
     357       16561 :       DEALLOCATE (before2)
     358       16561 :       DEALLOCATE (btrig3)
     359       16561 :       DEALLOCATE (ftrig3)
     360       16561 :       DEALLOCATE (after3)
     361       16561 :       DEALLOCATE (now3)
     362       16561 :       DEALLOCATE (before3)
     363       16561 :       DEALLOCATE (zmpi2)
     364       16561 :       DEALLOCATE (zw)
     365       16561 :       DEALLOCATE (zt)
     366       16561 :       IF (nproc .GT. 1) DEALLOCATE (zmpi1)
     367             :       !  call timing(iproc,'PSolv_comput  ','OF')
     368       16561 :    END SUBROUTINE P_PoissonSolver
     369             : 
     370             : ! **************************************************************************************************
     371             : !> \brief ...
     372             : !> \param j3 ...
     373             : !> \param nfft ...
     374             : !> \param Jp2stb ...
     375             : !> \param J2stb ...
     376             : !> \param lot ...
     377             : !> \param n1 ...
     378             : !> \param md2 ...
     379             : !> \param nd3 ...
     380             : !> \param nproc ...
     381             : !> \param zmpi1 ...
     382             : !> \param zw ...
     383             : ! **************************************************************************************************
     384      161869 :    SUBROUTINE P_mpiswitch_upcorn(j3, nfft, Jp2stb, J2stb, lot, n1, md2, nd3, nproc, zmpi1, zw)
     385             :       INTEGER, INTENT(in)                                :: j3, nfft
     386             :       INTEGER, INTENT(inout)                             :: Jp2stb, J2stb
     387             :       INTEGER, INTENT(in)                                :: lot, n1, md2, nd3, nproc
     388             :       REAL(KIND=dp), &
     389             :          DIMENSION(2, n1, md2/nproc, nd3/nproc, nproc), &
     390             :          INTENT(in)                                      :: zmpi1
     391             :       REAL(KIND=dp), DIMENSION(2, lot, n1), &
     392             :          INTENT(inout)                                   :: zw
     393             : 
     394             :       INTEGER                                            :: I1, J2, Jp2, mfft
     395             : 
     396      161869 :       mfft = 0
     397      340096 :       DO Jp2 = Jp2stb, nproc
     398     3697451 :          DO J2 = J2stb, md2/nproc
     399     3519224 :             mfft = mfft + 1
     400     3519224 :             IF (mfft .GT. nfft) THEN
     401       12841 :                Jp2stb = Jp2
     402       12841 :                J2stb = J2
     403       12841 :                RETURN
     404             :             END IF
     405    93233601 :             DO I1 = 1, n1
     406    89548991 :                zw(1, mfft, I1) = zmpi1(1, I1, J2, j3, Jp2)
     407    93055374 :                zw(2, mfft, I1) = zmpi1(2, I1, J2, j3, Jp2)
     408             :             END DO
     409             :          END DO
     410      327255 :          J2stb = 1
     411             :       END DO
     412             :    END SUBROUTINE P_mpiswitch_upcorn
     413             : 
     414             : ! **************************************************************************************************
     415             : !> \brief ...
     416             : !> \param nfft ...
     417             : !> \param n2 ...
     418             : !> \param lot ...
     419             : !> \param n1 ...
     420             : !> \param lzt ...
     421             : !> \param zt ...
     422             : !> \param zw ...
     423             : ! **************************************************************************************************
     424      161869 :    SUBROUTINE P_switch_upcorn(nfft, n2, lot, n1, lzt, zt, zw)
     425             :       INTEGER, INTENT(in)                                :: nfft, n2, lot, n1, lzt
     426             :       REAL(KIND=dp), DIMENSION(2, lzt, n1), INTENT(in)   :: zt
     427             :       REAL(KIND=dp), DIMENSION(2, lot, n2), &
     428             :          INTENT(inout)                                   :: zw
     429             : 
     430             :       INTEGER                                            :: i, j
     431             : 
     432     3668252 :       DO j = 1, nfft
     433    93217243 :          DO i = 1, n2
     434    89548991 :             zw(1, j, i) = zt(1, i, j)
     435    93055374 :             zw(2, j, i) = zt(2, i, j)
     436             :          END DO
     437             :       END DO
     438             : 
     439      161869 :    END SUBROUTINE P_switch_upcorn
     440             : 
     441             : ! **************************************************************************************************
     442             : !> \brief ...
     443             : !> \param nfft ...
     444             : !> \param n2 ...
     445             : !> \param lot ...
     446             : !> \param n1 ...
     447             : !> \param lzt ...
     448             : !> \param zw ...
     449             : !> \param zt ...
     450             : ! **************************************************************************************************
     451      161869 :    SUBROUTINE P_unswitch_downcorn(nfft, n2, lot, n1, lzt, zw, zt)
     452             :       INTEGER, INTENT(in)                                :: nfft, n2, lot, n1, lzt
     453             :       REAL(KIND=dp), DIMENSION(2, lot, n2), INTENT(in)   :: zw
     454             :       REAL(KIND=dp), DIMENSION(2, lzt, n1), &
     455             :          INTENT(inout)                                   :: zt
     456             : 
     457             :       INTEGER                                            :: i, j
     458             : 
     459     3668252 :       DO j = 1, nfft
     460    93217243 :          DO i = 1, n2
     461    89548991 :             zt(1, i, j) = zw(1, j, i)
     462    93055374 :             zt(2, i, j) = zw(2, j, i)
     463             :          END DO
     464             :       END DO
     465             : 
     466      161869 :    END SUBROUTINE P_unswitch_downcorn
     467             : 
     468             : ! **************************************************************************************************
     469             : !> \brief ...
     470             : !> \param j3 ...
     471             : !> \param nfft ...
     472             : !> \param Jp2stf ...
     473             : !> \param J2stf ...
     474             : !> \param lot ...
     475             : !> \param n1 ...
     476             : !> \param md2 ...
     477             : !> \param nd3 ...
     478             : !> \param nproc ...
     479             : !> \param zw ...
     480             : !> \param zmpi1 ...
     481             : ! **************************************************************************************************
     482      161869 :    SUBROUTINE P_unmpiswitch_downcorn(j3, nfft, Jp2stf, J2stf, lot, n1, md2, nd3, nproc, zw, zmpi1)
     483             :       INTEGER, INTENT(in)                                :: j3, nfft
     484             :       INTEGER, INTENT(inout)                             :: Jp2stf, J2stf
     485             :       INTEGER, INTENT(in)                                :: lot, n1, md2, nd3, nproc
     486             :       REAL(KIND=dp), DIMENSION(2, lot, n1), INTENT(in)   :: zw
     487             :       REAL(KIND=dp), &
     488             :          DIMENSION(2, n1, md2/nproc, nd3/nproc, nproc), &
     489             :          INTENT(inout)                                   :: zmpi1
     490             : 
     491             :       INTEGER                                            :: I1, J2, Jp2, mfft
     492             : 
     493      161869 :       mfft = 0
     494      340096 :       DO Jp2 = Jp2stf, nproc
     495     3697451 :          DO J2 = J2stf, md2/nproc
     496     3519224 :             mfft = mfft + 1
     497     3519224 :             IF (mfft .GT. nfft) THEN
     498       12841 :                Jp2stf = Jp2
     499       12841 :                J2stf = J2
     500       12841 :                RETURN
     501             :             END IF
     502    93233601 :             DO I1 = 1, n1
     503    89548991 :                zmpi1(1, I1, J2, j3, Jp2) = zw(1, mfft, I1)
     504    93055374 :                zmpi1(2, I1, J2, j3, Jp2) = zw(2, mfft, I1)
     505             :             END DO
     506             :          END DO
     507      327255 :          J2stf = 1
     508             :       END DO
     509             :    END SUBROUTINE P_unmpiswitch_downcorn
     510             : 
     511             : ! **************************************************************************************************
     512             : !> \brief (Based on suitable modifications of S.Goedecker routines)
     513             : !>      Restore data into output array
     514             : !> \param md1 Dimensions of the undistributed part of the real grid
     515             : !> \param md3 Dimensions of the undistributed part of the real grid
     516             : !> \param lot ...
     517             : !> \param nfft number of planes
     518             : !> \param n3 (twice the) dimension of the last FFTtransform
     519             : !> \param zw FFT work array
     520             : !> \param zf Original distributed density as well as
     521             : !>                   Distributed solution of the poisson equation (inout)
     522             : !> \param scal Needed to achieve unitarity and correct dimensions
     523             : !> \date February 2006
     524             : !> \author S. Goedecker, L. Genovese
     525             : !> \note Assuming that high frequencies are in the corners
     526             : !>      and that n3 is multiple of 4
     527             : !>
     528             : !>  RESTRICTIONS on USAGE
     529             : !>      Copyright (C) Stefan Goedecker, Cornell University, Ithaca, USA, 1994
     530             : !>      Copyright (C) Stefan Goedecker, MPI Stuttgart, Germany, 1999
     531             : !>      Copyright (C) 2002 Stefan Goedecker, CEA Grenoble
     532             : !>      This file is distributed under the terms of the
     533             : !>      GNU General Public License, see http://www.gnu.org/copyleft/gpl.txt .
     534             : ! **************************************************************************************************
     535      298171 :    SUBROUTINE P_unfill_downcorn(md1, md3, lot, nfft, n3, zw, zf, scal)
     536             :       INTEGER, INTENT(in)                                :: md1, md3, lot, nfft, n3
     537             :       REAL(KIND=dp), DIMENSION(2, lot, n3), INTENT(in)   :: zw
     538             :       REAL(KIND=dp), DIMENSION(md1, md3), INTENT(inout)  :: zf
     539             :       REAL(KIND=dp), INTENT(in)                          :: scal
     540             : 
     541             :       INTEGER                                            :: i1, i3
     542             :       REAL(KIND=dp)                                      :: pot1
     543             : 
     544     7275604 :       DO i3 = 1, n3
     545   174441101 :          DO i1 = 1, nfft
     546   167165497 :             pot1 = scal*zw(1, i1, i3)
     547   174142930 :             zf(i1, i3) = pot1
     548             :          END DO
     549             :       END DO
     550             : 
     551      298171 :    END SUBROUTINE P_unfill_downcorn
     552             : 
     553             : ! **************************************************************************************************
     554             : !> \brief ...
     555             : !> \param md1 ...
     556             : !> \param md3 ...
     557             : !> \param lot ...
     558             : !> \param nfft ...
     559             : !> \param n3 ...
     560             : !> \param zf ...
     561             : !> \param zw ...
     562             : ! **************************************************************************************************
     563      298171 :    SUBROUTINE P_fill_upcorn(md1, md3, lot, nfft, n3, zf, zw)
     564             :       INTEGER, INTENT(in)                                :: md1, md3, lot, nfft, n3
     565             :       REAL(KIND=dp), DIMENSION(md1, md3), INTENT(in)     :: zf
     566             :       REAL(KIND=dp), DIMENSION(2, lot, n3), &
     567             :          INTENT(inout)                                   :: zw
     568             : 
     569             :       INTEGER                                            :: i1, i3
     570             : 
     571     7275604 :       DO i3 = 1, n3
     572   174441101 :          DO i1 = 1, nfft
     573   167165497 :             zw(1, i1, i3) = zf(i1, i3)
     574   174142930 :             zw(2, i1, i3) = 0._dp
     575             :          END DO
     576             :       END DO
     577             : 
     578      298171 :    END SUBROUTINE P_fill_upcorn
     579             : 
     580             : ! **************************************************************************************************
     581             : !> \brief (Based on suitable modifications of S.Goedecker routines)
     582             : !>      Assign the correct planes to the work array zmpi2
     583             : !>      in order to prepare for interprocessor data transposition.
     584             : !> \param i1 Starting points of the plane and number of remaining lines
     585             : !> \param j2 Starting points of the plane and number of remaining lines
     586             : !> \param lot Starting points of the plane and number of remaining lines
     587             : !> \param nfft Starting points of the plane and number of remaining lines
     588             : !> \param n1 logical dimension of the FFT transform, reference for work arrays
     589             : !> \param n3 logical dimension of the FFT transform, reference for work arrays
     590             : !> \param md2 Dimensions of real grid
     591             : !> \param nproc ...
     592             : !> \param nd3 Dimensions of the kernel
     593             : !> \param zw Work array (input)
     594             : !> \param zmpi2 Work array for multiprocessor manipulation (output)
     595             : !> \date February 2006
     596             : !> \author S. Goedecker, L. Genovese
     597             : !> \note
     598             : !>  RESTRICTIONS on USAGE
     599             : !>      Copyright (C) Stefan Goedecker, Cornell University, Ithaca, USA, 1994
     600             : !>      Copyright (C) Stefan Goedecker, MPI Stuttgart, Germany, 1999
     601             : !>      Copyright (C) 2002 Stefan Goedecker, CEA Grenoble
     602             : !>      This file is distributed under the terms of the
     603             : !>      GNU General Public License, see http://www.gnu.org/copyleft/gpl.txt .
     604             : ! **************************************************************************************************
     605      298171 :    SUBROUTINE scramble_P(i1, j2, lot, nfft, n1, n3, md2, nproc, nd3, zw, zmpi2)
     606             :       INTEGER, INTENT(in)                                :: i1, j2, lot, nfft, n1, n3, md2, nproc, &
     607             :                                                             nd3
     608             :       REAL(KIND=dp), DIMENSION(2, lot, n3), INTENT(in)   :: zw
     609             :       REAL(KIND=dp), DIMENSION(2, n1, md2/nproc, nd3), &
     610             :          INTENT(inout)                                   :: zmpi2
     611             : 
     612             :       INTEGER                                            :: i, i3
     613             : 
     614     4057058 :       DO i3 = 1, n3/2 + 1
     615    93606049 :          DO i = 0, nfft - 1
     616    89548991 :             zmpi2(1, i1 + i, j2, i3) = zw(1, i + 1, i3)
     617    93307878 :             zmpi2(2, i1 + i, j2, i3) = zw(2, i + 1, i3)
     618             :          END DO
     619             :       END DO
     620             : 
     621      298171 :    END SUBROUTINE scramble_P
     622             : 
     623             : ! **************************************************************************************************
     624             : !> \brief (Based on suitable modifications of S.Goedecker routines)
     625             : !>      Insert the correct planes of the work array zmpi2
     626             : !>      in order to prepare for backward FFT transform
     627             : !> \param i1 Starting points of the plane and number of remaining lines
     628             : !> \param j2 Starting points of the plane and number of remaining lines
     629             : !> \param lot Starting points of the plane and number of remaining lines
     630             : !> \param nfft Starting points of the plane and number of remaining lines
     631             : !> \param n1 logical dimension of the FFT transform, reference for work arrays
     632             : !> \param n3 logical dimension of the FFT transform, reference for work arrays
     633             : !> \param md2 Dimensions of real grid
     634             : !> \param nproc ...
     635             : !> \param nd3 Dimensions of the kernel
     636             : !> \param zmpi2 Work array for multiprocessor manipulation (output)
     637             : !> \param zw Work array (input)
     638             : !> \date February 2006
     639             : !> \author S. Goedecker, L. Genovese
     640             : !> \note
     641             : !>  RESTRICTIONS on USAGE
     642             : !>      Copyright (C) Stefan Goedecker, Cornell University, Ithaca, USA, 1994
     643             : !>      Copyright (C) Stefan Goedecker, MPI Stuttgart, Germany, 1999
     644             : !>      Copyright (C) 2002 Stefan Goedecker, CEA Grenoble
     645             : !>      This file is distributed under the terms of the
     646             : !>      GNU General Public License, see http://www.gnu.org/copyleft/gpl.txt .
     647             : ! **************************************************************************************************
     648      298171 :    SUBROUTINE unscramble_P(i1, j2, lot, nfft, n1, n3, md2, nproc, nd3, zmpi2, zw)
     649             :       INTEGER, INTENT(in)                                :: i1, j2, lot, nfft, n1, n3, md2, nproc, &
     650             :                                                             nd3
     651             :       REAL(KIND=dp), DIMENSION(2, n1, md2/nproc, nd3), &
     652             :          INTENT(in)                                      :: zmpi2
     653             :       REAL(KIND=dp), DIMENSION(2, lot, n3), &
     654             :          INTENT(inout)                                   :: zw
     655             : 
     656             :       INTEGER                                            :: i, i3, j3
     657             : 
     658      298171 :       i3 = 1
     659     6788632 :       DO i = 0, nfft - 1
     660     6490461 :          zw(1, i + 1, i3) = zmpi2(1, i1 + i, j2, i3)
     661     6788632 :          zw(2, i + 1, i3) = zmpi2(2, i1 + i, j2, i3)
     662             :       END DO
     663             : 
     664     3758887 :       DO i3 = 2, n3/2 + 1
     665     3460716 :          j3 = n3 + 2 - i3
     666    86817417 :          DO i = 0, nfft - 1
     667    83058530 :             zw(1, i + 1, j3) = zmpi2(1, i1 + i, j2, i3)
     668    83058530 :             zw(2, i + 1, j3) = -zmpi2(2, i1 + i, j2, i3)
     669    83058530 :             zw(1, i + 1, i3) = zmpi2(1, i1 + i, j2, i3)
     670    86519246 :             zw(2, i + 1, i3) = zmpi2(2, i1 + i, j2, i3)
     671             :          END DO
     672             :       END DO
     673             : 
     674      298171 :    END SUBROUTINE unscramble_P
     675             : 
     676             : ! **************************************************************************************************
     677             : !> \brief (Based on suitable modifications of S.Goedecker routines)
     678             : !>      Multiply with the kernel taking into account its symmetry
     679             : !>      Conceived to be used into convolution loops
     680             : !> \param n1 ...
     681             : !> \param n2 ...
     682             : !> \param n3 ...
     683             : !> \param lot ...
     684             : !> \param nfft ...
     685             : !> \param jS ...
     686             : !> \param i3 ...
     687             : !> \param zw Work array (input/output)
     688             : !>      n1,n2:      logical dimension of the FFT transform, reference for zw
     689             : !>      nd1,nd2:    Dimensions of POT
     690             : !>      jS,j3,nfft: starting point of the plane and number of remaining lines
     691             : !>
     692             : !>  RESTRICTIONS on USAGE
     693             : !>      Copyright (C) Stefan Goedecker, Cornell University, Ithaca, USA, 1994
     694             : !>      Copyright (C) Stefan Goedecker, MPI Stuttgart, Germany, 1999
     695             : !>      Copyright (C) 2002 Stefan Goedecker, CEA Grenoble
     696             : !>      This file is distributed under the terms of the
     697             : !>       GNU General Public License, see http://www.gnu.org/copyleft/gpl.txt .
     698             : !> \param hx ...
     699             : !> \param hy ...
     700             : !> \param hz ...
     701             : !> \date February 2006
     702             : !> \author S. Goedecker, L. Genovese
     703             : ! **************************************************************************************************
     704      161869 :    SUBROUTINE P_multkernel(n1, n2, n3, lot, nfft, jS, i3, zw, hx, hy, hz)
     705             :       INTEGER, INTENT(in)                                :: n1, n2, n3, lot, nfft, jS, i3
     706             :       REAL(KIND=dp), DIMENSION(2, lot, n2), &
     707             :          INTENT(inout)                                   :: zw
     708             :       REAL(KIND=dp), INTENT(in)                          :: hx, hy, hz
     709             : 
     710             :       INTEGER                                            :: i1, i2, j1, j2, j3
     711             :       REAL(KIND=dp)                                      :: fourpi2, ker, mu3, p1, p2, pi
     712             : 
     713      161869 :       pi = 4._dp*ATAN(1._dp)
     714      161869 :       fourpi2 = 4._dp*pi**2
     715      161869 :       j3 = i3 !n3/2+1-abs(n3/2+2-i3)
     716      161869 :       mu3 = REAL(j3 - 1, KIND=dp)/REAL(n3, KIND=dp)
     717      161869 :       mu3 = (mu3/hy)**2 !beware of the exchanged dimension
     718             :       !Body
     719             :       !generic case
     720     3920756 :       DO i2 = 1, n2
     721    93469747 :          DO i1 = 1, nfft
     722    89548991 :             j1 = i1 + jS - 1
     723    89548991 :             j1 = j1 - (j1/(n1/2 + 2))*n1 !n1/2+1-abs(n1/2+2-jS-i1)
     724    89548991 :             j2 = i2 - (i2/(n2/2 + 2))*n2 !n2/2+1-abs(n2/2+1-i2)
     725    89548991 :             p1 = REAL(j1 - 1, KIND=dp)/REAL(n1, KIND=dp)
     726    89548991 :             p2 = REAL(j2 - 1, KIND=dp)/REAL(n2, KIND=dp)
     727    89548991 :             ker = -fourpi2*((p1/hx)**2 + (p2/hz)**2 + mu3) !beware of the exchanged dimension
     728    89548991 :             IF (ker /= 0._dp) ker = 1._dp/ker
     729    89548991 :             zw(1, i1, i2) = zw(1, i1, i2)*ker
     730    93307878 :             zw(2, i1, i2) = zw(2, i1, i2)*ker
     731             :          END DO
     732             :       END DO
     733             : 
     734      161869 :    END SUBROUTINE P_multkernel
     735             : 
     736             : ! **************************************************************************************************
     737             : !> \brief (Based on suitable modifications of S.Goedecker routines)
     738             : !>      Multiply with the kernel taking into account its symmetry
     739             : !>      Conceived to be used into convolution loops
     740             : !> \param nd1 ...
     741             : !> \param nd2 ...
     742             : !> \param n1 ...
     743             : !> \param n2 ...
     744             : !> \param lot ...
     745             : !> \param nfft ...
     746             : !> \param jS ...
     747             : !> \param pot Kernel, symmetric and real, half the length
     748             : !> \param zw Work array (input/output)
     749             : !>      n1,n2:    logical dimension of the FFT transform, reference for zw
     750             : !>      nd1,nd2:  Dimensions of POT
     751             : !>      jS, nfft: starting point of the plane and number of remaining lines
     752             : !>
     753             : !>  RESTRICTIONS on USAGE
     754             : !>      Copyright (C) Stefan Goedecker, Cornell University, Ithaca, USA, 1994
     755             : !>      Copyright (C) Stefan Goedecker, MPI Stuttgart, Germany, 1999
     756             : !>      Copyright (C) 2002 Stefan Goedecker, CEA Grenoble
     757             : !>      This file is distributed under the terms of the
     758             : !>       GNU General Public License, see http://www.gnu.org/copyleft/gpl.txt .
     759             : !> \date February 2006
     760             : !> \author S. Goedecker, L. Genovese
     761             : ! **************************************************************************************************
     762     1701650 :    SUBROUTINE multkernel(nd1, nd2, n1, n2, lot, nfft, jS, pot, zw)
     763             :       INTEGER, INTENT(in)                                :: nd1, nd2, n1, n2, lot, nfft, jS
     764             :       REAL(KIND=dp), DIMENSION(nd1, nd2), INTENT(in)     :: pot
     765             :       REAL(KIND=dp), DIMENSION(2, lot, n2), &
     766             :          INTENT(inout)                                   :: zw
     767             : 
     768             :       INTEGER                                            :: i2, j, j1, j2
     769             : 
     770    34628440 :       DO j = 1, nfft
     771    32926790 :          j1 = j + jS - 1
     772    32926790 :          j1 = j1 + (j1/(n1/2 + 2))*(n1 + 2 - 2*j1)
     773    32926790 :          zw(1, j, 1) = zw(1, j, 1)*pot(j1, 1)
     774    34628440 :          zw(2, j, 1) = zw(2, j, 1)*pot(j1, 1)
     775             :       END DO
     776             : 
     777             :       !generic case
     778    82273442 :       DO i2 = 2, n2/2
     779  1521396758 :          DO j = 1, nfft
     780  1439123316 :             j1 = j + jS - 1
     781  1439123316 :             j1 = j1 + (j1/(n1/2 + 2))*(n1 + 2 - 2*j1)
     782  1439123316 :             j2 = n2 + 2 - i2
     783  1439123316 :             zw(1, j, i2) = zw(1, j, i2)*pot(j1, i2)
     784  1439123316 :             zw(2, j, i2) = zw(2, j, i2)*pot(j1, i2)
     785  1439123316 :             zw(1, j, j2) = zw(1, j, j2)*pot(j1, i2)
     786  1519695108 :             zw(2, j, j2) = zw(2, j, j2)*pot(j1, i2)
     787             :          END DO
     788             :       END DO
     789             : 
     790             :       !case i2=n2/2+1
     791    34628440 :       DO j = 1, nfft
     792    32926790 :          j1 = j + jS - 1
     793    32926790 :          j1 = j1 + (j1/(n1/2 + 2))*(n1 + 2 - 2*j1)
     794    32926790 :          j2 = n2/2 + 1
     795    32926790 :          zw(1, j, j2) = zw(1, j, j2)*pot(j1, j2)
     796    34628440 :          zw(2, j, j2) = zw(2, j, j2)*pot(j1, j2)
     797             :       END DO
     798             : 
     799     1701650 :    END SUBROUTINE multkernel
     800             : 
     801             : ! **************************************************************************************************
     802             : !> \brief !HERE POT MUST BE THE KERNEL (BEWARE THE HALF DIMENSION)
     803             : !> ****h* BigDFT/S_PoissonSolver
     804             : !>      (Based on suitable modifications of S.Goedecker routines)
     805             : !>      Applies the local FFT space Kernel to the density in Real space.
     806             : !>      Does NOT calculate the LDA exchange-correlation terms
     807             : !> \param n1 logical dimension of the transform.
     808             : !> \param n2 logical dimension of the transform.
     809             : !> \param n3 logical dimension of the transform.
     810             : !> \param nd1 Dimension of POT
     811             : !> \param nd2 Dimension of POT
     812             : !> \param nd3 Dimension of POT
     813             : !> \param md1 Dimension of ZF
     814             : !> \param md2 Dimension of ZF
     815             : !> \param md3 Dimension of ZF
     816             : !> \param nproc number of processors used as returned by MPI_COMM_SIZE
     817             : !> \param iproc [0:nproc-1] number of processor as returned by MPI_COMM_RANK
     818             : !> \param pot Kernel, only the distributed part (REAL)
     819             : !>                   POT(i1,i2,i3)
     820             : !>                   i1=1,nd1 , i2=1,nd2 , i3=1,nd3/nproc
     821             : !> \param zf Density (input/output)
     822             : !>                   ZF(i1,i3,i2)
     823             : !>                   i1=1,md1 , i2=1,md2/nproc , i3=1,md3
     824             : !> \param scal factor of renormalization of the FFT in order to acheve unitarity
     825             : !>                   and the correct dimension
     826             : !> \param mpi_group ...
     827             : !> \date October 2006
     828             : !> \author S. Goedecker, L. Genovese
     829             : !> \note   As transform lengths most products of the prime factors 2,3,5 are allowed.
     830             : !>                   The detailed table with allowed transform lengths can
     831             : !>                   be found in subroutine CTRIG
     832             : !> \note
     833             : !>  RESTRICTIONS on USAGE
     834             : !>      Copyright (C) Stefan Goedecker, Cornell University, Ithaca, USA, 1994
     835             : !>      Copyright (C) Stefan Goedecker, MPI Stuttgart, Germany, 1999
     836             : !>      Copyright (C) 2002 Stefan Goedecker, CEA Grenoble
     837             : !>      This file is distributed under the terms of the
     838             : !>       GNU General Public License, see http://www.gnu.org/copyleft/gpl.txt .
     839             : ! **************************************************************************************************
     840          18 :    SUBROUTINE S_PoissonSolver(n1, n2, n3, nd1, nd2, nd3, md1, md2, md3, nproc, iproc, pot, zf, &
     841             :                               scal, mpi_group)
     842             :       INTEGER, INTENT(in)                                :: n1, n2, n3, nd1, nd2, nd3, md1, md2, &
     843             :                                                             md3, nproc, iproc
     844             :       REAL(KIND=dp), DIMENSION(nd1, nd2, nd3/nproc), &
     845             :          INTENT(in)                                      :: pot
     846             :       REAL(KIND=dp), DIMENSION(md1, md3, md2/nproc), &
     847             :          INTENT(inout)                                   :: zf
     848             :       REAL(KIND=dp), INTENT(in)                          :: scal
     849             : 
     850             :       CLASS(mp_comm_type), INTENT(in)                     :: mpi_group
     851             : 
     852             :       CHARACTER(len=*), PARAMETER                        :: routineN = 'S_PoissonSolver'
     853             :       INTEGER, PARAMETER                                 :: ncache_optimal = 8*1024
     854             : 
     855             :       INTEGER                                            :: handle, i, i1, i3, ic1, ic2, ic3, inzee, &
     856             :                                                             j, j2, J2stb, J2stf, j3, Jp2stb, &
     857             :                                                             Jp2stf, lot, lzt, ma, mb, ncache, nfft
     858          18 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: after1, after2, after3, before1, &
     859          18 :                                                             before2, before3, now1, now2, now3
     860             :       REAL(kind=dp)                                      :: twopion
     861          18 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :)        :: btrig1, btrig2, btrig3, cosinarr, &
     862          18 :                                                             ftrig1, ftrig2, ftrig3
     863          18 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :)     :: zt, zw
     864             :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :, :)  :: zmpi2
     865             :       REAL(KIND=dp), ALLOCATABLE, &
     866          18 :          DIMENSION(:, :, :, :, :)                        :: zmpi1
     867             : 
     868          18 :       CALL timeset(routineN, handle)
     869             :       ! check input
     870          18 :       IF (MOD(n3, 2) .NE. 0) CPABORT("Parallel convolution:ERROR:n3")
     871          18 :       IF (nd1 .LT. n1/2 + 1) CPABORT("Parallel convolution:ERROR:nd1")
     872          18 :       IF (nd2 .LT. n2/2 + 1) CPABORT("Parallel convolution:ERROR:nd2")
     873          18 :       IF (nd3 .LT. n3/2 + 1) CPABORT("Parallel convolution:ERROR:nd3")
     874          18 :       IF (md1 .LT. n1) CPABORT("Parallel convolution:ERROR:md1")
     875          18 :       IF (md2 .LT. n2) CPABORT("Parallel convolution:ERROR:md2")
     876          18 :       IF (md3 .LT. n3/2) CPABORT("Parallel convolution:ERROR:md3")
     877          18 :       IF (MOD(nd3, nproc) .NE. 0) CPABORT("Parallel convolution:ERROR:nd3")
     878          18 :       IF (MOD(md2, nproc) .NE. 0) CPABORT("Parallel convolution:ERROR:md2")
     879             : 
     880             :       !defining work arrays dimensions
     881          18 :       ncache = ncache_optimal
     882          18 :       IF (ncache <= MAX(n1, n2, n3/2)*4) ncache = MAX(n1, n2, n3/2)*4
     883             : 
     884             :       !  if (timing_flag == 1 .and. iproc ==0) print *,'parallel ncache=',ncache
     885             : 
     886          18 :       lzt = n2
     887          18 :       IF (MOD(n2, 2) .EQ. 0) lzt = lzt + 1
     888          18 :       IF (MOD(n2, 4) .EQ. 0) lzt = lzt + 1 !maybe this is useless
     889             : 
     890             :       !Allocations
     891          18 :       ALLOCATE (btrig1(2, ctrig_length))
     892          18 :       ALLOCATE (ftrig1(2, ctrig_length))
     893          18 :       ALLOCATE (after1(7))
     894          18 :       ALLOCATE (now1(7))
     895          18 :       ALLOCATE (before1(7))
     896          18 :       ALLOCATE (btrig2(2, ctrig_length))
     897          18 :       ALLOCATE (ftrig2(2, ctrig_length))
     898          18 :       ALLOCATE (after2(7))
     899          18 :       ALLOCATE (now2(7))
     900          18 :       ALLOCATE (before2(7))
     901          18 :       ALLOCATE (btrig3(2, ctrig_length))
     902          18 :       ALLOCATE (ftrig3(2, ctrig_length))
     903          18 :       ALLOCATE (after3(7))
     904          18 :       ALLOCATE (now3(7))
     905          18 :       ALLOCATE (before3(7))
     906          72 :       ALLOCATE (zw(2, ncache/4, 2))
     907          72 :       ALLOCATE (zt(2, lzt, n1))
     908          90 :       ALLOCATE (zmpi2(2, n1, md2/nproc, nd3))
     909          72 :       ALLOCATE (cosinarr(2, n3/2))
     910          18 :       IF (nproc .GT. 1) THEN
     911         108 :          ALLOCATE (zmpi1(2, n1, md2/nproc, nd3/nproc, nproc))
     912     4437270 :          zmpi1 = 0.0_dp
     913             :       END IF
     914     4437234 :       zmpi2 = 0.0_dp
     915      221238 :       zw = 0.0_dp
     916      161370 :       zt = 0.0_dp
     917             : 
     918             :       !calculating the FFT work arrays (beware on the HalFFT in n3 dimension)
     919          18 :       CALL ctrig(n3/2, btrig3, after3, before3, now3, 1, ic3)
     920          18 :       CALL ctrig(n1, btrig1, after1, before1, now1, 1, ic1)
     921          18 :       CALL ctrig(n2, btrig2, after2, before2, now2, 1, ic2)
     922         990 :       DO j = 1, n1
     923         972 :          ftrig1(1, j) = btrig1(1, j)
     924         990 :          ftrig1(2, j) = -btrig1(2, j)
     925             :       END DO
     926         990 :       DO j = 1, n2
     927         972 :          ftrig2(1, j) = btrig2(1, j)
     928         990 :          ftrig2(2, j) = -btrig2(2, j)
     929             :       END DO
     930         990 :       DO j = 1, n3/2
     931         972 :          ftrig3(1, j) = btrig3(1, j)
     932         990 :          ftrig3(2, j) = -btrig3(2, j)
     933             :       END DO
     934             : 
     935             :       !Calculating array of phases for HalFFT decoding
     936          18 :       twopion = 8._dp*ATAN(1._dp)/REAL(n3, KIND=dp)
     937         990 :       DO i3 = 1, n3/2
     938         972 :          cosinarr(1, i3) = COS(twopion*(i3 - 1))
     939         990 :          cosinarr(2, i3) = -SIN(twopion*(i3 - 1))
     940             :       END DO
     941             : 
     942             :       !initializing integral
     943             :       !ehartree=0._dp
     944             : 
     945             :       ! transform along z axis
     946          18 :       lot = ncache/(2*n3)
     947          18 :       IF (lot .LT. 1) THEN
     948             :          WRITE (*, *) &
     949             :             'convolxc_off:ncache has to be enlarged to be able to hold at'// &
     950             :             'least one 1-d FFT of this size even though this will'// &
     951           0 :             'reduce the performance for shorter transform lengths', n1, n2, n3, nd1, nd2, nd3, md1, md2, md3, nproc, iproc
     952           0 :          CPABORT("")
     953             :       END IF
     954             : 
     955         504 :       DO j2 = 1, md2/nproc
     956             :          !this condition ensures that we manage only the interesting part for the FFT
     957         504 :          IF (iproc*(md2/nproc) + j2 .LE. n2) THEN
     958        1458 :             DO i1 = 1, n1, lot
     959         972 :                ma = i1
     960         972 :                mb = MIN(i1 + (lot - 1), n1)
     961         972 :                nfft = mb - ma + 1
     962             : 
     963             :                !inserting real data into complex array of half length
     964         972 :                CALL halfill_upcorn(md1, md3, lot, nfft, n3, zf(i1, 1, j2), zw(1, 1, 1))
     965             : 
     966             :                !performing FFT
     967             :                !input: I1,I3,J2,(Jp2)
     968         972 :                inzee = 1
     969        3888 :                DO i = 1, ic3
     970             :                   CALL fftstp(lot, nfft, n3/2, lot, n3/2, zw(1, 1, inzee), zw(1, 1, 3 - inzee), &
     971        2916 :                               btrig3, after3(i), now3(i), before3(i), 1)
     972        3888 :                   inzee = 3 - inzee
     973             :                END DO
     974             :                !output: I1,i3,J2,(Jp2)
     975             :                !unpacking FFT in order to restore correct result,
     976             :                !while exchanging components
     977             :                !input: I1,i3,J2,(Jp2)
     978        1458 :                CALL scramble_unpack(i1, j2, lot, nfft, n1, n3, md2, nproc, nd3, zw(1, 1, inzee), zmpi2, cosinarr)
     979             :                !output: I1,J2,i3,(Jp2)
     980             :             END DO
     981             :          END IF
     982             :       END DO
     983             :       !Interprocessor data transposition
     984             :       !input: I1,J2,j3,jp3,(Jp2)
     985          18 :       IF (nproc .GT. 1) THEN
     986          18 :          CALL mpi_group%alltoall(zmpi2, zmpi1, 2*n1*(md2/nproc)*(nd3/nproc))
     987             :       END IF
     988             :       !output: I1,J2,j3,Jp2,(jp3)
     989             : 
     990             :       !now each process perform complete convolution of its planes
     991         522 :       DO j3 = 1, nd3/nproc
     992             :          !this condition ensures that we manage only the interesting part for the FFT
     993         522 :          IF (iproc*(nd3/nproc) + j3 .LE. n3/2 + 1) THEN
     994         495 :             Jp2stb = 1
     995         495 :             J2stb = 1
     996         495 :             Jp2stf = 1
     997         495 :             J2stf = 1
     998             : 
     999             :             ! transform along x axis
    1000         495 :             lot = ncache/(4*n1)
    1001         495 :             IF (lot .LT. 1) THEN
    1002             :                WRITE (*, *) &
    1003             :                   'convolxc_off:ncache has to be enlarged to be able to hold at'// &
    1004             :                   'least one 1-d FFT of this size even though this will'// &
    1005           0 :                   'reduce the performance for shorter transform lengths'
    1006           0 :                CPABORT("")
    1007             :             END IF
    1008             : 
    1009        1485 :             DO j = 1, n2, lot
    1010         990 :                ma = j
    1011         990 :                mb = MIN(j + (lot - 1), n2)
    1012         990 :                nfft = mb - ma + 1
    1013             : 
    1014             :                !reverse index ordering, leaving the planes to be transformed at the end
    1015             :                !input: I1,J2,j3,Jp2,(jp3)
    1016         990 :                IF (nproc .EQ. 1) THEN
    1017           0 :                   CALL S_mpiswitch_upcorn(j3, nfft, Jp2stb, J2stb, lot, n1, md2, nd3, nproc, zmpi2, zw(1, 1, 1))
    1018             :                ELSE
    1019         990 :                   CALL S_mpiswitch_upcorn(j3, nfft, Jp2stb, J2stb, lot, n1, md2, nd3, nproc, zmpi1, zw(1, 1, 1))
    1020             :                END IF
    1021             :                !output: J2,Jp2,I1,j3,(jp3)
    1022             : 
    1023             :                !performing FFT
    1024             :                !input: I2,I1,j3,(jp3)
    1025         990 :                inzee = 1
    1026        2970 :                DO i = 1, ic1 - 1
    1027             :                   CALL fftstp(lot, nfft, n1, lot, n1, zw(1, 1, inzee), zw(1, 1, 3 - inzee), &
    1028        1980 :                               btrig1, after1(i), now1(i), before1(i), 1)
    1029        2970 :                   inzee = 3 - inzee
    1030             :                END DO
    1031             : 
    1032             :                !storing the last step into zt array
    1033         990 :                i = ic1
    1034             :                CALL fftstp(lot, nfft, n1, lzt, n1, zw(1, 1, inzee), zt(1, j, 1), &
    1035        1485 :                            btrig1, after1(i), now1(i), before1(i), 1)
    1036             :                !output: I2,i1,j3,(jp3)
    1037             :             END DO
    1038             : 
    1039             :             !transform along y axis
    1040         495 :             lot = ncache/(4*n2)
    1041         495 :             IF (lot .LT. 1) THEN
    1042             :                WRITE (*, *) &
    1043             :                   'convolxc_off:ncache has to be enlarged to be able to hold at'// &
    1044             :                   'least one 1-d FFT of this size even though this will'// &
    1045           0 :                   'reduce the performance for shorter transform lengths'
    1046           0 :                CPABORT("")
    1047             :             END IF
    1048             : 
    1049        1485 :             DO j = 1, n1, lot
    1050         990 :                ma = j
    1051         990 :                mb = MIN(j + (lot - 1), n1)
    1052         990 :                nfft = mb - ma + 1
    1053             : 
    1054             :                !reverse ordering
    1055             :                !input: I2,i1,j3,(jp3)
    1056         990 :                CALL S_switch_upcorn(nfft, n2, lot, n1, lzt, zt(1, 1, j), zw(1, 1, 1))
    1057             :                !output: i1,I2,j3,(jp3)
    1058             : 
    1059             :                !performing FFT
    1060             :                !input: i1,I2,j3,(jp3)
    1061         990 :                inzee = 1
    1062        3960 :                DO i = 1, ic2
    1063             :                   CALL fftstp(lot, nfft, n2, lot, n2, zw(1, 1, inzee), zw(1, 1, 3 - inzee), &
    1064        2970 :                               btrig2, after2(i), now2(i), before2(i), 1)
    1065        3960 :                   inzee = 3 - inzee
    1066             :                END DO
    1067             :                !output: i1,i2,j3,(jp3)
    1068             : 
    1069             :                !Multiply with kernel in fourier space
    1070         990 :                CALL multkernel(nd1, nd2, n1, n2, lot, nfft, j, pot(1, 1, j3), zw(1, 1, inzee))
    1071             : 
    1072             :                !TRANSFORM BACK IN REAL SPACE
    1073             : 
    1074             :                !transform along y axis
    1075             :                !input: i1,i2,j3,(jp3)
    1076        3960 :                DO i = 1, ic2
    1077             :                   CALL fftstp(lot, nfft, n2, lot, n2, zw(1, 1, inzee), zw(1, 1, 3 - inzee), &
    1078        2970 :                               ftrig2, after2(i), now2(i), before2(i), -1)
    1079        3960 :                   inzee = 3 - inzee
    1080             :                END DO
    1081             : 
    1082             :                !reverse ordering
    1083             :                !input: i1,I2,j3,(jp3)
    1084        1485 :                CALL S_unswitch_downcorn(nfft, n2, lot, n1, lzt, zw(1, 1, inzee), zt(1, 1, j))
    1085             :                !output: I2,i1,j3,(jp3)
    1086             :             END DO
    1087             : 
    1088             :             !transform along x axis
    1089             :             !input: I2,i1,j3,(jp3)
    1090         495 :             lot = ncache/(4*n1)
    1091        1485 :             DO j = 1, n2, lot
    1092         990 :                ma = j
    1093         990 :                mb = MIN(j + (lot - 1), n2)
    1094         990 :                nfft = mb - ma + 1
    1095             : 
    1096             :                !performing FFT
    1097         990 :                i = 1
    1098             :                CALL fftstp(lzt, nfft, n1, lot, n1, zt(1, j, 1), zw(1, 1, 1), &
    1099         990 :                            ftrig1, after1(i), now1(i), before1(i), -1)
    1100             : 
    1101         990 :                inzee = 1
    1102        2970 :                DO i = 2, ic1
    1103             :                   CALL fftstp(lot, nfft, n1, lot, n1, zw(1, 1, inzee), zw(1, 1, 3 - inzee), &
    1104        1980 :                               ftrig1, after1(i), now1(i), before1(i), -1)
    1105        2970 :                   inzee = 3 - inzee
    1106             :                END DO
    1107             :                !output: I2,I1,j3,(jp3)
    1108             : 
    1109             :                !reverse ordering
    1110             :                !input: J2,Jp2,I1,j3,(jp3)
    1111        1485 :                IF (nproc .EQ. 1) THEN
    1112           0 :                   CALL S_unmpiswitch_downcorn(j3, nfft, Jp2stf, J2stf, lot, n1, md2, nd3, nproc, zw(1, 1, inzee), zmpi2)
    1113             :                ELSE
    1114         990 :                   CALL S_unmpiswitch_downcorn(j3, nfft, Jp2stf, J2stf, lot, n1, md2, nd3, nproc, zw(1, 1, inzee), zmpi1)
    1115             :                END IF
    1116             :                ! output: I1,J2,j3,Jp2,(jp3)
    1117             :             END DO
    1118             :          END IF
    1119             :       END DO
    1120             : 
    1121             :       !Interprocessor data transposition
    1122             :       !input: I1,J2,j3,Jp2,(jp3)
    1123          18 :       IF (nproc .GT. 1) THEN
    1124             :          !communication scheduling
    1125          18 :          CALL mpi_group%alltoall(zmpi1, zmpi2, 2*n1*(md2/nproc)*(nd3/nproc))
    1126             :       END IF
    1127             : 
    1128             :       !output: I1,J2,j3,jp3,(Jp2)
    1129             : 
    1130             :       !transform along z axis
    1131             :       !input: I1,J2,i3,(Jp2)
    1132          18 :       lot = ncache/(2*n3)
    1133         504 :       DO j2 = 1, md2/nproc
    1134             :          !this condition ensures that we manage only the interesting part for the FFT
    1135         504 :          IF (iproc*(md2/nproc) + j2 .LE. n2) THEN
    1136        1458 :             DO i1 = 1, n1, lot
    1137         972 :                ma = i1
    1138         972 :                mb = MIN(i1 + (lot - 1), n1)
    1139         972 :                nfft = mb - ma + 1
    1140             : 
    1141             :                !reverse ordering and repack the FFT data in order to be backward HalFFT transformed
    1142             :                !input: I1,J2,i3,(Jp2)
    1143         972 :                CALL unscramble_pack(i1, j2, lot, nfft, n1, n3, md2, nproc, nd3, zmpi2, zw(1, 1, 1), cosinarr)
    1144             :                !output: I1,i3,J2,(Jp2)
    1145             : 
    1146             :                !performing FFT
    1147             :                !input: I1,i3,J2,(Jp2)
    1148         972 :                inzee = 1
    1149        3888 :                DO i = 1, ic3
    1150             :                   CALL fftstp(lot, nfft, n3/2, lot, n3/2, zw(1, 1, inzee), zw(1, 1, 3 - inzee), &
    1151        2916 :                               ftrig3, after3(i), now3(i), before3(i), -1)
    1152        3888 :                   inzee = 3 - inzee
    1153             :                END DO
    1154             :                !output: I1,I3,J2,(Jp2)
    1155             : 
    1156             :                !rebuild the output array
    1157             :                CALL unfill_downcorn(md1, md3, lot, nfft, n3, zw(1, 1, inzee), zf(i1, 1, j2) &
    1158        1458 :                                     , scal)
    1159             : 
    1160             :                !integrate local pieces together
    1161             :                !ehartree=ehartree+0.5_dp*ehartreetmp*hx*hy*hz
    1162             :             END DO
    1163             :          END IF
    1164             :       END DO
    1165             : 
    1166             :       !De-allocations
    1167          18 :       DEALLOCATE (btrig1)
    1168          18 :       DEALLOCATE (ftrig1)
    1169          18 :       DEALLOCATE (after1)
    1170          18 :       DEALLOCATE (now1)
    1171          18 :       DEALLOCATE (before1)
    1172          18 :       DEALLOCATE (btrig2)
    1173          18 :       DEALLOCATE (ftrig2)
    1174          18 :       DEALLOCATE (after2)
    1175          18 :       DEALLOCATE (now2)
    1176          18 :       DEALLOCATE (before2)
    1177          18 :       DEALLOCATE (btrig3)
    1178          18 :       DEALLOCATE (ftrig3)
    1179          18 :       DEALLOCATE (after3)
    1180          18 :       DEALLOCATE (now3)
    1181          18 :       DEALLOCATE (before3)
    1182          18 :       DEALLOCATE (zmpi2)
    1183          18 :       DEALLOCATE (zw)
    1184          18 :       DEALLOCATE (zt)
    1185          18 :       DEALLOCATE (cosinarr)
    1186          18 :       IF (nproc .GT. 1) DEALLOCATE (zmpi1)
    1187             : 
    1188             :       !  call timing(iproc,'PSolv_comput  ','OF')
    1189          18 :       CALL timestop(handle)
    1190          36 :    END SUBROUTINE S_PoissonSolver
    1191             : 
    1192             : ! **************************************************************************************************
    1193             : !> \brief ...
    1194             : !> \param j3 ...
    1195             : !> \param nfft ...
    1196             : !> \param Jp2stb ...
    1197             : !> \param J2stb ...
    1198             : !> \param lot ...
    1199             : !> \param n1 ...
    1200             : !> \param md2 ...
    1201             : !> \param nd3 ...
    1202             : !> \param nproc ...
    1203             : !> \param zmpi1 ...
    1204             : !> \param zw ...
    1205             : ! **************************************************************************************************
    1206         990 :    SUBROUTINE S_mpiswitch_upcorn(j3, nfft, Jp2stb, J2stb, lot, n1, md2, nd3, nproc, zmpi1, zw)
    1207             :       INTEGER, INTENT(in)                                :: j3, nfft
    1208             :       INTEGER, INTENT(inout)                             :: Jp2stb, J2stb
    1209             :       INTEGER, INTENT(in)                                :: lot, n1, md2, nd3, nproc
    1210             :       REAL(KIND=dp), &
    1211             :          DIMENSION(2, n1, md2/nproc, nd3/nproc, nproc), &
    1212             :          INTENT(in)                                      :: zmpi1
    1213             :       REAL(KIND=dp), DIMENSION(2, lot, n1), &
    1214             :          INTENT(inout)                                   :: zw
    1215             : 
    1216             :       INTEGER                                            :: I1, J2, Jp2, mfft
    1217             : 
    1218         990 :       mfft = 0
    1219        1980 :       DO Jp2 = Jp2stb, nproc
    1220       28215 :          DO J2 = J2stb, md2/nproc
    1221       27225 :             mfft = mfft + 1
    1222       27225 :             IF (mfft .GT. nfft) THEN
    1223         495 :                Jp2stb = Jp2
    1224         495 :                J2stb = J2
    1225         495 :                RETURN
    1226             :             END IF
    1227     1471140 :             DO I1 = 1, n1
    1228     1443420 :                zw(1, mfft, I1) = zmpi1(1, I1, J2, j3, Jp2)
    1229     1470150 :                zw(2, mfft, I1) = zmpi1(2, I1, J2, j3, Jp2)
    1230             :             END DO
    1231             :          END DO
    1232        1485 :          J2stb = 1
    1233             :       END DO
    1234             :    END SUBROUTINE S_mpiswitch_upcorn
    1235             : 
    1236             : ! **************************************************************************************************
    1237             : !> \brief ...
    1238             : !> \param nfft ...
    1239             : !> \param n2 ...
    1240             : !> \param lot ...
    1241             : !> \param n1 ...
    1242             : !> \param lzt ...
    1243             : !> \param zt ...
    1244             : !> \param zw ...
    1245             : ! **************************************************************************************************
    1246         990 :    SUBROUTINE S_switch_upcorn(nfft, n2, lot, n1, lzt, zt, zw)
    1247             :       INTEGER, INTENT(in)                                :: nfft, n2, lot, n1, lzt
    1248             :       REAL(KIND=dp), DIMENSION(2, lzt, n1), INTENT(in)   :: zt
    1249             :       REAL(KIND=dp), DIMENSION(2, lot, n2), &
    1250             :          INTENT(inout)                                   :: zw
    1251             : 
    1252             :       INTEGER                                            :: i, j
    1253             : 
    1254       27720 :       DO j = 1, nfft
    1255     1471140 :          DO i = 1, n2
    1256     1443420 :             zw(1, j, i) = zt(1, i, j)
    1257     1470150 :             zw(2, j, i) = zt(2, i, j)
    1258             :          END DO
    1259             :       END DO
    1260         990 :    END SUBROUTINE S_switch_upcorn
    1261             : 
    1262             : ! **************************************************************************************************
    1263             : !> \brief ...
    1264             : !> \param nfft ...
    1265             : !> \param n2 ...
    1266             : !> \param lot ...
    1267             : !> \param n1 ...
    1268             : !> \param lzt ...
    1269             : !> \param zw ...
    1270             : !> \param zt ...
    1271             : ! **************************************************************************************************
    1272         990 :    SUBROUTINE S_unswitch_downcorn(nfft, n2, lot, n1, lzt, zw, zt)
    1273             :       INTEGER, INTENT(in)                                :: nfft, n2, lot, n1, lzt
    1274             :       REAL(KIND=dp), DIMENSION(2, lot, n2), INTENT(in)   :: zw
    1275             :       REAL(KIND=dp), DIMENSION(2, lzt, n1), &
    1276             :          INTENT(inout)                                   :: zt
    1277             : 
    1278             :       INTEGER                                            :: i, j
    1279             : 
    1280       27720 :       DO j = 1, nfft
    1281     1471140 :          DO i = 1, n2
    1282     1443420 :             zt(1, i, j) = zw(1, j, i)
    1283     1470150 :             zt(2, i, j) = zw(2, j, i)
    1284             :          END DO
    1285             :       END DO
    1286         990 :    END SUBROUTINE S_unswitch_downcorn
    1287             : 
    1288             : ! **************************************************************************************************
    1289             : !> \brief ...
    1290             : !> \param j3 ...
    1291             : !> \param nfft ...
    1292             : !> \param Jp2stf ...
    1293             : !> \param J2stf ...
    1294             : !> \param lot ...
    1295             : !> \param n1 ...
    1296             : !> \param md2 ...
    1297             : !> \param nd3 ...
    1298             : !> \param nproc ...
    1299             : !> \param zw ...
    1300             : !> \param zmpi1 ...
    1301             : ! **************************************************************************************************
    1302         990 :    SUBROUTINE S_unmpiswitch_downcorn(j3, nfft, Jp2stf, J2stf, lot, n1, md2, nd3, nproc, zw, zmpi1)
    1303             :       INTEGER, INTENT(in)                                :: j3, nfft
    1304             :       INTEGER, INTENT(inout)                             :: Jp2stf, J2stf
    1305             :       INTEGER, INTENT(in)                                :: lot, n1, md2, nd3, nproc
    1306             :       REAL(KIND=dp), DIMENSION(2, lot, n1), INTENT(in)   :: zw
    1307             :       REAL(KIND=dp), &
    1308             :          DIMENSION(2, n1, md2/nproc, nd3/nproc, nproc), &
    1309             :          INTENT(inout)                                   :: zmpi1
    1310             : 
    1311             :       INTEGER                                            :: I1, J2, Jp2, mfft
    1312             : 
    1313         990 :       mfft = 0
    1314        1980 :       DO Jp2 = Jp2stf, nproc
    1315       28215 :          DO J2 = J2stf, md2/nproc
    1316       27225 :             mfft = mfft + 1
    1317       27225 :             IF (mfft .GT. nfft) THEN
    1318         495 :                Jp2stf = Jp2
    1319         495 :                J2stf = J2
    1320         495 :                RETURN
    1321             :             END IF
    1322     1471140 :             DO I1 = 1, n1
    1323     1443420 :                zmpi1(1, I1, J2, j3, Jp2) = zw(1, mfft, I1)
    1324     1470150 :                zmpi1(2, I1, J2, j3, Jp2) = zw(2, mfft, I1)
    1325             :             END DO
    1326             :          END DO
    1327        1485 :          J2stf = 1
    1328             :       END DO
    1329             :    END SUBROUTINE S_unmpiswitch_downcorn
    1330             : 
    1331             : ! **************************************************************************************************
    1332             : !> \brief (Based on suitable modifications of S.Goedecker routines)
    1333             : !>      Restore data into output array, calculating in the meanwhile
    1334             : !>      Hartree energy of the potential
    1335             : !> \param md1 Dimensions of the undistributed part of the real grid
    1336             : !> \param md3 Dimensions of the undistributed part of the real grid
    1337             : !> \param lot ...
    1338             : !> \param nfft number of planes
    1339             : !> \param n3 (twice the) dimension of the last FFTtransform.
    1340             : !> \param zw FFT work array
    1341             : !> \param zf Original distributed density as well as
    1342             : !>                   Distributed solution of the poisson equation (inout)
    1343             : !> \param scal Needed to achieve unitarity and correct dimensions
    1344             : !> \date February 2006
    1345             : !> \author S. Goedecker, L. Genovese
    1346             : !> \note Assuming that high frequencies are in the corners
    1347             : !>      and that n3 is multiple of 4
    1348             : !>
    1349             : !>  RESTRICTIONS on USAGE
    1350             : !>      Copyright (C) Stefan Goedecker, Cornell University, Ithaca, USA, 1994
    1351             : !>      Copyright (C) Stefan Goedecker, MPI Stuttgart, Germany, 1999
    1352             : !>      Copyright (C) 2002 Stefan Goedecker, CEA Grenoble
    1353             : !>      This file is distributed under the terms of the
    1354             : !>      GNU General Public License, see http://www.gnu.org/copyleft/gpl.txt .
    1355             : ! **************************************************************************************************
    1356      542732 :    SUBROUTINE unfill_downcorn(md1, md3, lot, nfft, n3, zw, zf, scal)
    1357             :       INTEGER, INTENT(in)                                :: md1, md3, lot, nfft, n3
    1358             :       REAL(KIND=dp), DIMENSION(2, lot, n3/2), INTENT(in) :: zw
    1359             :       REAL(KIND=dp), DIMENSION(md1, md3), INTENT(inout)  :: zf
    1360             :       REAL(KIND=dp), INTENT(in)                          :: scal
    1361             : 
    1362             :       INTEGER                                            :: i1, i3
    1363             :       REAL(KIND=dp)                                      :: pot1
    1364             : 
    1365    13101356 :       DO i3 = 1, n3/4
    1366   373748418 :          DO i1 = 1, nfft
    1367   360647062 :             pot1 = scal*zw(1, i1, i3)
    1368             :             !ehartreetmp =ehartreetmp + pot1* zf(i1,2*i3-1)
    1369   360647062 :             zf(i1, 2*i3 - 1) = pot1
    1370   360647062 :             pot1 = scal*zw(2, i1, i3)
    1371             :             !ehartreetmp =ehartreetmp + pot1* zf(i1,2*i3)
    1372   373205686 :             zf(i1, 2*i3) = pot1
    1373             :          END DO
    1374             :       END DO
    1375      542732 :    END SUBROUTINE unfill_downcorn
    1376             : 
    1377             : ! **************************************************************************************************
    1378             : !> \brief ...
    1379             : !> \param md1 ...
    1380             : !> \param md3 ...
    1381             : !> \param lot ...
    1382             : !> \param nfft ...
    1383             : !> \param n3 ...
    1384             : !> \param zf ...
    1385             : !> \param zw ...
    1386             : ! **************************************************************************************************
    1387      542732 :    SUBROUTINE halfill_upcorn(md1, md3, lot, nfft, n3, zf, zw)
    1388             :       INTEGER                                            :: md1, md3, lot, nfft, n3
    1389             :       REAL(KIND=dp)                                      :: zf(md1, md3), zw(2, lot, n3/2)
    1390             : 
    1391             :       INTEGER                                            :: i1, i3
    1392             : 
    1393    13101356 :       DO i3 = 1, n3/4
    1394             :          ! WARNING: Assuming that high frequencies are in the corners
    1395             :          !          and that n3 is multiple of 4
    1396             :          !in principle we can relax this condition
    1397   373748418 :          DO i1 = 1, nfft
    1398   360647062 :             zw(1, i1, i3) = 0._dp
    1399   373205686 :             zw(2, i1, i3) = 0._dp
    1400             :          END DO
    1401             :       END DO
    1402    13101356 :       DO i3 = n3/4 + 1, n3/2
    1403   373748418 :          DO i1 = 1, nfft
    1404   360647062 :             zw(1, i1, i3) = zf(i1, 2*i3 - 1 - n3/2)
    1405   373205686 :             zw(2, i1, i3) = zf(i1, 2*i3 - n3/2)
    1406             :          END DO
    1407             :       END DO
    1408             : 
    1409      542732 :    END SUBROUTINE halfill_upcorn
    1410             : 
    1411             : ! **************************************************************************************************
    1412             : !> \brief (Based on suitable modifications of S.Goedecker routines)
    1413             : !>      Assign the correct planes to the work array zmpi2
    1414             : !>      in order to prepare for interprocessor data transposition.
    1415             : !>      In the meanwhile, it unpacks the data of the HalFFT in order to prepare for
    1416             : !>      multiplication with the kernel
    1417             : !> \param i1 Starting points of the plane and number of remaining lines
    1418             : !> \param j2 Starting points of the plane and number of remaining lines
    1419             : !> \param lot Starting points of the plane and number of remaining lines
    1420             : !> \param nfft Starting points of the plane and number of remaining lines
    1421             : !> \param n1 logical dimension of the FFT transform, reference for work arrays
    1422             : !> \param n3 logical dimension of the FFT transform, reference for work arrays
    1423             : !> \param md2 Dimensions of real grid
    1424             : !> \param nproc ...
    1425             : !> \param nd3 Dimensions of the kernel
    1426             : !> \param zw Work array (input)
    1427             : !> \param zmpi2 Work array for multiprocessor manipulation (output)
    1428             : !> \param cosinarr Array of the phases needed for unpacking
    1429             : !> \date February 2006
    1430             : !> \author S. Goedecker, L. Genovese
    1431             : !> \note
    1432             : !>  RESTRICTIONS on USAGE
    1433             : !>      Copyright (C) Stefan Goedecker, Cornell University, Ithaca, USA, 1994
    1434             : !>      Copyright (C) Stefan Goedecker, MPI Stuttgart, Germany, 1999
    1435             : !>      Copyright (C) 2002 Stefan Goedecker, CEA Grenoble
    1436             : !>      This file is distributed under the terms of the
    1437             : !>      GNU General Public License, see http://www.gnu.org/copyleft/gpl.txt .
    1438             : ! **************************************************************************************************
    1439      614864 :    SUBROUTINE scramble_unpack(i1, j2, lot, nfft, n1, n3, md2, nproc, nd3, zw, zmpi2, cosinarr)
    1440             :       INTEGER, INTENT(in)                                :: i1, j2, lot, nfft, n1, n3, md2, nproc, &
    1441             :                                                             nd3
    1442             :       REAL(KIND=dp), DIMENSION(2, lot, n3/2), INTENT(in) :: zw
    1443             :       REAL(KIND=dp), DIMENSION(2, n1, md2/nproc, nd3), &
    1444             :          INTENT(inout)                                   :: zmpi2
    1445             :       REAL(KIND=dp), DIMENSION(2, n3/2), INTENT(in)      :: cosinarr
    1446             : 
    1447             :       INTEGER                                            :: i, i3, ind1, ind2
    1448             :       REAL(KIND=dp)                                      :: a, b, c, cp, d, feI, feR, fI, foI, foR, &
    1449             :                                                             fR, sp
    1450             : 
    1451             : !case i3=1 and i3=n3/2+1
    1452             : 
    1453    18862834 :       DO i = 0, nfft - 1
    1454    18247970 :          a = zw(1, i + 1, 1)
    1455    18247970 :          b = zw(2, i + 1, 1)
    1456    18247970 :          zmpi2(1, i1 + i, j2, 1) = a + b
    1457    18247970 :          zmpi2(2, i1 + i, j2, 1) = 0._dp
    1458    18247970 :          zmpi2(1, i1 + i, j2, n3/2 + 1) = a - b
    1459    18862834 :          zmpi2(2, i1 + i, j2, n3/2 + 1) = 0._dp
    1460             :       END DO
    1461             :       !case 2<=i3<=n3/2
    1462    29130144 :       DO i3 = 2, n3/2
    1463    28515280 :          ind1 = i3
    1464    28515280 :          ind2 = n3/2 - i3 + 2
    1465    28515280 :          cp = cosinarr(1, i3)
    1466    28515280 :          sp = cosinarr(2, i3)
    1467   856427058 :          DO i = 0, nfft - 1
    1468   827296914 :             a = zw(1, i + 1, ind1)
    1469   827296914 :             b = zw(2, i + 1, ind1)
    1470   827296914 :             c = zw(1, i + 1, ind2)
    1471   827296914 :             d = zw(2, i + 1, ind2)
    1472   827296914 :             feR = .5_dp*(a + c)
    1473   827296914 :             feI = .5_dp*(b - d)
    1474   827296914 :             foR = .5_dp*(a - c)
    1475   827296914 :             foI = .5_dp*(b + d)
    1476   827296914 :             fR = feR + cp*foI - sp*foR
    1477   827296914 :             fI = feI - cp*foR - sp*foI
    1478   827296914 :             zmpi2(1, i1 + i, j2, ind1) = fR
    1479   855812194 :             zmpi2(2, i1 + i, j2, ind1) = fI
    1480             :          END DO
    1481             :       END DO
    1482             : 
    1483      614864 :    END SUBROUTINE scramble_unpack
    1484             : 
    1485             : ! **************************************************************************************************
    1486             : !> \brief (Based on suitable modifications of S.Goedecker routines)
    1487             : !>      Insert the correct planes of the work array zmpi2
    1488             : !>      in order to prepare for backward FFT transform
    1489             : !>      In the meanwhile, it packs the data in order to be transformed with the HalFFT
    1490             : !>      procedure
    1491             : !> \param i1 Starting points of the plane and number of remaining lines
    1492             : !> \param j2 Starting points of the plane and number of remaining lines
    1493             : !> \param lot Starting points of the plane and number of remaining lines
    1494             : !> \param nfft Starting points of the plane and number of remaining lines
    1495             : !> \param n1 logical dimension of the FFT transform, reference for work arrays
    1496             : !> \param n3 logical dimension of the FFT transform, reference for work arrays
    1497             : !> \param md2 Dimensions of real grid
    1498             : !> \param nproc ...
    1499             : !> \param nd3 Dimensions of the kernel
    1500             : !> \param zmpi2 Work array for multiprocessor manipulation (output)
    1501             : !> \param zw Work array (inout)
    1502             : !> \param cosinarr Array of the phases needed for packing
    1503             : !> \date February 2006
    1504             : !> \author S. Goedecker, L. Genovese
    1505             : !> \note
    1506             : !>  RESTRICTIONS on USAGE
    1507             : !>      Copyright (C) Stefan Goedecker, Cornell University, Ithaca, USA, 1994
    1508             : !>      Copyright (C) Stefan Goedecker, MPI Stuttgart, Germany, 1999
    1509             : !>      Copyright (C) 2002 Stefan Goedecker, CEA Grenoble
    1510             : !>      This file is distributed under the terms of the
    1511             : !>      GNU General Public License, see http://www.gnu.org/copyleft/gpl.txt .
    1512             : ! **************************************************************************************************
    1513      542732 :    SUBROUTINE unscramble_pack(i1, j2, lot, nfft, n1, n3, md2, nproc, nd3, zmpi2, zw, cosinarr)
    1514             :       INTEGER, INTENT(in)                                :: i1, j2, lot, nfft, n1, n3, md2, nproc, &
    1515             :                                                             nd3
    1516             :       REAL(KIND=dp), DIMENSION(2, n1, md2/nproc, nd3), &
    1517             :          INTENT(in)                                      :: zmpi2
    1518             :       REAL(KIND=dp), DIMENSION(2, lot, n3/2), &
    1519             :          INTENT(inout)                                   :: zw
    1520             :       REAL(KIND=dp), DIMENSION(2, n3/2), INTENT(in)      :: cosinarr
    1521             : 
    1522             :       INTEGER                                            :: i, i3, indA, indB
    1523             :       REAL(KIND=dp)                                      :: a, b, c, cp, d, ie, ih, io, re, rh, ro, &
    1524             :                                                             sp
    1525             : 
    1526    25659980 :       DO i3 = 1, n3/2
    1527    25117248 :          indA = i3
    1528    25117248 :          indB = n3/2 + 2 - i3
    1529    25117248 :          cp = cosinarr(1, i3)
    1530    25117248 :          sp = cosinarr(2, i3)
    1531   746954104 :          DO i = 0, nfft - 1
    1532   721294124 :             a = zmpi2(1, i1 + i, j2, indA)
    1533   721294124 :             b = zmpi2(2, i1 + i, j2, indA)
    1534   721294124 :             c = zmpi2(1, i1 + i, j2, indB)
    1535   721294124 :             d = -zmpi2(2, i1 + i, j2, indB)
    1536   721294124 :             re = (a + c)
    1537   721294124 :             ie = (b + d)
    1538   721294124 :             ro = (a - c)*cp - (b - d)*sp
    1539   721294124 :             io = (a - c)*sp + (b - d)*cp
    1540   721294124 :             rh = re - io
    1541   721294124 :             ih = ie + ro
    1542   721294124 :             zw(1, i + 1, indA) = rh
    1543   746411372 :             zw(2, i + 1, indA) = ih
    1544             :          END DO
    1545             :       END DO
    1546             : 
    1547      542732 :    END SUBROUTINE unscramble_pack
    1548             : 
    1549             : ! **************************************************************************************************
    1550             : !> \brief (Based on suitable modifications of S.Goedecker routines)
    1551             : !>      Applies the local FFT space Kernel to the density in Real space.
    1552             : !>      Calculates also the LDA exchange-correlation terms
    1553             : !> \param n1 logical dimension of the transform.
    1554             : !> \param n2 logical dimension of the transform.
    1555             : !> \param n3 logical dimension of the transform.
    1556             : !> \param nd1 Dimension of POT
    1557             : !> \param nd2 Dimension of POT
    1558             : !> \param nd3 Dimension of POT
    1559             : !> \param md1 Dimension of ZF
    1560             : !> \param md2 Dimension of ZF
    1561             : !> \param md3 Dimension of ZF
    1562             : !> \param nproc number of processors used as returned by MPI_COMM_SIZE
    1563             : !> \param iproc [0:nproc-1] number of processor as returned by MPI_COMM_RANK
    1564             : !> \param pot Kernel, only the distributed part (REAL)
    1565             : !>                   POT(i1,i2,i3)
    1566             : !>                   i1=1,nd1 , i2=1,nd2 , i3=1,nd3/nproc
    1567             : !> \param zf Density (input/output)
    1568             : !>                   ZF(i1,i3,i2)
    1569             : !>                   i1=1,md1 , i2=1,md2/nproc , i3=1,md3
    1570             : !> \param scal factor of renormalization of the FFT in order to acheve unitarity
    1571             : !>                   and the correct dimension
    1572             : !> \param mpi_group ...
    1573             : !> \date February 2006
    1574             : !> \author S. Goedecker, L. Genovese
    1575             : !> \note As transform lengths
    1576             : !>                     most products of the prime factors 2,3,5 are allowed.
    1577             : !>                   The detailed table with allowed transform lengths can
    1578             : !>                   be found in subroutine CTRIG
    1579             : !> \note
    1580             : !>  RESTRICTIONS on USAGE
    1581             : !>      Copyright (C) Stefan Goedecker, Cornell University, Ithaca, USA, 1994
    1582             : !>      Copyright (C) Stefan Goedecker, MPI Stuttgart, Germany, 1999
    1583             : !>      Copyright (C) 2002 Stefan Goedecker, CEA Grenoble
    1584             : !>      This file is distributed under the terms of the
    1585             : !>       GNU General Public License, see http://www.gnu.org/copyleft/gpl.txt .
    1586             : ! **************************************************************************************************
    1587       15250 :    SUBROUTINE F_PoissonSolver(n1, n2, n3, nd1, nd2, nd3, md1, md2, md3, nproc, iproc, pot, zf, &
    1588             :                               scal, mpi_group)
    1589             :       INTEGER, INTENT(in)                                :: n1, n2, n3, nd1, nd2, nd3, md1, md2, &
    1590             :                                                             md3, nproc, iproc
    1591             :       REAL(KIND=dp), DIMENSION(nd1, nd2, nd3/nproc), &
    1592             :          INTENT(in)                                      :: pot
    1593             :       REAL(KIND=dp), DIMENSION(md1, md3, md2/nproc), &
    1594             :          INTENT(inout)                                   :: zf
    1595             :       REAL(KIND=dp), INTENT(in)                          :: scal
    1596             : 
    1597             :       CLASS(mp_comm_type), INTENT(in)                     :: mpi_group
    1598             : 
    1599             :       INTEGER, PARAMETER                                 :: ncache_optimal = 8*1024
    1600             : 
    1601             :       INTEGER                                            :: i, i1, i3, ic1, ic2, ic3, inzee, j, j2, &
    1602             :                                                             J2stb, J2stf, j3, Jp2stb, Jp2stf, lot, &
    1603             :                                                             lzt, ma, mb, ncache, nfft
    1604       15250 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: after1, after2, after3, before1, &
    1605       15250 :                                                             before2, before3, now1, now2, now3
    1606             :       REAL(kind=dp)                                      :: twopion
    1607       15250 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :)        :: btrig1, btrig2, btrig3, cosinarr, &
    1608       15250 :                                                             ftrig1, ftrig2, ftrig3
    1609       15250 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :)     :: zt, zw
    1610             :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :, :)  :: zmpi2
    1611             :       REAL(KIND=dp), ALLOCATABLE, &
    1612       15250 :          DIMENSION(:, :, :, :, :)                        :: zmpi1
    1613             : 
    1614       15250 :       IF (MOD(n1, 2) .NE. 0) CPABORT("Parallel convolution:ERROR:n1")
    1615       15250 :       IF (MOD(n2, 2) .NE. 0) CPABORT("Parallel convolution:ERROR:n2")
    1616       15250 :       IF (MOD(n3, 2) .NE. 0) CPABORT("Parallel convolution:ERROR:n3")
    1617       15250 :       IF (nd1 .LT. n1/2 + 1) CPABORT("Parallel convolution:ERROR:nd1")
    1618       15250 :       IF (nd2 .LT. n2/2 + 1) CPABORT("Parallel convolution:ERROR:nd2")
    1619       15250 :       IF (nd3 .LT. n3/2 + 1) CPABORT("Parallel convolution:ERROR:nd3")
    1620       15250 :       IF (md1 .LT. n1/2) CPABORT("Parallel convolution:ERROR:md1")
    1621       15250 :       IF (md2 .LT. n2/2) CPABORT("Parallel convolution:ERROR:md2")
    1622       15250 :       IF (md3 .LT. n3/2) CPABORT("Parallel convolution:ERROR:md3")
    1623       15250 :       IF (MOD(nd3, nproc) .NE. 0) CPABORT("Parallel convolution:ERROR:nd3")
    1624       15250 :       IF (MOD(md2, nproc) .NE. 0) CPABORT("Parallel convolution:ERROR:md2")
    1625             : 
    1626             :       !defining work arrays dimensions
    1627             : 
    1628       15250 :       ncache = ncache_optimal
    1629       15250 :       IF (ncache <= MAX(n1, n2, n3/2)*4) ncache = MAX(n1, n2, n3/2)*4
    1630       15250 :       lzt = n2/2
    1631       15250 :       IF (MOD(n2/2, 2) .EQ. 0) lzt = lzt + 1
    1632       15250 :       IF (MOD(n2/2, 4) .EQ. 0) lzt = lzt + 1
    1633             : 
    1634             :       !Allocations
    1635       15250 :       ALLOCATE (btrig1(2, ctrig_length))
    1636       15250 :       ALLOCATE (ftrig1(2, ctrig_length))
    1637       15250 :       ALLOCATE (after1(7))
    1638       15250 :       ALLOCATE (now1(7))
    1639       15250 :       ALLOCATE (before1(7))
    1640       15250 :       ALLOCATE (btrig2(2, ctrig_length))
    1641       15250 :       ALLOCATE (ftrig2(2, ctrig_length))
    1642       15250 :       ALLOCATE (after2(7))
    1643       15250 :       ALLOCATE (now2(7))
    1644       15250 :       ALLOCATE (before2(7))
    1645       15250 :       ALLOCATE (btrig3(2, ctrig_length))
    1646       15250 :       ALLOCATE (ftrig3(2, ctrig_length))
    1647       15250 :       ALLOCATE (after3(7))
    1648       15250 :       ALLOCATE (now3(7))
    1649       15250 :       ALLOCATE (before3(7))
    1650       61000 :       ALLOCATE (zw(2, ncache/4, 2))
    1651       61000 :       ALLOCATE (zt(2, lzt, n1))
    1652       76250 :       ALLOCATE (zmpi2(2, n1, md2/nproc, nd3))
    1653  4489667096 :       zmpi2 = 1.0E99_dp ! init mpi'ed buffers to silence warnings under valgrind
    1654       61000 :       ALLOCATE (cosinarr(2, n3/2))
    1655       53940 :       IF (nproc .GT. 1) ALLOCATE (zmpi1(2, n1, md2/nproc, nd3/nproc, nproc))
    1656             : 
    1657             :       !calculating the FFT work arrays (beware on the HalFFT in n3 dimension)
    1658       15250 :       CALL ctrig(n3/2, btrig3, after3, before3, now3, 1, ic3)
    1659       15250 :       CALL ctrig(n1, btrig1, after1, before1, now1, 1, ic1)
    1660       15250 :       CALL ctrig(n2, btrig2, after2, before2, now2, 1, ic2)
    1661     1146722 :       DO j = 1, n1
    1662     1131472 :          ftrig1(1, j) = btrig1(1, j)
    1663     1146722 :          ftrig1(2, j) = -btrig1(2, j)
    1664             :       END DO
    1665     1146722 :       DO j = 1, n2
    1666     1131472 :          ftrig2(1, j) = btrig2(1, j)
    1667     1146722 :          ftrig2(2, j) = -btrig2(2, j)
    1668             :       END DO
    1669     1169930 :       DO j = 1, n3
    1670     1154680 :          ftrig3(1, j) = btrig3(1, j)
    1671     1169930 :          ftrig3(2, j) = -btrig3(2, j)
    1672             :       END DO
    1673             : 
    1674             :       !Calculating array of phases for HalFFT decoding
    1675       15250 :       twopion = 8._dp*ATAN(1._dp)/REAL(n3, KIND=dp)
    1676      592590 :       DO i3 = 1, n3/2
    1677      577340 :          cosinarr(1, i3) = COS(twopion*(i3 - 1))
    1678      592590 :          cosinarr(2, i3) = -SIN(twopion*(i3 - 1))
    1679             :       END DO
    1680             : 
    1681             :       ! transform along z axis
    1682       15250 :       lot = ncache/(2*n3)
    1683       15250 :       IF (lot .LT. 1) THEN
    1684             :          WRITE (*, *) &
    1685             :             'convolxc_on:ncache has to be enlarged to be able to hold at'// &
    1686             :             'least one 1-d FFT of this size even though this will'// &
    1687           0 :             'reduce the performance for shorter transform lengths'
    1688           0 :          CPABORT("")
    1689             :       END IF
    1690             : 
    1691      411022 :       DO j2 = 1, md2/nproc
    1692             :          !this condition ensures that we manage only the interesting part for the FFT
    1693      411022 :          IF (iproc*(md2/nproc) + j2 .LE. n2/2) THEN
    1694      936466 :             DO i1 = 1, (n1/2), lot
    1695      541760 :                ma = i1
    1696      541760 :                mb = MIN(i1 + (lot - 1), (n1/2))
    1697      541760 :                nfft = mb - ma + 1
    1698             : 
    1699             :                !inserting real data into complex array of half length
    1700      541760 :                CALL halfill_upcorn(md1, md3, lot, nfft, n3, zf(i1, 1, j2), zw(1, 1, 1))
    1701             : 
    1702             :                !performing FFT
    1703             :                !input: I1,I3,J2,(Jp2)
    1704      541760 :                inzee = 1
    1705     1912076 :                DO i = 1, ic3
    1706             :                   CALL fftstp(lot, nfft, n3/2, lot, n3/2, zw(1, 1, inzee), zw(1, 1, 3 - inzee), &
    1707     1370316 :                               btrig3, after3(i), now3(i), before3(i), 1)
    1708     1912076 :                   inzee = 3 - inzee
    1709             :                END DO
    1710             :                !output: I1,i3,J2,(Jp2)
    1711             : 
    1712             :                !unpacking FFT in order to restore correct result,
    1713             :                !while exchanging components
    1714             :                !input: I1,i3,J2,(Jp2)
    1715      936466 :                CALL scramble_unpack(i1, j2, lot, nfft, n1/2, n3, md2, nproc, nd3, zw(1, 1, inzee), zmpi2, cosinarr)
    1716             :                !output: I1,J2,i3,(Jp2)
    1717             :             END DO
    1718             :          END IF
    1719             :       END DO
    1720             : 
    1721             :       !Interprocessor data transposition
    1722             :       !input: I1,J2,j3,jp3,(Jp2)
    1723       15250 :       IF (nproc .GT. 1) THEN
    1724             :          !communication scheduling
    1725        7738 :          CALL mpi_group%alltoall(zmpi2, zmpi1, n1*(md2/nproc)*(nd3/nproc))
    1726             :       END IF
    1727             :       !output: I1,J2,j3,Jp2,(jp3)
    1728             : 
    1729             :       !now each process perform complete convolution of its planes
    1730      433432 :       DO j3 = 1, nd3/nproc
    1731             :          !this condition ensures that we manage only the interesting part for the FFT
    1732      433432 :          IF (iproc*(nd3/nproc) + j3 .LE. n3/2 + 1) THEN
    1733      414313 :             Jp2stb = 1
    1734      414313 :             J2stb = 1
    1735      414313 :             Jp2stf = 1
    1736      414313 :             J2stf = 1
    1737             : 
    1738             :             ! transform along x axis
    1739      414313 :             lot = ncache/(4*n1)
    1740      414313 :             IF (lot .LT. 1) THEN
    1741             :                WRITE (*, *) &
    1742             :                   'convolxc_on:ncache has to be enlarged to be able to hold at'// &
    1743             :                   'least one 1-d FFT of this size even though this will'// &
    1744           0 :                   'reduce the performance for shorter transform lengths'
    1745           0 :                CPABORT("")
    1746             :             END IF
    1747             : 
    1748     1329862 :             DO j = 1, n2/2, lot
    1749      915549 :                ma = j
    1750      915549 :                mb = MIN(j + (lot - 1), n2/2)
    1751      915549 :                nfft = mb - ma + 1
    1752             : 
    1753             :                !reverse index ordering, leaving the planes to be transformed at the end
    1754             :                !input: I1,J2,j3,Jp2,(jp3)
    1755      915549 :                IF (nproc .EQ. 1) THEN
    1756      412311 :                   CALL mpiswitch_upcorn(j3, nfft, Jp2stb, J2stb, lot, n1, md2, nd3, nproc, zmpi2, zw(1, 1, 1))
    1757             :                ELSE
    1758      503238 :                   CALL mpiswitch_upcorn(j3, nfft, Jp2stb, J2stb, lot, n1, md2, nd3, nproc, zmpi1, zw(1, 1, 1))
    1759             :                END IF
    1760             :                !output: J2,Jp2,I1,j3,(jp3)
    1761             : 
    1762             :                !performing FFT
    1763             :                !input: I2,I1,j3,(jp3)
    1764      915549 :                inzee = 1
    1765     2910285 :                DO i = 1, ic1 - 1
    1766             :                   CALL fftstp(lot, nfft, n1, lot, n1, zw(1, 1, inzee), zw(1, 1, 3 - inzee), &
    1767     1994736 :                               btrig1, after1(i), now1(i), before1(i), 1)
    1768     2910285 :                   inzee = 3 - inzee
    1769             :                END DO
    1770             : 
    1771             :                !storing the last step into zt array
    1772      915549 :                i = ic1
    1773             :                CALL fftstp(lot, nfft, n1, lzt, n1, zw(1, 1, inzee), zt(1, j, 1), &
    1774     1329862 :                            btrig1, after1(i), now1(i), before1(i), 1)
    1775             :                !output: I2,i1,j3,(jp3)
    1776             :             END DO
    1777             : 
    1778             :             !transform along y axis
    1779      414313 :             lot = ncache/(4*n2)
    1780      414313 :             IF (lot .LT. 1) THEN
    1781             :                WRITE (*, *) &
    1782             :                   'convolxc_on:ncache has to be enlarged to be able to hold at'// &
    1783             :                   'least one 1-d FFT of this size even though this will'// &
    1784           0 :                   'reduce the performance for shorter transform lengths'
    1785           0 :                CPABORT("")
    1786             :             END IF
    1787             : 
    1788     2114973 :             DO j = 1, n1, lot
    1789     1700660 :                ma = j
    1790     1700660 :                mb = MIN(j + (lot - 1), n1)
    1791     1700660 :                nfft = mb - ma + 1
    1792             : 
    1793             :                !reverse ordering
    1794             :                !input: I2,i1,j3,(jp3)
    1795     1700660 :                CALL switch_upcorn(nfft, n2, lot, n1, lzt, zt(1, 1, j), zw(1, 1, 1))
    1796             :                !output: i1,I2,j3,(jp3)
    1797             : 
    1798             :                !performing FFT
    1799             :                !input: i1,I2,j3,(jp3)
    1800     1700660 :                inzee = 1
    1801     7139300 :                DO i = 1, ic2
    1802             :                   CALL fftstp(lot, nfft, n2, lot, n2, zw(1, 1, inzee), zw(1, 1, 3 - inzee), &
    1803     5438640 :                               btrig2, after2(i), now2(i), before2(i), 1)
    1804     7139300 :                   inzee = 3 - inzee
    1805             :                END DO
    1806             :                !output: i1,i2,j3,(jp3)
    1807             : 
    1808             :                !Multiply with kernel in fourier space
    1809     1700660 :                CALL multkernel(nd1, nd2, n1, n2, lot, nfft, j, pot(1, 1, j3), zw(1, 1, inzee))
    1810             : 
    1811             :                !TRANSFORM BACK IN REAL SPACE
    1812             : 
    1813             :                !transform along y axis
    1814             :                !input: i1,i2,j3,(jp3)
    1815     7139300 :                DO i = 1, ic2
    1816             :                   CALL fftstp(lot, nfft, n2, lot, n2, zw(1, 1, inzee), zw(1, 1, 3 - inzee), &
    1817     5438640 :                               ftrig2, after2(i), now2(i), before2(i), -1)
    1818     7139300 :                   inzee = 3 - inzee
    1819             :                END DO
    1820             : 
    1821             :                !reverse ordering
    1822             :                !input: i1,I2,j3,(jp3)
    1823     2114973 :                CALL unswitch_downcorn(nfft, n2, lot, n1, lzt, zw(1, 1, inzee), zt(1, 1, j))
    1824             :                !output: I2,i1,j3,(jp3)
    1825             :             END DO
    1826             : 
    1827             :             !transform along x axis
    1828             :             !input: I2,i1,j3,(jp3)
    1829      414313 :             lot = ncache/(4*n1)
    1830     1329862 :             DO j = 1, n2/2, lot
    1831      915549 :                ma = j
    1832      915549 :                mb = MIN(j + (lot - 1), n2/2)
    1833      915549 :                nfft = mb - ma + 1
    1834             : 
    1835             :                !performing FFT
    1836      915549 :                i = 1
    1837             :                CALL fftstp(lzt, nfft, n1, lot, n1, zt(1, j, 1), zw(1, 1, 1), &
    1838      915549 :                            ftrig1, after1(i), now1(i), before1(i), -1)
    1839             : 
    1840      915549 :                inzee = 1
    1841     2910285 :                DO i = 2, ic1
    1842             :                   CALL fftstp(lot, nfft, n1, lot, n1, zw(1, 1, inzee), zw(1, 1, 3 - inzee), &
    1843     1994736 :                               ftrig1, after1(i), now1(i), before1(i), -1)
    1844     2910285 :                   inzee = 3 - inzee
    1845             :                END DO
    1846             :                !output: I2,I1,j3,(jp3)
    1847             : 
    1848             :                !reverse ordering
    1849             :                !input: J2,Jp2,I1,j3,(jp3)
    1850     1329862 :                IF (nproc .EQ. 1) THEN
    1851      412311 :                   CALL unmpiswitch_downcorn(j3, nfft, Jp2stf, J2stf, lot, n1, md2, nd3, nproc, zw(1, 1, inzee), zmpi2)
    1852             :                ELSE
    1853      503238 :                   CALL unmpiswitch_downcorn(j3, nfft, Jp2stf, J2stf, lot, n1, md2, nd3, nproc, zw(1, 1, inzee), zmpi1)
    1854             :                END IF
    1855             :                ! output: I1,J2,j3,Jp2,(jp3)
    1856             :             END DO
    1857             :          END IF
    1858             :       END DO
    1859             : 
    1860             :       !Interprocessor data transposition
    1861             :       !input: I1,J2,j3,Jp2,(jp3)
    1862       15250 :       IF (nproc .GT. 1) THEN
    1863             :          !communication scheduling
    1864        7738 :          CALL mpi_group%alltoall(zmpi1, zmpi2, n1*(md2/nproc)*(nd3/nproc))
    1865             :          !output: I1,J2,j3,jp3,(Jp2)
    1866             :       END IF
    1867             : 
    1868             :       !transform along z axis
    1869             :       !input: I1,J2,i3,(Jp2)
    1870       15250 :       lot = ncache/(2*n3)
    1871      411022 :       DO j2 = 1, md2/nproc
    1872             :          !this condition ensures that we manage only the interesting part for the FFT
    1873      411022 :          IF (iproc*(md2/nproc) + j2 .LE. n2/2) THEN
    1874      936466 :             DO i1 = 1, (n1/2), lot
    1875      541760 :                ma = i1
    1876      541760 :                mb = MIN(i1 + (lot - 1), (n1/2))
    1877      541760 :                nfft = mb - ma + 1
    1878             : 
    1879             :                !reverse ordering and repack the FFT data in order to be backward HalFFT transformed
    1880             :                !input: I1,J2,i3,(Jp2)
    1881      541760 :                CALL unscramble_pack(i1, j2, lot, nfft, n1/2, n3, md2, nproc, nd3, zmpi2, zw(1, 1, 1), cosinarr)
    1882             :                !output: I1,i3,J2,(Jp2)
    1883             : 
    1884             :                !performing FFT
    1885             :                !input: I1,i3,J2,(Jp2)
    1886      541760 :                inzee = 1
    1887     1912076 :                DO i = 1, ic3
    1888             :                   CALL fftstp(lot, nfft, n3/2, lot, n3/2, zw(1, 1, inzee), zw(1, 1, 3 - inzee), &
    1889     1370316 :                               ftrig3, after3(i), now3(i), before3(i), -1)
    1890     1912076 :                   inzee = 3 - inzee
    1891             :                END DO
    1892             :                !output: I1,I3,J2,(Jp2)
    1893             : 
    1894             :                !calculates the exchange correlation terms locally and rebuild the output array
    1895             :                CALL unfill_downcorn(md1, md3, lot, nfft, n3, zw(1, 1, inzee), zf(i1, 1, j2) &
    1896      936466 :                                     , scal) !,ehartreetmp)
    1897             : 
    1898             :                !integrate local pieces together
    1899             :                !ehartree=ehartree+0.5_dp*ehartreetmp*hgrid**3
    1900             :             END DO
    1901             :          END IF
    1902             :       END DO
    1903             : 
    1904             :       !De-allocations
    1905       15250 :       DEALLOCATE (btrig1)
    1906       15250 :       DEALLOCATE (ftrig1)
    1907       15250 :       DEALLOCATE (after1)
    1908       15250 :       DEALLOCATE (now1)
    1909       15250 :       DEALLOCATE (before1)
    1910       15250 :       DEALLOCATE (btrig2)
    1911       15250 :       DEALLOCATE (ftrig2)
    1912       15250 :       DEALLOCATE (after2)
    1913       15250 :       DEALLOCATE (now2)
    1914       15250 :       DEALLOCATE (before2)
    1915       15250 :       DEALLOCATE (btrig3)
    1916       15250 :       DEALLOCATE (ftrig3)
    1917       15250 :       DEALLOCATE (after3)
    1918       15250 :       DEALLOCATE (now3)
    1919       15250 :       DEALLOCATE (before3)
    1920       15250 :       DEALLOCATE (zmpi2)
    1921       15250 :       DEALLOCATE (zw)
    1922       15250 :       DEALLOCATE (zt)
    1923       15250 :       DEALLOCATE (cosinarr)
    1924       15250 :       IF (nproc .GT. 1) DEALLOCATE (zmpi1)
    1925             : 
    1926       15250 :    END SUBROUTINE F_PoissonSolver
    1927             : 
    1928             : ! **************************************************************************************************
    1929             : !> \brief ...
    1930             : !> \param nfft ...
    1931             : !> \param n2 ...
    1932             : !> \param lot ...
    1933             : !> \param n1 ...
    1934             : !> \param lzt ...
    1935             : !> \param zt ...
    1936             : !> \param zw ...
    1937             : ! **************************************************************************************************
    1938     1700660 :    SUBROUTINE switch_upcorn(nfft, n2, lot, n1, lzt, zt, zw)
    1939             :       INTEGER                                            :: nfft, n2, lot, n1, lzt
    1940             :       REAL(KIND=dp)                                      :: zt(2, lzt, n1), zw(2, lot, n2)
    1941             : 
    1942             :       INTEGER                                            :: i, j
    1943             : 
    1944             : ! WARNING: Assuming that high frequencies are in the corners
    1945             : !          and that n2 is multiple of 2
    1946             : ! Low frequencies
    1947             : 
    1948    34600720 :       DO j = 1, nfft
    1949  1505929116 :          DO i = n2/2 + 1, n2
    1950  1471328396 :             zw(1, j, i) = zt(1, i - n2/2, j)
    1951  1504228456 :             zw(2, j, i) = zt(2, i - n2/2, j)
    1952             :          END DO
    1953             :       END DO
    1954             :       ! High frequencies
    1955    83947372 :       DO i = 1, n2/2
    1956  1555275768 :          DO j = 1, nfft
    1957  1471328396 :             zw(1, j, i) = 0._dp
    1958  1553575108 :             zw(2, j, i) = 0._dp
    1959             :          END DO
    1960             :       END DO
    1961     1700660 :    END SUBROUTINE switch_upcorn
    1962             : 
    1963             : ! **************************************************************************************************
    1964             : !> \brief ...
    1965             : !> \param j3 ...
    1966             : !> \param nfft ...
    1967             : !> \param Jp2stb ...
    1968             : !> \param J2stb ...
    1969             : !> \param lot ...
    1970             : !> \param n1 ...
    1971             : !> \param md2 ...
    1972             : !> \param nd3 ...
    1973             : !> \param nproc ...
    1974             : !> \param zmpi1 ...
    1975             : !> \param zw ...
    1976             : ! **************************************************************************************************
    1977      915549 :    SUBROUTINE mpiswitch_upcorn(j3, nfft, Jp2stb, J2stb, lot, n1, md2, nd3, nproc, zmpi1, zw)
    1978             :       INTEGER                                            :: j3, nfft, Jp2stb, J2stb, lot, n1, md2, &
    1979             :                                                             nd3, nproc
    1980             :       REAL(KIND=dp) :: zmpi1(2, n1/2, md2/nproc, nd3/nproc, nproc), zw(2, lot, n1)
    1981             : 
    1982             :       INTEGER                                            :: i1, j2, jp2, mfft
    1983             : 
    1984             : ! WARNING: Assuming that high frequencies are in the corners
    1985             : !          and that n1 is multiple of 2
    1986             : 
    1987      915549 :       mfft = 0
    1988     1468055 :       Main: DO Jp2 = Jp2stb, nproc
    1989    17543856 :          DO J2 = J2stb, md2/nproc
    1990    16991350 :             mfft = mfft + 1
    1991    16991350 :             IF (mfft .GT. nfft) THEN
    1992      541320 :                Jp2stb = Jp2
    1993      541320 :                J2stb = J2
    1994      541320 :                EXIT Main
    1995             :             END IF
    1996   752114228 :             DO I1 = 1, n1/2
    1997   735664198 :                zw(1, mfft, I1) = 0._dp
    1998   752114228 :                zw(2, mfft, I1) = 0._dp
    1999             :             END DO
    2000   752666734 :             DO I1 = n1/2 + 1, n1
    2001   735664198 :                zw(1, mfft, I1) = zmpi1(1, I1 - n1/2, J2, j3, Jp2)
    2002   752114228 :                zw(2, mfft, I1) = zmpi1(2, I1 - n1/2, J2, j3, Jp2)
    2003             :             END DO
    2004             :          END DO
    2005      926735 :          J2stb = 1
    2006             :       END DO Main
    2007      915549 :    END SUBROUTINE mpiswitch_upcorn
    2008             : 
    2009             : ! **************************************************************************************************
    2010             : !> \brief ...
    2011             : !> \param nfft ...
    2012             : !> \param n2 ...
    2013             : !> \param lot ...
    2014             : !> \param n1 ...
    2015             : !> \param lzt ...
    2016             : !> \param zw ...
    2017             : !> \param zt ...
    2018             : ! **************************************************************************************************
    2019     1700660 :    SUBROUTINE unswitch_downcorn(nfft, n2, lot, n1, lzt, zw, zt)
    2020             :       INTEGER                                            :: nfft, n2, lot, n1, lzt
    2021             :       REAL(KIND=dp)                                      :: zw(2, lot, n2), zt(2, lzt, n1)
    2022             : 
    2023             :       INTEGER                                            :: i, j
    2024             : 
    2025             : ! WARNING: Assuming that high frequencies are in the corners
    2026             : !          and that n2 is multiple of 2
    2027             : ! Low frequencies
    2028             : 
    2029    34600720 :       DO j = 1, nfft
    2030  1505929116 :          DO i = 1, n2/2
    2031  1471328396 :             zt(1, i, j) = zw(1, j, i)
    2032  1504228456 :             zt(2, i, j) = zw(2, j, i)
    2033             :          END DO
    2034             :       END DO
    2035     1700660 :       RETURN
    2036             :    END SUBROUTINE unswitch_downcorn
    2037             : 
    2038             : ! **************************************************************************************************
    2039             : !> \brief ...
    2040             : !> \param j3 ...
    2041             : !> \param nfft ...
    2042             : !> \param Jp2stf ...
    2043             : !> \param J2stf ...
    2044             : !> \param lot ...
    2045             : !> \param n1 ...
    2046             : !> \param md2 ...
    2047             : !> \param nd3 ...
    2048             : !> \param nproc ...
    2049             : !> \param zw ...
    2050             : !> \param zmpi1 ...
    2051             : ! **************************************************************************************************
    2052      915549 :    SUBROUTINE unmpiswitch_downcorn(j3, nfft, Jp2stf, J2stf, lot, n1, md2, nd3, nproc, zw, zmpi1)
    2053             :       INTEGER                                            :: j3, nfft, Jp2stf, J2stf, lot, n1, md2, &
    2054             :                                                             nd3, nproc
    2055             :       REAL(KIND=dp) :: zw(2, lot, n1), zmpi1(2, n1/2, md2/nproc, nd3/nproc, nproc)
    2056             : 
    2057             :       INTEGER                                            :: i1, j2, jp2, mfft
    2058             : 
    2059             : ! WARNING: Assuming that high frequencies are in the corners
    2060             : !          and that n1 is multiple of 2
    2061             : 
    2062      915549 :       mfft = 0
    2063     1468055 :       Main: DO Jp2 = Jp2stf, nproc
    2064    17543856 :          DO J2 = J2stf, md2/nproc
    2065    16991350 :             mfft = mfft + 1
    2066    16991350 :             IF (mfft .GT. nfft) THEN
    2067      541320 :                Jp2stf = Jp2
    2068      541320 :                J2stf = J2
    2069      541320 :                EXIT Main
    2070             :             END IF
    2071   752666734 :             DO I1 = 1, n1/2
    2072   735664198 :                zmpi1(1, I1, J2, j3, Jp2) = zw(1, mfft, I1)
    2073   752114228 :                zmpi1(2, I1, J2, j3, Jp2) = zw(2, mfft, I1)
    2074             :             END DO
    2075             :          END DO
    2076      926735 :          J2stf = 1
    2077             :       END DO Main
    2078      915549 :    END SUBROUTINE unmpiswitch_downcorn
    2079             : 
    2080             : ! **************************************************************************************************
    2081             : !> \brief (Based on suitable modifications of S.Goedecker routines)
    2082             : !>      Restore data into output array, calculating in the meanwhile
    2083             : !>      Hartree energy of the potential
    2084             : !> \param md1 Dimensions of the undistributed part of the real grid
    2085             : !> \param md3 Dimensions of the undistributed part of the real grid
    2086             : !> \param lot ...
    2087             : !> \param nfft number of planes
    2088             : !> \param n3 (twice the) dimension of the last FFTtransform.
    2089             : !> \param zw FFT work array
    2090             : !> \param zf Original distributed density as well as
    2091             : !>                   Distributed solution of the poisson equation (inout)
    2092             : !> \param scal Needed to achieve unitarity and correct dimensions
    2093             : !> \param ehartreetmp Hartree energy
    2094             : !> \date February 2006
    2095             : !> \author S. Goedecker, L. Genovese
    2096             : !> \note Assuming that high frequencies are in the corners
    2097             : !>      and that n3 is multiple of 4
    2098             : !>
    2099             : !>  RESTRICTIONS on USAGE
    2100             : !>      Copyright (C) Stefan Goedecker, Cornell University, Ithaca, USA, 1994
    2101             : !>      Copyright (C) Stefan Goedecker, MPI Stuttgart, Germany, 1999
    2102             : !>      Copyright (C) 2002 Stefan Goedecker, CEA Grenoble
    2103             : !>      This file is distributed under the terms of the
    2104             : !>      GNU General Public License, see http://www.gnu.org/copyleft/gpl.txt .
    2105             : ! **************************************************************************************************
    2106           0 :    SUBROUTINE F_unfill_downcorn(md1, md3, lot, nfft, n3, zw, zf, scal, ehartreetmp)
    2107             :       INTEGER, INTENT(in)                                :: md1, md3, lot, nfft, n3
    2108             :       REAL(KIND=dp), DIMENSION(2, lot, n3/2), INTENT(in) :: zw
    2109             :       REAL(KIND=dp), DIMENSION(md1, md3), INTENT(inout)  :: zf
    2110             :       REAL(KIND=dp), INTENT(in)                          :: scal
    2111             :       REAL(KIND=dp), INTENT(out)                         :: ehartreetmp
    2112             : 
    2113             :       INTEGER                                            :: i1, i3
    2114             :       REAL(KIND=dp)                                      :: pot1
    2115             : 
    2116           0 :       ehartreetmp = 0._dp
    2117           0 :       DO i3 = 1, n3/4
    2118           0 :          DO i1 = 1, nfft
    2119           0 :             pot1 = scal*zw(1, i1, i3)
    2120           0 :             ehartreetmp = ehartreetmp + pot1*zf(i1, 2*i3 - 1)
    2121           0 :             zf(i1, 2*i3 - 1) = pot1
    2122           0 :             pot1 = scal*zw(2, i1, i3)
    2123           0 :             ehartreetmp = ehartreetmp + pot1*zf(i1, 2*i3)
    2124           0 :             zf(i1, 2*i3) = pot1
    2125             :          END DO
    2126             :       END DO
    2127           0 :    END SUBROUTINE F_unfill_downcorn
    2128             : 
    2129             : END MODULE ps_wavelet_base

Generated by: LCOV version 1.15