LCOV - code coverage report
Current view: top level - src/pw - ps_wavelet_kernel.F (source / functions) Hit Total Coverage
Test: CP2K Regtests (git:2fce0f8) Lines: 725 733 98.9 %
Date: 2024-12-21 06:28:57 Functions: 12 12 100.0 %

          Line data    Source code
       1             : !--------------------------------------------------------------------------------------------------!
       2             : !   CP2K: A general program to perform molecular dynamics simulations                              !
       3             : !   Copyright 2000-2024 CP2K developers group <https://cp2k.org>                                   !
       4             : !                                                                                                  !
       5             : !   SPDX-License-Identifier: GPL-2.0-or-later                                                      !
       6             : !--------------------------------------------------------------------------------------------------!
       7             : 
       8             : ! **************************************************************************************************
       9             : !> \brief Creates the wavelet kernel for the wavelet based poisson solver.
      10             : !> \author Florian Schiffmann (09.2007,fschiff)
      11             : ! **************************************************************************************************
      12             : MODULE ps_wavelet_kernel
      13             : 
      14             :    USE kinds,                           ONLY: dp
      15             :    USE message_passing,                 ONLY: mp_comm_type
      16             :    USE ps_wavelet_base,                 ONLY: scramble_unpack
      17             :    USE ps_wavelet_fft3d,                ONLY: ctrig,&
      18             :                                               ctrig_length,&
      19             :                                               fftstp
      20             :    USE ps_wavelet_scaling_function,     ONLY: scaling_function,&
      21             :                                               scf_recursion
      22             :    USE ps_wavelet_util,                 ONLY: F_FFT_dimensions,&
      23             :                                               S_FFT_dimensions
      24             : #include "../base/base_uses.f90"
      25             : 
      26             :    IMPLICIT NONE
      27             : 
      28             :    PRIVATE
      29             : 
      30             :    CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'ps_wavelet_kernel'
      31             : 
      32             : ! *** Public data types ***
      33             : 
      34             :    PUBLIC :: createKernel
      35             : 
      36             : CONTAINS
      37             : 
      38             : ! **************************************************************************************************
      39             : !> \brief Allocate a pointer which corresponds to the zero-padded FFT slice needed for
      40             : !>     calculating the convolution with the kernel expressed in the interpolating scaling
      41             : !>     function basis. The kernel pointer is unallocated on input, allocated on output.
      42             : !> \param geocode Indicates the boundary conditions (BC) of the problem:
      43             : !>             'F' free BC, isolated systems.
      44             : !>                 The program calculates the solution as if the given density is
      45             : !>                 "alone" in R^3 space.
      46             : !>             'S' surface BC, isolated in y direction, periodic in xz plane
      47             : !>                 The given density is supposed to be periodic in the xz plane,
      48             : !>                 so the dimensions in these direction mus be compatible with the FFT
      49             : !>                 Beware of the fact that the isolated direction is y!
      50             : !>             'P' periodic BC.
      51             : !>                 The density is supposed to be periodic in all the three directions,
      52             : !>                 then all the dimensions must be compatible with the FFT.
      53             : !>                 No need for setting up the kernel.
      54             : !> \param n01 dimensions of the real space grid to be hit with the Poisson Solver
      55             : !> \param n02 dimensions of the real space grid to be hit with the Poisson Solver
      56             : !> \param n03 dimensions of the real space grid to be hit with the Poisson Solver
      57             : !> \param hx  grid spacings. For the isolated BC case for the moment they are supposed to
      58             : !>                 be equal in the three directions
      59             : !> \param hy grid spacings. For the isolated BC case for the moment they are supposed to
      60             : !>                 be equal in the three directions
      61             : !> \param hz grid spacings. For the isolated BC case for the moment they are supposed to
      62             : !>                 be equal in the three directions
      63             : !> \param itype_scf order of the interpolating scaling functions used in the decomposition
      64             : !> \param iproc ,nproc: number of process, number of processes
      65             : !> \param nproc ...
      66             : !> \param kernel pointer for the kernel FFT. Unallocated on input, allocated on output.
      67             : !>                 Its dimensions are equivalent to the region of the FFT space for which the
      68             : !>                 kernel is injective. This will divide by two each direction,
      69             : !>                 since the kernel for the zero-padded convolution is real and symmetric.
      70             : !> \param mpi_group ...
      71             : !> \date February 2007
      72             : !> \author Luigi Genovese
      73             : !> \note Due to the fact that the kernel dimensions are unknown before the calling, the kernel
      74             : !>     must be declared as pointer in input of this routine.
      75             : !>     To avoid that, one can properly define the kernel dimensions by adding
      76             : !>     the nd1,nd2,nd3 arguments to the PS_dim4allocation routine, then eliminating the pointer
      77             : !>     declaration.
      78             : ! **************************************************************************************************
      79         840 :    SUBROUTINE createKernel(geocode, n01, n02, n03, hx, hy, hz, itype_scf, iproc, nproc, kernel, mpi_group)
      80             : 
      81             :       CHARACTER(len=1), INTENT(in)                       :: geocode
      82             :       INTEGER, INTENT(in)                                :: n01, n02, n03
      83             :       REAL(KIND=dp), INTENT(in)                          :: hx, hy, hz
      84             :       INTEGER, INTENT(in)                                :: itype_scf, iproc, nproc
      85             :       REAL(KIND=dp), POINTER                             :: kernel(:)
      86             : 
      87             :       CLASS(mp_comm_type), INTENT(in)                     :: mpi_group
      88             : 
      89             :       INTEGER                                            :: m1, m2, m3, md1, md2, md3, n1, n2, n3, &
      90             :                                                             nd1, nd2, nd3, nlimd, nlimk
      91             :       REAL(KIND=dp)                                      :: hgrid
      92             : 
      93         840 :       hgrid = MAX(hx, hy, hz)
      94             : 
      95         840 :       IF (geocode == 'P') THEN
      96             : 
      97             :          CALL F_FFT_dimensions(n01, n02, n03, m1, m2, m3, n1, n2, n3, &
      98         312 :                                md1, md2, md3, nd1, nd2, nd3, nproc)
      99             : 
     100         312 :          ALLOCATE (kernel(1))
     101         312 :          nlimd = n2
     102         624 :          nlimk = 0
     103             : 
     104         528 :       ELSE IF (geocode == 'S') THEN
     105             : 
     106             :          CALL S_FFT_dimensions(n01, n02, n03, m1, m2, m3, n1, n2, n3, &
     107           2 :                                md1, md2, md3, nd1, nd2, nd3, nproc)
     108             : 
     109           6 :          ALLOCATE (kernel(nd1*nd2*nd3/nproc))
     110             : 
     111             :          !the kernel must be built and scattered to all the processes
     112             : 
     113             :          CALL Surfaces_Kernel(n1, n2, n3, m3, nd1, nd2, nd3, hx, hz, hy, &
     114           2 :                               itype_scf, kernel, iproc, nproc, mpi_group)
     115             :          !last plane calculated for the density and the kernel
     116             : 
     117           2 :          nlimd = n2
     118           4 :          nlimk = n3/2 + 1
     119         526 :       ELSE IF (geocode == 'F') THEN
     120             : 
     121             :          !Build the Kernel
     122             : 
     123             :          CALL F_FFT_dimensions(n01, n02, n03, m1, m2, m3, n1, n2, n3, &
     124         526 :                                md1, md2, md3, nd1, nd2, nd3, nproc)
     125        1578 :          ALLOCATE (kernel(nd1*nd2*nd3/nproc))
     126             : 
     127             :          !the kernel must be built and scattered to all the processes
     128             :          CALL Free_Kernel(n01, n02, n03, n1, n2, n3, nd1, nd2, nd3, hgrid, &
     129         526 :                           itype_scf, iproc, nproc, kernel, mpi_group)
     130             : 
     131             :          !last plane calculated for the density and the kernel
     132         526 :          nlimd = n2/2
     133        1052 :          nlimk = n3/2 + 1
     134             : 
     135             :       ELSE
     136             : 
     137           0 :          CPABORT("No wavelet based poisson solver for given geometry")
     138             : 
     139             :       END IF
     140             : !!!  IF (iproc==0) THEN
     141             : !!!     write(*,*)'done.'
     142             : !!!     write(*,'(1x,a,i0)') 'Allocate words for kernel ',nd1*nd2*nd3/nproc
     143             : !!!     !print the load balancing of the different dimensions on screen
     144             : !!!     write(*,'(1x,a,3(i5))')'Grid Dimensions:',n01,n02,n03
     145             : !!!     if (nproc > 1) then
     146             : !!!        write(*,'(1x,a,3(i5),a,3(i5),a,3(i5))')&
     147             : !!!             'Memory occ. per proc.  Density',md1,md3,md2/nproc,'   Kernel',nd1,nd2,nd3/nproc
     148             : !!!        write(*,'(1x,a)')'Load Balancing--------------------------------------------'
     149             : !!!        jhd=1000
     150             : !!!        jzd=1000
     151             : !!!        npd=0
     152             : !!!        load_balancing: do jproc=0,nproc-1
     153             : !!!           !print *,'jproc,jfull=',jproc,jproc*md2/nproc,(jproc+1)*md2/nproc
     154             : !!!           if ((jproc+1)*md2/nproc <= nlimd) then
     155             : !!!              jfd=jproc
     156             : !!!           else if (jproc*md2/nproc <= nlimd) then
     157             : !!!              jhd=jproc
     158             : !!!              npd=nint(real(nlimd-(jproc)*md2/nproc,KIND=dp)/real(md2/nproc,KIND=dp)*100._dp)
     159             : !!!           else
     160             : !!!              jzd=jproc
     161             : !!!              exit load_balancing
     162             : !!!           end if
     163             : !!!        end do load_balancing
     164             : !!!        write(*,'(1x,a,i3,a)')'LB_den        : processors   0  -',jfd,' work at 100%'
     165             : !!!        if (jfd < nproc-1) write(*,'(1x,a,i3,a,i3,1a)')'                processor     ',jhd,&
     166             : !!!             '    work at ',npd,'%'
     167             : !!!        if (jhd < nproc-1) write(*,'(1x,a,i3,1a,i3,a)')'                processors ',&
     168             : !!!             jzd,'  -',nproc-1,' work at   0%'
     169             : !!!        jhk=1000
     170             : !!!        jzk=1000
     171             : !!!        npk=0
     172             : !!!        if (geocode /= 'P') then
     173             : !!!           load_balancingk: do jproc=0,nproc-1
     174             : !!!              !print *,'jproc,jfull=',jproc,jproc*nd3/nproc,(jproc+1)*nd3/nproc
     175             : !!!              if ((jproc+1)*nd3/nproc <= nlimk) then
     176             : !!!                 jfk=jproc
     177             : !!!              else if (jproc*nd3/nproc <= nlimk) then
     178             : !!!                 jhk=jproc
     179             : !!!                 npk=nint(real(nlimk-(jproc)*nd3/nproc,KIND=dp)/real(nd3/nproc,KIND=dp)*100._dp)
     180             : !!!              else
     181             : !!!                 jzk=jproc
     182             : !!!                 exit load_balancingk
     183             : !!!              end if
     184             : !!!           end do load_balancingk
     185             : !!!           write(*,'(1x,a,i3,a)')'LB_ker        : processors   0  -',jfk,' work at 100%'
     186             : !!!           if (jfk < nproc-1) write(*,'(1x,a,i3,a,i3,1a)')'                processor     ',jhk,&
     187             : !!!                '    work at ',npk,'%'
     188             : !!!           if (jhk < nproc-1) write(*,'(1x,a,i3,1a,i3,a)')'                processors ',jzk,'  -',nproc-1,&
     189             : !!!                ' work at   0%'
     190             : !!!        end if
     191             : !!!        write(*,'(1x,a)')'The LB per processor is 1/3 LB_den + 2/3 LB_ker-----------'
     192             : !!!     end if
     193             : !!!
     194             : !!!  END IF
     195         840 :    END SUBROUTINE createKernel
     196             : 
     197             : ! **************************************************************************************************
     198             : !> \brief Build the kernel of the Poisson operator with
     199             : !>     surfaces Boundary conditions
     200             : !>     in an interpolating scaling functions basis.
     201             : !>     Beware of the fact that the nonperiodic direction is y!
     202             : !> \param n1 Dimensions for the FFT
     203             : !> \param n2 Dimensions for the FFT
     204             : !> \param n3 Dimensions for the FFT
     205             : !> \param m3 Actual dimension in non-periodic direction
     206             : !> \param nker1 Dimensions of the kernel (nker3=n3/2+1) nker(1,2)=n(1,2)/2+1
     207             : !> \param nker2 Dimensions of the kernel (nker3=n3/2+1) nker(1,2)=n(1,2)/2+1
     208             : !> \param nker3 Dimensions of the kernel (nker3=n3/2+1) nker(1,2)=n(1,2)/2+1
     209             : !> \param h1 Mesh steps in the three dimensions
     210             : !> \param h2 Mesh steps in the three dimensions
     211             : !> \param h3 Mesh steps in the three dimensions
     212             : !> \param itype_scf Order of the scaling function
     213             : !> \param karray output array
     214             : !> \param iproc Number of process
     215             : !> \param nproc number of processes
     216             : !> \param mpi_group ...
     217             : !> \date October 2006
     218             : !> \author L. Genovese
     219             : ! **************************************************************************************************
     220           2 :    SUBROUTINE Surfaces_Kernel(n1, n2, n3, m3, nker1, nker2, nker3, h1, h2, h3, &
     221           2 :                               itype_scf, karray, iproc, nproc, mpi_group)
     222             : 
     223             :       INTEGER, INTENT(in)                                :: n1, n2, n3, m3, nker1, nker2, nker3
     224             :       REAL(KIND=dp), INTENT(in)                          :: h1, h2, h3
     225             :       INTEGER, INTENT(in)                                :: itype_scf, nproc, iproc
     226             :       REAL(KIND=dp), &
     227             :          DIMENSION(nker1, nker2, nker3/nproc), &
     228             :          INTENT(out)                                     :: karray
     229             :       TYPE(mp_comm_type), INTENT(in)                     :: mpi_group
     230             : 
     231             :       INTEGER, PARAMETER                                 :: n_points = 2**6, ncache_optimal = 8*1024
     232             : 
     233             :       INTEGER :: i, i1, i2, i3, ic, iend, imu, ind1, ind2, inzee, ipolyord, ireim, istart, j2, &
     234             :          j2nd, j2st, jnd1, jp2, jreim, n_cell, n_range, n_scf, nact2, ncache, nfft, num_of_mus, &
     235             :          shift
     236           2 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: after, before, now
     237             :       REAL(kind=dp)                                      :: a, b, c, cp, d, diff, dx, feI, feR, foI, &
     238             :                                                             foR, fR, mu1, pi, pion, ponx, pony, &
     239             :                                                             sp, value, x
     240           2 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: kernel_scf, x_scf, y_scf
     241           2 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :)        :: btrig, cossinarr
     242           2 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :)     :: halfft_cache, kernel
     243           2 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :, :)  :: kernel_mpi
     244             :       REAL(KIND=dp), DIMENSION(9, 8)                     :: cpol
     245             : 
     246             : !Better if higher (1024 points are enough 10^{-14}: 2*itype_scf*n_points)
     247             : !  include "perfdata.inc"
     248             : !FFT arrays
     249             : !coefficients for the polynomial interpolation
     250             : !assign the values of the coefficients
     251             : 
     252       45530 :       karray = 0.0_dp
     253         162 :       cpol(:, :) = 1._dp
     254             : 
     255           2 :       cpol(1, 2) = .25_dp
     256             : 
     257           2 :       cpol(1, 3) = 1._dp/3._dp
     258             : 
     259           2 :       cpol(1, 4) = 7._dp/12._dp
     260           2 :       cpol(2, 4) = 8._dp/3._dp
     261             : 
     262           2 :       cpol(1, 5) = 19._dp/50._dp
     263           2 :       cpol(2, 5) = 3._dp/2._dp
     264             : 
     265           2 :       cpol(1, 6) = 41._dp/272._dp
     266           2 :       cpol(2, 6) = 27._dp/34._dp
     267           2 :       cpol(3, 6) = 27._dp/272._dp
     268             : 
     269           2 :       cpol(1, 7) = 751._dp/2989._dp
     270           2 :       cpol(2, 7) = 73._dp/61._dp
     271           2 :       cpol(3, 7) = 27._dp/61._dp
     272             : 
     273           2 :       cpol(1, 8) = -989._dp/4540._dp
     274           2 :       cpol(2, 8) = -1472._dp/1135._dp
     275           2 :       cpol(3, 8) = 232._dp/1135._dp
     276           2 :       cpol(4, 8) = -2624._dp/1135._dp
     277             : 
     278             :       !renormalize values
     279           2 :       cpol(1, 1) = .5_dp*cpol(1, 1)
     280           6 :       cpol(1:2, 2) = 2._dp/3._dp*cpol(1:2, 2)
     281           6 :       cpol(1:2, 3) = 3._dp/8._dp*cpol(1:2, 3)
     282           8 :       cpol(1:3, 4) = 2._dp/15._dp*cpol(1:3, 4)
     283           8 :       cpol(1:3, 5) = 25._dp/144._dp*cpol(1:3, 5)
     284          10 :       cpol(1:4, 6) = 34._dp/105._dp*cpol(1:4, 6)
     285          10 :       cpol(1:4, 7) = 2989._dp/17280._dp*cpol(1:4, 7)
     286          12 :       cpol(1:5, 8) = -454._dp/2835._dp*cpol(1:5, 8)
     287             : 
     288             :       !assign the complete values
     289           2 :       cpol(2, 1) = cpol(1, 1)
     290             : 
     291           2 :       cpol(3, 2) = cpol(1, 2)
     292             : 
     293           2 :       cpol(3, 3) = cpol(2, 3)
     294           2 :       cpol(4, 3) = cpol(1, 3)
     295             : 
     296           2 :       cpol(4, 4) = cpol(2, 4)
     297           2 :       cpol(5, 4) = cpol(1, 4)
     298             : 
     299           2 :       cpol(4, 5) = cpol(3, 5)
     300           2 :       cpol(5, 5) = cpol(2, 5)
     301           2 :       cpol(6, 5) = cpol(1, 5)
     302             : 
     303           2 :       cpol(5, 6) = cpol(3, 6)
     304           2 :       cpol(6, 6) = cpol(2, 6)
     305           2 :       cpol(7, 6) = cpol(1, 6)
     306             : 
     307           2 :       cpol(5, 7) = cpol(4, 7)
     308           2 :       cpol(6, 7) = cpol(3, 7)
     309           2 :       cpol(7, 7) = cpol(2, 7)
     310           2 :       cpol(8, 7) = cpol(1, 7)
     311             : 
     312           2 :       cpol(6, 8) = cpol(4, 8)
     313           2 :       cpol(7, 8) = cpol(3, 8)
     314           2 :       cpol(8, 8) = cpol(2, 8)
     315           2 :       cpol(9, 8) = cpol(1, 8)
     316             : 
     317             :       !Number of integration points : 2*itype_scf*n_points
     318           2 :       n_scf = 2*itype_scf*n_points
     319             :       !Allocations
     320           6 :       ALLOCATE (x_scf(0:n_scf))
     321           4 :       ALLOCATE (y_scf(0:n_scf))
     322             : 
     323             :       !Build the scaling function
     324           2 :       CALL scaling_function(itype_scf, n_scf, n_range, x_scf, y_scf)
     325             :       !Step grid for the integration
     326           2 :       dx = REAL(n_range, KIND=dp)/REAL(n_scf, KIND=dp)
     327             :       !Extend the range (no more calculations because fill in by 0._dp)
     328           2 :       n_cell = m3
     329           2 :       n_range = MAX(n_cell, n_range)
     330             : 
     331             :       !Allocations
     332           2 :       ncache = ncache_optimal
     333             :       !the HalFFT must be performed only in the third dimension,
     334             :       !and nker3=n3/2+1, hence
     335           2 :       IF (ncache <= (nker3 - 1)*4) ncache = nker3 - 1*4
     336             : 
     337             :       !enlarge the second dimension of the kernel to be compatible with nproc
     338           2 :       nact2 = nker2
     339           0 :       enlarge_ydim: DO
     340           2 :          IF (nproc*(nact2/nproc) /= nact2) THEN
     341           0 :             nact2 = nact2 + 1
     342             :          ELSE
     343             :             EXIT enlarge_ydim
     344             :          END IF
     345             :       END DO enlarge_ydim
     346             : 
     347             :       !array for the MPI procedure
     348          10 :       ALLOCATE (kernel(nker1, nact2/nproc, nker3))
     349          12 :       ALLOCATE (kernel_mpi(nker1, nact2/nproc, nker3/nproc, nproc))
     350           6 :       ALLOCATE (kernel_scf(n_range))
     351           8 :       ALLOCATE (halfft_cache(2, ncache/4, 2))
     352           6 :       ALLOCATE (cossinarr(2, n3/2 - 1))
     353           2 :       ALLOCATE (btrig(2, ctrig_length))
     354           2 :       ALLOCATE (after(7))
     355           2 :       ALLOCATE (now(7))
     356           2 :       ALLOCATE (before(7))
     357             : 
     358             :       !constants
     359           2 :       pi = 4._dp*ATAN(1._dp)
     360             : 
     361             :       !arrays for the halFFT
     362           2 :       CALL ctrig(n3/2, btrig, after, before, now, 1, ic)
     363             : 
     364             :       !build the phases for the HalFFT reconstruction
     365           2 :       pion = 2._dp*pi/REAL(n3, KIND=dp)
     366         108 :       DO i3 = 2, n3/2
     367         106 :          x = REAL(i3 - 1, KIND=dp)*pion
     368         106 :          cossinarr(1, i3 - 1) = COS(x)
     369         108 :          cossinarr(2, i3 - 1) = -SIN(x)
     370             :       END DO
     371             : 
     372             :       ! satisfy valgrind, init arrays to large value, even if the offending bit is (likely?) padding
     373       45586 :       kernel = HUGE(0._dp)
     374       45590 :       kernel_mpi = HUGE(0._dp)
     375             : 
     376             :       !calculate the limits of the FFT calculations
     377             :       !that can be performed in a row remaining inside the cache
     378           2 :       num_of_mus = ncache/(2*n3)
     379             : 
     380           2 :       diff = 0._dp
     381             :       !order of the polynomial to be used for integration (must be a power of two)
     382           2 :       ipolyord = 8 !this part should be incorporated inside the numerical integration
     383             :       !here we have to choice the piece of the x-y grid to cover
     384             : 
     385             :       !let us now calculate the fraction of mu that will be considered
     386           2 :       j2st = iproc*(nact2/nproc)
     387           2 :       j2nd = MIN((iproc + 1)*(nact2/nproc), n2/2 + 1)
     388             : 
     389          24 :       DO ind2 = (n1/2 + 1)*j2st + 1, (n1/2 + 1)*j2nd, num_of_mus
     390          22 :          istart = ind2
     391          22 :          iend = MIN(ind2 + (num_of_mus - 1), (n1/2 + 1)*j2nd)
     392          22 :          nfft = iend - istart + 1
     393          22 :          shift = 0
     394             : 
     395             :          !initialization of the interesting part of the cache array
     396      270402 :          halfft_cache(:, :, :) = 0._dp
     397             : 
     398          22 :          IF (istart == 1) THEN
     399             :             !i2=1
     400           1 :             shift = 1
     401             : 
     402             :             CALL calculates_green_opt_muzero(n_range, n_scf, ipolyord, x_scf, y_scf, &
     403           1 :                                              cpol(1, ipolyord), dx, kernel_scf)
     404             : 
     405             :             !copy of the first zero value
     406           1 :             halfft_cache(1, 1, 1) = 0._dp
     407             : 
     408          55 :             DO i3 = 1, m3
     409             : 
     410          54 :                value = 0.5_dp*h3*kernel_scf(i3)
     411             :                !index in where to copy the value of the kernel
     412          54 :                CALL indices(ireim, num_of_mus, n3/2 + i3, 1, ind1)
     413             :                !index in where to copy the symmetric value
     414          54 :                CALL indices(jreim, num_of_mus, n3/2 + 2 - i3, 1, jnd1)
     415          54 :                halfft_cache(ireim, ind1, 1) = value
     416          55 :                halfft_cache(jreim, jnd1, 1) = value
     417             : 
     418             :             END DO
     419             : 
     420             :          END IF
     421             : 
     422         805 :          loopimpulses: DO imu = istart + shift, iend
     423             : 
     424             :             !here there is the value of mu associated to hgrid
     425             :             !note that we have multiplicated mu for hgrid to be comparable
     426             :             !with mu0ref
     427             : 
     428             :             !calculate the proper value of mu taking into account the periodic dimensions
     429             :             !corresponding value of i1 and i2
     430         783 :             i1 = MOD(imu, n1/2 + 1)
     431         783 :             IF (i1 == 0) i1 = n1/2 + 1
     432         783 :             i2 = (imu - i1)/(n1/2 + 1) + 1
     433         783 :             ponx = REAL(i1 - 1, KIND=dp)/REAL(n1, KIND=dp)
     434         783 :             pony = REAL(i2 - 1, KIND=dp)/REAL(n2, KIND=dp)
     435             : 
     436         783 :             mu1 = 2._dp*pi*SQRT((ponx/h1)**2 + (pony/h2)**2)*h3
     437             : 
     438             :             CALL calculates_green_opt(n_range, n_scf, itype_scf, ipolyord, x_scf, y_scf, &
     439         783 :                                       cpol(1, ipolyord), mu1, dx, kernel_scf)
     440             : 
     441             :             !readjust the coefficient and define the final kernel
     442             : 
     443             :             !copy of the first zero value
     444         783 :             halfft_cache(1, imu - istart + 1, 1) = 0._dp
     445       43087 :             DO i3 = 1, m3
     446       42282 :                value = -0.5_dp*h3/mu1*kernel_scf(i3)
     447             :                !write(80,*)mu1,i3,kernel_scf(i03)
     448             :                !index in where to copy the value of the kernel
     449       42282 :                CALL indices(ireim, num_of_mus, n3/2 + i3, imu - istart + 1, ind1)
     450             :                !index in where to copy the symmetric value
     451       42282 :                CALL indices(jreim, num_of_mus, n3/2 + 2 - i3, imu - istart + 1, jnd1)
     452       42282 :                halfft_cache(ireim, ind1, 1) = value
     453       43065 :                halfft_cache(jreim, jnd1, 1) = value
     454             :             END DO
     455             : 
     456             :          END DO loopimpulses
     457             : 
     458             :          !now perform the FFT of the array in cache
     459          22 :          inzee = 1
     460          88 :          DO i = 1, ic
     461             :             CALL fftstp(num_of_mus, nfft, n3/2, num_of_mus, n3/2, &
     462             :                         halfft_cache(1, 1, inzee), halfft_cache(1, 1, 3 - inzee), &
     463          66 :                         btrig, after(i), now(i), before(i), 1)
     464          88 :             inzee = 3 - inzee
     465             :          END DO
     466             :          !assign the values of the FFT array
     467             :          !and compare with the good results
     468         808 :          DO imu = istart, iend
     469             : 
     470             :             !corresponding value of i1 and i2
     471         784 :             i1 = MOD(imu, n1/2 + 1)
     472         784 :             IF (i1 == 0) i1 = n1/2 + 1
     473         784 :             i2 = (imu - i1)/(n1/2 + 1) + 1
     474             : 
     475         784 :             j2 = i2 - j2st
     476             : 
     477         784 :             a = halfft_cache(1, imu - istart + 1, inzee)
     478         784 :             b = halfft_cache(2, imu - istart + 1, inzee)
     479         784 :             kernel(i1, j2, 1) = a + b
     480         784 :             kernel(i1, j2, n3/2 + 1) = a - b
     481             : 
     482       42358 :             DO i3 = 2, n3/2
     483       41552 :                ind1 = imu - istart + 1 + num_of_mus*(i3 - 1)
     484       41552 :                jnd1 = imu - istart + 1 + num_of_mus*(n3/2 + 2 - i3 - 1)
     485       41552 :                cp = cossinarr(1, i3 - 1)
     486       41552 :                sp = cossinarr(2, i3 - 1)
     487       41552 :                a = halfft_cache(1, ind1, inzee)
     488       41552 :                b = halfft_cache(2, ind1, inzee)
     489       41552 :                c = halfft_cache(1, jnd1, inzee)
     490       41552 :                d = halfft_cache(2, jnd1, inzee)
     491       41552 :                feR = .5_dp*(a + c)
     492       41552 :                feI = .5_dp*(b - d)
     493       41552 :                foR = .5_dp*(a - c)
     494       41552 :                foI = .5_dp*(b + d)
     495       41552 :                fR = feR + cp*foI - sp*foR
     496       42336 :                kernel(i1, j2, i3) = fR
     497             :             END DO
     498             :          END DO
     499             : 
     500             :       END DO
     501             : 
     502             :       !give to each processor a slice of the third dimension
     503           2 :       IF (nproc > 1) THEN
     504             :          CALL mpi_group%alltoall(kernel, &!nker1*(nact2/nproc)*(nker3/nproc), &
     505           2 :                                  kernel_mpi, nker1*(nact2/nproc)*(nker3/nproc))
     506             : 
     507           6 :          DO jp2 = 1, nproc
     508         118 :             DO i3 = 1, nker3/nproc
     509        1684 :                DO i2 = 1, nact2/nproc
     510        1568 :                   j2 = i2 + (jp2 - 1)*(nact2/nproc)
     511        1680 :                   IF (j2 <= nker2) THEN
     512       45472 :                      DO i1 = 1, nker1
     513             :                         karray(i1, j2, i3) = &
     514       45472 :                            kernel_mpi(i1, i2, i3, jp2)
     515             :                      END DO
     516             :                   END IF
     517             :                END DO
     518             :             END DO
     519             :          END DO
     520             : 
     521             :       ELSE
     522           0 :          karray(1:nker1, 1:nker2, 1:nker3) = kernel(1:nker1, 1:nker2, 1:nker3)
     523             :       END IF
     524             : 
     525             :       !De-allocations
     526           2 :       DEALLOCATE (kernel)
     527           2 :       DEALLOCATE (kernel_mpi)
     528           2 :       DEALLOCATE (btrig)
     529           2 :       DEALLOCATE (after)
     530           2 :       DEALLOCATE (now)
     531           2 :       DEALLOCATE (before)
     532           2 :       DEALLOCATE (halfft_cache)
     533           2 :       DEALLOCATE (kernel_scf)
     534           2 :       DEALLOCATE (x_scf)
     535           2 :       DEALLOCATE (y_scf)
     536             : 
     537           4 :    END SUBROUTINE Surfaces_Kernel
     538             : 
     539             : ! **************************************************************************************************
     540             : !> \brief ...
     541             : !> \param n ...
     542             : !> \param n_scf ...
     543             : !> \param itype_scf ...
     544             : !> \param intorder ...
     545             : !> \param xval ...
     546             : !> \param yval ...
     547             : !> \param c ...
     548             : !> \param mu ...
     549             : !> \param hres ...
     550             : !> \param g_mu ...
     551             : ! **************************************************************************************************
     552         783 :    SUBROUTINE calculates_green_opt(n, n_scf, itype_scf, intorder, xval, yval, c, mu, hres, g_mu)
     553             :       INTEGER, INTENT(in)                                :: n, n_scf, itype_scf, intorder
     554             :       REAL(KIND=dp), DIMENSION(0:n_scf), INTENT(in)      :: xval, yval
     555             :       REAL(KIND=dp), DIMENSION(intorder+1), INTENT(in)   :: c
     556             :       REAL(KIND=dp), INTENT(in)                          :: mu, hres
     557             :       REAL(KIND=dp), DIMENSION(n), INTENT(out)           :: g_mu
     558             : 
     559             :       REAL(KIND=dp), PARAMETER                           :: mu_max = 0.2_dp
     560             : 
     561             :       INTEGER                                            :: i, iend, ikern, ivalue, izero, n_iter, &
     562             :                                                             nrec
     563             :       REAL(KIND=dp)                                      :: f, filter, fl, fr, gleft, gltmp, gright, &
     564             :                                                             grtmp, mu0, ratio, x, x0, x1
     565         783 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: green, green1
     566             : 
     567       43065 :       g_mu = 0.0_dp
     568             :       !We calculate the number of iterations to go from mu0 to mu0_ref
     569         783 :       IF (mu <= mu_max) THEN
     570           3 :          n_iter = 0
     571           3 :          mu0 = mu
     572             :       ELSE
     573         780 :          n_iter = 1
     574        2296 :          loop_iter: DO
     575        3076 :             ratio = REAL(2**n_iter, KIND=dp)
     576        3076 :             mu0 = mu/ratio
     577        3076 :             IF (mu0 <= mu_max) THEN
     578             :                EXIT loop_iter
     579             :             END IF
     580        2296 :             n_iter = n_iter + 1
     581             :          END DO loop_iter
     582             :       END IF
     583             : 
     584             :       !dimension needed for the correct calculation of the recursion
     585         783 :       nrec = 2**n_iter*n
     586             : 
     587        2349 :       ALLOCATE (green(-nrec:nrec))
     588             : 
     589             :       !initialization of the branching value
     590      652239 :       ikern = 0
     591      652239 :       izero = 0
     592      651456 :       initialization: DO
     593      652239 :          IF (xval(izero) >= REAL(ikern, KIND=dp) .OR. izero == n_scf) EXIT initialization
     594      651456 :          izero = izero + 1
     595             :       END DO initialization
     596     1474362 :       green = 0._dp
     597             :       !now perform the interpolation in right direction
     598             :       ivalue = izero
     599             :       gright = 0._dp
     600       93177 :       loop_right: DO
     601       93960 :          IF (ivalue >= n_scf - intorder - 1) EXIT loop_right
     602      931770 :          DO i = 1, intorder + 1
     603      838593 :             x = xval(ivalue) - REAL(ikern, KIND=dp)
     604      838593 :             f = yval(ivalue)*EXP(-mu0*x)
     605      838593 :             filter = intorder*c(i)
     606      838593 :             gright = gright + filter*f
     607      931770 :             ivalue = ivalue + 1
     608             :          END DO
     609       93177 :          ivalue = ivalue - 1
     610             :       END DO loop_right
     611         783 :       iend = n_scf - ivalue
     612        7047 :       DO i = 1, iend
     613        6264 :          x = xval(ivalue) - REAL(ikern, KIND=dp)
     614        6264 :          f = yval(ivalue)*EXP(-mu0*x)
     615        6264 :          filter = intorder*c(i)
     616        6264 :          gright = gright + filter*f
     617        7047 :          ivalue = ivalue + 1
     618             :       END DO
     619         783 :       gright = hres*gright
     620             : 
     621             :       !the scaling function is symmetric, so the same for the other direction
     622         783 :       gleft = gright
     623             : 
     624         783 :       green(ikern) = gleft + gright
     625             : 
     626             :       !now the loop until the last value
     627      252960 :       DO ikern = 1, nrec
     628      335124 :          gltmp = 0._dp
     629      335124 :          grtmp = 0._dp
     630      335124 :          ivalue = izero
     631      335124 :          x0 = xval(izero)
     632       82215 :          loop_integration: DO
     633      335124 :             IF (izero == n_scf) EXIT loop_integration
     634      927855 :             DO i = 1, intorder + 1
     635      845640 :                x = xval(ivalue)
     636      845640 :                fl = yval(ivalue)*EXP(mu0*x)
     637      845640 :                fr = yval(ivalue)*EXP(-mu0*x)
     638      845640 :                filter = intorder*c(i)
     639      845640 :                gltmp = gltmp + filter*fl
     640      845640 :                grtmp = grtmp + filter*fr
     641      845640 :                ivalue = ivalue + 1
     642      845640 :                IF (xval(izero) >= REAL(ikern, KIND=dp) .OR. izero == n_scf) THEN
     643             :                   x1 = xval(izero)
     644             :                   EXIT loop_integration
     645             :                END IF
     646      916110 :                izero = izero + 1
     647             :             END DO
     648       82215 :             ivalue = ivalue - 1
     649       82215 :             izero = izero - 1
     650             :          END DO loop_integration
     651      252909 :          gleft = EXP(-mu0)*(gleft + hres*EXP(-mu0*REAL(ikern - 1, KIND=dp))*gltmp)
     652      252909 :          IF (izero == n_scf) THEN
     653             :             gright = 0._dp
     654             :          ELSE
     655       10962 :             gright = EXP(mu0)*(gright - hres*EXP(mu0*REAL(ikern - 1, KIND=dp))*grtmp)
     656             :          END IF
     657      252909 :          green(ikern) = gleft + gright
     658      252909 :          green(-ikern) = gleft + gright
     659      252960 :          IF (ABS(green(ikern)) <= 1.e-20_dp) THEN
     660         732 :             nrec = ikern
     661         732 :             EXIT
     662             :          END IF
     663             :          !print *,ikern,izero,n_scf,gltmp,grtmp,gleft,gright,x0,x1,green(ikern)
     664             :       END DO
     665             :       !now we must calculate the recursion
     666        2349 :       ALLOCATE (green1(-nrec:nrec))
     667             : 
     668             :       !Start the iteration to go from mu0 to mu
     669         783 :       CALL scf_recursion(itype_scf, n_iter, nrec, green(-nrec), green1(-nrec))
     670             : 
     671       43065 :       DO i = 1, MIN(n, nrec)
     672       43065 :          g_mu(i) = green(i - 1)
     673             :       END DO
     674         783 :       DO i = MIN(n, nrec) + 1, n
     675         783 :          g_mu(i) = 0._dp
     676             :       END DO
     677             : 
     678         783 :       DEALLOCATE (green, green1)
     679             : 
     680         783 :    END SUBROUTINE calculates_green_opt
     681             : 
     682             : ! **************************************************************************************************
     683             : !> \brief ...
     684             : !> \param n ...
     685             : !> \param n_scf ...
     686             : !> \param intorder ...
     687             : !> \param xval ...
     688             : !> \param yval ...
     689             : !> \param c ...
     690             : !> \param hres ...
     691             : !> \param green ...
     692             : ! **************************************************************************************************
     693           1 :    SUBROUTINE calculates_green_opt_muzero(n, n_scf, intorder, xval, yval, c, hres, green)
     694             :       INTEGER, INTENT(in)                                :: n, n_scf, intorder
     695             :       REAL(KIND=dp), DIMENSION(0:n_scf), INTENT(in)      :: xval, yval
     696             :       REAL(KIND=dp), DIMENSION(intorder+1), INTENT(in)   :: c
     697             :       REAL(KIND=dp), INTENT(in)                          :: hres
     698             :       REAL(KIND=dp), DIMENSION(n), INTENT(out)           :: green
     699             : 
     700             :       INTEGER                                            :: i, iend, ikern, ivalue, izero
     701             :       REAL(KIND=dp)                                      :: c0, c1, filter, gl0, gl1, gr0, gr1, x, y
     702             : 
     703             : !initialization of the branching value
     704             : 
     705           1 :       ikern = 0
     706           1 :       izero = 0
     707         832 :       initialization: DO
     708         833 :          IF (xval(izero) >= REAL(ikern, KIND=dp) .OR. izero == n_scf) EXIT initialization
     709         833 :          izero = izero + 1
     710             :       END DO initialization
     711          55 :       green = 0._dp
     712             :       !first case, ikern=0
     713             :       !now perform the interpolation in right direction
     714             :       ivalue = izero
     715             :       gr1 = 0._dp
     716         119 :       loop_right: DO
     717         120 :          IF (ivalue >= n_scf - intorder - 1) EXIT loop_right
     718        1190 :          DO i = 1, intorder + 1
     719        1071 :             x = xval(ivalue)
     720        1071 :             y = yval(ivalue)
     721        1071 :             filter = intorder*c(i)
     722        1071 :             gr1 = gr1 + filter*x*y
     723        1190 :             ivalue = ivalue + 1
     724             :          END DO
     725         119 :          ivalue = ivalue - 1
     726             :       END DO loop_right
     727           1 :       iend = n_scf - ivalue
     728           9 :       DO i = 1, iend
     729           8 :          x = xval(ivalue)
     730           8 :          y = yval(ivalue)
     731           8 :          filter = intorder*c(i)
     732           8 :          gr1 = gr1 + filter*x*y
     733           9 :          ivalue = ivalue + 1
     734             :       END DO
     735           1 :       gr1 = hres*gr1
     736             :       !the scaling function is symmetric
     737           1 :       gl1 = -gr1
     738           1 :       gl0 = 0.5_dp
     739           1 :       gr0 = 0.5_dp
     740             : 
     741           1 :       green(1) = 2._dp*gr1
     742             : 
     743             :       !now the loop until the last value
     744          54 :       DO ikern = 1, n - 1
     745             :          c0 = 0._dp
     746             :          c1 = 0._dp
     747             :          ivalue = izero
     748         105 :          loop_integration: DO
     749         158 :             IF (izero == n_scf) EXIT loop_integration
     750        1185 :             DO i = 1, intorder + 1
     751        1080 :                x = xval(ivalue)
     752        1080 :                y = yval(ivalue)
     753        1080 :                filter = intorder*c(i)
     754        1080 :                c0 = c0 + filter*y
     755        1080 :                c1 = c1 + filter*y*x
     756        1080 :                ivalue = ivalue + 1
     757        1080 :                IF (xval(izero) >= REAL(ikern, KIND=dp) .OR. izero == n_scf) THEN
     758             :                   EXIT loop_integration
     759             :                END IF
     760        1170 :                izero = izero + 1
     761             :             END DO
     762         105 :             ivalue = ivalue - 1
     763         120 :             izero = izero - 1
     764             :          END DO loop_integration
     765          53 :          c0 = hres*c0
     766          53 :          c1 = hres*c1
     767             : 
     768          53 :          gl0 = gl0 + c0
     769          53 :          gl1 = gl1 + c1
     770          53 :          gr0 = gr0 - c0
     771          53 :          gr1 = gr1 - c1
     772             :          !general case
     773          54 :          green(ikern + 1) = REAL(ikern, KIND=dp)*(gl0 - gr0) + gr1 - gl1
     774             :          !print *,ikern,izero,n_scf,gltmp,grtmp,gleft,gright,x0,x1,green(ikern)
     775             :       END DO
     776             : 
     777           1 :    END SUBROUTINE calculates_green_opt_muzero
     778             : 
     779             : ! **************************************************************************************************
     780             : !> \brief ...
     781             : !> \param var_realimag ...
     782             : !> \param nelem ...
     783             : !> \param intrn ...
     784             : !> \param extrn ...
     785             : !> \param index ...
     786             : ! **************************************************************************************************
     787       84672 :    SUBROUTINE indices(var_realimag, nelem, intrn, extrn, index)
     788             : 
     789             :       INTEGER, INTENT(out)                               :: var_realimag
     790             :       INTEGER, INTENT(in)                                :: nelem, intrn, extrn
     791             :       INTEGER, INTENT(out)                               :: index
     792             : 
     793             :       INTEGER                                            :: i
     794             : 
     795             : !real or imaginary part
     796             : 
     797       84672 :       var_realimag = 2 - MOD(intrn, 2)
     798             : !actual index over half the length
     799             : 
     800       84672 :       i = (intrn + 1)/2
     801             :       !check
     802       84672 :       IF (2*(i - 1) + var_realimag /= intrn) THEN
     803           0 :          PRINT *, 'error, index=', intrn, 'var_realimag=', var_realimag, 'i=', i
     804             :       END IF
     805             :       !complete index to be assigned
     806       84672 :       index = extrn + nelem*(i - 1)
     807             : 
     808       84672 :    END SUBROUTINE indices
     809             : 
     810             : ! **************************************************************************************************
     811             : !> \brief Build the kernel of a gaussian function
     812             : !>     for interpolating scaling functions.
     813             : !>     Do the parallel HalFFT of the symmetrized function and stores into
     814             : !>     memory only 1/8 of the grid divided by the number of processes nproc
     815             : !>
     816             : !>     Build the kernel (karray) of a gaussian function
     817             : !>     for interpolating scaling functions
     818             : !>     $$ K(j) = \sum_k \omega_k \int \int \phi(x) g_k(x'-x) \delta(x'- j) dx dx' $$
     819             : !> \param n01 Mesh dimensions of the density
     820             : !> \param n02 Mesh dimensions of the density
     821             : !> \param n03 Mesh dimensions of the density
     822             : !> \param nfft1 Dimensions of the FFT grid (HalFFT in the third direction)
     823             : !> \param nfft2 Dimensions of the FFT grid (HalFFT in the third direction)
     824             : !> \param nfft3 Dimensions of the FFT grid (HalFFT in the third direction)
     825             : !> \param n1k Dimensions of the kernel FFT
     826             : !> \param n2k Dimensions of the kernel FFT
     827             : !> \param n3k Dimensions of the kernel FFT
     828             : !> \param hgrid Mesh step
     829             : !> \param itype_scf Order of the scaling function (8,14,16)
     830             : !> \param iproc ...
     831             : !> \param nproc ...
     832             : !> \param karray ...
     833             : !> \param mpi_group ...
     834             : !> \date February 2006
     835             : !> \author T. Deutsch, L. Genovese
     836             : ! **************************************************************************************************
     837         526 :    SUBROUTINE Free_Kernel(n01, n02, n03, nfft1, nfft2, nfft3, n1k, n2k, n3k, &
     838         526 :                           hgrid, itype_scf, iproc, nproc, karray, mpi_group)
     839             : 
     840             :       INTEGER, INTENT(in)                                :: n01, n02, n03, nfft1, nfft2, nfft3, n1k, &
     841             :                                                             n2k, n3k
     842             :       REAL(KIND=dp), INTENT(in)                          :: hgrid
     843             :       INTEGER, INTENT(in)                                :: itype_scf, iproc, nproc
     844             :       REAL(KIND=dp), DIMENSION(n1k, n2k, n3k/nproc), &
     845             :          INTENT(out)                                     :: karray
     846             :       TYPE(mp_comm_type), INTENT(in)                     :: mpi_group
     847             : 
     848             :       INTEGER, PARAMETER                                 :: n_gauss = 89, n_points = 2**6
     849             :       REAL(KIND=dp), PARAMETER                           :: p0_ref = 1._dp
     850             : 
     851             :       INTEGER                                            :: i, i01, i02, i03, i1, i2, i3, i_gauss, &
     852             :                                                             i_kern, iend, istart, istart1, n1h, &
     853             :                                                             n2h, n3h, n_cell, n_iter, n_range, &
     854             :                                                             n_scf, nker1, nker2, nker3
     855             :       REAL(KIND=dp)                                      :: a1, a2, a3, a_range, absci, acc_gauss, &
     856             :                                                             dr_gauss, dx, factor, factor2, kern, &
     857             :                                                             p0_cell, p0gauss, pgauss, ur_gauss
     858         526 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: kern_1_scf, kernel_scf, x_scf, y_scf
     859         526 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :)     :: kp
     860             :       REAL(KIND=dp), DIMENSION(n_gauss)                  :: p_gauss, w_gauss
     861             : 
     862             : !Do not touch !!!!
     863             : !Better if higher (1024 points are enough 10^{-14}: 2*itype_scf*n_points)
     864             : !Better p_gauss for calculation
     865             : !(the support of the exponential should be inside [-n_range/2,n_range/2])
     866             : !Number of integration points : 2*itype_scf*n_points
     867             : 
     868         526 :       n_scf = 2*itype_scf*n_points
     869             :       !Set karray
     870    34411842 :       karray = 0.0_dp
     871             :       !here we must set the dimensions for the fft part, starting from the nfft
     872             :       !remember that actually nfft2 is associated to n03 and viceversa
     873             : 
     874             :       !dimensions that define the center of symmetry
     875         526 :       n1h = nfft1/2
     876         526 :       n2h = nfft2/2
     877         526 :       n3h = nfft3/2
     878             : 
     879             :       !Auxiliary dimensions only for building the FFT part
     880         526 :       nker1 = nfft1
     881         526 :       nker2 = nfft2
     882         526 :       nker3 = nfft3/2 + 1
     883             : 
     884             :       !adjusting the last two dimensions to be multiples of nproc
     885           0 :       DO
     886         526 :          IF (MODULO(nker2, nproc) == 0) EXIT
     887           0 :          nker2 = nker2 + 1
     888             :       END DO
     889         366 :       DO
     890         892 :          IF (MODULO(nker3, nproc) == 0) EXIT
     891         366 :          nker3 = nker3 + 1
     892             :       END DO
     893             : 
     894             :       !this will be the array of the kernel in the real space
     895        3156 :       ALLOCATE (kp(n1h + 1, n3h + 1, nker2/nproc))
     896             : 
     897             :       !defining proper extremes for the calculation of the
     898             :       !local part of the kernel
     899             : 
     900         526 :       istart = iproc*nker2/nproc + 1
     901         526 :       iend = MIN((iproc + 1)*nker2/nproc, n2h + n03)
     902             : 
     903         526 :       istart1 = istart
     904         526 :       IF (iproc .EQ. 0) istart1 = n2h - n03 + 2
     905             : 
     906             :       !Allocations
     907        1578 :       ALLOCATE (x_scf(0:n_scf))
     908        1052 :       ALLOCATE (y_scf(0:n_scf))
     909             : 
     910             :       !Build the scaling function
     911         526 :       CALL scaling_function(itype_scf, n_scf, n_range, x_scf, y_scf)
     912             :       !Step grid for the integration
     913         526 :       dx = REAL(n_range, KIND=dp)/REAL(n_scf, KIND=dp)
     914             :       !Extend the range (no more calculations because fill in by 0._dp)
     915         526 :       n_cell = MAX(n01, n02, n03)
     916         526 :       n_range = MAX(n_cell, n_range)
     917             : 
     918             :       !Allocations
     919        1578 :       ALLOCATE (kernel_scf(-n_range:n_range))
     920        1052 :       ALLOCATE (kern_1_scf(-n_range:n_range))
     921             : 
     922             :       !Lengthes of the box (use FFT dimension)
     923         526 :       a1 = hgrid*REAL(n01, KIND=dp)
     924         526 :       a2 = hgrid*REAL(n02, KIND=dp)
     925         526 :       a3 = hgrid*REAL(n03, KIND=dp)
     926             : 
     927     2680860 :       x_scf(:) = hgrid*x_scf(:)
     928     2680860 :       y_scf(:) = 1._dp/hgrid*y_scf(:)
     929         526 :       dx = hgrid*dx
     930             :       !To have a correct integration
     931         526 :       p0_cell = p0_ref/(hgrid*hgrid)
     932             : 
     933             :       !Initialization of the gaussian (Beylkin)
     934         526 :       CALL gequad(p_gauss, w_gauss, ur_gauss, dr_gauss, acc_gauss)
     935             :       !In order to have a range from a_range=sqrt(a1*a1+a2*a2+a3*a3)
     936             :       !(biggest length in the cube)
     937             :       !We divide the p_gauss by a_range**2 and a_gauss by a_range
     938         526 :       a_range = SQRT(a1*a1 + a2*a2 + a3*a3)
     939         526 :       factor = 1._dp/a_range
     940             :       !factor2 = factor*factor
     941         526 :       factor2 = 1._dp/(a1*a1 + a2*a2 + a3*a3)
     942       47340 :       DO i_gauss = 1, n_gauss
     943       47340 :          p_gauss(i_gauss) = factor2*p_gauss(i_gauss)
     944             :       END DO
     945       47340 :       DO i_gauss = 1, n_gauss
     946       47340 :          w_gauss(i_gauss) = factor*w_gauss(i_gauss)
     947             :       END DO
     948             : 
     949    66572642 :       kp(:, :, :) = 0._dp
     950             :       !Use in this order (better for accuracy).
     951       47340 :       loop_gauss: DO i_gauss = n_gauss, 1, -1
     952             :          !Gaussian
     953       46814 :          pgauss = p_gauss(i_gauss)
     954             : 
     955             :          !We calculate the number of iterations to go from pgauss to p0_ref
     956       46814 :          n_iter = NINT((LOG(pgauss) - LOG(p0_cell))/LOG(4._dp))
     957       46814 :          IF (n_iter <= 0) THEN
     958        9966 :             n_iter = 0
     959        9966 :             p0gauss = pgauss
     960             :          ELSE
     961       36848 :             p0gauss = pgauss/4._dp**n_iter
     962             :          END IF
     963             : 
     964             :          !Stupid integration
     965             :          !Do the integration with the exponential centered in i_kern
     966     7581732 :          kernel_scf(:) = 0._dp
     967     1475508 :          DO i_kern = 0, n_range
     968             :             kern = 0._dp
     969  7511069488 :             DO i = 0, n_scf
     970  7509598104 :                absci = x_scf(i) - REAL(i_kern, KIND=dp)*hgrid
     971  7509598104 :                absci = absci*absci
     972  7511069488 :                kern = kern + y_scf(i)*EXP(-p0gauss*absci)*dx
     973             :             END DO
     974     1471384 :             kernel_scf(i_kern) = kern
     975     1471384 :             kernel_scf(-i_kern) = kern
     976     1475508 :             IF (ABS(kern) < 1.e-18_dp) THEN
     977             :                !Too small not useful to calculate
     978             :                EXIT
     979             :             END IF
     980             :          END DO
     981             : 
     982             :          !Start the iteration to go from p0gauss to pgauss
     983       46814 :          CALL scf_recursion(itype_scf, n_iter, n_range, kernel_scf, kern_1_scf)
     984             : 
     985             :          !Add to the kernel (only the local part)
     986             : 
     987     2436189 :          DO i3 = istart1, iend
     988     2388849 :             i03 = i3 - n2h - 1
     989             :             ! Crash if index out of range
     990             :             ! Without compiler bounds checking, the calculation might run successfully but
     991             :             ! it is also possible that the Hartree energy will blow up
     992             :             ! This seems to happen with large MPI processor counts if the size of the
     993             :             ! RS grid is not directly compatible with the allowed FFT dimensions in
     994             :             ! subroutine fourier_dim (ps_wavelet_fft3d.F)
     995     2388849 :             IF (i03 .LT. -n_range .OR. i03 .GT. n_range) THEN
     996             :                CALL cp_abort(__LOCATION__, "Index out of range in wavelet solver. "// &
     997             :                              "Try decreasing the number of MPI processors, or adjust the PW_CUTOFF or cell size "// &
     998             :                              "so that 2*(number of RS grid points) matches the allowed FFT dimensions "// &
     999           0 :                              "(see ps_wavelet_fft3d.F) exactly.")
    1000             :             END IF
    1001   110182623 :             DO i2 = 1, n02
    1002   107746960 :                i02 = i2 - 1
    1003  5545665917 :                DO i1 = 1, n01
    1004  5435530108 :                   i01 = i1 - 1
    1005             :                   kp(i1, i2, i3 - istart + 1) = kp(i1, i2, i3 - istart + 1) + w_gauss(i_gauss)* &
    1006  5543277068 :                                                 kernel_scf(i01)*kernel_scf(i02)*kernel_scf(i03)
    1007             :                END DO
    1008             :             END DO
    1009             :          END DO
    1010             : 
    1011             :       END DO loop_gauss
    1012             : 
    1013             :       !De-allocations
    1014         526 :       DEALLOCATE (kernel_scf)
    1015         526 :       DEALLOCATE (kern_1_scf)
    1016         526 :       DEALLOCATE (x_scf)
    1017         526 :       DEALLOCATE (y_scf)
    1018             : 
    1019             : !!!!END KERNEL CONSTRUCTION
    1020             : 
    1021             : !!$ if(iproc .eq. 0) print *,"Do a 3D PHalFFT for the kernel"
    1022             : 
    1023             :       CALL kernelfft(nfft1, nfft2, nfft3, nker1, nker2, nker3, n1k, n2k, n3k, nproc, iproc, &
    1024         526 :                      kp, karray, mpi_group)
    1025             : 
    1026             :       !De-allocations
    1027         526 :       DEALLOCATE (kp)
    1028             : 
    1029         526 :    END SUBROUTINE Free_Kernel
    1030             : 
    1031             : ! **************************************************************************************************
    1032             : !> \brief ...
    1033             : !> \param n1 ...
    1034             : !> \param n3 ...
    1035             : !> \param lot ...
    1036             : !> \param nfft ...
    1037             : !> \param i1 ...
    1038             : !> \param zf ...
    1039             : !> \param zw ...
    1040             : ! **************************************************************************************************
    1041       72932 :    SUBROUTINE inserthalf(n1, n3, lot, nfft, i1, zf, zw)
    1042             :       INTEGER, INTENT(in)                                :: n1, n3, lot, nfft, i1
    1043             :       REAL(KIND=dp), DIMENSION(n1/2+1, n3/2+1), &
    1044             :          INTENT(in)                                      :: zf
    1045             :       REAL(KIND=dp), DIMENSION(2, lot, n3/2), &
    1046             :          INTENT(out)                                     :: zw
    1047             : 
    1048             :       INTEGER                                            :: i01, i03i, i03r, i3, l1, l3
    1049             : 
    1050   446915092 :       zw = 0.0_dp
    1051       72932 :       i3 = 0
    1052     4117828 :       DO l3 = 1, n3, 2
    1053     4044896 :          i3 = i3 + 1
    1054     4044896 :          i03r = ABS(l3 - n3/2 - 1) + 1
    1055     4044896 :          i03i = ABS(l3 - n3/2) + 1
    1056   129648588 :          DO l1 = 1, nfft
    1057   125530760 :             i01 = ABS(l1 - 1 + i1 - n1/2 - 1) + 1
    1058   125530760 :             zw(1, l1, i3) = zf(i01, i03r)
    1059   129575656 :             zw(2, l1, i3) = zf(i01, i03i)
    1060             :          END DO
    1061             :       END DO
    1062             : 
    1063       72932 :    END SUBROUTINE inserthalf
    1064             : 
    1065             : ! **************************************************************************************************
    1066             : !> \brief (Based on suitable modifications of S.Goedecker routines)
    1067             : !>      Calculates the FFT of the distributed kernel
    1068             : !> \param n1 logical dimension of the transform.
    1069             : !> \param n2 logical dimension of the transform.
    1070             : !> \param n3 logical dimension of the transform.
    1071             : !> \param nd1 Dimensions of work arrays
    1072             : !> \param nd2 Dimensions of work arrays
    1073             : !> \param nd3 Dimensions of work arrays
    1074             : !> \param nk1 ...
    1075             : !> \param nk2 ...
    1076             : !> \param nk3 ...
    1077             : !> \param nproc number of processors used as returned by MPI_COMM_SIZE
    1078             : !> \param iproc [0:nproc-1] number of processor as returned by MPI_COMM_RANK
    1079             : !> \param zf Real kernel (input)
    1080             : !>                   zf(i1,i2,i3)
    1081             : !> \param zr Distributed Kernel FFT
    1082             : !>                   zr(2,i1,i2,i3)
    1083             : !> \param mpi_group ...
    1084             : !> \date February 2006
    1085             : !> \par Restrictions
    1086             : !>      Copyright (C) Stefan Goedecker, Cornell University, Ithaca, USA, 1994
    1087             : !>      Copyright (C) Stefan Goedecker, MPI Stuttgart, Germany, 1999
    1088             : !>      Copyright (C) 2002 Stefan Goedecker, CEA Grenoble
    1089             : !>      This file is distributed under the terms of the
    1090             : !>      GNU General Public License, see http://www.gnu.org/copyleft/gpl.txt .
    1091             : !> \author S. Goedecker, L. Genovese
    1092             : !> \note As transform lengths
    1093             : !>                  most products of the prime factors 2,3,5 are allowed.
    1094             : !>                   The detailed table with allowed transform lengths can
    1095             : !>                   be found in subroutine CTRIG
    1096             : ! **************************************************************************************************
    1097         526 :    SUBROUTINE kernelfft(n1, n2, n3, nd1, nd2, nd3, nk1, nk2, nk3, nproc, iproc, zf, zr, mpi_group)
    1098             : 
    1099             :       INTEGER, INTENT(in)                                :: n1, n2, n3, nd1, nd2, nd3, nk1, nk2, &
    1100             :                                                             nk3, nproc, iproc
    1101             :       REAL(KIND=dp), &
    1102             :          DIMENSION(n1/2+1, n3/2+1, nd2/nproc), &
    1103             :          INTENT(in)                                      :: zf
    1104             :       REAL(KIND=dp), DIMENSION(nk1, nk2, nk3/nproc), &
    1105             :          INTENT(inout)                                   :: zr
    1106             :       TYPE(mp_comm_type), INTENT(in)                     :: mpi_group
    1107             : 
    1108             :       INTEGER, PARAMETER                                 :: ncache_optimal = 8*1024
    1109             : 
    1110             :       INTEGER                                            :: i, i1, i3, ic1, ic2, ic3, inzee, j, j2, &
    1111             :                                                             J2st, j3, Jp2st, lot, lzt, ma, mb, &
    1112             :                                                             ncache, nfft
    1113         526 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: after1, after2, after3, before1, &
    1114         526 :                                                             before2, before3, now1, now2, now3
    1115             :       REAL(kind=dp)                                      :: twopion
    1116         526 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :)        :: cosinarr, trig1, trig2, trig3
    1117         526 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :)     :: zt, zw
    1118             :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :, :)  :: zmpi2
    1119             :       REAL(KIND=dp), ALLOCATABLE, &
    1120         526 :          DIMENSION(:, :, :, :, :)                        :: zmpi1
    1121             : 
    1122             : !  include "perfdata.inc"
    1123             : !work arrays for transpositions
    1124             : !work arrays for MPI
    1125             : !cache work array
    1126             : !FFT work arrays
    1127             : !Body
    1128             : !check input
    1129             : 
    1130         526 :       CPASSERT(nd1 .GE. n1)
    1131         526 :       CPASSERT(nd2 .GE. n2)
    1132         526 :       CPASSERT(nd3 .GE. n3/2 + 1)
    1133         526 :       CPASSERT(MOD(nd3, nproc) .EQ. 0)
    1134         526 :       CPASSERT(MOD(nd2, nproc) .EQ. 0)
    1135             :       MARK_USED(nd1)
    1136             : 
    1137             :       !defining work arrays dimensions
    1138         526 :       ncache = ncache_optimal
    1139         526 :       IF (ncache <= MAX(n1, n2, n3/2)*4) ncache = MAX(n1, n2, n3/2)*4
    1140         526 :       lzt = n2
    1141         526 :       IF (MOD(n2, 2) .EQ. 0) lzt = lzt + 1
    1142         526 :       IF (MOD(n2, 4) .EQ. 0) lzt = lzt + 1
    1143             : 
    1144             :       !Allocations
    1145         526 :       ALLOCATE (trig1(2, ctrig_length))
    1146         526 :       ALLOCATE (after1(7))
    1147         526 :       ALLOCATE (now1(7))
    1148         526 :       ALLOCATE (before1(7))
    1149         526 :       ALLOCATE (trig2(2, ctrig_length))
    1150         526 :       ALLOCATE (after2(7))
    1151         526 :       ALLOCATE (now2(7))
    1152         526 :       ALLOCATE (before2(7))
    1153         526 :       ALLOCATE (trig3(2, ctrig_length))
    1154         526 :       ALLOCATE (after3(7))
    1155         526 :       ALLOCATE (now3(7))
    1156         526 :       ALLOCATE (before3(7))
    1157        2104 :       ALLOCATE (zw(2, ncache/4, 2))
    1158        2104 :       ALLOCATE (zt(2, lzt, n1))
    1159        2630 :       ALLOCATE (zmpi2(2, n1, nd2/nproc, nd3))
    1160        2104 :       ALLOCATE (cosinarr(2, n3/2))
    1161         526 :       IF (nproc .GT. 1) THEN
    1162        2196 :          ALLOCATE (zmpi1(2, n1, nd2/nproc, nd3/nproc, nproc))
    1163   300131642 :          zmpi1 = 0.0_dp
    1164             :       END IF
    1165             : 
    1166   390508098 :       zmpi2 = 0.0_dp
    1167             :       !calculating the FFT work arrays (beware on the HalFFT in n3 dimension)
    1168         526 :       CALL ctrig(n3/2, trig3, after3, before3, now3, 1, ic3)
    1169         526 :       CALL ctrig(n1, trig1, after1, before1, now1, 1, ic1)
    1170         526 :       CALL ctrig(n2, trig2, after2, before2, now2, 1, ic2)
    1171             : 
    1172             :       !Calculating array of phases for HalFFT decoding
    1173         526 :       twopion = 8._dp*ATAN(1._dp)/REAL(n3, KIND=dp)
    1174       23030 :       DO i3 = 1, n3/2
    1175       22504 :          cosinarr(1, i3) = COS(twopion*(i3 - 1))
    1176       23030 :          cosinarr(2, i3) = -SIN(twopion*(i3 - 1))
    1177             :       END DO
    1178             : 
    1179             :       !transform along z axis
    1180             : 
    1181         526 :       lot = ncache/(2*n3)
    1182         526 :       CPASSERT(lot .GE. 1)
    1183             : 
    1184       27860 :       DO j2 = 1, nd2/nproc
    1185             :          !this condition ensures that we manage only the interesting part for the FFT
    1186       27860 :          IF (iproc*(nd2/nproc) + j2 .LE. n2) THEN
    1187      100266 :             DO i1 = 1, n1, lot
    1188       72932 :                ma = i1
    1189       72932 :                mb = MIN(i1 + (lot - 1), n1)
    1190       72932 :                nfft = mb - ma + 1
    1191             : 
    1192             :                !inserting real data into complex array of half length
    1193             :                !input: I1,I3,J2,(Jp2)
    1194             : 
    1195       72932 :                CALL inserthalf(n1, n3, lot, nfft, i1, zf(1, 1, j2), zw(1, 1, 1))
    1196             : 
    1197             :                !performing FFT
    1198       72932 :                inzee = 1
    1199      263824 :                DO i = 1, ic3
    1200             :                   CALL fftstp(lot, nfft, n3/2, lot, n3/2, zw(1, 1, inzee), zw(1, 1, 3 - inzee), &
    1201      190892 :                               trig3, after3(i), now3(i), before3(i), 1)
    1202      263824 :                   inzee = 3 - inzee
    1203             :                END DO
    1204             :                !output: I1,i3,J2,(Jp2)
    1205             : 
    1206             :                !unpacking FFT in order to restore correct result,
    1207             :                !while exchanging components
    1208             :                !input: I1,i3,J2,(Jp2)
    1209      100266 :                CALL scramble_unpack(i1, j2, lot, nfft, n1, n3, nd2, nproc, nd3, zw(1, 1, inzee), zmpi2, cosinarr)
    1210             :                !output: I1,J2,i3,(Jp2)
    1211             :             END DO
    1212             :          END IF
    1213             :       END DO
    1214             : 
    1215             :       !Interprocessor data transposition
    1216             :       !input: I1,J2,j3,jp3,(Jp2)
    1217         526 :       IF (nproc .GT. 1) THEN
    1218             :          !communication scheduling
    1219             :          CALL mpi_group%alltoall(zmpi2, &!2*n1*(nd2/nproc)*(nd3/nproc), &
    1220         366 :                                  zmpi1, 2*n1*(nd2/nproc)*(nd3/nproc))
    1221             :          ! output: I1,J2,j3,Jp2,(jp3)
    1222             :       END IF
    1223             : 
    1224       14902 :       DO j3 = 1, nd3/nproc
    1225             :          !this condition ensures that we manage only the interesting part for the FFT
    1226       14902 :          IF (iproc*(nd3/nproc) + j3 .LE. n3/2 + 1) THEN
    1227       14193 :             Jp2st = 1
    1228       14193 :             J2st = 1
    1229             : 
    1230             :             !transform along x axis
    1231       14193 :             lot = ncache/(4*n1)
    1232       14193 :             CPASSERT(lot .GE. 1)
    1233             : 
    1234       85859 :             DO j = 1, n2, lot
    1235       71666 :                ma = j
    1236       71666 :                mb = MIN(j + (lot - 1), n2)
    1237       71666 :                nfft = mb - ma + 1
    1238             : 
    1239             :                !reverse ordering
    1240             :                !input: I1,J2,j3,Jp2,(jp3)
    1241       71666 :                IF (nproc .EQ. 1) THEN
    1242       18060 :                   CALL mpiswitch(j3, nfft, Jp2st, J2st, lot, n1, nd2, nd3, nproc, zmpi2, zw(1, 1, 1))
    1243             :                ELSE
    1244       53606 :                   CALL mpiswitch(j3, nfft, Jp2st, J2st, lot, n1, nd2, nd3, nproc, zmpi1, zw(1, 1, 1))
    1245             :                END IF
    1246             :                !output: J2,Jp2,I1,j3,(jp3)
    1247             : 
    1248             :                !performing FFT
    1249             :                !input: I2,I1,j3,(jp3)
    1250       71666 :                inzee = 1
    1251      233162 :                DO i = 1, ic1 - 1
    1252             :                   CALL fftstp(lot, nfft, n1, lot, n1, zw(1, 1, inzee), zw(1, 1, 3 - inzee), &
    1253      161496 :                               trig1, after1(i), now1(i), before1(i), 1)
    1254      233162 :                   inzee = 3 - inzee
    1255             :                END DO
    1256             :                !storing the last step into zt
    1257       71666 :                i = ic1
    1258             :                CALL fftstp(lot, nfft, n1, lzt, n1, zw(1, 1, inzee), zt(1, j, 1), &
    1259       85859 :                            trig1, after1(i), now1(i), before1(i), 1)
    1260             :                !output: I2,i1,j3,(jp3)
    1261             :             END DO
    1262             : 
    1263             :             !transform along y axis, and taking only the first half
    1264       14193 :             lot = ncache/(4*n2)
    1265       14193 :             CPASSERT(lot .GE. 1)
    1266             : 
    1267       55534 :             DO j = 1, nk1, lot
    1268       41341 :                ma = j
    1269       41341 :                mb = MIN(j + (lot - 1), nk1)
    1270       41341 :                nfft = mb - ma + 1
    1271             : 
    1272             :                !reverse ordering
    1273             :                !input: I2,i1,j3,(jp3)
    1274       41341 :                CALL switch(nfft, n2, lot, n1, lzt, zt(1, 1, j), zw(1, 1, 1))
    1275             :                !output: i1,I2,j3,(jp3)
    1276             : 
    1277             :                !performing FFT
    1278             :                !input: i1,I2,j3,(jp3)
    1279       41341 :                inzee = 1
    1280      176951 :                DO i = 1, ic2
    1281             :                   CALL fftstp(lot, nfft, n2, lot, n2, zw(1, 1, inzee), zw(1, 1, 3 - inzee), &
    1282      135610 :                               trig2, after2(i), now2(i), before2(i), 1)
    1283      176951 :                   inzee = 3 - inzee
    1284             :                END DO
    1285             : 
    1286       55534 :                CALL realcopy(lot, nfft, n2, nk1, nk2, zw(1, 1, inzee), zr(j, 1, j3))
    1287             : 
    1288             :             END DO
    1289             :             !output: i1,i2,j3,(jp3)
    1290             :          END IF
    1291             :       END DO
    1292             : 
    1293             :       !De-allocations
    1294         526 :       DEALLOCATE (trig1)
    1295         526 :       DEALLOCATE (after1)
    1296         526 :       DEALLOCATE (now1)
    1297         526 :       DEALLOCATE (before1)
    1298         526 :       DEALLOCATE (trig2)
    1299         526 :       DEALLOCATE (after2)
    1300         526 :       DEALLOCATE (now2)
    1301         526 :       DEALLOCATE (before2)
    1302         526 :       DEALLOCATE (trig3)
    1303         526 :       DEALLOCATE (after3)
    1304         526 :       DEALLOCATE (now3)
    1305         526 :       DEALLOCATE (before3)
    1306         526 :       DEALLOCATE (zmpi2)
    1307         526 :       DEALLOCATE (zw)
    1308         526 :       DEALLOCATE (zt)
    1309         526 :       DEALLOCATE (cosinarr)
    1310         526 :       IF (nproc .GT. 1) DEALLOCATE (zmpi1)
    1311             : 
    1312         526 :    END SUBROUTINE kernelfft
    1313             : 
    1314             : ! **************************************************************************************************
    1315             : !> \brief ...
    1316             : !> \param lot ...
    1317             : !> \param nfft ...
    1318             : !> \param n2 ...
    1319             : !> \param nk1 ...
    1320             : !> \param nk2 ...
    1321             : !> \param zin ...
    1322             : !> \param zout ...
    1323             : ! **************************************************************************************************
    1324       41341 :    SUBROUTINE realcopy(lot, nfft, n2, nk1, nk2, zin, zout)
    1325             :       INTEGER, INTENT(in)                                :: lot, nfft, n2, nk1, nk2
    1326             :       REAL(KIND=dp), DIMENSION(2, lot, n2), INTENT(in)   :: zin
    1327             :       REAL(KIND=dp), DIMENSION(nk1, nk2), INTENT(inout)  :: zout
    1328             : 
    1329             :       INTEGER                                            :: i, j
    1330             : 
    1331     2347693 :       DO i = 1, nk2
    1332    35634277 :          DO j = 1, nfft
    1333    35592936 :             zout(j, i) = zin(1, j, i)
    1334             :          END DO
    1335             :       END DO
    1336             : 
    1337       41341 :    END SUBROUTINE realcopy
    1338             : 
    1339             : ! **************************************************************************************************
    1340             : !> \brief ...
    1341             : !> \param nfft ...
    1342             : !> \param n2 ...
    1343             : !> \param lot ...
    1344             : !> \param n1 ...
    1345             : !> \param lzt ...
    1346             : !> \param zt ...
    1347             : !> \param zw ...
    1348             : ! **************************************************************************************************
    1349       41341 :    SUBROUTINE switch(nfft, n2, lot, n1, lzt, zt, zw)
    1350             :       INTEGER                                            :: nfft, n2, lot, n1, lzt
    1351             :       REAL(KIND=dp)                                      :: zt(2, lzt, n1), zw(2, lot, n2)
    1352             : 
    1353             :       INTEGER                                            :: i, j
    1354             : 
    1355      692075 :       DO 200, j = 1, nfft
    1356    65922434 :          DO 100, i = 1, n2
    1357    65271700 :             zw(1, j, i) = zt(1, i, j)
    1358    65271700 :             zw(2, j, i) = zt(2, i, j)
    1359      650734 : 100         CONTINUE
    1360       41341 : 200         CONTINUE
    1361       41341 :             RETURN
    1362             :             END SUBROUTINE switch
    1363             : 
    1364             : ! **************************************************************************************************
    1365             : !> \brief ...
    1366             : !> \param j3 ...
    1367             : !> \param nfft ...
    1368             : !> \param Jp2st ...
    1369             : !> \param J2st ...
    1370             : !> \param lot ...
    1371             : !> \param n1 ...
    1372             : !> \param nd2 ...
    1373             : !> \param nd3 ...
    1374             : !> \param nproc ...
    1375             : !> \param zmpi1 ...
    1376             : !> \param zw ...
    1377             : ! **************************************************************************************************
    1378       71666 :             SUBROUTINE mpiswitch(j3, nfft, Jp2st, J2st, lot, n1, nd2, nd3, nproc, zmpi1, zw)
    1379             :       INTEGER                                            :: j3, nfft, Jp2st, J2st, lot, n1, nd2, &
    1380             :                                                             nd3, nproc
    1381             :       REAL(KIND=dp) :: zmpi1(2, n1, nd2/nproc, nd3/nproc, nproc), zw(2, lot, n1)
    1382             : 
    1383             :       INTEGER                                            :: I1, J2, JP2, mfft
    1384             : 
    1385       71666 :                mfft = 0
    1386       94696 :                DO 300, Jp2 = Jp2st, nproc
    1387     1353585 :                   DO 200, J2 = J2st, nd2/nproc
    1388     1330555 :                      mfft = mfft + 1
    1389     1330555 :                      IF (mfft .GT. nfft) THEN
    1390       57473 :                         Jp2st = Jp2
    1391       57473 :                         J2st = J2
    1392       57473 :                         RETURN
    1393             :                      END IF
    1394   129270318 :                      DO 100, I1 = 1, n1
    1395   127997236 :                         zw(1, mfft, I1) = zmpi1(1, I1, J2, j3, Jp2)
    1396   127997236 :                         zw(2, mfft, I1) = zmpi1(2, I1, J2, j3, Jp2)
    1397     1273082 : 100                     CONTINUE
    1398       23030 : 200                     CONTINUE
    1399       23030 :                         J2st = 1
    1400       14193 : 300                     CONTINUE
    1401             :                         END SUBROUTINE mpiswitch
    1402             : 
    1403             : ! **************************************************************************************************
    1404             : !> \brief ...
    1405             : !> \param p ...
    1406             : !> \param w ...
    1407             : !> \param urange ...
    1408             : !> \param drange ...
    1409             : !> \param acc ...
    1410             : ! **************************************************************************************************
    1411         526 :                         SUBROUTINE gequad(p, w, urange, drange, acc)
    1412             : !
    1413             :       REAL(KIND=dp)                                      :: p(*), w(*), urange, drange, acc
    1414             : 
    1415             : !
    1416             : !
    1417             : !       range [10^(-9),1] and accuracy ~10^(-8);
    1418             : !
    1419             : !
    1420             : 
    1421         526 :                            p(1) = 4.96142640560223544e19_dp
    1422         526 :                            p(2) = 1.37454269147978052e19_dp
    1423         526 :                            p(3) = 7.58610013441204679e18_dp
    1424         526 :                            p(4) = 4.42040691347806996e18_dp
    1425         526 :                            p(5) = 2.61986077948367892e18_dp
    1426         526 :                            p(6) = 1.56320138155496681e18_dp
    1427         526 :                            p(7) = 9.35645215863028402e17_dp
    1428         526 :                            p(8) = 5.60962910452691703e17_dp
    1429         526 :                            p(9) = 3.3666225119686761e17_dp
    1430         526 :                            p(10) = 2.0218253197947866e17_dp
    1431         526 :                            p(11) = 1.21477756091902017e17_dp
    1432         526 :                            p(12) = 7.3012982513608503e16_dp
    1433         526 :                            p(13) = 4.38951893556421099e16_dp
    1434         526 :                            p(14) = 2.63949482512262325e16_dp
    1435         526 :                            p(15) = 1.58742054072786174e16_dp
    1436         526 :                            p(16) = 9.54806587737665531e15_dp
    1437         526 :                            p(17) = 5.74353712364571709e15_dp
    1438         526 :                            p(18) = 3.455214877389445e15_dp
    1439         526 :                            p(19) = 2.07871658520326804e15_dp
    1440         526 :                            p(20) = 1.25064667315629928e15_dp
    1441         526 :                            p(21) = 7.52469429541933745e14_dp
    1442         526 :                            p(22) = 4.5274603337253175e14_dp
    1443         526 :                            p(23) = 2.72414006900059548e14_dp
    1444         526 :                            p(24) = 1.63912168349216752e14_dp
    1445         526 :                            p(25) = 9.86275802590865738e13_dp
    1446         526 :                            p(26) = 5.93457701624974985e13_dp
    1447         526 :                            p(27) = 3.5709554322296296e13_dp
    1448         526 :                            p(28) = 2.14872890367310454e13_dp
    1449         526 :                            p(29) = 1.29294719957726902e13_dp
    1450         526 :                            p(30) = 7.78003375426361016e12_dp
    1451         526 :                            p(31) = 4.68148199759876704e12_dp
    1452         526 :                            p(32) = 2.8169955024829868e12_dp
    1453         526 :                            p(33) = 1.69507790481958464e12_dp
    1454         526 :                            p(34) = 1.01998486064607581e12_dp
    1455         526 :                            p(35) = 6.13759486539856459e11_dp
    1456         526 :                            p(36) = 3.69320183828682544e11_dp
    1457         526 :                            p(37) = 2.22232783898905102e11_dp
    1458         526 :                            p(38) = 1.33725247623668682e11_dp
    1459         526 :                            p(39) = 8.0467192739036288e10_dp
    1460         526 :                            p(40) = 4.84199582415144143e10_dp
    1461         526 :                            p(41) = 2.91360091170559564e10_dp
    1462         526 :                            p(42) = 1.75321747475309216e10_dp
    1463         526 :                            p(43) = 1.0549735552210995e10_dp
    1464         526 :                            p(44) = 6.34815321079006586e9_dp
    1465         526 :                            p(45) = 3.81991113733594231e9_dp
    1466         526 :                            p(46) = 2.29857747533101109e9_dp
    1467         526 :                            p(47) = 1.38313653595483694e9_dp
    1468         526 :                            p(48) = 8.32282908580025358e8_dp
    1469         526 :                            p(49) = 5.00814519374587467e8_dp
    1470         526 :                            p(50) = 3.01358090773319025e8_dp
    1471         526 :                            p(51) = 1.81337994217503535e8_dp
    1472         526 :                            p(52) = 1.09117589961086823e8_dp
    1473         526 :                            p(53) = 6.56599771718640323e7_dp
    1474         526 :                            p(54) = 3.95099693638497164e7_dp
    1475         526 :                            p(55) = 2.37745694710665991e7_dp
    1476         526 :                            p(56) = 1.43060135285912813e7_dp
    1477         526 :                            p(57) = 8.60844290313506695e6_dp
    1478         526 :                            p(58) = 5.18000974075383424e6_dp
    1479         526 :                            p(59) = 3.116998193057466e6_dp
    1480         526 :                            p(60) = 1.87560993870024029e6_dp
    1481         526 :                            p(61) = 1.12862197183979562e6_dp
    1482         526 :                            p(62) = 679132.441326077231_dp
    1483         526 :                            p(63) = 408658.421279877969_dp
    1484         526 :                            p(64) = 245904.473450669789_dp
    1485         526 :                            p(65) = 147969.568088321005_dp
    1486         526 :                            p(66) = 89038.612357311147_dp
    1487         526 :                            p(67) = 53577.7362552358895_dp
    1488         526 :                            p(68) = 32239.6513926914668_dp
    1489         526 :                            p(69) = 19399.7580852362791_dp
    1490         526 :                            p(70) = 11673.5323603058634_dp
    1491         526 :                            p(71) = 7024.38438577707758_dp
    1492         526 :                            p(72) = 4226.82479307685999_dp
    1493         526 :                            p(73) = 2543.43254175354295_dp
    1494         526 :                            p(74) = 1530.47486269122675_dp
    1495         526 :                            p(75) = 920.941785160749482_dp
    1496         526 :                            p(76) = 554.163803906291646_dp
    1497         526 :                            p(77) = 333.46029740785694_dp
    1498         526 :                            p(78) = 200.6550575335041_dp
    1499         526 :                            p(79) = 120.741366914147284_dp
    1500         526 :                            p(80) = 72.6544243200329916_dp
    1501         526 :                            p(81) = 43.7187810415471025_dp
    1502         526 :                            p(82) = 26.3071631447061043_dp
    1503         526 :                            p(83) = 15.8299486353816329_dp
    1504         526 :                            p(84) = 9.52493152341244004_dp
    1505         526 :                            p(85) = 5.72200417067776041_dp
    1506         526 :                            p(86) = 3.36242234070940928_dp
    1507         526 :                            p(87) = 1.75371394604499472_dp
    1508         526 :                            p(88) = 0.64705932650658966_dp
    1509         526 :                            p(89) = 0.072765905943708247_dp
    1510             :                            !
    1511         526 :                            w(1) = 47.67445484528304247e10_dp
    1512         526 :                            w(2) = 11.37485774750442175e9_dp
    1513         526 :                            w(3) = 78.64340976880190239e8_dp
    1514         526 :                            w(4) = 46.27335788759590498e8_dp
    1515         526 :                            w(5) = 24.7380464827152951e8_dp
    1516         526 :                            w(6) = 13.62904116438987719e8_dp
    1517         526 :                            w(7) = 92.79560029045882433e8_dp
    1518         526 :                            w(8) = 52.15931216254660251e8_dp
    1519         526 :                            w(9) = 31.67018011061666244e8_dp
    1520         526 :                            w(10) = 1.29291036801493046e8_dp
    1521         526 :                            w(11) = 1.00139319988015862e8_dp
    1522         526 :                            w(12) = 7.75892350510188341e7_dp
    1523         526 :                            w(13) = 6.01333567950731271e7_dp
    1524         526 :                            w(14) = 4.66141178654796875e7_dp
    1525         526 :                            w(15) = 3.61398903394911448e7_dp
    1526         526 :                            w(16) = 2.80225846672956389e7_dp
    1527         526 :                            w(17) = 2.1730509180930247e7_dp
    1528         526 :                            w(18) = 1.68524482625876965e7_dp
    1529         526 :                            w(19) = 1.30701489345870338e7_dp
    1530         526 :                            w(20) = 1.01371784832269282e7_dp
    1531         526 :                            w(21) = 7.86264116300379329e6_dp
    1532         526 :                            w(22) = 6.09861667912273717e6_dp
    1533         526 :                            w(23) = 4.73045784039455683e6_dp
    1534         526 :                            w(24) = 3.66928949951594161e6_dp
    1535         526 :                            w(25) = 2.8462050836230259e6_dp
    1536         526 :                            w(26) = 2.20777394798527011e6_dp
    1537         526 :                            w(27) = 1.71256191589205524e6_dp
    1538         526 :                            w(28) = 1.32843556197737076e6_dp
    1539         526 :                            w(29) = 1.0304731275955989e6_dp
    1540         526 :                            w(30) = 799345.206572271448_dp
    1541         526 :                            w(31) = 620059.354143595343_dp
    1542         526 :                            w(32) = 480986.704107449333_dp
    1543         526 :                            w(33) = 373107.167700228515_dp
    1544         526 :                            w(34) = 289424.08337412132_dp
    1545         526 :                            w(35) = 224510.248231581788_dp
    1546         526 :                            w(36) = 174155.825690028966_dp
    1547         526 :                            w(37) = 135095.256919654065_dp
    1548         526 :                            w(38) = 104795.442776800312_dp
    1549         526 :                            w(39) = 81291.4458222430418_dp
    1550         526 :                            w(40) = 63059.0493649328682_dp
    1551         526 :                            w(41) = 48915.9040455329689_dp
    1552         526 :                            w(42) = 37944.8484018048756_dp
    1553         526 :                            w(43) = 29434.4290473253969_dp
    1554         526 :                            w(44) = 22832.7622054490044_dp
    1555         526 :                            w(45) = 17711.743950151233_dp
    1556         526 :                            w(46) = 13739.287867104177_dp
    1557         526 :                            w(47) = 10657.7895710752585_dp
    1558         526 :                            w(48) = 8267.42141053961834_dp
    1559         526 :                            w(49) = 6413.17397520136448_dp
    1560         526 :                            w(50) = 4974.80402838654277_dp
    1561         526 :                            w(51) = 3859.03698188553047_dp
    1562         526 :                            w(52) = 2993.51824493299154_dp
    1563         526 :                            w(53) = 2322.1211966811754_dp
    1564         526 :                            w(54) = 1801.30750964719641_dp
    1565         526 :                            w(55) = 1397.30379659817038_dp
    1566         526 :                            w(56) = 1083.91149143250697_dp
    1567         526 :                            w(57) = 840.807939169209188_dp
    1568         526 :                            w(58) = 652.228524366749422_dp
    1569         526 :                            w(59) = 505.944376983506128_dp
    1570         526 :                            w(60) = 392.469362317941064_dp
    1571         526 :                            w(61) = 304.444930257324312_dp
    1572         526 :                            w(62) = 236.162932842453601_dp
    1573         526 :                            w(63) = 183.195466078603525_dp
    1574         526 :                            w(64) = 142.107732186551471_dp
    1575         526 :                            w(65) = 110.23530215723992_dp
    1576         526 :                            w(66) = 85.5113346705382257_dp
    1577         526 :                            w(67) = 66.3325469806696621_dp
    1578         526 :                            w(68) = 51.4552463353841373_dp
    1579         526 :                            w(69) = 39.9146798429449273_dp
    1580         526 :                            w(70) = 30.9624728409162095_dp
    1581         526 :                            w(71) = 24.018098812215013_dp
    1582         526 :                            w(72) = 18.6312338024296588_dp
    1583         526 :                            w(73) = 14.4525541233150501_dp
    1584         526 :                            w(74) = 11.2110836519105938_dp
    1585         526 :                            w(75) = 8.69662175848497178_dp
    1586         526 :                            w(76) = 6.74611236165731961_dp
    1587         526 :                            w(77) = 5.23307018057529994_dp
    1588         526 :                            w(78) = 4.05937850501539556_dp
    1589         526 :                            w(79) = 3.14892659076635714_dp
    1590         526 :                            w(80) = 2.44267408211071604_dp
    1591         526 :                            w(81) = 1.89482240522855261_dp
    1592         526 :                            w(82) = 1.46984505907050079_dp
    1593         526 :                            w(83) = 1.14019261330527007_dp
    1594         526 :                            w(84) = 0.884791217422925293_dp
    1595         526 :                            w(85) = 0.692686387080616483_dp
    1596         526 :                            w(86) = 0.585244576897023282_dp
    1597         526 :                            w(87) = 0.576182522545327589_dp
    1598         526 :                            w(88) = 0.596688817388997178_dp
    1599         526 :                            w(89) = 0.607879901151108771_dp
    1600             :                            !
    1601             :                            !
    1602         526 :                            urange = 1._dp
    1603         526 :                            drange = 1e-08_dp
    1604         526 :                            acc = 1e-08_dp
    1605             :                            !
    1606         526 :                            RETURN
    1607             :                         END SUBROUTINE gequad
    1608             : 
    1609             :                         END MODULE ps_wavelet_kernel

Generated by: LCOV version 1.15