LCOV - code coverage report
Current view: top level - src/pw - fft_tools.F (source / functions) Hit Total Coverage
Test: CP2K Regtests (git:262480d) Lines: 718 1360 52.8 %
Date: 2024-11-22 07:00:40 Functions: 18 31 58.1 %

          Line data    Source code
       1             : !--------------------------------------------------------------------------------------------------!
       2             : !   CP2K: A general program to perform molecular dynamics simulations                              !
       3             : !   Copyright 2000-2024 CP2K developers group <https://cp2k.org>                                   !
       4             : !                                                                                                  !
       5             : !   SPDX-License-Identifier: GPL-2.0-or-later                                                      !
       6             : !--------------------------------------------------------------------------------------------------!
       7             : 
       8             : ! **************************************************************************************************
       9             : !> \par History
      10             : !>      JGH (30-Nov-2000): ESSL FFT Library added
      11             : !>      JGH (05-Jan-2001): Added SGI library FFT
      12             : !>      JGH (14-Jan-2001): Added parallel 3d FFT
      13             : !>      JGH (10-Feb-2006): New interface type
      14             : !>      JGH (31-Mar-2008): Remove local allocates and reshapes (performance)
      15             : !>                         Possible problems can be related with setting arrays
      16             : !>                         not to zero
      17             : !>                         Some interfaces could be further simplified by avoiding
      18             : !>                         an initial copy. However, this assumes contiguous arrays
      19             : !>      IAB (15-Oct-2008): Moved mp_cart_sub calls out of cube_tranpose_* and into
      20             : !>                         fft_scratch type, reducing number of calls dramatically
      21             : !>      IAB (05-Dec-2008): Moved all other non-essential MPI calls into scratch type
      22             : !>      IAB (09-Jan-2009): Added fft_plan_type to store FFT data, including cached FFTW plans
      23             : !>      IAB (13-Feb-2009): Extended plan caching to serial 3D FFT (fft3d_s)
      24             : !>      IAB (09-Oct-2009): Added OpenMP directives to parallel 3D FFT
      25             : !>                         (c) The Numerical Algorithms Group (NAG) Ltd, 2008-2009 on behalf of the HECToR project
      26             : !>      HFP (17-Oct-2024): Thread-safety insurance (CPASSERT), and OMP ATOMIC (tick_fft_pool)
      27             : !> \author JGH
      28             : ! **************************************************************************************************
      29             : MODULE fft_tools
      30             :    USE ISO_C_BINDING,                   ONLY: C_F_POINTER,&
      31             :                                               C_LOC,&
      32             :                                               C_PTR,&
      33             :                                               C_SIZE_T
      34             :    USE cp_log_handling,                 ONLY: cp_logger_get_default_io_unit
      35             :    USE fft_lib,                         ONLY: &
      36             :         fft_1dm, fft_3d, fft_alloc, fft_create_plan_1dm, fft_create_plan_3d, fft_dealloc, &
      37             :         fft_destroy_plan, fft_do_cleanup, fft_do_init, fft_get_lengths, fft_library
      38             :    USE fft_plan,                        ONLY: fft_plan_type
      39             :    USE kinds,                           ONLY: dp,&
      40             :                                               dp_size,&
      41             :                                               sp
      42             :    USE mathconstants,                   ONLY: z_zero
      43             :    USE message_passing,                 ONLY: mp_cart_type,&
      44             :                                               mp_comm_null,&
      45             :                                               mp_comm_type,&
      46             :                                               mp_request_type,&
      47             :                                               mp_waitall
      48             :    USE offload_api,                     ONLY: offload_free_pinned_mem,&
      49             :                                               offload_malloc_pinned_mem
      50             : 
      51             : !$ USE OMP_LIB, ONLY: omp_get_max_threads, omp_get_thread_num, omp_in_parallel
      52             : 
      53             : #include "../base/base_uses.f90"
      54             : 
      55             :    IMPLICIT NONE
      56             : 
      57             :    CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'fft_tools'
      58             : 
      59             :    ! Types for the pool of scratch data needed in FFT routines
      60             :    ! keep the subroutine "is_equal" up-to-date
      61             :    ! needs a default initialization
      62             :    TYPE fft_scratch_sizes
      63             :       INTEGER                              :: nx = 0, ny = 0, nz = 0
      64             :       INTEGER                              :: lmax = 0, mmax = 0, nmax = 0
      65             :       INTEGER                              :: mx1 = 0, mx2 = 0, mx3 = 0
      66             :       INTEGER                              :: my1 = 0, my2 = 0, my3 = 0
      67             :       INTEGER                              :: mz1 = 0, mz2 = 0, mz3 = 0
      68             :       INTEGER                              :: mcz1 = 0, mcz2 = 0, mcy3 = 0, mcx2 = 0
      69             :       INTEGER                              :: lg = 0, mg = 0
      70             :       INTEGER                              :: nbx = 0, nbz = 0
      71             :       INTEGER                              :: nmray = 0, nyzray = 0
      72             :       TYPE(mp_cart_type)                   :: rs_group = mp_cart_type()
      73             :       INTEGER, DIMENSION(2)                :: g_pos = 0, r_pos = 0, r_dim = 0
      74             :       INTEGER                              :: numtask = 0
      75             :    END TYPE fft_scratch_sizes
      76             : 
      77             :    TYPE fft_scratch_type
      78             :       INTEGER                              :: fft_scratch_id = 0
      79             :       INTEGER                              :: tf_type = -1
      80             :       LOGICAL                              :: in_use = .TRUE.
      81             :       TYPE(mp_comm_type)                   :: group = mp_comm_type()
      82             :       INTEGER, DIMENSION(3)                :: nfft = -1
      83             :       ! to be used in cube_transpose_* routines
      84             :       TYPE(mp_cart_type), DIMENSION(2)     :: cart_sub_comm = mp_cart_type()
      85             :       INTEGER, DIMENSION(2)                :: dim = -1, pos = -1
      86             :       ! to be used in fft3d_s
      87             :       COMPLEX(KIND=dp), DIMENSION(:, :, :), POINTER, CONTIGUOUS &
      88             :          :: ziptr => NULL(), zoptr => NULL()
      89             :       ! to be used in fft3d_ps : block distribution
      90             :       COMPLEX(KIND=dp), DIMENSION(:, :), CONTIGUOUS, POINTER &
      91             :          :: p1buf => NULL(), p2buf => NULL(), p3buf => NULL(), p4buf => NULL(), &
      92             :             p5buf => NULL(), p6buf => NULL(), p7buf => NULL()
      93             :       ! to be used in fft3d_ps : plane distribution
      94             :       COMPLEX(KIND=dp), DIMENSION(:, :), POINTER, CONTIGUOUS &
      95             :          :: r1buf => NULL(), r2buf => NULL()
      96             :       COMPLEX(KIND=dp), DIMENSION(:, :, :), POINTER, CONTIGUOUS &
      97             :          :: tbuf => NULL()
      98             :       ! to be used in fft3d_pb
      99             :       COMPLEX(KIND=dp), DIMENSION(:, :), POINTER, CONTIGUOUS &
     100             :          :: a1buf => NULL(), a2buf => NULL(), a3buf => NULL(), &
     101             :             a4buf => NULL(), a5buf => NULL(), a6buf => NULL()
     102             :       ! to be used in communication routines
     103             :       INTEGER, DIMENSION(:), CONTIGUOUS, POINTER    :: scount => NULL(), rcount => NULL(), sdispl => NULL(), rdispl => NULL()
     104             :       INTEGER, DIMENSION(:, :), CONTIGUOUS, POINTER :: pgcube => NULL()
     105             :       INTEGER, DIMENSION(:), CONTIGUOUS, POINTER    :: xzcount => NULL(), yzcount => NULL(), xzdispl => NULL(), yzdispl => NULL()
     106             :       INTEGER                                       :: in = 0, mip = -1
     107             :       REAL(KIND=dp)                                 :: rsratio = 1.0_dp
     108             :       COMPLEX(KIND=dp), DIMENSION(:), POINTER, CONTIGUOUS &
     109             :          :: xzbuf => NULL(), yzbuf => NULL()
     110             :       COMPLEX(KIND=sp), DIMENSION(:), POINTER, CONTIGUOUS &
     111             :          :: xzbuf_sgl => NULL(), yzbuf_sgl => NULL()
     112             :       COMPLEX(KIND=dp), DIMENSION(:, :), POINTER, CONTIGUOUS &
     113             :          :: rbuf1 => NULL(), rbuf2 => NULL(), rbuf3 => NULL(), rbuf4 => NULL(), &
     114             :             rbuf5 => NULL(), rbuf6 => NULL(), rr => NULL()
     115             :       COMPLEX(KIND=sp), DIMENSION(:, :), POINTER, CONTIGUOUS &
     116             :          :: ss => NULL(), tt => NULL()
     117             :       INTEGER, DIMENSION(:, :), POINTER, CONTIGUOUS :: pgrid => NULL()
     118             :       INTEGER, DIMENSION(:), POINTER, CONTIGUOUS    :: xcor => NULL(), zcor => NULL(), pzcoord => NULL()
     119             :       TYPE(fft_scratch_sizes)                       :: sizes = fft_scratch_sizes()
     120             :       TYPE(fft_plan_type), DIMENSION(6)             :: fft_plan = fft_plan_type()
     121             :       INTEGER                                       :: last_tick = -1
     122             :    END TYPE fft_scratch_type
     123             : 
     124             :    TYPE fft_scratch_pool_type
     125             :       TYPE(fft_scratch_type), POINTER       :: fft_scratch => NULL()
     126             :       TYPE(fft_scratch_pool_type), POINTER  :: fft_scratch_next => NULL()
     127             :    END TYPE fft_scratch_pool_type
     128             : 
     129             :    INTEGER, SAVE                              :: init_fft_pool = 0
     130             :    ! the clock for fft pool. Allows to identify the least recently used scratch
     131             :    INTEGER, SAVE                              :: tick_fft_pool = 0
     132             :    ! limit the number of scratch pools to fft_pool_scratch_limit.
     133             :    INTEGER, SAVE                              :: fft_pool_scratch_limit = 15
     134             :    TYPE(fft_scratch_pool_type), POINTER, SAVE :: fft_scratch_first
     135             :    ! END of types for the pool of scratch data needed in FFT routines
     136             : 
     137             :    PRIVATE
     138             :    PUBLIC :: init_fft, fft3d, finalize_fft
     139             :    PUBLIC :: fft_alloc, fft_dealloc
     140             :    PUBLIC :: init_fft_scratch_pool
     141             :    PUBLIC :: fft_radix_operations, fft_fw1d
     142             :    PUBLIC :: FWFFT, BWFFT
     143             :    PUBLIC :: FFT_RADIX_CLOSEST, FFT_RADIX_NEXT
     144             :    PUBLIC :: FFT_RADIX_NEXT_ODD
     145             : 
     146             :    INTEGER, PARAMETER :: FWFFT = +1, BWFFT = -1
     147             :    INTEGER, PARAMETER :: FFT_RADIX_CLOSEST = 493, FFT_RADIX_NEXT = 494
     148             :    INTEGER, PARAMETER :: FFT_RADIX_ALLOWED = 495, FFT_RADIX_DISALLOWED = 496
     149             :    INTEGER, PARAMETER :: FFT_RADIX_NEXT_ODD = 497
     150             : 
     151             :    REAL(KIND=dp), PARAMETER :: ratio_sparse_alltoall = 0.5_dp
     152             : 
     153             :    ! these saved variables are FFT globals
     154             :    INTEGER, SAVE :: fft_type = 0
     155             :    LOGICAL, SAVE :: alltoall_sgl = .FALSE.
     156             :    LOGICAL, SAVE :: use_fftsg_sizes = .TRUE.
     157             :    INTEGER, SAVE :: fft_plan_style = 1
     158             : 
     159             :    ! these are only needed for pw_gpu (-D__OFFLOAD)
     160             :    PUBLIC :: get_fft_scratch, release_fft_scratch
     161             :    PUBLIC :: cube_transpose_1, cube_transpose_2
     162             :    PUBLIC :: yz_to_x, x_to_yz, xz_to_yz, yz_to_xz
     163             :    PUBLIC :: fft_scratch_sizes, fft_scratch_type
     164             :    PUBLIC :: fft_type, fft_plan_style
     165             : 
     166             :    INTERFACE fft3d
     167             :       MODULE PROCEDURE fft3d_s, fft3d_ps, fft3d_pb
     168             :    END INTERFACE
     169             : 
     170             : ! **************************************************************************************************
     171             : 
     172             : CONTAINS
     173             : 
     174             : ! **************************************************************************************************
     175             : !> \brief ...
     176             : !> \param fftlib ...
     177             : !> \param alltoall ...
     178             : !> \param fftsg_sizes ...
     179             : !> \param pool_limit ...
     180             : !> \param wisdom_file ...
     181             : !> \param plan_style ...
     182             : !> \author JGH
     183             : ! **************************************************************************************************
     184        9143 :    SUBROUTINE init_fft(fftlib, alltoall, fftsg_sizes, pool_limit, wisdom_file, &
     185             :                        plan_style)
     186             : 
     187             :       CHARACTER(LEN=*), INTENT(IN)                       :: fftlib
     188             :       LOGICAL, INTENT(IN)                                :: alltoall, fftsg_sizes
     189             :       INTEGER, INTENT(IN)                                :: pool_limit
     190             :       CHARACTER(LEN=*), INTENT(IN)                       :: wisdom_file
     191             :       INTEGER, INTENT(IN)                                :: plan_style
     192             : 
     193        9143 :       use_fftsg_sizes = fftsg_sizes
     194        9143 :       alltoall_sgl = alltoall
     195        9143 :       fft_pool_scratch_limit = pool_limit
     196        9143 :       fft_type = fft_library(fftlib)
     197        9143 :       fft_plan_style = plan_style
     198             : 
     199        9143 :       IF (fft_type <= 0) CPABORT("Unknown FFT library: "//TRIM(fftlib))
     200             : 
     201        9143 :       CALL fft_do_init(fft_type, wisdom_file)
     202             : 
     203             :       ! setup the FFT scratch pool, if one is associated, clear first
     204        9143 :       CALL init_fft_scratch_pool() ! CALLs release_fft_scratch_pool()
     205             : 
     206        9143 :    END SUBROUTINE init_fft
     207             : 
     208             : ! **************************************************************************************************
     209             : !> \brief does whatever is needed to finalize the current fft setup
     210             : !> \param para_env ...
     211             : !> \param wisdom_file ...
     212             : !> \par History
     213             : !>      10.2007 created [Joost VandeVondele]
     214             : ! **************************************************************************************************
     215        8933 :    SUBROUTINE finalize_fft(para_env, wisdom_file)
     216             :       CLASS(mp_comm_type)                    :: para_env
     217             :       CHARACTER(LEN=*), INTENT(IN)           :: wisdom_file
     218             : 
     219             : ! release the FFT scratch pool
     220             : 
     221        8933 :       CALL release_fft_scratch_pool()
     222             : 
     223             :       ! finalize fft libs
     224             : 
     225        8933 :       CALL fft_do_cleanup(fft_type, wisdom_file, para_env%is_source())
     226             : 
     227        8933 :    END SUBROUTINE finalize_fft
     228             : 
     229             : ! **************************************************************************************************
     230             : !> \brief Determine the allowed lengths of FFT's   '''
     231             : !> \param radix_in ...
     232             : !> \param radix_out ...
     233             : !> \param operation ...
     234             : !> \par History
     235             : !>      new library structure (JGH)
     236             : !> \author Ari Seitsonen
     237             : ! **************************************************************************************************
     238      110726 :    SUBROUTINE fft_radix_operations(radix_in, radix_out, operation)
     239             : 
     240             :       INTEGER, INTENT(IN)                                :: radix_in
     241             :       INTEGER, INTENT(OUT)                               :: radix_out
     242             :       INTEGER, INTENT(IN)                                :: operation
     243             : 
     244             :       INTEGER, PARAMETER                                 :: fft_type_sg = 1
     245             : 
     246             :       INTEGER                                            :: i, iloc, ldata
     247      110726 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: DATA
     248             : 
     249      110726 :       ldata = 1024
     250      110726 :       ALLOCATE (DATA(ldata))
     251   113494150 :       DATA = -1
     252             : 
     253             :       ! if the user wants to use fftsg sizes go for it
     254      110726 :       IF (use_fftsg_sizes) THEN
     255      110222 :          CALL fft_get_lengths(fft_type_sg, DATA, ldata)
     256             :       ELSE
     257         504 :          CALL fft_get_lengths(fft_type, DATA, ldata)
     258             :       END IF
     259             : 
     260      110726 :       iloc = 0
     261     1222242 :       DO i = 1, ldata
     262     1222242 :          IF (DATA(i) == radix_in) THEN
     263             :             iloc = i
     264             :             EXIT
     265             :          ELSE
     266     1192150 :             IF (OPERATION == FFT_RADIX_ALLOWED) THEN
     267             :                CYCLE
     268     1192150 :             ELSE IF (DATA(i) > radix_in) THEN
     269             :                iloc = i
     270             :                EXIT
     271             :             END IF
     272             :          END IF
     273             :       END DO
     274             : 
     275      110726 :       IF (iloc == 0) THEN
     276           0 :          CPABORT("Index to radix array not found.")
     277             :       END IF
     278             : 
     279      110726 :       IF (OPERATION == FFT_RADIX_ALLOWED) THEN
     280           0 :          IF (DATA(iloc) == radix_in) THEN
     281           0 :             radix_out = FFT_RADIX_ALLOWED
     282             :          ELSE
     283           0 :             radix_out = FFT_RADIX_DISALLOWED
     284             :          END IF
     285             : 
     286      110726 :       ELSE IF (OPERATION == FFT_RADIX_CLOSEST) THEN
     287         270 :          IF (DATA(iloc) == radix_in) THEN
     288         102 :             radix_out = DATA(iloc)
     289             :          ELSE
     290         168 :             IF (ABS(DATA(iloc - 1) - radix_in) <= &
     291             :                 ABS(DATA(iloc) - radix_in)) THEN
     292         162 :                radix_out = DATA(iloc - 1)
     293             :             ELSE
     294           6 :                radix_out = DATA(iloc)
     295             :             END IF
     296             :          END IF
     297             : 
     298      110456 :       ELSE IF (OPERATION == FFT_RADIX_NEXT) THEN
     299      108884 :          radix_out = DATA(iloc)
     300             : 
     301        1572 :       ELSE IF (OPERATION == FFT_RADIX_NEXT_ODD) THEN
     302        3562 :          DO i = iloc, ldata
     303        3562 :             IF (MOD(DATA(i), 2) == 1) THEN
     304        1572 :                radix_out = DATA(i)
     305        1572 :                EXIT
     306             :             END IF
     307             :          END DO
     308        1572 :          IF (MOD(radix_out, 2) == 0) THEN
     309           0 :             CPABORT("No odd radix found.")
     310             :          END IF
     311             : 
     312             :       ELSE
     313           0 :          CPABORT("Disallowed radix operation.")
     314             :       END IF
     315             : 
     316      110726 :       DEALLOCATE (DATA)
     317             : 
     318      110726 :    END SUBROUTINE fft_radix_operations
     319             : 
     320             : ! **************************************************************************************************
     321             : !> \brief Performs m 1-D forward FFT-s of size n.
     322             : !> \param n      size of the FFT
     323             : !> \param m      number of FFT-s
     324             : !> \param trans  shape of input and output arrays: [n x m] if it is FALSE, [m x n] if it is TRUE
     325             : !> \param zin    input array
     326             : !> \param zout   output array
     327             : !> \param scale  scaling factor
     328             : !> \param stat   status of the operation, non-zero code indicates an error
     329             : ! **************************************************************************************************
     330         208 :    SUBROUTINE fft_fw1d(n, m, trans, zin, zout, scale, stat)
     331             :       INTEGER, INTENT(in)                                :: n, m
     332             :       LOGICAL, INTENT(in)                                :: trans
     333             :       COMPLEX(kind=dp), DIMENSION(*), INTENT(inout)      :: zin, zout
     334             :       REAL(kind=dp), INTENT(in)                          :: scale
     335             :       INTEGER, INTENT(out)                               :: stat
     336             : 
     337             :       CHARACTER(len=*), PARAMETER                        :: routineN = 'fft_fw1d'
     338             : 
     339             :       INTEGER                                            :: handle
     340             :       TYPE(fft_plan_type)                                :: fft_plan
     341             : 
     342         208 :       CALL timeset(routineN, handle)
     343             : 
     344         208 :       IF (fft_type == 3) THEN
     345         208 :          CALL fft_create_plan_1dm(fft_plan, fft_type, FWFFT, trans, n, m, zin, zout, fft_plan_style)
     346         208 :          CALL fft_1dm(fft_plan, zin, zout, scale, stat)
     347         208 :          CALL fft_destroy_plan(fft_plan)
     348             :       ELSE
     349             :          CALL cp_warn(__LOCATION__, &
     350           0 :                       "FFT library in use cannot handle transformation of an arbitrary length.")
     351           0 :          stat = 1
     352             :       END IF
     353             : 
     354         208 :       CALL timestop(handle)
     355         832 :    END SUBROUTINE fft_fw1d
     356             : 
     357             : ! **************************************************************************************************
     358             : !> \brief Calls the 3D-FFT function from the initialized library
     359             : !> \param fsign ...
     360             : !> \param n ...
     361             : !> \param zin ...
     362             : !> \param zout ...
     363             : !> \param status ...
     364             : !> \param debug ...
     365             : !> \par History
     366             : !>      none
     367             : !> \author JGH
     368             : ! **************************************************************************************************
     369      635466 :    SUBROUTINE fft3d_s(fsign, n, zin, zout, status, debug)
     370             : 
     371             :       INTEGER, INTENT(IN)                                :: fsign
     372             :       INTEGER, DIMENSION(:), INTENT(INOUT)               :: n
     373             :       COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :, :), &
     374             :          INTENT(INOUT)                                   :: zin
     375             :       COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :, :), &
     376             :          INTENT(INOUT), OPTIONAL, TARGET                 :: zout
     377             :       INTEGER, INTENT(OUT), OPTIONAL                     :: status
     378             :       LOGICAL, INTENT(IN), OPTIONAL                      :: debug
     379             : 
     380             :       CHARACTER(len=*), PARAMETER                        :: routineN = 'fft3d_s'
     381             : 
     382             :       COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :, :), &
     383      635466 :          POINTER                                         :: zoptr
     384             :       COMPLEX(KIND=dp), DIMENSION(1, 1, 1), TARGET       :: zdum
     385             :       INTEGER                                            :: handle, ld(3), lo(3), output_unit, sign, &
     386             :                                                             stat
     387             :       LOGICAL                                            :: fft_in_place, test
     388             :       REAL(KIND=dp)                                      :: in_sum, norm, out_sum
     389             :       TYPE(fft_scratch_type), POINTER                    :: fft_scratch
     390             : 
     391      635466 :       CALL timeset(routineN, handle)
     392      635466 :       output_unit = cp_logger_get_default_io_unit()
     393             : 
     394      635466 :       IF (fsign == FWFFT) THEN
     395     1205800 :          norm = 1.0_dp/REAL(PRODUCT(n), KIND=dp)
     396      334016 :       ELSE IF (fsign == BWFFT) THEN
     397      334016 :          norm = 1.0_dp
     398             :       ELSE
     399           0 :          CPABORT("Unknown FFT direction!")
     400             :       END IF
     401             : 
     402      635466 :       IF (PRESENT(debug)) THEN
     403      598617 :          test = debug
     404             :       ELSE
     405             :          test = .FALSE.
     406             :       END IF
     407             : 
     408      635466 :       IF (PRESENT(zout)) THEN
     409             :          fft_in_place = .FALSE.
     410             :       ELSE
     411      635458 :          fft_in_place = .TRUE.
     412             :       END IF
     413             : 
     414      635466 :       IF (test) THEN
     415           0 :          in_sum = SUM(ABS(zin))
     416             :       END IF
     417             : 
     418      635466 :       ld(1) = SIZE(zin, 1)
     419      635466 :       ld(2) = SIZE(zin, 2)
     420      635466 :       ld(3) = SIZE(zin, 3)
     421             : 
     422      635466 :       IF (n(1) /= ld(1) .OR. n(2) /= ld(2) .OR. n(3) /= ld(3)) THEN
     423           0 :          CPABORT("Size and dimension (zin) have to be the same.")
     424             :       END IF
     425             : 
     426      635466 :       sign = fsign
     427      635466 :       CALL get_fft_scratch(fft_scratch, tf_type=400, n=n)
     428             : 
     429      635466 :       IF (fft_in_place) THEN
     430      635458 :          zoptr => zdum
     431      635458 :          IF (fsign == FWFFT) THEN
     432      301446 :             CALL fft_3d(fft_scratch%fft_plan(1), norm, zin, zoptr, stat)
     433             :          ELSE
     434      334012 :             CALL fft_3d(fft_scratch%fft_plan(2), norm, zin, zoptr, stat)
     435             :          END IF
     436             :       ELSE
     437           8 :          IF (fsign == FWFFT) THEN
     438           4 :             CALL fft_3d(fft_scratch%fft_plan(3), norm, zin, zout, stat)
     439             :          ELSE
     440           4 :             CALL fft_3d(fft_scratch%fft_plan(4), norm, zin, zout, stat)
     441             :          END IF
     442             :       END IF
     443             : 
     444      635466 :       CALL release_fft_scratch(fft_scratch)
     445             : 
     446      635466 :       IF (PRESENT(zout)) THEN
     447           8 :          lo(1) = SIZE(zout, 1)
     448           8 :          lo(2) = SIZE(zout, 2)
     449           8 :          lo(3) = SIZE(zout, 3)
     450           8 :          IF (n(1) /= lo(1) .OR. n(2) /= lo(2) .OR. n(3) /= lo(3)) THEN
     451           0 :             CPABORT("Size and dimension (zout) have to be the same.")
     452             :          END IF
     453             :       END IF
     454             : 
     455      635466 :       IF (PRESENT(status)) THEN
     456        9131 :          status = stat
     457             :       END IF
     458             : 
     459      635466 :       IF (test .AND. output_unit > 0) THEN
     460           0 :          IF (PRESENT(zout)) THEN
     461           0 :             out_sum = SUM(ABS(zout))
     462           0 :             WRITE (output_unit, '(A)') "  Out of place 3D FFT (local)  : fft3d_s"
     463           0 :             WRITE (output_unit, '(A,T60,3I7)') "     Transform lengths ", n
     464           0 :             WRITE (output_unit, '(A,T60,3I7)') "     Input array dimensions ", ld
     465           0 :             WRITE (output_unit, '(A,T60,3I7)') "     Output array dimensions ", lo
     466           0 :             WRITE (output_unit, '(A,T61,E20.14)') "     Sum of input data ", in_sum
     467           0 :             WRITE (output_unit, '(A,T61,E20.14)') "     Sum of output data ", out_sum
     468             :          ELSE
     469           0 :             out_sum = SUM(ABS(zin))
     470           0 :             WRITE (output_unit, '(A)') "  In place 3D FFT (local)  : fft3d_s"
     471           0 :             WRITE (output_unit, '(A,T60,3I7)') "     Transform lengths ", n
     472           0 :             WRITE (output_unit, '(A,T60,3I7)') "     Input/output array dimensions ", ld
     473           0 :             WRITE (output_unit, '(A,T61,E20.14)') "     Sum of input data ", in_sum
     474           0 :             WRITE (output_unit, '(A,T61,E20.14)') "     Sum of output data ", out_sum
     475             :          END IF
     476             :       END IF
     477             : 
     478      635466 :       CALL timestop(handle)
     479             : 
     480      635466 :    END SUBROUTINE fft3d_s
     481             : 
     482             : ! **************************************************************************************************
     483             : !> \brief ...
     484             : !> \param fsign ...
     485             : !> \param n ...
     486             : !> \param cin ...
     487             : !> \param gin ...
     488             : !> \param rs_group ...
     489             : !> \param yzp ...
     490             : !> \param nyzray ...
     491             : !> \param bo ...
     492             : !> \param status ...
     493             : !> \param debug ...
     494             : ! **************************************************************************************************
     495     2663210 :    SUBROUTINE fft3d_ps(fsign, n, cin, gin, rs_group, yzp, nyzray, &
     496     2663210 :                        bo, status, debug)
     497             : 
     498             :       INTEGER, INTENT(IN)                                :: fsign
     499             :       INTEGER, DIMENSION(:), INTENT(IN)                  :: n
     500             :       COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :, :), &
     501             :          INTENT(INOUT)                                   :: cin
     502             :       COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
     503             :          INTENT(INOUT)                                   :: gin
     504             :       TYPE(mp_cart_type), INTENT(IN)                     :: rs_group
     505             :       INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:), &
     506             :          INTENT(IN)                                      :: yzp
     507             :       INTEGER, CONTIGUOUS, DIMENSION(0:), INTENT(IN)     :: nyzray
     508             :       INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:, :), &
     509             :          INTENT(IN)                                      :: bo
     510             :       INTEGER, INTENT(OUT), OPTIONAL                     :: status
     511             :       LOGICAL, INTENT(IN), OPTIONAL                      :: debug
     512             : 
     513             :       CHARACTER(len=*), PARAMETER                        :: routineN = 'fft3d_ps'
     514             : 
     515             :       COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
     516     2663210 :          POINTER                                         :: pbuf, qbuf, rbuf, sbuf
     517             :       COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :, :), &
     518     2663210 :          POINTER                                         :: tbuf
     519             :       INTEGER :: g_pos, handle, lg, lmax, mcx2, mcz1, mcz2, mg, mmax, mx1, mx2, my1, mz2, n1, n2, &
     520             :          nmax, numtask, nx, ny, nz, output_unit, r_dim(2), r_pos(2), rp, sign, stat
     521     2663210 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: p2p
     522             :       LOGICAL                                            :: test
     523             :       REAL(KIND=dp)                                      :: norm, sum_data
     524    18642470 :       TYPE(fft_scratch_sizes)                            :: fft_scratch_size
     525             :       TYPE(fft_scratch_type), POINTER                    :: fft_scratch
     526             : 
     527     2663210 :       CALL timeset(routineN, handle)
     528     2663210 :       output_unit = cp_logger_get_default_io_unit()
     529             : 
     530     2663210 :       IF (PRESENT(debug)) THEN
     531     2663210 :          test = debug
     532             :       ELSE
     533             :          test = .FALSE.
     534             :       END IF
     535             : 
     536     2663210 :       g_pos = rs_group%mepos
     537     2663210 :       numtask = rs_group%num_pe
     538     7989630 :       r_dim = rs_group%num_pe_cart
     539     7989630 :       r_pos = rs_group%mepos_cart
     540             : 
     541     2663210 :       IF (fsign == FWFFT) THEN
     542     5266936 :          norm = 1.0_dp/REAL(PRODUCT(n), KIND=dp)
     543     1346476 :       ELSE IF (fsign == BWFFT) THEN
     544     1346476 :          norm = 1.0_dp
     545             :       ELSE
     546           0 :          CPABORT("Unknown FFT direction!")
     547             :       END IF
     548             : 
     549     2663210 :       sign = fsign
     550             : 
     551     2663210 :       lg = SIZE(gin, 1)
     552     2663210 :       mg = SIZE(gin, 2)
     553             : 
     554     2663210 :       nx = SIZE(cin, 1)
     555     2663210 :       ny = SIZE(cin, 2)
     556     2663210 :       nz = SIZE(cin, 3)
     557             : 
     558     2663210 :       IF (mg == 0) THEN
     559             :          mmax = 1
     560             :       ELSE
     561     2663210 :          mmax = mg
     562             :       END IF
     563     2663210 :       lmax = MAX(lg, (nx*ny*nz)/mmax + 1)
     564             : 
     565     7989630 :       ALLOCATE (p2p(0:numtask - 1))
     566             : 
     567     2663210 :       CALL rs_group%rank_compare(rs_group, p2p)
     568             : 
     569     2663210 :       rp = p2p(g_pos)
     570     2663210 :       mx1 = bo(2, 1, rp, 1) - bo(1, 1, rp, 1) + 1
     571     2663210 :       my1 = bo(2, 2, rp, 1) - bo(1, 2, rp, 1) + 1
     572     2663210 :       mx2 = bo(2, 1, rp, 2) - bo(1, 1, rp, 2) + 1
     573     2663210 :       mz2 = bo(2, 3, rp, 2) - bo(1, 3, rp, 2) + 1
     574             : 
     575     7989630 :       n1 = MAXVAL(bo(2, 1, :, 1) - bo(1, 1, :, 1) + 1)
     576     7989630 :       n2 = MAXVAL(bo(2, 2, :, 1) - bo(1, 2, :, 1) + 1)
     577     2663210 :       nmax = MAX((2*n2)/numtask, 2)*mx2*mz2
     578     7989630 :       nmax = MAX(nmax, n1*MAXVAL(nyzray))
     579     7989630 :       n1 = MAXVAL(bo(2, 1, :, 2))
     580     7989630 :       n2 = MAXVAL(bo(2, 3, :, 2))
     581             : 
     582     2663210 :       fft_scratch_size%nx = nx
     583     2663210 :       fft_scratch_size%ny = ny
     584     2663210 :       fft_scratch_size%nz = nz
     585     2663210 :       fft_scratch_size%lmax = lmax
     586     2663210 :       fft_scratch_size%mmax = mmax
     587     2663210 :       fft_scratch_size%mx1 = mx1
     588     2663210 :       fft_scratch_size%mx2 = mx2
     589     2663210 :       fft_scratch_size%my1 = my1
     590     2663210 :       fft_scratch_size%mz2 = mz2
     591     2663210 :       fft_scratch_size%lg = lg
     592     2663210 :       fft_scratch_size%mg = mg
     593     2663210 :       fft_scratch_size%nbx = n1
     594     2663210 :       fft_scratch_size%nbz = n2
     595     7989630 :       mcz1 = MAXVAL(bo(2, 3, :, 1) - bo(1, 3, :, 1) + 1)
     596     7989630 :       mcx2 = MAXVAL(bo(2, 1, :, 2) - bo(1, 1, :, 2) + 1)
     597     7989630 :       mcz2 = MAXVAL(bo(2, 3, :, 2) - bo(1, 3, :, 2) + 1)
     598     2663210 :       fft_scratch_size%mcz1 = mcz1
     599     2663210 :       fft_scratch_size%mcx2 = mcx2
     600     2663210 :       fft_scratch_size%mcz2 = mcz2
     601     2663210 :       fft_scratch_size%nmax = nmax
     602     7989630 :       fft_scratch_size%nmray = MAXVAL(nyzray)
     603     2663210 :       fft_scratch_size%nyzray = nyzray(g_pos)
     604     2663210 :       fft_scratch_size%rs_group = rs_group
     605     7989630 :       fft_scratch_size%g_pos = g_pos
     606     7989630 :       fft_scratch_size%r_pos = r_pos
     607     7989630 :       fft_scratch_size%r_dim = r_dim
     608     2663210 :       fft_scratch_size%numtask = numtask
     609             : 
     610     2663210 :       IF (test) THEN
     611           8 :          IF (g_pos == 0 .AND. output_unit > 0) THEN
     612           4 :             WRITE (output_unit, '(A)') "  Parallel 3D FFT : fft3d_ps"
     613           4 :             WRITE (output_unit, '(A,T60,3I7)') "     Transform lengths ", n
     614           4 :             WRITE (output_unit, '(A,T67,2I7)') "     Array dimensions (gin) ", lg, mg
     615           4 :             WRITE (output_unit, '(A,T60,3I7)') "     Array dimensions (cin) ", nx, ny, nz
     616             :          END IF
     617             :       END IF
     618             : 
     619     2663210 :       IF (r_dim(2) > 1) THEN
     620             : 
     621             :          !
     622             :          ! real space is distributed over x and y coordinate
     623             :          ! we have two stages of communication
     624             :          !
     625             : 
     626           0 :          IF (r_dim(1) == 1) THEN
     627           0 :             CPABORT("This processor distribution is not supported.")
     628             :          END IF
     629           0 :          CALL get_fft_scratch(fft_scratch, tf_type=300, n=n, fft_sizes=fft_scratch_size)
     630             : 
     631           0 :          IF (sign == FWFFT) THEN
     632             :             ! cin -> gin
     633             : 
     634           0 :             IF (test) THEN
     635           0 :                sum_data = ABS(SUM(cin))
     636           0 :                CALL rs_group%sum(sum_data)
     637           0 :                IF (g_pos == 0 .AND. output_unit > 0) THEN
     638           0 :                   WRITE (output_unit, '(A)') "  Two step communication algorithm "
     639           0 :                   WRITE (output_unit, '(A,T60,3I7)') "     Transform Z ", n(3), mx1*my1
     640           0 :                   WRITE (output_unit, '(A,T60,3I7)') "     Transform Y ", n(2), mx2*mz2
     641           0 :                   WRITE (output_unit, '(A,T67,2I7)') "     Transform X ", n(1), nyzray(g_pos)
     642           0 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(1) ", sum_data
     643             :                END IF
     644             :             END IF
     645             : 
     646           0 :             pbuf => fft_scratch%p1buf
     647           0 :             qbuf => fft_scratch%p2buf
     648             : 
     649             :             ! FFT along z
     650           0 :             CALL fft_1dm(fft_scratch%fft_plan(1), cin, qbuf, norm, stat)
     651             : 
     652           0 :             rbuf => fft_scratch%p3buf
     653             : 
     654           0 :             IF (test) THEN
     655           0 :                sum_data = ABS(SUM(qbuf))
     656           0 :                CALL rs_group%sum(sum_data)
     657           0 :                IF (g_pos == 0 .AND. output_unit > 0) THEN
     658           0 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(2) T", sum_data
     659             :                END IF
     660             :             END IF
     661             : 
     662             :             ! Exchange data ( transpose of matrix )
     663           0 :             CALL cube_transpose_2(qbuf, bo(:, :, :, 1), bo(:, :, :, 2), rbuf, fft_scratch)
     664             : 
     665           0 :             IF (test) THEN
     666           0 :                sum_data = ABS(SUM(rbuf))
     667           0 :                CALL rs_group%sum(sum_data)
     668           0 :                IF (g_pos == 0 .AND. output_unit > 0) THEN
     669           0 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(3) T", sum_data
     670             :                END IF
     671             :             END IF
     672             : 
     673           0 :             pbuf => fft_scratch%p4buf
     674             : 
     675             :             ! FFT along y
     676           0 :             CALL fft_1dm(fft_scratch%fft_plan(2), rbuf, pbuf, 1.0_dp, stat)
     677             : 
     678           0 :             qbuf => fft_scratch%p5buf
     679             : 
     680           0 :             IF (test) THEN
     681           0 :                sum_data = ABS(SUM(pbuf))
     682           0 :                CALL rs_group%sum(sum_data)
     683           0 :                IF (g_pos == 0 .AND. output_unit > 0) THEN
     684           0 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(4) TS", sum_data
     685             :                END IF
     686             :             END IF
     687             : 
     688             :             ! Exchange data ( transpose of matrix ) and sort
     689             :             CALL xz_to_yz(pbuf, rs_group, r_dim, g_pos, p2p, yzp, nyzray, &
     690           0 :                           bo(:, :, :, 2), qbuf, fft_scratch)
     691             : 
     692           0 :             IF (test) THEN
     693           0 :                sum_data = ABS(SUM(qbuf))
     694           0 :                CALL rs_group%sum(sum_data)
     695           0 :                IF (g_pos == 0 .AND. output_unit > 0) THEN
     696           0 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(5) TS", sum_data
     697             :                END IF
     698             :             END IF
     699             : 
     700             :             ! FFT along x
     701           0 :             CALL fft_1dm(fft_scratch%fft_plan(3), qbuf, gin, 1.0_dp, stat)
     702             : 
     703           0 :             IF (test) THEN
     704           0 :                sum_data = ABS(SUM(gin))
     705           0 :                CALL rs_group%sum(sum_data)
     706           0 :                IF (g_pos == 0 .AND. output_unit > 0) THEN
     707           0 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(6) ", sum_data
     708             :                END IF
     709             :             END IF
     710             : 
     711           0 :          ELSE IF (sign == BWFFT) THEN
     712             :             ! gin -> cin
     713             : 
     714           0 :             IF (test) THEN
     715           0 :                sum_data = ABS(SUM(gin))
     716           0 :                CALL rs_group%sum(sum_data)
     717           0 :                IF (g_pos == 0 .AND. output_unit > 0) THEN
     718           0 :                   WRITE (output_unit, '(A)') "  Two step communication algorithm "
     719           0 :                   WRITE (output_unit, '(A,T67,2I7)') "     Transform X ", n(1), nyzray(g_pos)
     720           0 :                   WRITE (output_unit, '(A,T60,3I7)') "     Transform Y ", n(2), mx2*mz2
     721           0 :                   WRITE (output_unit, '(A,T60,3I7)') "     Transform Z ", n(3), mx1*my1
     722           0 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(1) ", sum_data
     723             :                END IF
     724             :             END IF
     725             : 
     726           0 :             pbuf => fft_scratch%p7buf
     727             : 
     728             :             ! FFT along x
     729           0 :             CALL fft_1dm(fft_scratch%fft_plan(4), gin, pbuf, norm, stat)
     730             : 
     731           0 :             qbuf => fft_scratch%p4buf
     732             : 
     733           0 :             IF (test) THEN
     734           0 :                sum_data = ABS(SUM(pbuf))
     735           0 :                CALL rs_group%sum(sum_data)
     736           0 :                IF (g_pos == 0 .AND. output_unit > 0) THEN
     737           0 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(2) TS", sum_data
     738             :                END IF
     739             :             END IF
     740             : 
     741             :             ! Exchange data ( transpose of matrix ) and sort
     742             :             CALL yz_to_xz(pbuf, rs_group, r_dim, g_pos, p2p, yzp, nyzray, &
     743           0 :                           bo(:, :, :, 2), qbuf, fft_scratch)
     744             : 
     745           0 :             IF (test) THEN
     746           0 :                sum_data = ABS(SUM(qbuf))
     747           0 :                CALL rs_group%sum(sum_data)
     748           0 :                IF (g_pos == 0 .AND. output_unit > 0) THEN
     749           0 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(3) TS", sum_data
     750             :                END IF
     751             :             END IF
     752             : 
     753           0 :             rbuf => fft_scratch%p3buf
     754             : 
     755             :             ! FFT along y
     756           0 :             CALL fft_1dm(fft_scratch%fft_plan(5), qbuf, rbuf, 1.0_dp, stat)
     757             : 
     758           0 :             pbuf => fft_scratch%p2buf
     759             : 
     760           0 :             IF (test) THEN
     761           0 :                sum_data = ABS(SUM(rbuf))
     762           0 :                CALL rs_group%sum(sum_data)
     763           0 :                IF (g_pos == 0 .AND. output_unit > 0) THEN
     764           0 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(4) T", sum_data
     765             :                END IF
     766             :             END IF
     767             : 
     768             :             ! Exchange data ( transpose of matrix )
     769           0 :             CALL cube_transpose_1(rbuf, bo(:, :, :, 2), bo(:, :, :, 1), pbuf, fft_scratch)
     770             : 
     771           0 :             IF (test) THEN
     772           0 :                sum_data = ABS(SUM(pbuf))
     773           0 :                CALL rs_group%sum(sum_data)
     774           0 :                IF (g_pos == 0 .AND. output_unit > 0) THEN
     775           0 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(5) T", sum_data
     776             :                END IF
     777             :             END IF
     778             : 
     779           0 :             qbuf => fft_scratch%p1buf
     780             : 
     781             :             ! FFT along z
     782           0 :             CALL fft_1dm(fft_scratch%fft_plan(6), pbuf, cin, 1.0_dp, stat)
     783             : 
     784           0 :             IF (test) THEN
     785           0 :                sum_data = ABS(SUM(cin))
     786           0 :                CALL rs_group%sum(sum_data)
     787           0 :                IF (g_pos == 0 .AND. output_unit > 0) THEN
     788           0 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(6) ", sum_data
     789             :                END IF
     790             :             END IF
     791             : 
     792             :          ELSE
     793             : 
     794           0 :             CPABORT("Illegal fsign parameter.")
     795             : 
     796             :          END IF
     797             : 
     798           0 :          CALL release_fft_scratch(fft_scratch)
     799             : 
     800             :       ELSE
     801             : 
     802             :          !
     803             :          ! real space is only distributed over x coordinate
     804             :          ! we have one stage of communication, after the transform of
     805             :          ! direction x
     806             :          !
     807             : 
     808     2663210 :          CALL get_fft_scratch(fft_scratch, tf_type=200, n=n, fft_sizes=fft_scratch_size)
     809             : 
     810     2663210 :          sbuf => fft_scratch%r1buf
     811     2663210 :          tbuf => fft_scratch%tbuf
     812             : 
     813 80737581213 :          sbuf = z_zero
     814 81119098863 :          tbuf = z_zero
     815             : 
     816     2663210 :          IF (sign == FWFFT) THEN
     817             :             ! cin -> gin
     818             : 
     819     1316734 :             IF (test) THEN
     820        9284 :                sum_data = ABS(SUM(cin))
     821           4 :                CALL rs_group%sum(sum_data)
     822           4 :                IF (g_pos == 0 .AND. output_unit > 0) THEN
     823           2 :                   WRITE (output_unit, '(A)') "     One step communication algorithm "
     824           2 :                   WRITE (output_unit, '(A,T60,3I7)') "     Transform YZ ", n(2), n(3), nx
     825           2 :                   WRITE (output_unit, '(A,T67,2I7)') "     Transform X ", n(1), nyzray(g_pos)
     826           2 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(1) ", sum_data
     827             :                END IF
     828             :             END IF
     829             : 
     830             :             ! FFT along y and z
     831     1316734 :             CALL fft_1dm(fft_scratch%fft_plan(1), cin, sbuf, 1._dp, stat)
     832     1316734 :             CALL fft_1dm(fft_scratch%fft_plan(2), sbuf, tbuf, 1._dp, stat)
     833             : 
     834     1316734 :             IF (test) THEN
     835        8740 :                sum_data = ABS(SUM(tbuf))
     836           4 :                CALL rs_group%sum(sum_data)
     837           4 :                IF (g_pos == 0 .AND. output_unit > 0) THEN
     838           2 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(2) TS", sum_data
     839             :                END IF
     840             :             END IF
     841             : 
     842             :             ! Exchange data ( transpose of matrix ) and sort
     843             :             CALL yz_to_x(tbuf, rs_group, g_pos, p2p, yzp, nyzray, &
     844     1316734 :                          bo(:, :, :, 2), sbuf, fft_scratch)
     845             : 
     846     1316734 :             IF (test) THEN
     847        8776 :                sum_data = ABS(SUM(sbuf))
     848           4 :                CALL rs_group%sum(sum_data)
     849           4 :                IF (g_pos == 0 .AND. output_unit > 0) THEN
     850           2 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(3) TS", sum_data
     851             :                END IF
     852             :             END IF
     853             :             ! FFT along x
     854     1316734 :             CALL fft_1dm(fft_scratch%fft_plan(3), sbuf, gin, norm, stat)
     855             : 
     856     1316734 :             IF (test) THEN
     857        8708 :                sum_data = ABS(SUM(gin))
     858           4 :                CALL rs_group%sum(sum_data)
     859           4 :                IF (g_pos == 0 .AND. output_unit > 0) THEN
     860           2 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(4) ", sum_data
     861             :                END IF
     862             :             END IF
     863             : 
     864     1346476 :          ELSE IF (sign == BWFFT) THEN
     865             :             ! gin -> cin
     866             : 
     867     1346476 :             IF (test) THEN
     868        8708 :                sum_data = ABS(SUM(gin))
     869           4 :                CALL rs_group%sum(sum_data)
     870           4 :                IF (g_pos == 0 .AND. output_unit > 0) THEN
     871           2 :                   WRITE (output_unit, '(A)') "  One step communication algorithm "
     872           2 :                   WRITE (output_unit, '(A,T67,2I7)') "     Transform X ", n(1), nyzray(g_pos)
     873           2 :                   WRITE (output_unit, '(A,T60,3I7)') "     Transform YZ ", n(2), n(3), nx
     874           2 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(1) ", sum_data
     875             :                END IF
     876             :             END IF
     877             : 
     878             :             ! FFT along x
     879     1346476 :             CALL fft_1dm(fft_scratch%fft_plan(4), gin, sbuf, norm, stat)
     880             : 
     881     1346476 :             IF (test) THEN
     882        8776 :                sum_data = ABS(SUM(sbuf))
     883           4 :                CALL rs_group%sum(sum_data)
     884           4 :                IF (g_pos == 0 .AND. output_unit > 0) THEN
     885           2 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(2) TS", sum_data
     886             :                END IF
     887             :             END IF
     888             : 
     889             :             ! Exchange data ( transpose of matrix ) and sort
     890             :             CALL x_to_yz(sbuf, rs_group, g_pos, p2p, yzp, nyzray, &
     891     1346476 :                          bo(:, :, :, 2), tbuf, fft_scratch)
     892             : 
     893     1346476 :             IF (test) THEN
     894        8740 :                sum_data = ABS(SUM(tbuf))
     895           4 :                CALL rs_group%sum(sum_data)
     896           4 :                IF (g_pos == 0 .AND. output_unit > 0) THEN
     897           2 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(3) TS", sum_data
     898             :                END IF
     899             :             END IF
     900             : 
     901             :             ! FFT along y and z
     902     1346476 :             CALL fft_1dm(fft_scratch%fft_plan(5), tbuf, sbuf, 1._dp, stat)
     903     1346476 :             CALL fft_1dm(fft_scratch%fft_plan(6), sbuf, cin, 1._dp, stat)
     904             : 
     905     1346476 :             IF (test) THEN
     906        9284 :                sum_data = ABS(SUM(cin))
     907           4 :                CALL rs_group%sum(sum_data)
     908           4 :                IF (g_pos == 0 .AND. output_unit > 0) THEN
     909           2 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(4) ", sum_data
     910             :                END IF
     911             :             END IF
     912             :          ELSE
     913           0 :             CPABORT("Illegal fsign parameter.")
     914             :          END IF
     915             : 
     916     2663210 :          CALL release_fft_scratch(fft_scratch)
     917             : 
     918             :       END IF
     919             : 
     920     2663210 :       DEALLOCATE (p2p)
     921             : 
     922     2663210 :       IF (PRESENT(status)) THEN
     923           0 :          status = stat
     924             :       END IF
     925     2663210 :       CALL timestop(handle)
     926             : 
     927     5326420 :    END SUBROUTINE fft3d_ps
     928             : 
     929             : ! **************************************************************************************************
     930             : !> \brief ...
     931             : !> \param fsign ...
     932             : !> \param n ...
     933             : !> \param zin ...
     934             : !> \param gin ...
     935             : !> \param group ...
     936             : !> \param bo ...
     937             : !> \param status ...
     938             : !> \param debug ...
     939             : ! **************************************************************************************************
     940         208 :    SUBROUTINE fft3d_pb(fsign, n, zin, gin, group, bo, status, debug)
     941             : 
     942             :       INTEGER, INTENT(IN)                                :: fsign
     943             :       INTEGER, DIMENSION(3), INTENT(IN)                  :: n
     944             :       COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :, :), &
     945             :          INTENT(INOUT)                                   :: zin
     946             :       COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
     947             :          INTENT(INOUT)                                   :: gin
     948             :       TYPE(mp_cart_type), INTENT(IN)                     :: group
     949             :       INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:, :), &
     950             :          INTENT(IN)                                      :: bo
     951             :       INTEGER, INTENT(OUT), OPTIONAL                     :: status
     952             :       LOGICAL, INTENT(IN), OPTIONAL                      :: debug
     953             : 
     954             :       CHARACTER(len=*), PARAMETER                        :: routineN = 'fft3d_pb'
     955             : 
     956             :       COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
     957         208 :          POINTER                                         :: abuf, bbuf
     958             :       INTEGER                                            :: handle, lg(2), lz(3), mcx2, mcy3, mcz1, &
     959             :                                                             mcz2, mx1, mx2, mx3, my1, my2, my3, &
     960             :                                                             my_pos, mz1, mz2, mz3, output_unit, &
     961             :                                                             sign, stat
     962             :       INTEGER, DIMENSION(2)                              :: dim
     963             :       LOGICAL                                            :: test
     964             :       REAL(KIND=dp)                                      :: norm, sum_data
     965        1456 :       TYPE(fft_scratch_sizes)                            :: fft_scratch_size
     966             :       TYPE(fft_scratch_type), POINTER                    :: fft_scratch
     967             : 
     968             : !------------------------------------------------------------------------------
     969             : ! "Real Space"  1) xyZ      or      1) xYZ
     970             : !               2) xYz      or         not used
     971             : ! "G Space"     3) Xyz      or      3) XYz
     972             : !
     973             : ! There is one communicator (2-dimensional) for all distributions
     974             : ! np = n1 * n2, where np is the total number of processors
     975             : ! If n2 = 1, we have the second case and only one transpose step is needed
     976             : !
     977             : ! Assignment of dimensions to axis for different steps
     978             : ! First case: 1) n1=x; n2=y
     979             : !             2) n1=x; n2=z
     980             : !             3) n1=y; n2=z
     981             : ! Second case 1) n1=x
     982             : !             3) n1=z
     983             : !
     984             : ! The more general case with two communicators for the initial and final
     985             : ! distribution is not covered.
     986             : !------------------------------------------------------------------------------
     987             : 
     988         208 :       CALL timeset(routineN, handle)
     989         208 :       output_unit = cp_logger_get_default_io_unit()
     990             : 
     991         624 :       dim = group%num_pe_cart
     992         208 :       my_pos = group%mepos
     993             : 
     994         208 :       IF (PRESENT(debug)) THEN
     995         208 :          test = debug
     996             :       ELSE
     997             :          test = .FALSE.
     998             :       END IF
     999             : 
    1000         208 :       IF (fsign == FWFFT) THEN
    1001         416 :          norm = 1.0_dp/REAL(PRODUCT(n), KIND=dp)
    1002         104 :       ELSE IF (fsign == BWFFT) THEN
    1003         104 :          norm = 1.0_dp
    1004             :       ELSE
    1005           0 :          CPABORT("Unknown FFT direction!")
    1006             :       END IF
    1007             : 
    1008         208 :       sign = fsign
    1009             : 
    1010         208 :       IF (test) THEN
    1011           8 :          lg(1) = SIZE(gin, 1)
    1012           8 :          lg(2) = SIZE(gin, 2)
    1013           8 :          lz(1) = SIZE(zin, 1)
    1014           8 :          lz(2) = SIZE(zin, 2)
    1015           8 :          lz(3) = SIZE(zin, 3)
    1016           8 :          IF (my_pos == 0 .AND. output_unit > 0) THEN
    1017           4 :             WRITE (output_unit, '(A)') "  Parallel 3D FFT : fft3d_pb"
    1018           4 :             WRITE (output_unit, '(A,T60,3I7)') "     Transform lengths ", n
    1019           4 :             WRITE (output_unit, '(A,T67,2I7)') "     Array dimensions (gin) ", lg
    1020           4 :             WRITE (output_unit, '(A,T60,3I7)') "     Array dimensions (cin) ", lz
    1021             :          END IF
    1022             :       END IF
    1023             : 
    1024         208 :       mx1 = bo(2, 1, my_pos, 1) - bo(1, 1, my_pos, 1) + 1
    1025         208 :       my1 = bo(2, 2, my_pos, 1) - bo(1, 2, my_pos, 1) + 1
    1026         208 :       mz1 = bo(2, 3, my_pos, 1) - bo(1, 3, my_pos, 1) + 1
    1027         208 :       mx2 = bo(2, 1, my_pos, 2) - bo(1, 1, my_pos, 2) + 1
    1028         208 :       my2 = bo(2, 2, my_pos, 2) - bo(1, 2, my_pos, 2) + 1
    1029         208 :       mz2 = bo(2, 3, my_pos, 2) - bo(1, 3, my_pos, 2) + 1
    1030         208 :       mx3 = bo(2, 1, my_pos, 3) - bo(1, 1, my_pos, 3) + 1
    1031         208 :       my3 = bo(2, 2, my_pos, 3) - bo(1, 2, my_pos, 3) + 1
    1032         208 :       mz3 = bo(2, 3, my_pos, 3) - bo(1, 3, my_pos, 3) + 1
    1033         208 :       fft_scratch_size%mx1 = mx1
    1034         208 :       fft_scratch_size%mx2 = mx2
    1035         208 :       fft_scratch_size%mx3 = mx3
    1036         208 :       fft_scratch_size%my1 = my1
    1037         208 :       fft_scratch_size%my2 = my2
    1038         208 :       fft_scratch_size%my3 = my3
    1039         208 :       fft_scratch_size%mz1 = mz1
    1040         208 :       fft_scratch_size%mz2 = mz2
    1041         208 :       fft_scratch_size%mz3 = mz3
    1042         624 :       mcz1 = MAXVAL(bo(2, 3, :, 1) - bo(1, 3, :, 1) + 1)
    1043         624 :       mcx2 = MAXVAL(bo(2, 1, :, 2) - bo(1, 1, :, 2) + 1)
    1044         624 :       mcz2 = MAXVAL(bo(2, 3, :, 2) - bo(1, 3, :, 2) + 1)
    1045         624 :       mcy3 = MAXVAL(bo(2, 2, :, 3) - bo(1, 2, :, 3) + 1)
    1046         208 :       fft_scratch_size%mcz1 = mcz1
    1047         208 :       fft_scratch_size%mcx2 = mcx2
    1048         208 :       fft_scratch_size%mcz2 = mcz2
    1049         208 :       fft_scratch_size%mcy3 = mcy3
    1050         208 :       fft_scratch_size%rs_group = group
    1051         624 :       fft_scratch_size%g_pos = my_pos
    1052         208 :       fft_scratch_size%numtask = DIM(1)*DIM(2)
    1053             : 
    1054         208 :       IF (DIM(1) > 1 .AND. DIM(2) > 1) THEN
    1055             : 
    1056             :          !
    1057             :          ! First case; two stages of communication
    1058             :          !
    1059             : 
    1060           0 :          CALL get_fft_scratch(fft_scratch, tf_type=100, n=n, fft_sizes=fft_scratch_size)
    1061             : 
    1062           0 :          IF (sign == FWFFT) THEN
    1063             :             ! Stage 1 -> 3
    1064             : 
    1065           0 :             bbuf => fft_scratch%a2buf
    1066             : 
    1067           0 :             IF (test) THEN
    1068           0 :                sum_data = ABS(SUM(zin))
    1069           0 :                CALL group%sum(sum_data)
    1070           0 :                IF (my_pos == 0 .AND. output_unit > 0) THEN
    1071           0 :                   WRITE (output_unit, '(A)') "  Two step communication algorithm "
    1072           0 :                   WRITE (output_unit, '(A,T67,2I7)') "     Transform Z ", n(3), mx1*my1
    1073           0 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(1) ", sum_data
    1074             :                END IF
    1075             :             END IF
    1076             : 
    1077             :             ! FFT along z
    1078           0 :             CALL fft_1dm(fft_scratch%fft_plan(1), zin, bbuf, norm, stat)
    1079             : 
    1080           0 :             abuf => fft_scratch%a3buf
    1081             : 
    1082           0 :             IF (test) THEN
    1083           0 :                sum_data = ABS(SUM(bbuf))
    1084           0 :                CALL group%sum(sum_data)
    1085           0 :                IF (my_pos == 0 .AND. output_unit > 0) THEN
    1086           0 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(2) T", sum_data
    1087             :                END IF
    1088             :             END IF
    1089             : 
    1090           0 :             CALL cube_transpose_2(bbuf, bo(:, :, :, 1), bo(:, :, :, 2), abuf, fft_scratch)
    1091             : 
    1092           0 :             bbuf => fft_scratch%a4buf
    1093             : 
    1094           0 :             IF (test) THEN
    1095           0 :                sum_data = ABS(SUM(abuf))
    1096           0 :                CALL group%sum(sum_data)
    1097           0 :                IF (my_pos == 0 .AND. output_unit > 0) THEN
    1098           0 :                   WRITE (output_unit, '(A,T67,2I7)') "     Transform Y ", n(2), mx2*mz2
    1099           0 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(3) ", sum_data
    1100             :                END IF
    1101             :             END IF
    1102             : 
    1103             :             ! FFT along y
    1104           0 :             CALL fft_1dm(fft_scratch%fft_plan(2), abuf, bbuf, 1.0_dp, stat)
    1105             : 
    1106           0 :             abuf => fft_scratch%a5buf
    1107             : 
    1108           0 :             IF (test) THEN
    1109           0 :                sum_data = ABS(SUM(bbuf))
    1110           0 :                CALL group%sum(sum_data)
    1111           0 :                IF (my_pos == 0 .AND. output_unit > 0) THEN
    1112           0 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(4) T", sum_data
    1113             :                END IF
    1114             :             END IF
    1115             : 
    1116           0 :             CALL cube_transpose_4(bbuf, bo(:, :, :, 2), bo(:, :, :, 3), abuf, fft_scratch)
    1117             : 
    1118           0 :             IF (test) THEN
    1119           0 :                sum_data = ABS(SUM(abuf))
    1120           0 :                CALL group%sum(sum_data)
    1121           0 :                IF (my_pos == 0 .AND. output_unit > 0) THEN
    1122           0 :                   WRITE (output_unit, '(A,T67,2I7)') "     Transform X ", n(1), my3*mz3
    1123           0 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(5) ", sum_data
    1124             :                END IF
    1125             :             END IF
    1126             : 
    1127             :             ! FFT along x
    1128           0 :             CALL fft_1dm(fft_scratch%fft_plan(3), abuf, gin, 1.0_dp, stat)
    1129             : 
    1130           0 :             IF (test) THEN
    1131           0 :                sum_data = ABS(SUM(gin))
    1132           0 :                CALL group%sum(sum_data)
    1133           0 :                IF (my_pos == 0 .AND. output_unit > 0) THEN
    1134           0 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(6) ", sum_data
    1135             :                END IF
    1136             :             END IF
    1137             : 
    1138           0 :          ELSEIF (sign == BWFFT) THEN
    1139             :             ! Stage 3 -> 1
    1140             : 
    1141           0 :             bbuf => fft_scratch%a5buf
    1142             : 
    1143           0 :             IF (test) THEN
    1144           0 :                sum_data = ABS(SUM(gin))
    1145           0 :                CALL group%sum(sum_data)
    1146           0 :                IF (my_pos == 0 .AND. output_unit > 0) THEN
    1147           0 :                   WRITE (output_unit, '(A)') "  Two step communication algorithm "
    1148           0 :                   WRITE (output_unit, '(A,T67,2I7)') "     Transform X ", n(1), my3*mz3
    1149           0 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(1) ", sum_data
    1150             :                END IF
    1151             :             END IF
    1152             : 
    1153             :             ! FFT along x
    1154           0 :             CALL fft_1dm(fft_scratch%fft_plan(4), gin, bbuf, 1.0_dp, stat)
    1155             : 
    1156           0 :             abuf => fft_scratch%a4buf
    1157             : 
    1158           0 :             IF (test) THEN
    1159           0 :                sum_data = ABS(SUM(bbuf))
    1160           0 :                CALL group%sum(sum_data)
    1161           0 :                IF (my_pos == 0 .AND. output_unit > 0) THEN
    1162           0 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(2) T", sum_data
    1163             :                END IF
    1164             :             END IF
    1165             : 
    1166           0 :             CALL cube_transpose_3(bbuf, bo(:, :, :, 3), bo(:, :, :, 2), abuf, fft_scratch)
    1167             : 
    1168           0 :             bbuf => fft_scratch%a3buf
    1169             : 
    1170           0 :             IF (test) THEN
    1171           0 :                sum_data = ABS(SUM(abuf))
    1172           0 :                CALL group%sum(sum_data)
    1173           0 :                IF (my_pos == 0 .AND. output_unit > 0) THEN
    1174           0 :                   WRITE (output_unit, '(A,T67,2I7)') "     Transform Y ", n(2), mx2*mz2
    1175           0 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(3) ", sum_data
    1176             :                END IF
    1177             :             END IF
    1178             : 
    1179             :             ! FFT along y
    1180           0 :             CALL fft_1dm(fft_scratch%fft_plan(5), abuf, bbuf, 1.0_dp, stat)
    1181             : 
    1182           0 :             abuf => fft_scratch%a2buf
    1183             : 
    1184           0 :             IF (test) THEN
    1185           0 :                sum_data = ABS(SUM(bbuf))
    1186           0 :                CALL group%sum(sum_data)
    1187           0 :                IF (my_pos == 0 .AND. output_unit > 0) THEN
    1188           0 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(4) T", sum_data
    1189             :                END IF
    1190             :             END IF
    1191             : 
    1192           0 :             CALL cube_transpose_1(bbuf, bo(:, :, :, 2), bo(:, :, :, 1), abuf, fft_scratch)
    1193             : 
    1194           0 :             IF (test) THEN
    1195           0 :                sum_data = ABS(SUM(abuf))
    1196           0 :                CALL group%sum(sum_data)
    1197           0 :                IF (my_pos == 0 .AND. output_unit > 0) THEN
    1198           0 :                   WRITE (output_unit, '(A,T67,2I7)') "     Transform Z ", n(3), mx1*my1
    1199           0 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(5) ", sum_data
    1200             :                END IF
    1201             :             END IF
    1202             : 
    1203             :             ! FFT along z
    1204           0 :             CALL fft_1dm(fft_scratch%fft_plan(6), abuf, zin, norm, stat)
    1205             : 
    1206           0 :             IF (test) THEN
    1207           0 :                sum_data = ABS(SUM(zin))
    1208           0 :                CALL group%sum(sum_data)
    1209           0 :                IF (my_pos == 0 .AND. output_unit > 0) THEN
    1210           0 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(6) ", sum_data
    1211             :                END IF
    1212             :             END IF
    1213             : 
    1214             :          ELSE
    1215           0 :             CPABORT("Illegal fsign parameter.")
    1216             :          END IF
    1217             : 
    1218           0 :          CALL release_fft_scratch(fft_scratch)
    1219             : 
    1220         208 :       ELSEIF (DIM(2) == 1) THEN
    1221             : 
    1222             :          !
    1223             :          ! Second case; one stage of communication
    1224             :          !
    1225             : 
    1226         208 :          CALL get_fft_scratch(fft_scratch, tf_type=101, n=n, fft_sizes=fft_scratch_size)
    1227             : 
    1228         208 :          IF (sign == FWFFT) THEN
    1229             :             ! Stage 1 -> 3
    1230             : 
    1231         104 :             IF (test) THEN
    1232        9284 :                sum_data = ABS(SUM(zin))
    1233           4 :                CALL group%sum(sum_data)
    1234           4 :                IF (my_pos == 0 .AND. output_unit > 0) THEN
    1235           2 :                   WRITE (output_unit, '(A)') "  one step communication algorithm "
    1236           2 :                   WRITE (output_unit, '(A,T67,2I7)') "     Transform Z ", n(3), mx1*my1
    1237           2 :                   WRITE (output_unit, '(A,T67,2I7)') "     Transform Y ", n(2), mx1*mz1
    1238           2 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(1) ", sum_data
    1239             :                END IF
    1240             :             END IF
    1241             : 
    1242         104 :             abuf => fft_scratch%a3buf
    1243         104 :             bbuf => fft_scratch%a4buf
    1244             :             ! FFT along z and y
    1245         104 :             CALL fft_1dm(fft_scratch%fft_plan(1), zin, abuf, norm, stat)
    1246         104 :             CALL fft_1dm(fft_scratch%fft_plan(2), abuf, bbuf, 1.0_dp, stat)
    1247             : 
    1248         104 :             abuf => fft_scratch%a5buf
    1249             : 
    1250         104 :             IF (test) THEN
    1251        8708 :                sum_data = ABS(SUM(bbuf))
    1252           4 :                CALL group%sum(sum_data)
    1253           4 :                IF (my_pos == 0 .AND. output_unit > 0) THEN
    1254           2 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(2) T", sum_data
    1255             :                END IF
    1256             :             END IF
    1257             : 
    1258         104 :             CALL cube_transpose_6(bbuf, group, bo(:, :, :, 1), bo(:, :, :, 3), abuf, fft_scratch)
    1259             : 
    1260         104 :             IF (test) THEN
    1261        8260 :                sum_data = ABS(SUM(abuf))
    1262           4 :                CALL group%sum(sum_data)
    1263           4 :                IF (my_pos == 0 .AND. output_unit > 0) THEN
    1264           2 :                   WRITE (output_unit, '(A,T67,2I7)') "     Transform X ", n(1), my3*mz3
    1265           2 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(3) ", sum_data
    1266             :                END IF
    1267             :             END IF
    1268             : 
    1269             :             ! FFT along x
    1270         104 :             CALL fft_1dm(fft_scratch%fft_plan(3), abuf, gin, 1.0_dp, stat)
    1271             : 
    1272         104 :             IF (test) THEN
    1273        8708 :                sum_data = ABS(SUM(gin))
    1274           4 :                CALL group%sum(sum_data)
    1275           4 :                IF (my_pos == 0 .AND. output_unit > 0) THEN
    1276           2 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(4) ", sum_data
    1277             :                END IF
    1278             :             END IF
    1279             : 
    1280         104 :          ELSEIF (sign == BWFFT) THEN
    1281             :             ! Stage 3 -> 1
    1282             : 
    1283         104 :             IF (test) THEN
    1284        8708 :                sum_data = ABS(SUM(gin))
    1285           4 :                CALL group%sum(sum_data)
    1286           4 :                IF (my_pos == 0 .AND. output_unit > 0) THEN
    1287           2 :                   WRITE (output_unit, '(A)') "  one step communication algorithm "
    1288           2 :                   WRITE (output_unit, '(A,T67,2I7)') "     Transform X ", n(1), my3*mz3
    1289           2 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(1) ", sum_data
    1290             :                END IF
    1291             :             END IF
    1292             : 
    1293         104 :             bbuf => fft_scratch%a5buf
    1294             : 
    1295             :             ! FFT along x
    1296         104 :             CALL fft_1dm(fft_scratch%fft_plan(4), gin, bbuf, 1.0_dp, stat)
    1297             : 
    1298         104 :             abuf => fft_scratch%a4buf
    1299             : 
    1300         104 :             IF (test) THEN
    1301        8260 :                sum_data = ABS(SUM(bbuf))
    1302           4 :                CALL group%sum(sum_data)
    1303           4 :                IF (my_pos == 0 .AND. output_unit > 0) THEN
    1304           2 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(2) T", sum_data
    1305             :                END IF
    1306             :             END IF
    1307             : 
    1308         104 :             CALL cube_transpose_5(bbuf, group, bo(:, :, :, 3), bo(:, :, :, 1), abuf, fft_scratch)
    1309             : 
    1310         104 :             bbuf => fft_scratch%a3buf
    1311             : 
    1312         104 :             IF (test) THEN
    1313        8708 :                sum_data = ABS(SUM(abuf))
    1314           4 :                CALL group%sum(sum_data)
    1315           4 :                IF (my_pos == 0 .AND. output_unit > 0) THEN
    1316           2 :                   WRITE (output_unit, '(A,T67,2I7)') "     Transform Y ", n(2), mx1*mz1
    1317           2 :                   WRITE (output_unit, '(A,T67,2I7)') "     Transform Z ", n(3), mx1*my1
    1318           2 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(3) ", sum_data
    1319             :                END IF
    1320             :             END IF
    1321             : 
    1322             :             ! FFT along y
    1323         104 :             CALL fft_1dm(fft_scratch%fft_plan(5), abuf, bbuf, 1.0_dp, stat)
    1324             : 
    1325             :             ! FFT along z
    1326         104 :             CALL fft_1dm(fft_scratch%fft_plan(6), bbuf, zin, norm, stat)
    1327             : 
    1328         104 :             IF (test) THEN
    1329        9284 :                sum_data = ABS(SUM(zin))
    1330           4 :                CALL group%sum(sum_data)
    1331           4 :                IF (my_pos == 0 .AND. output_unit > 0) THEN
    1332           2 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(4) ", sum_data
    1333             :                END IF
    1334             :             END IF
    1335             : 
    1336             :          ELSE
    1337           0 :             CPABORT("Illegal fsign parameter.")
    1338             :          END IF
    1339             : 
    1340         208 :          CALL release_fft_scratch(fft_scratch)
    1341             : 
    1342             :       ELSE
    1343             : 
    1344           0 :          CPABORT("Partition not implemented.")
    1345             : 
    1346             :       END IF
    1347             : 
    1348         208 :       IF (PRESENT(status)) THEN
    1349           0 :          status = stat
    1350             :       END IF
    1351             : 
    1352         208 :       CALL timestop(handle)
    1353             : 
    1354         416 :    END SUBROUTINE fft3d_pb
    1355             : 
    1356             : ! **************************************************************************************************
    1357             : !> \brief ...
    1358             : !> \param sb ...
    1359             : !> \param group ...
    1360             : !> \param my_pos ...
    1361             : !> \param p2p ...
    1362             : !> \param yzp ...
    1363             : !> \param nray ...
    1364             : !> \param bo ...
    1365             : !> \param tb ...
    1366             : !> \param fft_scratch ...
    1367             : !> \par History
    1368             : !>      15. Feb. 2006 : single precision all_to_all
    1369             : !> \author JGH (14-Jan-2001)
    1370             : ! **************************************************************************************************
    1371     1346476 :    SUBROUTINE x_to_yz(sb, group, my_pos, p2p, yzp, nray, bo, tb, fft_scratch)
    1372             : 
    1373             :       COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
    1374             :          INTENT(IN)                                      :: sb
    1375             : 
    1376             :       CLASS(mp_comm_type), INTENT(IN)                    :: group
    1377             :       INTEGER, INTENT(IN)                                :: my_pos
    1378             :       INTEGER, CONTIGUOUS, DIMENSION(0:), INTENT(IN)     :: p2p
    1379             :       INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:), &
    1380             :          INTENT(IN)                                      :: yzp
    1381             :       INTEGER, CONTIGUOUS, DIMENSION(0:), INTENT(IN)     :: nray
    1382             :       INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:), &
    1383             :          INTENT(IN)                                      :: bo
    1384             :       COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :, :), &
    1385             :          INTENT(INOUT)                                   :: tb
    1386             :       TYPE(fft_scratch_type), INTENT(IN)                 :: fft_scratch
    1387             : 
    1388             :       CHARACTER(len=*), PARAMETER                        :: routineN = 'x_to_yz'
    1389             : 
    1390             :       COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
    1391     1346476 :          POINTER                                         :: rr
    1392             :       COMPLEX(KIND=sp), CONTIGUOUS, DIMENSION(:, :), &
    1393     1346476 :          POINTER                                         :: ss, tt
    1394             :       INTEGER                                            :: handle, ip, ir, ix, ixx, iy, iz, mpr, &
    1395             :                                                             nm, np, nr, nx
    1396     1346476 :       INTEGER, CONTIGUOUS, DIMENSION(:), POINTER         :: rcount, rdispl, scount, sdispl
    1397             : 
    1398     1346476 :       CALL timeset(routineN, handle)
    1399             : 
    1400     1346476 :       np = SIZE(p2p)
    1401     1346476 :       scount => fft_scratch%scount
    1402     1346476 :       rcount => fft_scratch%rcount
    1403     1346476 :       sdispl => fft_scratch%sdispl
    1404     1346476 :       rdispl => fft_scratch%rdispl
    1405             : 
    1406     1346476 :       IF (alltoall_sgl) THEN
    1407         230 :          ss => fft_scratch%ss
    1408         230 :          tt => fft_scratch%tt
    1409     3441099 :          ss(:, :) = CMPLX(sb(:, :), KIND=sp)
    1410     3353860 :          tt(:, :) = 0._sp
    1411             :       ELSE
    1412     1346246 :          rr => fft_scratch%rr
    1413             :       END IF
    1414             : 
    1415     1346476 :       mpr = p2p(my_pos)
    1416     4039428 :       nm = MAXVAL(nray(0:np - 1))
    1417     1346476 :       nr = nray(my_pos)
    1418             : !$OMP PARALLEL DO DEFAULT(NONE), &
    1419             : !$OMP             PRIVATE(ix,nx), &
    1420     1346476 : !$OMP             SHARED(np,p2p,bo,nr,scount,sdispl)
    1421             :       DO ip = 0, np - 1
    1422             :          ix = p2p(ip)
    1423             :          nx = bo(2, 1, ix) - bo(1, 1, ix) + 1
    1424             :          scount(ip) = nr*nx
    1425             :          sdispl(ip) = nr*(bo(1, 1, ix) - 1)
    1426             :       END DO
    1427             : !$OMP END PARALLEL DO
    1428     1346476 :       nx = bo(2, 1, mpr) - bo(1, 1, mpr) + 1
    1429             : !$OMP PARALLEL DO DEFAULT(NONE), &
    1430             : !$OMP             PRIVATE(nr), &
    1431     1346476 : !$OMP             SHARED(np,nray,nx,rcount,rdispl,nm)
    1432             :       DO ip = 0, np - 1
    1433             :          nr = nray(ip)
    1434             :          rcount(ip) = nr*nx
    1435             :          rdispl(ip) = nm*nx*ip
    1436             :       END DO
    1437             : !$OMP END PARALLEL DO
    1438     1346476 :       IF (alltoall_sgl) THEN
    1439         230 :          CALL group%alltoall(ss, scount, sdispl, tt, rcount, rdispl)
    1440             :       ELSE
    1441     1346246 :          CALL group%alltoall(sb, scount, sdispl, rr, rcount, rdispl)
    1442             :       END IF
    1443             : 
    1444     1346476 :       nx = bo(2, 1, mpr) - bo(1, 1, mpr) + 1
    1445             : !$OMP PARALLEL DO DEFAULT(NONE) COLLAPSE(2) &
    1446             : !$OMP             PRIVATE(ixx,ir,iy,iz,ix) &
    1447     1346476 : !$OMP             SHARED(np,nray,nx,alltoall_sgl,yzp,tt,rr,tb)
    1448             :       DO ip = 0, np - 1
    1449             :          DO ix = 1, nx
    1450             :             ixx = nray(ip)*(ix - 1)
    1451             :             IF (alltoall_sgl) THEN
    1452             :                DO ir = 1, nray(ip)
    1453             :                   iy = yzp(1, ir, ip)
    1454             :                   iz = yzp(2, ir, ip)
    1455             :                   tb(iy, iz, ix) = tt(ir + ixx, ip)
    1456             :                END DO
    1457             :             ELSE
    1458             :                DO ir = 1, nray(ip)
    1459             :                   iy = yzp(1, ir, ip)
    1460             :                   iz = yzp(2, ir, ip)
    1461             :                   tb(iy, iz, ix) = rr(ir + ixx, ip)
    1462             :                END DO
    1463             :             END IF
    1464             :          END DO
    1465             :       END DO
    1466             : !$OMP END PARALLEL DO
    1467             : 
    1468     1346476 :       CALL timestop(handle)
    1469             : 
    1470     1346476 :    END SUBROUTINE x_to_yz
    1471             : 
    1472             : ! **************************************************************************************************
    1473             : !> \brief ...
    1474             : !> \param tb ...
    1475             : !> \param group ...
    1476             : !> \param my_pos ...
    1477             : !> \param p2p ...
    1478             : !> \param yzp ...
    1479             : !> \param nray ...
    1480             : !> \param bo ...
    1481             : !> \param sb ...
    1482             : !> \param fft_scratch ...
    1483             : !> \par History
    1484             : !>      15. Feb. 2006 : single precision all_to_all
    1485             : !> \author JGH (14-Jan-2001)
    1486             : ! **************************************************************************************************
    1487     1316734 :    SUBROUTINE yz_to_x(tb, group, my_pos, p2p, yzp, nray, bo, sb, fft_scratch)
    1488             : 
    1489             :       COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :, :), &
    1490             :          INTENT(IN)                                      :: tb
    1491             : 
    1492             :       CLASS(mp_comm_type), INTENT(IN)                    :: group
    1493             :       INTEGER, INTENT(IN)                                :: my_pos
    1494             :       INTEGER, CONTIGUOUS, DIMENSION(0:), INTENT(IN)     :: p2p
    1495             :       INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:), &
    1496             :          INTENT(IN)                                      :: yzp
    1497             :       INTEGER, CONTIGUOUS, DIMENSION(0:), INTENT(IN)     :: nray
    1498             :       INTEGER, DIMENSION(:, :, 0:), INTENT(IN)           :: bo
    1499             :       COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
    1500             :          INTENT(INOUT)                                   :: sb
    1501             :       TYPE(fft_scratch_type), INTENT(IN)                 :: fft_scratch
    1502             : 
    1503             :       CHARACTER(len=*), PARAMETER                        :: routineN = 'yz_to_x'
    1504             : 
    1505             :       COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
    1506     1316734 :          POINTER                                         :: rr
    1507             :       COMPLEX(KIND=sp), CONTIGUOUS, DIMENSION(:, :), &
    1508     1316734 :          POINTER                                         :: ss, tt
    1509             :       INTEGER                                            :: handle, ip, ir, ix, ixx, iy, iz, mpr, &
    1510             :                                                             nm, np, nr, nx
    1511     1316734 :       INTEGER, CONTIGUOUS, DIMENSION(:), POINTER         :: rcount, rdispl, scount, sdispl
    1512             : 
    1513     1316734 :       CALL timeset(routineN, handle)
    1514             : 
    1515     1316734 :       np = SIZE(p2p)
    1516     1316734 :       mpr = p2p(my_pos)
    1517     1316734 :       scount => fft_scratch%scount
    1518     1316734 :       rcount => fft_scratch%rcount
    1519     1316734 :       sdispl => fft_scratch%sdispl
    1520     1316734 :       rdispl => fft_scratch%rdispl
    1521             : 
    1522     1316734 :       IF (alltoall_sgl) THEN
    1523         238 :          ss => fft_scratch%ss
    1524         238 :          tt => fft_scratch%tt
    1525     3703835 :          ss = 0._sp
    1526     3609884 :          tt = 0._sp
    1527             :       ELSE
    1528     1316496 :          rr => fft_scratch%rr
    1529             :       END IF
    1530             : 
    1531     1316734 :       nx = bo(2, 1, mpr) - bo(1, 1, mpr) + 1
    1532             : !$OMP PARALLEL DO DEFAULT(NONE) COLLAPSE(2) &
    1533             : !$OMP             PRIVATE(ip, ixx, ir, iy, iz, ix) &
    1534     1316734 : !$OMP             SHARED(np,nray,nx,alltoall_sgl,yzp,tb,tt,rr)
    1535             :       DO ip = 0, np - 1
    1536             :          DO ix = 1, nx
    1537             :             ixx = nray(ip)*(ix - 1)
    1538             :             IF (alltoall_sgl) THEN
    1539             :                DO ir = 1, nray(ip)
    1540             :                   iy = yzp(1, ir, ip)
    1541             :                   iz = yzp(2, ir, ip)
    1542             :                   tt(ir + ixx, ip) = CMPLX(tb(iy, iz, ix), KIND=sp)
    1543             :                END DO
    1544             :             ELSE
    1545             :                DO ir = 1, nray(ip)
    1546             :                   iy = yzp(1, ir, ip)
    1547             :                   iz = yzp(2, ir, ip)
    1548             :                   rr(ir + ixx, ip) = tb(iy, iz, ix)
    1549             :                END DO
    1550             :             END IF
    1551             :          END DO
    1552             :       END DO
    1553             : !$OMP END PARALLEL DO
    1554     3950202 :       nm = MAXVAL(nray(0:np - 1))
    1555     1316734 :       nr = nray(my_pos)
    1556             : !$OMP PARALLEL DO DEFAULT(NONE), &
    1557             : !$OMP             PRIVATE(ix,nx), &
    1558     1316734 : !$OMP             SHARED(np,p2p,bo,rcount,rdispl,nr)
    1559             :       DO ip = 0, np - 1
    1560             :          ix = p2p(ip)
    1561             :          nx = bo(2, 1, ix) - bo(1, 1, ix) + 1
    1562             :          rcount(ip) = nr*nx
    1563             :          rdispl(ip) = nr*(bo(1, 1, ix) - 1)
    1564             :       END DO
    1565             : !$OMP END PARALLEL DO
    1566     1316734 :       nx = bo(2, 1, mpr) - bo(1, 1, mpr) + 1
    1567             : !$OMP PARALLEL DO DEFAULT(NONE), &
    1568             : !$OMP             PRIVATE(nr), &
    1569     1316734 : !$OMP             SHARED(np,nray,scount,sdispl,nx,nm)
    1570             :       DO ip = 0, np - 1
    1571             :          nr = nray(ip)
    1572             :          scount(ip) = nr*nx
    1573             :          sdispl(ip) = nm*nx*ip
    1574             :       END DO
    1575             : !$OMP END PARALLEL DO
    1576             : 
    1577     1316734 :       IF (alltoall_sgl) THEN
    1578         238 :          CALL group%alltoall(tt, scount, sdispl, ss, rcount, rdispl)
    1579     3703835 :          sb = ss
    1580             :       ELSE
    1581     1316496 :          CALL group%alltoall(rr, scount, sdispl, sb, rcount, rdispl)
    1582             :       END IF
    1583             : 
    1584     1316734 :       CALL timestop(handle)
    1585             : 
    1586     1316734 :    END SUBROUTINE yz_to_x
    1587             : 
    1588             : ! **************************************************************************************************
    1589             : !> \brief ...
    1590             : !> \param sb ...
    1591             : !> \param group ...
    1592             : !> \param dims ...
    1593             : !> \param my_pos ...
    1594             : !> \param p2p ...
    1595             : !> \param yzp ...
    1596             : !> \param nray ...
    1597             : !> \param bo ...
    1598             : !> \param tb ...
    1599             : !> \param fft_scratch ...
    1600             : !> \par History
    1601             : !>      15. Feb. 2006 : single precision all_to_all
    1602             : !> \author JGH (18-Jan-2001)
    1603             : ! **************************************************************************************************
    1604           0 :    SUBROUTINE yz_to_xz(sb, group, dims, my_pos, p2p, yzp, nray, bo, tb, fft_scratch)
    1605             : 
    1606             :       COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
    1607             :          INTENT(IN)                                      :: sb
    1608             : 
    1609             :       CLASS(mp_comm_type), INTENT(IN)                    :: group
    1610             :       INTEGER, DIMENSION(2), INTENT(IN)                  :: dims
    1611             :       INTEGER, INTENT(IN)                                :: my_pos
    1612             :       INTEGER, CONTIGUOUS, DIMENSION(0:), INTENT(IN)                 :: p2p
    1613             :       INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:), INTENT(IN)           :: yzp
    1614             :       INTEGER, CONTIGUOUS, DIMENSION(0:), INTENT(IN)                 :: nray
    1615             :       INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:), INTENT(IN)           :: bo
    1616             :       COMPLEX(KIND=dp), DIMENSION(:, :), INTENT(INOUT), CONTIGUOUS   :: tb
    1617             :       TYPE(fft_scratch_type), INTENT(INOUT)              :: fft_scratch
    1618             : 
    1619             :       CHARACTER(len=*), PARAMETER                        :: routineN = 'yz_to_xz'
    1620             : 
    1621           0 :       COMPLEX(KIND=dp), DIMENSION(:), POINTER, CONTIGUOUS            :: xzbuf, yzbuf
    1622           0 :       COMPLEX(KIND=sp), DIMENSION(:), POINTER, CONTIGUOUS            :: xzbuf_sgl, yzbuf_sgl
    1623             :       INTEGER                                            :: handle, icrs, ip, ipl, ipr, ir, ix, iz, &
    1624             :                                                             jj, jx, jy, jz, myx, myz, np, npx, &
    1625             :                                                             npz, nx, nz, rs_pos
    1626           0 :       INTEGER, DIMENSION(:), POINTER, CONTIGUOUS                     :: pzcoord, rcount, rdispl, scount, sdispl, &
    1627           0 :                                                                         xcor, zcor
    1628           0 :       INTEGER, DIMENSION(:, :), CONTIGUOUS, POINTER                  :: pgrid
    1629             : 
    1630           0 :       CALL timeset(routineN, handle)
    1631             : 
    1632           0 :       np = SIZE(p2p)
    1633             : 
    1634           0 :       rs_pos = p2p(my_pos)
    1635             : 
    1636           0 :       IF (alltoall_sgl) THEN
    1637           0 :          yzbuf_sgl => fft_scratch%yzbuf_sgl
    1638           0 :          xzbuf_sgl => fft_scratch%xzbuf_sgl
    1639             :       ELSE
    1640           0 :          yzbuf => fft_scratch%yzbuf
    1641           0 :          xzbuf => fft_scratch%xzbuf
    1642             :       END IF
    1643           0 :       npx = dims(1)
    1644           0 :       npz = dims(2)
    1645           0 :       pgrid => fft_scratch%pgrid
    1646           0 :       xcor => fft_scratch%xcor
    1647           0 :       zcor => fft_scratch%zcor
    1648           0 :       pzcoord => fft_scratch%pzcoord
    1649           0 :       scount => fft_scratch%scount
    1650           0 :       rcount => fft_scratch%rcount
    1651           0 :       sdispl => fft_scratch%sdispl
    1652           0 :       rdispl => fft_scratch%rdispl
    1653             : 
    1654           0 :       nx = SIZE(sb, 2)
    1655             : 
    1656             : ! If the send and recv counts are not already cached, then
    1657             : ! calculate and store them
    1658           0 :       IF (fft_scratch%in == 0) THEN
    1659             : 
    1660           0 :          scount = 0
    1661             : 
    1662           0 :          DO ix = 0, npx - 1
    1663           0 :             ip = pgrid(ix, 0)
    1664           0 :             xcor(bo(1, 1, ip):bo(2, 1, ip)) = ix
    1665             :          END DO
    1666           0 :          DO iz = 0, npz - 1
    1667           0 :             ip = pgrid(0, iz)
    1668           0 :             zcor(bo(1, 3, ip):bo(2, 3, ip)) = iz
    1669             :          END DO
    1670           0 :          DO jx = 1, nx
    1671           0 :             IF (alltoall_sgl) THEN
    1672           0 :                DO ir = 1, nray(my_pos)
    1673           0 :                   jy = yzp(1, ir, my_pos)
    1674           0 :                   jz = yzp(2, ir, my_pos)
    1675           0 :                   ip = pgrid(xcor(jx), zcor(jz))
    1676           0 :                   scount(ip) = scount(ip) + 1
    1677             :                END DO
    1678             :             ELSE
    1679           0 :                DO ir = 1, nray(my_pos)
    1680           0 :                   jy = yzp(1, ir, my_pos)
    1681           0 :                   jz = yzp(2, ir, my_pos)
    1682           0 :                   ip = pgrid(xcor(jx), zcor(jz))
    1683           0 :                   scount(ip) = scount(ip) + 1
    1684             :                END DO
    1685             :             END IF
    1686             :          END DO
    1687             : 
    1688           0 :          CALL group%alltoall(scount, rcount, 1)
    1689           0 :          fft_scratch%yzcount = scount
    1690           0 :          fft_scratch%xzcount = rcount
    1691             : 
    1692             :          ! Work out the correct displacements in the buffers
    1693           0 :          sdispl(0) = 0
    1694           0 :          rdispl(0) = 0
    1695           0 :          DO ip = 1, np - 1
    1696           0 :             sdispl(ip) = sdispl(ip - 1) + scount(ip - 1)
    1697           0 :             rdispl(ip) = rdispl(ip - 1) + rcount(ip - 1)
    1698             :          END DO
    1699             : 
    1700           0 :          fft_scratch%yzdispl = sdispl
    1701           0 :          fft_scratch%xzdispl = rdispl
    1702             : 
    1703           0 :          icrs = 0
    1704           0 :          DO ip = 0, np - 1
    1705           0 :             IF (scount(ip) /= 0) icrs = icrs + 1
    1706           0 :             IF (rcount(ip) /= 0) icrs = icrs + 1
    1707             :          END DO
    1708           0 :          CALL group%sum(icrs)
    1709           0 :          fft_scratch%rsratio = REAL(icrs, KIND=dp)/(REAL(2*np, KIND=dp)*REAL(np, KIND=dp))
    1710             : 
    1711           0 :          fft_scratch%in = 1
    1712             :       ELSE
    1713           0 :          scount = fft_scratch%yzcount
    1714           0 :          rcount = fft_scratch%xzcount
    1715           0 :          sdispl = fft_scratch%yzdispl
    1716           0 :          rdispl = fft_scratch%xzdispl
    1717             :       END IF
    1718             : 
    1719             : ! Do the actual packing
    1720             : !$OMP PARALLEL DO DEFAULT(NONE), &
    1721             : !$OMP             PRIVATE(ipl,jj,nx,ir,jx,jy,jz),&
    1722             : !$OMP             SHARED(np,p2p,pzcoord,bo,nray,yzp,zcor),&
    1723             : !$OMP             SHARED(yzbuf,sb,scount,sdispl,my_pos),&
    1724           0 : !$OMP             SHARED(yzbuf_sgl,alltoall_sgl)
    1725             :       DO ip = 0, np - 1
    1726             :          IF (scount(ip) == 0) CYCLE
    1727             :          ipl = p2p(ip)
    1728             :          jj = 0
    1729             :          nx = bo(2, 1, ipl) - bo(1, 1, ipl) + 1
    1730             :          DO ir = 1, nray(my_pos)
    1731             :             jz = yzp(2, ir, my_pos)
    1732             :             IF (zcor(jz) == pzcoord(ipl)) THEN
    1733             :                jj = jj + 1
    1734             :                jy = yzp(1, ir, my_pos)
    1735             :                IF (alltoall_sgl) THEN
    1736             :                   DO jx = 0, nx - 1
    1737             :                      yzbuf_sgl(sdispl(ip) + jj + jx*scount(ip)/nx) = CMPLX(sb(ir, jx + bo(1, 1, ipl)), KIND=sp)
    1738             :                   END DO
    1739             :                ELSE
    1740             :                   DO jx = 0, nx - 1
    1741             :                      yzbuf(sdispl(ip) + jj + jx*scount(ip)/nx) = sb(ir, jx + bo(1, 1, ipl))
    1742             :                   END DO
    1743             :                END IF
    1744             :             END IF
    1745             :          END DO
    1746             :       END DO
    1747             : !$OMP END PARALLEL DO
    1748             : 
    1749           0 :       IF (alltoall_sgl) THEN
    1750           0 :          CALL group%alltoall(yzbuf_sgl, scount, sdispl, xzbuf_sgl, rcount, rdispl)
    1751             :       ELSE
    1752           0 :          IF (fft_scratch%rsratio < ratio_sparse_alltoall) THEN
    1753           0 :             CALL sparse_alltoall(yzbuf, scount, sdispl, xzbuf, rcount, rdispl, group)
    1754             :          ELSE
    1755           0 :             CALL group%alltoall(yzbuf, scount, sdispl, xzbuf, rcount, rdispl)
    1756             :          END IF
    1757             :       END IF
    1758             : 
    1759           0 :       myx = fft_scratch%sizes%r_pos(1)
    1760           0 :       myz = fft_scratch%sizes%r_pos(2)
    1761           0 :       nz = bo(2, 3, rs_pos) - bo(1, 3, rs_pos) + 1
    1762             : 
    1763             : !$OMP PARALLEL DO DEFAULT(NONE), &
    1764             : !$OMP             PRIVATE(ipr,jj,ir,jx,jy,jz),&
    1765             : !$OMP             SHARED(tb,np,p2p,bo,rs_pos,nray),&
    1766             : !$OMP             SHARED(yzp,alltoall_sgl,zcor,myz),&
    1767           0 : !$OMP             SHARED(xzbuf,xzbuf_sgl,nz,rdispl)
    1768             :       DO ip = 0, np - 1
    1769             :          ipr = p2p(ip)
    1770             :          jj = 0
    1771             :          DO jx = 0, bo(2, 1, rs_pos) - bo(1, 1, rs_pos)
    1772             :             DO ir = 1, nray(ip)
    1773             :                jz = yzp(2, ir, ip)
    1774             :                IF (alltoall_sgl) THEN
    1775             :                   IF (zcor(jz) == myz) THEN
    1776             :                      jj = jj + 1
    1777             :                      jy = yzp(1, ir, ip)
    1778             :                      jz = jz - bo(1, 3, rs_pos) + 1
    1779             :                      tb(jy, jz + jx*nz) = xzbuf_sgl(jj + rdispl(ipr))
    1780             :                   END IF
    1781             :                ELSE
    1782             :                   IF (zcor(jz) == myz) THEN
    1783             :                      jj = jj + 1
    1784             :                      jy = yzp(1, ir, ip)
    1785             :                      jz = jz - bo(1, 3, rs_pos) + 1
    1786             :                      tb(jy, jz + jx*nz) = xzbuf(jj + rdispl(ipr))
    1787             :                   END IF
    1788             :                END IF
    1789             :             END DO
    1790             :          END DO
    1791             :       END DO
    1792             : !$OMP END PARALLEL DO
    1793             : 
    1794           0 :       CALL timestop(handle)
    1795             : 
    1796           0 :    END SUBROUTINE yz_to_xz
    1797             : 
    1798             : ! **************************************************************************************************
    1799             : !> \brief ...
    1800             : !> \param sb ...
    1801             : !> \param group ...
    1802             : !> \param dims ...
    1803             : !> \param my_pos ...
    1804             : !> \param p2p ...
    1805             : !> \param yzp ...
    1806             : !> \param nray ...
    1807             : !> \param bo ...
    1808             : !> \param tb ...
    1809             : !> \param fft_scratch ...
    1810             : !> \par History
    1811             : !>      15. Feb. 2006 : single precision all_to_all
    1812             : !> \author JGH (19-Jan-2001)
    1813             : ! **************************************************************************************************
    1814           0 :    SUBROUTINE xz_to_yz(sb, group, dims, my_pos, p2p, yzp, nray, bo, tb, fft_scratch)
    1815             : 
    1816             :       COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
    1817             :          INTENT(IN)                                      :: sb
    1818             : 
    1819             :       CLASS(mp_comm_type), INTENT(IN)                    :: group
    1820             :       INTEGER, DIMENSION(2), INTENT(IN)                  :: dims
    1821             :       INTEGER, INTENT(IN)                                :: my_pos
    1822             :       INTEGER, CONTIGUOUS, DIMENSION(0:), INTENT(IN)                 :: p2p
    1823             :       INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:), INTENT(IN)           :: yzp
    1824             :       INTEGER, CONTIGUOUS, DIMENSION(0:), INTENT(IN)                 :: nray
    1825             :       INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:), INTENT(IN)           :: bo
    1826             :       COMPLEX(KIND=dp), DIMENSION(:, :), INTENT(INOUT), CONTIGUOUS   :: tb
    1827             :       TYPE(fft_scratch_type), INTENT(INOUT)              :: fft_scratch
    1828             : 
    1829             :       CHARACTER(len=*), PARAMETER                        :: routineN = 'xz_to_yz'
    1830             : 
    1831           0 :       COMPLEX(KIND=dp), DIMENSION(:), POINTER, CONTIGUOUS            :: xzbuf, yzbuf
    1832           0 :       COMPLEX(KIND=sp), DIMENSION(:), POINTER, CONTIGUOUS            :: xzbuf_sgl, yzbuf_sgl
    1833             :       INTEGER                                            :: handle, icrs, ip, ipl, ir, ix, ixx, iz, &
    1834             :                                                             jj, jx, jy, jz, mp, myx, myz, np, npx, &
    1835             :                                                             npz, nx, nz
    1836           0 :       INTEGER, DIMENSION(:), POINTER, CONTIGUOUS                     :: pzcoord, rcount, rdispl, scount, sdispl, &
    1837           0 :                                                                         xcor, zcor
    1838           0 :       INTEGER, DIMENSION(:, :), CONTIGUOUS, POINTER                  :: pgrid
    1839             : 
    1840           0 :       CALL timeset(routineN, handle)
    1841             : 
    1842           0 :       np = SIZE(p2p)
    1843             : 
    1844           0 :       IF (alltoall_sgl) THEN
    1845           0 :          yzbuf_sgl => fft_scratch%yzbuf_sgl
    1846           0 :          xzbuf_sgl => fft_scratch%xzbuf_sgl
    1847             :       ELSE
    1848           0 :          yzbuf => fft_scratch%yzbuf
    1849           0 :          xzbuf => fft_scratch%xzbuf
    1850             :       END IF
    1851           0 :       npx = dims(1)
    1852           0 :       npz = dims(2)
    1853           0 :       pgrid => fft_scratch%pgrid
    1854           0 :       xcor => fft_scratch%xcor
    1855           0 :       zcor => fft_scratch%zcor
    1856           0 :       pzcoord => fft_scratch%pzcoord
    1857           0 :       scount => fft_scratch%scount
    1858           0 :       rcount => fft_scratch%rcount
    1859           0 :       sdispl => fft_scratch%sdispl
    1860           0 :       rdispl => fft_scratch%rdispl
    1861             : 
    1862             : ! If the send and recv counts are not already cached, then
    1863             : ! calculate and store them
    1864           0 :       IF (fft_scratch%in == 0) THEN
    1865             : 
    1866           0 :          rcount = 0
    1867           0 :          nx = MAXVAL(bo(2, 1, :))
    1868             : 
    1869           0 :          DO ix = 0, npx - 1
    1870           0 :             ip = pgrid(ix, 0)
    1871           0 :             xcor(bo(1, 1, ip):bo(2, 1, ip)) = ix
    1872             :          END DO
    1873           0 :          DO iz = 0, npz - 1
    1874           0 :             ip = pgrid(0, iz)
    1875           0 :             zcor(bo(1, 3, ip):bo(2, 3, ip)) = iz
    1876             :          END DO
    1877           0 :          DO jx = 1, nx
    1878           0 :             DO ir = 1, nray(my_pos)
    1879           0 :                jy = yzp(1, ir, my_pos)
    1880           0 :                jz = yzp(2, ir, my_pos)
    1881           0 :                ip = pgrid(xcor(jx), zcor(jz))
    1882           0 :                rcount(ip) = rcount(ip) + 1
    1883             :             END DO
    1884             :          END DO
    1885             : 
    1886           0 :          CALL group%alltoall(rcount, scount, 1)
    1887           0 :          fft_scratch%xzcount = scount
    1888           0 :          fft_scratch%yzcount = rcount
    1889             : 
    1890             :          ! Work out the correct displacements in the buffers
    1891           0 :          sdispl(0) = 0
    1892           0 :          rdispl(0) = 0
    1893           0 :          DO ip = 1, np - 1
    1894           0 :             sdispl(ip) = sdispl(ip - 1) + scount(ip - 1)
    1895           0 :             rdispl(ip) = rdispl(ip - 1) + rcount(ip - 1)
    1896             :          END DO
    1897             : 
    1898           0 :          fft_scratch%xzdispl = sdispl
    1899           0 :          fft_scratch%yzdispl = rdispl
    1900             : 
    1901           0 :          icrs = 0
    1902           0 :          DO ip = 0, np - 1
    1903           0 :             IF (scount(ip) /= 0) icrs = icrs + 1
    1904           0 :             IF (rcount(ip) /= 0) icrs = icrs + 1
    1905             :          END DO
    1906           0 :          CALL group%sum(icrs)
    1907           0 :          fft_scratch%rsratio = REAL(icrs, KIND=dp)/(REAL(2*np, KIND=dp)*REAL(np, KIND=dp))
    1908             : 
    1909           0 :          fft_scratch%in = 1
    1910             :       ELSE
    1911           0 :          scount = fft_scratch%xzcount
    1912           0 :          rcount = fft_scratch%yzcount
    1913           0 :          sdispl = fft_scratch%xzdispl
    1914           0 :          rdispl = fft_scratch%yzdispl
    1915             :       END IF
    1916             : 
    1917             : ! Now do the actual packing
    1918           0 :       myx = fft_scratch%sizes%r_pos(1)
    1919           0 :       myz = fft_scratch%sizes%r_pos(2)
    1920           0 :       mp = p2p(my_pos)
    1921           0 :       nz = bo(2, 3, mp) - bo(1, 3, mp) + 1
    1922           0 :       nx = bo(2, 1, mp) - bo(1, 1, mp) + 1
    1923             : 
    1924             : !$OMP PARALLEL DO DEFAULT(NONE), &
    1925             : !$OMP             PRIVATE(jj,ipl,ir,jx,jy,jz,ixx),&
    1926             : !$OMP             SHARED(np,p2p,nray,yzp,zcor,myz,bo,mp),&
    1927             : !$OMP             SHARED(alltoall_sgl,nx,scount,sdispl),&
    1928           0 : !$OMP             SHARED(xzbuf,xzbuf_sgl,sb,nz)
    1929             :       DO ip = 0, np - 1
    1930             :          jj = 0
    1931             :          ipl = p2p(ip)
    1932             :          DO ir = 1, nray(ip)
    1933             :             jz = yzp(2, ir, ip)
    1934             :             IF (zcor(jz) == myz) THEN
    1935             :                jj = jj + 1
    1936             :                jy = yzp(1, ir, ip)
    1937             :                jz = yzp(2, ir, ip) - bo(1, 3, mp) + 1
    1938             :                IF (alltoall_sgl) THEN
    1939             :                   DO jx = 0, nx - 1
    1940             :                      ixx = jj + jx*scount(ipl)/nx
    1941             :                      xzbuf_sgl(ixx + sdispl(ipl)) = CMPLX(sb(jy, jz + jx*nz), KIND=sp)
    1942             :                   END DO
    1943             :                ELSE
    1944             :                   DO jx = 0, nx - 1
    1945             :                      ixx = jj + jx*scount(ipl)/nx
    1946             :                      xzbuf(ixx + sdispl(ipl)) = sb(jy, jz + jx*nz)
    1947             :                   END DO
    1948             :                END IF
    1949             :             END IF
    1950             :          END DO
    1951             :       END DO
    1952             : !$OMP END PARALLEL DO
    1953             : 
    1954           0 :       IF (alltoall_sgl) THEN
    1955           0 :          CALL group%alltoall(xzbuf_sgl, scount, sdispl, yzbuf_sgl, rcount, rdispl)
    1956             :       ELSE
    1957           0 :          IF (fft_scratch%rsratio < ratio_sparse_alltoall) THEN
    1958           0 :             CALL sparse_alltoall(xzbuf, scount, sdispl, yzbuf, rcount, rdispl, group)
    1959             :          ELSE
    1960           0 :             CALL group%alltoall(xzbuf, scount, sdispl, yzbuf, rcount, rdispl)
    1961             :          END IF
    1962             :       END IF
    1963             : 
    1964             : !$OMP PARALLEL DO DEFAULT(NONE), &
    1965             : !$OMP             PRIVATE(ipl,jj,nx,ir,jx,jy,jz),&
    1966             : !$OMP             SHARED(p2p,pzcoord,bo,nray,my_pos,yzp),&
    1967             : !$OMP             SHARED(rcount,rdispl,tb,yzbuf,zcor),&
    1968           0 : !$OMP             SHARED(yzbuf_sgl,alltoall_sgl,np)
    1969             :       DO ip = 0, np - 1
    1970             :          IF (rcount(ip) == 0) CYCLE
    1971             :          ipl = p2p(ip)
    1972             :          jj = 0
    1973             :          nx = bo(2, 1, ipl) - bo(1, 1, ipl) + 1
    1974             :          DO ir = 1, nray(my_pos)
    1975             :             jz = yzp(2, ir, my_pos)
    1976             :             IF (zcor(jz) == pzcoord(ipl)) THEN
    1977             :                jj = jj + 1
    1978             :                jy = yzp(1, ir, my_pos)
    1979             :                IF (alltoall_sgl) THEN
    1980             :                   DO jx = 0, nx - 1
    1981             :                      tb(ir, jx + bo(1, 1, ipl)) = yzbuf_sgl(rdispl(ip) + jj + jx*rcount(ip)/nx)
    1982             :                   END DO
    1983             :                ELSE
    1984             :                   DO jx = 0, nx - 1
    1985             :                      tb(ir, jx + bo(1, 1, ipl)) = yzbuf(rdispl(ip) + jj + jx*rcount(ip)/nx)
    1986             :                   END DO
    1987             :                END IF
    1988             :             END IF
    1989             :          END DO
    1990             :       END DO
    1991             : !$OMP END PARALLEL DO
    1992             : 
    1993           0 :       CALL timestop(handle)
    1994             : 
    1995           0 :    END SUBROUTINE xz_to_yz
    1996             : 
    1997             : ! **************************************************************************************************
    1998             : !> \brief ...
    1999             : !> \param cin ...
    2000             : !> \param boin ...
    2001             : !> \param boout ...
    2002             : !> \param sout ...
    2003             : !> \param fft_scratch ...
    2004             : !> \par History
    2005             : !>      none
    2006             : !> \author JGH (20-Jan-2001)
    2007             : ! **************************************************************************************************
    2008           0 :    SUBROUTINE cube_transpose_1(cin, boin, boout, sout, fft_scratch)
    2009             : 
    2010             :       COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
    2011             :          INTENT(IN)                                      :: cin
    2012             :       INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:), &
    2013             :          INTENT(IN)                                      :: boin, boout
    2014             :       COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
    2015             :          INTENT(OUT)                                     :: sout
    2016             :       TYPE(fft_scratch_type), INTENT(IN)                 :: fft_scratch
    2017             : 
    2018             :       CHARACTER(len=*), PARAMETER                        :: routineN = 'cube_transpose_1'
    2019             : 
    2020             :       COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
    2021           0 :          POINTER                                         :: rbuf
    2022             :       INTEGER                                            :: handle, ip, ipl, ir, is, ixy, iz, mip, &
    2023             :                                                             mz, np, nx, ny, nz
    2024           0 :       INTEGER, CONTIGUOUS, DIMENSION(:), POINTER         :: rcount, rdispl, scount, sdispl
    2025           0 :       INTEGER, CONTIGUOUS, DIMENSION(:, :), POINTER      :: pgrid
    2026             :       INTEGER, DIMENSION(2)                              :: dim, pos
    2027             : 
    2028           0 :       CALL timeset(routineN, handle)
    2029             : 
    2030           0 :       mip = fft_scratch%mip
    2031           0 :       dim = fft_scratch%dim
    2032           0 :       pos = fft_scratch%pos
    2033           0 :       scount => fft_scratch%scount
    2034           0 :       rcount => fft_scratch%rcount
    2035           0 :       sdispl => fft_scratch%sdispl
    2036           0 :       rdispl => fft_scratch%rdispl
    2037           0 :       pgrid => fft_scratch%pgcube
    2038           0 :       np = DIM(2)
    2039             : 
    2040           0 :       nx = boin(2, 1, mip) - boin(1, 1, mip) + 1
    2041           0 :       nz = boin(2, 3, mip) - boin(1, 3, mip) + 1
    2042             : 
    2043             : !$OMP PARALLEL DO DEFAULT(NONE), &
    2044             : !$OMP             PRIVATE(ipl,ny), &
    2045           0 : !$OMP             SHARED(np,pgrid,boout,scount,sdispl,nx,nz)
    2046             :       DO ip = 0, np - 1
    2047             :          ipl = pgrid(ip, 2)
    2048             :          ny = boout(2, 2, ipl) - boout(1, 2, ipl) + 1
    2049             :          scount(ip) = nx*nz*ny
    2050             :          sdispl(ip) = nx*nz*(boout(1, 2, ipl) - 1)
    2051             :       END DO
    2052             : !$OMP END PARALLEL DO
    2053           0 :       ny = boout(2, 2, mip) - boout(1, 2, mip) + 1
    2054           0 :       mz = MAXVAL(boin(2, 3, :) - boin(1, 3, :) + 1)
    2055             : !$OMP PARALLEL DO DEFAULT(NONE), &
    2056             : !$OMP             PRIVATE(ipl,nz), &
    2057           0 : !$OMP             SHARED(np,pgrid,boin,nx,ny,rcount,rdispl,mz)
    2058             :       DO ip = 0, np - 1
    2059             :          ipl = pgrid(ip, 2)
    2060             :          nz = boin(2, 3, ipl) - boin(1, 3, ipl) + 1
    2061             :          rcount(ip) = nx*nz*ny
    2062             :          rdispl(ip) = nx*ny*mz*ip
    2063             :       END DO
    2064             : !$OMP END PARALLEL DO
    2065             : 
    2066           0 :       rbuf => fft_scratch%rbuf1
    2067             : 
    2068           0 :       CALL fft_scratch%cart_sub_comm(2)%alltoall(cin, scount, sdispl, rbuf, rcount, rdispl)
    2069             : 
    2070             : !$OMP PARALLEL DO DEFAULT(NONE) COLLAPSE(2) &
    2071             : !$OMP             PRIVATE(ip,ipl,nz,iz,is,ir) &
    2072           0 : !$OMP             SHARED(nx,ny,np,pgrid,boin,sout,rbuf)
    2073             :       DO ixy = 1, nx*ny
    2074             :          DO ip = 0, np - 1
    2075             :             ipl = pgrid(ip, 2)
    2076             :             nz = boin(2, 3, ipl) - boin(1, 3, ipl) + 1
    2077             :             DO iz = 1, nz
    2078             :                is = boin(1, 3, ipl) + iz - 1
    2079             :                ir = iz + nz*(ixy - 1)
    2080             :                sout(is, ixy) = rbuf(ir, ip)
    2081             :             END DO
    2082             :          END DO
    2083             :       END DO
    2084             : !$OMP END PARALLEL DO
    2085             : 
    2086           0 :       CALL timestop(handle)
    2087             : 
    2088           0 :    END SUBROUTINE cube_transpose_1
    2089             : 
    2090             : ! **************************************************************************************************
    2091             : !> \brief ...
    2092             : !> \param cin ...
    2093             : !> \param boin ...
    2094             : !> \param boout ...
    2095             : !> \param sout ...
    2096             : !> \param fft_scratch ...
    2097             : ! **************************************************************************************************
    2098           0 :    SUBROUTINE cube_transpose_2(cin, boin, boout, sout, fft_scratch)
    2099             : 
    2100             :       COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
    2101             :          INTENT(IN)                                      :: cin
    2102             :       INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:), &
    2103             :          INTENT(IN)                                      :: boin, boout
    2104             :       COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
    2105             :          INTENT(OUT)                                     :: sout
    2106             :       TYPE(fft_scratch_type), INTENT(IN)                 :: fft_scratch
    2107             : 
    2108             :       CHARACTER(len=*), PARAMETER                        :: routineN = 'cube_transpose_2'
    2109             : 
    2110             :       COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
    2111           0 :          POINTER                                         :: rbuf
    2112             :       INTEGER                                            :: handle, ip, ipl, ir, ixy, iz, mip, mz, &
    2113             :                                                             np, nx, ny, nz
    2114           0 :       INTEGER, CONTIGUOUS, DIMENSION(:), POINTER         :: rcount, rdispl, scount, sdispl
    2115           0 :       INTEGER, CONTIGUOUS, DIMENSION(:, :), POINTER      :: pgrid
    2116             :       INTEGER, DIMENSION(2)                              :: dim, pos
    2117             :       TYPE(mp_comm_type)                                 :: sub_group
    2118             : 
    2119           0 :       CALL timeset(routineN, handle)
    2120             : 
    2121           0 :       sub_group = fft_scratch%cart_sub_comm(2)
    2122           0 :       mip = fft_scratch%mip
    2123           0 :       dim = fft_scratch%dim
    2124           0 :       pos = fft_scratch%pos
    2125           0 :       scount => fft_scratch%scount
    2126           0 :       rcount => fft_scratch%rcount
    2127           0 :       sdispl => fft_scratch%sdispl
    2128           0 :       rdispl => fft_scratch%rdispl
    2129           0 :       pgrid => fft_scratch%pgcube
    2130           0 :       np = DIM(2)
    2131             : 
    2132           0 :       nx = boin(2, 1, mip) - boin(1, 1, mip) + 1
    2133           0 :       ny = boin(2, 2, mip) - boin(1, 2, mip) + 1
    2134           0 :       mz = MAXVAL(boout(2, 3, :) - boout(1, 3, :) + 1)
    2135             : 
    2136           0 :       rbuf => fft_scratch%rbuf2
    2137             : 
    2138             : !$OMP PARALLEL DEFAULT(NONE), &
    2139             : !$OMP          PRIVATE(ip,ipl,nz,iz,ir), &
    2140           0 : !$OMP          SHARED(nx,ny,np,pgrid,boout,rbuf,cin,scount,sdispl,mz)
    2141             : !$OMP DO COLLAPSE(2)
    2142             :       DO ixy = 1, nx*ny
    2143             :          DO ip = 0, np - 1
    2144             :             ipl = pgrid(ip, 2)
    2145             :             nz = boout(2, 3, ipl) - boout(1, 3, ipl) + 1
    2146             :             DO iz = boout(1, 3, ipl), boout(2, 3, ipl)
    2147             :                ir = iz - boout(1, 3, ipl) + 1 + (ixy - 1)*nz
    2148             :                rbuf(ir, ip) = cin(iz, ixy)
    2149             :             END DO
    2150             :          END DO
    2151             :       END DO
    2152             : !$OMP END DO
    2153             : !$OMP DO
    2154             :       DO ip = 0, np - 1
    2155             :          ipl = pgrid(ip, 2)
    2156             :          nz = boout(2, 3, ipl) - boout(1, 3, ipl) + 1
    2157             :          scount(ip) = nx*ny*nz
    2158             :          sdispl(ip) = nx*ny*mz*ip
    2159             :       END DO
    2160             : !$OMP END DO
    2161             : !$OMP END PARALLEL
    2162           0 :       nz = boout(2, 3, mip) - boout(1, 3, mip) + 1
    2163             : !$OMP PARALLEL DO DEFAULT(NONE), &
    2164             : !$OMP             PRIVATE(ipl,ny), &
    2165           0 : !$OMP             SHARED(np,pgrid,boin,nx,nz,rcount,rdispl)
    2166             :       DO ip = 0, np - 1
    2167             :          ipl = pgrid(ip, 2)
    2168             :          ny = boin(2, 2, ipl) - boin(1, 2, ipl) + 1
    2169             :          rcount(ip) = nx*ny*nz
    2170             :          rdispl(ip) = nx*nz*(boin(1, 2, ipl) - 1)
    2171             :       END DO
    2172             : !$OMP END PARALLEL DO
    2173             : 
    2174           0 :       CALL sub_group%alltoall(rbuf, scount, sdispl, sout, rcount, rdispl)
    2175             : 
    2176           0 :       CALL timestop(handle)
    2177             : 
    2178           0 :    END SUBROUTINE cube_transpose_2
    2179             : 
    2180             : ! **************************************************************************************************
    2181             : !> \brief ...
    2182             : !> \param cin ...
    2183             : !> \param boin ...
    2184             : !> \param boout ...
    2185             : !> \param sout ...
    2186             : !> \param fft_scratch ...
    2187             : ! **************************************************************************************************
    2188           0 :    SUBROUTINE cube_transpose_3(cin, boin, boout, sout, fft_scratch)
    2189             : 
    2190             :       COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
    2191             :          INTENT(IN)                                      :: cin
    2192             :       INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:), &
    2193             :          INTENT(IN)                                      :: boin, boout
    2194             :       COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
    2195             :          INTENT(OUT)                                     :: sout
    2196             :       TYPE(fft_scratch_type), INTENT(IN)                 :: fft_scratch
    2197             : 
    2198             :       CHARACTER(len=*), PARAMETER                        :: routineN = 'cube_transpose_3'
    2199             : 
    2200             :       COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
    2201           0 :          POINTER                                         :: rbuf
    2202             :       INTEGER                                            :: handle, ip, ipl, ir, is, ixz, iy, lb, &
    2203             :                                                             mip, my, my_id, np, num_threads, nx, &
    2204             :                                                             ny, nz, ub
    2205           0 :       INTEGER, CONTIGUOUS, DIMENSION(:), POINTER         :: rcount, rdispl, scount, sdispl
    2206           0 :       INTEGER, CONTIGUOUS, DIMENSION(:, :), POINTER      :: pgrid
    2207             :       INTEGER, DIMENSION(2)                              :: dim, pos
    2208             :       TYPE(mp_comm_type)                                 :: sub_group
    2209             : 
    2210           0 :       CALL timeset(routineN, handle)
    2211             : 
    2212           0 :       sub_group = fft_scratch%cart_sub_comm(1)
    2213           0 :       mip = fft_scratch%mip
    2214           0 :       dim = fft_scratch%dim
    2215           0 :       pos = fft_scratch%pos
    2216           0 :       np = DIM(1)
    2217           0 :       scount => fft_scratch%scount
    2218           0 :       rcount => fft_scratch%rcount
    2219           0 :       sdispl => fft_scratch%sdispl
    2220           0 :       rdispl => fft_scratch%rdispl
    2221           0 :       pgrid => fft_scratch%pgcube
    2222             : 
    2223           0 :       ny = boin(2, 2, mip) - boin(1, 2, mip) + 1
    2224           0 :       nz = boin(2, 3, mip) - boin(1, 3, mip) + 1
    2225             : !$OMP PARALLEL DO DEFAULT(NONE), &
    2226             : !$OMP             PRIVATE(ipl, nx), &
    2227           0 : !$OMP             SHARED(np,pgrid,boout,ny,nz,scount,sdispl)
    2228             :       DO ip = 0, np - 1
    2229             :          ipl = pgrid(ip, 1)
    2230             :          nx = boout(2, 1, ipl) - boout(1, 1, ipl) + 1
    2231             :          scount(ip) = nx*nz*ny
    2232             :          sdispl(ip) = ny*nz*(boout(1, 1, ipl) - 1)
    2233             :       END DO
    2234             : !$OMP END PARALLEL DO
    2235           0 :       nx = boout(2, 1, mip) - boout(1, 1, mip) + 1
    2236           0 :       my = MAXVAL(boin(2, 2, :) - boin(1, 2, :) + 1)
    2237             : !$OMP PARALLEL DO DEFAULT(NONE), &
    2238             : !$OMP             PRIVATE(ipl, ny), &
    2239           0 : !$OMP             SHARED(np,pgrid,boin,nx,nz,my,rcount,rdispl)
    2240             :       DO ip = 0, np - 1
    2241             :          ipl = pgrid(ip, 1)
    2242             :          ny = boin(2, 2, ipl) - boin(1, 2, ipl) + 1
    2243             :          rcount(ip) = nx*nz*ny
    2244             :          rdispl(ip) = nx*my*nz*ip
    2245             :       END DO
    2246             : !$OMP END PARALLEL DO
    2247             : 
    2248           0 :       rbuf => fft_scratch%rbuf3
    2249           0 :       num_threads = 1
    2250           0 :       my_id = 0
    2251             : !$OMP PARALLEL DEFAULT(NONE), &
    2252             : !$OMP          PRIVATE(NUM_THREADS, my_id, lb, ub) &
    2253           0 : !$OMP          SHARED(rbuf)
    2254             : !$    num_threads = MIN(omp_get_max_threads(), SIZE(rbuf, 2))
    2255             : !$    my_id = omp_get_thread_num()
    2256             :       IF (my_id < num_threads) THEN
    2257             :          lb = (SIZE(rbuf, 2)*my_id)/num_threads
    2258             :          ub = (SIZE(rbuf, 2)*(my_id + 1))/num_threads - 1
    2259             :          rbuf(:, lb:ub) = 0.0_dp
    2260             :       END IF
    2261             : !$OMP END PARALLEL
    2262             : 
    2263           0 :       CALL sub_group%alltoall(cin, scount, sdispl, rbuf, rcount, rdispl)
    2264             : 
    2265             : !$OMP PARALLEL DO DEFAULT(NONE) COLLAPSE(2) &
    2266             : !$OMP             PRIVATE(ip,ipl,ny,iy,is,ir) &
    2267           0 : !$OMP             SHARED(nx,nz,np,pgrid,boin,rbuf,sout)
    2268             :       DO ixz = 1, nx*nz
    2269             :          DO ip = 0, np - 1
    2270             :             ipl = pgrid(ip, 1)
    2271             :             ny = boin(2, 2, ipl) - boin(1, 2, ipl) + 1
    2272             :             DO iy = 1, ny
    2273             :                is = boin(1, 2, ipl) + iy - 1
    2274             :                ir = iy + ny*(ixz - 1)
    2275             :                sout(is, ixz) = rbuf(ir, ip)
    2276             :             END DO
    2277             :          END DO
    2278             :       END DO
    2279             : !$OMP END PARALLEL DO
    2280             : 
    2281           0 :       CALL timestop(handle)
    2282             : 
    2283           0 :    END SUBROUTINE cube_transpose_3
    2284             : 
    2285             : ! **************************************************************************************************
    2286             : !> \brief ...
    2287             : !> \param cin ...
    2288             : !> \param boin ...
    2289             : !> \param boout ...
    2290             : !> \param sout ...
    2291             : !> \param fft_scratch ...
    2292             : ! **************************************************************************************************
    2293           0 :    SUBROUTINE cube_transpose_4(cin, boin, boout, sout, fft_scratch)
    2294             : 
    2295             :       COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
    2296             :          INTENT(IN)                                      :: cin
    2297             :       INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:), &
    2298             :          INTENT(IN)                                      :: boin, boout
    2299             :       COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
    2300             :          INTENT(OUT)                                     :: sout
    2301             :       TYPE(fft_scratch_type), INTENT(IN)                 :: fft_scratch
    2302             : 
    2303             :       CHARACTER(len=*), PARAMETER                        :: routineN = 'cube_transpose_4'
    2304             : 
    2305             :       COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
    2306           0 :          POINTER                                         :: rbuf
    2307             :       INTEGER                                            :: handle, ip, ipl, ir, iy, izx, lb, mip, &
    2308             :                                                             my, my_id, np, num_threads, nx, ny, &
    2309             :                                                             nz, ub
    2310           0 :       INTEGER, CONTIGUOUS, DIMENSION(:), POINTER         :: rcount, rdispl, scount, sdispl
    2311           0 :       INTEGER, CONTIGUOUS, DIMENSION(:, :), POINTER      :: pgrid
    2312             :       INTEGER, DIMENSION(2)                              :: dim, pos
    2313             :       TYPE(mp_comm_type)                                 :: sub_group
    2314             : 
    2315           0 :       CALL timeset(routineN, handle)
    2316             : 
    2317           0 :       sub_group = fft_scratch%cart_sub_comm(1)
    2318           0 :       mip = fft_scratch%mip
    2319           0 :       dim = fft_scratch%dim
    2320           0 :       pos = fft_scratch%pos
    2321           0 :       np = DIM(1)
    2322           0 :       scount => fft_scratch%scount
    2323           0 :       rcount => fft_scratch%rcount
    2324           0 :       sdispl => fft_scratch%sdispl
    2325           0 :       rdispl => fft_scratch%rdispl
    2326           0 :       pgrid => fft_scratch%pgcube
    2327             : 
    2328           0 :       nx = boin(2, 1, mip) - boin(1, 1, mip) + 1
    2329           0 :       nz = boin(2, 3, mip) - boin(1, 3, mip) + 1
    2330           0 :       my = MAXVAL(boout(2, 2, :) - boout(1, 2, :) + 1)
    2331             : 
    2332           0 :       rbuf => fft_scratch%rbuf4
    2333           0 :       num_threads = 1
    2334           0 :       my_id = 0
    2335             : !$OMP PARALLEL DEFAULT(NONE), &
    2336             : !$OMP          PRIVATE(NUM_THREADS,my_id,lb,ub,ip,ipl,ny,iy,ir), &
    2337           0 : !$OMP          SHARED(rbuf,nz,nx,np,pgrid,boout,cin,my,scount,sdispl)
    2338             : !$    num_threads = MIN(omp_get_max_threads(), SIZE(rbuf, 2))
    2339             : !$    my_id = omp_get_thread_num()
    2340             :       IF (my_id < num_threads) THEN
    2341             :          lb = (SIZE(rbuf, 2)*my_id)/num_threads
    2342             :          ub = (SIZE(rbuf, 2)*(my_id + 1))/num_threads - 1
    2343             :          rbuf(:, lb:ub) = 0.0_dp
    2344             :       END IF
    2345             : !$OMP BARRIER
    2346             : 
    2347             : !$OMP DO COLLAPSE(2)
    2348             :       DO izx = 1, nz*nx
    2349             :          DO ip = 0, np - 1
    2350             :             ipl = pgrid(ip, 1)
    2351             :             ny = boout(2, 2, ipl) - boout(1, 2, ipl) + 1
    2352             :             DO iy = boout(1, 2, ipl), boout(2, 2, ipl)
    2353             :                ir = iy - boout(1, 2, ipl) + 1 + (izx - 1)*ny
    2354             :                rbuf(ir, ip) = cin(iy, izx)
    2355             :             END DO
    2356             :          END DO
    2357             :       END DO
    2358             : !$OMP END DO
    2359             : !$OMP DO
    2360             :       DO ip = 0, np - 1
    2361             :          ipl = pgrid(ip, 1)
    2362             :          ny = boout(2, 2, ipl) - boout(1, 2, ipl) + 1
    2363             :          scount(ip) = nx*ny*nz
    2364             :          sdispl(ip) = nx*nz*my*ip
    2365             :       END DO
    2366             : !$OMP END DO
    2367             : !$OMP END PARALLEL
    2368           0 :       ny = boout(2, 2, mip) - boout(1, 2, mip) + 1
    2369             : !$OMP PARALLEL DO DEFAULT(NONE), &
    2370             : !$OMP             PRIVATE(ipl,nx), &
    2371           0 : !$OMP             SHARED(np,pgrid,boin,rcount,rdispl,ny,nz)
    2372             :       DO ip = 0, np - 1
    2373             :          ipl = pgrid(ip, 1)
    2374             :          nx = boin(2, 1, ipl) - boin(1, 1, ipl) + 1
    2375             :          rcount(ip) = nx*ny*nz
    2376             :          rdispl(ip) = ny*nz*(boin(1, 1, ipl) - 1)
    2377             :       END DO
    2378             : !$OMP END PARALLEL DO
    2379             : 
    2380           0 :       CALL sub_group%alltoall(rbuf, scount, sdispl, sout, rcount, rdispl)
    2381             : 
    2382           0 :       CALL timestop(handle)
    2383             : 
    2384           0 :    END SUBROUTINE cube_transpose_4
    2385             : 
    2386             : ! **************************************************************************************************
    2387             : !> \brief ...
    2388             : !> \param cin ...
    2389             : !> \param group ...
    2390             : !> \param boin ...
    2391             : !> \param boout ...
    2392             : !> \param sout ...
    2393             : !> \param fft_scratch ...
    2394             : ! **************************************************************************************************
    2395         104 :    SUBROUTINE cube_transpose_5(cin, group, boin, boout, sout, fft_scratch)
    2396             : 
    2397             :       COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
    2398             :          INTENT(IN)                                      :: cin
    2399             : 
    2400             :       CLASS(mp_comm_type), INTENT(IN)                    :: group
    2401             :       INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:), INTENT(IN)           :: boin, boout
    2402             :       COMPLEX(KIND=dp), DIMENSION(:, :), INTENT(OUT), CONTIGUOUS     :: sout
    2403             :       TYPE(fft_scratch_type), INTENT(IN)                 :: fft_scratch
    2404             : 
    2405             :       CHARACTER(len=*), PARAMETER                        :: routineN = 'cube_transpose_5'
    2406             : 
    2407         104 :       COMPLEX(KIND=dp), DIMENSION(:, :), POINTER, CONTIGUOUS         :: rbuf
    2408             :       INTEGER                                            :: handle, ip, ir, is, ixz, iy, lb, mip, &
    2409             :                                                             my, my_id, np, num_threads, nx, ny, &
    2410             :                                                             nz, ub
    2411         104 :       INTEGER, DIMENSION(:), POINTER, CONTIGUOUS                     :: rcount, rdispl, scount, sdispl
    2412             : 
    2413         104 :       CALL timeset(routineN, handle)
    2414             : 
    2415         104 :       np = fft_scratch%sizes%numtask
    2416         104 :       mip = fft_scratch%mip
    2417         104 :       scount => fft_scratch%scount
    2418         104 :       rcount => fft_scratch%rcount
    2419         104 :       sdispl => fft_scratch%sdispl
    2420         104 :       rdispl => fft_scratch%rdispl
    2421             : 
    2422         104 :       ny = boin(2, 2, mip) - boin(1, 2, mip) + 1
    2423         104 :       nz = boin(2, 3, mip) - boin(1, 3, mip) + 1
    2424             : !$OMP PARALLEL DO DEFAULT(NONE), &
    2425             : !$OMP             PRIVATE(nx), &
    2426         104 : !$OMP             SHARED(np,boout,ny,nz,scount,sdispl)
    2427             :       DO ip = 0, np - 1
    2428             :          nx = boout(2, 1, ip) - boout(1, 1, ip) + 1
    2429             :          scount(ip) = nx*nz*ny
    2430             :          sdispl(ip) = ny*nz*(boout(1, 1, ip) - 1)
    2431             :       END DO
    2432             : !$OMP END PARALLEL DO
    2433         104 :       nx = boout(2, 1, mip) - boout(1, 1, mip) + 1
    2434         312 :       my = MAXVAL(boin(2, 2, :) - boin(1, 2, :) + 1)
    2435             : !$OMP PARALLEL DO DEFAULT(NONE), &
    2436             : !$OMP             PRIVATE(ny), &
    2437         104 : !$OMP             SHARED(np,boin,nx,nz,rcount,rdispl,my)
    2438             :       DO ip = 0, np - 1
    2439             :          ny = boin(2, 2, ip) - boin(1, 2, ip) + 1
    2440             :          rcount(ip) = nx*nz*ny
    2441             :          rdispl(ip) = nx*my*nz*ip
    2442             :       END DO
    2443             : !$OMP END PARALLEL DO
    2444             : 
    2445         104 :       rbuf => fft_scratch%rbuf5
    2446         104 :       num_threads = 1
    2447         104 :       my_id = 0
    2448             : !$OMP PARALLEL DEFAULT(NONE), &
    2449             : !$OMP          PRIVATE(NUM_THREADS, my_id, lb, ub), &
    2450         104 : !$OMP          SHARED(rbuf)
    2451             : !$    num_threads = MIN(omp_get_max_threads(), SIZE(rbuf, 2))
    2452             : !$    my_id = omp_get_thread_num()
    2453             :       IF (my_id < num_threads) THEN
    2454             :          lb = (SIZE(rbuf, 2)*my_id)/num_threads
    2455             :          ub = (SIZE(rbuf, 2)*(my_id + 1))/num_threads - 1
    2456             :          rbuf(:, lb:ub) = 0.0_dp
    2457             :       END IF
    2458             : !$OMP END PARALLEL
    2459             : 
    2460         104 :       CALL group%alltoall(cin, scount, sdispl, rbuf, rcount, rdispl)
    2461             : 
    2462             : !$OMP PARALLEL DO DEFAULT(NONE) COLLAPSE(2) &
    2463             : !$OMP             PRIVATE(ip,ny,iy,is,ir) &
    2464         104 : !$OMP             SHARED(nx,nz,np,boin,sout,rbuf)
    2465             :       DO ixz = 1, nx*nz
    2466             :          DO ip = 0, np - 1
    2467             :             ny = boin(2, 2, ip) - boin(1, 2, ip) + 1
    2468             :             DO iy = 1, ny
    2469             :                is = boin(1, 2, ip) + iy - 1
    2470             :                ir = iy + ny*(ixz - 1)
    2471             :                sout(is, ixz) = rbuf(ir, ip)
    2472             :             END DO
    2473             :          END DO
    2474             :       END DO
    2475             : !$OMP END PARALLEL DO
    2476             : 
    2477         104 :       CALL timestop(handle)
    2478             : 
    2479         104 :    END SUBROUTINE cube_transpose_5
    2480             : 
    2481             : ! **************************************************************************************************
    2482             : !> \brief ...
    2483             : !> \param cin ...
    2484             : !> \param group ...
    2485             : !> \param boin ...
    2486             : !> \param boout ...
    2487             : !> \param sout ...
    2488             : !> \param fft_scratch ...
    2489             : ! **************************************************************************************************
    2490         104 :    SUBROUTINE cube_transpose_6(cin, group, boin, boout, sout, fft_scratch)
    2491             : 
    2492             :       COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
    2493             :          INTENT(IN)                                      :: cin
    2494             : 
    2495             :       CLASS(mp_comm_type), INTENT(IN)                    :: group
    2496             :       INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:), INTENT(IN)           :: boin, boout
    2497             :       COMPLEX(KIND=dp), DIMENSION(:, :), INTENT(OUT), CONTIGUOUS     :: sout
    2498             :       TYPE(fft_scratch_type), INTENT(IN)                 :: fft_scratch
    2499             : 
    2500             :       CHARACTER(len=*), PARAMETER                        :: routineN = 'cube_transpose_6'
    2501             : 
    2502         104 :       COMPLEX(KIND=dp), DIMENSION(:, :), POINTER, CONTIGUOUS         :: rbuf
    2503             :       INTEGER                                            :: handle, ip, ir, iy, izx, lb, mip, my, &
    2504             :                                                             my_id, np, num_threads, nx, ny, nz, ub
    2505         104 :       INTEGER, DIMENSION(:), POINTER, CONTIGUOUS                     :: rcount, rdispl, scount, sdispl
    2506             : 
    2507         104 :       CALL timeset(routineN, handle)
    2508             : 
    2509         104 :       np = fft_scratch%sizes%numtask
    2510         104 :       mip = fft_scratch%mip
    2511         104 :       scount => fft_scratch%scount
    2512         104 :       rcount => fft_scratch%rcount
    2513         104 :       sdispl => fft_scratch%sdispl
    2514         104 :       rdispl => fft_scratch%rdispl
    2515             : 
    2516         104 :       nx = boin(2, 1, mip) - boin(1, 1, mip) + 1
    2517         104 :       nz = boin(2, 3, mip) - boin(1, 3, mip) + 1
    2518         312 :       my = MAXVAL(boout(2, 2, :) - boout(1, 2, :) + 1)
    2519             : 
    2520         104 :       rbuf => fft_scratch%rbuf5
    2521         104 :       num_threads = 1
    2522         104 :       my_id = 0
    2523             : !$OMP PARALLEL DEFAULT(NONE), &
    2524             : !$OMP          PRIVATE(NUM_THREADS,my_id,lb,ub,ip,ny,iy,ir), &
    2525         104 : !$OMP          SHARED(rbuf,nx,nz,np,boout,cin,my,scount,sdispl)
    2526             : !$    num_threads = MIN(omp_get_max_threads(), SIZE(rbuf, 2))
    2527             : !$    my_id = omp_get_thread_num()
    2528             :       IF (my_id < num_threads) THEN
    2529             :          lb = (SIZE(rbuf, 2)*my_id)/num_threads
    2530             :          ub = (SIZE(rbuf, 2)*(my_id + 1))/num_threads - 1
    2531             :          rbuf(:, lb:ub) = 0.0_dp
    2532             :       END IF
    2533             : !$OMP BARRIER
    2534             : 
    2535             : !$OMP DO COLLAPSE(2)
    2536             :       DO izx = 1, nz*nx
    2537             :          DO ip = 0, np - 1
    2538             :             ny = boout(2, 2, ip) - boout(1, 2, ip) + 1
    2539             :             DO iy = boout(1, 2, ip), boout(2, 2, ip)
    2540             :                ir = iy - boout(1, 2, ip) + 1 + (izx - 1)*ny
    2541             :                rbuf(ir, ip) = cin(iy, izx)
    2542             :             END DO
    2543             :          END DO
    2544             :       END DO
    2545             : !$OMP END DO
    2546             : !$OMP DO
    2547             :       DO ip = 0, np - 1
    2548             :          ny = boout(2, 2, ip) - boout(1, 2, ip) + 1
    2549             :          scount(ip) = nx*ny*nz
    2550             :          sdispl(ip) = nx*nz*my*ip
    2551             :       END DO
    2552             : !$OMP END DO
    2553             : !$OMP END PARALLEL
    2554         104 :       ny = boout(2, 2, mip) - boout(1, 2, mip) + 1
    2555             : !$OMP PARALLEL DO DEFAULT(NONE), &
    2556             : !$OMP             PRIVATE(nx), &
    2557         104 : !$OMP             SHARED(np,boin,rcount,rdispl,nz,ny)
    2558             :       DO ip = 0, np - 1
    2559             :          nx = boin(2, 1, ip) - boin(1, 1, ip) + 1
    2560             :          rcount(ip) = nx*ny*nz
    2561             :          rdispl(ip) = ny*nz*(boin(1, 1, ip) - 1)
    2562             :       END DO
    2563             : !$OMP END PARALLEL DO
    2564             : 
    2565         104 :       CALL group%alltoall(rbuf, scount, sdispl, sout, rcount, rdispl)
    2566             : 
    2567         104 :       CALL timestop(handle)
    2568             : 
    2569         104 :    END SUBROUTINE cube_transpose_6
    2570             : 
    2571             : ! **************************************************************************************************
    2572             : !> \brief ...
    2573             : ! **************************************************************************************************
    2574       17893 :    SUBROUTINE init_fft_scratch_pool()
    2575             : 
    2576       17893 :       CALL release_fft_scratch_pool()
    2577             : 
    2578             :       ! Allocate first scratch and mark it as used
    2579       17893 :       ALLOCATE (fft_scratch_first)
    2580      518897 :       ALLOCATE (fft_scratch_first%fft_scratch)
    2581             :       ! this is a very special scratch, it seems, we always keep it 'most - recent' so we will never delete it
    2582       17893 :       fft_scratch_first%fft_scratch%last_tick = HUGE(fft_scratch_first%fft_scratch%last_tick)
    2583             : 
    2584       17893 :       init_fft_pool = init_fft_pool + 1
    2585             : 
    2586       17893 :    END SUBROUTINE init_fft_scratch_pool
    2587             : 
    2588             : ! **************************************************************************************************
    2589             : !> \brief ...
    2590             : !> \param fft_scratch ...
    2591             : ! **************************************************************************************************
    2592       57674 :    SUBROUTINE deallocate_fft_scratch_type(fft_scratch)
    2593             :       TYPE(fft_scratch_type), INTENT(INOUT)    :: fft_scratch
    2594             : 
    2595             : #if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
    2596             :       INTEGER                   :: ierr
    2597             :       COMPLEX(KIND=dp), POINTER :: dummy_ptr_z
    2598             : #endif
    2599             : 
    2600             :       ! deallocate structures
    2601       57674 :       IF (ASSOCIATED(fft_scratch%ziptr)) THEN
    2602       13243 :          CALL fft_dealloc(fft_scratch%ziptr)
    2603             :       END IF
    2604       57674 :       IF (ASSOCIATED(fft_scratch%zoptr)) THEN
    2605       13243 :          CALL fft_dealloc(fft_scratch%zoptr)
    2606             :       END IF
    2607       57674 :       IF (ASSOCIATED(fft_scratch%p1buf)) THEN
    2608           0 :          CALL fft_dealloc(fft_scratch%p1buf)
    2609             :       END IF
    2610       57674 :       IF (ASSOCIATED(fft_scratch%p2buf)) THEN
    2611             : #if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
    2612             :          dummy_ptr_z => fft_scratch%p2buf(1, 1)
    2613             :          ierr = offload_free_pinned_mem(c_loc(dummy_ptr_z))
    2614             : #else
    2615           0 :          CALL fft_dealloc(fft_scratch%p2buf)
    2616             : #endif
    2617             :       END IF
    2618       57674 :       IF (ASSOCIATED(fft_scratch%p3buf)) THEN
    2619             : #if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
    2620             :          dummy_ptr_z => fft_scratch%p3buf(1, 1)
    2621             :          ierr = offload_free_pinned_mem(c_loc(dummy_ptr_z))
    2622             : #else
    2623           0 :          CALL fft_dealloc(fft_scratch%p3buf)
    2624             : #endif
    2625             :       END IF
    2626       57674 :       IF (ASSOCIATED(fft_scratch%p4buf)) THEN
    2627             : #if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
    2628             :          dummy_ptr_z => fft_scratch%p4buf(1, 1)
    2629             :          ierr = offload_free_pinned_mem(c_loc(dummy_ptr_z))
    2630             : #else
    2631           0 :          CALL fft_dealloc(fft_scratch%p4buf)
    2632             : #endif
    2633             :       END IF
    2634       57674 :       IF (ASSOCIATED(fft_scratch%p5buf)) THEN
    2635             : #if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
    2636             :          dummy_ptr_z => fft_scratch%p5buf(1, 1)
    2637             :          ierr = offload_free_pinned_mem(c_loc(dummy_ptr_z))
    2638             : #else
    2639           0 :          CALL fft_dealloc(fft_scratch%p5buf)
    2640             : #endif
    2641             :       END IF
    2642       57674 :       IF (ASSOCIATED(fft_scratch%p6buf)) THEN
    2643           0 :          CALL fft_dealloc(fft_scratch%p6buf)
    2644             :       END IF
    2645       57674 :       IF (ASSOCIATED(fft_scratch%p7buf)) THEN
    2646             : #if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
    2647             :          dummy_ptr_z => fft_scratch%p7buf(1, 1)
    2648             :          ierr = offload_free_pinned_mem(c_loc(dummy_ptr_z))
    2649             : #else
    2650           0 :          CALL fft_dealloc(fft_scratch%p7buf)
    2651             : #endif
    2652             :       END IF
    2653       57674 :       IF (ASSOCIATED(fft_scratch%r1buf)) THEN
    2654             : #if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
    2655             :          dummy_ptr_z => fft_scratch%r1buf(1, 1)
    2656             :          ierr = offload_free_pinned_mem(c_loc(dummy_ptr_z))
    2657             : #else
    2658       26530 :          CALL fft_dealloc(fft_scratch%r1buf)
    2659             : #endif
    2660             :       END IF
    2661       57674 :       IF (ASSOCIATED(fft_scratch%r2buf)) THEN
    2662       26530 :          CALL fft_dealloc(fft_scratch%r2buf)
    2663             :       END IF
    2664       57674 :       IF (ASSOCIATED(fft_scratch%tbuf)) THEN
    2665             : #if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
    2666             :          dummy_ptr_z => fft_scratch%tbuf(1, 1, 1)
    2667             :          ierr = offload_free_pinned_mem(c_loc(dummy_ptr_z))
    2668             : #else
    2669       26530 :          CALL fft_dealloc(fft_scratch%tbuf)
    2670             : #endif
    2671             :       END IF
    2672       57674 :       IF (ASSOCIATED(fft_scratch%a1buf)) THEN
    2673           8 :          CALL fft_dealloc(fft_scratch%a1buf)
    2674             :       END IF
    2675       57674 :       IF (ASSOCIATED(fft_scratch%a2buf)) THEN
    2676           8 :          CALL fft_dealloc(fft_scratch%a2buf)
    2677             :       END IF
    2678       57674 :       IF (ASSOCIATED(fft_scratch%a3buf)) THEN
    2679           8 :          CALL fft_dealloc(fft_scratch%a3buf)
    2680             :       END IF
    2681       57674 :       IF (ASSOCIATED(fft_scratch%a4buf)) THEN
    2682           8 :          CALL fft_dealloc(fft_scratch%a4buf)
    2683             :       END IF
    2684       57674 :       IF (ASSOCIATED(fft_scratch%a5buf)) THEN
    2685           8 :          CALL fft_dealloc(fft_scratch%a5buf)
    2686             :       END IF
    2687       57674 :       IF (ASSOCIATED(fft_scratch%a6buf)) THEN
    2688           8 :          CALL fft_dealloc(fft_scratch%a6buf)
    2689             :       END IF
    2690       57674 :       IF (ASSOCIATED(fft_scratch%scount)) THEN
    2691           0 :          DEALLOCATE (fft_scratch%scount, fft_scratch%rcount, &
    2692       26538 :                      fft_scratch%sdispl, fft_scratch%rdispl)
    2693             :       END IF
    2694       57674 :       IF (ASSOCIATED(fft_scratch%rr)) THEN
    2695       26522 :          DEALLOCATE (fft_scratch%rr)
    2696             :       END IF
    2697       57674 :       IF (ASSOCIATED(fft_scratch%xzbuf)) THEN
    2698           0 :          DEALLOCATE (fft_scratch%xzbuf)
    2699             :       END IF
    2700       57674 :       IF (ASSOCIATED(fft_scratch%yzbuf)) THEN
    2701           0 :          DEALLOCATE (fft_scratch%yzbuf)
    2702             :       END IF
    2703       57674 :       IF (ASSOCIATED(fft_scratch%xzbuf_sgl)) THEN
    2704           0 :          DEALLOCATE (fft_scratch%xzbuf_sgl)
    2705             :       END IF
    2706       57674 :       IF (ASSOCIATED(fft_scratch%yzbuf_sgl)) THEN
    2707           0 :          DEALLOCATE (fft_scratch%yzbuf_sgl)
    2708             :       END IF
    2709       57674 :       IF (ASSOCIATED(fft_scratch%ss)) THEN
    2710           8 :          DEALLOCATE (fft_scratch%ss)
    2711             :       END IF
    2712       57674 :       IF (ASSOCIATED(fft_scratch%tt)) THEN
    2713           8 :          DEALLOCATE (fft_scratch%tt)
    2714             :       END IF
    2715       57674 :       IF (ASSOCIATED(fft_scratch%pgrid)) THEN
    2716           0 :          DEALLOCATE (fft_scratch%pgrid)
    2717             :       END IF
    2718       57674 :       IF (ASSOCIATED(fft_scratch%pgcube)) THEN
    2719       26538 :          DEALLOCATE (fft_scratch%pgcube)
    2720             :       END IF
    2721       57674 :       IF (ASSOCIATED(fft_scratch%xcor)) THEN
    2722           0 :          DEALLOCATE (fft_scratch%xcor, fft_scratch%zcor)
    2723             :       END IF
    2724       57674 :       IF (ASSOCIATED(fft_scratch%pzcoord)) THEN
    2725           0 :          DEALLOCATE (fft_scratch%pzcoord)
    2726             :       END IF
    2727       57674 :       IF (ASSOCIATED(fft_scratch%xzcount)) THEN
    2728           0 :          DEALLOCATE (fft_scratch%xzcount, fft_scratch%yzcount)
    2729           0 :          DEALLOCATE (fft_scratch%xzdispl, fft_scratch%yzdispl)
    2730           0 :          fft_scratch%in = 0
    2731           0 :          fft_scratch%rsratio = 1._dp
    2732             :       END IF
    2733       57674 :       IF (ASSOCIATED(fft_scratch%rbuf1)) THEN
    2734           0 :          DEALLOCATE (fft_scratch%rbuf1)
    2735             :       END IF
    2736       57674 :       IF (ASSOCIATED(fft_scratch%rbuf2)) THEN
    2737           0 :          DEALLOCATE (fft_scratch%rbuf2)
    2738             :       END IF
    2739       57674 :       IF (ASSOCIATED(fft_scratch%rbuf3)) THEN
    2740           0 :          DEALLOCATE (fft_scratch%rbuf3)
    2741             :       END IF
    2742       57674 :       IF (ASSOCIATED(fft_scratch%rbuf4)) THEN
    2743           0 :          DEALLOCATE (fft_scratch%rbuf4)
    2744             :       END IF
    2745       57674 :       IF (ASSOCIATED(fft_scratch%rbuf5)) THEN
    2746           8 :          DEALLOCATE (fft_scratch%rbuf5)
    2747             :       END IF
    2748       57674 :       IF (ASSOCIATED(fft_scratch%rbuf6)) THEN
    2749           8 :          DEALLOCATE (fft_scratch%rbuf6)
    2750             :       END IF
    2751             : 
    2752       57674 :       IF (fft_scratch%cart_sub_comm(1) /= mp_comm_null) THEN
    2753           0 :          CALL fft_scratch%cart_sub_comm(1)%free()
    2754             :       END IF
    2755       57674 :       IF (fft_scratch%cart_sub_comm(2) /= mp_comm_null) THEN
    2756           0 :          CALL fft_scratch%cart_sub_comm(2)%free()
    2757             :       END IF
    2758             : 
    2759       57674 :       CALL fft_destroy_plan(fft_scratch%fft_plan(1))
    2760       57674 :       CALL fft_destroy_plan(fft_scratch%fft_plan(2))
    2761       57674 :       CALL fft_destroy_plan(fft_scratch%fft_plan(3))
    2762       57674 :       CALL fft_destroy_plan(fft_scratch%fft_plan(4))
    2763       57674 :       CALL fft_destroy_plan(fft_scratch%fft_plan(5))
    2764       57674 :       CALL fft_destroy_plan(fft_scratch%fft_plan(6))
    2765             : 
    2766       57674 :    END SUBROUTINE deallocate_fft_scratch_type
    2767             : 
    2768             : ! **************************************************************************************************
    2769             : !> \brief ...
    2770             : ! **************************************************************************************************
    2771       26826 :    SUBROUTINE release_fft_scratch_pool()
    2772             :       TYPE(fft_scratch_pool_type), POINTER               :: fft_scratch, fft_scratch_current
    2773             : 
    2774       26826 : !$    CPASSERT(.NOT. omp_in_parallel() .OR. 0 == omp_get_thread_num())
    2775             : 
    2776       26826 :       IF (init_fft_pool == 0) NULLIFY (fft_scratch_first)
    2777             : 
    2778       26826 :       fft_scratch => fft_scratch_first
    2779       57540 :       DO
    2780       84366 :          IF (ASSOCIATED(fft_scratch)) THEN
    2781       57540 :             fft_scratch_current => fft_scratch
    2782       57540 :             fft_scratch => fft_scratch_current%fft_scratch_next
    2783       57540 :             NULLIFY (fft_scratch_current%fft_scratch_next)
    2784             : 
    2785       57540 :             CALL deallocate_fft_scratch_type(fft_scratch_current%fft_scratch)
    2786             : 
    2787      230160 :             DEALLOCATE (fft_scratch_current%fft_scratch)
    2788       57540 :             DEALLOCATE (fft_scratch_current)
    2789             :          ELSE
    2790             :             EXIT
    2791             :          END IF
    2792             :       END DO
    2793             : 
    2794       26826 :       init_fft_pool = 0
    2795             : 
    2796       26826 :    END SUBROUTINE release_fft_scratch_pool
    2797             : 
    2798             : ! **************************************************************************************************
    2799             : !> \brief ...
    2800             : ! **************************************************************************************************
    2801     3298884 :    SUBROUTINE resize_fft_scratch_pool()
    2802             : 
    2803             :       INTEGER                                            :: last_tick, nscratch
    2804             :       TYPE(fft_scratch_pool_type), POINTER               :: fft_scratch_current, fft_scratch_old
    2805             : 
    2806     3298884 :       nscratch = 0
    2807             : 
    2808     3298884 :       last_tick = HUGE(last_tick)
    2809     3298884 :       NULLIFY (fft_scratch_old)
    2810             : 
    2811             :       ! start at the global pool, count, and find a deletion candidate
    2812     3298884 :       fft_scratch_current => fft_scratch_first
    2813    17772956 :       DO
    2814    21071840 :          IF (ASSOCIATED(fft_scratch_current)) THEN
    2815    17772956 :             nscratch = nscratch + 1
    2816             :             ! is this a candidate for deletion (i.e. least recently used, and not in use)
    2817    17772956 :             IF (.NOT. fft_scratch_current%fft_scratch%in_use) THEN
    2818    14474072 :                IF (fft_scratch_current%fft_scratch%last_tick < last_tick) THEN
    2819     6723374 :                   last_tick = fft_scratch_current%fft_scratch%last_tick
    2820     6723374 :                   fft_scratch_old => fft_scratch_current
    2821             :                END IF
    2822             :             END IF
    2823    17772956 :             fft_scratch_current => fft_scratch_current%fft_scratch_next
    2824             :          ELSE
    2825             :             EXIT
    2826             :          END IF
    2827             :       END DO
    2828             : 
    2829             :       ! we should delete a scratch
    2830     3298884 :       IF (nscratch > fft_pool_scratch_limit) THEN
    2831             :          ! note that we never deallocate the first (special) element of the list
    2832         134 :          IF (ASSOCIATED(fft_scratch_old)) THEN
    2833             :             fft_scratch_current => fft_scratch_first
    2834             :             DO
    2835        2278 :                IF (ASSOCIATED(fft_scratch_current)) THEN
    2836             :                   ! should we delete the next in the list?
    2837        2144 :                   IF (ASSOCIATED(fft_scratch_current%fft_scratch_next, fft_scratch_old)) THEN
    2838             :                      ! fix the linked list
    2839         134 :                      fft_scratch_current%fft_scratch_next => fft_scratch_old%fft_scratch_next
    2840             : 
    2841             :                      ! deallocate the element
    2842         134 :                      CALL deallocate_fft_scratch_type(fft_scratch_old%fft_scratch)
    2843         536 :                      DEALLOCATE (fft_scratch_old%fft_scratch)
    2844         134 :                      DEALLOCATE (fft_scratch_old)
    2845             : 
    2846             :                   ELSE
    2847             :                      fft_scratch_current => fft_scratch_current%fft_scratch_next
    2848             :                   END IF
    2849             :                ELSE
    2850             :                   EXIT
    2851             :                END IF
    2852             :             END DO
    2853             : 
    2854             :          ELSE
    2855           0 :             CPWARN("The number of the scratches exceeded the limit, but none could be deallocated")
    2856             :          END IF
    2857             :       END IF
    2858             : 
    2859     3298884 :    END SUBROUTINE resize_fft_scratch_pool
    2860             : 
    2861             : ! **************************************************************************************************
    2862             : !> \brief ...
    2863             : !> \param fft_scratch ...
    2864             : !> \param tf_type ...
    2865             : !> \param n ...
    2866             : !> \param fft_sizes ...
    2867             : ! **************************************************************************************************
    2868     3298884 :    SUBROUTINE get_fft_scratch(fft_scratch, tf_type, n, fft_sizes)
    2869             :       TYPE(fft_scratch_type), POINTER          :: fft_scratch
    2870             :       INTEGER, INTENT(IN)                      :: tf_type
    2871             :       INTEGER, DIMENSION(:), INTENT(IN)        :: n
    2872             :       TYPE(fft_scratch_sizes), INTENT(IN), &
    2873             :          OPTIONAL                              :: fft_sizes
    2874             : 
    2875             :       CHARACTER(len=*), PARAMETER :: routineN = 'get_fft_scratch'
    2876             : 
    2877             :       INTEGER :: coord(2), DIM(2), handle, i, ix, iz, lg, lmax, m1, m2, &
    2878             :                  mcx2, mcy3, mcz1, mcz2, mg, mmax, mx1, mx2, my1, my3, mz1, mz2, mz3, &
    2879             :                  nbx, nbz, nm, nmax, nmray, np, nx, ny, nyzray, nz, pos(2)
    2880             :       INTEGER, DIMENSION(3)                    :: pcoord
    2881             :       LOGICAL                                  :: equal
    2882             :       LOGICAL, DIMENSION(2)                    :: dims
    2883             :       TYPE(fft_scratch_pool_type), POINTER     :: fft_scratch_current, &
    2884             :                                                   fft_scratch_last, &
    2885             :                                                   fft_scratch_new
    2886             : #if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
    2887             :       INTEGER                   :: ierr
    2888             :       INTEGER(KIND=C_SIZE_T)    :: length
    2889             :       TYPE(C_PTR)               :: cptr_r1buf, cptr_tbuf, &
    2890             :                                    cptr_p2buf, cptr_p3buf, cptr_p4buf, cptr_p5buf, cptr_p7buf
    2891             : #endif
    2892     3298884 :       CALL timeset(routineN, handle)
    2893             : 
    2894             :       ! this is the place to check that the scratch_pool does not grow without limits
    2895             :       ! before we add a new scratch check the size of the pool and release some of the list if needed
    2896     3298884 :       CALL resize_fft_scratch_pool()
    2897             : 
    2898             :       ! get the required scratch
    2899     3298884 : !$OMP ATOMIC
    2900             :       tick_fft_pool = tick_fft_pool + 1
    2901     3298884 :       fft_scratch_current => fft_scratch_first
    2902             :       DO
    2903    10316118 :          IF (ASSOCIATED(fft_scratch_current)) THEN
    2904    10276337 :             IF (fft_scratch_current%fft_scratch%in_use) THEN
    2905     3298884 :                fft_scratch_last => fft_scratch_current
    2906     3298884 :                fft_scratch_current => fft_scratch_current%fft_scratch_next
    2907     3298884 :                CYCLE
    2908             :             END IF
    2909     6977453 :             IF (tf_type /= fft_scratch_current%fft_scratch%tf_type) THEN
    2910      436456 :                fft_scratch_last => fft_scratch_current
    2911      436456 :                fft_scratch_current => fft_scratch_current%fft_scratch_next
    2912      436456 :                CYCLE
    2913             :             END IF
    2914    16563668 :             IF (.NOT. ALL(n == fft_scratch_current%fft_scratch%nfft)) THEN
    2915     3200536 :                fft_scratch_last => fft_scratch_current
    2916     3200536 :                fft_scratch_current => fft_scratch_current%fft_scratch_next
    2917     3200536 :                CYCLE
    2918             :             END IF
    2919     3340461 :             IF (PRESENT(fft_sizes)) THEN
    2920     2718238 :                IF (fft_sizes%rs_group /= fft_scratch_current%fft_scratch%group) THEN
    2921       81038 :                   fft_scratch_last => fft_scratch_current
    2922       81038 :                   fft_scratch_current => fft_scratch_current%fft_scratch_next
    2923       81038 :                   CYCLE
    2924             :                END IF
    2925     2637200 :                CALL is_equal(fft_sizes, fft_scratch_current%fft_scratch%sizes, equal)
    2926     2637200 :                IF (.NOT. equal) THEN
    2927         320 :                   fft_scratch_last => fft_scratch_current
    2928         320 :                   fft_scratch_current => fft_scratch_current%fft_scratch_next
    2929         320 :                   CYCLE
    2930             :                END IF
    2931             :             END IF
    2932             :             ! Success
    2933     3259103 :             fft_scratch => fft_scratch_current%fft_scratch
    2934     3259103 :             fft_scratch_current%fft_scratch%in_use = .TRUE.
    2935     3259103 :             EXIT
    2936             :          ELSE
    2937             :             ! We cannot find the scratch type in this pool
    2938             :             ! Generate a new scratch set
    2939       39781 :             ALLOCATE (fft_scratch_new)
    2940     1034306 :             ALLOCATE (fft_scratch_new%fft_scratch)
    2941             : 
    2942       39781 :             IF (tf_type .NE. 400) THEN
    2943       26538 :                fft_scratch_new%fft_scratch%sizes = fft_sizes
    2944       26538 :                np = fft_sizes%numtask
    2945             :                ALLOCATE (fft_scratch_new%fft_scratch%scount(0:np - 1), fft_scratch_new%fft_scratch%rcount(0:np - 1), &
    2946             :                          fft_scratch_new%fft_scratch%sdispl(0:np - 1), fft_scratch_new%fft_scratch%rdispl(0:np - 1), &
    2947      238842 :                          fft_scratch_new%fft_scratch%pgcube(0:np - 1, 2))
    2948             :             END IF
    2949             : 
    2950           0 :             SELECT CASE (tf_type)
    2951             :             CASE DEFAULT
    2952           0 :                CPABORT("Invalid scratch type.")
    2953             :             CASE (100) ! fft3d_pb: full cube distribution
    2954           0 :                CPASSERT(PRESENT(fft_sizes))
    2955           0 :                mx1 = fft_sizes%mx1
    2956           0 :                my1 = fft_sizes%my1
    2957           0 :                mx2 = fft_sizes%mx2
    2958           0 :                mz2 = fft_sizes%mz2
    2959           0 :                my3 = fft_sizes%my3
    2960           0 :                mz3 = fft_sizes%mz3
    2961           0 :                CALL fft_alloc(fft_scratch_new%fft_scratch%a1buf, [mx1*my1, n(3)])
    2962           0 :                CALL fft_alloc(fft_scratch_new%fft_scratch%a2buf, [n(3), mx1*my1])
    2963           0 :                CALL fft_alloc(fft_scratch_new%fft_scratch%a3buf, [mx2*mz2, n(2)])
    2964           0 :                CALL fft_alloc(fft_scratch_new%fft_scratch%a4buf, [n(2), mx2*mz2])
    2965           0 :                CALL fft_alloc(fft_scratch_new%fft_scratch%a5buf, [my3*mz3, n(1)])
    2966           0 :                CALL fft_alloc(fft_scratch_new%fft_scratch%a6buf, [n(1), my3*mz3])
    2967           0 :                fft_scratch_new%fft_scratch%group = fft_sizes%rs_group
    2968             : 
    2969           0 :                dim = fft_sizes%rs_group%num_pe_cart
    2970           0 :                pos = fft_sizes%rs_group%mepos_cart
    2971           0 :                fft_scratch_new%fft_scratch%mip = fft_sizes%rs_group%mepos
    2972           0 :                fft_scratch_new%fft_scratch%dim = dim
    2973           0 :                fft_scratch_new%fft_scratch%pos = pos
    2974           0 :                mcz1 = fft_sizes%mcz1
    2975           0 :                mcx2 = fft_sizes%mcx2
    2976           0 :                mcz2 = fft_sizes%mcz2
    2977           0 :                mcy3 = fft_sizes%mcy3
    2978           0 :                ALLOCATE (fft_scratch_new%fft_scratch%rbuf1(mx2*my1*mcz2, 0:DIM(2) - 1))
    2979           0 :                ALLOCATE (fft_scratch_new%fft_scratch%rbuf2(mx1*my1*mcz2, 0:DIM(2) - 1))
    2980           0 :                ALLOCATE (fft_scratch_new%fft_scratch%rbuf3(mx2*mz3*mcy3, 0:DIM(1) - 1))
    2981           0 :                ALLOCATE (fft_scratch_new%fft_scratch%rbuf4(mx2*mz2*mcy3, 0:DIM(1) - 1))
    2982             : 
    2983           0 :                dims = (/.TRUE., .FALSE./)
    2984           0 :                CALL fft_scratch_new%fft_scratch%cart_sub_comm(1)%from_sub(fft_sizes%rs_group, dims)
    2985           0 :                dims = (/.FALSE., .TRUE./)
    2986           0 :                CALL fft_scratch_new%fft_scratch%cart_sub_comm(2)%from_sub(fft_sizes%rs_group, dims)
    2987             : 
    2988             :                !initialise pgcube
    2989           0 :                DO i = 0, DIM(1) - 1
    2990           0 :                   coord = (/i, pos(2)/)
    2991           0 :                   CALL fft_sizes%rs_group%rank_cart(coord, fft_scratch_new%fft_scratch%pgcube(i, 1))
    2992             :                END DO
    2993           0 :                DO i = 0, DIM(2) - 1
    2994           0 :                   coord = (/pos(1), i/)
    2995           0 :                   CALL fft_sizes%rs_group%rank_cart(coord, fft_scratch_new%fft_scratch%pgcube(i, 2))
    2996             :                END DO
    2997             : 
    2998             :                !set up fft plans
    2999             :                CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(1), fft_type, FWFFT, .TRUE., n(3), mx1*my1, &
    3000           0 :                                         fft_scratch_new%fft_scratch%a1buf, fft_scratch_new%fft_scratch%a2buf, fft_plan_style)
    3001             :                CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(2), fft_type, FWFFT, .TRUE., n(2), mx2*mz2, &
    3002           0 :                                         fft_scratch_new%fft_scratch%a3buf, fft_scratch_new%fft_scratch%a4buf, fft_plan_style)
    3003             :                CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(3), fft_type, FWFFT, .TRUE., n(1), my3*mz3, &
    3004           0 :                                         fft_scratch_new%fft_scratch%a5buf, fft_scratch_new%fft_scratch%a6buf, fft_plan_style)
    3005             :                CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(4), fft_type, BWFFT, .TRUE., n(1), my3*mz3, &
    3006           0 :                                         fft_scratch_new%fft_scratch%a6buf, fft_scratch_new%fft_scratch%a5buf, fft_plan_style)
    3007             :                CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(5), fft_type, BWFFT, .TRUE., n(2), mx2*mz2, &
    3008           0 :                                         fft_scratch_new%fft_scratch%a4buf, fft_scratch_new%fft_scratch%a3buf, fft_plan_style)
    3009             :                CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(6), fft_type, BWFFT, .TRUE., n(3), mx1*my1, &
    3010           0 :                                         fft_scratch_new%fft_scratch%a2buf, fft_scratch_new%fft_scratch%a1buf, fft_plan_style)
    3011             : 
    3012             :             CASE (101) ! fft3d_pb: full cube distribution (dim 1)
    3013           8 :                CPASSERT(PRESENT(fft_sizes))
    3014           8 :                mx1 = fft_sizes%mx1
    3015           8 :                my1 = fft_sizes%my1
    3016           8 :                mz1 = fft_sizes%mz1
    3017           8 :                my3 = fft_sizes%my3
    3018           8 :                mz3 = fft_sizes%mz3
    3019          24 :                CALL fft_alloc(fft_scratch_new%fft_scratch%a1buf, [mx1*my1, n(3)])
    3020          24 :                CALL fft_alloc(fft_scratch_new%fft_scratch%a2buf, [n(3), mx1*my1])
    3021           8 :                fft_scratch_new%fft_scratch%group = fft_sizes%rs_group
    3022          24 :                CALL fft_alloc(fft_scratch_new%fft_scratch%a3buf, [mx1*mz1, n(2)])
    3023          24 :                CALL fft_alloc(fft_scratch_new%fft_scratch%a4buf, [n(2), mx1*mz1])
    3024          24 :                CALL fft_alloc(fft_scratch_new%fft_scratch%a5buf, [my3*mz3, n(1)])
    3025          24 :                CALL fft_alloc(fft_scratch_new%fft_scratch%a6buf, [n(1), my3*mz3])
    3026             : 
    3027          24 :                dim = fft_sizes%rs_group%num_pe_cart
    3028          24 :                pos = fft_sizes%rs_group%mepos_cart
    3029           8 :                fft_scratch_new%fft_scratch%mip = fft_sizes%rs_group%mepos
    3030          24 :                fft_scratch_new%fft_scratch%dim = dim
    3031          24 :                fft_scratch_new%fft_scratch%pos = pos
    3032           8 :                mcy3 = fft_sizes%mcy3
    3033          32 :                ALLOCATE (fft_scratch_new%fft_scratch%rbuf5(mx1*mz3*mcy3, 0:DIM(1) - 1))
    3034          32 :                ALLOCATE (fft_scratch_new%fft_scratch%rbuf6(mx1*mz1*mcy3, 0:DIM(1) - 1))
    3035             : 
    3036             :                !set up fft plans
    3037             :                CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(1), fft_type, FWFFT, .TRUE., n(3), mx1*my1, &
    3038           8 :                                         fft_scratch_new%fft_scratch%a1buf, fft_scratch_new%fft_scratch%a3buf, fft_plan_style)
    3039             :                CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(2), fft_type, FWFFT, .TRUE., n(2), mx1*mz1, &
    3040           8 :                                         fft_scratch_new%fft_scratch%a3buf, fft_scratch_new%fft_scratch%a4buf, fft_plan_style)
    3041             :                CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(3), fft_type, FWFFT, .TRUE., n(1), my3*mz3, &
    3042           8 :                                         fft_scratch_new%fft_scratch%a5buf, fft_scratch_new%fft_scratch%a6buf, fft_plan_style)
    3043             :                CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(4), fft_type, BWFFT, .TRUE., n(1), my3*mz3, &
    3044           8 :                                         fft_scratch_new%fft_scratch%a6buf, fft_scratch_new%fft_scratch%a5buf, fft_plan_style)
    3045             :                CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(5), fft_type, BWFFT, .TRUE., n(2), mx1*mz1, &
    3046           8 :                                         fft_scratch_new%fft_scratch%a4buf, fft_scratch_new%fft_scratch%a3buf, fft_plan_style)
    3047             :                CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(6), fft_type, BWFFT, .TRUE., n(3), mx1*my1, &
    3048           8 :                                         fft_scratch_new%fft_scratch%a3buf, fft_scratch_new%fft_scratch%a1buf, fft_plan_style)
    3049             : 
    3050             :             CASE (200) ! fft3d_ps: plane distribution
    3051       26530 :                CPASSERT(PRESENT(fft_sizes))
    3052       26530 :                nx = fft_sizes%nx
    3053       26530 :                ny = fft_sizes%ny
    3054       26530 :                nz = fft_sizes%nz
    3055       26530 :                mx2 = fft_sizes%mx2
    3056       26530 :                lmax = fft_sizes%lmax
    3057       26530 :                mmax = fft_sizes%mmax
    3058       26530 :                lg = fft_sizes%lg
    3059       26530 :                mg = fft_sizes%mg
    3060       26530 :                np = fft_sizes%numtask
    3061       26530 :                nmray = fft_sizes%nmray
    3062       26530 :                nyzray = fft_sizes%nyzray
    3063             : #if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
    3064             :                length = INT(2*dp_size*MAX(mmax, 1)*MAX(lmax, 1), KIND=C_SIZE_T)
    3065             :                ierr = offload_malloc_pinned_mem(cptr_r1buf, length)
    3066             :                CPASSERT(ierr == 0)
    3067             :                CALL c_f_pointer(cptr_r1buf, fft_scratch_new%fft_scratch%r1buf, (/MAX(mmax, 1), MAX(lmax, 1)/))
    3068             :                length = INT(2*dp_size*MAX(ny, 1)*MAX(nz, 1)*MAX(nx, 1), KIND=C_SIZE_T)
    3069             :                ierr = offload_malloc_pinned_mem(cptr_tbuf, length)
    3070             :                CPASSERT(ierr == 0)
    3071             :                CALL c_f_pointer(cptr_tbuf, fft_scratch_new%fft_scratch%tbuf, (/MAX(ny, 1), MAX(nz, 1), MAX(nx, 1)/))
    3072             : #else
    3073       79590 :                CALL fft_alloc(fft_scratch_new%fft_scratch%r1buf, [mmax, lmax])
    3074      106120 :                CALL fft_alloc(fft_scratch_new%fft_scratch%tbuf, [ny, nz, nx])
    3075             : #endif
    3076       26530 :                fft_scratch_new%fft_scratch%group = fft_sizes%rs_group
    3077       79590 :                CALL fft_alloc(fft_scratch_new%fft_scratch%r2buf, [lg, mg])
    3078       26530 :                nm = nmray*mx2
    3079       26530 :                IF (alltoall_sgl) THEN
    3080          32 :                   ALLOCATE (fft_scratch_new%fft_scratch%ss(mmax, lmax))
    3081          32 :                   ALLOCATE (fft_scratch_new%fft_scratch%tt(nm, 0:np - 1))
    3082             :                ELSE
    3083      106088 :                   ALLOCATE (fft_scratch_new%fft_scratch%rr(nm, 0:np - 1))
    3084             :                END IF
    3085             : 
    3086             :                !set up fft plans
    3087             :                CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(1), fft_type, FWFFT, .TRUE., nz, nx*ny, &
    3088       26530 :                                         fft_scratch_new%fft_scratch%tbuf, fft_scratch_new%fft_scratch%r1buf, fft_plan_style)
    3089             :                CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(2), fft_type, FWFFT, .TRUE., ny, nx*nz, &
    3090       26530 :                                         fft_scratch_new%fft_scratch%r1buf, fft_scratch_new%fft_scratch%tbuf, fft_plan_style)
    3091             :                CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(3), fft_type, FWFFT, .TRUE., n(1), nyzray, &
    3092       26530 :                                         fft_scratch_new%fft_scratch%r1buf, fft_scratch_new%fft_scratch%r2buf, fft_plan_style)
    3093             :                CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(4), fft_type, BWFFT, .TRUE., n(1), nyzray, &
    3094       26530 :                                         fft_scratch_new%fft_scratch%r2buf, fft_scratch_new%fft_scratch%r1buf, fft_plan_style)
    3095             :                CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(5), fft_type, BWFFT, .TRUE., ny, nx*nz, &
    3096       26530 :                                         fft_scratch_new%fft_scratch%tbuf, fft_scratch_new%fft_scratch%r1buf, fft_plan_style)
    3097             :                CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(6), fft_type, BWFFT, .TRUE., nz, nx*ny, &
    3098       26530 :                                         fft_scratch_new%fft_scratch%r1buf, fft_scratch_new%fft_scratch%tbuf, fft_plan_style)
    3099             : 
    3100             :             CASE (300) ! fft3d_ps: block distribution
    3101           0 :                CPASSERT(PRESENT(fft_sizes))
    3102           0 :                mx1 = fft_sizes%mx1
    3103           0 :                mx2 = fft_sizes%mx2
    3104           0 :                my1 = fft_sizes%my1
    3105           0 :                mz2 = fft_sizes%mz2
    3106           0 :                mcx2 = fft_sizes%mcx2
    3107           0 :                lg = fft_sizes%lg
    3108           0 :                mg = fft_sizes%mg
    3109           0 :                nmax = fft_sizes%nmax
    3110           0 :                nmray = fft_sizes%nmray
    3111           0 :                nyzray = fft_sizes%nyzray
    3112           0 :                m1 = fft_sizes%r_dim(1)
    3113           0 :                m2 = fft_sizes%r_dim(2)
    3114           0 :                nbx = fft_sizes%nbx
    3115           0 :                nbz = fft_sizes%nbz
    3116           0 :                CALL fft_alloc(fft_scratch_new%fft_scratch%p1buf, [mx1*my1, n(3)])
    3117           0 :                CALL fft_alloc(fft_scratch_new%fft_scratch%p6buf, [lg, mg])
    3118             : #if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
    3119             :                length = INT(2*dp_size*MAX(n(3), 1)*MAX(mx1*my1, 1), KIND=C_SIZE_T)
    3120             :                ierr = offload_malloc_pinned_mem(cptr_p2buf, length)
    3121             :                CPASSERT(ierr == 0)
    3122             :                CALL c_f_pointer(cptr_p2buf, fft_scratch_new%fft_scratch%p2buf, (/MAX(n(3), 1), MAX(mx1*my1, 1)/))
    3123             :                length = INT(2*dp_size*MAX(mx2*mz2, 1)*MAX(n(2), 1), KIND=C_SIZE_T)
    3124             :                ierr = offload_malloc_pinned_mem(cptr_p3buf, length)
    3125             :                CPASSERT(ierr == 0)
    3126             :                CALL c_f_pointer(cptr_p3buf, fft_scratch_new%fft_scratch%p3buf, (/MAX(mx2*mz2, 1), MAX(n(2), 1)/))
    3127             :                length = INT(2*dp_size*MAX(n(2), 1)*MAX(mx2*mz2, 1), KIND=C_SIZE_T)
    3128             :                ierr = offload_malloc_pinned_mem(cptr_p4buf, length)
    3129             :                CPASSERT(ierr == 0)
    3130             :                CALL c_f_pointer(cptr_p4buf, fft_scratch_new%fft_scratch%p4buf, (/MAX(n(2), 1), MAX(mx2*mz2, 1)/))
    3131             :                length = INT(2*dp_size*MAX(nyzray, 1)*MAX(n(1), 1), KIND=C_SIZE_T)
    3132             :                ierr = offload_malloc_pinned_mem(cptr_p5buf, length)
    3133             :                CPASSERT(ierr == 0)
    3134             :                CALL c_f_pointer(cptr_p5buf, fft_scratch_new%fft_scratch%p5buf, (/MAX(nyzray, 1), MAX(n(1), 1)/))
    3135             :                length = INT(2*dp_size*MAX(mg, 1)*MAX(lg, 1), KIND=C_SIZE_T)
    3136             :                ierr = offload_malloc_pinned_mem(cptr_p7buf, length)
    3137             :                CPASSERT(ierr == 0)
    3138             :                CALL c_f_pointer(cptr_p7buf, fft_scratch_new%fft_scratch%p7buf, (/MAX(mg, 1), MAX(lg, 1)/))
    3139             : #else
    3140           0 :                CALL fft_alloc(fft_scratch_new%fft_scratch%p2buf, [n(3), mx1*my1])
    3141           0 :                CALL fft_alloc(fft_scratch_new%fft_scratch%p3buf, [mx2*mz2, n(2)])
    3142           0 :                CALL fft_alloc(fft_scratch_new%fft_scratch%p4buf, [n(2), mx2*mz2])
    3143           0 :                CALL fft_alloc(fft_scratch_new%fft_scratch%p5buf, [nyzray, n(1)])
    3144           0 :                CALL fft_alloc(fft_scratch_new%fft_scratch%p7buf, [mg, lg])
    3145             : #endif
    3146           0 :                IF (alltoall_sgl) THEN
    3147           0 :                   ALLOCATE (fft_scratch_new%fft_scratch%yzbuf_sgl(mg*lg))
    3148           0 :                   ALLOCATE (fft_scratch_new%fft_scratch%xzbuf_sgl(n(2)*mx2*mz2))
    3149             :                ELSE
    3150           0 :                   ALLOCATE (fft_scratch_new%fft_scratch%yzbuf(mg*lg))
    3151           0 :                   ALLOCATE (fft_scratch_new%fft_scratch%xzbuf(n(2)*mx2*mz2))
    3152             :                END IF
    3153           0 :                ALLOCATE (fft_scratch_new%fft_scratch%pgrid(0:m1 - 1, 0:m2 - 1))
    3154           0 :                ALLOCATE (fft_scratch_new%fft_scratch%xcor(nbx))
    3155           0 :                ALLOCATE (fft_scratch_new%fft_scratch%zcor(nbz))
    3156           0 :                ALLOCATE (fft_scratch_new%fft_scratch%pzcoord(0:np - 1))
    3157             :                ALLOCATE (fft_scratch_new%fft_scratch%xzcount(0:np - 1), &
    3158           0 :                          fft_scratch_new%fft_scratch%yzcount(0:np - 1))
    3159             :                ALLOCATE (fft_scratch_new%fft_scratch%xzdispl(0:np - 1), &
    3160           0 :                          fft_scratch_new%fft_scratch%yzdispl(0:np - 1))
    3161           0 :                fft_scratch_new%fft_scratch%group = fft_sizes%rs_group
    3162             : 
    3163           0 :                dim = fft_sizes%rs_group%num_pe_cart
    3164           0 :                pos = fft_sizes%rs_group%mepos_cart
    3165           0 :                fft_scratch_new%fft_scratch%mip = fft_sizes%rs_group%mepos
    3166           0 :                fft_scratch_new%fft_scratch%dim = dim
    3167           0 :                fft_scratch_new%fft_scratch%pos = pos
    3168           0 :                mcz1 = fft_sizes%mcz1
    3169           0 :                mcz2 = fft_sizes%mcz2
    3170           0 :                ALLOCATE (fft_scratch_new%fft_scratch%rbuf1(mx2*my1*mcz2, 0:DIM(2) - 1))
    3171           0 :                ALLOCATE (fft_scratch_new%fft_scratch%rbuf2(mx1*my1*mcz2, 0:DIM(2) - 1))
    3172             : 
    3173           0 :                dims = (/.FALSE., .TRUE./)
    3174           0 :                CALL fft_scratch_new%fft_scratch%cart_sub_comm(2)%from_sub(fft_sizes%rs_group, dims)
    3175             : 
    3176             :                !initialise pgcube
    3177           0 :                DO i = 0, DIM(2) - 1
    3178           0 :                   coord = (/pos(1), i/)
    3179           0 :                   CALL fft_sizes%rs_group%rank_cart(coord, fft_scratch_new%fft_scratch%pgcube(i, 2))
    3180             :                END DO
    3181             : 
    3182             :                !initialise pgrid
    3183           0 :                DO ix = 0, m1 - 1
    3184           0 :                   DO iz = 0, m2 - 1
    3185           0 :                      coord = (/ix, iz/)
    3186           0 :                      CALL fft_sizes%rs_group%rank_cart(coord, fft_scratch_new%fft_scratch%pgrid(ix, iz))
    3187             :                   END DO
    3188             :                END DO
    3189             : 
    3190             :                !initialise pzcoord
    3191           0 :                DO i = 0, np - 1
    3192           0 :                   CALL fft_sizes%rs_group%coords(i, pcoord)
    3193           0 :                   fft_scratch_new%fft_scratch%pzcoord(i) = pcoord(2)
    3194             :                END DO
    3195             : 
    3196             :                !set up fft plans
    3197             :                CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(1), fft_type, FWFFT, .TRUE., n(3), mx1*my1, &
    3198           0 :                                         fft_scratch_new%fft_scratch%p1buf, fft_scratch_new%fft_scratch%p2buf, fft_plan_style)
    3199             :                CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(2), fft_type, FWFFT, .TRUE., n(2), mx2*mz2, &
    3200           0 :                                         fft_scratch_new%fft_scratch%p3buf, fft_scratch_new%fft_scratch%p4buf, fft_plan_style)
    3201             :                CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(3), fft_type, FWFFT, .TRUE., n(1), nyzray, &
    3202           0 :                                         fft_scratch_new%fft_scratch%p5buf, fft_scratch_new%fft_scratch%p6buf, fft_plan_style)
    3203             :                CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(4), fft_type, BWFFT, .TRUE., n(1), nyzray, &
    3204           0 :                                         fft_scratch_new%fft_scratch%p6buf, fft_scratch_new%fft_scratch%p7buf, fft_plan_style)
    3205             :                CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(5), fft_type, BWFFT, .TRUE., n(2), mx2*mz2, &
    3206           0 :                                         fft_scratch_new%fft_scratch%p4buf, fft_scratch_new%fft_scratch%p3buf, fft_plan_style)
    3207             :                CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(6), fft_type, BWFFT, .TRUE., n(3), mx1*my1, &
    3208           0 :                                         fft_scratch_new%fft_scratch%p3buf, fft_scratch_new%fft_scratch%p1buf, fft_plan_style)
    3209             : 
    3210             :             CASE (400) ! serial FFT
    3211       13243 :                np = 0
    3212       13243 :                CALL fft_alloc(fft_scratch_new%fft_scratch%ziptr, n)
    3213       13243 :                CALL fft_alloc(fft_scratch_new%fft_scratch%zoptr, n)
    3214             : 
    3215             :                !in place plans
    3216             :                CALL fft_create_plan_3d(fft_scratch_new%fft_scratch%fft_plan(1), fft_type, .TRUE., FWFFT, n, &
    3217       13243 :                                        fft_scratch_new%fft_scratch%ziptr, fft_scratch_new%fft_scratch%ziptr, fft_plan_style)
    3218             :                CALL fft_create_plan_3d(fft_scratch_new%fft_scratch%fft_plan(2), fft_type, .TRUE., BWFFT, n, &
    3219       13243 :                                        fft_scratch_new%fft_scratch%ziptr, fft_scratch_new%fft_scratch%ziptr, fft_plan_style)
    3220             :                ! out of place plans
    3221             :                CALL fft_create_plan_3d(fft_scratch_new%fft_scratch%fft_plan(3), fft_type, .FALSE., FWFFT, n, &
    3222       13243 :                                        fft_scratch_new%fft_scratch%ziptr, fft_scratch_new%fft_scratch%zoptr, fft_plan_style)
    3223             :                CALL fft_create_plan_3d(fft_scratch_new%fft_scratch%fft_plan(4), fft_type, .FALSE., BWFFT, n, &
    3224       53024 :                                        fft_scratch_new%fft_scratch%ziptr, fft_scratch_new%fft_scratch%zoptr, fft_plan_style)
    3225             : 
    3226             :             END SELECT
    3227             : 
    3228       39781 :             NULLIFY (fft_scratch_new%fft_scratch_next)
    3229             :             fft_scratch_new%fft_scratch%fft_scratch_id = &
    3230       39781 :                fft_scratch_last%fft_scratch%fft_scratch_id + 1
    3231       39781 :             fft_scratch_new%fft_scratch%in_use = .TRUE.
    3232      159124 :             fft_scratch_new%fft_scratch%nfft = n
    3233       39781 :             fft_scratch_last%fft_scratch_next => fft_scratch_new
    3234       39781 :             fft_scratch_new%fft_scratch%tf_type = tf_type
    3235       39781 :             fft_scratch => fft_scratch_new%fft_scratch
    3236       39781 :             EXIT
    3237             : 
    3238             :          END IF
    3239             :       END DO
    3240             : 
    3241     3298884 : !$OMP ATOMIC READ
    3242             :       fft_scratch%last_tick = tick_fft_pool
    3243             : 
    3244     3298884 :       CALL timestop(handle)
    3245             : 
    3246     3298884 :    END SUBROUTINE get_fft_scratch
    3247             : 
    3248             : ! **************************************************************************************************
    3249             : !> \brief ...
    3250             : !> \param fft_scratch ...
    3251             : ! **************************************************************************************************
    3252     3298884 :    SUBROUTINE release_fft_scratch(fft_scratch)
    3253             : 
    3254             :       TYPE(fft_scratch_type), POINTER                    :: fft_scratch
    3255             : 
    3256             :       INTEGER                                            :: scratch_id
    3257             :       TYPE(fft_scratch_pool_type), POINTER               :: fft_scratch_current
    3258             : 
    3259     3298884 :       scratch_id = fft_scratch%fft_scratch_id
    3260             : 
    3261     3298884 :       fft_scratch_current => fft_scratch_first
    3262     7017234 :       DO
    3263    10316118 :          IF (ASSOCIATED(fft_scratch_current)) THEN
    3264    10316118 :             IF (scratch_id == fft_scratch_current%fft_scratch%fft_scratch_id) THEN
    3265     3298884 :                fft_scratch%in_use = .FALSE.
    3266     3298884 :                NULLIFY (fft_scratch)
    3267     3298884 :                EXIT
    3268             :             END IF
    3269     7017234 :             fft_scratch_current => fft_scratch_current%fft_scratch_next
    3270             :          ELSE
    3271             :             ! We cannot find the scratch type in this pool
    3272           0 :             CPABORT("Invalid scratch type.")
    3273           0 :             EXIT
    3274             :          END IF
    3275             :       END DO
    3276             : 
    3277     3298884 :    END SUBROUTINE release_fft_scratch
    3278             : 
    3279             : ! **************************************************************************************************
    3280             : !> \brief ...
    3281             : !> \param rs ...
    3282             : !> \param scount ...
    3283             : !> \param sdispl ...
    3284             : !> \param rq ...
    3285             : !> \param rcount ...
    3286             : !> \param rdispl ...
    3287             : !> \param group ...
    3288             : ! **************************************************************************************************
    3289           0 :    SUBROUTINE sparse_alltoall(rs, scount, sdispl, rq, rcount, rdispl, group)
    3290             :       COMPLEX(KIND=dp), DIMENSION(:), POINTER            :: rs
    3291             :       INTEGER, DIMENSION(:), POINTER                     :: scount, sdispl
    3292             :       COMPLEX(KIND=dp), DIMENSION(:), POINTER            :: rq
    3293             :       INTEGER, DIMENSION(:), POINTER                     :: rcount, rdispl
    3294             : 
    3295             :       CLASS(mp_comm_type), INTENT(IN)                    :: group
    3296             : 
    3297           0 :       COMPLEX(KIND=dp), DIMENSION(:), POINTER            :: msgin, msgout
    3298             :       INTEGER                                            :: ip, n, nr, ns, pos
    3299           0 :       TYPE(mp_request_type), ALLOCATABLE, DIMENSION(:)   :: rreq, sreq
    3300             : 
    3301           0 :       CALL group%sync()
    3302           0 :       n = group%num_pe
    3303           0 :       pos = group%mepos
    3304           0 :       ALLOCATE (sreq(0:n - 1))
    3305           0 :       ALLOCATE (rreq(0:n - 1))
    3306           0 :       nr = 0
    3307           0 :       DO ip = 0, n - 1
    3308           0 :          IF (rcount(ip) == 0) CYCLE
    3309           0 :          IF (ip == pos) CYCLE
    3310           0 :          msgout => rq(rdispl(ip) + 1:rdispl(ip) + rcount(ip))
    3311           0 :          CALL group%irecv(msgout, ip, rreq(nr))
    3312           0 :          nr = nr + 1
    3313             :       END DO
    3314           0 :       ns = 0
    3315           0 :       DO ip = 0, n - 1
    3316           0 :          IF (scount(ip) == 0) CYCLE
    3317           0 :          IF (ip == pos) CYCLE
    3318           0 :          msgin => rs(sdispl(ip) + 1:sdispl(ip) + scount(ip))
    3319           0 :          CALL group%isend(msgin, ip, sreq(ns))
    3320           0 :          ns = ns + 1
    3321             :       END DO
    3322           0 :       IF (rcount(pos) /= 0) THEN
    3323           0 :          IF (rcount(pos) /= scount(pos)) CPABORT("Invalid count.")
    3324           0 :          rq(rdispl(pos) + 1:rdispl(pos) + rcount(pos)) = rs(sdispl(pos) + 1:sdispl(pos) + scount(pos))
    3325             :       END IF
    3326           0 :       CALL mp_waitall(sreq(0:ns - 1))
    3327           0 :       CALL mp_waitall(rreq(0:nr - 1))
    3328           0 :       DEALLOCATE (sreq)
    3329           0 :       DEALLOCATE (rreq)
    3330           0 :       CALL group%sync()
    3331             : 
    3332           0 :    END SUBROUTINE sparse_alltoall
    3333             : 
    3334             : ! **************************************************************************************************
    3335             : !> \brief  test data structures for equality. It is assumed that if they are
    3336             : !>         different for one mpi task they are different for all (??)
    3337             : !> \param fft_size_1 ...
    3338             : !> \param fft_size_2 ...
    3339             : !> \param equal ...
    3340             : ! **************************************************************************************************
    3341     2637200 :    SUBROUTINE is_equal(fft_size_1, fft_size_2, equal)
    3342             :       TYPE(fft_scratch_sizes)                            :: fft_size_1, fft_size_2
    3343             :       LOGICAL                                            :: equal
    3344             : 
    3345             :       equal = .TRUE.
    3346             : 
    3347     2637200 :       equal = equal .AND. fft_size_1%nx == fft_size_2%nx
    3348     2637200 :       equal = equal .AND. fft_size_1%ny == fft_size_2%ny
    3349     2637200 :       equal = equal .AND. fft_size_1%nz == fft_size_2%nz
    3350             : 
    3351     2637200 :       equal = equal .AND. fft_size_1%lmax == fft_size_2%lmax
    3352     2637200 :       equal = equal .AND. fft_size_1%mmax == fft_size_2%mmax
    3353     2637200 :       equal = equal .AND. fft_size_1%nmax == fft_size_2%nmax
    3354             : 
    3355     2637200 :       equal = equal .AND. fft_size_1%mx1 == fft_size_2%mx1
    3356     2637200 :       equal = equal .AND. fft_size_1%mx2 == fft_size_2%mx2
    3357     2637200 :       equal = equal .AND. fft_size_1%mx3 == fft_size_2%mx3
    3358             : 
    3359     2637200 :       equal = equal .AND. fft_size_1%my1 == fft_size_2%my1
    3360     2637200 :       equal = equal .AND. fft_size_1%my2 == fft_size_2%my2
    3361     2637200 :       equal = equal .AND. fft_size_1%my3 == fft_size_2%my3
    3362             : 
    3363     2637200 :       equal = equal .AND. fft_size_1%mcz1 == fft_size_2%mcz1
    3364     2637200 :       equal = equal .AND. fft_size_1%mcx2 == fft_size_2%mcx2
    3365     2637200 :       equal = equal .AND. fft_size_1%mcz2 == fft_size_2%mcz2
    3366     2637200 :       equal = equal .AND. fft_size_1%mcy3 == fft_size_2%mcy3
    3367             : 
    3368     2637200 :       equal = equal .AND. fft_size_1%lg == fft_size_2%lg
    3369     2637200 :       equal = equal .AND. fft_size_1%mg == fft_size_2%mg
    3370             : 
    3371     2637200 :       equal = equal .AND. fft_size_1%nbx == fft_size_2%nbx
    3372     2637200 :       equal = equal .AND. fft_size_1%nbz == fft_size_2%nbz
    3373             : 
    3374     2637200 :       equal = equal .AND. fft_size_1%nmray == fft_size_2%nmray
    3375     2637200 :       equal = equal .AND. fft_size_1%nyzray == fft_size_2%nyzray
    3376             : 
    3377     2637200 :       equal = equal .AND. fft_size_1%rs_group == fft_size_2%rs_group
    3378             : 
    3379     7911600 :       equal = equal .AND. ALL(fft_size_1%g_pos == fft_size_2%g_pos)
    3380     7911600 :       equal = equal .AND. ALL(fft_size_1%r_pos == fft_size_2%r_pos)
    3381     7911600 :       equal = equal .AND. ALL(fft_size_1%r_dim == fft_size_2%r_dim)
    3382             : 
    3383     2637200 :       equal = equal .AND. fft_size_1%numtask == fft_size_2%numtask
    3384             : 
    3385     2637200 :    END SUBROUTINE is_equal
    3386             : 
    3387           0 : END MODULE fft_tools

Generated by: LCOV version 1.15