LCOV - code coverage report
Current view: top level - src/pw - fft_tools.F (source / functions) Hit Total Coverage
Test: CP2K Regtests (git:b8e0b09) Lines: 716 1356 52.8 %
Date: 2024-08-31 06:31:37 Functions: 18 31 58.1 %

          Line data    Source code
       1             : !--------------------------------------------------------------------------------------------------!
       2             : !   CP2K: A general program to perform molecular dynamics simulations                              !
       3             : !   Copyright 2000-2024 CP2K developers group <https://cp2k.org>                                   !
       4             : !                                                                                                  !
       5             : !   SPDX-License-Identifier: GPL-2.0-or-later                                                      !
       6             : !--------------------------------------------------------------------------------------------------!
       7             : 
       8             : ! **************************************************************************************************
       9             : !> \par History
      10             : !>      JGH (30-Nov-2000): ESSL FFT Library added
      11             : !>      JGH (05-Jan-2001): Added SGI library FFT
      12             : !>      JGH (14-Jan-2001): Added parallel 3d FFT
      13             : !>      JGH (10-Feb-2006): New interface type
      14             : !>      JGH (31-Mar-2008): Remove local allocates and reshapes (performance)
      15             : !>                         Possible problems can be related with setting arrays
      16             : !>                         not to zero
      17             : !>                         Some interfaces could be further simplified by avoiding
      18             : !>                         an initial copy. However, this assumes contiguous arrays
      19             : !>      IAB (15-Oct-2008): Moved mp_cart_sub calls out of cube_tranpose_* and into
      20             : !>                         fft_scratch type, reducing number of calls dramatically
      21             : !>      IAB (05-Dec-2008): Moved all other non-essential MPI calls into scratch type
      22             : !>      IAB (09-Jan-2009): Added fft_plan_type to store FFT data, including cached FFTW plans
      23             : !>      IAB (13-Feb-2009): Extended plan caching to serial 3D FFT (fft3d_s)
      24             : !>      IAB (09-Oct-2009): Added OpenMP directives to parallel 3D FFT
      25             : !>                         (c) The Numerical Algorithms Group (NAG) Ltd, 2008-2009 on behalf of the HECToR project
      26             : !> \author JGH
      27             : ! **************************************************************************************************
      28             : MODULE fft_tools
      29             :    USE ISO_C_BINDING,                   ONLY: C_F_POINTER,&
      30             :                                               C_LOC,&
      31             :                                               C_PTR,&
      32             :                                               C_SIZE_T
      33             :    USE cp_log_handling,                 ONLY: cp_logger_get_default_io_unit
      34             :    USE fft_lib,                         ONLY: &
      35             :         fft_1dm, fft_3d, fft_alloc, fft_create_plan_1dm, fft_create_plan_3d, fft_dealloc, &
      36             :         fft_destroy_plan, fft_do_cleanup, fft_do_init, fft_get_lengths, fft_library
      37             :    USE fft_plan,                        ONLY: fft_plan_type
      38             :    USE kinds,                           ONLY: dp,&
      39             :                                               dp_size,&
      40             :                                               sp
      41             :    USE mathconstants,                   ONLY: z_zero
      42             :    USE message_passing,                 ONLY: mp_cart_type,&
      43             :                                               mp_comm_null,&
      44             :                                               mp_comm_type,&
      45             :                                               mp_request_type,&
      46             :                                               mp_waitall
      47             :    USE offload_api,                     ONLY: offload_free_pinned_mem,&
      48             :                                               offload_malloc_pinned_mem
      49             : 
      50             : !$ USE OMP_LIB, ONLY: omp_get_max_threads, omp_get_thread_num, omp_get_num_threads
      51             : 
      52             : #include "../base/base_uses.f90"
      53             : 
      54             :    IMPLICIT NONE
      55             : 
      56             :    CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'fft_tools'
      57             : 
      58             :    ! Types for the pool of scratch data needed in FFT routines
      59             :    ! keep the subroutine "is_equal" up-to-date
      60             :    ! needs a default initialization
      61             :    TYPE fft_scratch_sizes
      62             :       INTEGER                              :: nx = 0, ny = 0, nz = 0
      63             :       INTEGER                              :: lmax = 0, mmax = 0, nmax = 0
      64             :       INTEGER                              :: mx1 = 0, mx2 = 0, mx3 = 0
      65             :       INTEGER                              :: my1 = 0, my2 = 0, my3 = 0
      66             :       INTEGER                              :: mz1 = 0, mz2 = 0, mz3 = 0
      67             :       INTEGER                              :: mcz1 = 0, mcz2 = 0, mcy3 = 0, mcx2 = 0
      68             :       INTEGER                              :: lg = 0, mg = 0
      69             :       INTEGER                              :: nbx = 0, nbz = 0
      70             :       INTEGER                              :: nmray = 0, nyzray = 0
      71             :       TYPE(mp_cart_type)                   :: rs_group = mp_cart_type()
      72             :       INTEGER, DIMENSION(2)                :: g_pos = 0, r_pos = 0, r_dim = 0
      73             :       INTEGER                              :: numtask = 0
      74             :    END TYPE fft_scratch_sizes
      75             : 
      76             :    TYPE fft_scratch_type
      77             :       INTEGER                              :: fft_scratch_id = 0
      78             :       INTEGER                              :: tf_type = -1
      79             :       LOGICAL                              :: in_use = .TRUE.
      80             :       TYPE(mp_comm_type)                   :: group = mp_comm_type()
      81             :       INTEGER, DIMENSION(3)                :: nfft = -1
      82             :       ! to be used in cube_transpose_* routines
      83             :       TYPE(mp_cart_type), DIMENSION(2)     :: cart_sub_comm = mp_cart_type()
      84             :       INTEGER, DIMENSION(2)                :: dim = -1, pos = -1
      85             :       ! to be used in fft3d_s
      86             :       COMPLEX(KIND=dp), DIMENSION(:, :, :), POINTER, CONTIGUOUS &
      87             :          :: ziptr => NULL(), zoptr => NULL()
      88             :       ! to be used in fft3d_ps : block distribution
      89             :       COMPLEX(KIND=dp), DIMENSION(:, :), CONTIGUOUS, POINTER &
      90             :          :: p1buf => NULL(), p2buf => NULL(), p3buf => NULL(), p4buf => NULL(), &
      91             :             p5buf => NULL(), p6buf => NULL(), p7buf => NULL()
      92             :       ! to be used in fft3d_ps : plane distribution
      93             :       COMPLEX(KIND=dp), DIMENSION(:, :), POINTER, CONTIGUOUS &
      94             :          :: r1buf => NULL(), r2buf => NULL()
      95             :       COMPLEX(KIND=dp), DIMENSION(:, :, :), POINTER, CONTIGUOUS &
      96             :          :: tbuf => NULL()
      97             :       ! to be used in fft3d_pb
      98             :       COMPLEX(KIND=dp), DIMENSION(:, :), POINTER, CONTIGUOUS &
      99             :          :: a1buf => NULL(), a2buf => NULL(), a3buf => NULL(), &
     100             :             a4buf => NULL(), a5buf => NULL(), a6buf => NULL()
     101             :       ! to be used in communication routines
     102             :       INTEGER, DIMENSION(:), CONTIGUOUS, POINTER       :: scount => NULL(), rcount => NULL(), sdispl => NULL(), rdispl => NULL()
     103             :       INTEGER, DIMENSION(:, :), CONTIGUOUS, POINTER     :: pgcube => NULL()
     104             :       INTEGER, DIMENSION(:), CONTIGUOUS, POINTER       :: xzcount => NULL(), yzcount => NULL(), xzdispl => NULL(), yzdispl => NULL()
     105             :       INTEGER                              :: in = 0, mip = -1
     106             :       REAL(KIND=dp)                        :: rsratio = 1.0_dp
     107             :       COMPLEX(KIND=dp), DIMENSION(:), POINTER, CONTIGUOUS &
     108             :          :: xzbuf => NULL(), yzbuf => NULL()
     109             :       COMPLEX(KIND=sp), DIMENSION(:), POINTER, CONTIGUOUS &
     110             :          :: xzbuf_sgl => NULL(), yzbuf_sgl => NULL()
     111             :       COMPLEX(KIND=dp), DIMENSION(:, :), POINTER, CONTIGUOUS &
     112             :          :: rbuf1 => NULL(), rbuf2 => NULL(), rbuf3 => NULL(), rbuf4 => NULL(), &
     113             :             rbuf5 => NULL(), rbuf6 => NULL(), rr => NULL()
     114             :       COMPLEX(KIND=sp), DIMENSION(:, :), POINTER, CONTIGUOUS &
     115             :          :: ss => NULL(), tt => NULL()
     116             :       INTEGER, DIMENSION(:, :), POINTER, CONTIGUOUS     :: pgrid => NULL()
     117             :       INTEGER, DIMENSION(:), POINTER, CONTIGUOUS       :: xcor => NULL(), zcor => NULL(), pzcoord => NULL()
     118             :       TYPE(fft_scratch_sizes)              :: sizes = fft_scratch_sizes()
     119             :       TYPE(fft_plan_type), DIMENSION(6)   :: fft_plan = fft_plan_type()
     120             :       INTEGER                              :: last_tick = -1
     121             :    END TYPE fft_scratch_type
     122             : 
     123             :    TYPE fft_scratch_pool_type
     124             :       TYPE(fft_scratch_type), POINTER       :: fft_scratch => NULL()
     125             :       TYPE(fft_scratch_pool_type), POINTER  :: fft_scratch_next => NULL()
     126             :    END TYPE fft_scratch_pool_type
     127             : 
     128             :    INTEGER, SAVE                           :: init_fft_pool = 0
     129             :    ! the clock for fft pool. Allows to identify the least recently used scratch
     130             :    INTEGER, SAVE                           :: tick_fft_pool = 0
     131             :    ! limit the number of scratch pools to fft_pool_scratch_limit.
     132             :    INTEGER, SAVE                           :: fft_pool_scratch_limit = 15
     133             :    TYPE(fft_scratch_pool_type), POINTER, SAVE:: fft_scratch_first
     134             :    ! END of types for the pool of scratch data needed in FFT routines
     135             : 
     136             :    PRIVATE
     137             :    PUBLIC :: init_fft, fft3d, finalize_fft
     138             :    PUBLIC :: fft_alloc, fft_dealloc
     139             :    PUBLIC :: fft_radix_operations, fft_fw1d
     140             :    PUBLIC :: FWFFT, BWFFT
     141             :    PUBLIC :: FFT_RADIX_CLOSEST, FFT_RADIX_NEXT
     142             :    PUBLIC :: FFT_RADIX_NEXT_ODD
     143             : 
     144             :    INTEGER, PARAMETER :: FWFFT = +1, BWFFT = -1
     145             :    INTEGER, PARAMETER :: FFT_RADIX_CLOSEST = 493, FFT_RADIX_NEXT = 494
     146             :    INTEGER, PARAMETER :: FFT_RADIX_ALLOWED = 495, FFT_RADIX_DISALLOWED = 496
     147             :    INTEGER, PARAMETER :: FFT_RADIX_NEXT_ODD = 497
     148             : 
     149             :    REAL(KIND=dp), PARAMETER :: ratio_sparse_alltoall = 0.5_dp
     150             : 
     151             :    ! these saved variables are FFT globals
     152             :    INTEGER, SAVE :: fft_type = 0
     153             :    LOGICAL, SAVE :: alltoall_sgl = .FALSE.
     154             :    LOGICAL, SAVE :: use_fftsg_sizes = .TRUE.
     155             :    INTEGER, SAVE :: fft_plan_style = 1
     156             : 
     157             :    ! these are only needed for pw_gpu (-D__OFFLOAD)
     158             :    PUBLIC :: get_fft_scratch, release_fft_scratch
     159             :    PUBLIC :: cube_transpose_1, cube_transpose_2
     160             :    PUBLIC :: yz_to_x, x_to_yz, xz_to_yz, yz_to_xz
     161             :    PUBLIC :: fft_scratch_sizes, fft_scratch_type
     162             :    PUBLIC :: fft_type, fft_plan_style
     163             : 
     164             :    INTERFACE fft3d
     165             :       MODULE PROCEDURE fft3d_s, fft3d_ps, fft3d_pb
     166             :    END INTERFACE
     167             : 
     168             : ! **************************************************************************************************
     169             : 
     170             : CONTAINS
     171             : 
     172             : ! **************************************************************************************************
     173             : !> \brief ...
     174             : !> \param fftlib ...
     175             : !> \param alltoall ...
     176             : !> \param fftsg_sizes ...
     177             : !> \param pool_limit ...
     178             : !> \param wisdom_file ...
     179             : !> \param plan_style ...
     180             : !> \author JGH
     181             : ! **************************************************************************************************
     182        9041 :    SUBROUTINE init_fft(fftlib, alltoall, fftsg_sizes, pool_limit, wisdom_file, &
     183             :                        plan_style)
     184             : 
     185             :       CHARACTER(LEN=*), INTENT(IN)                       :: fftlib
     186             :       LOGICAL, INTENT(IN)                                :: alltoall, fftsg_sizes
     187             :       INTEGER, INTENT(IN)                                :: pool_limit
     188             :       CHARACTER(LEN=*), INTENT(IN)                       :: wisdom_file
     189             :       INTEGER, INTENT(IN)                                :: plan_style
     190             : 
     191        9041 :       use_fftsg_sizes = fftsg_sizes
     192        9041 :       alltoall_sgl = alltoall
     193        9041 :       fft_pool_scratch_limit = pool_limit
     194        9041 :       fft_type = fft_library(fftlib)
     195        9041 :       fft_plan_style = plan_style
     196             : 
     197        9041 :       IF (fft_type <= 0) CPABORT("Unknown FFT library: "//TRIM(fftlib))
     198             : 
     199        9041 :       CALL fft_do_init(fft_type, wisdom_file)
     200             : 
     201             :       ! setup the FFT scratch pool, if one is associated, clear first
     202        9041 :       CALL release_fft_scratch_pool()
     203        9041 :       CALL init_fft_scratch_pool()
     204             : 
     205        9041 :    END SUBROUTINE init_fft
     206             : 
     207             : ! **************************************************************************************************
     208             : !> \brief does whatever is needed to finalize the current fft setup
     209             : !> \param para_env ...
     210             : !> \param wisdom_file ...
     211             : !> \par History
     212             : !>      10.2007 created [Joost VandeVondele]
     213             : ! **************************************************************************************************
     214        8831 :    SUBROUTINE finalize_fft(para_env, wisdom_file)
     215             :       CLASS(mp_comm_type)                    :: para_env
     216             :       CHARACTER(LEN=*), INTENT(IN)                       :: wisdom_file
     217             : 
     218             : ! release the FFT scratch pool
     219             : 
     220        8831 :       CALL release_fft_scratch_pool()
     221             : 
     222             :       ! finalize fft libs
     223             : 
     224        8831 :       CALL fft_do_cleanup(fft_type, wisdom_file, para_env%is_source())
     225             : 
     226        8831 :    END SUBROUTINE finalize_fft
     227             : 
     228             : ! **************************************************************************************************
     229             : !> \brief Determine the allowed lengths of FFT's   '''
     230             : !> \param radix_in ...
     231             : !> \param radix_out ...
     232             : !> \param operation ...
     233             : !> \par History
     234             : !>      new library structure (JGH)
     235             : !> \author Ari Seitsonen
     236             : ! **************************************************************************************************
     237      108914 :    SUBROUTINE fft_radix_operations(radix_in, radix_out, operation)
     238             : 
     239             :       INTEGER, INTENT(IN)                                :: radix_in
     240             :       INTEGER, INTENT(OUT)                               :: radix_out
     241             :       INTEGER, INTENT(IN)                                :: operation
     242             : 
     243             :       INTEGER, PARAMETER                                 :: fft_type_sg = 1
     244             : 
     245             :       INTEGER                                            :: i, iloc, ldata
     246      108914 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: DATA
     247             : 
     248      108914 :       ldata = 1024
     249      108914 :       ALLOCATE (DATA(ldata))
     250   111636850 :       DATA = -1
     251             : 
     252             :       ! if the user wants to use fftsg sizes go for it
     253      108914 :       IF (use_fftsg_sizes) THEN
     254      108410 :          CALL fft_get_lengths(fft_type_sg, DATA, ldata)
     255             :       ELSE
     256         504 :          CALL fft_get_lengths(fft_type, DATA, ldata)
     257             :       END IF
     258             : 
     259      108914 :       iloc = 0
     260     1205252 :       DO i = 1, ldata
     261     1205252 :          IF (DATA(i) == radix_in) THEN
     262             :             iloc = i
     263             :             EXIT
     264             :          ELSE
     265     1175638 :             IF (OPERATION == FFT_RADIX_ALLOWED) THEN
     266             :                CYCLE
     267     1175638 :             ELSE IF (DATA(i) > radix_in) THEN
     268             :                iloc = i
     269             :                EXIT
     270             :             END IF
     271             :          END IF
     272             :       END DO
     273             : 
     274      108914 :       IF (iloc == 0) THEN
     275           0 :          CPABORT("Index to radix array not found.")
     276             :       END IF
     277             : 
     278      108914 :       IF (OPERATION == FFT_RADIX_ALLOWED) THEN
     279           0 :          IF (DATA(iloc) == radix_in) THEN
     280           0 :             radix_out = FFT_RADIX_ALLOWED
     281             :          ELSE
     282           0 :             radix_out = FFT_RADIX_DISALLOWED
     283             :          END IF
     284             : 
     285      108914 :       ELSE IF (OPERATION == FFT_RADIX_CLOSEST) THEN
     286         270 :          IF (DATA(iloc) == radix_in) THEN
     287         102 :             radix_out = DATA(iloc)
     288             :          ELSE
     289         168 :             IF (ABS(DATA(iloc - 1) - radix_in) <= &
     290             :                 ABS(DATA(iloc) - radix_in)) THEN
     291         162 :                radix_out = DATA(iloc - 1)
     292             :             ELSE
     293           6 :                radix_out = DATA(iloc)
     294             :             END IF
     295             :          END IF
     296             : 
     297      108644 :       ELSE IF (OPERATION == FFT_RADIX_NEXT) THEN
     298      107276 :          radix_out = DATA(iloc)
     299             : 
     300        1368 :       ELSE IF (OPERATION == FFT_RADIX_NEXT_ODD) THEN
     301        3028 :          DO i = iloc, ldata
     302        3028 :             IF (MOD(DATA(i), 2) == 1) THEN
     303        1368 :                radix_out = DATA(i)
     304        1368 :                EXIT
     305             :             END IF
     306             :          END DO
     307        1368 :          IF (MOD(radix_out, 2) == 0) THEN
     308           0 :             CPABORT("No odd radix found.")
     309             :          END IF
     310             : 
     311             :       ELSE
     312           0 :          CPABORT("Disallowed radix operation.")
     313             :       END IF
     314             : 
     315      108914 :       DEALLOCATE (DATA)
     316             : 
     317      108914 :    END SUBROUTINE fft_radix_operations
     318             : 
     319             : ! **************************************************************************************************
     320             : !> \brief Performs m 1-D forward FFT-s of size n.
     321             : !> \param n      size of the FFT
     322             : !> \param m      number of FFT-s
     323             : !> \param trans  shape of input and output arrays: [n x m] if it is FALSE, [m x n] if it is TRUE
     324             : !> \param zin    input array
     325             : !> \param zout   output array
     326             : !> \param scale  scaling factor
     327             : !> \param stat   status of the operation, non-zero code indicates an error
     328             : ! **************************************************************************************************
     329         208 :    SUBROUTINE fft_fw1d(n, m, trans, zin, zout, scale, stat)
     330             :       INTEGER, INTENT(in)                                :: n, m
     331             :       LOGICAL, INTENT(in)                                :: trans
     332             :       COMPLEX(kind=dp), DIMENSION(*), INTENT(inout)      :: zin, zout
     333             :       REAL(kind=dp), INTENT(in)                          :: scale
     334             :       INTEGER, INTENT(out)                               :: stat
     335             : 
     336             :       CHARACTER(len=*), PARAMETER                        :: routineN = 'fft_fw1d'
     337             : 
     338             :       INTEGER                                            :: handle
     339             :       TYPE(fft_plan_type)                                :: fft_plan
     340             : 
     341         208 :       CALL timeset(routineN, handle)
     342             : 
     343         208 :       IF (fft_type == 3) THEN
     344         208 :          CALL fft_create_plan_1dm(fft_plan, fft_type, FWFFT, trans, n, m, zin, zout, fft_plan_style)
     345         208 :          CALL fft_1dm(fft_plan, zin, zout, scale, stat)
     346         208 :          CALL fft_destroy_plan(fft_plan)
     347             :       ELSE
     348             :          CALL cp_warn(__LOCATION__, &
     349           0 :                       "FFT library in use cannot handle transformation of an arbitrary length.")
     350           0 :          stat = 1
     351             :       END IF
     352             : 
     353         208 :       CALL timestop(handle)
     354         832 :    END SUBROUTINE fft_fw1d
     355             : 
     356             : ! **************************************************************************************************
     357             : !> \brief Calls the 3D-FFT function from the initialized library
     358             : !> \param fsign ...
     359             : !> \param n ...
     360             : !> \param zin ...
     361             : !> \param zout ...
     362             : !> \param status ...
     363             : !> \param debug ...
     364             : !> \par History
     365             : !>      none
     366             : !> \author JGH
     367             : ! **************************************************************************************************
     368      621188 :    SUBROUTINE fft3d_s(fsign, n, zin, zout, status, debug)
     369             : 
     370             :       INTEGER, INTENT(IN)                                :: fsign
     371             :       INTEGER, DIMENSION(:), INTENT(INOUT)               :: n
     372             :       COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :, :), &
     373             :          INTENT(INOUT)                                   :: zin
     374             :       COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :, :), &
     375             :          INTENT(INOUT), OPTIONAL, TARGET                 :: zout
     376             :       INTEGER, INTENT(OUT), OPTIONAL                     :: status
     377             :       LOGICAL, INTENT(IN), OPTIONAL                      :: debug
     378             : 
     379             :       CHARACTER(len=*), PARAMETER                        :: routineN = 'fft3d_s'
     380             : 
     381             :       COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :, :), &
     382      621188 :          POINTER                                         :: zoptr
     383             :       COMPLEX(KIND=dp), DIMENSION(1, 1, 1), TARGET       :: zdum
     384             :       INTEGER                                            :: handle, ld(3), lo(3), output_unit, sign, &
     385             :                                                             stat
     386             :       LOGICAL                                            :: fft_in_place, test
     387             :       REAL(KIND=dp)                                      :: in_sum, norm, out_sum
     388             :       TYPE(fft_scratch_type), POINTER                    :: fft_scratch
     389             : 
     390      621188 :       CALL timeset(routineN, handle)
     391      621188 :       output_unit = cp_logger_get_default_io_unit()
     392             : 
     393      621188 :       IF (fsign == FWFFT) THEN
     394     1174192 :          norm = 1.0_dp/REAL(PRODUCT(n), KIND=dp)
     395      327640 :       ELSE IF (fsign == BWFFT) THEN
     396      327640 :          norm = 1.0_dp
     397             :       ELSE
     398           0 :          CPABORT("Unknown FFT direction!")
     399             :       END IF
     400             : 
     401      621188 :       IF (PRESENT(debug)) THEN
     402      583229 :          test = debug
     403             :       ELSE
     404             :          test = .FALSE.
     405             :       END IF
     406             : 
     407      621188 :       IF (PRESENT(zout)) THEN
     408             :          fft_in_place = .FALSE.
     409             :       ELSE
     410      621180 :          fft_in_place = .TRUE.
     411             :       END IF
     412             : 
     413      621188 :       IF (test) THEN
     414           0 :          in_sum = SUM(ABS(zin))
     415             :       END IF
     416             : 
     417      621188 :       ld(1) = SIZE(zin, 1)
     418      621188 :       ld(2) = SIZE(zin, 2)
     419      621188 :       ld(3) = SIZE(zin, 3)
     420             : 
     421      621188 :       IF (n(1) /= ld(1) .OR. n(2) /= ld(2) .OR. n(3) /= ld(3)) THEN
     422           0 :          CPABORT("Size and dimension (zin) have to be the same.")
     423             :       END IF
     424             : 
     425      621188 :       sign = fsign
     426      621188 :       CALL get_fft_scratch(fft_scratch, tf_type=400, n=n)
     427             : 
     428      621188 :       IF (fft_in_place) THEN
     429      621180 :          zoptr => zdum
     430      621180 :          IF (fsign == FWFFT) THEN
     431      293544 :             CALL fft_3d(fft_scratch%fft_plan(1), norm, zin, zoptr, stat)
     432             :          ELSE
     433      327636 :             CALL fft_3d(fft_scratch%fft_plan(2), norm, zin, zoptr, stat)
     434             :          END IF
     435             :       ELSE
     436           8 :          IF (fsign == FWFFT) THEN
     437           4 :             CALL fft_3d(fft_scratch%fft_plan(3), norm, zin, zout, stat)
     438             :          ELSE
     439           4 :             CALL fft_3d(fft_scratch%fft_plan(4), norm, zin, zout, stat)
     440             :          END IF
     441             :       END IF
     442             : 
     443      621188 :       CALL release_fft_scratch(fft_scratch)
     444             : 
     445      621188 :       IF (PRESENT(zout)) THEN
     446           8 :          lo(1) = SIZE(zout, 1)
     447           8 :          lo(2) = SIZE(zout, 2)
     448           8 :          lo(3) = SIZE(zout, 3)
     449           8 :          IF (n(1) /= lo(1) .OR. n(2) /= lo(2) .OR. n(3) /= lo(3)) THEN
     450           0 :             CPABORT("Size and dimension (zout) have to be the same.")
     451             :          END IF
     452             :       END IF
     453             : 
     454      621188 :       IF (PRESENT(status)) THEN
     455        9029 :          status = stat
     456             :       END IF
     457             : 
     458      621188 :       IF (test .AND. output_unit > 0) THEN
     459           0 :          IF (PRESENT(zout)) THEN
     460           0 :             out_sum = SUM(ABS(zout))
     461           0 :             WRITE (output_unit, '(A)') "  Out of place 3D FFT (local)  : fft3d_s"
     462           0 :             WRITE (output_unit, '(A,T60,3I7)') "     Transform lengths ", n
     463           0 :             WRITE (output_unit, '(A,T60,3I7)') "     Input array dimensions ", ld
     464           0 :             WRITE (output_unit, '(A,T60,3I7)') "     Output array dimensions ", lo
     465           0 :             WRITE (output_unit, '(A,T61,E20.14)') "     Sum of input data ", in_sum
     466           0 :             WRITE (output_unit, '(A,T61,E20.14)') "     Sum of output data ", out_sum
     467             :          ELSE
     468           0 :             out_sum = SUM(ABS(zin))
     469           0 :             WRITE (output_unit, '(A)') "  In place 3D FFT (local)  : fft3d_s"
     470           0 :             WRITE (output_unit, '(A,T60,3I7)') "     Transform lengths ", n
     471           0 :             WRITE (output_unit, '(A,T60,3I7)') "     Input/output array dimensions ", ld
     472           0 :             WRITE (output_unit, '(A,T61,E20.14)') "     Sum of input data ", in_sum
     473           0 :             WRITE (output_unit, '(A,T61,E20.14)') "     Sum of output data ", out_sum
     474             :          END IF
     475             :       END IF
     476             : 
     477      621188 :       CALL timestop(handle)
     478             : 
     479      621188 :    END SUBROUTINE fft3d_s
     480             : 
     481             : ! **************************************************************************************************
     482             : !> \brief ...
     483             : !> \param fsign ...
     484             : !> \param n ...
     485             : !> \param cin ...
     486             : !> \param gin ...
     487             : !> \param rs_group ...
     488             : !> \param yzp ...
     489             : !> \param nyzray ...
     490             : !> \param bo ...
     491             : !> \param status ...
     492             : !> \param debug ...
     493             : ! **************************************************************************************************
     494     2625362 :    SUBROUTINE fft3d_ps(fsign, n, cin, gin, rs_group, yzp, nyzray, &
     495     2625362 :                        bo, status, debug)
     496             : 
     497             :       INTEGER, INTENT(IN)                                :: fsign
     498             :       INTEGER, DIMENSION(:), INTENT(IN)                  :: n
     499             :       COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :, :), &
     500             :          INTENT(INOUT)                                   :: cin
     501             :       COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
     502             :          INTENT(INOUT)                                   :: gin
     503             :       TYPE(mp_cart_type), INTENT(IN)                     :: rs_group
     504             :       INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:), &
     505             :          INTENT(IN)                                      :: yzp
     506             :       INTEGER, CONTIGUOUS, DIMENSION(0:), INTENT(IN)     :: nyzray
     507             :       INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:, :), &
     508             :          INTENT(IN)                                      :: bo
     509             :       INTEGER, INTENT(OUT), OPTIONAL                     :: status
     510             :       LOGICAL, INTENT(IN), OPTIONAL                      :: debug
     511             : 
     512             :       CHARACTER(len=*), PARAMETER                        :: routineN = 'fft3d_ps'
     513             : 
     514             :       COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
     515     2625362 :          POINTER                                         :: pbuf, qbuf, rbuf, sbuf
     516             :       COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :, :), &
     517     2625362 :          POINTER                                         :: tbuf
     518             :       INTEGER :: g_pos, handle, lg, lmax, mcx2, mcz1, mcz2, mg, mmax, mx1, mx2, my1, mz2, n1, n2, &
     519             :          nmax, numtask, nx, ny, nz, output_unit, r_dim(2), r_pos(2), rp, sign, stat
     520     2625362 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: p2p
     521             :       LOGICAL                                            :: test
     522             :       REAL(KIND=dp)                                      :: norm, sum_data
     523    18377534 :       TYPE(fft_scratch_sizes)                            :: fft_scratch_size
     524             :       TYPE(fft_scratch_type), POINTER                    :: fft_scratch
     525             : 
     526     2625362 :       CALL timeset(routineN, handle)
     527     2625362 :       output_unit = cp_logger_get_default_io_unit()
     528             : 
     529     2625362 :       IF (PRESENT(debug)) THEN
     530     2625362 :          test = debug
     531             :       ELSE
     532             :          test = .FALSE.
     533             :       END IF
     534             : 
     535     2625362 :       g_pos = rs_group%mepos
     536     2625362 :       numtask = rs_group%num_pe
     537     7876086 :       r_dim = rs_group%num_pe_cart
     538     7876086 :       r_pos = rs_group%mepos_cart
     539             : 
     540     2625362 :       IF (fsign == FWFFT) THEN
     541     5183680 :          norm = 1.0_dp/REAL(PRODUCT(n), KIND=dp)
     542     1329442 :       ELSE IF (fsign == BWFFT) THEN
     543     1329442 :          norm = 1.0_dp
     544             :       ELSE
     545           0 :          CPABORT("Unknown FFT direction!")
     546             :       END IF
     547             : 
     548     2625362 :       sign = fsign
     549             : 
     550     2625362 :       lg = SIZE(gin, 1)
     551     2625362 :       mg = SIZE(gin, 2)
     552             : 
     553     2625362 :       nx = SIZE(cin, 1)
     554     2625362 :       ny = SIZE(cin, 2)
     555     2625362 :       nz = SIZE(cin, 3)
     556             : 
     557     2625362 :       IF (mg == 0) THEN
     558             :          mmax = 1
     559             :       ELSE
     560     2625362 :          mmax = mg
     561             :       END IF
     562     2625362 :       lmax = MAX(lg, (nx*ny*nz)/mmax + 1)
     563             : 
     564     7876086 :       ALLOCATE (p2p(0:numtask - 1))
     565             : 
     566     2625362 :       CALL rs_group%rank_compare(rs_group, p2p)
     567             : 
     568     2625362 :       rp = p2p(g_pos)
     569     2625362 :       mx1 = bo(2, 1, rp, 1) - bo(1, 1, rp, 1) + 1
     570     2625362 :       my1 = bo(2, 2, rp, 1) - bo(1, 2, rp, 1) + 1
     571     2625362 :       mx2 = bo(2, 1, rp, 2) - bo(1, 1, rp, 2) + 1
     572     2625362 :       mz2 = bo(2, 3, rp, 2) - bo(1, 3, rp, 2) + 1
     573             : 
     574     7876086 :       n1 = MAXVAL(bo(2, 1, :, 1) - bo(1, 1, :, 1) + 1)
     575     7876086 :       n2 = MAXVAL(bo(2, 2, :, 1) - bo(1, 2, :, 1) + 1)
     576     2625362 :       nmax = MAX((2*n2)/numtask, 2)*mx2*mz2
     577     7876086 :       nmax = MAX(nmax, n1*MAXVAL(nyzray))
     578     7876086 :       n1 = MAXVAL(bo(2, 1, :, 2))
     579     7876086 :       n2 = MAXVAL(bo(2, 3, :, 2))
     580             : 
     581     2625362 :       fft_scratch_size%nx = nx
     582     2625362 :       fft_scratch_size%ny = ny
     583     2625362 :       fft_scratch_size%nz = nz
     584     2625362 :       fft_scratch_size%lmax = lmax
     585     2625362 :       fft_scratch_size%mmax = mmax
     586     2625362 :       fft_scratch_size%mx1 = mx1
     587     2625362 :       fft_scratch_size%mx2 = mx2
     588     2625362 :       fft_scratch_size%my1 = my1
     589     2625362 :       fft_scratch_size%mz2 = mz2
     590     2625362 :       fft_scratch_size%lg = lg
     591     2625362 :       fft_scratch_size%mg = mg
     592     2625362 :       fft_scratch_size%nbx = n1
     593     2625362 :       fft_scratch_size%nbz = n2
     594     7876086 :       mcz1 = MAXVAL(bo(2, 3, :, 1) - bo(1, 3, :, 1) + 1)
     595     7876086 :       mcx2 = MAXVAL(bo(2, 1, :, 2) - bo(1, 1, :, 2) + 1)
     596     7876086 :       mcz2 = MAXVAL(bo(2, 3, :, 2) - bo(1, 3, :, 2) + 1)
     597     2625362 :       fft_scratch_size%mcz1 = mcz1
     598     2625362 :       fft_scratch_size%mcx2 = mcx2
     599     2625362 :       fft_scratch_size%mcz2 = mcz2
     600     2625362 :       fft_scratch_size%nmax = nmax
     601     7876086 :       fft_scratch_size%nmray = MAXVAL(nyzray)
     602     2625362 :       fft_scratch_size%nyzray = nyzray(g_pos)
     603     2625362 :       fft_scratch_size%rs_group = rs_group
     604     7876086 :       fft_scratch_size%g_pos = g_pos
     605     7876086 :       fft_scratch_size%r_pos = r_pos
     606     7876086 :       fft_scratch_size%r_dim = r_dim
     607     2625362 :       fft_scratch_size%numtask = numtask
     608             : 
     609     2625362 :       IF (test) THEN
     610           8 :          IF (g_pos == 0 .AND. output_unit > 0) THEN
     611           4 :             WRITE (output_unit, '(A)') "  Parallel 3D FFT : fft3d_ps"
     612           4 :             WRITE (output_unit, '(A,T60,3I7)') "     Transform lengths ", n
     613           4 :             WRITE (output_unit, '(A,T67,2I7)') "     Array dimensions (gin) ", lg, mg
     614           4 :             WRITE (output_unit, '(A,T60,3I7)') "     Array dimensions (cin) ", nx, ny, nz
     615             :          END IF
     616             :       END IF
     617             : 
     618     2625362 :       IF (r_dim(2) > 1) THEN
     619             : 
     620             :          !
     621             :          ! real space is distributed over x and y coordinate
     622             :          ! we have two stages of communication
     623             :          !
     624             : 
     625           0 :          IF (r_dim(1) == 1) THEN
     626           0 :             CPABORT("This processor distribution is not supported.")
     627             :          END IF
     628           0 :          CALL get_fft_scratch(fft_scratch, tf_type=300, n=n, fft_sizes=fft_scratch_size)
     629             : 
     630           0 :          IF (sign == FWFFT) THEN
     631             :             ! cin -> gin
     632             : 
     633           0 :             IF (test) THEN
     634           0 :                sum_data = ABS(SUM(cin))
     635           0 :                CALL rs_group%sum(sum_data)
     636           0 :                IF (g_pos == 0 .AND. output_unit > 0) THEN
     637           0 :                   WRITE (output_unit, '(A)') "  Two step communication algorithm "
     638           0 :                   WRITE (output_unit, '(A,T60,3I7)') "     Transform Z ", n(3), mx1*my1
     639           0 :                   WRITE (output_unit, '(A,T60,3I7)') "     Transform Y ", n(2), mx2*mz2
     640           0 :                   WRITE (output_unit, '(A,T67,2I7)') "     Transform X ", n(1), nyzray(g_pos)
     641           0 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(1) ", sum_data
     642             :                END IF
     643             :             END IF
     644             : 
     645           0 :             pbuf => fft_scratch%p1buf
     646           0 :             qbuf => fft_scratch%p2buf
     647             : 
     648             :             ! FFT along z
     649           0 :             CALL fft_1dm(fft_scratch%fft_plan(1), cin, qbuf, norm, stat)
     650             : 
     651           0 :             rbuf => fft_scratch%p3buf
     652             : 
     653           0 :             IF (test) THEN
     654           0 :                sum_data = ABS(SUM(qbuf))
     655           0 :                CALL rs_group%sum(sum_data)
     656           0 :                IF (g_pos == 0 .AND. output_unit > 0) THEN
     657           0 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(2) T", sum_data
     658             :                END IF
     659             :             END IF
     660             : 
     661             :             ! Exchange data ( transpose of matrix )
     662           0 :             CALL cube_transpose_2(qbuf, bo(:, :, :, 1), bo(:, :, :, 2), rbuf, fft_scratch)
     663             : 
     664           0 :             IF (test) THEN
     665           0 :                sum_data = ABS(SUM(rbuf))
     666           0 :                CALL rs_group%sum(sum_data)
     667           0 :                IF (g_pos == 0 .AND. output_unit > 0) THEN
     668           0 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(3) T", sum_data
     669             :                END IF
     670             :             END IF
     671             : 
     672           0 :             pbuf => fft_scratch%p4buf
     673             : 
     674             :             ! FFT along y
     675           0 :             CALL fft_1dm(fft_scratch%fft_plan(2), rbuf, pbuf, 1.0_dp, stat)
     676             : 
     677           0 :             qbuf => fft_scratch%p5buf
     678             : 
     679           0 :             IF (test) THEN
     680           0 :                sum_data = ABS(SUM(pbuf))
     681           0 :                CALL rs_group%sum(sum_data)
     682           0 :                IF (g_pos == 0 .AND. output_unit > 0) THEN
     683           0 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(4) TS", sum_data
     684             :                END IF
     685             :             END IF
     686             : 
     687             :             ! Exchange data ( transpose of matrix ) and sort
     688             :             CALL xz_to_yz(pbuf, rs_group, r_dim, g_pos, p2p, yzp, nyzray, &
     689           0 :                           bo(:, :, :, 2), qbuf, fft_scratch)
     690             : 
     691           0 :             IF (test) THEN
     692           0 :                sum_data = ABS(SUM(qbuf))
     693           0 :                CALL rs_group%sum(sum_data)
     694           0 :                IF (g_pos == 0 .AND. output_unit > 0) THEN
     695           0 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(5) TS", sum_data
     696             :                END IF
     697             :             END IF
     698             : 
     699             :             ! FFT along x
     700           0 :             CALL fft_1dm(fft_scratch%fft_plan(3), qbuf, gin, 1.0_dp, stat)
     701             : 
     702           0 :             IF (test) THEN
     703           0 :                sum_data = ABS(SUM(gin))
     704           0 :                CALL rs_group%sum(sum_data)
     705           0 :                IF (g_pos == 0 .AND. output_unit > 0) THEN
     706           0 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(6) ", sum_data
     707             :                END IF
     708             :             END IF
     709             : 
     710           0 :          ELSE IF (sign == BWFFT) THEN
     711             :             ! gin -> cin
     712             : 
     713           0 :             IF (test) THEN
     714           0 :                sum_data = ABS(SUM(gin))
     715           0 :                CALL rs_group%sum(sum_data)
     716           0 :                IF (g_pos == 0 .AND. output_unit > 0) THEN
     717           0 :                   WRITE (output_unit, '(A)') "  Two step communication algorithm "
     718           0 :                   WRITE (output_unit, '(A,T67,2I7)') "     Transform X ", n(1), nyzray(g_pos)
     719           0 :                   WRITE (output_unit, '(A,T60,3I7)') "     Transform Y ", n(2), mx2*mz2
     720           0 :                   WRITE (output_unit, '(A,T60,3I7)') "     Transform Z ", n(3), mx1*my1
     721           0 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(1) ", sum_data
     722             :                END IF
     723             :             END IF
     724             : 
     725           0 :             pbuf => fft_scratch%p7buf
     726             : 
     727             :             ! FFT along x
     728           0 :             CALL fft_1dm(fft_scratch%fft_plan(4), gin, pbuf, norm, stat)
     729             : 
     730           0 :             qbuf => fft_scratch%p4buf
     731             : 
     732           0 :             IF (test) THEN
     733           0 :                sum_data = ABS(SUM(pbuf))
     734           0 :                CALL rs_group%sum(sum_data)
     735           0 :                IF (g_pos == 0 .AND. output_unit > 0) THEN
     736           0 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(2) TS", sum_data
     737             :                END IF
     738             :             END IF
     739             : 
     740             :             ! Exchange data ( transpose of matrix ) and sort
     741             :             CALL yz_to_xz(pbuf, rs_group, r_dim, g_pos, p2p, yzp, nyzray, &
     742           0 :                           bo(:, :, :, 2), qbuf, fft_scratch)
     743             : 
     744           0 :             IF (test) THEN
     745           0 :                sum_data = ABS(SUM(qbuf))
     746           0 :                CALL rs_group%sum(sum_data)
     747           0 :                IF (g_pos == 0 .AND. output_unit > 0) THEN
     748           0 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(3) TS", sum_data
     749             :                END IF
     750             :             END IF
     751             : 
     752           0 :             rbuf => fft_scratch%p3buf
     753             : 
     754             :             ! FFT along y
     755           0 :             CALL fft_1dm(fft_scratch%fft_plan(5), qbuf, rbuf, 1.0_dp, stat)
     756             : 
     757           0 :             pbuf => fft_scratch%p2buf
     758             : 
     759           0 :             IF (test) THEN
     760           0 :                sum_data = ABS(SUM(rbuf))
     761           0 :                CALL rs_group%sum(sum_data)
     762           0 :                IF (g_pos == 0 .AND. output_unit > 0) THEN
     763           0 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(4) T", sum_data
     764             :                END IF
     765             :             END IF
     766             : 
     767             :             ! Exchange data ( transpose of matrix )
     768           0 :             CALL cube_transpose_1(rbuf, bo(:, :, :, 2), bo(:, :, :, 1), pbuf, fft_scratch)
     769             : 
     770           0 :             IF (test) THEN
     771           0 :                sum_data = ABS(SUM(pbuf))
     772           0 :                CALL rs_group%sum(sum_data)
     773           0 :                IF (g_pos == 0 .AND. output_unit > 0) THEN
     774           0 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(5) T", sum_data
     775             :                END IF
     776             :             END IF
     777             : 
     778           0 :             qbuf => fft_scratch%p1buf
     779             : 
     780             :             ! FFT along z
     781           0 :             CALL fft_1dm(fft_scratch%fft_plan(6), pbuf, cin, 1.0_dp, stat)
     782             : 
     783           0 :             IF (test) THEN
     784           0 :                sum_data = ABS(SUM(cin))
     785           0 :                CALL rs_group%sum(sum_data)
     786           0 :                IF (g_pos == 0 .AND. output_unit > 0) THEN
     787           0 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(6) ", sum_data
     788             :                END IF
     789             :             END IF
     790             : 
     791             :          ELSE
     792             : 
     793           0 :             CPABORT("Illegal fsign parameter.")
     794             : 
     795             :          END IF
     796             : 
     797           0 :          CALL release_fft_scratch(fft_scratch)
     798             : 
     799             :       ELSE
     800             : 
     801             :          !
     802             :          ! real space is only distributed over x coordinate
     803             :          ! we have one stage of communication, after the transform of
     804             :          ! direction x
     805             :          !
     806             : 
     807     2625362 :          CALL get_fft_scratch(fft_scratch, tf_type=200, n=n, fft_sizes=fft_scratch_size)
     808             : 
     809     2625362 :          sbuf => fft_scratch%r1buf
     810     2625362 :          tbuf => fft_scratch%tbuf
     811             : 
     812 80287972171 :          sbuf = z_zero
     813 80667928116 :          tbuf = z_zero
     814             : 
     815     2625362 :          IF (sign == FWFFT) THEN
     816             :             ! cin -> gin
     817             : 
     818     1295920 :             IF (test) THEN
     819        9284 :                sum_data = ABS(SUM(cin))
     820           4 :                CALL rs_group%sum(sum_data)
     821           4 :                IF (g_pos == 0 .AND. output_unit > 0) THEN
     822           2 :                   WRITE (output_unit, '(A)') "     One step communication algorithm "
     823           2 :                   WRITE (output_unit, '(A,T60,3I7)') "     Transform YZ ", n(2), n(3), nx
     824           2 :                   WRITE (output_unit, '(A,T67,2I7)') "     Transform X ", n(1), nyzray(g_pos)
     825           2 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(1) ", sum_data
     826             :                END IF
     827             :             END IF
     828             : 
     829             :             ! FFT along y and z
     830     1295920 :             CALL fft_1dm(fft_scratch%fft_plan(1), cin, sbuf, 1._dp, stat)
     831     1295920 :             CALL fft_1dm(fft_scratch%fft_plan(2), sbuf, tbuf, 1._dp, stat)
     832             : 
     833     1295920 :             IF (test) THEN
     834        8740 :                sum_data = ABS(SUM(tbuf))
     835           4 :                CALL rs_group%sum(sum_data)
     836           4 :                IF (g_pos == 0 .AND. output_unit > 0) THEN
     837           2 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(2) TS", sum_data
     838             :                END IF
     839             :             END IF
     840             : 
     841             :             ! Exchange data ( transpose of matrix ) and sort
     842             :             CALL yz_to_x(tbuf, rs_group, g_pos, p2p, yzp, nyzray, &
     843     1295920 :                          bo(:, :, :, 2), sbuf, fft_scratch)
     844             : 
     845     1295920 :             IF (test) THEN
     846        8776 :                sum_data = ABS(SUM(sbuf))
     847           4 :                CALL rs_group%sum(sum_data)
     848           4 :                IF (g_pos == 0 .AND. output_unit > 0) THEN
     849           2 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(3) TS", sum_data
     850             :                END IF
     851             :             END IF
     852             :             ! FFT along x
     853     1295920 :             CALL fft_1dm(fft_scratch%fft_plan(3), sbuf, gin, norm, stat)
     854             : 
     855     1295920 :             IF (test) THEN
     856        8708 :                sum_data = ABS(SUM(gin))
     857           4 :                CALL rs_group%sum(sum_data)
     858           4 :                IF (g_pos == 0 .AND. output_unit > 0) THEN
     859           2 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(4) ", sum_data
     860             :                END IF
     861             :             END IF
     862             : 
     863     1329442 :          ELSE IF (sign == BWFFT) THEN
     864             :             ! gin -> cin
     865             : 
     866     1329442 :             IF (test) THEN
     867        8708 :                sum_data = ABS(SUM(gin))
     868           4 :                CALL rs_group%sum(sum_data)
     869           4 :                IF (g_pos == 0 .AND. output_unit > 0) THEN
     870           2 :                   WRITE (output_unit, '(A)') "  One step communication algorithm "
     871           2 :                   WRITE (output_unit, '(A,T67,2I7)') "     Transform X ", n(1), nyzray(g_pos)
     872           2 :                   WRITE (output_unit, '(A,T60,3I7)') "     Transform YZ ", n(2), n(3), nx
     873           2 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(1) ", sum_data
     874             :                END IF
     875             :             END IF
     876             : 
     877             :             ! FFT along x
     878     1329442 :             CALL fft_1dm(fft_scratch%fft_plan(4), gin, sbuf, norm, stat)
     879             : 
     880     1329442 :             IF (test) THEN
     881        8776 :                sum_data = ABS(SUM(sbuf))
     882           4 :                CALL rs_group%sum(sum_data)
     883           4 :                IF (g_pos == 0 .AND. output_unit > 0) THEN
     884           2 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(2) TS", sum_data
     885             :                END IF
     886             :             END IF
     887             : 
     888             :             ! Exchange data ( transpose of matrix ) and sort
     889             :             CALL x_to_yz(sbuf, rs_group, g_pos, p2p, yzp, nyzray, &
     890     1329442 :                          bo(:, :, :, 2), tbuf, fft_scratch)
     891             : 
     892     1329442 :             IF (test) THEN
     893        8740 :                sum_data = ABS(SUM(tbuf))
     894           4 :                CALL rs_group%sum(sum_data)
     895           4 :                IF (g_pos == 0 .AND. output_unit > 0) THEN
     896           2 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(3) TS", sum_data
     897             :                END IF
     898             :             END IF
     899             : 
     900             :             ! FFT along y and z
     901     1329442 :             CALL fft_1dm(fft_scratch%fft_plan(5), tbuf, sbuf, 1._dp, stat)
     902     1329442 :             CALL fft_1dm(fft_scratch%fft_plan(6), sbuf, cin, 1._dp, stat)
     903             : 
     904     1329442 :             IF (test) THEN
     905        9284 :                sum_data = ABS(SUM(cin))
     906           4 :                CALL rs_group%sum(sum_data)
     907           4 :                IF (g_pos == 0 .AND. output_unit > 0) THEN
     908           2 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(4) ", sum_data
     909             :                END IF
     910             :             END IF
     911             :          ELSE
     912           0 :             CPABORT("Illegal fsign parameter.")
     913             :          END IF
     914             : 
     915     2625362 :          CALL release_fft_scratch(fft_scratch)
     916             : 
     917             :       END IF
     918             : 
     919     2625362 :       DEALLOCATE (p2p)
     920             : 
     921     2625362 :       IF (PRESENT(status)) THEN
     922           0 :          status = stat
     923             :       END IF
     924     2625362 :       CALL timestop(handle)
     925             : 
     926     5250724 :    END SUBROUTINE fft3d_ps
     927             : 
     928             : ! **************************************************************************************************
     929             : !> \brief ...
     930             : !> \param fsign ...
     931             : !> \param n ...
     932             : !> \param zin ...
     933             : !> \param gin ...
     934             : !> \param group ...
     935             : !> \param bo ...
     936             : !> \param status ...
     937             : !> \param debug ...
     938             : ! **************************************************************************************************
     939         208 :    SUBROUTINE fft3d_pb(fsign, n, zin, gin, group, bo, status, debug)
     940             : 
     941             :       INTEGER, INTENT(IN)                                :: fsign
     942             :       INTEGER, DIMENSION(3), INTENT(IN)                  :: n
     943             :       COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :, :), &
     944             :          INTENT(INOUT)                                   :: zin
     945             :       COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
     946             :          INTENT(INOUT)                                   :: gin
     947             :       TYPE(mp_cart_type), INTENT(IN)                     :: group
     948             :       INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:, :), &
     949             :          INTENT(IN)                                      :: bo
     950             :       INTEGER, INTENT(OUT), OPTIONAL                     :: status
     951             :       LOGICAL, INTENT(IN), OPTIONAL                      :: debug
     952             : 
     953             :       CHARACTER(len=*), PARAMETER                        :: routineN = 'fft3d_pb'
     954             : 
     955             :       COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
     956         208 :          POINTER                                         :: abuf, bbuf
     957             :       INTEGER                                            :: handle, lg(2), lz(3), mcx2, mcy3, mcz1, &
     958             :                                                             mcz2, mx1, mx2, mx3, my1, my2, my3, &
     959             :                                                             my_pos, mz1, mz2, mz3, output_unit, &
     960             :                                                             sign, stat
     961             :       INTEGER, DIMENSION(2)                              :: dim
     962             :       LOGICAL                                            :: test
     963             :       REAL(KIND=dp)                                      :: norm, sum_data
     964        1456 :       TYPE(fft_scratch_sizes)                            :: fft_scratch_size
     965             :       TYPE(fft_scratch_type), POINTER                    :: fft_scratch
     966             : 
     967             : !------------------------------------------------------------------------------
     968             : ! "Real Space"  1) xyZ      or      1) xYZ
     969             : !               2) xYz      or         not used
     970             : ! "G Space"     3) Xyz      or      3) XYz
     971             : !
     972             : ! There is one communicator (2-dimensional) for all distributions
     973             : ! np = n1 * n2, where np is the total number of processors
     974             : ! If n2 = 1, we have the second case and only one transpose step is needed
     975             : !
     976             : ! Assignment of dimensions to axis for different steps
     977             : ! First case: 1) n1=x; n2=y
     978             : !             2) n1=x; n2=z
     979             : !             3) n1=y; n2=z
     980             : ! Second case 1) n1=x
     981             : !             3) n1=z
     982             : !
     983             : ! The more general case with two communicators for the initial and final
     984             : ! distribution is not covered.
     985             : !------------------------------------------------------------------------------
     986             : 
     987         208 :       CALL timeset(routineN, handle)
     988         208 :       output_unit = cp_logger_get_default_io_unit()
     989             : 
     990         624 :       dim = group%num_pe_cart
     991         208 :       my_pos = group%mepos
     992             : 
     993         208 :       IF (PRESENT(debug)) THEN
     994         208 :          test = debug
     995             :       ELSE
     996             :          test = .FALSE.
     997             :       END IF
     998             : 
     999         208 :       IF (fsign == FWFFT) THEN
    1000         416 :          norm = 1.0_dp/REAL(PRODUCT(n), KIND=dp)
    1001         104 :       ELSE IF (fsign == BWFFT) THEN
    1002         104 :          norm = 1.0_dp
    1003             :       ELSE
    1004           0 :          CPABORT("Unknown FFT direction!")
    1005             :       END IF
    1006             : 
    1007         208 :       sign = fsign
    1008             : 
    1009         208 :       IF (test) THEN
    1010           8 :          lg(1) = SIZE(gin, 1)
    1011           8 :          lg(2) = SIZE(gin, 2)
    1012           8 :          lz(1) = SIZE(zin, 1)
    1013           8 :          lz(2) = SIZE(zin, 2)
    1014           8 :          lz(3) = SIZE(zin, 3)
    1015           8 :          IF (my_pos == 0 .AND. output_unit > 0) THEN
    1016           4 :             WRITE (output_unit, '(A)') "  Parallel 3D FFT : fft3d_pb"
    1017           4 :             WRITE (output_unit, '(A,T60,3I7)') "     Transform lengths ", n
    1018           4 :             WRITE (output_unit, '(A,T67,2I7)') "     Array dimensions (gin) ", lg
    1019           4 :             WRITE (output_unit, '(A,T60,3I7)') "     Array dimensions (cin) ", lz
    1020             :          END IF
    1021             :       END IF
    1022             : 
    1023         208 :       mx1 = bo(2, 1, my_pos, 1) - bo(1, 1, my_pos, 1) + 1
    1024         208 :       my1 = bo(2, 2, my_pos, 1) - bo(1, 2, my_pos, 1) + 1
    1025         208 :       mz1 = bo(2, 3, my_pos, 1) - bo(1, 3, my_pos, 1) + 1
    1026         208 :       mx2 = bo(2, 1, my_pos, 2) - bo(1, 1, my_pos, 2) + 1
    1027         208 :       my2 = bo(2, 2, my_pos, 2) - bo(1, 2, my_pos, 2) + 1
    1028         208 :       mz2 = bo(2, 3, my_pos, 2) - bo(1, 3, my_pos, 2) + 1
    1029         208 :       mx3 = bo(2, 1, my_pos, 3) - bo(1, 1, my_pos, 3) + 1
    1030         208 :       my3 = bo(2, 2, my_pos, 3) - bo(1, 2, my_pos, 3) + 1
    1031         208 :       mz3 = bo(2, 3, my_pos, 3) - bo(1, 3, my_pos, 3) + 1
    1032         208 :       fft_scratch_size%mx1 = mx1
    1033         208 :       fft_scratch_size%mx2 = mx2
    1034         208 :       fft_scratch_size%mx3 = mx3
    1035         208 :       fft_scratch_size%my1 = my1
    1036         208 :       fft_scratch_size%my2 = my2
    1037         208 :       fft_scratch_size%my3 = my3
    1038         208 :       fft_scratch_size%mz1 = mz1
    1039         208 :       fft_scratch_size%mz2 = mz2
    1040         208 :       fft_scratch_size%mz3 = mz3
    1041         624 :       mcz1 = MAXVAL(bo(2, 3, :, 1) - bo(1, 3, :, 1) + 1)
    1042         624 :       mcx2 = MAXVAL(bo(2, 1, :, 2) - bo(1, 1, :, 2) + 1)
    1043         624 :       mcz2 = MAXVAL(bo(2, 3, :, 2) - bo(1, 3, :, 2) + 1)
    1044         624 :       mcy3 = MAXVAL(bo(2, 2, :, 3) - bo(1, 2, :, 3) + 1)
    1045         208 :       fft_scratch_size%mcz1 = mcz1
    1046         208 :       fft_scratch_size%mcx2 = mcx2
    1047         208 :       fft_scratch_size%mcz2 = mcz2
    1048         208 :       fft_scratch_size%mcy3 = mcy3
    1049         208 :       fft_scratch_size%rs_group = group
    1050         624 :       fft_scratch_size%g_pos = my_pos
    1051         208 :       fft_scratch_size%numtask = DIM(1)*DIM(2)
    1052             : 
    1053         208 :       IF (DIM(1) > 1 .AND. DIM(2) > 1) THEN
    1054             : 
    1055             :          !
    1056             :          ! First case; two stages of communication
    1057             :          !
    1058             : 
    1059           0 :          CALL get_fft_scratch(fft_scratch, tf_type=100, n=n, fft_sizes=fft_scratch_size)
    1060             : 
    1061           0 :          IF (sign == FWFFT) THEN
    1062             :             ! Stage 1 -> 3
    1063             : 
    1064           0 :             bbuf => fft_scratch%a2buf
    1065             : 
    1066           0 :             IF (test) THEN
    1067           0 :                sum_data = ABS(SUM(zin))
    1068           0 :                CALL group%sum(sum_data)
    1069           0 :                IF (my_pos == 0 .AND. output_unit > 0) THEN
    1070           0 :                   WRITE (output_unit, '(A)') "  Two step communication algorithm "
    1071           0 :                   WRITE (output_unit, '(A,T67,2I7)') "     Transform Z ", n(3), mx1*my1
    1072           0 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(1) ", sum_data
    1073             :                END IF
    1074             :             END IF
    1075             : 
    1076             :             ! FFT along z
    1077           0 :             CALL fft_1dm(fft_scratch%fft_plan(1), zin, bbuf, norm, stat)
    1078             : 
    1079           0 :             abuf => fft_scratch%a3buf
    1080             : 
    1081           0 :             IF (test) THEN
    1082           0 :                sum_data = ABS(SUM(bbuf))
    1083           0 :                CALL group%sum(sum_data)
    1084           0 :                IF (my_pos == 0 .AND. output_unit > 0) THEN
    1085           0 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(2) T", sum_data
    1086             :                END IF
    1087             :             END IF
    1088             : 
    1089           0 :             CALL cube_transpose_2(bbuf, bo(:, :, :, 1), bo(:, :, :, 2), abuf, fft_scratch)
    1090             : 
    1091           0 :             bbuf => fft_scratch%a4buf
    1092             : 
    1093           0 :             IF (test) THEN
    1094           0 :                sum_data = ABS(SUM(abuf))
    1095           0 :                CALL group%sum(sum_data)
    1096           0 :                IF (my_pos == 0 .AND. output_unit > 0) THEN
    1097           0 :                   WRITE (output_unit, '(A,T67,2I7)') "     Transform Y ", n(2), mx2*mz2
    1098           0 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(3) ", sum_data
    1099             :                END IF
    1100             :             END IF
    1101             : 
    1102             :             ! FFT along y
    1103           0 :             CALL fft_1dm(fft_scratch%fft_plan(2), abuf, bbuf, 1.0_dp, stat)
    1104             : 
    1105           0 :             abuf => fft_scratch%a5buf
    1106             : 
    1107           0 :             IF (test) THEN
    1108           0 :                sum_data = ABS(SUM(bbuf))
    1109           0 :                CALL group%sum(sum_data)
    1110           0 :                IF (my_pos == 0 .AND. output_unit > 0) THEN
    1111           0 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(4) T", sum_data
    1112             :                END IF
    1113             :             END IF
    1114             : 
    1115           0 :             CALL cube_transpose_4(bbuf, bo(:, :, :, 2), bo(:, :, :, 3), abuf, fft_scratch)
    1116             : 
    1117           0 :             IF (test) THEN
    1118           0 :                sum_data = ABS(SUM(abuf))
    1119           0 :                CALL group%sum(sum_data)
    1120           0 :                IF (my_pos == 0 .AND. output_unit > 0) THEN
    1121           0 :                   WRITE (output_unit, '(A,T67,2I7)') "     Transform X ", n(1), my3*mz3
    1122           0 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(5) ", sum_data
    1123             :                END IF
    1124             :             END IF
    1125             : 
    1126             :             ! FFT along x
    1127           0 :             CALL fft_1dm(fft_scratch%fft_plan(3), abuf, gin, 1.0_dp, stat)
    1128             : 
    1129           0 :             IF (test) THEN
    1130           0 :                sum_data = ABS(SUM(gin))
    1131           0 :                CALL group%sum(sum_data)
    1132           0 :                IF (my_pos == 0 .AND. output_unit > 0) THEN
    1133           0 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(6) ", sum_data
    1134             :                END IF
    1135             :             END IF
    1136             : 
    1137           0 :          ELSEIF (sign == BWFFT) THEN
    1138             :             ! Stage 3 -> 1
    1139             : 
    1140           0 :             bbuf => fft_scratch%a5buf
    1141             : 
    1142           0 :             IF (test) THEN
    1143           0 :                sum_data = ABS(SUM(gin))
    1144           0 :                CALL group%sum(sum_data)
    1145           0 :                IF (my_pos == 0 .AND. output_unit > 0) THEN
    1146           0 :                   WRITE (output_unit, '(A)') "  Two step communication algorithm "
    1147           0 :                   WRITE (output_unit, '(A,T67,2I7)') "     Transform X ", n(1), my3*mz3
    1148           0 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(1) ", sum_data
    1149             :                END IF
    1150             :             END IF
    1151             : 
    1152             :             ! FFT along x
    1153           0 :             CALL fft_1dm(fft_scratch%fft_plan(4), gin, bbuf, 1.0_dp, stat)
    1154             : 
    1155           0 :             abuf => fft_scratch%a4buf
    1156             : 
    1157           0 :             IF (test) THEN
    1158           0 :                sum_data = ABS(SUM(bbuf))
    1159           0 :                CALL group%sum(sum_data)
    1160           0 :                IF (my_pos == 0 .AND. output_unit > 0) THEN
    1161           0 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(2) T", sum_data
    1162             :                END IF
    1163             :             END IF
    1164             : 
    1165           0 :             CALL cube_transpose_3(bbuf, bo(:, :, :, 3), bo(:, :, :, 2), abuf, fft_scratch)
    1166             : 
    1167           0 :             bbuf => fft_scratch%a3buf
    1168             : 
    1169           0 :             IF (test) THEN
    1170           0 :                sum_data = ABS(SUM(abuf))
    1171           0 :                CALL group%sum(sum_data)
    1172           0 :                IF (my_pos == 0 .AND. output_unit > 0) THEN
    1173           0 :                   WRITE (output_unit, '(A,T67,2I7)') "     Transform Y ", n(2), mx2*mz2
    1174           0 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(3) ", sum_data
    1175             :                END IF
    1176             :             END IF
    1177             : 
    1178             :             ! FFT along y
    1179           0 :             CALL fft_1dm(fft_scratch%fft_plan(5), abuf, bbuf, 1.0_dp, stat)
    1180             : 
    1181           0 :             abuf => fft_scratch%a2buf
    1182             : 
    1183           0 :             IF (test) THEN
    1184           0 :                sum_data = ABS(SUM(bbuf))
    1185           0 :                CALL group%sum(sum_data)
    1186           0 :                IF (my_pos == 0 .AND. output_unit > 0) THEN
    1187           0 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(4) T", sum_data
    1188             :                END IF
    1189             :             END IF
    1190             : 
    1191           0 :             CALL cube_transpose_1(bbuf, bo(:, :, :, 2), bo(:, :, :, 1), abuf, fft_scratch)
    1192             : 
    1193           0 :             IF (test) THEN
    1194           0 :                sum_data = ABS(SUM(abuf))
    1195           0 :                CALL group%sum(sum_data)
    1196           0 :                IF (my_pos == 0 .AND. output_unit > 0) THEN
    1197           0 :                   WRITE (output_unit, '(A,T67,2I7)') "     Transform Z ", n(3), mx1*my1
    1198           0 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(5) ", sum_data
    1199             :                END IF
    1200             :             END IF
    1201             : 
    1202             :             ! FFT along z
    1203           0 :             CALL fft_1dm(fft_scratch%fft_plan(6), abuf, zin, norm, stat)
    1204             : 
    1205           0 :             IF (test) THEN
    1206           0 :                sum_data = ABS(SUM(zin))
    1207           0 :                CALL group%sum(sum_data)
    1208           0 :                IF (my_pos == 0 .AND. output_unit > 0) THEN
    1209           0 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(6) ", sum_data
    1210             :                END IF
    1211             :             END IF
    1212             : 
    1213             :          ELSE
    1214           0 :             CPABORT("Illegal fsign parameter.")
    1215             :          END IF
    1216             : 
    1217           0 :          CALL release_fft_scratch(fft_scratch)
    1218             : 
    1219         208 :       ELSEIF (DIM(2) == 1) THEN
    1220             : 
    1221             :          !
    1222             :          ! Second case; one stage of communication
    1223             :          !
    1224             : 
    1225         208 :          CALL get_fft_scratch(fft_scratch, tf_type=101, n=n, fft_sizes=fft_scratch_size)
    1226             : 
    1227         208 :          IF (sign == FWFFT) THEN
    1228             :             ! Stage 1 -> 3
    1229             : 
    1230         104 :             IF (test) THEN
    1231        9284 :                sum_data = ABS(SUM(zin))
    1232           4 :                CALL group%sum(sum_data)
    1233           4 :                IF (my_pos == 0 .AND. output_unit > 0) THEN
    1234           2 :                   WRITE (output_unit, '(A)') "  one step communication algorithm "
    1235           2 :                   WRITE (output_unit, '(A,T67,2I7)') "     Transform Z ", n(3), mx1*my1
    1236           2 :                   WRITE (output_unit, '(A,T67,2I7)') "     Transform Y ", n(2), mx1*mz1
    1237           2 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(1) ", sum_data
    1238             :                END IF
    1239             :             END IF
    1240             : 
    1241         104 :             abuf => fft_scratch%a3buf
    1242         104 :             bbuf => fft_scratch%a4buf
    1243             :             ! FFT along z and y
    1244         104 :             CALL fft_1dm(fft_scratch%fft_plan(1), zin, abuf, norm, stat)
    1245         104 :             CALL fft_1dm(fft_scratch%fft_plan(2), abuf, bbuf, 1.0_dp, stat)
    1246             : 
    1247         104 :             abuf => fft_scratch%a5buf
    1248             : 
    1249         104 :             IF (test) THEN
    1250        8708 :                sum_data = ABS(SUM(bbuf))
    1251           4 :                CALL group%sum(sum_data)
    1252           4 :                IF (my_pos == 0 .AND. output_unit > 0) THEN
    1253           2 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(2) T", sum_data
    1254             :                END IF
    1255             :             END IF
    1256             : 
    1257         104 :             CALL cube_transpose_6(bbuf, group, bo(:, :, :, 1), bo(:, :, :, 3), abuf, fft_scratch)
    1258             : 
    1259         104 :             IF (test) THEN
    1260        8260 :                sum_data = ABS(SUM(abuf))
    1261           4 :                CALL group%sum(sum_data)
    1262           4 :                IF (my_pos == 0 .AND. output_unit > 0) THEN
    1263           2 :                   WRITE (output_unit, '(A,T67,2I7)') "     Transform X ", n(1), my3*mz3
    1264           2 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(3) ", sum_data
    1265             :                END IF
    1266             :             END IF
    1267             : 
    1268             :             ! FFT along x
    1269         104 :             CALL fft_1dm(fft_scratch%fft_plan(3), abuf, gin, 1.0_dp, stat)
    1270             : 
    1271         104 :             IF (test) THEN
    1272        8708 :                sum_data = ABS(SUM(gin))
    1273           4 :                CALL group%sum(sum_data)
    1274           4 :                IF (my_pos == 0 .AND. output_unit > 0) THEN
    1275           2 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(4) ", sum_data
    1276             :                END IF
    1277             :             END IF
    1278             : 
    1279         104 :          ELSEIF (sign == BWFFT) THEN
    1280             :             ! Stage 3 -> 1
    1281             : 
    1282         104 :             IF (test) THEN
    1283        8708 :                sum_data = ABS(SUM(gin))
    1284           4 :                CALL group%sum(sum_data)
    1285           4 :                IF (my_pos == 0 .AND. output_unit > 0) THEN
    1286           2 :                   WRITE (output_unit, '(A)') "  one step communication algorithm "
    1287           2 :                   WRITE (output_unit, '(A,T67,2I7)') "     Transform X ", n(1), my3*mz3
    1288           2 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(1) ", sum_data
    1289             :                END IF
    1290             :             END IF
    1291             : 
    1292         104 :             bbuf => fft_scratch%a5buf
    1293             : 
    1294             :             ! FFT along x
    1295         104 :             CALL fft_1dm(fft_scratch%fft_plan(4), gin, bbuf, 1.0_dp, stat)
    1296             : 
    1297         104 :             abuf => fft_scratch%a4buf
    1298             : 
    1299         104 :             IF (test) THEN
    1300        8260 :                sum_data = ABS(SUM(bbuf))
    1301           4 :                CALL group%sum(sum_data)
    1302           4 :                IF (my_pos == 0 .AND. output_unit > 0) THEN
    1303           2 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(2) T", sum_data
    1304             :                END IF
    1305             :             END IF
    1306             : 
    1307         104 :             CALL cube_transpose_5(bbuf, group, bo(:, :, :, 3), bo(:, :, :, 1), abuf, fft_scratch)
    1308             : 
    1309         104 :             bbuf => fft_scratch%a3buf
    1310             : 
    1311         104 :             IF (test) THEN
    1312        8708 :                sum_data = ABS(SUM(abuf))
    1313           4 :                CALL group%sum(sum_data)
    1314           4 :                IF (my_pos == 0 .AND. output_unit > 0) THEN
    1315           2 :                   WRITE (output_unit, '(A,T67,2I7)') "     Transform Y ", n(2), mx1*mz1
    1316           2 :                   WRITE (output_unit, '(A,T67,2I7)') "     Transform Z ", n(3), mx1*my1
    1317           2 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(3) ", sum_data
    1318             :                END IF
    1319             :             END IF
    1320             : 
    1321             :             ! FFT along y
    1322         104 :             CALL fft_1dm(fft_scratch%fft_plan(5), abuf, bbuf, 1.0_dp, stat)
    1323             : 
    1324             :             ! FFT along z
    1325         104 :             CALL fft_1dm(fft_scratch%fft_plan(6), bbuf, zin, norm, stat)
    1326             : 
    1327         104 :             IF (test) THEN
    1328        9284 :                sum_data = ABS(SUM(zin))
    1329           4 :                CALL group%sum(sum_data)
    1330           4 :                IF (my_pos == 0 .AND. output_unit > 0) THEN
    1331           2 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(4) ", sum_data
    1332             :                END IF
    1333             :             END IF
    1334             : 
    1335             :          ELSE
    1336           0 :             CPABORT("Illegal fsign parameter.")
    1337             :          END IF
    1338             : 
    1339         208 :          CALL release_fft_scratch(fft_scratch)
    1340             : 
    1341             :       ELSE
    1342             : 
    1343           0 :          CPABORT("This partition not implemented.")
    1344             : 
    1345             :       END IF
    1346             : 
    1347         208 :       IF (PRESENT(status)) THEN
    1348           0 :          status = stat
    1349             :       END IF
    1350             : 
    1351         208 :       CALL timestop(handle)
    1352             : 
    1353         416 :    END SUBROUTINE fft3d_pb
    1354             : 
    1355             : ! **************************************************************************************************
    1356             : !> \brief ...
    1357             : !> \param sb ...
    1358             : !> \param group ...
    1359             : !> \param my_pos ...
    1360             : !> \param p2p ...
    1361             : !> \param yzp ...
    1362             : !> \param nray ...
    1363             : !> \param bo ...
    1364             : !> \param tb ...
    1365             : !> \param fft_scratch ...
    1366             : !> \par History
    1367             : !>      15. Feb. 2006 : single precision all_to_all
    1368             : !> \author JGH (14-Jan-2001)
    1369             : ! **************************************************************************************************
    1370     1329442 :    SUBROUTINE x_to_yz(sb, group, my_pos, p2p, yzp, nray, bo, tb, fft_scratch)
    1371             : 
    1372             :       COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
    1373             :          INTENT(IN)                                      :: sb
    1374             : 
    1375             :       CLASS(mp_comm_type), INTENT(IN)                     :: group
    1376             :       INTEGER, INTENT(IN)                                :: my_pos
    1377             :       INTEGER, CONTIGUOUS, DIMENSION(0:), INTENT(IN)     :: p2p
    1378             :       INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:), &
    1379             :          INTENT(IN)                                      :: yzp
    1380             :       INTEGER, CONTIGUOUS, DIMENSION(0:), INTENT(IN)     :: nray
    1381             :       INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:), &
    1382             :          INTENT(IN)                                      :: bo
    1383             :       COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :, :), &
    1384             :          INTENT(INOUT)                                   :: tb
    1385             :       TYPE(fft_scratch_type), INTENT(IN)                 :: fft_scratch
    1386             : 
    1387             :       CHARACTER(len=*), PARAMETER                        :: routineN = 'x_to_yz'
    1388             : 
    1389             :       COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
    1390     1329442 :          POINTER                                         :: rr
    1391             :       COMPLEX(KIND=sp), CONTIGUOUS, DIMENSION(:, :), &
    1392     1329442 :          POINTER                                         :: ss, tt
    1393             :       INTEGER                                            :: handle, ip, ir, ix, ixx, iy, iz, mpr, &
    1394             :                                                             nm, np, nr, nx
    1395     1329442 :       INTEGER, CONTIGUOUS, DIMENSION(:), POINTER         :: rcount, rdispl, scount, sdispl
    1396             : 
    1397     1329442 :       CALL timeset(routineN, handle)
    1398             : 
    1399     1329442 :       np = SIZE(p2p)
    1400     1329442 :       scount => fft_scratch%scount
    1401     1329442 :       rcount => fft_scratch%rcount
    1402     1329442 :       sdispl => fft_scratch%sdispl
    1403     1329442 :       rdispl => fft_scratch%rdispl
    1404             : 
    1405     1329442 :       IF (alltoall_sgl) THEN
    1406         230 :          ss => fft_scratch%ss
    1407         230 :          tt => fft_scratch%tt
    1408     3441099 :          ss(:, :) = CMPLX(sb(:, :), KIND=sp)
    1409     3353860 :          tt(:, :) = 0._sp
    1410             :       ELSE
    1411     1329212 :          rr => fft_scratch%rr
    1412             :       END IF
    1413             : 
    1414     1329442 :       mpr = p2p(my_pos)
    1415     3988326 :       nm = MAXVAL(nray(0:np - 1))
    1416     1329442 :       nr = nray(my_pos)
    1417             : !$OMP PARALLEL DO DEFAULT(NONE), &
    1418             : !$OMP             PRIVATE(ix,nx), &
    1419     1329442 : !$OMP             SHARED(np,p2p,bo,nr,scount,sdispl)
    1420             :       DO ip = 0, np - 1
    1421             :          ix = p2p(ip)
    1422             :          nx = bo(2, 1, ix) - bo(1, 1, ix) + 1
    1423             :          scount(ip) = nr*nx
    1424             :          sdispl(ip) = nr*(bo(1, 1, ix) - 1)
    1425             :       END DO
    1426             : !$OMP END PARALLEL DO
    1427     1329442 :       nx = bo(2, 1, mpr) - bo(1, 1, mpr) + 1
    1428             : !$OMP PARALLEL DO DEFAULT(NONE), &
    1429             : !$OMP             PRIVATE(nr), &
    1430     1329442 : !$OMP             SHARED(np,nray,nx,rcount,rdispl,nm)
    1431             :       DO ip = 0, np - 1
    1432             :          nr = nray(ip)
    1433             :          rcount(ip) = nr*nx
    1434             :          rdispl(ip) = nm*nx*ip
    1435             :       END DO
    1436             : !$OMP END PARALLEL DO
    1437     1329442 :       IF (alltoall_sgl) THEN
    1438         230 :          CALL group%alltoall(ss, scount, sdispl, tt, rcount, rdispl)
    1439             :       ELSE
    1440     1329212 :          CALL group%alltoall(sb, scount, sdispl, rr, rcount, rdispl)
    1441             :       END IF
    1442             : 
    1443     1329442 :       nx = bo(2, 1, mpr) - bo(1, 1, mpr) + 1
    1444             : !$OMP PARALLEL DO DEFAULT(NONE) COLLAPSE(2) &
    1445             : !$OMP             PRIVATE(ixx,ir,iy,iz,ix) &
    1446     1329442 : !$OMP             SHARED(np,nray,nx,alltoall_sgl,yzp,tt,rr,tb)
    1447             :       DO ip = 0, np - 1
    1448             :          DO ix = 1, nx
    1449             :             ixx = nray(ip)*(ix - 1)
    1450             :             IF (alltoall_sgl) THEN
    1451             :                DO ir = 1, nray(ip)
    1452             :                   iy = yzp(1, ir, ip)
    1453             :                   iz = yzp(2, ir, ip)
    1454             :                   tb(iy, iz, ix) = tt(ir + ixx, ip)
    1455             :                END DO
    1456             :             ELSE
    1457             :                DO ir = 1, nray(ip)
    1458             :                   iy = yzp(1, ir, ip)
    1459             :                   iz = yzp(2, ir, ip)
    1460             :                   tb(iy, iz, ix) = rr(ir + ixx, ip)
    1461             :                END DO
    1462             :             END IF
    1463             :          END DO
    1464             :       END DO
    1465             : !$OMP END PARALLEL DO
    1466             : 
    1467     1329442 :       CALL timestop(handle)
    1468             : 
    1469     1329442 :    END SUBROUTINE x_to_yz
    1470             : 
    1471             : ! **************************************************************************************************
    1472             : !> \brief ...
    1473             : !> \param tb ...
    1474             : !> \param group ...
    1475             : !> \param my_pos ...
    1476             : !> \param p2p ...
    1477             : !> \param yzp ...
    1478             : !> \param nray ...
    1479             : !> \param bo ...
    1480             : !> \param sb ...
    1481             : !> \param fft_scratch ...
    1482             : !> \par History
    1483             : !>      15. Feb. 2006 : single precision all_to_all
    1484             : !> \author JGH (14-Jan-2001)
    1485             : ! **************************************************************************************************
    1486     1295920 :    SUBROUTINE yz_to_x(tb, group, my_pos, p2p, yzp, nray, bo, sb, fft_scratch)
    1487             : 
    1488             :       COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :, :), &
    1489             :          INTENT(IN)                                      :: tb
    1490             : 
    1491             :       CLASS(mp_comm_type), INTENT(IN)                     :: group
    1492             :       INTEGER, INTENT(IN)                                :: my_pos
    1493             :       INTEGER, CONTIGUOUS, DIMENSION(0:), INTENT(IN)     :: p2p
    1494             :       INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:), &
    1495             :          INTENT(IN)                                      :: yzp
    1496             :       INTEGER, CONTIGUOUS, DIMENSION(0:), INTENT(IN)     :: nray
    1497             :       INTEGER, DIMENSION(:, :, 0:), INTENT(IN)           :: bo
    1498             :       COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
    1499             :          INTENT(INOUT)                                   :: sb
    1500             :       TYPE(fft_scratch_type), INTENT(IN)                 :: fft_scratch
    1501             : 
    1502             :       CHARACTER(len=*), PARAMETER                        :: routineN = 'yz_to_x'
    1503             : 
    1504             :       COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
    1505     1295920 :          POINTER                                         :: rr
    1506             :       COMPLEX(KIND=sp), CONTIGUOUS, DIMENSION(:, :), &
    1507     1295920 :          POINTER                                         :: ss, tt
    1508             :       INTEGER                                            :: handle, ip, ir, ix, ixx, iy, iz, mpr, &
    1509             :                                                             nm, np, nr, nx
    1510     1295920 :       INTEGER, CONTIGUOUS, DIMENSION(:), POINTER         :: rcount, rdispl, scount, sdispl
    1511             : 
    1512     1295920 :       CALL timeset(routineN, handle)
    1513             : 
    1514     1295920 :       np = SIZE(p2p)
    1515     1295920 :       mpr = p2p(my_pos)
    1516     1295920 :       scount => fft_scratch%scount
    1517     1295920 :       rcount => fft_scratch%rcount
    1518     1295920 :       sdispl => fft_scratch%sdispl
    1519     1295920 :       rdispl => fft_scratch%rdispl
    1520             : 
    1521     1295920 :       IF (alltoall_sgl) THEN
    1522         238 :          ss => fft_scratch%ss
    1523         238 :          tt => fft_scratch%tt
    1524     3703835 :          ss = 0._sp
    1525     3609884 :          tt = 0._sp
    1526             :       ELSE
    1527     1295682 :          rr => fft_scratch%rr
    1528             :       END IF
    1529             : 
    1530     1295920 :       nx = bo(2, 1, mpr) - bo(1, 1, mpr) + 1
    1531             : !$OMP PARALLEL DO DEFAULT(NONE) COLLAPSE(2) &
    1532             : !$OMP             PRIVATE(ip, ixx, ir, iy, iz, ix) &
    1533     1295920 : !$OMP             SHARED(np,nray,nx,alltoall_sgl,yzp,tb,tt,rr)
    1534             :       DO ip = 0, np - 1
    1535             :          DO ix = 1, nx
    1536             :             ixx = nray(ip)*(ix - 1)
    1537             :             IF (alltoall_sgl) THEN
    1538             :                DO ir = 1, nray(ip)
    1539             :                   iy = yzp(1, ir, ip)
    1540             :                   iz = yzp(2, ir, ip)
    1541             :                   tt(ir + ixx, ip) = CMPLX(tb(iy, iz, ix), KIND=sp)
    1542             :                END DO
    1543             :             ELSE
    1544             :                DO ir = 1, nray(ip)
    1545             :                   iy = yzp(1, ir, ip)
    1546             :                   iz = yzp(2, ir, ip)
    1547             :                   rr(ir + ixx, ip) = tb(iy, iz, ix)
    1548             :                END DO
    1549             :             END IF
    1550             :          END DO
    1551             :       END DO
    1552             : !$OMP END PARALLEL DO
    1553     3887760 :       nm = MAXVAL(nray(0:np - 1))
    1554     1295920 :       nr = nray(my_pos)
    1555             : !$OMP PARALLEL DO DEFAULT(NONE), &
    1556             : !$OMP             PRIVATE(ix,nx), &
    1557     1295920 : !$OMP             SHARED(np,p2p,bo,rcount,rdispl,nr)
    1558             :       DO ip = 0, np - 1
    1559             :          ix = p2p(ip)
    1560             :          nx = bo(2, 1, ix) - bo(1, 1, ix) + 1
    1561             :          rcount(ip) = nr*nx
    1562             :          rdispl(ip) = nr*(bo(1, 1, ix) - 1)
    1563             :       END DO
    1564             : !$OMP END PARALLEL DO
    1565     1295920 :       nx = bo(2, 1, mpr) - bo(1, 1, mpr) + 1
    1566             : !$OMP PARALLEL DO DEFAULT(NONE), &
    1567             : !$OMP             PRIVATE(nr), &
    1568     1295920 : !$OMP             SHARED(np,nray,scount,sdispl,nx,nm)
    1569             :       DO ip = 0, np - 1
    1570             :          nr = nray(ip)
    1571             :          scount(ip) = nr*nx
    1572             :          sdispl(ip) = nm*nx*ip
    1573             :       END DO
    1574             : !$OMP END PARALLEL DO
    1575             : 
    1576     1295920 :       IF (alltoall_sgl) THEN
    1577         238 :          CALL group%alltoall(tt, scount, sdispl, ss, rcount, rdispl)
    1578     3703835 :          sb = ss
    1579             :       ELSE
    1580     1295682 :          CALL group%alltoall(rr, scount, sdispl, sb, rcount, rdispl)
    1581             :       END IF
    1582             : 
    1583     1295920 :       CALL timestop(handle)
    1584             : 
    1585     1295920 :    END SUBROUTINE yz_to_x
    1586             : 
    1587             : ! **************************************************************************************************
    1588             : !> \brief ...
    1589             : !> \param sb ...
    1590             : !> \param group ...
    1591             : !> \param dims ...
    1592             : !> \param my_pos ...
    1593             : !> \param p2p ...
    1594             : !> \param yzp ...
    1595             : !> \param nray ...
    1596             : !> \param bo ...
    1597             : !> \param tb ...
    1598             : !> \param fft_scratch ...
    1599             : !> \par History
    1600             : !>      15. Feb. 2006 : single precision all_to_all
    1601             : !> \author JGH (18-Jan-2001)
    1602             : ! **************************************************************************************************
    1603           0 :    SUBROUTINE yz_to_xz(sb, group, dims, my_pos, p2p, yzp, nray, bo, tb, fft_scratch)
    1604             : 
    1605             :       COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
    1606             :          INTENT(IN)                                      :: sb
    1607             : 
    1608             :       CLASS(mp_comm_type), INTENT(IN)                     :: group
    1609             :       INTEGER, DIMENSION(2), INTENT(IN)                  :: dims
    1610             :       INTEGER, INTENT(IN)                                :: my_pos
    1611             :       INTEGER, CONTIGUOUS, DIMENSION(0:), INTENT(IN)                 :: p2p
    1612             :       INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:), INTENT(IN)           :: yzp
    1613             :       INTEGER, CONTIGUOUS, DIMENSION(0:), INTENT(IN)                 :: nray
    1614             :       INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:), INTENT(IN)           :: bo
    1615             :       COMPLEX(KIND=dp), DIMENSION(:, :), INTENT(INOUT), CONTIGUOUS   :: tb
    1616             :       TYPE(fft_scratch_type), INTENT(INOUT)                 :: fft_scratch
    1617             : 
    1618             :       CHARACTER(len=*), PARAMETER                        :: routineN = 'yz_to_xz'
    1619             : 
    1620           0 :       COMPLEX(KIND=dp), DIMENSION(:), POINTER, CONTIGUOUS            :: xzbuf, yzbuf
    1621           0 :       COMPLEX(KIND=sp), DIMENSION(:), POINTER, CONTIGUOUS            :: xzbuf_sgl, yzbuf_sgl
    1622             :       INTEGER                                            :: handle, icrs, ip, ipl, ipr, ir, ix, iz, &
    1623             :                                                             jj, jx, jy, jz, myx, myz, np, npx, &
    1624             :                                                             npz, nx, nz, rs_pos
    1625           0 :       INTEGER, DIMENSION(:), POINTER, CONTIGUOUS                     :: pzcoord, rcount, rdispl, scount, sdispl, &
    1626           0 :                                                                         xcor, zcor
    1627           0 :       INTEGER, DIMENSION(:, :), CONTIGUOUS, POINTER                  :: pgrid
    1628             : 
    1629           0 :       CALL timeset(routineN, handle)
    1630             : 
    1631           0 :       np = SIZE(p2p)
    1632             : 
    1633           0 :       rs_pos = p2p(my_pos)
    1634             : 
    1635           0 :       IF (alltoall_sgl) THEN
    1636           0 :          yzbuf_sgl => fft_scratch%yzbuf_sgl
    1637           0 :          xzbuf_sgl => fft_scratch%xzbuf_sgl
    1638             :       ELSE
    1639           0 :          yzbuf => fft_scratch%yzbuf
    1640           0 :          xzbuf => fft_scratch%xzbuf
    1641             :       END IF
    1642           0 :       npx = dims(1)
    1643           0 :       npz = dims(2)
    1644           0 :       pgrid => fft_scratch%pgrid
    1645           0 :       xcor => fft_scratch%xcor
    1646           0 :       zcor => fft_scratch%zcor
    1647           0 :       pzcoord => fft_scratch%pzcoord
    1648           0 :       scount => fft_scratch%scount
    1649           0 :       rcount => fft_scratch%rcount
    1650           0 :       sdispl => fft_scratch%sdispl
    1651           0 :       rdispl => fft_scratch%rdispl
    1652             : 
    1653           0 :       nx = SIZE(sb, 2)
    1654             : 
    1655             : ! If the send and recv counts are not already cached, then
    1656             : ! calculate and store them
    1657           0 :       IF (fft_scratch%in == 0) THEN
    1658             : 
    1659           0 :          scount = 0
    1660             : 
    1661           0 :          DO ix = 0, npx - 1
    1662           0 :             ip = pgrid(ix, 0)
    1663           0 :             xcor(bo(1, 1, ip):bo(2, 1, ip)) = ix
    1664             :          END DO
    1665           0 :          DO iz = 0, npz - 1
    1666           0 :             ip = pgrid(0, iz)
    1667           0 :             zcor(bo(1, 3, ip):bo(2, 3, ip)) = iz
    1668             :          END DO
    1669           0 :          DO jx = 1, nx
    1670           0 :             IF (alltoall_sgl) THEN
    1671           0 :                DO ir = 1, nray(my_pos)
    1672           0 :                   jy = yzp(1, ir, my_pos)
    1673           0 :                   jz = yzp(2, ir, my_pos)
    1674           0 :                   ip = pgrid(xcor(jx), zcor(jz))
    1675           0 :                   scount(ip) = scount(ip) + 1
    1676             :                END DO
    1677             :             ELSE
    1678           0 :                DO ir = 1, nray(my_pos)
    1679           0 :                   jy = yzp(1, ir, my_pos)
    1680           0 :                   jz = yzp(2, ir, my_pos)
    1681           0 :                   ip = pgrid(xcor(jx), zcor(jz))
    1682           0 :                   scount(ip) = scount(ip) + 1
    1683             :                END DO
    1684             :             END IF
    1685             :          END DO
    1686             : 
    1687           0 :          CALL group%alltoall(scount, rcount, 1)
    1688           0 :          fft_scratch%yzcount = scount
    1689           0 :          fft_scratch%xzcount = rcount
    1690             : 
    1691             :          ! Work out the correct displacements in the buffers
    1692           0 :          sdispl(0) = 0
    1693           0 :          rdispl(0) = 0
    1694           0 :          DO ip = 1, np - 1
    1695           0 :             sdispl(ip) = sdispl(ip - 1) + scount(ip - 1)
    1696           0 :             rdispl(ip) = rdispl(ip - 1) + rcount(ip - 1)
    1697             :          END DO
    1698             : 
    1699           0 :          fft_scratch%yzdispl = sdispl
    1700           0 :          fft_scratch%xzdispl = rdispl
    1701             : 
    1702           0 :          icrs = 0
    1703           0 :          DO ip = 0, np - 1
    1704           0 :             IF (scount(ip) /= 0) icrs = icrs + 1
    1705           0 :             IF (rcount(ip) /= 0) icrs = icrs + 1
    1706             :          END DO
    1707           0 :          CALL group%sum(icrs)
    1708           0 :          fft_scratch%rsratio = REAL(icrs, KIND=dp)/(REAL(2*np, KIND=dp)*REAL(np, KIND=dp))
    1709             : 
    1710           0 :          fft_scratch%in = 1
    1711             :       ELSE
    1712           0 :          scount = fft_scratch%yzcount
    1713           0 :          rcount = fft_scratch%xzcount
    1714           0 :          sdispl = fft_scratch%yzdispl
    1715           0 :          rdispl = fft_scratch%xzdispl
    1716             :       END IF
    1717             : 
    1718             : ! Do the actual packing
    1719             : !$OMP PARALLEL DO DEFAULT(NONE), &
    1720             : !$OMP             PRIVATE(ipl,jj,nx,ir,jx,jy,jz),&
    1721             : !$OMP             SHARED(np,p2p,pzcoord,bo,nray,yzp,zcor),&
    1722             : !$OMP             SHARED(yzbuf,sb,scount,sdispl,my_pos),&
    1723           0 : !$OMP             SHARED(yzbuf_sgl,alltoall_sgl)
    1724             :       DO ip = 0, np - 1
    1725             :          IF (scount(ip) == 0) CYCLE
    1726             :          ipl = p2p(ip)
    1727             :          jj = 0
    1728             :          nx = bo(2, 1, ipl) - bo(1, 1, ipl) + 1
    1729             :          DO ir = 1, nray(my_pos)
    1730             :             jz = yzp(2, ir, my_pos)
    1731             :             IF (zcor(jz) == pzcoord(ipl)) THEN
    1732             :                jj = jj + 1
    1733             :                jy = yzp(1, ir, my_pos)
    1734             :                IF (alltoall_sgl) THEN
    1735             :                   DO jx = 0, nx - 1
    1736             :                      yzbuf_sgl(sdispl(ip) + jj + jx*scount(ip)/nx) = CMPLX(sb(ir, jx + bo(1, 1, ipl)), KIND=sp)
    1737             :                   END DO
    1738             :                ELSE
    1739             :                   DO jx = 0, nx - 1
    1740             :                      yzbuf(sdispl(ip) + jj + jx*scount(ip)/nx) = sb(ir, jx + bo(1, 1, ipl))
    1741             :                   END DO
    1742             :                END IF
    1743             :             END IF
    1744             :          END DO
    1745             :       END DO
    1746             : !$OMP END PARALLEL DO
    1747             : 
    1748           0 :       IF (alltoall_sgl) THEN
    1749           0 :          CALL group%alltoall(yzbuf_sgl, scount, sdispl, xzbuf_sgl, rcount, rdispl)
    1750             :       ELSE
    1751           0 :          IF (fft_scratch%rsratio < ratio_sparse_alltoall) THEN
    1752           0 :             CALL sparse_alltoall(yzbuf, scount, sdispl, xzbuf, rcount, rdispl, group)
    1753             :          ELSE
    1754           0 :             CALL group%alltoall(yzbuf, scount, sdispl, xzbuf, rcount, rdispl)
    1755             :          END IF
    1756             :       END IF
    1757             : 
    1758           0 :       myx = fft_scratch%sizes%r_pos(1)
    1759           0 :       myz = fft_scratch%sizes%r_pos(2)
    1760           0 :       nz = bo(2, 3, rs_pos) - bo(1, 3, rs_pos) + 1
    1761             : 
    1762             : !$OMP PARALLEL DO DEFAULT(NONE), &
    1763             : !$OMP             PRIVATE(ipr,jj,ir,jx,jy,jz),&
    1764             : !$OMP             SHARED(tb,np,p2p,bo,rs_pos,nray),&
    1765             : !$OMP             SHARED(yzp,alltoall_sgl,zcor,myz),&
    1766           0 : !$OMP             SHARED(xzbuf,xzbuf_sgl,nz,rdispl)
    1767             :       DO ip = 0, np - 1
    1768             :          ipr = p2p(ip)
    1769             :          jj = 0
    1770             :          DO jx = 0, bo(2, 1, rs_pos) - bo(1, 1, rs_pos)
    1771             :             DO ir = 1, nray(ip)
    1772             :                jz = yzp(2, ir, ip)
    1773             :                IF (alltoall_sgl) THEN
    1774             :                   IF (zcor(jz) == myz) THEN
    1775             :                      jj = jj + 1
    1776             :                      jy = yzp(1, ir, ip)
    1777             :                      jz = jz - bo(1, 3, rs_pos) + 1
    1778             :                      tb(jy, jz + jx*nz) = xzbuf_sgl(jj + rdispl(ipr))
    1779             :                   END IF
    1780             :                ELSE
    1781             :                   IF (zcor(jz) == myz) THEN
    1782             :                      jj = jj + 1
    1783             :                      jy = yzp(1, ir, ip)
    1784             :                      jz = jz - bo(1, 3, rs_pos) + 1
    1785             :                      tb(jy, jz + jx*nz) = xzbuf(jj + rdispl(ipr))
    1786             :                   END IF
    1787             :                END IF
    1788             :             END DO
    1789             :          END DO
    1790             :       END DO
    1791             : !$OMP END PARALLEL DO
    1792             : 
    1793           0 :       CALL timestop(handle)
    1794             : 
    1795           0 :    END SUBROUTINE yz_to_xz
    1796             : 
    1797             : ! **************************************************************************************************
    1798             : !> \brief ...
    1799             : !> \param sb ...
    1800             : !> \param group ...
    1801             : !> \param dims ...
    1802             : !> \param my_pos ...
    1803             : !> \param p2p ...
    1804             : !> \param yzp ...
    1805             : !> \param nray ...
    1806             : !> \param bo ...
    1807             : !> \param tb ...
    1808             : !> \param fft_scratch ...
    1809             : !> \par History
    1810             : !>      15. Feb. 2006 : single precision all_to_all
    1811             : !> \author JGH (19-Jan-2001)
    1812             : ! **************************************************************************************************
    1813           0 :    SUBROUTINE xz_to_yz(sb, group, dims, my_pos, p2p, yzp, nray, bo, tb, fft_scratch)
    1814             : 
    1815             :       COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
    1816             :          INTENT(IN)                                      :: sb
    1817             : 
    1818             :       CLASS(mp_comm_type), INTENT(IN)                     :: group
    1819             :       INTEGER, DIMENSION(2), INTENT(IN)                  :: dims
    1820             :       INTEGER, INTENT(IN)                                :: my_pos
    1821             :       INTEGER, CONTIGUOUS, DIMENSION(0:), INTENT(IN)                 :: p2p
    1822             :       INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:), INTENT(IN)           :: yzp
    1823             :       INTEGER, CONTIGUOUS, DIMENSION(0:), INTENT(IN)                 :: nray
    1824             :       INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:), INTENT(IN)           :: bo
    1825             :       COMPLEX(KIND=dp), DIMENSION(:, :), INTENT(INOUT), CONTIGUOUS   :: tb
    1826             :       TYPE(fft_scratch_type), INTENT(INOUT)              :: fft_scratch
    1827             : 
    1828             :       CHARACTER(len=*), PARAMETER                        :: routineN = 'xz_to_yz'
    1829             : 
    1830           0 :       COMPLEX(KIND=dp), DIMENSION(:), POINTER, CONTIGUOUS            :: xzbuf, yzbuf
    1831           0 :       COMPLEX(KIND=sp), DIMENSION(:), POINTER, CONTIGUOUS            :: xzbuf_sgl, yzbuf_sgl
    1832             :       INTEGER                                            :: handle, icrs, ip, ipl, ir, ix, ixx, iz, &
    1833             :                                                             jj, jx, jy, jz, mp, myx, myz, np, npx, &
    1834             :                                                             npz, nx, nz
    1835           0 :       INTEGER, DIMENSION(:), POINTER, CONTIGUOUS                     :: pzcoord, rcount, rdispl, scount, sdispl, &
    1836           0 :                                                                         xcor, zcor
    1837           0 :       INTEGER, DIMENSION(:, :), CONTIGUOUS, POINTER                  :: pgrid
    1838             : 
    1839           0 :       CALL timeset(routineN, handle)
    1840             : 
    1841           0 :       np = SIZE(p2p)
    1842             : 
    1843           0 :       IF (alltoall_sgl) THEN
    1844           0 :          yzbuf_sgl => fft_scratch%yzbuf_sgl
    1845           0 :          xzbuf_sgl => fft_scratch%xzbuf_sgl
    1846             :       ELSE
    1847           0 :          yzbuf => fft_scratch%yzbuf
    1848           0 :          xzbuf => fft_scratch%xzbuf
    1849             :       END IF
    1850           0 :       npx = dims(1)
    1851           0 :       npz = dims(2)
    1852           0 :       pgrid => fft_scratch%pgrid
    1853           0 :       xcor => fft_scratch%xcor
    1854           0 :       zcor => fft_scratch%zcor
    1855           0 :       pzcoord => fft_scratch%pzcoord
    1856           0 :       scount => fft_scratch%scount
    1857           0 :       rcount => fft_scratch%rcount
    1858           0 :       sdispl => fft_scratch%sdispl
    1859           0 :       rdispl => fft_scratch%rdispl
    1860             : 
    1861             : ! If the send and recv counts are not already cached, then
    1862             : ! calculate and store them
    1863           0 :       IF (fft_scratch%in == 0) THEN
    1864             : 
    1865           0 :          rcount = 0
    1866           0 :          nx = MAXVAL(bo(2, 1, :))
    1867             : 
    1868           0 :          DO ix = 0, npx - 1
    1869           0 :             ip = pgrid(ix, 0)
    1870           0 :             xcor(bo(1, 1, ip):bo(2, 1, ip)) = ix
    1871             :          END DO
    1872           0 :          DO iz = 0, npz - 1
    1873           0 :             ip = pgrid(0, iz)
    1874           0 :             zcor(bo(1, 3, ip):bo(2, 3, ip)) = iz
    1875             :          END DO
    1876           0 :          DO jx = 1, nx
    1877           0 :             DO ir = 1, nray(my_pos)
    1878           0 :                jy = yzp(1, ir, my_pos)
    1879           0 :                jz = yzp(2, ir, my_pos)
    1880           0 :                ip = pgrid(xcor(jx), zcor(jz))
    1881           0 :                rcount(ip) = rcount(ip) + 1
    1882             :             END DO
    1883             :          END DO
    1884             : 
    1885           0 :          CALL group%alltoall(rcount, scount, 1)
    1886           0 :          fft_scratch%xzcount = scount
    1887           0 :          fft_scratch%yzcount = rcount
    1888             : 
    1889             :          ! Work out the correct displacements in the buffers
    1890           0 :          sdispl(0) = 0
    1891           0 :          rdispl(0) = 0
    1892           0 :          DO ip = 1, np - 1
    1893           0 :             sdispl(ip) = sdispl(ip - 1) + scount(ip - 1)
    1894           0 :             rdispl(ip) = rdispl(ip - 1) + rcount(ip - 1)
    1895             :          END DO
    1896             : 
    1897           0 :          fft_scratch%xzdispl = sdispl
    1898           0 :          fft_scratch%yzdispl = rdispl
    1899             : 
    1900           0 :          icrs = 0
    1901           0 :          DO ip = 0, np - 1
    1902           0 :             IF (scount(ip) /= 0) icrs = icrs + 1
    1903           0 :             IF (rcount(ip) /= 0) icrs = icrs + 1
    1904             :          END DO
    1905           0 :          CALL group%sum(icrs)
    1906           0 :          fft_scratch%rsratio = REAL(icrs, KIND=dp)/(REAL(2*np, KIND=dp)*REAL(np, KIND=dp))
    1907             : 
    1908           0 :          fft_scratch%in = 1
    1909             :       ELSE
    1910           0 :          scount = fft_scratch%xzcount
    1911           0 :          rcount = fft_scratch%yzcount
    1912           0 :          sdispl = fft_scratch%xzdispl
    1913           0 :          rdispl = fft_scratch%yzdispl
    1914             :       END IF
    1915             : 
    1916             : ! Now do the actual packing
    1917           0 :       myx = fft_scratch%sizes%r_pos(1)
    1918           0 :       myz = fft_scratch%sizes%r_pos(2)
    1919           0 :       mp = p2p(my_pos)
    1920           0 :       nz = bo(2, 3, mp) - bo(1, 3, mp) + 1
    1921           0 :       nx = bo(2, 1, mp) - bo(1, 1, mp) + 1
    1922             : 
    1923             : !$OMP PARALLEL DO DEFAULT(NONE), &
    1924             : !$OMP             PRIVATE(jj,ipl,ir,jx,jy,jz,ixx),&
    1925             : !$OMP             SHARED(np,p2p,nray,yzp,zcor,myz,bo,mp),&
    1926             : !$OMP             SHARED(alltoall_sgl,nx,scount,sdispl),&
    1927           0 : !$OMP             SHARED(xzbuf,xzbuf_sgl,sb,nz)
    1928             :       DO ip = 0, np - 1
    1929             :          jj = 0
    1930             :          ipl = p2p(ip)
    1931             :          DO ir = 1, nray(ip)
    1932             :             jz = yzp(2, ir, ip)
    1933             :             IF (zcor(jz) == myz) THEN
    1934             :                jj = jj + 1
    1935             :                jy = yzp(1, ir, ip)
    1936             :                jz = yzp(2, ir, ip) - bo(1, 3, mp) + 1
    1937             :                IF (alltoall_sgl) THEN
    1938             :                   DO jx = 0, nx - 1
    1939             :                      ixx = jj + jx*scount(ipl)/nx
    1940             :                      xzbuf_sgl(ixx + sdispl(ipl)) = CMPLX(sb(jy, jz + jx*nz), KIND=sp)
    1941             :                   END DO
    1942             :                ELSE
    1943             :                   DO jx = 0, nx - 1
    1944             :                      ixx = jj + jx*scount(ipl)/nx
    1945             :                      xzbuf(ixx + sdispl(ipl)) = sb(jy, jz + jx*nz)
    1946             :                   END DO
    1947             :                END IF
    1948             :             END IF
    1949             :          END DO
    1950             :       END DO
    1951             : !$OMP END PARALLEL DO
    1952             : 
    1953           0 :       IF (alltoall_sgl) THEN
    1954           0 :          CALL group%alltoall(xzbuf_sgl, scount, sdispl, yzbuf_sgl, rcount, rdispl)
    1955             :       ELSE
    1956           0 :          IF (fft_scratch%rsratio < ratio_sparse_alltoall) THEN
    1957           0 :             CALL sparse_alltoall(xzbuf, scount, sdispl, yzbuf, rcount, rdispl, group)
    1958             :          ELSE
    1959           0 :             CALL group%alltoall(xzbuf, scount, sdispl, yzbuf, rcount, rdispl)
    1960             :          END IF
    1961             :       END IF
    1962             : 
    1963             : !$OMP PARALLEL DO DEFAULT(NONE), &
    1964             : !$OMP             PRIVATE(ipl,jj,nx,ir,jx,jy,jz),&
    1965             : !$OMP             SHARED(p2p,pzcoord,bo,nray,my_pos,yzp),&
    1966             : !$OMP             SHARED(rcount,rdispl,tb,yzbuf,zcor),&
    1967           0 : !$OMP             SHARED(yzbuf_sgl,alltoall_sgl,np)
    1968             :       DO ip = 0, np - 1
    1969             :          IF (rcount(ip) == 0) CYCLE
    1970             :          ipl = p2p(ip)
    1971             :          jj = 0
    1972             :          nx = bo(2, 1, ipl) - bo(1, 1, ipl) + 1
    1973             :          DO ir = 1, nray(my_pos)
    1974             :             jz = yzp(2, ir, my_pos)
    1975             :             IF (zcor(jz) == pzcoord(ipl)) THEN
    1976             :                jj = jj + 1
    1977             :                jy = yzp(1, ir, my_pos)
    1978             :                IF (alltoall_sgl) THEN
    1979             :                   DO jx = 0, nx - 1
    1980             :                      tb(ir, jx + bo(1, 1, ipl)) = yzbuf_sgl(rdispl(ip) + jj + jx*rcount(ip)/nx)
    1981             :                   END DO
    1982             :                ELSE
    1983             :                   DO jx = 0, nx - 1
    1984             :                      tb(ir, jx + bo(1, 1, ipl)) = yzbuf(rdispl(ip) + jj + jx*rcount(ip)/nx)
    1985             :                   END DO
    1986             :                END IF
    1987             :             END IF
    1988             :          END DO
    1989             :       END DO
    1990             : !$OMP END PARALLEL DO
    1991             : 
    1992           0 :       CALL timestop(handle)
    1993             : 
    1994           0 :    END SUBROUTINE xz_to_yz
    1995             : 
    1996             : ! **************************************************************************************************
    1997             : !> \brief ...
    1998             : !> \param cin ...
    1999             : !> \param boin ...
    2000             : !> \param boout ...
    2001             : !> \param sout ...
    2002             : !> \param fft_scratch ...
    2003             : !> \par History
    2004             : !>      none
    2005             : !> \author JGH (20-Jan-2001)
    2006             : ! **************************************************************************************************
    2007           0 :    SUBROUTINE cube_transpose_1(cin, boin, boout, sout, fft_scratch)
    2008             : 
    2009             :       COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
    2010             :          INTENT(IN)                                      :: cin
    2011             :       INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:), &
    2012             :          INTENT(IN)                                      :: boin, boout
    2013             :       COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
    2014             :          INTENT(OUT)                                     :: sout
    2015             :       TYPE(fft_scratch_type), INTENT(IN)                 :: fft_scratch
    2016             : 
    2017             :       CHARACTER(len=*), PARAMETER                        :: routineN = 'cube_transpose_1'
    2018             : 
    2019             :       COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
    2020           0 :          POINTER                                         :: rbuf
    2021             :       INTEGER                                            :: handle, ip, ipl, ir, is, ixy, iz, mip, &
    2022             :                                                             mz, np, nx, ny, nz
    2023           0 :       INTEGER, CONTIGUOUS, DIMENSION(:), POINTER         :: rcount, rdispl, scount, sdispl
    2024           0 :       INTEGER, CONTIGUOUS, DIMENSION(:, :), POINTER      :: pgrid
    2025             :       INTEGER, DIMENSION(2)                              :: dim, pos
    2026             : 
    2027           0 :       CALL timeset(routineN, handle)
    2028             : 
    2029           0 :       mip = fft_scratch%mip
    2030           0 :       dim = fft_scratch%dim
    2031           0 :       pos = fft_scratch%pos
    2032           0 :       scount => fft_scratch%scount
    2033           0 :       rcount => fft_scratch%rcount
    2034           0 :       sdispl => fft_scratch%sdispl
    2035           0 :       rdispl => fft_scratch%rdispl
    2036           0 :       pgrid => fft_scratch%pgcube
    2037           0 :       np = DIM(2)
    2038             : 
    2039           0 :       nx = boin(2, 1, mip) - boin(1, 1, mip) + 1
    2040           0 :       nz = boin(2, 3, mip) - boin(1, 3, mip) + 1
    2041             : 
    2042             : !$OMP PARALLEL DO DEFAULT(NONE), &
    2043             : !$OMP             PRIVATE(ipl,ny), &
    2044           0 : !$OMP             SHARED(np,pgrid,boout,scount,sdispl,nx,nz)
    2045             :       DO ip = 0, np - 1
    2046             :          ipl = pgrid(ip, 2)
    2047             :          ny = boout(2, 2, ipl) - boout(1, 2, ipl) + 1
    2048             :          scount(ip) = nx*nz*ny
    2049             :          sdispl(ip) = nx*nz*(boout(1, 2, ipl) - 1)
    2050             :       END DO
    2051             : !$OMP END PARALLEL DO
    2052           0 :       ny = boout(2, 2, mip) - boout(1, 2, mip) + 1
    2053           0 :       mz = MAXVAL(boin(2, 3, :) - boin(1, 3, :) + 1)
    2054             : !$OMP PARALLEL DO DEFAULT(NONE), &
    2055             : !$OMP             PRIVATE(ipl,nz), &
    2056           0 : !$OMP             SHARED(np,pgrid,boin,nx,ny,rcount,rdispl,mz)
    2057             :       DO ip = 0, np - 1
    2058             :          ipl = pgrid(ip, 2)
    2059             :          nz = boin(2, 3, ipl) - boin(1, 3, ipl) + 1
    2060             :          rcount(ip) = nx*nz*ny
    2061             :          rdispl(ip) = nx*ny*mz*ip
    2062             :       END DO
    2063             : !$OMP END PARALLEL DO
    2064             : 
    2065           0 :       rbuf => fft_scratch%rbuf1
    2066             : 
    2067           0 :       CALL fft_scratch%cart_sub_comm(2)%alltoall(cin, scount, sdispl, rbuf, rcount, rdispl)
    2068             : 
    2069             : !$OMP PARALLEL DO DEFAULT(NONE) COLLAPSE(2) &
    2070             : !$OMP             PRIVATE(ip,ipl,nz,iz,is,ir) &
    2071           0 : !$OMP             SHARED(nx,ny,np,pgrid,boin,sout,rbuf)
    2072             :       DO ixy = 1, nx*ny
    2073             :          DO ip = 0, np - 1
    2074             :             ipl = pgrid(ip, 2)
    2075             :             nz = boin(2, 3, ipl) - boin(1, 3, ipl) + 1
    2076             :             DO iz = 1, nz
    2077             :                is = boin(1, 3, ipl) + iz - 1
    2078             :                ir = iz + nz*(ixy - 1)
    2079             :                sout(is, ixy) = rbuf(ir, ip)
    2080             :             END DO
    2081             :          END DO
    2082             :       END DO
    2083             : !$OMP END PARALLEL DO
    2084             : 
    2085           0 :       CALL timestop(handle)
    2086             : 
    2087           0 :    END SUBROUTINE cube_transpose_1
    2088             : 
    2089             : ! **************************************************************************************************
    2090             : !> \brief ...
    2091             : !> \param cin ...
    2092             : !> \param boin ...
    2093             : !> \param boout ...
    2094             : !> \param sout ...
    2095             : !> \param fft_scratch ...
    2096             : ! **************************************************************************************************
    2097           0 :    SUBROUTINE cube_transpose_2(cin, boin, boout, sout, fft_scratch)
    2098             : 
    2099             :       COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
    2100             :          INTENT(IN)                                      :: cin
    2101             :       INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:), &
    2102             :          INTENT(IN)                                      :: boin, boout
    2103             :       COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
    2104             :          INTENT(OUT)                                     :: sout
    2105             :       TYPE(fft_scratch_type), INTENT(IN)                 :: fft_scratch
    2106             : 
    2107             :       CHARACTER(len=*), PARAMETER                        :: routineN = 'cube_transpose_2'
    2108             : 
    2109             :       COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
    2110           0 :          POINTER                                         :: rbuf
    2111             :       INTEGER                                            :: handle, ip, ipl, ir, ixy, iz, mip, mz, &
    2112             :                                                             np, nx, ny, nz
    2113           0 :       INTEGER, CONTIGUOUS, DIMENSION(:), POINTER         :: rcount, rdispl, scount, sdispl
    2114           0 :       INTEGER, CONTIGUOUS, DIMENSION(:, :), POINTER      :: pgrid
    2115             :       INTEGER, DIMENSION(2)                              :: dim, pos
    2116             :       TYPE(mp_comm_type)                                 :: sub_group
    2117             : 
    2118           0 :       CALL timeset(routineN, handle)
    2119             : 
    2120           0 :       sub_group = fft_scratch%cart_sub_comm(2)
    2121           0 :       mip = fft_scratch%mip
    2122           0 :       dim = fft_scratch%dim
    2123           0 :       pos = fft_scratch%pos
    2124           0 :       scount => fft_scratch%scount
    2125           0 :       rcount => fft_scratch%rcount
    2126           0 :       sdispl => fft_scratch%sdispl
    2127           0 :       rdispl => fft_scratch%rdispl
    2128           0 :       pgrid => fft_scratch%pgcube
    2129           0 :       np = DIM(2)
    2130             : 
    2131           0 :       nx = boin(2, 1, mip) - boin(1, 1, mip) + 1
    2132           0 :       ny = boin(2, 2, mip) - boin(1, 2, mip) + 1
    2133           0 :       mz = MAXVAL(boout(2, 3, :) - boout(1, 3, :) + 1)
    2134             : 
    2135           0 :       rbuf => fft_scratch%rbuf2
    2136             : 
    2137             : !$OMP PARALLEL DEFAULT(NONE), &
    2138             : !$OMP          PRIVATE(ip,ipl,nz,iz,ir), &
    2139           0 : !$OMP          SHARED(nx,ny,np,pgrid,boout,rbuf,cin,scount,sdispl,mz)
    2140             : !$OMP DO COLLAPSE(2)
    2141             :       DO ixy = 1, nx*ny
    2142             :          DO ip = 0, np - 1
    2143             :             ipl = pgrid(ip, 2)
    2144             :             nz = boout(2, 3, ipl) - boout(1, 3, ipl) + 1
    2145             :             DO iz = boout(1, 3, ipl), boout(2, 3, ipl)
    2146             :                ir = iz - boout(1, 3, ipl) + 1 + (ixy - 1)*nz
    2147             :                rbuf(ir, ip) = cin(iz, ixy)
    2148             :             END DO
    2149             :          END DO
    2150             :       END DO
    2151             : !$OMP END DO
    2152             : !$OMP DO
    2153             :       DO ip = 0, np - 1
    2154             :          ipl = pgrid(ip, 2)
    2155             :          nz = boout(2, 3, ipl) - boout(1, 3, ipl) + 1
    2156             :          scount(ip) = nx*ny*nz
    2157             :          sdispl(ip) = nx*ny*mz*ip
    2158             :       END DO
    2159             : !$OMP END DO
    2160             : !$OMP END PARALLEL
    2161           0 :       nz = boout(2, 3, mip) - boout(1, 3, mip) + 1
    2162             : !$OMP PARALLEL DO DEFAULT(NONE), &
    2163             : !$OMP             PRIVATE(ipl,ny), &
    2164           0 : !$OMP             SHARED(np,pgrid,boin,nx,nz,rcount,rdispl)
    2165             :       DO ip = 0, np - 1
    2166             :          ipl = pgrid(ip, 2)
    2167             :          ny = boin(2, 2, ipl) - boin(1, 2, ipl) + 1
    2168             :          rcount(ip) = nx*ny*nz
    2169             :          rdispl(ip) = nx*nz*(boin(1, 2, ipl) - 1)
    2170             :       END DO
    2171             : !$OMP END PARALLEL DO
    2172             : 
    2173           0 :       CALL sub_group%alltoall(rbuf, scount, sdispl, sout, rcount, rdispl)
    2174             : 
    2175           0 :       CALL timestop(handle)
    2176             : 
    2177           0 :    END SUBROUTINE cube_transpose_2
    2178             : 
    2179             : ! **************************************************************************************************
    2180             : !> \brief ...
    2181             : !> \param cin ...
    2182             : !> \param boin ...
    2183             : !> \param boout ...
    2184             : !> \param sout ...
    2185             : !> \param fft_scratch ...
    2186             : ! **************************************************************************************************
    2187           0 :    SUBROUTINE cube_transpose_3(cin, boin, boout, sout, fft_scratch)
    2188             : 
    2189             :       COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
    2190             :          INTENT(IN)                                      :: cin
    2191             :       INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:), &
    2192             :          INTENT(IN)                                      :: boin, boout
    2193             :       COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
    2194             :          INTENT(OUT)                                     :: sout
    2195             :       TYPE(fft_scratch_type), INTENT(IN)                 :: fft_scratch
    2196             : 
    2197             :       CHARACTER(len=*), PARAMETER                        :: routineN = 'cube_transpose_3'
    2198             : 
    2199             :       COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
    2200           0 :          POINTER                                         :: rbuf
    2201             :       INTEGER                                            :: handle, ip, ipl, ir, is, ixz, iy, lb, &
    2202             :                                                             mip, my, my_id, np, num_threads, nx, &
    2203             :                                                             ny, nz, ub
    2204           0 :       INTEGER, CONTIGUOUS, DIMENSION(:), POINTER         :: rcount, rdispl, scount, sdispl
    2205           0 :       INTEGER, CONTIGUOUS, DIMENSION(:, :), POINTER      :: pgrid
    2206             :       INTEGER, DIMENSION(2)                              :: dim, pos
    2207             :       TYPE(mp_comm_type)                                 :: sub_group
    2208             : 
    2209           0 :       CALL timeset(routineN, handle)
    2210             : 
    2211           0 :       sub_group = fft_scratch%cart_sub_comm(1)
    2212           0 :       mip = fft_scratch%mip
    2213           0 :       dim = fft_scratch%dim
    2214           0 :       pos = fft_scratch%pos
    2215           0 :       np = DIM(1)
    2216           0 :       scount => fft_scratch%scount
    2217           0 :       rcount => fft_scratch%rcount
    2218           0 :       sdispl => fft_scratch%sdispl
    2219           0 :       rdispl => fft_scratch%rdispl
    2220           0 :       pgrid => fft_scratch%pgcube
    2221             : 
    2222           0 :       ny = boin(2, 2, mip) - boin(1, 2, mip) + 1
    2223           0 :       nz = boin(2, 3, mip) - boin(1, 3, mip) + 1
    2224             : !$OMP PARALLEL DO DEFAULT(NONE), &
    2225             : !$OMP             PRIVATE(ipl, nx), &
    2226           0 : !$OMP             SHARED(np,pgrid,boout,ny,nz,scount,sdispl)
    2227             :       DO ip = 0, np - 1
    2228             :          ipl = pgrid(ip, 1)
    2229             :          nx = boout(2, 1, ipl) - boout(1, 1, ipl) + 1
    2230             :          scount(ip) = nx*nz*ny
    2231             :          sdispl(ip) = ny*nz*(boout(1, 1, ipl) - 1)
    2232             :       END DO
    2233             : !$OMP END PARALLEL DO
    2234           0 :       nx = boout(2, 1, mip) - boout(1, 1, mip) + 1
    2235           0 :       my = MAXVAL(boin(2, 2, :) - boin(1, 2, :) + 1)
    2236             : !$OMP PARALLEL DO DEFAULT(NONE), &
    2237             : !$OMP             PRIVATE(ipl, ny), &
    2238           0 : !$OMP             SHARED(np,pgrid,boin,nx,nz,my,rcount,rdispl)
    2239             :       DO ip = 0, np - 1
    2240             :          ipl = pgrid(ip, 1)
    2241             :          ny = boin(2, 2, ipl) - boin(1, 2, ipl) + 1
    2242             :          rcount(ip) = nx*nz*ny
    2243             :          rdispl(ip) = nx*my*nz*ip
    2244             :       END DO
    2245             : !$OMP END PARALLEL DO
    2246             : 
    2247           0 :       rbuf => fft_scratch%rbuf3
    2248           0 :       num_threads = 1
    2249           0 :       my_id = 0
    2250             : !$OMP PARALLEL DEFAULT(NONE), &
    2251             : !$OMP          PRIVATE(NUM_THREADS, my_id, lb, ub) &
    2252           0 : !$OMP          SHARED(rbuf)
    2253             : !$    num_threads = MIN(omp_get_max_threads(), SIZE(rbuf, 2))
    2254             : !$    my_id = omp_get_thread_num()
    2255             :       IF (my_id < num_threads) THEN
    2256             :          lb = (SIZE(rbuf, 2)*my_id)/num_threads
    2257             :          ub = (SIZE(rbuf, 2)*(my_id + 1))/num_threads - 1
    2258             :          rbuf(:, lb:ub) = 0.0_dp
    2259             :       END IF
    2260             : !$OMP END PARALLEL
    2261             : 
    2262           0 :       CALL sub_group%alltoall(cin, scount, sdispl, rbuf, rcount, rdispl)
    2263             : 
    2264             : !$OMP PARALLEL DO DEFAULT(NONE) COLLAPSE(2) &
    2265             : !$OMP             PRIVATE(ip,ipl,ny,iy,is,ir) &
    2266           0 : !$OMP             SHARED(nx,nz,np,pgrid,boin,rbuf,sout)
    2267             :       DO ixz = 1, nx*nz
    2268             :          DO ip = 0, np - 1
    2269             :             ipl = pgrid(ip, 1)
    2270             :             ny = boin(2, 2, ipl) - boin(1, 2, ipl) + 1
    2271             :             DO iy = 1, ny
    2272             :                is = boin(1, 2, ipl) + iy - 1
    2273             :                ir = iy + ny*(ixz - 1)
    2274             :                sout(is, ixz) = rbuf(ir, ip)
    2275             :             END DO
    2276             :          END DO
    2277             :       END DO
    2278             : !$OMP END PARALLEL DO
    2279             : 
    2280           0 :       CALL timestop(handle)
    2281             : 
    2282           0 :    END SUBROUTINE cube_transpose_3
    2283             : 
    2284             : ! **************************************************************************************************
    2285             : !> \brief ...
    2286             : !> \param cin ...
    2287             : !> \param boin ...
    2288             : !> \param boout ...
    2289             : !> \param sout ...
    2290             : !> \param fft_scratch ...
    2291             : ! **************************************************************************************************
    2292           0 :    SUBROUTINE cube_transpose_4(cin, boin, boout, sout, fft_scratch)
    2293             : 
    2294             :       COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
    2295             :          INTENT(IN)                                      :: cin
    2296             :       INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:), &
    2297             :          INTENT(IN)                                      :: boin, boout
    2298             :       COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
    2299             :          INTENT(OUT)                                     :: sout
    2300             :       TYPE(fft_scratch_type), INTENT(IN)                 :: fft_scratch
    2301             : 
    2302             :       CHARACTER(len=*), PARAMETER                        :: routineN = 'cube_transpose_4'
    2303             : 
    2304             :       COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
    2305           0 :          POINTER                                         :: rbuf
    2306             :       INTEGER                                            :: handle, ip, ipl, ir, iy, izx, lb, mip, &
    2307             :                                                             my, my_id, np, num_threads, nx, ny, &
    2308             :                                                             nz, ub
    2309           0 :       INTEGER, CONTIGUOUS, DIMENSION(:), POINTER         :: rcount, rdispl, scount, sdispl
    2310           0 :       INTEGER, CONTIGUOUS, DIMENSION(:, :), POINTER      :: pgrid
    2311             :       INTEGER, DIMENSION(2)                              :: dim, pos
    2312             :       TYPE(mp_comm_type)                                 :: sub_group
    2313             : 
    2314           0 :       CALL timeset(routineN, handle)
    2315             : 
    2316           0 :       sub_group = fft_scratch%cart_sub_comm(1)
    2317           0 :       mip = fft_scratch%mip
    2318           0 :       dim = fft_scratch%dim
    2319           0 :       pos = fft_scratch%pos
    2320           0 :       np = DIM(1)
    2321           0 :       scount => fft_scratch%scount
    2322           0 :       rcount => fft_scratch%rcount
    2323           0 :       sdispl => fft_scratch%sdispl
    2324           0 :       rdispl => fft_scratch%rdispl
    2325           0 :       pgrid => fft_scratch%pgcube
    2326             : 
    2327           0 :       nx = boin(2, 1, mip) - boin(1, 1, mip) + 1
    2328           0 :       nz = boin(2, 3, mip) - boin(1, 3, mip) + 1
    2329           0 :       my = MAXVAL(boout(2, 2, :) - boout(1, 2, :) + 1)
    2330             : 
    2331           0 :       rbuf => fft_scratch%rbuf4
    2332           0 :       num_threads = 1
    2333           0 :       my_id = 0
    2334             : !$OMP PARALLEL DEFAULT(NONE), &
    2335             : !$OMP          PRIVATE(NUM_THREADS,my_id,lb,ub,ip,ipl,ny,iy,ir), &
    2336           0 : !$OMP          SHARED(rbuf,nz,nx,np,pgrid,boout,cin,my,scount,sdispl)
    2337             : !$    num_threads = MIN(omp_get_max_threads(), SIZE(rbuf, 2))
    2338             : !$    my_id = omp_get_thread_num()
    2339             :       IF (my_id < num_threads) THEN
    2340             :          lb = (SIZE(rbuf, 2)*my_id)/num_threads
    2341             :          ub = (SIZE(rbuf, 2)*(my_id + 1))/num_threads - 1
    2342             :          rbuf(:, lb:ub) = 0.0_dp
    2343             :       END IF
    2344             : !$OMP BARRIER
    2345             : 
    2346             : !$OMP DO COLLAPSE(2)
    2347             :       DO izx = 1, nz*nx
    2348             :          DO ip = 0, np - 1
    2349             :             ipl = pgrid(ip, 1)
    2350             :             ny = boout(2, 2, ipl) - boout(1, 2, ipl) + 1
    2351             :             DO iy = boout(1, 2, ipl), boout(2, 2, ipl)
    2352             :                ir = iy - boout(1, 2, ipl) + 1 + (izx - 1)*ny
    2353             :                rbuf(ir, ip) = cin(iy, izx)
    2354             :             END DO
    2355             :          END DO
    2356             :       END DO
    2357             : !$OMP END DO
    2358             : !$OMP DO
    2359             :       DO ip = 0, np - 1
    2360             :          ipl = pgrid(ip, 1)
    2361             :          ny = boout(2, 2, ipl) - boout(1, 2, ipl) + 1
    2362             :          scount(ip) = nx*ny*nz
    2363             :          sdispl(ip) = nx*nz*my*ip
    2364             :       END DO
    2365             : !$OMP END DO
    2366             : !$OMP END PARALLEL
    2367           0 :       ny = boout(2, 2, mip) - boout(1, 2, mip) + 1
    2368             : !$OMP PARALLEL DO DEFAULT(NONE), &
    2369             : !$OMP             PRIVATE(ipl,nx), &
    2370           0 : !$OMP             SHARED(np,pgrid,boin,rcount,rdispl,ny,nz)
    2371             :       DO ip = 0, np - 1
    2372             :          ipl = pgrid(ip, 1)
    2373             :          nx = boin(2, 1, ipl) - boin(1, 1, ipl) + 1
    2374             :          rcount(ip) = nx*ny*nz
    2375             :          rdispl(ip) = ny*nz*(boin(1, 1, ipl) - 1)
    2376             :       END DO
    2377             : !$OMP END PARALLEL DO
    2378             : 
    2379           0 :       CALL sub_group%alltoall(rbuf, scount, sdispl, sout, rcount, rdispl)
    2380             : 
    2381           0 :       CALL timestop(handle)
    2382             : 
    2383           0 :    END SUBROUTINE cube_transpose_4
    2384             : 
    2385             : ! **************************************************************************************************
    2386             : !> \brief ...
    2387             : !> \param cin ...
    2388             : !> \param group ...
    2389             : !> \param boin ...
    2390             : !> \param boout ...
    2391             : !> \param sout ...
    2392             : !> \param fft_scratch ...
    2393             : ! **************************************************************************************************
    2394         104 :    SUBROUTINE cube_transpose_5(cin, group, boin, boout, sout, fft_scratch)
    2395             : 
    2396             :       COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
    2397             :          INTENT(IN)                                      :: cin
    2398             : 
    2399             :       CLASS(mp_comm_type), INTENT(IN)                     :: group
    2400             :       INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:), INTENT(IN)           :: boin, boout
    2401             :       COMPLEX(KIND=dp), DIMENSION(:, :), INTENT(OUT), CONTIGUOUS     :: sout
    2402             :       TYPE(fft_scratch_type), INTENT(IN)                :: fft_scratch
    2403             : 
    2404             :       CHARACTER(len=*), PARAMETER                        :: routineN = 'cube_transpose_5'
    2405             : 
    2406         104 :       COMPLEX(KIND=dp), DIMENSION(:, :), POINTER, CONTIGUOUS         :: rbuf
    2407             :       INTEGER                                            :: handle, ip, ir, is, ixz, iy, lb, mip, &
    2408             :                                                             my, my_id, np, num_threads, nx, ny, &
    2409             :                                                             nz, ub
    2410         104 :       INTEGER, DIMENSION(:), POINTER, CONTIGUOUS                     :: rcount, rdispl, scount, sdispl
    2411             : 
    2412         104 :       CALL timeset(routineN, handle)
    2413             : 
    2414         104 :       np = fft_scratch%sizes%numtask
    2415         104 :       mip = fft_scratch%mip
    2416         104 :       scount => fft_scratch%scount
    2417         104 :       rcount => fft_scratch%rcount
    2418         104 :       sdispl => fft_scratch%sdispl
    2419         104 :       rdispl => fft_scratch%rdispl
    2420             : 
    2421         104 :       ny = boin(2, 2, mip) - boin(1, 2, mip) + 1
    2422         104 :       nz = boin(2, 3, mip) - boin(1, 3, mip) + 1
    2423             : !$OMP PARALLEL DO DEFAULT(NONE), &
    2424             : !$OMP             PRIVATE(nx), &
    2425         104 : !$OMP             SHARED(np,boout,ny,nz,scount,sdispl)
    2426             :       DO ip = 0, np - 1
    2427             :          nx = boout(2, 1, ip) - boout(1, 1, ip) + 1
    2428             :          scount(ip) = nx*nz*ny
    2429             :          sdispl(ip) = ny*nz*(boout(1, 1, ip) - 1)
    2430             :       END DO
    2431             : !$OMP END PARALLEL DO
    2432         104 :       nx = boout(2, 1, mip) - boout(1, 1, mip) + 1
    2433         312 :       my = MAXVAL(boin(2, 2, :) - boin(1, 2, :) + 1)
    2434             : !$OMP PARALLEL DO DEFAULT(NONE), &
    2435             : !$OMP             PRIVATE(ny), &
    2436         104 : !$OMP             SHARED(np,boin,nx,nz,rcount,rdispl,my)
    2437             :       DO ip = 0, np - 1
    2438             :          ny = boin(2, 2, ip) - boin(1, 2, ip) + 1
    2439             :          rcount(ip) = nx*nz*ny
    2440             :          rdispl(ip) = nx*my*nz*ip
    2441             :       END DO
    2442             : !$OMP END PARALLEL DO
    2443             : 
    2444         104 :       rbuf => fft_scratch%rbuf5
    2445         104 :       num_threads = 1
    2446         104 :       my_id = 0
    2447             : !$OMP PARALLEL DEFAULT(NONE), &
    2448             : !$OMP          PRIVATE(NUM_THREADS, my_id, lb, ub), &
    2449         104 : !$OMP          SHARED(rbuf)
    2450             : !$    num_threads = MIN(omp_get_max_threads(), SIZE(rbuf, 2))
    2451             : !$    my_id = omp_get_thread_num()
    2452             :       IF (my_id < num_threads) THEN
    2453             :          lb = (SIZE(rbuf, 2)*my_id)/num_threads
    2454             :          ub = (SIZE(rbuf, 2)*(my_id + 1))/num_threads - 1
    2455             :          rbuf(:, lb:ub) = 0.0_dp
    2456             :       END IF
    2457             : !$OMP END PARALLEL
    2458             : 
    2459         104 :       CALL group%alltoall(cin, scount, sdispl, rbuf, rcount, rdispl)
    2460             : 
    2461             : !$OMP PARALLEL DO DEFAULT(NONE) COLLAPSE(2) &
    2462             : !$OMP             PRIVATE(ip,ny,iy,is,ir) &
    2463         104 : !$OMP             SHARED(nx,nz,np,boin,sout,rbuf)
    2464             :       DO ixz = 1, nx*nz
    2465             :          DO ip = 0, np - 1
    2466             :             ny = boin(2, 2, ip) - boin(1, 2, ip) + 1
    2467             :             DO iy = 1, ny
    2468             :                is = boin(1, 2, ip) + iy - 1
    2469             :                ir = iy + ny*(ixz - 1)
    2470             :                sout(is, ixz) = rbuf(ir, ip)
    2471             :             END DO
    2472             :          END DO
    2473             :       END DO
    2474             : !$OMP END PARALLEL DO
    2475             : 
    2476         104 :       CALL timestop(handle)
    2477             : 
    2478         104 :    END SUBROUTINE cube_transpose_5
    2479             : 
    2480             : ! **************************************************************************************************
    2481             : !> \brief ...
    2482             : !> \param cin ...
    2483             : !> \param group ...
    2484             : !> \param boin ...
    2485             : !> \param boout ...
    2486             : !> \param sout ...
    2487             : !> \param fft_scratch ...
    2488             : ! **************************************************************************************************
    2489         104 :    SUBROUTINE cube_transpose_6(cin, group, boin, boout, sout, fft_scratch)
    2490             : 
    2491             :       COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
    2492             :          INTENT(IN)                                      :: cin
    2493             : 
    2494             :       CLASS(mp_comm_type), INTENT(IN)                     :: group
    2495             :       INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:), INTENT(IN)           :: boin, boout
    2496             :       COMPLEX(KIND=dp), DIMENSION(:, :), INTENT(OUT), CONTIGUOUS     :: sout
    2497             :       TYPE(fft_scratch_type), INTENT(IN)                 :: fft_scratch
    2498             : 
    2499             :       CHARACTER(len=*), PARAMETER                        :: routineN = 'cube_transpose_6'
    2500             : 
    2501         104 :       COMPLEX(KIND=dp), DIMENSION(:, :), POINTER, CONTIGUOUS         :: rbuf
    2502             :       INTEGER                                            :: handle, ip, ir, iy, izx, lb, mip, my, &
    2503             :                                                             my_id, np, num_threads, nx, ny, nz, ub
    2504         104 :       INTEGER, DIMENSION(:), POINTER, CONTIGUOUS                     :: rcount, rdispl, scount, sdispl
    2505             : 
    2506         104 :       CALL timeset(routineN, handle)
    2507             : 
    2508         104 :       np = fft_scratch%sizes%numtask
    2509         104 :       mip = fft_scratch%mip
    2510         104 :       scount => fft_scratch%scount
    2511         104 :       rcount => fft_scratch%rcount
    2512         104 :       sdispl => fft_scratch%sdispl
    2513         104 :       rdispl => fft_scratch%rdispl
    2514             : 
    2515         104 :       nx = boin(2, 1, mip) - boin(1, 1, mip) + 1
    2516         104 :       nz = boin(2, 3, mip) - boin(1, 3, mip) + 1
    2517         312 :       my = MAXVAL(boout(2, 2, :) - boout(1, 2, :) + 1)
    2518             : 
    2519         104 :       rbuf => fft_scratch%rbuf5
    2520         104 :       num_threads = 1
    2521         104 :       my_id = 0
    2522             : !$OMP PARALLEL DEFAULT(NONE), &
    2523             : !$OMP          PRIVATE(NUM_THREADS,my_id,lb,ub,ip,ny,iy,ir), &
    2524         104 : !$OMP          SHARED(rbuf,nx,nz,np,boout,cin,my,scount,sdispl)
    2525             : !$    num_threads = MIN(omp_get_max_threads(), SIZE(rbuf, 2))
    2526             : !$    my_id = omp_get_thread_num()
    2527             :       IF (my_id < num_threads) THEN
    2528             :          lb = (SIZE(rbuf, 2)*my_id)/num_threads
    2529             :          ub = (SIZE(rbuf, 2)*(my_id + 1))/num_threads - 1
    2530             :          rbuf(:, lb:ub) = 0.0_dp
    2531             :       END IF
    2532             : !$OMP BARRIER
    2533             : 
    2534             : !$OMP DO COLLAPSE(2)
    2535             :       DO izx = 1, nz*nx
    2536             :          DO ip = 0, np - 1
    2537             :             ny = boout(2, 2, ip) - boout(1, 2, ip) + 1
    2538             :             DO iy = boout(1, 2, ip), boout(2, 2, ip)
    2539             :                ir = iy - boout(1, 2, ip) + 1 + (izx - 1)*ny
    2540             :                rbuf(ir, ip) = cin(iy, izx)
    2541             :             END DO
    2542             :          END DO
    2543             :       END DO
    2544             : !$OMP END DO
    2545             : !$OMP DO
    2546             :       DO ip = 0, np - 1
    2547             :          ny = boout(2, 2, ip) - boout(1, 2, ip) + 1
    2548             :          scount(ip) = nx*ny*nz
    2549             :          sdispl(ip) = nx*nz*my*ip
    2550             :       END DO
    2551             : !$OMP END DO
    2552             : !$OMP END PARALLEL
    2553         104 :       ny = boout(2, 2, mip) - boout(1, 2, mip) + 1
    2554             : !$OMP PARALLEL DO DEFAULT(NONE), &
    2555             : !$OMP             PRIVATE(nx), &
    2556         104 : !$OMP             SHARED(np,boin,rcount,rdispl,nz,ny)
    2557             :       DO ip = 0, np - 1
    2558             :          nx = boin(2, 1, ip) - boin(1, 1, ip) + 1
    2559             :          rcount(ip) = nx*ny*nz
    2560             :          rdispl(ip) = ny*nz*(boin(1, 1, ip) - 1)
    2561             :       END DO
    2562             : !$OMP END PARALLEL DO
    2563             : 
    2564         104 :       CALL group%alltoall(rbuf, scount, sdispl, sout, rcount, rdispl)
    2565             : 
    2566         104 :       CALL timestop(handle)
    2567             : 
    2568         104 :    END SUBROUTINE cube_transpose_6
    2569             : 
    2570             : ! **************************************************************************************************
    2571             : !> \brief ...
    2572             : ! **************************************************************************************************
    2573        9041 :    SUBROUTINE init_fft_scratch_pool()
    2574             : 
    2575        9041 :       CALL release_fft_scratch_pool()
    2576             : 
    2577             :       ! Allocate first scratch and mark it as used
    2578        9041 :       ALLOCATE (fft_scratch_first)
    2579      262189 :       ALLOCATE (fft_scratch_first%fft_scratch)
    2580             :       ! this is a very special scratch, it seems, we always keep it 'most - recent' so we will never delete it
    2581        9041 :       fft_scratch_first%fft_scratch%last_tick = HUGE(fft_scratch_first%fft_scratch%last_tick)
    2582             : 
    2583        9041 :       init_fft_pool = init_fft_pool + 1
    2584             : 
    2585        9041 :    END SUBROUTINE init_fft_scratch_pool
    2586             : 
    2587             : ! **************************************************************************************************
    2588             : !> \brief ...
    2589             : !> \param fft_scratch ...
    2590             : ! **************************************************************************************************
    2591       43301 :    SUBROUTINE deallocate_fft_scratch_type(fft_scratch)
    2592             :       TYPE(fft_scratch_type), INTENT(INOUT)    :: fft_scratch
    2593             : 
    2594             : #if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
    2595             :       INTEGER                   :: ierr
    2596             :       COMPLEX(KIND=dp), POINTER :: dummy_ptr_z
    2597             : #endif
    2598             : 
    2599             :       ! deallocate structures
    2600       43301 :       IF (ASSOCIATED(fft_scratch%ziptr)) THEN
    2601       10289 :          CALL fft_dealloc(fft_scratch%ziptr)
    2602             :       END IF
    2603       43301 :       IF (ASSOCIATED(fft_scratch%zoptr)) THEN
    2604       10289 :          CALL fft_dealloc(fft_scratch%zoptr)
    2605             :       END IF
    2606       43301 :       IF (ASSOCIATED(fft_scratch%p1buf)) THEN
    2607           0 :          CALL fft_dealloc(fft_scratch%p1buf)
    2608             :       END IF
    2609       43301 :       IF (ASSOCIATED(fft_scratch%p2buf)) THEN
    2610             : #if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
    2611             :          dummy_ptr_z => fft_scratch%p2buf(1, 1)
    2612             :          ierr = offload_free_pinned_mem(c_loc(dummy_ptr_z))
    2613             : #else
    2614           0 :          CALL fft_dealloc(fft_scratch%p2buf)
    2615             : #endif
    2616             :       END IF
    2617       43301 :       IF (ASSOCIATED(fft_scratch%p3buf)) THEN
    2618             : #if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
    2619             :          dummy_ptr_z => fft_scratch%p3buf(1, 1)
    2620             :          ierr = offload_free_pinned_mem(c_loc(dummy_ptr_z))
    2621             : #else
    2622           0 :          CALL fft_dealloc(fft_scratch%p3buf)
    2623             : #endif
    2624             :       END IF
    2625       43301 :       IF (ASSOCIATED(fft_scratch%p4buf)) THEN
    2626             : #if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
    2627             :          dummy_ptr_z => fft_scratch%p4buf(1, 1)
    2628             :          ierr = offload_free_pinned_mem(c_loc(dummy_ptr_z))
    2629             : #else
    2630           0 :          CALL fft_dealloc(fft_scratch%p4buf)
    2631             : #endif
    2632             :       END IF
    2633       43301 :       IF (ASSOCIATED(fft_scratch%p5buf)) THEN
    2634             : #if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
    2635             :          dummy_ptr_z => fft_scratch%p5buf(1, 1)
    2636             :          ierr = offload_free_pinned_mem(c_loc(dummy_ptr_z))
    2637             : #else
    2638           0 :          CALL fft_dealloc(fft_scratch%p5buf)
    2639             : #endif
    2640             :       END IF
    2641       43301 :       IF (ASSOCIATED(fft_scratch%p6buf)) THEN
    2642           0 :          CALL fft_dealloc(fft_scratch%p6buf)
    2643             :       END IF
    2644       43301 :       IF (ASSOCIATED(fft_scratch%p7buf)) THEN
    2645             : #if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
    2646             :          dummy_ptr_z => fft_scratch%p7buf(1, 1)
    2647             :          ierr = offload_free_pinned_mem(c_loc(dummy_ptr_z))
    2648             : #else
    2649           0 :          CALL fft_dealloc(fft_scratch%p7buf)
    2650             : #endif
    2651             :       END IF
    2652       43301 :       IF (ASSOCIATED(fft_scratch%r1buf)) THEN
    2653             : #if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
    2654             :          dummy_ptr_z => fft_scratch%r1buf(1, 1)
    2655             :          ierr = offload_free_pinned_mem(c_loc(dummy_ptr_z))
    2656             : #else
    2657       23963 :          CALL fft_dealloc(fft_scratch%r1buf)
    2658             : #endif
    2659             :       END IF
    2660       43301 :       IF (ASSOCIATED(fft_scratch%r2buf)) THEN
    2661       23963 :          CALL fft_dealloc(fft_scratch%r2buf)
    2662             :       END IF
    2663       43301 :       IF (ASSOCIATED(fft_scratch%tbuf)) THEN
    2664             : #if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
    2665             :          dummy_ptr_z => fft_scratch%tbuf(1, 1, 1)
    2666             :          ierr = offload_free_pinned_mem(c_loc(dummy_ptr_z))
    2667             : #else
    2668       23963 :          CALL fft_dealloc(fft_scratch%tbuf)
    2669             : #endif
    2670             :       END IF
    2671       43301 :       IF (ASSOCIATED(fft_scratch%a1buf)) THEN
    2672           8 :          CALL fft_dealloc(fft_scratch%a1buf)
    2673             :       END IF
    2674       43301 :       IF (ASSOCIATED(fft_scratch%a2buf)) THEN
    2675           8 :          CALL fft_dealloc(fft_scratch%a2buf)
    2676             :       END IF
    2677       43301 :       IF (ASSOCIATED(fft_scratch%a3buf)) THEN
    2678           8 :          CALL fft_dealloc(fft_scratch%a3buf)
    2679             :       END IF
    2680       43301 :       IF (ASSOCIATED(fft_scratch%a4buf)) THEN
    2681           8 :          CALL fft_dealloc(fft_scratch%a4buf)
    2682             :       END IF
    2683       43301 :       IF (ASSOCIATED(fft_scratch%a5buf)) THEN
    2684           8 :          CALL fft_dealloc(fft_scratch%a5buf)
    2685             :       END IF
    2686       43301 :       IF (ASSOCIATED(fft_scratch%a6buf)) THEN
    2687           8 :          CALL fft_dealloc(fft_scratch%a6buf)
    2688             :       END IF
    2689       43301 :       IF (ASSOCIATED(fft_scratch%scount)) THEN
    2690           0 :          DEALLOCATE (fft_scratch%scount, fft_scratch%rcount, &
    2691       23971 :                      fft_scratch%sdispl, fft_scratch%rdispl)
    2692             :       END IF
    2693       43301 :       IF (ASSOCIATED(fft_scratch%rr)) THEN
    2694       23955 :          DEALLOCATE (fft_scratch%rr)
    2695             :       END IF
    2696       43301 :       IF (ASSOCIATED(fft_scratch%xzbuf)) THEN
    2697           0 :          DEALLOCATE (fft_scratch%xzbuf)
    2698             :       END IF
    2699       43301 :       IF (ASSOCIATED(fft_scratch%yzbuf)) THEN
    2700           0 :          DEALLOCATE (fft_scratch%yzbuf)
    2701             :       END IF
    2702       43301 :       IF (ASSOCIATED(fft_scratch%xzbuf_sgl)) THEN
    2703           0 :          DEALLOCATE (fft_scratch%xzbuf_sgl)
    2704             :       END IF
    2705       43301 :       IF (ASSOCIATED(fft_scratch%yzbuf_sgl)) THEN
    2706           0 :          DEALLOCATE (fft_scratch%yzbuf_sgl)
    2707             :       END IF
    2708       43301 :       IF (ASSOCIATED(fft_scratch%ss)) THEN
    2709           8 :          DEALLOCATE (fft_scratch%ss)
    2710             :       END IF
    2711       43301 :       IF (ASSOCIATED(fft_scratch%tt)) THEN
    2712           8 :          DEALLOCATE (fft_scratch%tt)
    2713             :       END IF
    2714       43301 :       IF (ASSOCIATED(fft_scratch%pgrid)) THEN
    2715           0 :          DEALLOCATE (fft_scratch%pgrid)
    2716             :       END IF
    2717       43301 :       IF (ASSOCIATED(fft_scratch%pgcube)) THEN
    2718       23971 :          DEALLOCATE (fft_scratch%pgcube)
    2719             :       END IF
    2720       43301 :       IF (ASSOCIATED(fft_scratch%xcor)) THEN
    2721           0 :          DEALLOCATE (fft_scratch%xcor, fft_scratch%zcor)
    2722             :       END IF
    2723       43301 :       IF (ASSOCIATED(fft_scratch%pzcoord)) THEN
    2724           0 :          DEALLOCATE (fft_scratch%pzcoord)
    2725             :       END IF
    2726       43301 :       IF (ASSOCIATED(fft_scratch%xzcount)) THEN
    2727           0 :          DEALLOCATE (fft_scratch%xzcount, fft_scratch%yzcount)
    2728           0 :          DEALLOCATE (fft_scratch%xzdispl, fft_scratch%yzdispl)
    2729           0 :          fft_scratch%in = 0
    2730           0 :          fft_scratch%rsratio = 1._dp
    2731             :       END IF
    2732       43301 :       IF (ASSOCIATED(fft_scratch%rbuf1)) THEN
    2733           0 :          DEALLOCATE (fft_scratch%rbuf1)
    2734             :       END IF
    2735       43301 :       IF (ASSOCIATED(fft_scratch%rbuf2)) THEN
    2736           0 :          DEALLOCATE (fft_scratch%rbuf2)
    2737             :       END IF
    2738       43301 :       IF (ASSOCIATED(fft_scratch%rbuf3)) THEN
    2739           0 :          DEALLOCATE (fft_scratch%rbuf3)
    2740             :       END IF
    2741       43301 :       IF (ASSOCIATED(fft_scratch%rbuf4)) THEN
    2742           0 :          DEALLOCATE (fft_scratch%rbuf4)
    2743             :       END IF
    2744       43301 :       IF (ASSOCIATED(fft_scratch%rbuf5)) THEN
    2745           8 :          DEALLOCATE (fft_scratch%rbuf5)
    2746             :       END IF
    2747       43301 :       IF (ASSOCIATED(fft_scratch%rbuf6)) THEN
    2748           8 :          DEALLOCATE (fft_scratch%rbuf6)
    2749             :       END IF
    2750             : 
    2751       43301 :       IF (fft_scratch%cart_sub_comm(1) /= mp_comm_null) THEN
    2752           0 :          CALL fft_scratch%cart_sub_comm(1)%free()
    2753             :       END IF
    2754       43301 :       IF (fft_scratch%cart_sub_comm(2) /= mp_comm_null) THEN
    2755           0 :          CALL fft_scratch%cart_sub_comm(2)%free()
    2756             :       END IF
    2757             : 
    2758       43301 :       CALL fft_destroy_plan(fft_scratch%fft_plan(1))
    2759       43301 :       CALL fft_destroy_plan(fft_scratch%fft_plan(2))
    2760       43301 :       CALL fft_destroy_plan(fft_scratch%fft_plan(3))
    2761       43301 :       CALL fft_destroy_plan(fft_scratch%fft_plan(4))
    2762       43301 :       CALL fft_destroy_plan(fft_scratch%fft_plan(5))
    2763       43301 :       CALL fft_destroy_plan(fft_scratch%fft_plan(6))
    2764             : 
    2765       43301 :    END SUBROUTINE deallocate_fft_scratch_type
    2766             : 
    2767             : ! **************************************************************************************************
    2768             : !> \brief ...
    2769             : ! **************************************************************************************************
    2770       26913 :    SUBROUTINE release_fft_scratch_pool()
    2771             : 
    2772             :       TYPE(fft_scratch_pool_type), POINTER               :: fft_scratch, fft_scratch_current
    2773             : 
    2774       26913 :       IF (init_fft_pool == 0) NULLIFY (fft_scratch_first)
    2775             : 
    2776       26913 :       fft_scratch => fft_scratch_first
    2777       42473 :       DO
    2778       69386 :          IF (ASSOCIATED(fft_scratch)) THEN
    2779       42473 :             fft_scratch_current => fft_scratch
    2780       42473 :             fft_scratch => fft_scratch_current%fft_scratch_next
    2781       42473 :             NULLIFY (fft_scratch_current%fft_scratch_next)
    2782             : 
    2783       42473 :             CALL deallocate_fft_scratch_type(fft_scratch_current%fft_scratch)
    2784             : 
    2785      169892 :             DEALLOCATE (fft_scratch_current%fft_scratch)
    2786       42473 :             DEALLOCATE (fft_scratch_current)
    2787             :          ELSE
    2788             :             EXIT
    2789             :          END IF
    2790             :       END DO
    2791             : 
    2792       26913 :       init_fft_pool = 0
    2793             : 
    2794       26913 :    END SUBROUTINE release_fft_scratch_pool
    2795             : 
    2796             : ! **************************************************************************************************
    2797             : !> \brief ...
    2798             : ! **************************************************************************************************
    2799     3246758 :    SUBROUTINE resize_fft_scratch_pool()
    2800             : 
    2801             :       INTEGER                                            :: last_tick, nscratch
    2802             :       TYPE(fft_scratch_pool_type), POINTER               :: fft_scratch_current, fft_scratch_old
    2803             : 
    2804     3246758 :       nscratch = 0
    2805             : 
    2806     3246758 :       last_tick = HUGE(last_tick)
    2807     3246758 :       NULLIFY (fft_scratch_old)
    2808             : 
    2809             :       ! start at the global pool, count, and find a deletion candidate
    2810     3246758 :       fft_scratch_current => fft_scratch_first
    2811    23673822 :       DO
    2812    26920580 :          IF (ASSOCIATED(fft_scratch_current)) THEN
    2813    23673822 :             nscratch = nscratch + 1
    2814             :             ! is this a candidate for deletion (i.e. least recently used, and not in use)
    2815    23673822 :             IF (.NOT. fft_scratch_current%fft_scratch%in_use) THEN
    2816    20427064 :                IF (fft_scratch_current%fft_scratch%last_tick < last_tick) THEN
    2817     4010491 :                   last_tick = fft_scratch_current%fft_scratch%last_tick
    2818     4010491 :                   fft_scratch_old => fft_scratch_current
    2819             :                END IF
    2820             :             END IF
    2821    23673822 :             fft_scratch_current => fft_scratch_current%fft_scratch_next
    2822             :          ELSE
    2823             :             EXIT
    2824             :          END IF
    2825             :       END DO
    2826             : 
    2827             :       ! we should delete a scratch
    2828     3246758 :       IF (nscratch > fft_pool_scratch_limit) THEN
    2829             :          ! note that we never deallocate the first (special) element of the list
    2830         828 :          IF (ASSOCIATED(fft_scratch_old)) THEN
    2831             :             fft_scratch_current => fft_scratch_first
    2832             :             DO
    2833       14076 :                IF (ASSOCIATED(fft_scratch_current)) THEN
    2834             :                   ! should we delete the next in the list?
    2835       13248 :                   IF (ASSOCIATED(fft_scratch_current%fft_scratch_next, fft_scratch_old)) THEN
    2836             :                      ! fix the linked list
    2837         828 :                      fft_scratch_current%fft_scratch_next => fft_scratch_old%fft_scratch_next
    2838             : 
    2839             :                      ! deallocate the element
    2840         828 :                      CALL deallocate_fft_scratch_type(fft_scratch_old%fft_scratch)
    2841        3312 :                      DEALLOCATE (fft_scratch_old%fft_scratch)
    2842         828 :                      DEALLOCATE (fft_scratch_old)
    2843             : 
    2844             :                   ELSE
    2845             :                      fft_scratch_current => fft_scratch_current%fft_scratch_next
    2846             :                   END IF
    2847             :                ELSE
    2848             :                   EXIT
    2849             :                END IF
    2850             :             END DO
    2851             : 
    2852             :          ELSE
    2853           0 :             CPWARN("The number of the scratches exceeded the limit, but none could be deallocated")
    2854             :          END IF
    2855             :       END IF
    2856             : 
    2857     3246758 :    END SUBROUTINE resize_fft_scratch_pool
    2858             : 
    2859             : ! **************************************************************************************************
    2860             : !> \brief ...
    2861             : !> \param fft_scratch ...
    2862             : !> \param tf_type ...
    2863             : !> \param n ...
    2864             : !> \param fft_sizes ...
    2865             : ! **************************************************************************************************
    2866     3246758 :    SUBROUTINE get_fft_scratch(fft_scratch, tf_type, n, fft_sizes)
    2867             :       TYPE(fft_scratch_type), POINTER          :: fft_scratch
    2868             :       INTEGER, INTENT(IN)                      :: tf_type
    2869             :       INTEGER, DIMENSION(:), INTENT(IN)        :: n
    2870             :       TYPE(fft_scratch_sizes), INTENT(IN), &
    2871             :          OPTIONAL                               :: fft_sizes
    2872             : 
    2873             :       CHARACTER(len=*), PARAMETER :: routineN = 'get_fft_scratch'
    2874             : 
    2875             :       INTEGER :: coord(2), DIM(2), handle, i, ix, iz, lg, lmax, m1, m2, &
    2876             :                  mcx2, mcy3, mcz1, mcz2, mg, mmax, mx1, mx2, my1, my3, mz1, mz2, mz3, &
    2877             :                  nbx, nbz, nm, nmax, nmray, np, nx, ny, nyzray, nz, pos(2)
    2878             :       INTEGER, DIMENSION(3)                    :: pcoord
    2879             :       LOGICAL                                  :: equal
    2880             :       LOGICAL, DIMENSION(2)                    :: dims
    2881             :       TYPE(fft_scratch_pool_type), POINTER     :: fft_scratch_current, &
    2882             :                                                   fft_scratch_last, &
    2883             :                                                   fft_scratch_new
    2884             : #if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
    2885             :       INTEGER                   :: ierr
    2886             :       INTEGER(KIND=C_SIZE_T)    :: length
    2887             :       TYPE(C_PTR)               :: cptr_r1buf, cptr_tbuf, &
    2888             :                                    cptr_p2buf, cptr_p3buf, cptr_p4buf, cptr_p5buf, cptr_p7buf
    2889             : #endif
    2890     3246758 :       CALL timeset(routineN, handle)
    2891             : 
    2892             :       ! this is the place to check that the scratch_pool does not grow without limits
    2893             :       ! before we add a new scratch check the size of the pool and release some of the list if needed
    2894     3246758 :       CALL resize_fft_scratch_pool()
    2895             : 
    2896             :       ! get the required scratch
    2897     3246758 :       tick_fft_pool = tick_fft_pool + 1
    2898     3246758 :       fft_scratch_current => fft_scratch_first
    2899             :       DO
    2900    15675297 :          IF (ASSOCIATED(fft_scratch_current)) THEN
    2901    15641037 :             IF (fft_scratch_current%fft_scratch%in_use) THEN
    2902     3246758 :                fft_scratch_last => fft_scratch_current
    2903     3246758 :                fft_scratch_current => fft_scratch_current%fft_scratch_next
    2904     3246758 :                CYCLE
    2905             :             END IF
    2906    12394279 :             IF (tf_type /= fft_scratch_current%fft_scratch%tf_type) THEN
    2907     4524073 :                fft_scratch_last => fft_scratch_current
    2908     4524073 :                fft_scratch_current => fft_scratch_current%fft_scratch_next
    2909     4524073 :                CYCLE
    2910             :             END IF
    2911    18343369 :             IF (.NOT. ALL(n == fft_scratch_current%fft_scratch%nfft)) THEN
    2912     4379581 :                fft_scratch_last => fft_scratch_current
    2913     4379581 :                fft_scratch_current => fft_scratch_current%fft_scratch_next
    2914     4379581 :                CYCLE
    2915             :             END IF
    2916     3490625 :             IF (PRESENT(fft_sizes)) THEN
    2917     2879726 :                IF (fft_sizes%rs_group /= fft_scratch_current%fft_scratch%group) THEN
    2918      277807 :                   fft_scratch_last => fft_scratch_current
    2919      277807 :                   fft_scratch_current => fft_scratch_current%fft_scratch_next
    2920      277807 :                   CYCLE
    2921             :                END IF
    2922     2601919 :                CALL is_equal(fft_sizes, fft_scratch_current%fft_scratch%sizes, equal)
    2923     2601919 :                IF (.NOT. equal) THEN
    2924         320 :                   fft_scratch_last => fft_scratch_current
    2925         320 :                   fft_scratch_current => fft_scratch_current%fft_scratch_next
    2926         320 :                   CYCLE
    2927             :                END IF
    2928             :             END IF
    2929             :             ! Success
    2930     3212498 :             fft_scratch => fft_scratch_current%fft_scratch
    2931     3212498 :             fft_scratch_current%fft_scratch%in_use = .TRUE.
    2932     3212498 :             EXIT
    2933             :          ELSE
    2934             :             ! We cannot find the scratch type in this pool
    2935             :             ! Generate a new scratch set
    2936       34260 :             ALLOCATE (fft_scratch_new)
    2937      890760 :             ALLOCATE (fft_scratch_new%fft_scratch)
    2938             : 
    2939       34260 :             IF (tf_type .NE. 400) THEN
    2940       23971 :                fft_scratch_new%fft_scratch%sizes = fft_sizes
    2941       23971 :                np = fft_sizes%numtask
    2942             :                ALLOCATE (fft_scratch_new%fft_scratch%scount(0:np - 1), fft_scratch_new%fft_scratch%rcount(0:np - 1), &
    2943             :                          fft_scratch_new%fft_scratch%sdispl(0:np - 1), fft_scratch_new%fft_scratch%rdispl(0:np - 1), &
    2944      215739 :                          fft_scratch_new%fft_scratch%pgcube(0:np - 1, 2))
    2945             :             END IF
    2946             : 
    2947           0 :             SELECT CASE (tf_type)
    2948             :             CASE DEFAULT
    2949           0 :                CPABORT("")
    2950             :             CASE (100) ! fft3d_pb: full cube distribution
    2951           0 :                mx1 = fft_sizes%mx1
    2952           0 :                my1 = fft_sizes%my1
    2953           0 :                mx2 = fft_sizes%mx2
    2954           0 :                mz2 = fft_sizes%mz2
    2955           0 :                my3 = fft_sizes%my3
    2956           0 :                mz3 = fft_sizes%mz3
    2957           0 :                CALL fft_alloc(fft_scratch_new%fft_scratch%a1buf, [mx1*my1, n(3)])
    2958           0 :                CALL fft_alloc(fft_scratch_new%fft_scratch%a2buf, [n(3), mx1*my1])
    2959           0 :                CALL fft_alloc(fft_scratch_new%fft_scratch%a3buf, [mx2*mz2, n(2)])
    2960           0 :                CALL fft_alloc(fft_scratch_new%fft_scratch%a4buf, [n(2), mx2*mz2])
    2961           0 :                CALL fft_alloc(fft_scratch_new%fft_scratch%a5buf, [my3*mz3, n(1)])
    2962           0 :                CALL fft_alloc(fft_scratch_new%fft_scratch%a6buf, [n(1), my3*mz3])
    2963           0 :                fft_scratch_new%fft_scratch%group = fft_sizes%rs_group
    2964             : 
    2965           0 :                dim = fft_sizes%rs_group%num_pe_cart
    2966           0 :                pos = fft_sizes%rs_group%mepos_cart
    2967           0 :                fft_scratch_new%fft_scratch%mip = fft_sizes%rs_group%mepos
    2968           0 :                fft_scratch_new%fft_scratch%dim = dim
    2969           0 :                fft_scratch_new%fft_scratch%pos = pos
    2970           0 :                mcz1 = fft_sizes%mcz1
    2971           0 :                mcx2 = fft_sizes%mcx2
    2972           0 :                mcz2 = fft_sizes%mcz2
    2973           0 :                mcy3 = fft_sizes%mcy3
    2974           0 :                ALLOCATE (fft_scratch_new%fft_scratch%rbuf1(mx2*my1*mcz2, 0:DIM(2) - 1))
    2975           0 :                ALLOCATE (fft_scratch_new%fft_scratch%rbuf2(mx1*my1*mcz2, 0:DIM(2) - 1))
    2976           0 :                ALLOCATE (fft_scratch_new%fft_scratch%rbuf3(mx2*mz3*mcy3, 0:DIM(1) - 1))
    2977           0 :                ALLOCATE (fft_scratch_new%fft_scratch%rbuf4(mx2*mz2*mcy3, 0:DIM(1) - 1))
    2978             : 
    2979           0 :                dims = (/.TRUE., .FALSE./)
    2980           0 :                CALL fft_scratch_new%fft_scratch%cart_sub_comm(1)%from_sub(fft_sizes%rs_group, dims)
    2981           0 :                dims = (/.FALSE., .TRUE./)
    2982           0 :                CALL fft_scratch_new%fft_scratch%cart_sub_comm(2)%from_sub(fft_sizes%rs_group, dims)
    2983             : 
    2984             :                !initialise pgcube
    2985           0 :                DO i = 0, DIM(1) - 1
    2986           0 :                   coord = (/i, pos(2)/)
    2987           0 :                   CALL fft_sizes%rs_group%rank_cart(coord, fft_scratch_new%fft_scratch%pgcube(i, 1))
    2988             :                END DO
    2989           0 :                DO i = 0, DIM(2) - 1
    2990           0 :                   coord = (/pos(1), i/)
    2991           0 :                   CALL fft_sizes%rs_group%rank_cart(coord, fft_scratch_new%fft_scratch%pgcube(i, 2))
    2992             :                END DO
    2993             : 
    2994             :                !set up fft plans
    2995             :                CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(1), fft_type, FWFFT, .TRUE., n(3), mx1*my1, &
    2996           0 :                                         fft_scratch_new%fft_scratch%a1buf, fft_scratch_new%fft_scratch%a2buf, fft_plan_style)
    2997             :                CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(2), fft_type, FWFFT, .TRUE., n(2), mx2*mz2, &
    2998           0 :                                         fft_scratch_new%fft_scratch%a3buf, fft_scratch_new%fft_scratch%a4buf, fft_plan_style)
    2999             :                CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(3), fft_type, FWFFT, .TRUE., n(1), my3*mz3, &
    3000           0 :                                         fft_scratch_new%fft_scratch%a5buf, fft_scratch_new%fft_scratch%a6buf, fft_plan_style)
    3001             :                CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(4), fft_type, BWFFT, .TRUE., n(1), my3*mz3, &
    3002           0 :                                         fft_scratch_new%fft_scratch%a6buf, fft_scratch_new%fft_scratch%a5buf, fft_plan_style)
    3003             :                CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(5), fft_type, BWFFT, .TRUE., n(2), mx2*mz2, &
    3004           0 :                                         fft_scratch_new%fft_scratch%a4buf, fft_scratch_new%fft_scratch%a3buf, fft_plan_style)
    3005             :                CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(6), fft_type, BWFFT, .TRUE., n(3), mx1*my1, &
    3006           0 :                                         fft_scratch_new%fft_scratch%a2buf, fft_scratch_new%fft_scratch%a1buf, fft_plan_style)
    3007             : 
    3008             :             CASE (101) ! fft3d_pb: full cube distribution (dim 1)
    3009           8 :                mx1 = fft_sizes%mx1
    3010           8 :                my1 = fft_sizes%my1
    3011           8 :                mz1 = fft_sizes%mz1
    3012           8 :                my3 = fft_sizes%my3
    3013           8 :                mz3 = fft_sizes%mz3
    3014          24 :                CALL fft_alloc(fft_scratch_new%fft_scratch%a1buf, [mx1*my1, n(3)])
    3015          24 :                CALL fft_alloc(fft_scratch_new%fft_scratch%a2buf, [n(3), mx1*my1])
    3016           8 :                fft_scratch_new%fft_scratch%group = fft_sizes%rs_group
    3017          24 :                CALL fft_alloc(fft_scratch_new%fft_scratch%a3buf, [mx1*mz1, n(2)])
    3018          24 :                CALL fft_alloc(fft_scratch_new%fft_scratch%a4buf, [n(2), mx1*mz1])
    3019          24 :                CALL fft_alloc(fft_scratch_new%fft_scratch%a5buf, [my3*mz3, n(1)])
    3020          24 :                CALL fft_alloc(fft_scratch_new%fft_scratch%a6buf, [n(1), my3*mz3])
    3021             : 
    3022          24 :                dim = fft_sizes%rs_group%num_pe_cart
    3023          24 :                pos = fft_sizes%rs_group%mepos_cart
    3024           8 :                fft_scratch_new%fft_scratch%mip = fft_sizes%rs_group%mepos
    3025          24 :                fft_scratch_new%fft_scratch%dim = dim
    3026          24 :                fft_scratch_new%fft_scratch%pos = pos
    3027           8 :                mcy3 = fft_sizes%mcy3
    3028          32 :                ALLOCATE (fft_scratch_new%fft_scratch%rbuf5(mx1*mz3*mcy3, 0:DIM(1) - 1))
    3029          32 :                ALLOCATE (fft_scratch_new%fft_scratch%rbuf6(mx1*mz1*mcy3, 0:DIM(1) - 1))
    3030             : 
    3031             :                !set up fft plans
    3032             :                CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(1), fft_type, FWFFT, .TRUE., n(3), mx1*my1, &
    3033           8 :                                         fft_scratch_new%fft_scratch%a1buf, fft_scratch_new%fft_scratch%a3buf, fft_plan_style)
    3034             :                CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(2), fft_type, FWFFT, .TRUE., n(2), mx1*mz1, &
    3035           8 :                                         fft_scratch_new%fft_scratch%a3buf, fft_scratch_new%fft_scratch%a4buf, fft_plan_style)
    3036             :                CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(3), fft_type, FWFFT, .TRUE., n(1), my3*mz3, &
    3037           8 :                                         fft_scratch_new%fft_scratch%a5buf, fft_scratch_new%fft_scratch%a6buf, fft_plan_style)
    3038             :                CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(4), fft_type, BWFFT, .TRUE., n(1), my3*mz3, &
    3039           8 :                                         fft_scratch_new%fft_scratch%a6buf, fft_scratch_new%fft_scratch%a5buf, fft_plan_style)
    3040             :                CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(5), fft_type, BWFFT, .TRUE., n(2), mx1*mz1, &
    3041           8 :                                         fft_scratch_new%fft_scratch%a4buf, fft_scratch_new%fft_scratch%a3buf, fft_plan_style)
    3042             :                CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(6), fft_type, BWFFT, .TRUE., n(3), mx1*my1, &
    3043           8 :                                         fft_scratch_new%fft_scratch%a3buf, fft_scratch_new%fft_scratch%a1buf, fft_plan_style)
    3044             : 
    3045             :             CASE (200) ! fft3d_ps: plane distribution
    3046       23963 :                nx = fft_sizes%nx
    3047       23963 :                ny = fft_sizes%ny
    3048       23963 :                nz = fft_sizes%nz
    3049       23963 :                mx2 = fft_sizes%mx2
    3050       23963 :                lmax = fft_sizes%lmax
    3051       23963 :                mmax = fft_sizes%mmax
    3052       23963 :                lg = fft_sizes%lg
    3053       23963 :                mg = fft_sizes%mg
    3054       23963 :                np = fft_sizes%numtask
    3055       23963 :                nmray = fft_sizes%nmray
    3056       23963 :                nyzray = fft_sizes%nyzray
    3057             : #if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
    3058             :                length = INT(2*dp_size*MAX(mmax, 1)*MAX(lmax, 1), KIND=C_SIZE_T)
    3059             :                ierr = offload_malloc_pinned_mem(cptr_r1buf, length)
    3060             :                CPASSERT(ierr == 0)
    3061             :                CALL c_f_pointer(cptr_r1buf, fft_scratch_new%fft_scratch%r1buf, (/MAX(mmax, 1), MAX(lmax, 1)/))
    3062             :                length = INT(2*dp_size*MAX(ny, 1)*MAX(nz, 1)*MAX(nx, 1), KIND=C_SIZE_T)
    3063             :                ierr = offload_malloc_pinned_mem(cptr_tbuf, length)
    3064             :                CPASSERT(ierr == 0)
    3065             :                CALL c_f_pointer(cptr_tbuf, fft_scratch_new%fft_scratch%tbuf, (/MAX(ny, 1), MAX(nz, 1), MAX(nx, 1)/))
    3066             : #else
    3067       71889 :                CALL fft_alloc(fft_scratch_new%fft_scratch%r1buf, [mmax, lmax])
    3068       95852 :                CALL fft_alloc(fft_scratch_new%fft_scratch%tbuf, [ny, nz, nx])
    3069             : #endif
    3070       23963 :                fft_scratch_new%fft_scratch%group = fft_sizes%rs_group
    3071       71889 :                CALL fft_alloc(fft_scratch_new%fft_scratch%r2buf, [lg, mg])
    3072       23963 :                nm = nmray*mx2
    3073       23963 :                IF (alltoall_sgl) THEN
    3074          32 :                   ALLOCATE (fft_scratch_new%fft_scratch%ss(mmax, lmax))
    3075          32 :                   ALLOCATE (fft_scratch_new%fft_scratch%tt(nm, 0:np - 1))
    3076             :                ELSE
    3077       95820 :                   ALLOCATE (fft_scratch_new%fft_scratch%rr(nm, 0:np - 1))
    3078             :                END IF
    3079             : 
    3080             :                !set up fft plans
    3081             :                CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(1), fft_type, FWFFT, .TRUE., nz, nx*ny, &
    3082       23963 :                                         fft_scratch_new%fft_scratch%tbuf, fft_scratch_new%fft_scratch%r1buf, fft_plan_style)
    3083             :                CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(2), fft_type, FWFFT, .TRUE., ny, nx*nz, &
    3084       23963 :                                         fft_scratch_new%fft_scratch%r1buf, fft_scratch_new%fft_scratch%tbuf, fft_plan_style)
    3085             :                CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(3), fft_type, FWFFT, .TRUE., n(1), nyzray, &
    3086       23963 :                                         fft_scratch_new%fft_scratch%r1buf, fft_scratch_new%fft_scratch%r2buf, fft_plan_style)
    3087             :                CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(4), fft_type, BWFFT, .TRUE., n(1), nyzray, &
    3088       23963 :                                         fft_scratch_new%fft_scratch%r2buf, fft_scratch_new%fft_scratch%r1buf, fft_plan_style)
    3089             :                CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(5), fft_type, BWFFT, .TRUE., ny, nx*nz, &
    3090       23963 :                                         fft_scratch_new%fft_scratch%tbuf, fft_scratch_new%fft_scratch%r1buf, fft_plan_style)
    3091             :                CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(6), fft_type, BWFFT, .TRUE., nz, nx*ny, &
    3092       23963 :                                         fft_scratch_new%fft_scratch%r1buf, fft_scratch_new%fft_scratch%tbuf, fft_plan_style)
    3093             : 
    3094             :             CASE (300) ! fft3d_ps: block distribution
    3095           0 :                mx1 = fft_sizes%mx1
    3096           0 :                mx2 = fft_sizes%mx2
    3097           0 :                my1 = fft_sizes%my1
    3098           0 :                mz2 = fft_sizes%mz2
    3099           0 :                mcx2 = fft_sizes%mcx2
    3100           0 :                lg = fft_sizes%lg
    3101           0 :                mg = fft_sizes%mg
    3102           0 :                nmax = fft_sizes%nmax
    3103           0 :                nmray = fft_sizes%nmray
    3104           0 :                nyzray = fft_sizes%nyzray
    3105           0 :                m1 = fft_sizes%r_dim(1)
    3106           0 :                m2 = fft_sizes%r_dim(2)
    3107           0 :                nbx = fft_sizes%nbx
    3108           0 :                nbz = fft_sizes%nbz
    3109           0 :                CALL fft_alloc(fft_scratch_new%fft_scratch%p1buf, [mx1*my1, n(3)])
    3110           0 :                CALL fft_alloc(fft_scratch_new%fft_scratch%p6buf, [lg, mg])
    3111             : #if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
    3112             :                length = INT(2*dp_size*MAX(n(3), 1)*MAX(mx1*my1, 1), KIND=C_SIZE_T)
    3113             :                ierr = offload_malloc_pinned_mem(cptr_p2buf, length)
    3114             :                CPASSERT(ierr == 0)
    3115             :                CALL c_f_pointer(cptr_p2buf, fft_scratch_new%fft_scratch%p2buf, (/MAX(n(3), 1), MAX(mx1*my1, 1)/))
    3116             :                length = INT(2*dp_size*MAX(mx2*mz2, 1)*MAX(n(2), 1), KIND=C_SIZE_T)
    3117             :                ierr = offload_malloc_pinned_mem(cptr_p3buf, length)
    3118             :                CPASSERT(ierr == 0)
    3119             :                CALL c_f_pointer(cptr_p3buf, fft_scratch_new%fft_scratch%p3buf, (/MAX(mx2*mz2, 1), MAX(n(2), 1)/))
    3120             :                length = INT(2*dp_size*MAX(n(2), 1)*MAX(mx2*mz2, 1), KIND=C_SIZE_T)
    3121             :                ierr = offload_malloc_pinned_mem(cptr_p4buf, length)
    3122             :                CPASSERT(ierr == 0)
    3123             :                CALL c_f_pointer(cptr_p4buf, fft_scratch_new%fft_scratch%p4buf, (/MAX(n(2), 1), MAX(mx2*mz2, 1)/))
    3124             :                length = INT(2*dp_size*MAX(nyzray, 1)*MAX(n(1), 1), KIND=C_SIZE_T)
    3125             :                ierr = offload_malloc_pinned_mem(cptr_p5buf, length)
    3126             :                CPASSERT(ierr == 0)
    3127             :                CALL c_f_pointer(cptr_p5buf, fft_scratch_new%fft_scratch%p5buf, (/MAX(nyzray, 1), MAX(n(1), 1)/))
    3128             :                length = INT(2*dp_size*MAX(mg, 1)*MAX(lg, 1), KIND=C_SIZE_T)
    3129             :                ierr = offload_malloc_pinned_mem(cptr_p7buf, length)
    3130             :                CPASSERT(ierr == 0)
    3131             :                CALL c_f_pointer(cptr_p7buf, fft_scratch_new%fft_scratch%p7buf, (/MAX(mg, 1), MAX(lg, 1)/))
    3132             : #else
    3133           0 :                CALL fft_alloc(fft_scratch_new%fft_scratch%p2buf, [n(3), mx1*my1])
    3134           0 :                CALL fft_alloc(fft_scratch_new%fft_scratch%p3buf, [mx2*mz2, n(2)])
    3135           0 :                CALL fft_alloc(fft_scratch_new%fft_scratch%p4buf, [n(2), mx2*mz2])
    3136           0 :                CALL fft_alloc(fft_scratch_new%fft_scratch%p5buf, [nyzray, n(1)])
    3137           0 :                CALL fft_alloc(fft_scratch_new%fft_scratch%p7buf, [mg, lg])
    3138             : #endif
    3139           0 :                IF (alltoall_sgl) THEN
    3140           0 :                   ALLOCATE (fft_scratch_new%fft_scratch%yzbuf_sgl(mg*lg))
    3141           0 :                   ALLOCATE (fft_scratch_new%fft_scratch%xzbuf_sgl(n(2)*mx2*mz2))
    3142             :                ELSE
    3143           0 :                   ALLOCATE (fft_scratch_new%fft_scratch%yzbuf(mg*lg))
    3144           0 :                   ALLOCATE (fft_scratch_new%fft_scratch%xzbuf(n(2)*mx2*mz2))
    3145             :                END IF
    3146           0 :                ALLOCATE (fft_scratch_new%fft_scratch%pgrid(0:m1 - 1, 0:m2 - 1))
    3147           0 :                ALLOCATE (fft_scratch_new%fft_scratch%xcor(nbx))
    3148           0 :                ALLOCATE (fft_scratch_new%fft_scratch%zcor(nbz))
    3149           0 :                ALLOCATE (fft_scratch_new%fft_scratch%pzcoord(0:np - 1))
    3150             :                ALLOCATE (fft_scratch_new%fft_scratch%xzcount(0:np - 1), &
    3151           0 :                          fft_scratch_new%fft_scratch%yzcount(0:np - 1))
    3152             :                ALLOCATE (fft_scratch_new%fft_scratch%xzdispl(0:np - 1), &
    3153           0 :                          fft_scratch_new%fft_scratch%yzdispl(0:np - 1))
    3154           0 :                fft_scratch_new%fft_scratch%group = fft_sizes%rs_group
    3155             : 
    3156           0 :                dim = fft_sizes%rs_group%num_pe_cart
    3157           0 :                pos = fft_sizes%rs_group%mepos_cart
    3158           0 :                fft_scratch_new%fft_scratch%mip = fft_sizes%rs_group%mepos
    3159           0 :                fft_scratch_new%fft_scratch%dim = dim
    3160           0 :                fft_scratch_new%fft_scratch%pos = pos
    3161           0 :                mcz1 = fft_sizes%mcz1
    3162           0 :                mcz2 = fft_sizes%mcz2
    3163           0 :                ALLOCATE (fft_scratch_new%fft_scratch%rbuf1(mx2*my1*mcz2, 0:DIM(2) - 1))
    3164           0 :                ALLOCATE (fft_scratch_new%fft_scratch%rbuf2(mx1*my1*mcz2, 0:DIM(2) - 1))
    3165             : 
    3166           0 :                dims = (/.FALSE., .TRUE./)
    3167           0 :                CALL fft_scratch_new%fft_scratch%cart_sub_comm(2)%from_sub(fft_sizes%rs_group, dims)
    3168             : 
    3169             :                !initialise pgcube
    3170           0 :                DO i = 0, DIM(2) - 1
    3171           0 :                   coord = (/pos(1), i/)
    3172           0 :                   CALL fft_sizes%rs_group%rank_cart(coord, fft_scratch_new%fft_scratch%pgcube(i, 2))
    3173             :                END DO
    3174             : 
    3175             :                !initialise pgrid
    3176           0 :                DO ix = 0, m1 - 1
    3177           0 :                   DO iz = 0, m2 - 1
    3178           0 :                      coord = (/ix, iz/)
    3179           0 :                      CALL fft_sizes%rs_group%rank_cart(coord, fft_scratch_new%fft_scratch%pgrid(ix, iz))
    3180             :                   END DO
    3181             :                END DO
    3182             : 
    3183             :                !initialise pzcoord
    3184           0 :                DO i = 0, np - 1
    3185           0 :                   CALL fft_sizes%rs_group%coords(i, pcoord)
    3186           0 :                   fft_scratch_new%fft_scratch%pzcoord(i) = pcoord(2)
    3187             :                END DO
    3188             : 
    3189             :                !set up fft plans
    3190             :                CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(1), fft_type, FWFFT, .TRUE., n(3), mx1*my1, &
    3191           0 :                                         fft_scratch_new%fft_scratch%p1buf, fft_scratch_new%fft_scratch%p2buf, fft_plan_style)
    3192             :                CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(2), fft_type, FWFFT, .TRUE., n(2), mx2*mz2, &
    3193           0 :                                         fft_scratch_new%fft_scratch%p3buf, fft_scratch_new%fft_scratch%p4buf, fft_plan_style)
    3194             :                CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(3), fft_type, FWFFT, .TRUE., n(1), nyzray, &
    3195           0 :                                         fft_scratch_new%fft_scratch%p5buf, fft_scratch_new%fft_scratch%p6buf, fft_plan_style)
    3196             :                CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(4), fft_type, BWFFT, .TRUE., n(1), nyzray, &
    3197           0 :                                         fft_scratch_new%fft_scratch%p6buf, fft_scratch_new%fft_scratch%p7buf, fft_plan_style)
    3198             :                CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(5), fft_type, BWFFT, .TRUE., n(2), mx2*mz2, &
    3199           0 :                                         fft_scratch_new%fft_scratch%p4buf, fft_scratch_new%fft_scratch%p3buf, fft_plan_style)
    3200             :                CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(6), fft_type, BWFFT, .TRUE., n(3), mx1*my1, &
    3201           0 :                                         fft_scratch_new%fft_scratch%p3buf, fft_scratch_new%fft_scratch%p1buf, fft_plan_style)
    3202             : 
    3203             :             CASE (400) ! serial FFT
    3204       10289 :                np = 0
    3205       10289 :                CALL fft_alloc(fft_scratch_new%fft_scratch%ziptr, n)
    3206       10289 :                CALL fft_alloc(fft_scratch_new%fft_scratch%zoptr, n)
    3207             : 
    3208             :                !in place plans
    3209             :                CALL fft_create_plan_3d(fft_scratch_new%fft_scratch%fft_plan(1), fft_type, .TRUE., FWFFT, n, &
    3210       10289 :                                        fft_scratch_new%fft_scratch%ziptr, fft_scratch_new%fft_scratch%ziptr, fft_plan_style)
    3211             :                CALL fft_create_plan_3d(fft_scratch_new%fft_scratch%fft_plan(2), fft_type, .TRUE., BWFFT, n, &
    3212       10289 :                                        fft_scratch_new%fft_scratch%ziptr, fft_scratch_new%fft_scratch%ziptr, fft_plan_style)
    3213             :                ! out of place plans
    3214             :                CALL fft_create_plan_3d(fft_scratch_new%fft_scratch%fft_plan(3), fft_type, .FALSE., FWFFT, n, &
    3215       10289 :                                        fft_scratch_new%fft_scratch%ziptr, fft_scratch_new%fft_scratch%zoptr, fft_plan_style)
    3216             :                CALL fft_create_plan_3d(fft_scratch_new%fft_scratch%fft_plan(4), fft_type, .FALSE., BWFFT, n, &
    3217       44549 :                                        fft_scratch_new%fft_scratch%ziptr, fft_scratch_new%fft_scratch%zoptr, fft_plan_style)
    3218             : 
    3219             :             END SELECT
    3220             : 
    3221       34260 :             NULLIFY (fft_scratch_new%fft_scratch_next)
    3222             :             fft_scratch_new%fft_scratch%fft_scratch_id = &
    3223       34260 :                fft_scratch_last%fft_scratch%fft_scratch_id + 1
    3224       34260 :             fft_scratch_new%fft_scratch%in_use = .TRUE.
    3225      137040 :             fft_scratch_new%fft_scratch%nfft = n
    3226       34260 :             fft_scratch_last%fft_scratch_next => fft_scratch_new
    3227       34260 :             fft_scratch_new%fft_scratch%tf_type = tf_type
    3228       34260 :             fft_scratch => fft_scratch_new%fft_scratch
    3229       34260 :             EXIT
    3230             : 
    3231             :          END IF
    3232             :       END DO
    3233             : 
    3234     3246758 :       fft_scratch%last_tick = tick_fft_pool
    3235             : 
    3236     3246758 :       CALL timestop(handle)
    3237             : 
    3238     3246758 :    END SUBROUTINE get_fft_scratch
    3239             : 
    3240             : ! **************************************************************************************************
    3241             : !> \brief ...
    3242             : !> \param fft_scratch ...
    3243             : ! **************************************************************************************************
    3244     3246758 :    SUBROUTINE release_fft_scratch(fft_scratch)
    3245             : 
    3246             :       TYPE(fft_scratch_type), POINTER                    :: fft_scratch
    3247             : 
    3248             :       INTEGER                                            :: scratch_id
    3249             :       TYPE(fft_scratch_pool_type), POINTER               :: fft_scratch_current
    3250             : 
    3251     3246758 :       scratch_id = fft_scratch%fft_scratch_id
    3252             : 
    3253     3246758 :       fft_scratch_current => fft_scratch_first
    3254    12428539 :       DO
    3255    15675297 :          IF (ASSOCIATED(fft_scratch_current)) THEN
    3256    15675297 :             IF (scratch_id == fft_scratch_current%fft_scratch%fft_scratch_id) THEN
    3257     3246758 :                fft_scratch%in_use = .FALSE.
    3258     3246758 :                NULLIFY (fft_scratch)
    3259     3246758 :                EXIT
    3260             :             END IF
    3261    12428539 :             fft_scratch_current => fft_scratch_current%fft_scratch_next
    3262             :          ELSE
    3263             :             ! We cannot find the scratch type in this pool
    3264           0 :             CPABORT("")
    3265           0 :             EXIT
    3266             :          END IF
    3267             :       END DO
    3268             : 
    3269     3246758 :    END SUBROUTINE release_fft_scratch
    3270             : 
    3271             : ! **************************************************************************************************
    3272             : !> \brief ...
    3273             : !> \param rs ...
    3274             : !> \param scount ...
    3275             : !> \param sdispl ...
    3276             : !> \param rq ...
    3277             : !> \param rcount ...
    3278             : !> \param rdispl ...
    3279             : !> \param group ...
    3280             : ! **************************************************************************************************
    3281           0 :    SUBROUTINE sparse_alltoall(rs, scount, sdispl, rq, rcount, rdispl, group)
    3282             :       COMPLEX(KIND=dp), DIMENSION(:), POINTER            :: rs
    3283             :       INTEGER, DIMENSION(:), POINTER                     :: scount, sdispl
    3284             :       COMPLEX(KIND=dp), DIMENSION(:), POINTER            :: rq
    3285             :       INTEGER, DIMENSION(:), POINTER                     :: rcount, rdispl
    3286             : 
    3287             :       CLASS(mp_comm_type), INTENT(IN)                     :: group
    3288             : 
    3289           0 :       COMPLEX(KIND=dp), DIMENSION(:), POINTER            :: msgin, msgout
    3290             :       INTEGER                                            :: ip, n, nr, ns, pos
    3291           0 :       TYPE(mp_request_type), ALLOCATABLE, DIMENSION(:)   :: rreq, sreq
    3292             : 
    3293           0 :       CALL group%sync()
    3294           0 :       n = group%num_pe
    3295           0 :       pos = group%mepos
    3296           0 :       ALLOCATE (sreq(0:n - 1))
    3297           0 :       ALLOCATE (rreq(0:n - 1))
    3298           0 :       nr = 0
    3299           0 :       DO ip = 0, n - 1
    3300           0 :          IF (rcount(ip) == 0) CYCLE
    3301           0 :          IF (ip == pos) CYCLE
    3302           0 :          msgout => rq(rdispl(ip) + 1:rdispl(ip) + rcount(ip))
    3303           0 :          CALL group%irecv(msgout, ip, rreq(nr))
    3304           0 :          nr = nr + 1
    3305             :       END DO
    3306           0 :       ns = 0
    3307           0 :       DO ip = 0, n - 1
    3308           0 :          IF (scount(ip) == 0) CYCLE
    3309           0 :          IF (ip == pos) CYCLE
    3310           0 :          msgin => rs(sdispl(ip) + 1:sdispl(ip) + scount(ip))
    3311           0 :          CALL group%isend(msgin, ip, sreq(ns))
    3312           0 :          ns = ns + 1
    3313             :       END DO
    3314           0 :       IF (rcount(pos) /= 0) THEN
    3315           0 :          IF (rcount(pos) /= scount(pos)) CPABORT("")
    3316           0 :          rq(rdispl(pos) + 1:rdispl(pos) + rcount(pos)) = rs(sdispl(pos) + 1:sdispl(pos) + scount(pos))
    3317             :       END IF
    3318           0 :       CALL mp_waitall(sreq(0:ns - 1))
    3319           0 :       CALL mp_waitall(rreq(0:nr - 1))
    3320           0 :       DEALLOCATE (sreq)
    3321           0 :       DEALLOCATE (rreq)
    3322           0 :       CALL group%sync()
    3323             : 
    3324           0 :    END SUBROUTINE sparse_alltoall
    3325             : 
    3326             : ! **************************************************************************************************
    3327             : !> \brief  test data structures for equality. It is assumed that if they are
    3328             : !>         different for one mpi task they are different for all (??)
    3329             : !> \param fft_size_1 ...
    3330             : !> \param fft_size_2 ...
    3331             : !> \param equal ...
    3332             : ! **************************************************************************************************
    3333     2601919 :    SUBROUTINE is_equal(fft_size_1, fft_size_2, equal)
    3334             :       TYPE(fft_scratch_sizes)                            :: fft_size_1, fft_size_2
    3335             :       LOGICAL                                            :: equal
    3336             : 
    3337             :       equal = .TRUE.
    3338             : 
    3339     2601919 :       equal = equal .AND. fft_size_1%nx == fft_size_2%nx
    3340     2601919 :       equal = equal .AND. fft_size_1%ny == fft_size_2%ny
    3341     2601919 :       equal = equal .AND. fft_size_1%nz == fft_size_2%nz
    3342             : 
    3343     2601919 :       equal = equal .AND. fft_size_1%lmax == fft_size_2%lmax
    3344     2601919 :       equal = equal .AND. fft_size_1%mmax == fft_size_2%mmax
    3345     2601919 :       equal = equal .AND. fft_size_1%nmax == fft_size_2%nmax
    3346             : 
    3347     2601919 :       equal = equal .AND. fft_size_1%mx1 == fft_size_2%mx1
    3348     2601919 :       equal = equal .AND. fft_size_1%mx2 == fft_size_2%mx2
    3349     2601919 :       equal = equal .AND. fft_size_1%mx3 == fft_size_2%mx3
    3350             : 
    3351     2601919 :       equal = equal .AND. fft_size_1%my1 == fft_size_2%my1
    3352     2601919 :       equal = equal .AND. fft_size_1%my2 == fft_size_2%my2
    3353     2601919 :       equal = equal .AND. fft_size_1%my3 == fft_size_2%my3
    3354             : 
    3355     2601919 :       equal = equal .AND. fft_size_1%mcz1 == fft_size_2%mcz1
    3356     2601919 :       equal = equal .AND. fft_size_1%mcx2 == fft_size_2%mcx2
    3357     2601919 :       equal = equal .AND. fft_size_1%mcz2 == fft_size_2%mcz2
    3358     2601919 :       equal = equal .AND. fft_size_1%mcy3 == fft_size_2%mcy3
    3359             : 
    3360     2601919 :       equal = equal .AND. fft_size_1%lg == fft_size_2%lg
    3361     2601919 :       equal = equal .AND. fft_size_1%mg == fft_size_2%mg
    3362             : 
    3363     2601919 :       equal = equal .AND. fft_size_1%nbx == fft_size_2%nbx
    3364     2601919 :       equal = equal .AND. fft_size_1%nbz == fft_size_2%nbz
    3365             : 
    3366     2601919 :       equal = equal .AND. fft_size_1%nmray == fft_size_2%nmray
    3367     2601919 :       equal = equal .AND. fft_size_1%nyzray == fft_size_2%nyzray
    3368             : 
    3369     2601919 :       equal = equal .AND. fft_size_1%rs_group == fft_size_2%rs_group
    3370             : 
    3371     7805757 :       equal = equal .AND. ALL(fft_size_1%g_pos == fft_size_2%g_pos)
    3372     7805757 :       equal = equal .AND. ALL(fft_size_1%r_pos == fft_size_2%r_pos)
    3373     7805757 :       equal = equal .AND. ALL(fft_size_1%r_dim == fft_size_2%r_dim)
    3374             : 
    3375     2601919 :       equal = equal .AND. fft_size_1%numtask == fft_size_2%numtask
    3376             : 
    3377     2601919 :    END SUBROUTINE is_equal
    3378             : 
    3379           0 : END MODULE fft_tools

Generated by: LCOV version 1.15