LCOV - code coverage report
Current view: top level - src/pw/fft - fftw3_lib.F (source / functions) Hit Total Coverage
Test: CP2K Regtests (git:d1f8d1b) Lines: 266 358 74.3 %
Date: 2024-11-29 06:42:44 Functions: 15 25 60.0 %

          Line data    Source code
       1             : !--------------------------------------------------------------------------------------------------!
       2             : !   CP2K: A general program to perform molecular dynamics simulations                              !
       3             : !   Copyright 2000-2024 CP2K developers group <https://cp2k.org>                                   !
       4             : !                                                                                                  !
       5             : !   SPDX-License-Identifier: GPL-2.0-or-later                                                      !
       6             : !--------------------------------------------------------------------------------------------------!
       7             : MODULE fftw3_lib
       8             :    USE ISO_C_BINDING, ONLY: C_ASSOCIATED, &
       9             :                             C_CHAR, &
      10             :                             C_DOUBLE, &
      11             :                             C_DOUBLE_COMPLEX, &
      12             :                             C_INT, &
      13             :                             C_PTR
      14             : #if defined(__FFTW3)
      15             :    USE ISO_C_BINDING, ONLY: &
      16             :       C_FLOAT, &
      17             :       C_FLOAT_COMPLEX, &
      18             :       C_FUNPTR, &
      19             :       C_INT32_T, &
      20             :       C_INTPTR_T, &
      21             :       C_LOC, &
      22             :       C_NULL_CHAR, &
      23             :       C_SIZE_T, C_F_POINTER
      24             : #endif
      25             :    USE cp_files, ONLY: get_unit_number
      26             :    USE fft_kinds, ONLY: dp
      27             :    USE fft_plan, ONLY: fft_plan_type
      28             : 
      29             : !$ USE OMP_LIB, ONLY: omp_get_max_threads, omp_get_thread_num, omp_get_num_threads
      30             : 
      31             : #include "../../base/base_uses.f90"
      32             : 
      33             :    IMPLICIT NONE
      34             :    PRIVATE
      35             : 
      36             :    PUBLIC :: fftw3_do_init, fftw3_do_cleanup, fftw3_get_lengths, fftw33d, fftw31dm
      37             :    PUBLIC :: fftw3_destroy_plan, fftw3_create_plan_1dm, fftw3_create_plan_3d
      38             :    PUBLIC :: fftw_alloc, fftw_dealloc
      39             : 
      40             : #if defined ( __FFTW3 )
      41             : #if defined ( __FFTW3_MKL )
      42             : #include "fftw/fftw3.f03"
      43             : #else
      44             : #include "fftw3.f03"
      45             : #endif
      46             : #endif
      47             : 
      48             :    INTERFACE fftw_alloc
      49             :       MODULE PROCEDURE :: fftw_alloc_complex_1d
      50             :       MODULE PROCEDURE :: fftw_alloc_complex_2d
      51             :       MODULE PROCEDURE :: fftw_alloc_complex_3d
      52             :    END INTERFACE
      53             : 
      54             :    INTERFACE fftw_dealloc
      55             :       MODULE PROCEDURE :: fftw_dealloc_complex_1d
      56             :       MODULE PROCEDURE :: fftw_dealloc_complex_2d
      57             :       MODULE PROCEDURE :: fftw_dealloc_complex_3d
      58             :    END INTERFACE
      59             : 
      60             : CONTAINS
      61             : 
      62             :    #:set maxdim = 3
      63             :    #:for dim in range(1, maxdim+1)
      64             : ! Concatenate the components of the dimensions passed to this function to use it if FFTW3 is not used
      65             :       #:set dim_extended = ", ".join(["n("+str(i)+")" for i in range(1, dim+1)])
      66      106540 :       SUBROUTINE fftw_alloc_complex_${dim}$d(array, n)
      67             :          COMPLEX(C_DOUBLE_COMPLEX), DIMENSION(:${", :"*(dim-1)}$), CONTIGUOUS, POINTER, INTENT(OUT) :: array
      68             :          INTEGER, DIMENSION(${dim}$), INTENT(IN) :: n
      69             : 
      70             : #if defined(__FFTW3)
      71             :          TYPE(C_PTR) :: data_ptr
      72      373052 :          data_ptr = fftw_alloc_complex(INT(PRODUCT(n), KIND=C_SIZE_T))
      73      373052 :          CALL C_F_POINTER(data_ptr, array, n)
      74             : #else
      75             : ! Just allocate the array
      76             :          ALLOCATE (array(${dim_extended}$))
      77             : #endif
      78             : 
      79      106540 :       END SUBROUTINE fftw_alloc_complex_${dim}$d
      80             : 
      81      106540 :       SUBROUTINE fftw_dealloc_complex_${dim}$d(array)
      82             :          COMPLEX(C_DOUBLE_COMPLEX), DIMENSION(:${", :"*(dim-1)}$), CONTIGUOUS, POINTER, INTENT(INOUT) :: array
      83             : 
      84             : #if defined(__FFTW3)
      85      106540 :          CALL fftw_free(C_LOC(array))
      86      106540 :          NULLIFY (array)
      87             : #else
      88             : ! Just deallocate the array
      89             :          DEALLOCATE (array)
      90             : #endif
      91             : 
      92      106540 :       END SUBROUTINE fftw_dealloc_complex_${dim}$d
      93             :    #:endfor
      94             : 
      95             : #if defined ( __FFTW3 )
      96             : ! **************************************************************************************************
      97             : !> \brief A workaround that allows us to compile with -Werror=unused-parameter
      98             : ! **************************************************************************************************
      99           0 :    SUBROUTINE dummy_routine_to_call_mark_used()
     100             :       MARK_USED(FFTW_R2HC)
     101             :       MARK_USED(FFTW_HC2R)
     102             :       MARK_USED(FFTW_DHT)
     103             :       MARK_USED(FFTW_REDFT00)
     104             :       MARK_USED(FFTW_REDFT01)
     105             :       MARK_USED(FFTW_REDFT10)
     106             :       MARK_USED(FFTW_REDFT11)
     107             :       MARK_USED(FFTW_RODFT00)
     108             :       MARK_USED(FFTW_RODFT01)
     109             :       MARK_USED(FFTW_RODFT10)
     110             :       MARK_USED(FFTW_RODFT11)
     111             :       MARK_USED(FFTW_FORWARD)
     112             :       MARK_USED(FFTW_BACKWARD)
     113             :       MARK_USED(FFTW_MEASURE)
     114             :       MARK_USED(FFTW_DESTROY_INPUT)
     115             :       MARK_USED(FFTW_UNALIGNED)
     116             :       MARK_USED(FFTW_CONSERVE_MEMORY)
     117             :       MARK_USED(FFTW_EXHAUSTIVE)
     118             :       MARK_USED(FFTW_PRESERVE_INPUT)
     119             :       MARK_USED(FFTW_PATIENT)
     120             :       MARK_USED(FFTW_ESTIMATE)
     121             :       MARK_USED(FFTW_WISDOM_ONLY)
     122             :       MARK_USED(FFTW_ESTIMATE_PATIENT)
     123             :       MARK_USED(FFTW_BELIEVE_PCOST)
     124             :       MARK_USED(FFTW_NO_DFT_R2HC)
     125             :       MARK_USED(FFTW_NO_NONTHREADED)
     126             :       MARK_USED(FFTW_NO_BUFFERING)
     127             :       MARK_USED(FFTW_NO_INDIRECT_OP)
     128             :       MARK_USED(FFTW_ALLOW_LARGE_GENERIC)
     129             :       MARK_USED(FFTW_NO_RANK_SPLITS)
     130             :       MARK_USED(FFTW_NO_VRANK_SPLITS)
     131             :       MARK_USED(FFTW_NO_VRECURSE)
     132             :       MARK_USED(FFTW_NO_SIMD)
     133             :       MARK_USED(FFTW_NO_SLOW)
     134             :       MARK_USED(FFTW_NO_FIXED_RADIX_LARGE_N)
     135             :       MARK_USED(FFTW_ALLOW_PRUNING)
     136           0 :    END SUBROUTINE
     137             : #endif
     138             : 
     139             : ! **************************************************************************************************
     140             : !> \brief ...
     141             : !> \param wisdom_file ...
     142             : !> \param ionode ...
     143             : ! **************************************************************************************************
     144        8921 :    SUBROUTINE fftw3_do_cleanup(wisdom_file, ionode)
     145             : 
     146             :       CHARACTER(LEN=*), INTENT(IN)             :: wisdom_file
     147             :       LOGICAL                                  :: ionode
     148             : 
     149             : #if defined ( __FFTW3 )
     150        8921 :       CHARACTER(LEN=1, KIND=C_CHAR), DIMENSION(:), ALLOCATABLE :: wisdom_file_name_c
     151             :       INTEGER                                  :: file_name_length, i, iunit, istat
     152             :       INTEGER(KIND=C_INT)                      :: isuccess
     153             :       ! Write out FFTW3 wisdom to file (if we can)
     154             :       ! only the ionode updates the wisdom
     155        8921 :       IF (ionode) THEN
     156        4564 :          iunit = get_unit_number()
     157             :          ! Check whether the file can be opened in the necessary manner
     158        4564 :          OPEN (UNIT=iunit, FILE=wisdom_file, STATUS="UNKNOWN", FORM="FORMATTED", ACTION="WRITE", IOSTAT=istat)
     159        4564 :          IF (istat == 0) THEN
     160           2 :             CLOSE (iunit)
     161           2 :             file_name_length = LEN_TRIM(wisdom_file)
     162           6 :             ALLOCATE (wisdom_file_name_c(file_name_length + 1))
     163          18 :             DO i = 1, file_name_length
     164          18 :                wisdom_file_name_c(i) = wisdom_file(i:i)
     165             :             END DO
     166           2 :             wisdom_file_name_c(file_name_length + 1) = C_NULL_CHAR
     167           2 :             isuccess = fftw_export_wisdom_to_filename(wisdom_file_name_c)
     168           2 :             IF (isuccess == 0) &
     169             :                CALL cp_warn(__LOCATION__, "Error exporting wisdom to file "//TRIM(wisdom_file)//". "// &
     170           0 :                             "Wisdom was not exported.")
     171             :          END IF
     172             :       END IF
     173             : 
     174        8921 :       CALL fftw_cleanup()
     175             : #else
     176             :       MARK_USED(wisdom_file)
     177             :       MARK_USED(ionode)
     178             : #endif
     179             : 
     180        8921 :    END SUBROUTINE
     181             : 
     182             : ! **************************************************************************************************
     183             : !> \brief ...
     184             : !> \param wisdom_file ...
     185             : ! **************************************************************************************************
     186        9131 :    SUBROUTINE fftw3_do_init(wisdom_file)
     187             : 
     188             :       CHARACTER(LEN=*), INTENT(IN)             :: wisdom_file
     189             : 
     190             : #if defined ( __FFTW3 )
     191        9131 :       CHARACTER(LEN=1, KIND=C_CHAR), DIMENSION(:), ALLOCATABLE :: wisdom_file_name_c
     192             :       INTEGER                                  :: file_name_length, i, istat, iunit
     193             :       INTEGER(KIND=C_INT)                      :: isuccess
     194             :       LOGICAL :: file_exists
     195             : 
     196             : #if defined(__MKL)
     197             : !$    LOGICAL                                  :: mkl_is_safe
     198             : #endif
     199             : 
     200             : ! If using the Intel compiler then we need to declare
     201             : ! a C interface to a global variable in MKL that sets
     202             : ! the number of threads which can concurrently execute
     203             : ! an FFT
     204             : ! We need __INTEL_COMPILER so we can be sure that the compiler
     205             : ! understands the !DEC$ version definitions
     206             : #if defined (__INTEL_COMPILER) && defined (__MKL)
     207             : !$    include "mkl.fi"
     208             : !DEC$ IF DEFINED (INTEL_MKL_VERSION)
     209             : !DEC$ IF INTEL_MKL_VERSION .EQ. 110100
     210             : !DIR$ ATTRIBUTES ALIGN : 8 :: fftw3_mkl
     211             : !$    COMMON/fftw3_mkl/ignore(4), mkl_dft_number_of_user_threads, ignore2(7)
     212             : !$    INTEGER*4 :: ignore, mkl_dft_number_of_user_threads, ignore2
     213             : !$    BIND(c) :: /fftw3_mkl/
     214             : !DEC$ ENDIF
     215             : !DEC$ ENDIF
     216             : #elif defined (__MKL)
     217             : ! Preprocessing is enabled by default, and below header is not language specific
     218             : #include <mkl_version.h>
     219             : #endif
     220             : 
     221        9131 :       isuccess = fftw_init_threads()
     222        9131 :       IF (isuccess == 0) &
     223           0 :          CPABORT("Error initializing FFTW with threads")
     224             : 
     225             :       ! Read FFTW wisdom (if available)
     226             :       ! all nodes are opening the file here...
     227        9131 :       INQUIRE (FILE=wisdom_file, exist=file_exists)
     228        9131 :       IF (file_exists) THEN
     229           2 :          iunit = get_unit_number()
     230           2 :          file_name_length = LEN_TRIM(wisdom_file)
     231             :          ! Check whether the file can be opened in the necessary manner
     232             :          OPEN (UNIT=iunit, FILE=wisdom_file, STATUS="OLD", FORM="FORMATTED", POSITION="REWIND", &
     233           2 :                ACTION="READ", IOSTAT=istat)
     234           2 :          IF (istat == 0) THEN
     235           2 :             CLOSE (iunit)
     236           2 :             file_name_length = LEN_TRIM(wisdom_file)
     237           6 :             ALLOCATE (wisdom_file_name_c(file_name_length + 1))
     238          18 :             DO i = 1, file_name_length
     239          18 :                wisdom_file_name_c(i) = wisdom_file(i:i)
     240             :             END DO
     241           2 :             wisdom_file_name_c(file_name_length + 1) = C_NULL_CHAR
     242           2 :             isuccess = fftw_import_wisdom_from_filename(wisdom_file_name_c)
     243           2 :             IF (isuccess == 0) &
     244             :                CALL cp_warn(__LOCATION__, "Error importing wisdom from file "//TRIM(wisdom_file)//". "// &
     245             :                             "Maybe the file was created with a different configuration than CP2K is run with. "// &
     246           0 :                             "CP2K continues without importing wisdom.")
     247             :          END IF
     248             :       END IF
     249             : 
     250             : #if defined (__MKL)
     251             :       ! Now check if we have a real FFTW3 library, or are using MKL wrappers
     252             : 
     253             : !$    IF (fftw3_is_mkl_wrapper() .and. omp_get_max_threads() .gt. 1) THEN
     254             : ! If we are not using the Intel compiler, there is no way to tell which
     255             : ! MKL version is in use, so fail safe...
     256             : !$       mkl_is_safe = .FALSE.
     257             : #if defined(INTEL_MKL_VERSION) && (110100 < INTEL_MKL_VERSION)
     258             : !$       mkl_is_safe = .TRUE.
     259             : #elif defined (__INTEL_COMPILER)
     260             : ! If we have an Intel compiler (__INTEL_COMPILER is defined) then check the
     261             : ! MKL version and make the appropriate action
     262             : !DEC$ IF DEFINED (INTEL_MKL_VERSION)
     263             : !DEC$ IF INTEL_MKL_VERSION .EQ. 110100
     264             : !$       mkl_dft_number_of_user_threads = omp_get_max_threads()
     265             : !DEC$ ENDIF
     266             : !DEC$ IF INTEL_MKL_VERSION .GE. 110100
     267             : !$       mkl_is_safe = .TRUE.
     268             : !DEC$ ENDIF
     269             : !DEC$ ENDIF
     270             : #endif
     271             : !$       IF (.NOT. mkl_is_safe) THEN
     272             : !$          CALL cp_abort(__LOCATION__, "Intel's FFTW3 interface to MKL is not "// &
     273             : !$                        "thread-safe prior to MKL 11.1.0!  Please "// &
     274             : !$                        "rebuild CP2K, linking against FFTW 3 from "// &
     275             : !$                        "www.fftw.org or a newer version of MKL. "// &
     276             : !$                        "Now exiting...")
     277             : !$       END IF
     278             : !$    END IF
     279             : #endif
     280             : #else
     281             :       MARK_USED(wisdom_file)
     282             : #endif
     283             : 
     284        9131 :    END SUBROUTINE
     285             : 
     286             : ! **************************************************************************************************
     287             : !> \brief ...
     288             : !> \param DATA ...
     289             : !> \param max_length ...
     290             : !> \par History
     291             : !>      JGH 23-Jan-2006 : initial version
     292             : !>      Adapted for new interface
     293             : !>      IAB 09-Jan-2009 : Modified to cache plans in fft_plan_type
     294             : !>                        (c) The Numerical Algorithms Group (NAG) Ltd, 2009 on behalf of the HECToR project
     295             : !>      IAB 09-Oct-2009 : Added OpenMP directives to 1D FFT, and planning routines
     296             : !>                        (c) The Numerical Algorithms Group (NAG) Ltd, 2009 on behalf of the HECToR project
     297             : !>      IAB 11-Sep-2012 : OpenMP parallel 3D FFT (Ruyman Reyes, PRACE)
     298             : !> \author JGH
     299             : ! **************************************************************************************************
     300         504 :    SUBROUTINE fftw3_get_lengths(DATA, max_length)
     301             : 
     302             :       INTEGER, DIMENSION(*)                              :: DATA
     303             :       INTEGER, INTENT(INOUT)                             :: max_length
     304             : 
     305             :       INTEGER                                            :: h, i, j, k, m, maxn, maxn_elevens, &
     306             :                                                             maxn_fives, maxn_sevens, &
     307             :                                                             maxn_thirteens, maxn_threes, &
     308             :                                                             maxn_twos, ndata, nmax, number
     309         504 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: dlocal, idx
     310             : 
     311             : !------------------------------------------------------------------------------
     312             : ! compute ndata
     313             : !! FFTW can do arbitrary(?) lengths, maybe you want to limit them to some
     314             : !!    powers of small prime numbers though...
     315             : 
     316         504 :       maxn_twos = 15
     317         504 :       maxn_threes = 3
     318         504 :       maxn_fives = 2
     319         504 :       maxn_sevens = 1
     320         504 :       maxn_elevens = 1
     321         504 :       maxn_thirteens = 0
     322         504 :       maxn = 37748736
     323             : 
     324         504 :       ndata = 0
     325        8568 :       DO h = 0, maxn_twos
     326        8064 :          nmax = HUGE(0)/2**h
     327       40824 :          DO i = 0, maxn_threes
     328      137088 :             DO j = 0, maxn_fives
     329      322560 :                DO k = 0, maxn_sevens
     330      677376 :                   DO m = 0, maxn_elevens
     331      387072 :                      number = (3**i)*(5**j)*(7**k)*(11**m)
     332             : 
     333      387072 :                      IF (number > nmax) CYCLE
     334             : 
     335      387072 :                      number = number*2**h
     336      387072 :                      IF (number >= maxn) CYCLE
     337             : 
     338      580608 :                      ndata = ndata + 1
     339             :                   END DO
     340             :                END DO
     341             :             END DO
     342             :          END DO
     343             :       END DO
     344             : 
     345        2016 :       ALLOCATE (dlocal(ndata), idx(ndata))
     346             : 
     347         504 :       ndata = 0
     348      371448 :       dlocal(:) = 0
     349        8568 :       DO h = 0, maxn_twos
     350        8064 :          nmax = HUGE(0)/2**h
     351       40824 :          DO i = 0, maxn_threes
     352      137088 :             DO j = 0, maxn_fives
     353      322560 :                DO k = 0, maxn_sevens
     354      677376 :                   DO m = 0, maxn_elevens
     355      387072 :                      number = (3**i)*(5**j)*(7**k)*(11**m)
     356             : 
     357      387072 :                      IF (number > nmax) CYCLE
     358             : 
     359      387072 :                      number = number*2**h
     360      387072 :                      IF (number >= maxn) CYCLE
     361             : 
     362      370944 :                      ndata = ndata + 1
     363      580608 :                      dlocal(ndata) = number
     364             :                   END DO
     365             :                END DO
     366             :             END DO
     367             :          END DO
     368             :       END DO
     369             : 
     370         504 :       CALL sortint(dlocal, ndata, idx)
     371         504 :       ndata = MIN(ndata, max_length)
     372      371448 :       DATA(1:ndata) = dlocal(1:ndata)
     373         504 :       max_length = ndata
     374             : 
     375         504 :       DEALLOCATE (dlocal, idx)
     376             : 
     377         504 :    END SUBROUTINE fftw3_get_lengths
     378             : 
     379             : ! **************************************************************************************************
     380             : !> \brief ...
     381             : !> \param iarr ...
     382             : !> \param n ...
     383             : !> \param index ...
     384             : ! **************************************************************************************************
     385         504 :    SUBROUTINE sortint(iarr, n, index)
     386             : 
     387             :       INTEGER, INTENT(IN)                                :: n
     388             :       INTEGER, INTENT(INOUT)                             :: iarr(1:n)
     389             :       INTEGER, INTENT(OUT)                               :: INDEX(1:n)
     390             : 
     391             :       INTEGER, PARAMETER                                 :: m = 7, nstack = 50
     392             : 
     393             :       INTEGER                                            :: a, i, ib, ir, istack(1:nstack), itemp, &
     394             :                                                             j, jstack, k, l, temp
     395             : 
     396             : !------------------------------------------------------------------------------
     397             : 
     398      371448 :       DO i = 1, n
     399      371448 :          INDEX(i) = i
     400             :       END DO
     401             :       jstack = 0
     402             :       l = 1
     403             :       ir = n
     404             :       DO WHILE (.TRUE.)
     405      139608 :       IF (ir - l < m) THEN
     406      301392 :          DO j = l + 1, ir
     407      231336 :             a = iarr(j)
     408      231336 :             ib = INDEX(j)
     409      549360 :             DO i = j - 1, 0, -1
     410      549360 :                IF (i == 0) EXIT
     411      548352 :                IF (iarr(i) <= a) EXIT
     412      318024 :                iarr(i + 1) = iarr(i)
     413      549360 :                INDEX(i + 1) = INDEX(i)
     414             :             END DO
     415      231336 :             iarr(i + 1) = a
     416      301392 :             INDEX(i + 1) = ib
     417             :          END DO
     418       70056 :          IF (jstack == 0) RETURN
     419       69552 :          ir = istack(jstack)
     420       69552 :          l = istack(jstack - 1)
     421       69552 :          jstack = jstack - 2
     422             :       ELSE
     423       69552 :          k = (l + ir)/2
     424       69552 :          temp = iarr(k)
     425       69552 :          iarr(k) = iarr(l + 1)
     426       69552 :          iarr(l + 1) = temp
     427       69552 :          itemp = INDEX(k)
     428       69552 :          INDEX(k) = INDEX(l + 1)
     429       69552 :          INDEX(l + 1) = itemp
     430       69552 :          IF (iarr(l + 1) > iarr(ir)) THEN
     431       32760 :             temp = iarr(l + 1)
     432       32760 :             iarr(l + 1) = iarr(ir)
     433       32760 :             iarr(ir) = temp
     434       32760 :             itemp = INDEX(l + 1)
     435       32760 :             INDEX(l + 1) = INDEX(ir)
     436       32760 :             INDEX(ir) = itemp
     437             :          END IF
     438       69552 :          IF (iarr(l) > iarr(ir)) THEN
     439       26208 :             temp = iarr(l)
     440       26208 :             iarr(l) = iarr(ir)
     441       26208 :             iarr(ir) = temp
     442       26208 :             itemp = INDEX(l)
     443       26208 :             INDEX(l) = INDEX(ir)
     444       26208 :             INDEX(ir) = itemp
     445             :          END IF
     446       69552 :          IF (iarr(l + 1) > iarr(l)) THEN
     447       26208 :             temp = iarr(l + 1)
     448       26208 :             iarr(l + 1) = iarr(l)
     449       26208 :             iarr(l) = temp
     450       26208 :             itemp = INDEX(l + 1)
     451       26208 :             INDEX(l + 1) = INDEX(l)
     452       26208 :             INDEX(l) = itemp
     453             :          END IF
     454       69552 :          i = l + 1
     455       69552 :          j = ir
     456       69552 :          a = iarr(l)
     457       69552 :          ib = INDEX(l)
     458      443016 :          DO WHILE (.TRUE.)
     459      512568 :             i = i + 1
     460     1663704 :             DO WHILE (iarr(i) < a)
     461     1151136 :                i = i + 1
     462             :             END DO
     463      512568 :             j = j - 1
     464     1270080 :             DO WHILE (iarr(j) > a)
     465      757512 :                j = j - 1
     466             :             END DO
     467      512568 :             IF (j < i) EXIT
     468      443016 :             temp = iarr(i)
     469      443016 :             iarr(i) = iarr(j)
     470      443016 :             iarr(j) = temp
     471      443016 :             itemp = INDEX(i)
     472      443016 :             INDEX(i) = INDEX(j)
     473      443016 :             INDEX(j) = itemp
     474             :          END DO
     475       69552 :          iarr(l) = iarr(j)
     476       69552 :          iarr(j) = a
     477       69552 :          INDEX(l) = INDEX(j)
     478       69552 :          INDEX(j) = ib
     479       69552 :          jstack = jstack + 2
     480       69552 :          IF (jstack > nstack) CPABORT(" Nstack too small in sortr")
     481       69552 :          IF (ir - i + 1 >= j - l) THEN
     482       35784 :             istack(jstack) = ir
     483       35784 :             istack(jstack - 1) = i
     484       35784 :             ir = j - 1
     485             :          ELSE
     486       33768 :             istack(jstack) = j - 1
     487       33768 :             istack(jstack - 1) = l
     488       33768 :             l = i
     489             :          END IF
     490             :       END IF
     491             : 
     492             :       END DO
     493             : 
     494             :    END SUBROUTINE sortint
     495             : 
     496             : ! **************************************************************************************************
     497             : 
     498             : ! **************************************************************************************************
     499             : !> \brief ...
     500             : !> \param plan ...
     501             : !> \param fft_rank ...
     502             : !> \param dim_n ...
     503             : !> \param dim_istride ...
     504             : !> \param dim_ostride ...
     505             : !> \param hm_rank ...
     506             : !> \param hm_n ...
     507             : !> \param hm_istride ...
     508             : !> \param hm_ostride ...
     509             : !> \param zin ...
     510             : !> \param zout ...
     511             : !> \param fft_direction ...
     512             : !> \param fftw_plan_type ...
     513             : !> \param valid ...
     514             : ! **************************************************************************************************
     515       52828 :    SUBROUTINE fftw3_create_guru_plan(plan, fft_rank, dim_n, &
     516             :                                      dim_istride, dim_ostride, hm_rank, &
     517             :                                      hm_n, hm_istride, hm_ostride, &
     518             :                                      zin, zout, fft_direction, fftw_plan_type, &
     519             :                                      valid)
     520             : 
     521             :       IMPLICIT NONE
     522             : 
     523             :       TYPE(C_PTR), INTENT(INOUT)                         :: plan
     524             :       COMPLEX(KIND=dp), DIMENSION(*), INTENT(INOUT)      :: zin, zout
     525             :       INTEGER, INTENT(IN) :: dim_n(2), dim_istride(2), dim_ostride(2), &
     526             :                              hm_n(2), hm_istride(2), hm_ostride(2), fft_rank, &
     527             :                              fft_direction, fftw_plan_type, hm_rank
     528             :       LOGICAL, INTENT(OUT)                               :: valid
     529             : 
     530             : #if defined (__FFTW3)
     531             :       TYPE(fftw_iodim) :: dim(2), hm(2)
     532             :       INTEGER :: i
     533             : 
     534      158484 :       DO i = 1, 2
     535      105656 :          DIM(i) = fftw_iodim(dim_n(i), dim_istride(i), dim_ostride(i))
     536      158484 :          hm(i) = fftw_iodim(hm_n(i), hm_istride(i), hm_ostride(i))
     537             :       END DO
     538             : 
     539             :       plan = fftw_plan_guru_dft(fft_rank, &
     540             :                                 dim, hm_rank, hm, &
     541             :                                 zin, zout, &
     542       52828 :                                 fft_direction, fftw_plan_type)
     543             : 
     544       52828 :       valid = C_ASSOCIATED(plan)
     545             : 
     546             : #else
     547             :       MARK_USED(plan)
     548             :       MARK_USED(fft_rank)
     549             :       MARK_USED(dim_n)
     550             :       MARK_USED(dim_istride)
     551             :       MARK_USED(dim_ostride)
     552             :       MARK_USED(hm_rank)
     553             :       MARK_USED(hm_n)
     554             :       MARK_USED(hm_istride)
     555             :       MARK_USED(hm_ostride)
     556             :       MARK_USED(fft_direction)
     557             :       MARK_USED(fftw_plan_type)
     558             :       !MARK_USED does not work with assumed size arguments
     559             :       IF (.FALSE.) THEN; DO; IF (ABS(zin(1)) > ABS(zout(1))) EXIT; END DO; END IF
     560             :       valid = .FALSE.
     561             : 
     562             : #endif
     563             : 
     564       52828 :    END SUBROUTINE
     565             : 
     566             : ! **************************************************************************************************
     567             : 
     568             : ! **************************************************************************************************
     569             : !> \brief ...
     570             : !> \return ...
     571             : ! **************************************************************************************************
     572       52828 :    FUNCTION fftw3_is_mkl_wrapper() RESULT(is_mkl)
     573             : 
     574             :       IMPLICIT NONE
     575             : 
     576             :       LOGICAL :: is_mkl
     577             : #if defined ( __FFTW3 )
     578             :       LOGICAL :: guru_supported
     579             :       INTEGER :: dim_n(2), dim_istride(2), dim_ostride(2), &
     580             :                  howmany_n(2), howmany_istride(2), howmany_ostride(2)
     581             :       TYPE(C_PTR)                          :: test_plan
     582             :       COMPLEX(KIND=dp), DIMENSION(1, 1, 1) :: zin
     583             : 
     584             :       ! Attempt to create a plan with the guru interface for a 2d sub-space
     585             :       ! If this fails (e.g. for MKL's FFTW3 interface), fall back to the
     586             :       ! FFTW3 threaded 3D transform instead of the hand-optimised version
     587       52828 :       dim_n(1) = 1
     588       52828 :       dim_n(2) = 1
     589       52828 :       dim_istride(1) = 1
     590       52828 :       dim_istride(2) = 1
     591       52828 :       dim_ostride(1) = 1
     592       52828 :       dim_ostride(2) = 1
     593       52828 :       howmany_n(1) = 1
     594       52828 :       howmany_n(2) = 1
     595       52828 :       howmany_istride(1) = 1
     596       52828 :       howmany_istride(2) = 1
     597       52828 :       howmany_ostride(1) = 1
     598       52828 :       howmany_ostride(2) = 1
     599       52828 :       zin = CMPLX(0.0_dp, 0.0_dp, KIND=dp)
     600             :       CALL fftw3_create_guru_plan(test_plan, 1, &
     601             :                                   dim_n, dim_istride, dim_ostride, &
     602             :                                   2, howmany_n, howmany_istride, howmany_ostride, &
     603             :                                   zin, zin, &
     604       52828 :                                   FFTW_FORWARD, FFTW_ESTIMATE, guru_supported)
     605       52828 :       IF (guru_supported) THEN
     606       52828 :          CALL fftw_destroy_plan(test_plan)
     607       52828 :          is_mkl = .FALSE.
     608             :       ELSE
     609             :          is_mkl = .TRUE.
     610             :       END IF
     611             : 
     612             : #else
     613             :       is_mkl = .FALSE.
     614             : #endif
     615             : 
     616       52828 :    END FUNCTION
     617             : 
     618             : ! **************************************************************************************************
     619             : 
     620             : ! **************************************************************************************************
     621             : !> \brief ...
     622             : !> \param nrows ...
     623             : !> \param nt ...
     624             : !> \param rows_per_thread ...
     625             : !> \param rows_per_thread_r ...
     626             : !> \param th_planA ...
     627             : !> \param th_planB ...
     628             : ! **************************************************************************************************
     629           0 :    SUBROUTINE fftw3_compute_rows_per_th(nrows, nt, rows_per_thread, rows_per_thread_r, &
     630             :                                         th_planA, th_planB)
     631             : 
     632             :       INTEGER, INTENT(IN)                                :: nrows, nt
     633             :       INTEGER, INTENT(OUT)                               :: rows_per_thread, rows_per_thread_r, &
     634             :                                                             th_planA, th_planB
     635             : 
     636           0 :       IF (MOD(nrows, nt) .EQ. 0) THEN
     637           0 :          rows_per_thread = nrows/nt
     638           0 :          rows_per_thread_r = 0
     639           0 :          th_planA = nt
     640           0 :          th_planB = 0
     641             :       ELSE
     642           0 :          rows_per_thread = nrows/nt + 1
     643           0 :          rows_per_thread_r = nrows/nt
     644           0 :          th_planA = MOD(nrows, nt)
     645           0 :          th_planB = nt - th_planA
     646             :       END IF
     647             : 
     648           0 :    END SUBROUTINE
     649             : 
     650             : ! **************************************************************************************************
     651             : 
     652             : ! **************************************************************************************************
     653             : !> \brief ...
     654             : !> \param plan ...
     655             : !> \param plan_r ...
     656             : !> \param dim_n ...
     657             : !> \param dim_istride ...
     658             : !> \param dim_ostride ...
     659             : !> \param hm_n ...
     660             : !> \param hm_istride ...
     661             : !> \param hm_ostride ...
     662             : !> \param input ...
     663             : !> \param output ...
     664             : !> \param fft_direction ...
     665             : !> \param fftw_plan_type ...
     666             : !> \param rows_per_th ...
     667             : !> \param rows_per_th_r ...
     668             : ! **************************************************************************************************
     669           0 :    SUBROUTINE fftw3_create_3d_plans(plan, plan_r, dim_n, dim_istride, dim_ostride, &
     670             :                                     hm_n, hm_istride, hm_ostride, &
     671             :                                     input, output, &
     672             :                                     fft_direction, fftw_plan_type, rows_per_th, &
     673             :                                     rows_per_th_r)
     674             : 
     675             :       TYPE(C_PTR), INTENT(INOUT)                         :: plan, plan_r
     676             :       INTEGER, INTENT(INOUT)                             :: dim_n(2), dim_istride(2), &
     677             :                                                             dim_ostride(2), hm_n(2), &
     678             :                                                             hm_istride(2), hm_ostride(2)
     679             :       COMPLEX(KIND=dp), DIMENSION(*), INTENT(INOUT)      :: input, output
     680             :       INTEGER, INTENT(INOUT)                             :: fft_direction, fftw_plan_type
     681             :       INTEGER, INTENT(IN)                                :: rows_per_th, rows_per_th_r
     682             : 
     683             :       LOGICAL                                            :: valid
     684             : 
     685             : ! First plans will have an additional row
     686             : 
     687           0 :       hm_n(2) = rows_per_th
     688             :       CALL fftw3_create_guru_plan(plan, 1, &
     689             :                                   dim_n, dim_istride, dim_ostride, &
     690             :                                   2, hm_n, hm_istride, hm_ostride, &
     691             :                                   input, output, &
     692           0 :                                   fft_direction, fftw_plan_type, valid)
     693             : 
     694           0 :       IF (.NOT. valid) THEN
     695           0 :          CPABORT("fftw3_create_plan")
     696             :       END IF
     697             : 
     698             :       !!!! Remainder
     699           0 :       hm_n(2) = rows_per_th_r
     700             :       CALL fftw3_create_guru_plan(plan_r, 1, &
     701             :                                   dim_n, dim_istride, dim_ostride, &
     702             :                                   2, hm_n, hm_istride, hm_ostride, &
     703             :                                   input, output, &
     704           0 :                                   fft_direction, fftw_plan_type, valid)
     705           0 :       IF (.NOT. valid) THEN
     706           0 :          CPABORT("fftw3_create_plan (remaining)")
     707             :       END IF
     708             : 
     709           0 :    END SUBROUTINE
     710             : 
     711             : ! **************************************************************************************************
     712             : 
     713             : ! **************************************************************************************************
     714             : !> \brief ...
     715             : !> \param plan ...
     716             : !> \param zin ...
     717             : !> \param zout ...
     718             : !> \param plan_style ...
     719             : ! **************************************************************************************************
     720       52828 :    SUBROUTINE fftw3_create_plan_3d(plan, zin, zout, plan_style)
     721             : 
     722             :       TYPE(fft_plan_type), INTENT(INOUT)              :: plan
     723             :       COMPLEX(KIND=dp), DIMENSION(*), INTENT(INOUT)      :: zin
     724             :       COMPLEX(KIND=dp), DIMENSION(*), INTENT(INOUT)      :: zout
     725             :       INTEGER                                            :: plan_style
     726             : #if defined ( __FFTW3 )
     727             :       INTEGER                                            :: n1, n2, n3
     728             :       INTEGER                                            :: nt
     729             :       INTEGER                                            :: rows_per_th
     730             :       INTEGER                                            :: rows_per_th_r
     731             :       INTEGER                                            :: fft_direction
     732             :       INTEGER                                            :: th_planA, th_planB
     733       52828 :       COMPLEX(KIND=dp), ALLOCATABLE                      :: tmp(:)
     734             : 
     735             :       ! GURU Interface
     736             :       INTEGER :: dim_n(2), dim_istride(2), dim_ostride(2), &
     737             :                  howmany_n(2), howmany_istride(2), howmany_ostride(2)
     738             : 
     739             :       INTEGER :: fftw_plan_type
     740      105616 :       SELECT CASE (plan_style)
     741             :       CASE (1)
     742       52788 :          fftw_plan_type = FFTW_ESTIMATE
     743             :       CASE (2)
     744           8 :          fftw_plan_type = FFTW_MEASURE
     745             :       CASE (3)
     746          24 :          fftw_plan_type = FFTW_PATIENT
     747             :       CASE (4)
     748           8 :          fftw_plan_type = FFTW_EXHAUSTIVE
     749             :       CASE DEFAULT
     750       52828 :          CPABORT("fftw3_create_plan_3d")
     751             :       END SELECT
     752             : 
     753             : #if defined (__FFTW3_UNALIGNED)
     754             :       fftw_plan_type = fftw_plan_type + FFTW_UNALIGNED
     755             : #endif
     756             : 
     757       52828 :       IF (plan%fsign == +1) THEN
     758       26414 :          fft_direction = FFTW_FORWARD
     759             :       ELSE
     760       26414 :          fft_direction = FFTW_BACKWARD
     761             :       END IF
     762             : 
     763       52828 :       n1 = plan%n_3d(1)
     764       52828 :       n2 = plan%n_3d(2)
     765       52828 :       n3 = plan%n_3d(3)
     766             : 
     767       52828 :       nt = 1
     768       52828 : !$OMP PARALLEL DEFAULT(NONE) SHARED(nt)
     769             : !$OMP MASTER
     770             : !$    nt = omp_get_num_threads()
     771             : !$OMP END MASTER
     772             : !$OMP END PARALLEL
     773             : 
     774             :       IF ((fftw3_is_mkl_wrapper()) .OR. &
     775       52828 :           (.NOT. plan_style == 1) .OR. &
     776             :           (n1 < 256 .AND. n2 < 256 .AND. n3 < 256 .AND. nt == 1)) THEN
     777             :          ! If the plan type is MEASURE, PATIENT and EXHAUSTIVE or
     778             :          ! the grid size is small (and we are single-threaded) then
     779             :          ! FFTW3 does a better job than handmade optimization
     780             :          ! so plan a single 3D FFT which will execute using all the threads
     781             : 
     782       52828 :          plan%separated_plans = .FALSE.
     783       52828 : !$       CALL fftw_plan_with_nthreads(nt)
     784             : 
     785       52828 :          IF (plan%fft_in_place) THEN
     786       26414 :             plan%fftw_plan = fftw_plan_dft_3d(n3, n2, n1, zin, zin, fft_direction, fftw_plan_type)
     787             :          ELSE
     788       26414 :             plan%fftw_plan = fftw_plan_dft_3d(n3, n2, n1, zin, zout, fft_direction, fftw_plan_type)
     789             :          END IF
     790             :       ELSE
     791           0 :          ALLOCATE (tmp(n1*n2*n3))
     792             :          ! ************************* PLANS WITH TRANSPOSITIONS ****************************
     793             :          !  In the cases described above, we manually thread each stage of the 3D FFT.
     794             :          !
     795             :          !  The following plans replace the 3D FFT call by running 1D FFTW across all
     796             :          !  3 directions of the array.
     797             :          !
     798             :          !  Output of FFTW is transposed to ensure that the next round of FFTW access
     799             :          !  contiguous information.
     800             :          !
     801             :          !  Assuming the input matrix is M(n3,n2,n1), FFTW/Transp are :
     802             :          !  M(n3,n2,n1) -> fftw(x) -> M(n3,n1,n2) -> fftw(y) -> M(n1,n2,n3) -> fftw(z) -> M(n1,n2,n3)
     803             :          !  Notice that last matrix is transposed in the Z axis. A DO-loop in the execute routine
     804             :          !  will perform the final transposition. Performance evaluation showed that using an external
     805             :          !  DO loop to do the final transposition performed better than directly transposing the output.
     806             :          !  However, this might vary depending on the compiler/platform, so a potential tuning spot
     807             :          !  is to perform the final transposition within the fftw library rather than using the external loop
     808             :          !  See comments below in Z-FFT for how to transpose the output to avoid the final DO loop.
     809             :          !
     810             :          !  Doc. for the Guru interface is in http://www.fftw.org/doc/Guru-Interface.html
     811             :          !
     812             :          !  OpenMP : Work is distributed on the Z plane.
     813             :          !           All transpositions are out-of-place to facilitate multi-threading
     814             :          !
     815             :          !!!! Plan for X : M(n3,n2,n1) -> fftw(x) -> M(n3,n1,n2)
     816             :          CALL fftw3_compute_rows_per_th(n3, nt, rows_per_th, rows_per_th_r, &
     817           0 :                                         th_planA, th_planB)
     818             : 
     819           0 :          dim_n(1) = n1
     820           0 :          dim_istride(1) = 1
     821           0 :          dim_ostride(1) = n2
     822           0 :          howmany_n(1) = n2
     823           0 :          howmany_n(2) = rows_per_th
     824           0 :          howmany_istride(1) = n1
     825           0 :          howmany_istride(2) = n1*n2
     826           0 :          howmany_ostride(1) = 1
     827           0 :          howmany_ostride(2) = n1*n2
     828             :          CALL fftw3_create_3d_plans(plan%fftw_plan_nx, plan%fftw_plan_nx_r, &
     829             :                                     dim_n, dim_istride, dim_ostride, howmany_n, &
     830             :                                     howmany_istride, howmany_ostride, &
     831             :                                     zin, tmp, &
     832             :                                     fft_direction, fftw_plan_type, rows_per_th, &
     833           0 :                                     rows_per_th_r)
     834             : 
     835             :          !!!! Plan for Y : M(n3,n1,n2) -> fftw(y) -> M(n1,n2,n3)
     836             :          CALL fftw3_compute_rows_per_th(n3, nt, rows_per_th, rows_per_th_r, &
     837           0 :                                         th_planA, th_planB)
     838           0 :          dim_n(1) = n2
     839             :          dim_istride(1) = 1
     840           0 :          dim_ostride(1) = n3
     841           0 :          howmany_n(1) = n1
     842           0 :          howmany_n(2) = rows_per_th
     843           0 :          howmany_istride(1) = n2
     844             :          howmany_istride(2) = n1*n2
     845             :          !!! transposed Z axis on output
     846           0 :          howmany_ostride(1) = n2*n3
     847           0 :          howmany_ostride(2) = 1
     848             : 
     849             :          CALL fftw3_create_3d_plans(plan%fftw_plan_ny, plan%fftw_plan_ny_r, &
     850             :                                     dim_n, dim_istride, dim_ostride, &
     851             :                                     howmany_n, howmany_istride, howmany_ostride, &
     852             :                                     tmp, zin, &
     853             :                                     fft_direction, fftw_plan_type, rows_per_th, &
     854           0 :                                     rows_per_th_r)
     855             : 
     856             :          !!!! Plan for Z : M(n1,n2,n3) -> fftw(z) -> M(n1,n2,n3)
     857             :          CALL fftw3_compute_rows_per_th(n1, nt, rows_per_th, rows_per_th_r, &
     858           0 :                                         th_planA, th_planB)
     859           0 :          dim_n(1) = n3
     860             :          dim_istride(1) = 1
     861           0 :          dim_ostride(1) = 1          ! To transpose: n2*n1
     862           0 :          howmany_n(1) = n2
     863           0 :          howmany_n(2) = rows_per_th
     864           0 :          howmany_istride(1) = n3
     865           0 :          howmany_istride(2) = n2*n3
     866           0 :          howmany_ostride(1) = n3     ! To transpose: n1
     867           0 :          howmany_ostride(2) = n2*n3  ! To transpose: 1
     868             : 
     869             :          CALL fftw3_create_3d_plans(plan%fftw_plan_nz, plan%fftw_plan_nz_r, &
     870             :                                     dim_n, dim_istride, dim_ostride, &
     871             :                                     howmany_n, howmany_istride, howmany_ostride, &
     872             :                                     zin, tmp, &
     873             :                                     fft_direction, fftw_plan_type, rows_per_th, &
     874           0 :                                     rows_per_th_r)
     875             : 
     876           0 :          plan%separated_plans = .TRUE.
     877             : 
     878           0 :          DEALLOCATE (tmp)
     879             :       END IF
     880             : 
     881             : #else
     882             :       MARK_USED(plan)
     883             :       MARK_USED(plan_style)
     884             :       !MARK_USED does not work with assumed size arguments
     885             :       IF (.FALSE.) THEN; DO; IF (ABS(zin(1)) > ABS(zout(1))) EXIT; END DO; END IF
     886             : #endif
     887             : 
     888       52828 :    END SUBROUTINE fftw3_create_plan_3d
     889             : 
     890             : ! **************************************************************************************************
     891             : 
     892             : ! **************************************************************************************************
     893             : !> \brief ...
     894             : !> \param plan ...
     895             : !> \param plan_r ...
     896             : !> \param split_dim ...
     897             : !> \param nt ...
     898             : !> \param tid ...
     899             : !> \param input ...
     900             : !> \param istride ...
     901             : !> \param output ...
     902             : !> \param ostride ...
     903             : ! **************************************************************************************************
     904           0 :    SUBROUTINE fftw3_workshare_execute_dft(plan, plan_r, split_dim, nt, tid, &
     905             :                                           input, istride, output, ostride)
     906             : 
     907             :       INTEGER, INTENT(IN)                           :: split_dim, nt, tid
     908             :       INTEGER, INTENT(IN)                           :: istride, ostride
     909             :       COMPLEX(KIND=dp), DIMENSION(*), INTENT(INOUT) :: input, output
     910             :       TYPE(C_PTR)                                   :: plan, plan_r
     911             : #if defined (__FFTW3)
     912             :       INTEGER                                     :: i_off, o_off
     913             :       INTEGER                                     :: th_planA, th_planB
     914             :       INTEGER :: rows_per_thread, rows_per_thread_r
     915             : 
     916             :       CALL fftw3_compute_rows_per_th(split_dim, nt, rows_per_thread, &
     917             :                                      rows_per_thread_r, &
     918           0 :                                      th_planA, th_planB)
     919             : 
     920           0 :       IF (th_planB .GT. 0) THEN
     921           0 :          IF (tid .LT. th_planA) THEN
     922           0 :             i_off = (tid)*(istride*(rows_per_thread)) + 1
     923           0 :             o_off = (tid)*(ostride*(rows_per_thread)) + 1
     924           0 :             IF (rows_per_thread .GT. 0) THEN
     925             :                CALL fftw_execute_dft(plan, input(i_off), &
     926           0 :                                      output(o_off))
     927             :             END IF
     928           0 :          ELSE IF ((tid - th_planA) < th_planB) THEN
     929             : 
     930             :             i_off = (th_planA)*istride*(rows_per_thread) + &
     931           0 :                     (tid - th_planA)*istride*(rows_per_thread_r) + 1
     932             :             o_off = (th_planA)*ostride*(rows_per_thread) + &
     933           0 :                     (tid - th_planA)*ostride*(rows_per_thread_r) + 1
     934             : 
     935             :             CALL fftw_execute_dft(plan_r, input(i_off), &
     936           0 :                                   output(o_off))
     937             :          END IF
     938             : 
     939             :       ELSE
     940           0 :          i_off = (tid)*(istride*(rows_per_thread)) + 1
     941           0 :          o_off = (tid)*(ostride*(rows_per_thread)) + 1
     942             : 
     943             :          CALL fftw_execute_dft(plan, input(i_off), &
     944           0 :                                output(o_off))
     945             : 
     946             :       END IF
     947             : #else
     948             :       MARK_USED(plan)
     949             :       MARK_USED(plan_r)
     950             :       MARK_USED(split_dim)
     951             :       MARK_USED(nt)
     952             :       MARK_USED(tid)
     953             :       MARK_USED(istride)
     954             :       MARK_USED(ostride)
     955             :       !MARK_USED does not work with assumed size arguments
     956             :       IF (.FALSE.) THEN; DO; IF (ABS(input(1)) > ABS(output(1))) EXIT; END DO; END IF
     957             : #endif
     958             : 
     959           0 :    END SUBROUTINE
     960             : 
     961             : ! **************************************************************************************************
     962             : 
     963             : ! **************************************************************************************************
     964             : !> \brief ...
     965             : !> \param plan ...
     966             : !> \param scale ...
     967             : !> \param zin ...
     968             : !> \param zout ...
     969             : !> \param stat ...
     970             : ! **************************************************************************************************
     971      632582 :    SUBROUTINE fftw33d(plan, scale, zin, zout, stat)
     972             : 
     973             :       TYPE(fft_plan_type), INTENT(IN)                      :: plan
     974             :       REAL(KIND=dp), INTENT(IN)                            :: scale
     975             :       COMPLEX(KIND=dp), DIMENSION(*), INTENT(INOUT), TARGET:: zin
     976             :       COMPLEX(KIND=dp), DIMENSION(*), INTENT(INOUT), TARGET:: zout
     977             :       INTEGER, INTENT(OUT)                                 :: stat
     978             : #if defined ( __FFTW3 )
     979      632582 :       COMPLEX(KIND=dp), POINTER                            :: xout(:)
     980      632582 :       COMPLEX(KIND=dp), ALLOCATABLE                        :: tmp1(:)
     981             :       INTEGER                                              :: n1, n2, n3
     982             :       INTEGER                                              :: tid, nt
     983             :       INTEGER                                              :: i, j, k
     984             : 
     985      632582 :       n1 = plan%n_3d(1)
     986      632582 :       n2 = plan%n_3d(2)
     987      632582 :       n3 = plan%n_3d(3)
     988             : 
     989      632582 :       stat = 1
     990             : 
     991             :       ! We use a POINTER to the output array to avoid duplicating code
     992      632582 :       IF (plan%fft_in_place) THEN
     993      632578 :          xout => zin(:n1*n2*n3)
     994             :       ELSE
     995           4 :          xout => zout(:n1*n2*n3)
     996             :       END IF
     997             : 
     998             :       ! Either compute the full 3D FFT using a multithreaded plan
     999      632582 :       IF (.NOT. plan%separated_plans) THEN
    1000      632582 :          CALL fftw_execute_dft(plan%fftw_plan, zin, xout)
    1001             :       ELSE
    1002             :          ! Or use the 3 stage FFT scheme described in fftw3_create_plan_3d
    1003           0 :          ALLOCATE (tmp1(n1*n2*n3))   ! Temporary vector used for transpositions
    1004           0 :          !$OMP PARALLEL DEFAULT(NONE) PRIVATE(tid,nt,i,j,k) SHARED(zin,tmp1,n1,n2,n3,plan,xout)
    1005             :          tid = 0
    1006             :          nt = 1
    1007             : 
    1008             : !$       tid = omp_get_thread_num()
    1009             : !$       nt = omp_get_num_threads()
    1010             :          CALL fftw3_workshare_execute_dft(plan%fftw_plan_nx, plan%fftw_plan_nx_r, &
    1011             :                                           n3, nt, tid, &
    1012             :                                           zin, n1*n2, tmp1, n1*n2)
    1013             : 
    1014             :          !$OMP BARRIER
    1015             :          CALL fftw3_workshare_execute_dft(plan%fftw_plan_ny, plan%fftw_plan_ny_r, &
    1016             :                                           n3, nt, tid, &
    1017             :                                           tmp1, n1*n2, xout, 1)
    1018             :          !$OMP BARRIER
    1019             :          CALL fftw3_workshare_execute_dft(plan%fftw_plan_nz, plan%fftw_plan_nz_r, &
    1020             :                                           n1, nt, tid, &
    1021             :                                           xout, n2*n3, tmp1, n2*n3)
    1022             :          !$OMP BARRIER
    1023             : 
    1024             :          !$OMP DO COLLAPSE(3)
    1025             :          DO i = 1, n1
    1026             :             DO j = 1, n2
    1027             :                DO k = 1, n3
    1028             :                   xout((i - 1) + (j - 1)*n1 + (k - 1)*n1*n2 + 1) = &
    1029             :                      tmp1((k - 1) + (j - 1)*n3 + (i - 1)*n3*n2 + 1)
    1030             :                END DO
    1031             :             END DO
    1032             :          END DO
    1033             :          !$OMP END DO
    1034             : 
    1035             :          !$OMP END PARALLEL
    1036             :       END IF
    1037             : 
    1038      632582 :       IF (scale /= 1.0_dp) THEN
    1039      300000 :          CALL zdscal(n1*n2*n3, scale, xout, 1)
    1040             :       END IF
    1041             : 
    1042             : #else
    1043             :       MARK_USED(plan)
    1044             :       MARK_USED(scale)
    1045             :       !MARK_USED does not work with assumed size arguments
    1046             :       IF (.FALSE.) THEN; DO; IF (ABS(zin(1)) > ABS(zout(1))) EXIT; END DO; END IF
    1047             :       stat = 0
    1048             : 
    1049             : #endif
    1050             : 
    1051      632582 :    END SUBROUTINE fftw33d
    1052             : 
    1053             : ! **************************************************************************************************
    1054             : 
    1055             : ! **************************************************************************************************
    1056             : !> \brief ...
    1057             : !> \param plan ...
    1058             : !> \param zin ...
    1059             : !> \param zout ...
    1060             : !> \param plan_style ...
    1061             : ! **************************************************************************************************
    1062      159232 :    SUBROUTINE fftw3_create_plan_1dm(plan, zin, zout, plan_style)
    1063             : 
    1064             :       IMPLICIT NONE
    1065             : 
    1066             :       TYPE(fft_plan_type), INTENT(INOUT)              :: plan
    1067             :       COMPLEX(KIND=dp), DIMENSION(*), INTENT(IN)         :: zin
    1068             :       COMPLEX(KIND=dp), DIMENSION(*), INTENT(IN)         :: zout
    1069             :       INTEGER, INTENT(IN)                                :: plan_style
    1070             : #if defined ( __FFTW3 )
    1071             :       INTEGER                                            :: ii, di, io, DO, num_threads, num_rows
    1072             : 
    1073             :       INTEGER :: fftw_plan_type
    1074      318260 :       SELECT CASE (plan_style)
    1075             :       CASE (1)
    1076      159028 :          fftw_plan_type = FFTW_ESTIMATE
    1077             :       CASE (2)
    1078          48 :          fftw_plan_type = FFTW_MEASURE
    1079             :       CASE (3)
    1080         144 :          fftw_plan_type = FFTW_PATIENT
    1081             :       CASE (4)
    1082          12 :          fftw_plan_type = FFTW_EXHAUSTIVE
    1083             :       CASE DEFAULT
    1084      159232 :          CPABORT("fftw3_create_plan_1dm")
    1085             :       END SELECT
    1086             : 
    1087             : #if defined (__FFTW3_UNALIGNED)
    1088             :       fftw_plan_type = fftw_plan_type + FFTW_UNALIGNED
    1089             : #endif
    1090      159232 :       num_threads = 1
    1091      159232 :       plan%separated_plans = .FALSE.
    1092             : !$OMP PARALLEL DEFAULT(NONE), &
    1093      159232 : !$OMP          SHARED(NUM_THREADS)
    1094             : !$OMP MASTER
    1095             : !$    num_threads = omp_get_num_threads()
    1096             : !$OMP END MASTER
    1097             : !$OMP END PARALLEL
    1098             : 
    1099      159232 :       num_rows = plan%m/num_threads
    1100      159232 : !$    plan%num_threads_needed = num_threads
    1101             : 
    1102             : ! Check for number of rows less than num_threads
    1103      159232 : !$    IF (plan%m < num_threads) THEN
    1104           0 : !$       num_rows = 1
    1105           0 : !$       plan%num_threads_needed = plan%m
    1106             : !$    END IF
    1107             : 
    1108             : ! Check for total number of rows not divisible by num_threads
    1109      159232 : !$    IF (num_rows*plan%num_threads_needed .NE. plan%m) THEN
    1110           0 : !$       plan%need_alt_plan = .TRUE.
    1111             : !$    END IF
    1112             : 
    1113      159232 : !$    plan%num_rows = num_rows
    1114      159232 :       ii = 1
    1115      159232 :       di = plan%n
    1116      159232 :       io = 1
    1117      159232 :       DO = plan%n
    1118      159232 :       IF (plan%fsign == +1 .AND. plan%trans) THEN
    1119       79506 :          ii = plan%m
    1120       79506 :          di = 1
    1121       79726 :       ELSEIF (plan%fsign == -1 .AND. plan%trans) THEN
    1122       79506 :          io = plan%m
    1123       79506 :          DO = 1
    1124             :       END IF
    1125             : 
    1126      159232 :       IF (plan%fsign == +1) THEN
    1127             :          CALL dfftw_plan_many_dft(plan%fftw_plan, 1, plan%n, num_rows, zin, 0, ii, di, &
    1128       79726 :                                   zout, 0, io, DO, FFTW_FORWARD, fftw_plan_type)
    1129             :       ELSE
    1130             :          CALL dfftw_plan_many_dft(plan%fftw_plan, 1, plan%n, num_rows, zin, 0, ii, di, &
    1131       79506 :                                   zout, 0, io, DO, FFTW_BACKWARD, fftw_plan_type)
    1132             :       END IF
    1133             : 
    1134      159232 : !$    IF (plan%need_alt_plan) THEN
    1135           0 : !$       plan%alt_num_rows = plan%m - (plan%num_threads_needed - 1)*num_rows
    1136           0 : !$       IF (plan%fsign == +1) THEN
    1137             : !$          CALL dfftw_plan_many_dft(plan%alt_fftw_plan, 1, plan%n, plan%alt_num_rows, zin, 0, ii, di, &
    1138           0 : !$                                   zout, 0, io, DO, FFTW_FORWARD, fftw_plan_type)
    1139             : !$       ELSE
    1140             : !$          CALL dfftw_plan_many_dft(plan%alt_fftw_plan, 1, plan%n, plan%alt_num_rows, zin, 0, ii, di, &
    1141           0 : !$                                   zout, 0, io, DO, FFTW_BACKWARD, fftw_plan_type)
    1142             : !$       END IF
    1143             : !$    END IF
    1144             : 
    1145             : #else
    1146             :       MARK_USED(plan)
    1147             :       MARK_USED(plan_style)
    1148             :       !MARK_USED does not work with assumed size arguments
    1149             :       IF (.FALSE.) THEN; DO; IF (ABS(zin(1)) > ABS(zout(1))) EXIT; END DO; END IF
    1150             : #endif
    1151             : 
    1152      159232 :    END SUBROUTINE fftw3_create_plan_1dm
    1153             : 
    1154             : ! **************************************************************************************************
    1155             : !> \brief ...
    1156             : !> \param plan ...
    1157             : ! **************************************************************************************************
    1158      212060 :    SUBROUTINE fftw3_destroy_plan(plan)
    1159             : 
    1160             :       TYPE(fft_plan_type), INTENT(INOUT)   :: plan
    1161             : 
    1162             : #if defined ( __FFTW3 )
    1163      212060 : !$    IF (plan%need_alt_plan) THEN
    1164           0 : !$       CALL fftw_destroy_plan(plan%alt_fftw_plan)
    1165             : !$    END IF
    1166             : 
    1167      212060 :       IF (.NOT. plan%separated_plans) THEN
    1168      212060 :          CALL fftw_destroy_plan(plan%fftw_plan)
    1169             :       ELSE
    1170             :          ! If it is a separated plan then we have to destroy
    1171             :          ! each dim plan individually
    1172           0 :          CALL fftw_destroy_plan(plan%fftw_plan_nx)
    1173           0 :          CALL fftw_destroy_plan(plan%fftw_plan_ny)
    1174           0 :          CALL fftw_destroy_plan(plan%fftw_plan_nz)
    1175           0 :          CALL fftw_destroy_plan(plan%fftw_plan_nx_r)
    1176           0 :          CALL fftw_destroy_plan(plan%fftw_plan_ny_r)
    1177           0 :          CALL fftw_destroy_plan(plan%fftw_plan_nz_r)
    1178             :       END IF
    1179             : 
    1180             : #else
    1181             :       MARK_USED(plan)
    1182             : #endif
    1183             : 
    1184      212060 :    END SUBROUTINE fftw3_destroy_plan
    1185             : 
    1186             : ! **************************************************************************************************
    1187             : !> \brief ...
    1188             : !> \param plan ...
    1189             : !> \param zin ...
    1190             : !> \param zout ...
    1191             : !> \param scale ...
    1192             : !> \param stat ...
    1193             : ! **************************************************************************************************
    1194     7973236 :    SUBROUTINE fftw31dm(plan, zin, zout, scale, stat)
    1195             :       TYPE(fft_plan_type), INTENT(IN)                    :: plan
    1196             :       COMPLEX(KIND=dp), DIMENSION(*), INTENT(IN), TARGET :: zin
    1197             :       COMPLEX(KIND=dp), DIMENSION(*), INTENT(INOUT), &
    1198             :          TARGET                                          :: zout
    1199             :       REAL(KIND=dp), INTENT(IN)                          :: scale
    1200             :       INTEGER, INTENT(OUT)                               :: stat
    1201             : 
    1202             :       INTEGER                                            :: in_offset, my_id, num_rows, out_offset, &
    1203             :                                                             scal_offset
    1204             :       TYPE(C_PTR)                                        :: fftw_plan
    1205             : 
    1206             : !------------------------------------------------------------------------------
    1207             : 
    1208     7973236 :       my_id = 0
    1209     7973236 :       num_rows = plan%m
    1210             : 
    1211             : !$OMP PARALLEL DEFAULT(NONE), &
    1212             : !$OMP          PRIVATE(my_id,num_rows,in_offset,out_offset,scal_offset,fftw_plan), &
    1213             : !$OMP          SHARED(zin,zout), &
    1214     7973236 : !$OMP          SHARED(plan,scale,stat)
    1215             : !$    my_id = omp_get_thread_num()
    1216             : 
    1217             : !$    if (my_id < plan%num_threads_needed) then
    1218             : 
    1219             :          fftw_plan = plan%fftw_plan
    1220             : 
    1221             :          in_offset = 1
    1222             :          out_offset = 1
    1223             :          scal_offset = 1
    1224             : 
    1225             : !$       in_offset = 1 + plan%num_rows*my_id*plan%n
    1226             : !$       out_offset = 1 + plan%num_rows*my_id*plan%n
    1227             : !$       IF (plan%fsign == +1 .AND. plan%trans) THEN
    1228             : !$          in_offset = 1 + plan%num_rows*my_id
    1229             : !$       ELSEIF (plan%fsign == -1 .AND. plan%trans) THEN
    1230             : !$          out_offset = 1 + plan%num_rows*my_id
    1231             : !$       END IF
    1232             : !$       scal_offset = 1 + plan%n*plan%num_rows*my_id
    1233             : !$       IF (plan%need_alt_plan .AND. my_id .EQ. plan%num_threads_needed - 1) THEN
    1234             : !$          num_rows = plan%alt_num_rows
    1235             : !$          fftw_plan = plan%alt_fftw_plan
    1236             : !$       ELSE
    1237             : !$          num_rows = plan%num_rows
    1238             : !$       END IF
    1239             : 
    1240             : #if defined ( __FFTW3 )
    1241             : !$OMP MASTER
    1242             :          stat = 1
    1243             : !$OMP END MASTER
    1244             :          CALL dfftw_execute_dft(fftw_plan, zin(in_offset:in_offset), zout(out_offset:out_offset))
    1245             : !$    end if
    1246             : ! all theads need to meet at this barrier
    1247             : !$OMP BARRIER
    1248             : !$    if (my_id < plan%num_threads_needed) then
    1249             :          IF (scale /= 1.0_dp) CALL zdscal(plan%n*num_rows, scale, zout(scal_offset:scal_offset), 1)
    1250             : !$    end if
    1251             : 
    1252             : #else
    1253             :       MARK_USED(plan)
    1254             :       MARK_USED(scale)
    1255             :       !MARK_USED does not work with assumed size arguments
    1256             :       IF (.FALSE.) THEN; DO; IF (ABS(zin(1)) > ABS(zout(1))) EXIT; END DO; END IF
    1257             :       stat = 0
    1258             : 
    1259             : !$    else
    1260             : !$    end if
    1261             : 
    1262             : #endif
    1263             : 
    1264             : !$OMP END PARALLEL
    1265             : 
    1266     7973236 :       END SUBROUTINE fftw31dm
    1267             : 
    1268           0 :    END MODULE

Generated by: LCOV version 1.15