Line data Source code
1 : !--------------------------------------------------------------------------------------------------!
2 : ! CP2K: A general program to perform molecular dynamics simulations !
3 : ! Copyright 2000-2025 CP2K developers group <https://cp2k.org> !
4 : ! !
5 : ! SPDX-License-Identifier: GPL-2.0-or-later !
6 : !--------------------------------------------------------------------------------------------------!
7 : MODULE fftw3_lib
8 : USE ISO_C_BINDING, ONLY: C_ASSOCIATED, &
9 : C_CHAR, &
10 : C_DOUBLE, &
11 : C_DOUBLE_COMPLEX, &
12 : C_INT, &
13 : C_PTR
14 : #if defined(__FFTW3)
15 : USE ISO_C_BINDING, ONLY: &
16 : C_FLOAT, &
17 : C_FLOAT_COMPLEX, &
18 : C_FUNPTR, &
19 : C_INT32_T, &
20 : C_INTPTR_T, &
21 : C_LOC, &
22 : C_NULL_CHAR, &
23 : C_SIZE_T, C_F_POINTER
24 : #endif
25 : USE cp_files, ONLY: get_unit_number
26 : USE fft_kinds, ONLY: dp
27 : USE fft_plan, ONLY: fft_plan_type
28 :
29 : !$ USE OMP_LIB, ONLY: omp_get_max_threads, omp_get_thread_num, omp_get_num_threads
30 :
31 : #include "../../base/base_uses.f90"
32 :
33 : IMPLICIT NONE
34 : PRIVATE
35 :
36 : PUBLIC :: fftw3_do_init, fftw3_do_cleanup, fftw3_get_lengths, fftw33d, fftw31dm
37 : PUBLIC :: fftw3_destroy_plan, fftw3_create_plan_1dm, fftw3_create_plan_3d
38 : PUBLIC :: fftw_alloc, fftw_dealloc
39 :
40 : #if defined ( __FFTW3 )
41 : #if defined ( __FFTW3_MKL )
42 : #include "fftw/fftw3.f03"
43 : #else
44 : #include "fftw3.f03"
45 : #endif
46 : #endif
47 :
48 : INTERFACE fftw_alloc
49 : MODULE PROCEDURE :: fftw_alloc_complex_1d
50 : MODULE PROCEDURE :: fftw_alloc_complex_2d
51 : MODULE PROCEDURE :: fftw_alloc_complex_3d
52 : END INTERFACE
53 :
54 : INTERFACE fftw_dealloc
55 : MODULE PROCEDURE :: fftw_dealloc_complex_1d
56 : MODULE PROCEDURE :: fftw_dealloc_complex_2d
57 : MODULE PROCEDURE :: fftw_dealloc_complex_3d
58 : END INTERFACE
59 :
60 : CONTAINS
61 :
62 : #:set maxdim = 3
63 : #:for dim in range(1, maxdim+1)
64 : ! Concatenate the components of the dimensions passed to this function to use it if FFTW3 is not used
65 : #:set dim_extended = ", ".join(["n("+str(i)+")" for i in range(1, dim+1)])
66 108193 : SUBROUTINE fftw_alloc_complex_${dim}$d(array, n)
67 : COMPLEX(C_DOUBLE_COMPLEX), DIMENSION(:${", :"*(dim-1)}$), CONTIGUOUS, POINTER, INTENT(OUT) :: array
68 : INTEGER, DIMENSION(${dim}$), INTENT(IN) :: n
69 :
70 : #if defined(__FFTW3)
71 : TYPE(C_PTR) :: data_ptr
72 379426 : data_ptr = fftw_alloc_complex(INT(PRODUCT(n), KIND=C_SIZE_T))
73 379426 : CALL C_F_POINTER(data_ptr, array, n)
74 : #else
75 : ! Just allocate the array
76 : ALLOCATE (array(${dim_extended}$))
77 : #endif
78 :
79 108193 : END SUBROUTINE fftw_alloc_complex_${dim}$d
80 :
81 108193 : SUBROUTINE fftw_dealloc_complex_${dim}$d(array)
82 : COMPLEX(C_DOUBLE_COMPLEX), DIMENSION(:${", :"*(dim-1)}$), CONTIGUOUS, POINTER, INTENT(INOUT) :: array
83 :
84 : #if defined(__FFTW3)
85 108193 : CALL fftw_free(C_LOC(array))
86 108193 : NULLIFY (array)
87 : #else
88 : ! Just deallocate the array
89 : DEALLOCATE (array)
90 : #endif
91 :
92 108193 : END SUBROUTINE fftw_dealloc_complex_${dim}$d
93 : #:endfor
94 :
95 : #if defined ( __FFTW3 )
96 : ! **************************************************************************************************
97 : !> \brief A workaround that allows us to compile with -Werror=unused-parameter
98 : ! **************************************************************************************************
99 0 : SUBROUTINE dummy_routine_to_call_mark_used()
100 : MARK_USED(FFTW_R2HC)
101 : MARK_USED(FFTW_HC2R)
102 : MARK_USED(FFTW_DHT)
103 : MARK_USED(FFTW_REDFT00)
104 : MARK_USED(FFTW_REDFT01)
105 : MARK_USED(FFTW_REDFT10)
106 : MARK_USED(FFTW_REDFT11)
107 : MARK_USED(FFTW_RODFT00)
108 : MARK_USED(FFTW_RODFT01)
109 : MARK_USED(FFTW_RODFT10)
110 : MARK_USED(FFTW_RODFT11)
111 : MARK_USED(FFTW_FORWARD)
112 : MARK_USED(FFTW_BACKWARD)
113 : MARK_USED(FFTW_MEASURE)
114 : MARK_USED(FFTW_DESTROY_INPUT)
115 : MARK_USED(FFTW_UNALIGNED)
116 : MARK_USED(FFTW_CONSERVE_MEMORY)
117 : MARK_USED(FFTW_EXHAUSTIVE)
118 : MARK_USED(FFTW_PRESERVE_INPUT)
119 : MARK_USED(FFTW_PATIENT)
120 : MARK_USED(FFTW_ESTIMATE)
121 : MARK_USED(FFTW_WISDOM_ONLY)
122 : MARK_USED(FFTW_ESTIMATE_PATIENT)
123 : MARK_USED(FFTW_BELIEVE_PCOST)
124 : MARK_USED(FFTW_NO_DFT_R2HC)
125 : MARK_USED(FFTW_NO_NONTHREADED)
126 : MARK_USED(FFTW_NO_BUFFERING)
127 : MARK_USED(FFTW_NO_INDIRECT_OP)
128 : MARK_USED(FFTW_ALLOW_LARGE_GENERIC)
129 : MARK_USED(FFTW_NO_RANK_SPLITS)
130 : MARK_USED(FFTW_NO_VRANK_SPLITS)
131 : MARK_USED(FFTW_NO_VRECURSE)
132 : MARK_USED(FFTW_NO_SIMD)
133 : MARK_USED(FFTW_NO_SLOW)
134 : MARK_USED(FFTW_NO_FIXED_RADIX_LARGE_N)
135 : MARK_USED(FFTW_ALLOW_PRUNING)
136 0 : END SUBROUTINE
137 : #endif
138 :
139 : ! **************************************************************************************************
140 : !> \brief ...
141 : !> \param wisdom_file ...
142 : !> \param ionode ...
143 : ! **************************************************************************************************
144 9569 : SUBROUTINE fftw3_do_cleanup(wisdom_file, ionode)
145 :
146 : CHARACTER(LEN=*), INTENT(IN) :: wisdom_file
147 : LOGICAL :: ionode
148 :
149 : #if defined ( __FFTW3 )
150 9569 : CHARACTER(LEN=1, KIND=C_CHAR), DIMENSION(:), ALLOCATABLE :: wisdom_file_name_c
151 : INTEGER :: file_name_length, i, iunit, istat
152 : INTEGER(KIND=C_INT) :: isuccess
153 : ! Write out FFTW3 wisdom to file (if we can)
154 : ! only the ionode updates the wisdom
155 9569 : IF (ionode) THEN
156 4888 : iunit = get_unit_number()
157 : ! Check whether the file can be opened in the necessary manner
158 4888 : OPEN (UNIT=iunit, FILE=wisdom_file, STATUS="UNKNOWN", FORM="FORMATTED", ACTION="WRITE", IOSTAT=istat)
159 4888 : IF (istat == 0) THEN
160 2 : CLOSE (iunit)
161 2 : file_name_length = LEN_TRIM(wisdom_file)
162 6 : ALLOCATE (wisdom_file_name_c(file_name_length + 1))
163 18 : DO i = 1, file_name_length
164 18 : wisdom_file_name_c(i) = wisdom_file(i:i)
165 : END DO
166 2 : wisdom_file_name_c(file_name_length + 1) = C_NULL_CHAR
167 2 : isuccess = fftw_export_wisdom_to_filename(wisdom_file_name_c)
168 2 : IF (isuccess == 0) &
169 : CALL cp_warn(__LOCATION__, "Error exporting wisdom to file "//TRIM(wisdom_file)//". "// &
170 0 : "Wisdom was not exported.")
171 : END IF
172 : END IF
173 :
174 9569 : CALL fftw_cleanup()
175 : #else
176 : MARK_USED(wisdom_file)
177 : MARK_USED(ionode)
178 : #endif
179 :
180 9569 : END SUBROUTINE
181 :
182 : ! **************************************************************************************************
183 : !> \brief ...
184 : !> \param wisdom_file ...
185 : ! **************************************************************************************************
186 9779 : SUBROUTINE fftw3_do_init(wisdom_file)
187 :
188 : CHARACTER(LEN=*), INTENT(IN) :: wisdom_file
189 :
190 : #if defined ( __FFTW3 )
191 9779 : CHARACTER(LEN=1, KIND=C_CHAR), DIMENSION(:), ALLOCATABLE :: wisdom_file_name_c
192 : INTEGER :: file_name_length, i, istat, iunit
193 : INTEGER(KIND=C_INT) :: isuccess
194 : LOGICAL :: file_exists
195 :
196 : #if defined(__MKL)
197 : !$ LOGICAL :: mkl_is_safe
198 : #endif
199 :
200 : ! If using the Intel compiler then we need to declare
201 : ! a C interface to a global variable in MKL that sets
202 : ! the number of threads which can concurrently execute
203 : ! an FFT
204 : ! We need __INTEL_COMPILER so we can be sure that the compiler
205 : ! understands the !DEC$ version definitions
206 : #if defined (__INTEL_COMPILER) && defined (__MKL)
207 : !$ include "mkl.fi"
208 : !DEC$ IF DEFINED (INTEL_MKL_VERSION)
209 : !DEC$ IF INTEL_MKL_VERSION .EQ. 110100
210 : !DIR$ ATTRIBUTES ALIGN : 8 :: fftw3_mkl
211 : !$ COMMON/fftw3_mkl/ignore(4), mkl_dft_number_of_user_threads, ignore2(7)
212 : !$ INTEGER*4 :: ignore, mkl_dft_number_of_user_threads, ignore2
213 : !$ BIND(c) :: /fftw3_mkl/
214 : !DEC$ ENDIF
215 : !DEC$ ENDIF
216 : #elif defined (__MKL)
217 : ! Preprocessing is enabled by default, and below header is not language specific
218 : #include <mkl_version.h>
219 : #endif
220 :
221 9779 : isuccess = fftw_init_threads()
222 9779 : IF (isuccess == 0) &
223 0 : CPABORT("Error initializing FFTW with threads")
224 :
225 : ! Read FFTW wisdom (if available)
226 : ! all nodes are opening the file here...
227 9779 : INQUIRE (FILE=wisdom_file, exist=file_exists)
228 9779 : IF (file_exists) THEN
229 2 : iunit = get_unit_number()
230 2 : file_name_length = LEN_TRIM(wisdom_file)
231 : ! Check whether the file can be opened in the necessary manner
232 : OPEN (UNIT=iunit, FILE=wisdom_file, STATUS="OLD", FORM="FORMATTED", POSITION="REWIND", &
233 2 : ACTION="READ", IOSTAT=istat)
234 2 : IF (istat == 0) THEN
235 2 : CLOSE (iunit)
236 2 : file_name_length = LEN_TRIM(wisdom_file)
237 6 : ALLOCATE (wisdom_file_name_c(file_name_length + 1))
238 18 : DO i = 1, file_name_length
239 18 : wisdom_file_name_c(i) = wisdom_file(i:i)
240 : END DO
241 2 : wisdom_file_name_c(file_name_length + 1) = C_NULL_CHAR
242 2 : isuccess = fftw_import_wisdom_from_filename(wisdom_file_name_c)
243 2 : IF (isuccess == 0) &
244 : CALL cp_warn(__LOCATION__, "Error importing wisdom from file "//TRIM(wisdom_file)//". "// &
245 : "Maybe the file was created with a different configuration than CP2K is run with. "// &
246 0 : "CP2K continues without importing wisdom.")
247 : END IF
248 : END IF
249 :
250 : #if defined (__MKL)
251 : ! Now check if we have a real FFTW3 library, or are using MKL wrappers
252 :
253 : !$ IF (fftw3_is_mkl_wrapper() .and. omp_get_max_threads() .gt. 1) THEN
254 : ! If we are not using the Intel compiler, there is no way to tell which
255 : ! MKL version is in use, so fail safe...
256 : !$ mkl_is_safe = .FALSE.
257 : #if defined(INTEL_MKL_VERSION) && (110100 < INTEL_MKL_VERSION)
258 : !$ mkl_is_safe = .TRUE.
259 : #elif defined (__INTEL_COMPILER)
260 : ! If we have an Intel compiler (__INTEL_COMPILER is defined) then check the
261 : ! MKL version and make the appropriate action
262 : !DEC$ IF DEFINED (INTEL_MKL_VERSION)
263 : !DEC$ IF INTEL_MKL_VERSION .EQ. 110100
264 : !$ mkl_dft_number_of_user_threads = omp_get_max_threads()
265 : !DEC$ ENDIF
266 : !DEC$ IF INTEL_MKL_VERSION .GE. 110100
267 : !$ mkl_is_safe = .TRUE.
268 : !DEC$ ENDIF
269 : !DEC$ ENDIF
270 : #endif
271 : !$ IF (.NOT. mkl_is_safe) THEN
272 : !$ CALL cp_abort(__LOCATION__, "Intel's FFTW3 interface to MKL is not "// &
273 : !$ "thread-safe prior to MKL 11.1.0! Please "// &
274 : !$ "rebuild CP2K, linking against FFTW 3 from "// &
275 : !$ "www.fftw.org or a newer version of MKL. "// &
276 : !$ "Now exiting...")
277 : !$ END IF
278 : !$ END IF
279 : #endif
280 : #else
281 : MARK_USED(wisdom_file)
282 : #endif
283 :
284 9779 : END SUBROUTINE
285 :
286 : ! **************************************************************************************************
287 : !> \brief ...
288 : !> \param DATA ...
289 : !> \param max_length ...
290 : !> \par History
291 : !> JGH 23-Jan-2006 : initial version
292 : !> Adapted for new interface
293 : !> IAB 09-Jan-2009 : Modified to cache plans in fft_plan_type
294 : !> (c) The Numerical Algorithms Group (NAG) Ltd, 2009 on behalf of the HECToR project
295 : !> IAB 09-Oct-2009 : Added OpenMP directives to 1D FFT, and planning routines
296 : !> (c) The Numerical Algorithms Group (NAG) Ltd, 2009 on behalf of the HECToR project
297 : !> IAB 11-Sep-2012 : OpenMP parallel 3D FFT (Ruyman Reyes, PRACE)
298 : !> \author JGH
299 : ! **************************************************************************************************
300 504 : SUBROUTINE fftw3_get_lengths(DATA, max_length)
301 :
302 : INTEGER, DIMENSION(*) :: DATA
303 : INTEGER, INTENT(INOUT) :: max_length
304 :
305 : INTEGER :: h, i, j, k, m, maxn, maxn_elevens, &
306 : maxn_fives, maxn_sevens, &
307 : maxn_thirteens, maxn_threes, &
308 : maxn_twos, ndata, nmax, number
309 504 : INTEGER, ALLOCATABLE, DIMENSION(:) :: dlocal, idx
310 :
311 : !------------------------------------------------------------------------------
312 : ! compute ndata
313 : !! FFTW can do arbitrary(?) lengths, maybe you want to limit them to some
314 : !! powers of small prime numbers though...
315 :
316 504 : maxn_twos = 15
317 504 : maxn_threes = 3
318 504 : maxn_fives = 2
319 504 : maxn_sevens = 1
320 504 : maxn_elevens = 1
321 504 : maxn_thirteens = 0
322 504 : maxn = 37748736
323 :
324 504 : ndata = 0
325 8568 : DO h = 0, maxn_twos
326 8064 : nmax = HUGE(0)/2**h
327 40824 : DO i = 0, maxn_threes
328 137088 : DO j = 0, maxn_fives
329 322560 : DO k = 0, maxn_sevens
330 677376 : DO m = 0, maxn_elevens
331 387072 : number = (3**i)*(5**j)*(7**k)*(11**m)
332 :
333 387072 : IF (number > nmax) CYCLE
334 :
335 387072 : number = number*2**h
336 387072 : IF (number >= maxn) CYCLE
337 :
338 580608 : ndata = ndata + 1
339 : END DO
340 : END DO
341 : END DO
342 : END DO
343 : END DO
344 :
345 2016 : ALLOCATE (dlocal(ndata), idx(ndata))
346 :
347 504 : ndata = 0
348 371448 : dlocal(:) = 0
349 8568 : DO h = 0, maxn_twos
350 8064 : nmax = HUGE(0)/2**h
351 40824 : DO i = 0, maxn_threes
352 137088 : DO j = 0, maxn_fives
353 322560 : DO k = 0, maxn_sevens
354 677376 : DO m = 0, maxn_elevens
355 387072 : number = (3**i)*(5**j)*(7**k)*(11**m)
356 :
357 387072 : IF (number > nmax) CYCLE
358 :
359 387072 : number = number*2**h
360 387072 : IF (number >= maxn) CYCLE
361 :
362 370944 : ndata = ndata + 1
363 580608 : dlocal(ndata) = number
364 : END DO
365 : END DO
366 : END DO
367 : END DO
368 : END DO
369 :
370 504 : CALL sortint(dlocal, ndata, idx)
371 504 : ndata = MIN(ndata, max_length)
372 371448 : DATA(1:ndata) = dlocal(1:ndata)
373 504 : max_length = ndata
374 :
375 504 : DEALLOCATE (dlocal, idx)
376 :
377 504 : END SUBROUTINE fftw3_get_lengths
378 :
379 : ! **************************************************************************************************
380 : !> \brief ...
381 : !> \param iarr ...
382 : !> \param n ...
383 : !> \param index ...
384 : ! **************************************************************************************************
385 504 : SUBROUTINE sortint(iarr, n, index)
386 :
387 : INTEGER, INTENT(IN) :: n
388 : INTEGER, INTENT(INOUT) :: iarr(1:n)
389 : INTEGER, INTENT(OUT) :: INDEX(1:n)
390 :
391 : INTEGER, PARAMETER :: m = 7, nstack = 50
392 :
393 : INTEGER :: a, i, ib, ir, istack(1:nstack), itemp, &
394 : j, jstack, k, l, temp
395 :
396 : !------------------------------------------------------------------------------
397 :
398 371448 : DO i = 1, n
399 371448 : INDEX(i) = i
400 : END DO
401 : jstack = 0
402 : l = 1
403 : ir = n
404 : DO WHILE (.TRUE.)
405 139608 : IF (ir - l < m) THEN
406 301392 : DO j = l + 1, ir
407 231336 : a = iarr(j)
408 231336 : ib = INDEX(j)
409 549360 : DO i = j - 1, 0, -1
410 549360 : IF (i == 0) EXIT
411 548352 : IF (iarr(i) <= a) EXIT
412 318024 : iarr(i + 1) = iarr(i)
413 549360 : INDEX(i + 1) = INDEX(i)
414 : END DO
415 231336 : iarr(i + 1) = a
416 301392 : INDEX(i + 1) = ib
417 : END DO
418 70056 : IF (jstack == 0) RETURN
419 69552 : ir = istack(jstack)
420 69552 : l = istack(jstack - 1)
421 69552 : jstack = jstack - 2
422 : ELSE
423 69552 : k = (l + ir)/2
424 69552 : temp = iarr(k)
425 69552 : iarr(k) = iarr(l + 1)
426 69552 : iarr(l + 1) = temp
427 69552 : itemp = INDEX(k)
428 69552 : INDEX(k) = INDEX(l + 1)
429 69552 : INDEX(l + 1) = itemp
430 69552 : IF (iarr(l + 1) > iarr(ir)) THEN
431 32760 : temp = iarr(l + 1)
432 32760 : iarr(l + 1) = iarr(ir)
433 32760 : iarr(ir) = temp
434 32760 : itemp = INDEX(l + 1)
435 32760 : INDEX(l + 1) = INDEX(ir)
436 32760 : INDEX(ir) = itemp
437 : END IF
438 69552 : IF (iarr(l) > iarr(ir)) THEN
439 26208 : temp = iarr(l)
440 26208 : iarr(l) = iarr(ir)
441 26208 : iarr(ir) = temp
442 26208 : itemp = INDEX(l)
443 26208 : INDEX(l) = INDEX(ir)
444 26208 : INDEX(ir) = itemp
445 : END IF
446 69552 : IF (iarr(l + 1) > iarr(l)) THEN
447 26208 : temp = iarr(l + 1)
448 26208 : iarr(l + 1) = iarr(l)
449 26208 : iarr(l) = temp
450 26208 : itemp = INDEX(l + 1)
451 26208 : INDEX(l + 1) = INDEX(l)
452 26208 : INDEX(l) = itemp
453 : END IF
454 69552 : i = l + 1
455 69552 : j = ir
456 69552 : a = iarr(l)
457 69552 : ib = INDEX(l)
458 443016 : DO WHILE (.TRUE.)
459 512568 : i = i + 1
460 1663704 : DO WHILE (iarr(i) < a)
461 1151136 : i = i + 1
462 : END DO
463 512568 : j = j - 1
464 1270080 : DO WHILE (iarr(j) > a)
465 757512 : j = j - 1
466 : END DO
467 512568 : IF (j < i) EXIT
468 443016 : temp = iarr(i)
469 443016 : iarr(i) = iarr(j)
470 443016 : iarr(j) = temp
471 443016 : itemp = INDEX(i)
472 443016 : INDEX(i) = INDEX(j)
473 443016 : INDEX(j) = itemp
474 : END DO
475 69552 : iarr(l) = iarr(j)
476 69552 : iarr(j) = a
477 69552 : INDEX(l) = INDEX(j)
478 69552 : INDEX(j) = ib
479 69552 : jstack = jstack + 2
480 69552 : IF (jstack > nstack) CPABORT(" Nstack too small in sortr")
481 69552 : IF (ir - i + 1 >= j - l) THEN
482 35784 : istack(jstack) = ir
483 35784 : istack(jstack - 1) = i
484 35784 : ir = j - 1
485 : ELSE
486 33768 : istack(jstack) = j - 1
487 33768 : istack(jstack - 1) = l
488 33768 : l = i
489 : END IF
490 : END IF
491 :
492 : END DO
493 :
494 : END SUBROUTINE sortint
495 :
496 : ! **************************************************************************************************
497 :
498 : ! **************************************************************************************************
499 : !> \brief ...
500 : !> \param plan ...
501 : !> \param fft_rank ...
502 : !> \param dim_n ...
503 : !> \param dim_istride ...
504 : !> \param dim_ostride ...
505 : !> \param hm_rank ...
506 : !> \param hm_n ...
507 : !> \param hm_istride ...
508 : !> \param hm_ostride ...
509 : !> \param zin ...
510 : !> \param zout ...
511 : !> \param fft_direction ...
512 : !> \param fftw_plan_type ...
513 : !> \param valid ...
514 : ! **************************************************************************************************
515 55420 : SUBROUTINE fftw3_create_guru_plan(plan, fft_rank, dim_n, &
516 : dim_istride, dim_ostride, hm_rank, &
517 : hm_n, hm_istride, hm_ostride, &
518 : zin, zout, fft_direction, fftw_plan_type, &
519 : valid)
520 :
521 : IMPLICIT NONE
522 :
523 : TYPE(C_PTR), INTENT(INOUT) :: plan
524 : COMPLEX(KIND=dp), DIMENSION(*), INTENT(INOUT) :: zin, zout
525 : INTEGER, INTENT(IN) :: dim_n(2), dim_istride(2), dim_ostride(2), &
526 : hm_n(2), hm_istride(2), hm_ostride(2), fft_rank, &
527 : fft_direction, fftw_plan_type, hm_rank
528 : LOGICAL, INTENT(OUT) :: valid
529 :
530 : #if defined (__FFTW3)
531 : TYPE(fftw_iodim) :: dim(2), hm(2)
532 : INTEGER :: i
533 :
534 166260 : DO i = 1, 2
535 110840 : DIM(i) = fftw_iodim(dim_n(i), dim_istride(i), dim_ostride(i))
536 166260 : hm(i) = fftw_iodim(hm_n(i), hm_istride(i), hm_ostride(i))
537 : END DO
538 :
539 : plan = fftw_plan_guru_dft(fft_rank, &
540 : dim, hm_rank, hm, &
541 : zin, zout, &
542 55420 : fft_direction, fftw_plan_type)
543 :
544 55420 : valid = C_ASSOCIATED(plan)
545 :
546 : #else
547 : MARK_USED(plan)
548 : MARK_USED(fft_rank)
549 : MARK_USED(dim_n)
550 : MARK_USED(dim_istride)
551 : MARK_USED(dim_ostride)
552 : MARK_USED(hm_rank)
553 : MARK_USED(hm_n)
554 : MARK_USED(hm_istride)
555 : MARK_USED(hm_ostride)
556 : MARK_USED(fft_direction)
557 : MARK_USED(fftw_plan_type)
558 : !MARK_USED does not work with assumed size arguments
559 : IF (.FALSE.) THEN; DO; IF (ABS(zin(1)) > ABS(zout(1))) EXIT; END DO; END IF
560 : valid = .FALSE.
561 :
562 : #endif
563 :
564 55420 : END SUBROUTINE
565 :
566 : ! **************************************************************************************************
567 :
568 : ! **************************************************************************************************
569 : !> \brief ...
570 : !> \return ...
571 : ! **************************************************************************************************
572 55420 : FUNCTION fftw3_is_mkl_wrapper() RESULT(is_mkl)
573 :
574 : IMPLICIT NONE
575 :
576 : LOGICAL :: is_mkl
577 : #if defined ( __FFTW3 )
578 : LOGICAL :: guru_supported
579 : INTEGER :: dim_n(2), dim_istride(2), dim_ostride(2), &
580 : howmany_n(2), howmany_istride(2), howmany_ostride(2)
581 : TYPE(C_PTR) :: test_plan
582 : COMPLEX(KIND=dp), DIMENSION(1, 1, 1) :: zin
583 :
584 : ! Attempt to create a plan with the guru interface for a 2d sub-space
585 : ! If this fails (e.g. for MKL's FFTW3 interface), fall back to the
586 : ! FFTW3 threaded 3D transform instead of the hand-optimised version
587 55420 : dim_n(1) = 1
588 55420 : dim_n(2) = 1
589 55420 : dim_istride(1) = 1
590 55420 : dim_istride(2) = 1
591 55420 : dim_ostride(1) = 1
592 55420 : dim_ostride(2) = 1
593 55420 : howmany_n(1) = 1
594 55420 : howmany_n(2) = 1
595 55420 : howmany_istride(1) = 1
596 55420 : howmany_istride(2) = 1
597 55420 : howmany_ostride(1) = 1
598 55420 : howmany_ostride(2) = 1
599 55420 : zin = CMPLX(0.0_dp, 0.0_dp, KIND=dp)
600 : CALL fftw3_create_guru_plan(test_plan, 1, &
601 : dim_n, dim_istride, dim_ostride, &
602 : 2, howmany_n, howmany_istride, howmany_ostride, &
603 : zin, zin, &
604 55420 : FFTW_FORWARD, FFTW_ESTIMATE, guru_supported)
605 55420 : IF (guru_supported) THEN
606 55420 : CALL fftw_destroy_plan(test_plan)
607 55420 : is_mkl = .FALSE.
608 : ELSE
609 : is_mkl = .TRUE.
610 : END IF
611 :
612 : #else
613 : is_mkl = .FALSE.
614 : #endif
615 :
616 55420 : END FUNCTION
617 :
618 : ! **************************************************************************************************
619 :
620 : ! **************************************************************************************************
621 : !> \brief ...
622 : !> \param nrows ...
623 : !> \param nt ...
624 : !> \param rows_per_thread ...
625 : !> \param rows_per_thread_r ...
626 : !> \param th_planA ...
627 : !> \param th_planB ...
628 : ! **************************************************************************************************
629 0 : SUBROUTINE fftw3_compute_rows_per_th(nrows, nt, rows_per_thread, rows_per_thread_r, &
630 : th_planA, th_planB)
631 :
632 : INTEGER, INTENT(IN) :: nrows, nt
633 : INTEGER, INTENT(OUT) :: rows_per_thread, rows_per_thread_r, &
634 : th_planA, th_planB
635 :
636 0 : IF (MOD(nrows, nt) .EQ. 0) THEN
637 0 : rows_per_thread = nrows/nt
638 0 : rows_per_thread_r = 0
639 0 : th_planA = nt
640 0 : th_planB = 0
641 : ELSE
642 0 : rows_per_thread = nrows/nt + 1
643 0 : rows_per_thread_r = nrows/nt
644 0 : th_planA = MOD(nrows, nt)
645 0 : th_planB = nt - th_planA
646 : END IF
647 :
648 0 : END SUBROUTINE
649 :
650 : ! **************************************************************************************************
651 :
652 : ! **************************************************************************************************
653 : !> \brief ...
654 : !> \param plan ...
655 : !> \param plan_r ...
656 : !> \param dim_n ...
657 : !> \param dim_istride ...
658 : !> \param dim_ostride ...
659 : !> \param hm_n ...
660 : !> \param hm_istride ...
661 : !> \param hm_ostride ...
662 : !> \param input ...
663 : !> \param output ...
664 : !> \param fft_direction ...
665 : !> \param fftw_plan_type ...
666 : !> \param rows_per_th ...
667 : !> \param rows_per_th_r ...
668 : ! **************************************************************************************************
669 0 : SUBROUTINE fftw3_create_3d_plans(plan, plan_r, dim_n, dim_istride, dim_ostride, &
670 : hm_n, hm_istride, hm_ostride, &
671 : input, output, &
672 : fft_direction, fftw_plan_type, rows_per_th, &
673 : rows_per_th_r)
674 :
675 : TYPE(C_PTR), INTENT(INOUT) :: plan, plan_r
676 : INTEGER, INTENT(INOUT) :: dim_n(2), dim_istride(2), &
677 : dim_ostride(2), hm_n(2), &
678 : hm_istride(2), hm_ostride(2)
679 : COMPLEX(KIND=dp), DIMENSION(*), INTENT(INOUT) :: input, output
680 : INTEGER, INTENT(INOUT) :: fft_direction, fftw_plan_type
681 : INTEGER, INTENT(IN) :: rows_per_th, rows_per_th_r
682 :
683 : LOGICAL :: valid
684 :
685 : ! First plans will have an additional row
686 :
687 0 : hm_n(2) = rows_per_th
688 : CALL fftw3_create_guru_plan(plan, 1, &
689 : dim_n, dim_istride, dim_ostride, &
690 : 2, hm_n, hm_istride, hm_ostride, &
691 : input, output, &
692 0 : fft_direction, fftw_plan_type, valid)
693 :
694 0 : IF (.NOT. valid) THEN
695 0 : CPABORT("fftw3_create_plan")
696 : END IF
697 :
698 : !!!! Remainder
699 0 : hm_n(2) = rows_per_th_r
700 : CALL fftw3_create_guru_plan(plan_r, 1, &
701 : dim_n, dim_istride, dim_ostride, &
702 : 2, hm_n, hm_istride, hm_ostride, &
703 : input, output, &
704 0 : fft_direction, fftw_plan_type, valid)
705 0 : IF (.NOT. valid) THEN
706 0 : CPABORT("fftw3_create_plan (remaining)")
707 : END IF
708 :
709 0 : END SUBROUTINE
710 :
711 : ! **************************************************************************************************
712 :
713 : ! **************************************************************************************************
714 : !> \brief ...
715 : !> \param plan ...
716 : !> \param zin ...
717 : !> \param zout ...
718 : !> \param plan_style ...
719 : ! **************************************************************************************************
720 55420 : SUBROUTINE fftw3_create_plan_3d(plan, zin, zout, plan_style)
721 :
722 : TYPE(fft_plan_type), INTENT(INOUT) :: plan
723 : COMPLEX(KIND=dp), DIMENSION(*), INTENT(INOUT) :: zin
724 : COMPLEX(KIND=dp), DIMENSION(*), INTENT(INOUT) :: zout
725 : INTEGER :: plan_style
726 : #if defined ( __FFTW3 )
727 : INTEGER :: n1, n2, n3
728 : INTEGER :: nt
729 : INTEGER :: rows_per_th
730 : INTEGER :: rows_per_th_r
731 : INTEGER :: fft_direction
732 : INTEGER :: th_planA, th_planB
733 55420 : COMPLEX(KIND=dp), ALLOCATABLE :: tmp(:)
734 :
735 : ! GURU Interface
736 : INTEGER :: dim_n(2), dim_istride(2), dim_ostride(2), &
737 : howmany_n(2), howmany_istride(2), howmany_ostride(2)
738 :
739 : INTEGER :: fftw_plan_type
740 110800 : SELECT CASE (plan_style)
741 : CASE (1)
742 55380 : fftw_plan_type = FFTW_ESTIMATE
743 : CASE (2)
744 8 : fftw_plan_type = FFTW_MEASURE
745 : CASE (3)
746 24 : fftw_plan_type = FFTW_PATIENT
747 : CASE (4)
748 8 : fftw_plan_type = FFTW_EXHAUSTIVE
749 : CASE DEFAULT
750 55420 : CPABORT("fftw3_create_plan_3d")
751 : END SELECT
752 :
753 : #if defined (__FFTW3_UNALIGNED)
754 : fftw_plan_type = fftw_plan_type + FFTW_UNALIGNED
755 : #endif
756 :
757 55420 : IF (plan%fsign == +1) THEN
758 27710 : fft_direction = FFTW_FORWARD
759 : ELSE
760 27710 : fft_direction = FFTW_BACKWARD
761 : END IF
762 :
763 55420 : n1 = plan%n_3d(1)
764 55420 : n2 = plan%n_3d(2)
765 55420 : n3 = plan%n_3d(3)
766 :
767 55420 : nt = 1
768 55420 : !$OMP PARALLEL DEFAULT(NONE) SHARED(nt)
769 : !$OMP MASTER
770 : !$ nt = omp_get_num_threads()
771 : !$OMP END MASTER
772 : !$OMP END PARALLEL
773 :
774 : IF ((fftw3_is_mkl_wrapper()) .OR. &
775 55420 : (.NOT. plan_style == 1) .OR. &
776 : (n1 < 256 .AND. n2 < 256 .AND. n3 < 256 .AND. nt == 1)) THEN
777 : ! If the plan type is MEASURE, PATIENT and EXHAUSTIVE or
778 : ! the grid size is small (and we are single-threaded) then
779 : ! FFTW3 does a better job than handmade optimization
780 : ! so plan a single 3D FFT which will execute using all the threads
781 :
782 55420 : plan%separated_plans = .FALSE.
783 55420 : !$ CALL fftw_plan_with_nthreads(nt)
784 :
785 55420 : IF (plan%fft_in_place) THEN
786 27710 : plan%fftw_plan = fftw_plan_dft_3d(n3, n2, n1, zin, zin, fft_direction, fftw_plan_type)
787 : ELSE
788 27710 : plan%fftw_plan = fftw_plan_dft_3d(n3, n2, n1, zin, zout, fft_direction, fftw_plan_type)
789 : END IF
790 : ELSE
791 0 : ALLOCATE (tmp(n1*n2*n3))
792 : ! ************************* PLANS WITH TRANSPOSITIONS ****************************
793 : ! In the cases described above, we manually thread each stage of the 3D FFT.
794 : !
795 : ! The following plans replace the 3D FFT call by running 1D FFTW across all
796 : ! 3 directions of the array.
797 : !
798 : ! Output of FFTW is transposed to ensure that the next round of FFTW access
799 : ! contiguous information.
800 : !
801 : ! Assuming the input matrix is M(n3,n2,n1), FFTW/Transp are :
802 : ! M(n3,n2,n1) -> fftw(x) -> M(n3,n1,n2) -> fftw(y) -> M(n1,n2,n3) -> fftw(z) -> M(n1,n2,n3)
803 : ! Notice that last matrix is transposed in the Z axis. A DO-loop in the execute routine
804 : ! will perform the final transposition. Performance evaluation showed that using an external
805 : ! DO loop to do the final transposition performed better than directly transposing the output.
806 : ! However, this might vary depending on the compiler/platform, so a potential tuning spot
807 : ! is to perform the final transposition within the fftw library rather than using the external loop
808 : ! See comments below in Z-FFT for how to transpose the output to avoid the final DO loop.
809 : !
810 : ! Doc. for the Guru interface is in http://www.fftw.org/doc/Guru-Interface.html
811 : !
812 : ! OpenMP : Work is distributed on the Z plane.
813 : ! All transpositions are out-of-place to facilitate multi-threading
814 : !
815 : !!!! Plan for X : M(n3,n2,n1) -> fftw(x) -> M(n3,n1,n2)
816 : CALL fftw3_compute_rows_per_th(n3, nt, rows_per_th, rows_per_th_r, &
817 0 : th_planA, th_planB)
818 :
819 0 : dim_n(1) = n1
820 0 : dim_istride(1) = 1
821 0 : dim_ostride(1) = n2
822 0 : howmany_n(1) = n2
823 0 : howmany_n(2) = rows_per_th
824 0 : howmany_istride(1) = n1
825 0 : howmany_istride(2) = n1*n2
826 0 : howmany_ostride(1) = 1
827 0 : howmany_ostride(2) = n1*n2
828 : CALL fftw3_create_3d_plans(plan%fftw_plan_nx, plan%fftw_plan_nx_r, &
829 : dim_n, dim_istride, dim_ostride, howmany_n, &
830 : howmany_istride, howmany_ostride, &
831 : zin, tmp, &
832 : fft_direction, fftw_plan_type, rows_per_th, &
833 0 : rows_per_th_r)
834 :
835 : !!!! Plan for Y : M(n3,n1,n2) -> fftw(y) -> M(n1,n2,n3)
836 : CALL fftw3_compute_rows_per_th(n3, nt, rows_per_th, rows_per_th_r, &
837 0 : th_planA, th_planB)
838 0 : dim_n(1) = n2
839 : dim_istride(1) = 1
840 0 : dim_ostride(1) = n3
841 0 : howmany_n(1) = n1
842 0 : howmany_n(2) = rows_per_th
843 0 : howmany_istride(1) = n2
844 : howmany_istride(2) = n1*n2
845 : !!! transposed Z axis on output
846 0 : howmany_ostride(1) = n2*n3
847 0 : howmany_ostride(2) = 1
848 :
849 : CALL fftw3_create_3d_plans(plan%fftw_plan_ny, plan%fftw_plan_ny_r, &
850 : dim_n, dim_istride, dim_ostride, &
851 : howmany_n, howmany_istride, howmany_ostride, &
852 : tmp, zin, &
853 : fft_direction, fftw_plan_type, rows_per_th, &
854 0 : rows_per_th_r)
855 :
856 : !!!! Plan for Z : M(n1,n2,n3) -> fftw(z) -> M(n1,n2,n3)
857 : CALL fftw3_compute_rows_per_th(n1, nt, rows_per_th, rows_per_th_r, &
858 0 : th_planA, th_planB)
859 0 : dim_n(1) = n3
860 : dim_istride(1) = 1
861 0 : dim_ostride(1) = 1 ! To transpose: n2*n1
862 0 : howmany_n(1) = n2
863 0 : howmany_n(2) = rows_per_th
864 0 : howmany_istride(1) = n3
865 0 : howmany_istride(2) = n2*n3
866 0 : howmany_ostride(1) = n3 ! To transpose: n1
867 0 : howmany_ostride(2) = n2*n3 ! To transpose: 1
868 :
869 : CALL fftw3_create_3d_plans(plan%fftw_plan_nz, plan%fftw_plan_nz_r, &
870 : dim_n, dim_istride, dim_ostride, &
871 : howmany_n, howmany_istride, howmany_ostride, &
872 : zin, tmp, &
873 : fft_direction, fftw_plan_type, rows_per_th, &
874 0 : rows_per_th_r)
875 :
876 0 : plan%separated_plans = .TRUE.
877 :
878 0 : DEALLOCATE (tmp)
879 : END IF
880 :
881 : #else
882 : MARK_USED(plan)
883 : MARK_USED(plan_style)
884 : !MARK_USED does not work with assumed size arguments
885 : IF (.FALSE.) THEN; DO; IF (ABS(zin(1)) > ABS(zout(1))) EXIT; END DO; END IF
886 : #endif
887 :
888 55420 : END SUBROUTINE fftw3_create_plan_3d
889 :
890 : ! **************************************************************************************************
891 :
892 : ! **************************************************************************************************
893 : !> \brief ...
894 : !> \param plan ...
895 : !> \param plan_r ...
896 : !> \param split_dim ...
897 : !> \param nt ...
898 : !> \param tid ...
899 : !> \param input ...
900 : !> \param istride ...
901 : !> \param output ...
902 : !> \param ostride ...
903 : ! **************************************************************************************************
904 0 : SUBROUTINE fftw3_workshare_execute_dft(plan, plan_r, split_dim, nt, tid, &
905 : input, istride, output, ostride)
906 :
907 : INTEGER, INTENT(IN) :: split_dim, nt, tid
908 : INTEGER, INTENT(IN) :: istride, ostride
909 : COMPLEX(KIND=dp), DIMENSION(*), INTENT(INOUT) :: input, output
910 : TYPE(C_PTR) :: plan, plan_r
911 : #if defined (__FFTW3)
912 : INTEGER :: i_off, o_off
913 : INTEGER :: th_planA, th_planB
914 : INTEGER :: rows_per_thread, rows_per_thread_r
915 :
916 : CALL fftw3_compute_rows_per_th(split_dim, nt, rows_per_thread, &
917 : rows_per_thread_r, &
918 0 : th_planA, th_planB)
919 :
920 0 : IF (th_planB .GT. 0) THEN
921 0 : IF (tid .LT. th_planA) THEN
922 0 : i_off = (tid)*(istride*(rows_per_thread)) + 1
923 0 : o_off = (tid)*(ostride*(rows_per_thread)) + 1
924 0 : IF (rows_per_thread .GT. 0) THEN
925 : CALL fftw_execute_dft(plan, input(i_off), &
926 0 : output(o_off))
927 : END IF
928 0 : ELSE IF ((tid - th_planA) < th_planB) THEN
929 :
930 : i_off = (th_planA)*istride*(rows_per_thread) + &
931 0 : (tid - th_planA)*istride*(rows_per_thread_r) + 1
932 : o_off = (th_planA)*ostride*(rows_per_thread) + &
933 0 : (tid - th_planA)*ostride*(rows_per_thread_r) + 1
934 :
935 : CALL fftw_execute_dft(plan_r, input(i_off), &
936 0 : output(o_off))
937 : END IF
938 :
939 : ELSE
940 0 : i_off = (tid)*(istride*(rows_per_thread)) + 1
941 0 : o_off = (tid)*(ostride*(rows_per_thread)) + 1
942 :
943 : CALL fftw_execute_dft(plan, input(i_off), &
944 0 : output(o_off))
945 :
946 : END IF
947 : #else
948 : MARK_USED(plan)
949 : MARK_USED(plan_r)
950 : MARK_USED(split_dim)
951 : MARK_USED(nt)
952 : MARK_USED(tid)
953 : MARK_USED(istride)
954 : MARK_USED(ostride)
955 : !MARK_USED does not work with assumed size arguments
956 : IF (.FALSE.) THEN; DO; IF (ABS(input(1)) > ABS(output(1))) EXIT; END DO; END IF
957 : #endif
958 :
959 0 : END SUBROUTINE
960 :
961 : ! **************************************************************************************************
962 :
963 : ! **************************************************************************************************
964 : !> \brief ...
965 : !> \param plan ...
966 : !> \param scale ...
967 : !> \param zin ...
968 : !> \param zout ...
969 : !> \param stat ...
970 : ! **************************************************************************************************
971 633188 : SUBROUTINE fftw33d(plan, scale, zin, zout, stat)
972 :
973 : TYPE(fft_plan_type), INTENT(IN) :: plan
974 : REAL(KIND=dp), INTENT(IN) :: scale
975 : COMPLEX(KIND=dp), DIMENSION(*), INTENT(INOUT), TARGET:: zin
976 : COMPLEX(KIND=dp), DIMENSION(*), INTENT(INOUT), TARGET:: zout
977 : INTEGER, INTENT(OUT) :: stat
978 : #if defined ( __FFTW3 )
979 633188 : COMPLEX(KIND=dp), POINTER :: xout(:)
980 633188 : COMPLEX(KIND=dp), ALLOCATABLE :: tmp1(:)
981 : INTEGER :: n1, n2, n3
982 : INTEGER :: tid, nt
983 : INTEGER :: i, j, k
984 :
985 633188 : n1 = plan%n_3d(1)
986 633188 : n2 = plan%n_3d(2)
987 633188 : n3 = plan%n_3d(3)
988 :
989 633188 : stat = 1
990 :
991 : ! We use a POINTER to the output array to avoid duplicating code
992 633188 : IF (plan%fft_in_place) THEN
993 633184 : xout => zin(:n1*n2*n3)
994 : ELSE
995 4 : xout => zout(:n1*n2*n3)
996 : END IF
997 :
998 : ! Either compute the full 3D FFT using a multithreaded plan
999 633188 : IF (.NOT. plan%separated_plans) THEN
1000 633188 : CALL fftw_execute_dft(plan%fftw_plan, zin, xout)
1001 : ELSE
1002 : ! Or use the 3 stage FFT scheme described in fftw3_create_plan_3d
1003 0 : ALLOCATE (tmp1(n1*n2*n3)) ! Temporary vector used for transpositions
1004 0 : !$OMP PARALLEL DEFAULT(NONE) PRIVATE(tid,nt,i,j,k) SHARED(zin,tmp1,n1,n2,n3,plan,xout)
1005 : tid = 0
1006 : nt = 1
1007 :
1008 : !$ tid = omp_get_thread_num()
1009 : !$ nt = omp_get_num_threads()
1010 : CALL fftw3_workshare_execute_dft(plan%fftw_plan_nx, plan%fftw_plan_nx_r, &
1011 : n3, nt, tid, &
1012 : zin, n1*n2, tmp1, n1*n2)
1013 :
1014 : !$OMP BARRIER
1015 : CALL fftw3_workshare_execute_dft(plan%fftw_plan_ny, plan%fftw_plan_ny_r, &
1016 : n3, nt, tid, &
1017 : tmp1, n1*n2, xout, 1)
1018 : !$OMP BARRIER
1019 : CALL fftw3_workshare_execute_dft(plan%fftw_plan_nz, plan%fftw_plan_nz_r, &
1020 : n1, nt, tid, &
1021 : xout, n2*n3, tmp1, n2*n3)
1022 : !$OMP BARRIER
1023 :
1024 : !$OMP DO COLLAPSE(3)
1025 : DO i = 1, n1
1026 : DO j = 1, n2
1027 : DO k = 1, n3
1028 : xout((i - 1) + (j - 1)*n1 + (k - 1)*n1*n2 + 1) = &
1029 : tmp1((k - 1) + (j - 1)*n3 + (i - 1)*n3*n2 + 1)
1030 : END DO
1031 : END DO
1032 : END DO
1033 : !$OMP END DO
1034 :
1035 : !$OMP END PARALLEL
1036 : END IF
1037 :
1038 633188 : IF (scale /= 1.0_dp) THEN
1039 300626 : CALL zdscal(n1*n2*n3, scale, xout, 1)
1040 : END IF
1041 :
1042 : #else
1043 : MARK_USED(plan)
1044 : MARK_USED(scale)
1045 : !MARK_USED does not work with assumed size arguments
1046 : IF (.FALSE.) THEN; DO; IF (ABS(zin(1)) > ABS(zout(1))) EXIT; END DO; END IF
1047 : stat = 0
1048 :
1049 : #endif
1050 :
1051 633188 : END SUBROUTINE fftw33d
1052 :
1053 : ! **************************************************************************************************
1054 :
1055 : ! **************************************************************************************************
1056 : !> \brief ...
1057 : !> \param plan ...
1058 : !> \param zin ...
1059 : !> \param zout ...
1060 : !> \param plan_style ...
1061 : ! **************************************************************************************************
1062 159946 : SUBROUTINE fftw3_create_plan_1dm(plan, zin, zout, plan_style)
1063 :
1064 : IMPLICIT NONE
1065 :
1066 : TYPE(fft_plan_type), INTENT(INOUT) :: plan
1067 : COMPLEX(KIND=dp), DIMENSION(*), INTENT(IN) :: zin
1068 : COMPLEX(KIND=dp), DIMENSION(*), INTENT(IN) :: zout
1069 : INTEGER, INTENT(IN) :: plan_style
1070 : #if defined ( __FFTW3 )
1071 : INTEGER :: ii, di, io, DO, num_threads, num_rows
1072 :
1073 : INTEGER :: fftw_plan_type
1074 319688 : SELECT CASE (plan_style)
1075 : CASE (1)
1076 159742 : fftw_plan_type = FFTW_ESTIMATE
1077 : CASE (2)
1078 48 : fftw_plan_type = FFTW_MEASURE
1079 : CASE (3)
1080 144 : fftw_plan_type = FFTW_PATIENT
1081 : CASE (4)
1082 12 : fftw_plan_type = FFTW_EXHAUSTIVE
1083 : CASE DEFAULT
1084 159946 : CPABORT("fftw3_create_plan_1dm")
1085 : END SELECT
1086 :
1087 : #if defined (__FFTW3_UNALIGNED)
1088 : fftw_plan_type = fftw_plan_type + FFTW_UNALIGNED
1089 : #endif
1090 159946 : num_threads = 1
1091 159946 : plan%separated_plans = .FALSE.
1092 : !$OMP PARALLEL DEFAULT(NONE), &
1093 159946 : !$OMP SHARED(NUM_THREADS)
1094 : !$OMP MASTER
1095 : !$ num_threads = omp_get_num_threads()
1096 : !$OMP END MASTER
1097 : !$OMP END PARALLEL
1098 :
1099 159946 : num_rows = plan%m/num_threads
1100 159946 : !$ plan%num_threads_needed = num_threads
1101 :
1102 : ! Check for number of rows less than num_threads
1103 159946 : !$ IF (plan%m < num_threads) THEN
1104 0 : !$ num_rows = 1
1105 0 : !$ plan%num_threads_needed = plan%m
1106 : !$ END IF
1107 :
1108 : ! Check for total number of rows not divisible by num_threads
1109 159946 : !$ IF (num_rows*plan%num_threads_needed .NE. plan%m) THEN
1110 0 : !$ plan%need_alt_plan = .TRUE.
1111 : !$ END IF
1112 :
1113 159946 : !$ plan%num_rows = num_rows
1114 159946 : ii = 1
1115 159946 : di = plan%n
1116 159946 : io = 1
1117 159946 : DO = plan%n
1118 159946 : IF (plan%fsign == +1 .AND. plan%trans) THEN
1119 79863 : ii = plan%m
1120 79863 : di = 1
1121 80083 : ELSEIF (plan%fsign == -1 .AND. plan%trans) THEN
1122 79863 : io = plan%m
1123 79863 : DO = 1
1124 : END IF
1125 :
1126 159946 : IF (plan%fsign == +1) THEN
1127 : CALL dfftw_plan_many_dft(plan%fftw_plan, 1, plan%n, num_rows, zin, 0, ii, di, &
1128 80083 : zout, 0, io, DO, FFTW_FORWARD, fftw_plan_type)
1129 : ELSE
1130 : CALL dfftw_plan_many_dft(plan%fftw_plan, 1, plan%n, num_rows, zin, 0, ii, di, &
1131 79863 : zout, 0, io, DO, FFTW_BACKWARD, fftw_plan_type)
1132 : END IF
1133 :
1134 159946 : !$ IF (plan%need_alt_plan) THEN
1135 0 : !$ plan%alt_num_rows = plan%m - (plan%num_threads_needed - 1)*num_rows
1136 0 : !$ IF (plan%fsign == +1) THEN
1137 : !$ CALL dfftw_plan_many_dft(plan%alt_fftw_plan, 1, plan%n, plan%alt_num_rows, zin, 0, ii, di, &
1138 0 : !$ zout, 0, io, DO, FFTW_FORWARD, fftw_plan_type)
1139 : !$ ELSE
1140 : !$ CALL dfftw_plan_many_dft(plan%alt_fftw_plan, 1, plan%n, plan%alt_num_rows, zin, 0, ii, di, &
1141 0 : !$ zout, 0, io, DO, FFTW_BACKWARD, fftw_plan_type)
1142 : !$ END IF
1143 : !$ END IF
1144 :
1145 : #else
1146 : MARK_USED(plan)
1147 : MARK_USED(plan_style)
1148 : !MARK_USED does not work with assumed size arguments
1149 : IF (.FALSE.) THEN; DO; IF (ABS(zin(1)) > ABS(zout(1))) EXIT; END DO; END IF
1150 : #endif
1151 :
1152 159946 : END SUBROUTINE fftw3_create_plan_1dm
1153 :
1154 : ! **************************************************************************************************
1155 : !> \brief ...
1156 : !> \param plan ...
1157 : ! **************************************************************************************************
1158 215366 : SUBROUTINE fftw3_destroy_plan(plan)
1159 :
1160 : TYPE(fft_plan_type), INTENT(INOUT) :: plan
1161 :
1162 : #if defined ( __FFTW3 )
1163 215366 : !$ IF (plan%need_alt_plan) THEN
1164 0 : !$ CALL fftw_destroy_plan(plan%alt_fftw_plan)
1165 : !$ END IF
1166 :
1167 215366 : IF (.NOT. plan%separated_plans) THEN
1168 215366 : CALL fftw_destroy_plan(plan%fftw_plan)
1169 : ELSE
1170 : ! If it is a separated plan then we have to destroy
1171 : ! each dim plan individually
1172 0 : CALL fftw_destroy_plan(plan%fftw_plan_nx)
1173 0 : CALL fftw_destroy_plan(plan%fftw_plan_ny)
1174 0 : CALL fftw_destroy_plan(plan%fftw_plan_nz)
1175 0 : CALL fftw_destroy_plan(plan%fftw_plan_nx_r)
1176 0 : CALL fftw_destroy_plan(plan%fftw_plan_ny_r)
1177 0 : CALL fftw_destroy_plan(plan%fftw_plan_nz_r)
1178 : END IF
1179 :
1180 : #else
1181 : MARK_USED(plan)
1182 : #endif
1183 :
1184 215366 : END SUBROUTINE fftw3_destroy_plan
1185 :
1186 : ! **************************************************************************************************
1187 : !> \brief ...
1188 : !> \param plan ...
1189 : !> \param zin ...
1190 : !> \param zout ...
1191 : !> \param scale ...
1192 : !> \param stat ...
1193 : ! **************************************************************************************************
1194 7928488 : SUBROUTINE fftw31dm(plan, zin, zout, scale, stat)
1195 : TYPE(fft_plan_type), INTENT(IN) :: plan
1196 : COMPLEX(KIND=dp), DIMENSION(*), INTENT(IN), TARGET :: zin
1197 : COMPLEX(KIND=dp), DIMENSION(*), INTENT(INOUT), &
1198 : TARGET :: zout
1199 : REAL(KIND=dp), INTENT(IN) :: scale
1200 : INTEGER, INTENT(OUT) :: stat
1201 :
1202 : INTEGER :: in_offset, my_id, num_rows, out_offset, &
1203 : scal_offset
1204 : TYPE(C_PTR) :: fftw_plan
1205 :
1206 : !------------------------------------------------------------------------------
1207 :
1208 7928488 : my_id = 0
1209 7928488 : num_rows = plan%m
1210 :
1211 : !$OMP PARALLEL DEFAULT(NONE), &
1212 : !$OMP PRIVATE(my_id,num_rows,in_offset,out_offset,scal_offset,fftw_plan), &
1213 : !$OMP SHARED(zin,zout), &
1214 7928488 : !$OMP SHARED(plan,scale,stat)
1215 : !$ my_id = omp_get_thread_num()
1216 :
1217 : !$ if (my_id < plan%num_threads_needed) then
1218 :
1219 : fftw_plan = plan%fftw_plan
1220 :
1221 : in_offset = 1
1222 : out_offset = 1
1223 : scal_offset = 1
1224 :
1225 : !$ in_offset = 1 + plan%num_rows*my_id*plan%n
1226 : !$ out_offset = 1 + plan%num_rows*my_id*plan%n
1227 : !$ IF (plan%fsign == +1 .AND. plan%trans) THEN
1228 : !$ in_offset = 1 + plan%num_rows*my_id
1229 : !$ ELSEIF (plan%fsign == -1 .AND. plan%trans) THEN
1230 : !$ out_offset = 1 + plan%num_rows*my_id
1231 : !$ END IF
1232 : !$ scal_offset = 1 + plan%n*plan%num_rows*my_id
1233 : !$ IF (plan%need_alt_plan .AND. my_id .EQ. plan%num_threads_needed - 1) THEN
1234 : !$ num_rows = plan%alt_num_rows
1235 : !$ fftw_plan = plan%alt_fftw_plan
1236 : !$ ELSE
1237 : !$ num_rows = plan%num_rows
1238 : !$ END IF
1239 :
1240 : #if defined ( __FFTW3 )
1241 : !$OMP MASTER
1242 : stat = 1
1243 : !$OMP END MASTER
1244 : CALL dfftw_execute_dft(fftw_plan, zin(in_offset:in_offset), zout(out_offset:out_offset))
1245 : !$ end if
1246 : ! all theads need to meet at this barrier
1247 : !$OMP BARRIER
1248 : !$ if (my_id < plan%num_threads_needed) then
1249 : IF (scale /= 1.0_dp) CALL zdscal(plan%n*num_rows, scale, zout(scal_offset:scal_offset), 1)
1250 : !$ end if
1251 :
1252 : #else
1253 : MARK_USED(plan)
1254 : MARK_USED(scale)
1255 : !MARK_USED does not work with assumed size arguments
1256 : IF (.FALSE.) THEN; DO; IF (ABS(zin(1)) > ABS(zout(1))) EXIT; END DO; END IF
1257 : stat = 0
1258 :
1259 : !$ else
1260 : !$ end if
1261 :
1262 : #endif
1263 :
1264 : !$OMP END PARALLEL
1265 :
1266 7928488 : END SUBROUTINE fftw31dm
1267 :
1268 0 : END MODULE
|