Line data Source code
1 : !--------------------------------------------------------------------------------------------------!
2 : ! CP2K: A general program to perform molecular dynamics simulations !
3 : ! Copyright 2000-2024 CP2K developers group <https://cp2k.org> !
4 : ! !
5 : ! SPDX-License-Identifier: GPL-2.0-or-later !
6 : !--------------------------------------------------------------------------------------------------!
7 :
8 : ! **************************************************************************************************
9 : !> \par History
10 : !> JGH (30-Nov-2000): ESSL FFT Library added
11 : !> JGH (05-Jan-2001): Added SGI library FFT
12 : !> JGH (14-Jan-2001): Added parallel 3d FFT
13 : !> JGH (10-Feb-2006): New interface type
14 : !> JGH (31-Mar-2008): Remove local allocates and reshapes (performance)
15 : !> Possible problems can be related with setting arrays
16 : !> not to zero
17 : !> Some interfaces could be further simplified by avoiding
18 : !> an initial copy. However, this assumes contiguous arrays
19 : !> IAB (15-Oct-2008): Moved mp_cart_sub calls out of cube_tranpose_* and into
20 : !> fft_scratch type, reducing number of calls dramatically
21 : !> IAB (05-Dec-2008): Moved all other non-essential MPI calls into scratch type
22 : !> IAB (09-Jan-2009): Added fft_plan_type to store FFT data, including cached FFTW plans
23 : !> IAB (13-Feb-2009): Extended plan caching to serial 3D FFT (fft3d_s)
24 : !> IAB (09-Oct-2009): Added OpenMP directives to parallel 3D FFT
25 : !> (c) The Numerical Algorithms Group (NAG) Ltd, 2008-2009 on behalf of the HECToR project
26 : !> HFP (17-Oct-2024): Thread-safety insurance (CPASSERT), and OMP ATOMIC (tick_fft_pool)
27 : !> \author JGH
28 : ! **************************************************************************************************
29 : MODULE fft_tools
30 : USE ISO_C_BINDING, ONLY: C_F_POINTER,&
31 : C_LOC,&
32 : C_PTR,&
33 : C_SIZE_T
34 : USE cp_log_handling, ONLY: cp_logger_get_default_io_unit
35 : USE fft_lib, ONLY: &
36 : fft_1dm, fft_3d, fft_alloc, fft_create_plan_1dm, fft_create_plan_3d, fft_dealloc, &
37 : fft_destroy_plan, fft_do_cleanup, fft_do_init, fft_get_lengths, fft_library
38 : USE fft_plan, ONLY: fft_plan_type
39 : USE kinds, ONLY: dp,&
40 : dp_size,&
41 : sp
42 : USE mathconstants, ONLY: z_zero
43 : USE message_passing, ONLY: mp_cart_type,&
44 : mp_comm_null,&
45 : mp_comm_type,&
46 : mp_request_type,&
47 : mp_waitall
48 : USE offload_api, ONLY: offload_free_pinned_mem,&
49 : offload_malloc_pinned_mem
50 :
51 : !$ USE OMP_LIB, ONLY: omp_get_max_threads, omp_get_thread_num, omp_in_parallel
52 :
53 : #include "../base/base_uses.f90"
54 :
55 : IMPLICIT NONE
56 :
57 : CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'fft_tools'
58 :
59 : ! Types for the pool of scratch data needed in FFT routines
60 : ! keep the subroutine "is_equal" up-to-date
61 : ! needs a default initialization
62 : TYPE fft_scratch_sizes
63 : INTEGER :: nx = 0, ny = 0, nz = 0
64 : INTEGER :: lmax = 0, mmax = 0, nmax = 0
65 : INTEGER :: mx1 = 0, mx2 = 0, mx3 = 0
66 : INTEGER :: my1 = 0, my2 = 0, my3 = 0
67 : INTEGER :: mz1 = 0, mz2 = 0, mz3 = 0
68 : INTEGER :: mcz1 = 0, mcz2 = 0, mcy3 = 0, mcx2 = 0
69 : INTEGER :: lg = 0, mg = 0
70 : INTEGER :: nbx = 0, nbz = 0
71 : INTEGER :: nmray = 0, nyzray = 0
72 : TYPE(mp_cart_type) :: rs_group = mp_cart_type()
73 : INTEGER, DIMENSION(2) :: g_pos = 0, r_pos = 0, r_dim = 0
74 : INTEGER :: numtask = 0
75 : END TYPE fft_scratch_sizes
76 :
77 : TYPE fft_scratch_type
78 : INTEGER :: fft_scratch_id = 0
79 : INTEGER :: tf_type = -1
80 : LOGICAL :: in_use = .TRUE.
81 : TYPE(mp_comm_type) :: group = mp_comm_type()
82 : INTEGER, DIMENSION(3) :: nfft = -1
83 : ! to be used in cube_transpose_* routines
84 : TYPE(mp_cart_type), DIMENSION(2) :: cart_sub_comm = mp_cart_type()
85 : INTEGER, DIMENSION(2) :: dim = -1, pos = -1
86 : ! to be used in fft3d_s
87 : COMPLEX(KIND=dp), DIMENSION(:, :, :), POINTER, CONTIGUOUS &
88 : :: ziptr => NULL(), zoptr => NULL()
89 : ! to be used in fft3d_ps : block distribution
90 : COMPLEX(KIND=dp), DIMENSION(:, :), CONTIGUOUS, POINTER &
91 : :: p1buf => NULL(), p2buf => NULL(), p3buf => NULL(), p4buf => NULL(), &
92 : p5buf => NULL(), p6buf => NULL(), p7buf => NULL()
93 : ! to be used in fft3d_ps : plane distribution
94 : COMPLEX(KIND=dp), DIMENSION(:, :), POINTER, CONTIGUOUS &
95 : :: r1buf => NULL(), r2buf => NULL()
96 : COMPLEX(KIND=dp), DIMENSION(:, :, :), POINTER, CONTIGUOUS &
97 : :: tbuf => NULL()
98 : ! to be used in fft3d_pb
99 : COMPLEX(KIND=dp), DIMENSION(:, :), POINTER, CONTIGUOUS &
100 : :: a1buf => NULL(), a2buf => NULL(), a3buf => NULL(), &
101 : a4buf => NULL(), a5buf => NULL(), a6buf => NULL()
102 : ! to be used in communication routines
103 : INTEGER, DIMENSION(:), CONTIGUOUS, POINTER :: scount => NULL(), rcount => NULL(), sdispl => NULL(), rdispl => NULL()
104 : INTEGER, DIMENSION(:, :), CONTIGUOUS, POINTER :: pgcube => NULL()
105 : INTEGER, DIMENSION(:), CONTIGUOUS, POINTER :: xzcount => NULL(), yzcount => NULL(), xzdispl => NULL(), yzdispl => NULL()
106 : INTEGER :: in = 0, mip = -1
107 : REAL(KIND=dp) :: rsratio = 1.0_dp
108 : COMPLEX(KIND=dp), DIMENSION(:), POINTER, CONTIGUOUS &
109 : :: xzbuf => NULL(), yzbuf => NULL()
110 : COMPLEX(KIND=sp), DIMENSION(:), POINTER, CONTIGUOUS &
111 : :: xzbuf_sgl => NULL(), yzbuf_sgl => NULL()
112 : COMPLEX(KIND=dp), DIMENSION(:, :), POINTER, CONTIGUOUS &
113 : :: rbuf1 => NULL(), rbuf2 => NULL(), rbuf3 => NULL(), rbuf4 => NULL(), &
114 : rbuf5 => NULL(), rbuf6 => NULL(), rr => NULL()
115 : COMPLEX(KIND=sp), DIMENSION(:, :), POINTER, CONTIGUOUS &
116 : :: ss => NULL(), tt => NULL()
117 : INTEGER, DIMENSION(:, :), POINTER, CONTIGUOUS :: pgrid => NULL()
118 : INTEGER, DIMENSION(:), POINTER, CONTIGUOUS :: xcor => NULL(), zcor => NULL(), pzcoord => NULL()
119 : TYPE(fft_scratch_sizes) :: sizes = fft_scratch_sizes()
120 : TYPE(fft_plan_type), DIMENSION(6) :: fft_plan = fft_plan_type()
121 : INTEGER :: last_tick = -1
122 : END TYPE fft_scratch_type
123 :
124 : TYPE fft_scratch_pool_type
125 : TYPE(fft_scratch_type), POINTER :: fft_scratch => NULL()
126 : TYPE(fft_scratch_pool_type), POINTER :: fft_scratch_next => NULL()
127 : END TYPE fft_scratch_pool_type
128 :
129 : INTEGER, SAVE :: init_fft_pool = 0
130 : ! the clock for fft pool. Allows to identify the least recently used scratch
131 : INTEGER, SAVE :: tick_fft_pool = 0
132 : ! limit the number of scratch pools to fft_pool_scratch_limit.
133 : INTEGER, SAVE :: fft_pool_scratch_limit = 15
134 : TYPE(fft_scratch_pool_type), POINTER, SAVE :: fft_scratch_first
135 : ! END of types for the pool of scratch data needed in FFT routines
136 :
137 : PRIVATE
138 : PUBLIC :: init_fft, fft3d, finalize_fft
139 : PUBLIC :: fft_alloc, fft_dealloc
140 : PUBLIC :: init_fft_scratch_pool
141 : PUBLIC :: fft_radix_operations, fft_fw1d
142 : PUBLIC :: FWFFT, BWFFT
143 : PUBLIC :: FFT_RADIX_CLOSEST, FFT_RADIX_NEXT
144 : PUBLIC :: FFT_RADIX_NEXT_ODD
145 :
146 : INTEGER, PARAMETER :: FWFFT = +1, BWFFT = -1
147 : INTEGER, PARAMETER :: FFT_RADIX_CLOSEST = 493, FFT_RADIX_NEXT = 494
148 : INTEGER, PARAMETER :: FFT_RADIX_ALLOWED = 495, FFT_RADIX_DISALLOWED = 496
149 : INTEGER, PARAMETER :: FFT_RADIX_NEXT_ODD = 497
150 :
151 : REAL(KIND=dp), PARAMETER :: ratio_sparse_alltoall = 0.5_dp
152 :
153 : ! these saved variables are FFT globals
154 : INTEGER, SAVE :: fft_type = 0
155 : LOGICAL, SAVE :: alltoall_sgl = .FALSE.
156 : LOGICAL, SAVE :: use_fftsg_sizes = .TRUE.
157 : INTEGER, SAVE :: fft_plan_style = 1
158 :
159 : ! these are only needed for pw_gpu (-D__OFFLOAD)
160 : PUBLIC :: get_fft_scratch, release_fft_scratch
161 : PUBLIC :: cube_transpose_1, cube_transpose_2
162 : PUBLIC :: yz_to_x, x_to_yz, xz_to_yz, yz_to_xz
163 : PUBLIC :: fft_scratch_sizes, fft_scratch_type
164 : PUBLIC :: fft_type, fft_plan_style
165 :
166 : INTERFACE fft3d
167 : MODULE PROCEDURE fft3d_s, fft3d_ps, fft3d_pb
168 : END INTERFACE
169 :
170 : ! **************************************************************************************************
171 :
172 : CONTAINS
173 :
174 : ! **************************************************************************************************
175 : !> \brief ...
176 : !> \param fftlib ...
177 : !> \param alltoall ...
178 : !> \param fftsg_sizes ...
179 : !> \param pool_limit ...
180 : !> \param wisdom_file ...
181 : !> \param plan_style ...
182 : !> \author JGH
183 : ! **************************************************************************************************
184 9143 : SUBROUTINE init_fft(fftlib, alltoall, fftsg_sizes, pool_limit, wisdom_file, &
185 : plan_style)
186 :
187 : CHARACTER(LEN=*), INTENT(IN) :: fftlib
188 : LOGICAL, INTENT(IN) :: alltoall, fftsg_sizes
189 : INTEGER, INTENT(IN) :: pool_limit
190 : CHARACTER(LEN=*), INTENT(IN) :: wisdom_file
191 : INTEGER, INTENT(IN) :: plan_style
192 :
193 9143 : use_fftsg_sizes = fftsg_sizes
194 9143 : alltoall_sgl = alltoall
195 9143 : fft_pool_scratch_limit = pool_limit
196 9143 : fft_type = fft_library(fftlib)
197 9143 : fft_plan_style = plan_style
198 :
199 9143 : IF (fft_type <= 0) CPABORT("Unknown FFT library: "//TRIM(fftlib))
200 :
201 9143 : CALL fft_do_init(fft_type, wisdom_file)
202 :
203 : ! setup the FFT scratch pool, if one is associated, clear first
204 9143 : CALL init_fft_scratch_pool() ! CALLs release_fft_scratch_pool()
205 :
206 9143 : END SUBROUTINE init_fft
207 :
208 : ! **************************************************************************************************
209 : !> \brief does whatever is needed to finalize the current fft setup
210 : !> \param para_env ...
211 : !> \param wisdom_file ...
212 : !> \par History
213 : !> 10.2007 created [Joost VandeVondele]
214 : ! **************************************************************************************************
215 8933 : SUBROUTINE finalize_fft(para_env, wisdom_file)
216 : CLASS(mp_comm_type) :: para_env
217 : CHARACTER(LEN=*), INTENT(IN) :: wisdom_file
218 :
219 : ! release the FFT scratch pool
220 :
221 8933 : CALL release_fft_scratch_pool()
222 :
223 : ! finalize fft libs
224 :
225 8933 : CALL fft_do_cleanup(fft_type, wisdom_file, para_env%is_source())
226 :
227 8933 : END SUBROUTINE finalize_fft
228 :
229 : ! **************************************************************************************************
230 : !> \brief Determine the allowed lengths of FFT's '''
231 : !> \param radix_in ...
232 : !> \param radix_out ...
233 : !> \param operation ...
234 : !> \par History
235 : !> new library structure (JGH)
236 : !> \author Ari Seitsonen
237 : ! **************************************************************************************************
238 110726 : SUBROUTINE fft_radix_operations(radix_in, radix_out, operation)
239 :
240 : INTEGER, INTENT(IN) :: radix_in
241 : INTEGER, INTENT(OUT) :: radix_out
242 : INTEGER, INTENT(IN) :: operation
243 :
244 : INTEGER, PARAMETER :: fft_type_sg = 1
245 :
246 : INTEGER :: i, iloc, ldata
247 110726 : INTEGER, ALLOCATABLE, DIMENSION(:) :: DATA
248 :
249 110726 : ldata = 1024
250 110726 : ALLOCATE (DATA(ldata))
251 113494150 : DATA = -1
252 :
253 : ! if the user wants to use fftsg sizes go for it
254 110726 : IF (use_fftsg_sizes) THEN
255 110222 : CALL fft_get_lengths(fft_type_sg, DATA, ldata)
256 : ELSE
257 504 : CALL fft_get_lengths(fft_type, DATA, ldata)
258 : END IF
259 :
260 110726 : iloc = 0
261 1222242 : DO i = 1, ldata
262 1222242 : IF (DATA(i) == radix_in) THEN
263 : iloc = i
264 : EXIT
265 : ELSE
266 1192150 : IF (OPERATION == FFT_RADIX_ALLOWED) THEN
267 : CYCLE
268 1192150 : ELSE IF (DATA(i) > radix_in) THEN
269 : iloc = i
270 : EXIT
271 : END IF
272 : END IF
273 : END DO
274 :
275 110726 : IF (iloc == 0) THEN
276 0 : CPABORT("Index to radix array not found.")
277 : END IF
278 :
279 110726 : IF (OPERATION == FFT_RADIX_ALLOWED) THEN
280 0 : IF (DATA(iloc) == radix_in) THEN
281 0 : radix_out = FFT_RADIX_ALLOWED
282 : ELSE
283 0 : radix_out = FFT_RADIX_DISALLOWED
284 : END IF
285 :
286 110726 : ELSE IF (OPERATION == FFT_RADIX_CLOSEST) THEN
287 270 : IF (DATA(iloc) == radix_in) THEN
288 102 : radix_out = DATA(iloc)
289 : ELSE
290 168 : IF (ABS(DATA(iloc - 1) - radix_in) <= &
291 : ABS(DATA(iloc) - radix_in)) THEN
292 162 : radix_out = DATA(iloc - 1)
293 : ELSE
294 6 : radix_out = DATA(iloc)
295 : END IF
296 : END IF
297 :
298 110456 : ELSE IF (OPERATION == FFT_RADIX_NEXT) THEN
299 108884 : radix_out = DATA(iloc)
300 :
301 1572 : ELSE IF (OPERATION == FFT_RADIX_NEXT_ODD) THEN
302 3562 : DO i = iloc, ldata
303 3562 : IF (MOD(DATA(i), 2) == 1) THEN
304 1572 : radix_out = DATA(i)
305 1572 : EXIT
306 : END IF
307 : END DO
308 1572 : IF (MOD(radix_out, 2) == 0) THEN
309 0 : CPABORT("No odd radix found.")
310 : END IF
311 :
312 : ELSE
313 0 : CPABORT("Disallowed radix operation.")
314 : END IF
315 :
316 110726 : DEALLOCATE (DATA)
317 :
318 110726 : END SUBROUTINE fft_radix_operations
319 :
320 : ! **************************************************************************************************
321 : !> \brief Performs m 1-D forward FFT-s of size n.
322 : !> \param n size of the FFT
323 : !> \param m number of FFT-s
324 : !> \param trans shape of input and output arrays: [n x m] if it is FALSE, [m x n] if it is TRUE
325 : !> \param zin input array
326 : !> \param zout output array
327 : !> \param scale scaling factor
328 : !> \param stat status of the operation, non-zero code indicates an error
329 : ! **************************************************************************************************
330 208 : SUBROUTINE fft_fw1d(n, m, trans, zin, zout, scale, stat)
331 : INTEGER, INTENT(in) :: n, m
332 : LOGICAL, INTENT(in) :: trans
333 : COMPLEX(kind=dp), DIMENSION(*), INTENT(inout) :: zin, zout
334 : REAL(kind=dp), INTENT(in) :: scale
335 : INTEGER, INTENT(out) :: stat
336 :
337 : CHARACTER(len=*), PARAMETER :: routineN = 'fft_fw1d'
338 :
339 : INTEGER :: handle
340 : TYPE(fft_plan_type) :: fft_plan
341 :
342 208 : CALL timeset(routineN, handle)
343 :
344 208 : IF (fft_type == 3) THEN
345 208 : CALL fft_create_plan_1dm(fft_plan, fft_type, FWFFT, trans, n, m, zin, zout, fft_plan_style)
346 208 : CALL fft_1dm(fft_plan, zin, zout, scale, stat)
347 208 : CALL fft_destroy_plan(fft_plan)
348 : ELSE
349 : CALL cp_warn(__LOCATION__, &
350 0 : "FFT library in use cannot handle transformation of an arbitrary length.")
351 0 : stat = 1
352 : END IF
353 :
354 208 : CALL timestop(handle)
355 832 : END SUBROUTINE fft_fw1d
356 :
357 : ! **************************************************************************************************
358 : !> \brief Calls the 3D-FFT function from the initialized library
359 : !> \param fsign ...
360 : !> \param n ...
361 : !> \param zin ...
362 : !> \param zout ...
363 : !> \param status ...
364 : !> \param debug ...
365 : !> \par History
366 : !> none
367 : !> \author JGH
368 : ! **************************************************************************************************
369 635466 : SUBROUTINE fft3d_s(fsign, n, zin, zout, status, debug)
370 :
371 : INTEGER, INTENT(IN) :: fsign
372 : INTEGER, DIMENSION(:), INTENT(INOUT) :: n
373 : COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :, :), &
374 : INTENT(INOUT) :: zin
375 : COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :, :), &
376 : INTENT(INOUT), OPTIONAL, TARGET :: zout
377 : INTEGER, INTENT(OUT), OPTIONAL :: status
378 : LOGICAL, INTENT(IN), OPTIONAL :: debug
379 :
380 : CHARACTER(len=*), PARAMETER :: routineN = 'fft3d_s'
381 :
382 : COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :, :), &
383 635466 : POINTER :: zoptr
384 : COMPLEX(KIND=dp), DIMENSION(1, 1, 1), TARGET :: zdum
385 : INTEGER :: handle, ld(3), lo(3), output_unit, sign, &
386 : stat
387 : LOGICAL :: fft_in_place, test
388 : REAL(KIND=dp) :: in_sum, norm, out_sum
389 : TYPE(fft_scratch_type), POINTER :: fft_scratch
390 :
391 635466 : CALL timeset(routineN, handle)
392 635466 : output_unit = cp_logger_get_default_io_unit()
393 :
394 635466 : IF (fsign == FWFFT) THEN
395 1205800 : norm = 1.0_dp/REAL(PRODUCT(n), KIND=dp)
396 334016 : ELSE IF (fsign == BWFFT) THEN
397 334016 : norm = 1.0_dp
398 : ELSE
399 0 : CPABORT("Unknown FFT direction!")
400 : END IF
401 :
402 635466 : IF (PRESENT(debug)) THEN
403 598617 : test = debug
404 : ELSE
405 : test = .FALSE.
406 : END IF
407 :
408 635466 : IF (PRESENT(zout)) THEN
409 : fft_in_place = .FALSE.
410 : ELSE
411 635458 : fft_in_place = .TRUE.
412 : END IF
413 :
414 635466 : IF (test) THEN
415 0 : in_sum = SUM(ABS(zin))
416 : END IF
417 :
418 635466 : ld(1) = SIZE(zin, 1)
419 635466 : ld(2) = SIZE(zin, 2)
420 635466 : ld(3) = SIZE(zin, 3)
421 :
422 635466 : IF (n(1) /= ld(1) .OR. n(2) /= ld(2) .OR. n(3) /= ld(3)) THEN
423 0 : CPABORT("Size and dimension (zin) have to be the same.")
424 : END IF
425 :
426 635466 : sign = fsign
427 635466 : CALL get_fft_scratch(fft_scratch, tf_type=400, n=n)
428 :
429 635466 : IF (fft_in_place) THEN
430 635458 : zoptr => zdum
431 635458 : IF (fsign == FWFFT) THEN
432 301446 : CALL fft_3d(fft_scratch%fft_plan(1), norm, zin, zoptr, stat)
433 : ELSE
434 334012 : CALL fft_3d(fft_scratch%fft_plan(2), norm, zin, zoptr, stat)
435 : END IF
436 : ELSE
437 8 : IF (fsign == FWFFT) THEN
438 4 : CALL fft_3d(fft_scratch%fft_plan(3), norm, zin, zout, stat)
439 : ELSE
440 4 : CALL fft_3d(fft_scratch%fft_plan(4), norm, zin, zout, stat)
441 : END IF
442 : END IF
443 :
444 635466 : CALL release_fft_scratch(fft_scratch)
445 :
446 635466 : IF (PRESENT(zout)) THEN
447 8 : lo(1) = SIZE(zout, 1)
448 8 : lo(2) = SIZE(zout, 2)
449 8 : lo(3) = SIZE(zout, 3)
450 8 : IF (n(1) /= lo(1) .OR. n(2) /= lo(2) .OR. n(3) /= lo(3)) THEN
451 0 : CPABORT("Size and dimension (zout) have to be the same.")
452 : END IF
453 : END IF
454 :
455 635466 : IF (PRESENT(status)) THEN
456 9131 : status = stat
457 : END IF
458 :
459 635466 : IF (test .AND. output_unit > 0) THEN
460 0 : IF (PRESENT(zout)) THEN
461 0 : out_sum = SUM(ABS(zout))
462 0 : WRITE (output_unit, '(A)') " Out of place 3D FFT (local) : fft3d_s"
463 0 : WRITE (output_unit, '(A,T60,3I7)') " Transform lengths ", n
464 0 : WRITE (output_unit, '(A,T60,3I7)') " Input array dimensions ", ld
465 0 : WRITE (output_unit, '(A,T60,3I7)') " Output array dimensions ", lo
466 0 : WRITE (output_unit, '(A,T61,E20.14)') " Sum of input data ", in_sum
467 0 : WRITE (output_unit, '(A,T61,E20.14)') " Sum of output data ", out_sum
468 : ELSE
469 0 : out_sum = SUM(ABS(zin))
470 0 : WRITE (output_unit, '(A)') " In place 3D FFT (local) : fft3d_s"
471 0 : WRITE (output_unit, '(A,T60,3I7)') " Transform lengths ", n
472 0 : WRITE (output_unit, '(A,T60,3I7)') " Input/output array dimensions ", ld
473 0 : WRITE (output_unit, '(A,T61,E20.14)') " Sum of input data ", in_sum
474 0 : WRITE (output_unit, '(A,T61,E20.14)') " Sum of output data ", out_sum
475 : END IF
476 : END IF
477 :
478 635466 : CALL timestop(handle)
479 :
480 635466 : END SUBROUTINE fft3d_s
481 :
482 : ! **************************************************************************************************
483 : !> \brief ...
484 : !> \param fsign ...
485 : !> \param n ...
486 : !> \param cin ...
487 : !> \param gin ...
488 : !> \param rs_group ...
489 : !> \param yzp ...
490 : !> \param nyzray ...
491 : !> \param bo ...
492 : !> \param status ...
493 : !> \param debug ...
494 : ! **************************************************************************************************
495 2663210 : SUBROUTINE fft3d_ps(fsign, n, cin, gin, rs_group, yzp, nyzray, &
496 2663210 : bo, status, debug)
497 :
498 : INTEGER, INTENT(IN) :: fsign
499 : INTEGER, DIMENSION(:), INTENT(IN) :: n
500 : COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :, :), &
501 : INTENT(INOUT) :: cin
502 : COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
503 : INTENT(INOUT) :: gin
504 : TYPE(mp_cart_type), INTENT(IN) :: rs_group
505 : INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:), &
506 : INTENT(IN) :: yzp
507 : INTEGER, CONTIGUOUS, DIMENSION(0:), INTENT(IN) :: nyzray
508 : INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:, :), &
509 : INTENT(IN) :: bo
510 : INTEGER, INTENT(OUT), OPTIONAL :: status
511 : LOGICAL, INTENT(IN), OPTIONAL :: debug
512 :
513 : CHARACTER(len=*), PARAMETER :: routineN = 'fft3d_ps'
514 :
515 : COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
516 2663210 : POINTER :: pbuf, qbuf, rbuf, sbuf
517 : COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :, :), &
518 2663210 : POINTER :: tbuf
519 : INTEGER :: g_pos, handle, lg, lmax, mcx2, mcz1, mcz2, mg, mmax, mx1, mx2, my1, mz2, n1, n2, &
520 : nmax, numtask, nx, ny, nz, output_unit, r_dim(2), r_pos(2), rp, sign, stat
521 2663210 : INTEGER, ALLOCATABLE, DIMENSION(:) :: p2p
522 : LOGICAL :: test
523 : REAL(KIND=dp) :: norm, sum_data
524 18642470 : TYPE(fft_scratch_sizes) :: fft_scratch_size
525 : TYPE(fft_scratch_type), POINTER :: fft_scratch
526 :
527 2663210 : CALL timeset(routineN, handle)
528 2663210 : output_unit = cp_logger_get_default_io_unit()
529 :
530 2663210 : IF (PRESENT(debug)) THEN
531 2663210 : test = debug
532 : ELSE
533 : test = .FALSE.
534 : END IF
535 :
536 2663210 : g_pos = rs_group%mepos
537 2663210 : numtask = rs_group%num_pe
538 7989630 : r_dim = rs_group%num_pe_cart
539 7989630 : r_pos = rs_group%mepos_cart
540 :
541 2663210 : IF (fsign == FWFFT) THEN
542 5266936 : norm = 1.0_dp/REAL(PRODUCT(n), KIND=dp)
543 1346476 : ELSE IF (fsign == BWFFT) THEN
544 1346476 : norm = 1.0_dp
545 : ELSE
546 0 : CPABORT("Unknown FFT direction!")
547 : END IF
548 :
549 2663210 : sign = fsign
550 :
551 2663210 : lg = SIZE(gin, 1)
552 2663210 : mg = SIZE(gin, 2)
553 :
554 2663210 : nx = SIZE(cin, 1)
555 2663210 : ny = SIZE(cin, 2)
556 2663210 : nz = SIZE(cin, 3)
557 :
558 2663210 : IF (mg == 0) THEN
559 : mmax = 1
560 : ELSE
561 2663210 : mmax = mg
562 : END IF
563 2663210 : lmax = MAX(lg, (nx*ny*nz)/mmax + 1)
564 :
565 7989630 : ALLOCATE (p2p(0:numtask - 1))
566 :
567 2663210 : CALL rs_group%rank_compare(rs_group, p2p)
568 :
569 2663210 : rp = p2p(g_pos)
570 2663210 : mx1 = bo(2, 1, rp, 1) - bo(1, 1, rp, 1) + 1
571 2663210 : my1 = bo(2, 2, rp, 1) - bo(1, 2, rp, 1) + 1
572 2663210 : mx2 = bo(2, 1, rp, 2) - bo(1, 1, rp, 2) + 1
573 2663210 : mz2 = bo(2, 3, rp, 2) - bo(1, 3, rp, 2) + 1
574 :
575 7989630 : n1 = MAXVAL(bo(2, 1, :, 1) - bo(1, 1, :, 1) + 1)
576 7989630 : n2 = MAXVAL(bo(2, 2, :, 1) - bo(1, 2, :, 1) + 1)
577 2663210 : nmax = MAX((2*n2)/numtask, 2)*mx2*mz2
578 7989630 : nmax = MAX(nmax, n1*MAXVAL(nyzray))
579 7989630 : n1 = MAXVAL(bo(2, 1, :, 2))
580 7989630 : n2 = MAXVAL(bo(2, 3, :, 2))
581 :
582 2663210 : fft_scratch_size%nx = nx
583 2663210 : fft_scratch_size%ny = ny
584 2663210 : fft_scratch_size%nz = nz
585 2663210 : fft_scratch_size%lmax = lmax
586 2663210 : fft_scratch_size%mmax = mmax
587 2663210 : fft_scratch_size%mx1 = mx1
588 2663210 : fft_scratch_size%mx2 = mx2
589 2663210 : fft_scratch_size%my1 = my1
590 2663210 : fft_scratch_size%mz2 = mz2
591 2663210 : fft_scratch_size%lg = lg
592 2663210 : fft_scratch_size%mg = mg
593 2663210 : fft_scratch_size%nbx = n1
594 2663210 : fft_scratch_size%nbz = n2
595 7989630 : mcz1 = MAXVAL(bo(2, 3, :, 1) - bo(1, 3, :, 1) + 1)
596 7989630 : mcx2 = MAXVAL(bo(2, 1, :, 2) - bo(1, 1, :, 2) + 1)
597 7989630 : mcz2 = MAXVAL(bo(2, 3, :, 2) - bo(1, 3, :, 2) + 1)
598 2663210 : fft_scratch_size%mcz1 = mcz1
599 2663210 : fft_scratch_size%mcx2 = mcx2
600 2663210 : fft_scratch_size%mcz2 = mcz2
601 2663210 : fft_scratch_size%nmax = nmax
602 7989630 : fft_scratch_size%nmray = MAXVAL(nyzray)
603 2663210 : fft_scratch_size%nyzray = nyzray(g_pos)
604 2663210 : fft_scratch_size%rs_group = rs_group
605 7989630 : fft_scratch_size%g_pos = g_pos
606 7989630 : fft_scratch_size%r_pos = r_pos
607 7989630 : fft_scratch_size%r_dim = r_dim
608 2663210 : fft_scratch_size%numtask = numtask
609 :
610 2663210 : IF (test) THEN
611 8 : IF (g_pos == 0 .AND. output_unit > 0) THEN
612 4 : WRITE (output_unit, '(A)') " Parallel 3D FFT : fft3d_ps"
613 4 : WRITE (output_unit, '(A,T60,3I7)') " Transform lengths ", n
614 4 : WRITE (output_unit, '(A,T67,2I7)') " Array dimensions (gin) ", lg, mg
615 4 : WRITE (output_unit, '(A,T60,3I7)') " Array dimensions (cin) ", nx, ny, nz
616 : END IF
617 : END IF
618 :
619 2663210 : IF (r_dim(2) > 1) THEN
620 :
621 : !
622 : ! real space is distributed over x and y coordinate
623 : ! we have two stages of communication
624 : !
625 :
626 0 : IF (r_dim(1) == 1) THEN
627 0 : CPABORT("This processor distribution is not supported.")
628 : END IF
629 0 : CALL get_fft_scratch(fft_scratch, tf_type=300, n=n, fft_sizes=fft_scratch_size)
630 :
631 0 : IF (sign == FWFFT) THEN
632 : ! cin -> gin
633 :
634 0 : IF (test) THEN
635 0 : sum_data = ABS(SUM(cin))
636 0 : CALL rs_group%sum(sum_data)
637 0 : IF (g_pos == 0 .AND. output_unit > 0) THEN
638 0 : WRITE (output_unit, '(A)') " Two step communication algorithm "
639 0 : WRITE (output_unit, '(A,T60,3I7)') " Transform Z ", n(3), mx1*my1
640 0 : WRITE (output_unit, '(A,T60,3I7)') " Transform Y ", n(2), mx2*mz2
641 0 : WRITE (output_unit, '(A,T67,2I7)') " Transform X ", n(1), nyzray(g_pos)
642 0 : WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(1) ", sum_data
643 : END IF
644 : END IF
645 :
646 0 : pbuf => fft_scratch%p1buf
647 0 : qbuf => fft_scratch%p2buf
648 :
649 : ! FFT along z
650 0 : CALL fft_1dm(fft_scratch%fft_plan(1), cin, qbuf, norm, stat)
651 :
652 0 : rbuf => fft_scratch%p3buf
653 :
654 0 : IF (test) THEN
655 0 : sum_data = ABS(SUM(qbuf))
656 0 : CALL rs_group%sum(sum_data)
657 0 : IF (g_pos == 0 .AND. output_unit > 0) THEN
658 0 : WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(2) T", sum_data
659 : END IF
660 : END IF
661 :
662 : ! Exchange data ( transpose of matrix )
663 0 : CALL cube_transpose_2(qbuf, bo(:, :, :, 1), bo(:, :, :, 2), rbuf, fft_scratch)
664 :
665 0 : IF (test) THEN
666 0 : sum_data = ABS(SUM(rbuf))
667 0 : CALL rs_group%sum(sum_data)
668 0 : IF (g_pos == 0 .AND. output_unit > 0) THEN
669 0 : WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(3) T", sum_data
670 : END IF
671 : END IF
672 :
673 0 : pbuf => fft_scratch%p4buf
674 :
675 : ! FFT along y
676 0 : CALL fft_1dm(fft_scratch%fft_plan(2), rbuf, pbuf, 1.0_dp, stat)
677 :
678 0 : qbuf => fft_scratch%p5buf
679 :
680 0 : IF (test) THEN
681 0 : sum_data = ABS(SUM(pbuf))
682 0 : CALL rs_group%sum(sum_data)
683 0 : IF (g_pos == 0 .AND. output_unit > 0) THEN
684 0 : WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(4) TS", sum_data
685 : END IF
686 : END IF
687 :
688 : ! Exchange data ( transpose of matrix ) and sort
689 : CALL xz_to_yz(pbuf, rs_group, r_dim, g_pos, p2p, yzp, nyzray, &
690 0 : bo(:, :, :, 2), qbuf, fft_scratch)
691 :
692 0 : IF (test) THEN
693 0 : sum_data = ABS(SUM(qbuf))
694 0 : CALL rs_group%sum(sum_data)
695 0 : IF (g_pos == 0 .AND. output_unit > 0) THEN
696 0 : WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(5) TS", sum_data
697 : END IF
698 : END IF
699 :
700 : ! FFT along x
701 0 : CALL fft_1dm(fft_scratch%fft_plan(3), qbuf, gin, 1.0_dp, stat)
702 :
703 0 : IF (test) THEN
704 0 : sum_data = ABS(SUM(gin))
705 0 : CALL rs_group%sum(sum_data)
706 0 : IF (g_pos == 0 .AND. output_unit > 0) THEN
707 0 : WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(6) ", sum_data
708 : END IF
709 : END IF
710 :
711 0 : ELSE IF (sign == BWFFT) THEN
712 : ! gin -> cin
713 :
714 0 : IF (test) THEN
715 0 : sum_data = ABS(SUM(gin))
716 0 : CALL rs_group%sum(sum_data)
717 0 : IF (g_pos == 0 .AND. output_unit > 0) THEN
718 0 : WRITE (output_unit, '(A)') " Two step communication algorithm "
719 0 : WRITE (output_unit, '(A,T67,2I7)') " Transform X ", n(1), nyzray(g_pos)
720 0 : WRITE (output_unit, '(A,T60,3I7)') " Transform Y ", n(2), mx2*mz2
721 0 : WRITE (output_unit, '(A,T60,3I7)') " Transform Z ", n(3), mx1*my1
722 0 : WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(1) ", sum_data
723 : END IF
724 : END IF
725 :
726 0 : pbuf => fft_scratch%p7buf
727 :
728 : ! FFT along x
729 0 : CALL fft_1dm(fft_scratch%fft_plan(4), gin, pbuf, norm, stat)
730 :
731 0 : qbuf => fft_scratch%p4buf
732 :
733 0 : IF (test) THEN
734 0 : sum_data = ABS(SUM(pbuf))
735 0 : CALL rs_group%sum(sum_data)
736 0 : IF (g_pos == 0 .AND. output_unit > 0) THEN
737 0 : WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(2) TS", sum_data
738 : END IF
739 : END IF
740 :
741 : ! Exchange data ( transpose of matrix ) and sort
742 : CALL yz_to_xz(pbuf, rs_group, r_dim, g_pos, p2p, yzp, nyzray, &
743 0 : bo(:, :, :, 2), qbuf, fft_scratch)
744 :
745 0 : IF (test) THEN
746 0 : sum_data = ABS(SUM(qbuf))
747 0 : CALL rs_group%sum(sum_data)
748 0 : IF (g_pos == 0 .AND. output_unit > 0) THEN
749 0 : WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(3) TS", sum_data
750 : END IF
751 : END IF
752 :
753 0 : rbuf => fft_scratch%p3buf
754 :
755 : ! FFT along y
756 0 : CALL fft_1dm(fft_scratch%fft_plan(5), qbuf, rbuf, 1.0_dp, stat)
757 :
758 0 : pbuf => fft_scratch%p2buf
759 :
760 0 : IF (test) THEN
761 0 : sum_data = ABS(SUM(rbuf))
762 0 : CALL rs_group%sum(sum_data)
763 0 : IF (g_pos == 0 .AND. output_unit > 0) THEN
764 0 : WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(4) T", sum_data
765 : END IF
766 : END IF
767 :
768 : ! Exchange data ( transpose of matrix )
769 0 : CALL cube_transpose_1(rbuf, bo(:, :, :, 2), bo(:, :, :, 1), pbuf, fft_scratch)
770 :
771 0 : IF (test) THEN
772 0 : sum_data = ABS(SUM(pbuf))
773 0 : CALL rs_group%sum(sum_data)
774 0 : IF (g_pos == 0 .AND. output_unit > 0) THEN
775 0 : WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(5) T", sum_data
776 : END IF
777 : END IF
778 :
779 0 : qbuf => fft_scratch%p1buf
780 :
781 : ! FFT along z
782 0 : CALL fft_1dm(fft_scratch%fft_plan(6), pbuf, cin, 1.0_dp, stat)
783 :
784 0 : IF (test) THEN
785 0 : sum_data = ABS(SUM(cin))
786 0 : CALL rs_group%sum(sum_data)
787 0 : IF (g_pos == 0 .AND. output_unit > 0) THEN
788 0 : WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(6) ", sum_data
789 : END IF
790 : END IF
791 :
792 : ELSE
793 :
794 0 : CPABORT("Illegal fsign parameter.")
795 :
796 : END IF
797 :
798 0 : CALL release_fft_scratch(fft_scratch)
799 :
800 : ELSE
801 :
802 : !
803 : ! real space is only distributed over x coordinate
804 : ! we have one stage of communication, after the transform of
805 : ! direction x
806 : !
807 :
808 2663210 : CALL get_fft_scratch(fft_scratch, tf_type=200, n=n, fft_sizes=fft_scratch_size)
809 :
810 2663210 : sbuf => fft_scratch%r1buf
811 2663210 : tbuf => fft_scratch%tbuf
812 :
813 80737581213 : sbuf = z_zero
814 81119098863 : tbuf = z_zero
815 :
816 2663210 : IF (sign == FWFFT) THEN
817 : ! cin -> gin
818 :
819 1316734 : IF (test) THEN
820 9284 : sum_data = ABS(SUM(cin))
821 4 : CALL rs_group%sum(sum_data)
822 4 : IF (g_pos == 0 .AND. output_unit > 0) THEN
823 2 : WRITE (output_unit, '(A)') " One step communication algorithm "
824 2 : WRITE (output_unit, '(A,T60,3I7)') " Transform YZ ", n(2), n(3), nx
825 2 : WRITE (output_unit, '(A,T67,2I7)') " Transform X ", n(1), nyzray(g_pos)
826 2 : WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(1) ", sum_data
827 : END IF
828 : END IF
829 :
830 : ! FFT along y and z
831 1316734 : CALL fft_1dm(fft_scratch%fft_plan(1), cin, sbuf, 1._dp, stat)
832 1316734 : CALL fft_1dm(fft_scratch%fft_plan(2), sbuf, tbuf, 1._dp, stat)
833 :
834 1316734 : IF (test) THEN
835 8740 : sum_data = ABS(SUM(tbuf))
836 4 : CALL rs_group%sum(sum_data)
837 4 : IF (g_pos == 0 .AND. output_unit > 0) THEN
838 2 : WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(2) TS", sum_data
839 : END IF
840 : END IF
841 :
842 : ! Exchange data ( transpose of matrix ) and sort
843 : CALL yz_to_x(tbuf, rs_group, g_pos, p2p, yzp, nyzray, &
844 1316734 : bo(:, :, :, 2), sbuf, fft_scratch)
845 :
846 1316734 : IF (test) THEN
847 8776 : sum_data = ABS(SUM(sbuf))
848 4 : CALL rs_group%sum(sum_data)
849 4 : IF (g_pos == 0 .AND. output_unit > 0) THEN
850 2 : WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(3) TS", sum_data
851 : END IF
852 : END IF
853 : ! FFT along x
854 1316734 : CALL fft_1dm(fft_scratch%fft_plan(3), sbuf, gin, norm, stat)
855 :
856 1316734 : IF (test) THEN
857 8708 : sum_data = ABS(SUM(gin))
858 4 : CALL rs_group%sum(sum_data)
859 4 : IF (g_pos == 0 .AND. output_unit > 0) THEN
860 2 : WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(4) ", sum_data
861 : END IF
862 : END IF
863 :
864 1346476 : ELSE IF (sign == BWFFT) THEN
865 : ! gin -> cin
866 :
867 1346476 : IF (test) THEN
868 8708 : sum_data = ABS(SUM(gin))
869 4 : CALL rs_group%sum(sum_data)
870 4 : IF (g_pos == 0 .AND. output_unit > 0) THEN
871 2 : WRITE (output_unit, '(A)') " One step communication algorithm "
872 2 : WRITE (output_unit, '(A,T67,2I7)') " Transform X ", n(1), nyzray(g_pos)
873 2 : WRITE (output_unit, '(A,T60,3I7)') " Transform YZ ", n(2), n(3), nx
874 2 : WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(1) ", sum_data
875 : END IF
876 : END IF
877 :
878 : ! FFT along x
879 1346476 : CALL fft_1dm(fft_scratch%fft_plan(4), gin, sbuf, norm, stat)
880 :
881 1346476 : IF (test) THEN
882 8776 : sum_data = ABS(SUM(sbuf))
883 4 : CALL rs_group%sum(sum_data)
884 4 : IF (g_pos == 0 .AND. output_unit > 0) THEN
885 2 : WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(2) TS", sum_data
886 : END IF
887 : END IF
888 :
889 : ! Exchange data ( transpose of matrix ) and sort
890 : CALL x_to_yz(sbuf, rs_group, g_pos, p2p, yzp, nyzray, &
891 1346476 : bo(:, :, :, 2), tbuf, fft_scratch)
892 :
893 1346476 : IF (test) THEN
894 8740 : sum_data = ABS(SUM(tbuf))
895 4 : CALL rs_group%sum(sum_data)
896 4 : IF (g_pos == 0 .AND. output_unit > 0) THEN
897 2 : WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(3) TS", sum_data
898 : END IF
899 : END IF
900 :
901 : ! FFT along y and z
902 1346476 : CALL fft_1dm(fft_scratch%fft_plan(5), tbuf, sbuf, 1._dp, stat)
903 1346476 : CALL fft_1dm(fft_scratch%fft_plan(6), sbuf, cin, 1._dp, stat)
904 :
905 1346476 : IF (test) THEN
906 9284 : sum_data = ABS(SUM(cin))
907 4 : CALL rs_group%sum(sum_data)
908 4 : IF (g_pos == 0 .AND. output_unit > 0) THEN
909 2 : WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(4) ", sum_data
910 : END IF
911 : END IF
912 : ELSE
913 0 : CPABORT("Illegal fsign parameter.")
914 : END IF
915 :
916 2663210 : CALL release_fft_scratch(fft_scratch)
917 :
918 : END IF
919 :
920 2663210 : DEALLOCATE (p2p)
921 :
922 2663210 : IF (PRESENT(status)) THEN
923 0 : status = stat
924 : END IF
925 2663210 : CALL timestop(handle)
926 :
927 5326420 : END SUBROUTINE fft3d_ps
928 :
929 : ! **************************************************************************************************
930 : !> \brief ...
931 : !> \param fsign ...
932 : !> \param n ...
933 : !> \param zin ...
934 : !> \param gin ...
935 : !> \param group ...
936 : !> \param bo ...
937 : !> \param status ...
938 : !> \param debug ...
939 : ! **************************************************************************************************
940 208 : SUBROUTINE fft3d_pb(fsign, n, zin, gin, group, bo, status, debug)
941 :
942 : INTEGER, INTENT(IN) :: fsign
943 : INTEGER, DIMENSION(3), INTENT(IN) :: n
944 : COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :, :), &
945 : INTENT(INOUT) :: zin
946 : COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
947 : INTENT(INOUT) :: gin
948 : TYPE(mp_cart_type), INTENT(IN) :: group
949 : INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:, :), &
950 : INTENT(IN) :: bo
951 : INTEGER, INTENT(OUT), OPTIONAL :: status
952 : LOGICAL, INTENT(IN), OPTIONAL :: debug
953 :
954 : CHARACTER(len=*), PARAMETER :: routineN = 'fft3d_pb'
955 :
956 : COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
957 208 : POINTER :: abuf, bbuf
958 : INTEGER :: handle, lg(2), lz(3), mcx2, mcy3, mcz1, &
959 : mcz2, mx1, mx2, mx3, my1, my2, my3, &
960 : my_pos, mz1, mz2, mz3, output_unit, &
961 : sign, stat
962 : INTEGER, DIMENSION(2) :: dim
963 : LOGICAL :: test
964 : REAL(KIND=dp) :: norm, sum_data
965 1456 : TYPE(fft_scratch_sizes) :: fft_scratch_size
966 : TYPE(fft_scratch_type), POINTER :: fft_scratch
967 :
968 : !------------------------------------------------------------------------------
969 : ! "Real Space" 1) xyZ or 1) xYZ
970 : ! 2) xYz or not used
971 : ! "G Space" 3) Xyz or 3) XYz
972 : !
973 : ! There is one communicator (2-dimensional) for all distributions
974 : ! np = n1 * n2, where np is the total number of processors
975 : ! If n2 = 1, we have the second case and only one transpose step is needed
976 : !
977 : ! Assignment of dimensions to axis for different steps
978 : ! First case: 1) n1=x; n2=y
979 : ! 2) n1=x; n2=z
980 : ! 3) n1=y; n2=z
981 : ! Second case 1) n1=x
982 : ! 3) n1=z
983 : !
984 : ! The more general case with two communicators for the initial and final
985 : ! distribution is not covered.
986 : !------------------------------------------------------------------------------
987 :
988 208 : CALL timeset(routineN, handle)
989 208 : output_unit = cp_logger_get_default_io_unit()
990 :
991 624 : dim = group%num_pe_cart
992 208 : my_pos = group%mepos
993 :
994 208 : IF (PRESENT(debug)) THEN
995 208 : test = debug
996 : ELSE
997 : test = .FALSE.
998 : END IF
999 :
1000 208 : IF (fsign == FWFFT) THEN
1001 416 : norm = 1.0_dp/REAL(PRODUCT(n), KIND=dp)
1002 104 : ELSE IF (fsign == BWFFT) THEN
1003 104 : norm = 1.0_dp
1004 : ELSE
1005 0 : CPABORT("Unknown FFT direction!")
1006 : END IF
1007 :
1008 208 : sign = fsign
1009 :
1010 208 : IF (test) THEN
1011 8 : lg(1) = SIZE(gin, 1)
1012 8 : lg(2) = SIZE(gin, 2)
1013 8 : lz(1) = SIZE(zin, 1)
1014 8 : lz(2) = SIZE(zin, 2)
1015 8 : lz(3) = SIZE(zin, 3)
1016 8 : IF (my_pos == 0 .AND. output_unit > 0) THEN
1017 4 : WRITE (output_unit, '(A)') " Parallel 3D FFT : fft3d_pb"
1018 4 : WRITE (output_unit, '(A,T60,3I7)') " Transform lengths ", n
1019 4 : WRITE (output_unit, '(A,T67,2I7)') " Array dimensions (gin) ", lg
1020 4 : WRITE (output_unit, '(A,T60,3I7)') " Array dimensions (cin) ", lz
1021 : END IF
1022 : END IF
1023 :
1024 208 : mx1 = bo(2, 1, my_pos, 1) - bo(1, 1, my_pos, 1) + 1
1025 208 : my1 = bo(2, 2, my_pos, 1) - bo(1, 2, my_pos, 1) + 1
1026 208 : mz1 = bo(2, 3, my_pos, 1) - bo(1, 3, my_pos, 1) + 1
1027 208 : mx2 = bo(2, 1, my_pos, 2) - bo(1, 1, my_pos, 2) + 1
1028 208 : my2 = bo(2, 2, my_pos, 2) - bo(1, 2, my_pos, 2) + 1
1029 208 : mz2 = bo(2, 3, my_pos, 2) - bo(1, 3, my_pos, 2) + 1
1030 208 : mx3 = bo(2, 1, my_pos, 3) - bo(1, 1, my_pos, 3) + 1
1031 208 : my3 = bo(2, 2, my_pos, 3) - bo(1, 2, my_pos, 3) + 1
1032 208 : mz3 = bo(2, 3, my_pos, 3) - bo(1, 3, my_pos, 3) + 1
1033 208 : fft_scratch_size%mx1 = mx1
1034 208 : fft_scratch_size%mx2 = mx2
1035 208 : fft_scratch_size%mx3 = mx3
1036 208 : fft_scratch_size%my1 = my1
1037 208 : fft_scratch_size%my2 = my2
1038 208 : fft_scratch_size%my3 = my3
1039 208 : fft_scratch_size%mz1 = mz1
1040 208 : fft_scratch_size%mz2 = mz2
1041 208 : fft_scratch_size%mz3 = mz3
1042 624 : mcz1 = MAXVAL(bo(2, 3, :, 1) - bo(1, 3, :, 1) + 1)
1043 624 : mcx2 = MAXVAL(bo(2, 1, :, 2) - bo(1, 1, :, 2) + 1)
1044 624 : mcz2 = MAXVAL(bo(2, 3, :, 2) - bo(1, 3, :, 2) + 1)
1045 624 : mcy3 = MAXVAL(bo(2, 2, :, 3) - bo(1, 2, :, 3) + 1)
1046 208 : fft_scratch_size%mcz1 = mcz1
1047 208 : fft_scratch_size%mcx2 = mcx2
1048 208 : fft_scratch_size%mcz2 = mcz2
1049 208 : fft_scratch_size%mcy3 = mcy3
1050 208 : fft_scratch_size%rs_group = group
1051 624 : fft_scratch_size%g_pos = my_pos
1052 208 : fft_scratch_size%numtask = DIM(1)*DIM(2)
1053 :
1054 208 : IF (DIM(1) > 1 .AND. DIM(2) > 1) THEN
1055 :
1056 : !
1057 : ! First case; two stages of communication
1058 : !
1059 :
1060 0 : CALL get_fft_scratch(fft_scratch, tf_type=100, n=n, fft_sizes=fft_scratch_size)
1061 :
1062 0 : IF (sign == FWFFT) THEN
1063 : ! Stage 1 -> 3
1064 :
1065 0 : bbuf => fft_scratch%a2buf
1066 :
1067 0 : IF (test) THEN
1068 0 : sum_data = ABS(SUM(zin))
1069 0 : CALL group%sum(sum_data)
1070 0 : IF (my_pos == 0 .AND. output_unit > 0) THEN
1071 0 : WRITE (output_unit, '(A)') " Two step communication algorithm "
1072 0 : WRITE (output_unit, '(A,T67,2I7)') " Transform Z ", n(3), mx1*my1
1073 0 : WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(1) ", sum_data
1074 : END IF
1075 : END IF
1076 :
1077 : ! FFT along z
1078 0 : CALL fft_1dm(fft_scratch%fft_plan(1), zin, bbuf, norm, stat)
1079 :
1080 0 : abuf => fft_scratch%a3buf
1081 :
1082 0 : IF (test) THEN
1083 0 : sum_data = ABS(SUM(bbuf))
1084 0 : CALL group%sum(sum_data)
1085 0 : IF (my_pos == 0 .AND. output_unit > 0) THEN
1086 0 : WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(2) T", sum_data
1087 : END IF
1088 : END IF
1089 :
1090 0 : CALL cube_transpose_2(bbuf, bo(:, :, :, 1), bo(:, :, :, 2), abuf, fft_scratch)
1091 :
1092 0 : bbuf => fft_scratch%a4buf
1093 :
1094 0 : IF (test) THEN
1095 0 : sum_data = ABS(SUM(abuf))
1096 0 : CALL group%sum(sum_data)
1097 0 : IF (my_pos == 0 .AND. output_unit > 0) THEN
1098 0 : WRITE (output_unit, '(A,T67,2I7)') " Transform Y ", n(2), mx2*mz2
1099 0 : WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(3) ", sum_data
1100 : END IF
1101 : END IF
1102 :
1103 : ! FFT along y
1104 0 : CALL fft_1dm(fft_scratch%fft_plan(2), abuf, bbuf, 1.0_dp, stat)
1105 :
1106 0 : abuf => fft_scratch%a5buf
1107 :
1108 0 : IF (test) THEN
1109 0 : sum_data = ABS(SUM(bbuf))
1110 0 : CALL group%sum(sum_data)
1111 0 : IF (my_pos == 0 .AND. output_unit > 0) THEN
1112 0 : WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(4) T", sum_data
1113 : END IF
1114 : END IF
1115 :
1116 0 : CALL cube_transpose_4(bbuf, bo(:, :, :, 2), bo(:, :, :, 3), abuf, fft_scratch)
1117 :
1118 0 : IF (test) THEN
1119 0 : sum_data = ABS(SUM(abuf))
1120 0 : CALL group%sum(sum_data)
1121 0 : IF (my_pos == 0 .AND. output_unit > 0) THEN
1122 0 : WRITE (output_unit, '(A,T67,2I7)') " Transform X ", n(1), my3*mz3
1123 0 : WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(5) ", sum_data
1124 : END IF
1125 : END IF
1126 :
1127 : ! FFT along x
1128 0 : CALL fft_1dm(fft_scratch%fft_plan(3), abuf, gin, 1.0_dp, stat)
1129 :
1130 0 : IF (test) THEN
1131 0 : sum_data = ABS(SUM(gin))
1132 0 : CALL group%sum(sum_data)
1133 0 : IF (my_pos == 0 .AND. output_unit > 0) THEN
1134 0 : WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(6) ", sum_data
1135 : END IF
1136 : END IF
1137 :
1138 0 : ELSEIF (sign == BWFFT) THEN
1139 : ! Stage 3 -> 1
1140 :
1141 0 : bbuf => fft_scratch%a5buf
1142 :
1143 0 : IF (test) THEN
1144 0 : sum_data = ABS(SUM(gin))
1145 0 : CALL group%sum(sum_data)
1146 0 : IF (my_pos == 0 .AND. output_unit > 0) THEN
1147 0 : WRITE (output_unit, '(A)') " Two step communication algorithm "
1148 0 : WRITE (output_unit, '(A,T67,2I7)') " Transform X ", n(1), my3*mz3
1149 0 : WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(1) ", sum_data
1150 : END IF
1151 : END IF
1152 :
1153 : ! FFT along x
1154 0 : CALL fft_1dm(fft_scratch%fft_plan(4), gin, bbuf, 1.0_dp, stat)
1155 :
1156 0 : abuf => fft_scratch%a4buf
1157 :
1158 0 : IF (test) THEN
1159 0 : sum_data = ABS(SUM(bbuf))
1160 0 : CALL group%sum(sum_data)
1161 0 : IF (my_pos == 0 .AND. output_unit > 0) THEN
1162 0 : WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(2) T", sum_data
1163 : END IF
1164 : END IF
1165 :
1166 0 : CALL cube_transpose_3(bbuf, bo(:, :, :, 3), bo(:, :, :, 2), abuf, fft_scratch)
1167 :
1168 0 : bbuf => fft_scratch%a3buf
1169 :
1170 0 : IF (test) THEN
1171 0 : sum_data = ABS(SUM(abuf))
1172 0 : CALL group%sum(sum_data)
1173 0 : IF (my_pos == 0 .AND. output_unit > 0) THEN
1174 0 : WRITE (output_unit, '(A,T67,2I7)') " Transform Y ", n(2), mx2*mz2
1175 0 : WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(3) ", sum_data
1176 : END IF
1177 : END IF
1178 :
1179 : ! FFT along y
1180 0 : CALL fft_1dm(fft_scratch%fft_plan(5), abuf, bbuf, 1.0_dp, stat)
1181 :
1182 0 : abuf => fft_scratch%a2buf
1183 :
1184 0 : IF (test) THEN
1185 0 : sum_data = ABS(SUM(bbuf))
1186 0 : CALL group%sum(sum_data)
1187 0 : IF (my_pos == 0 .AND. output_unit > 0) THEN
1188 0 : WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(4) T", sum_data
1189 : END IF
1190 : END IF
1191 :
1192 0 : CALL cube_transpose_1(bbuf, bo(:, :, :, 2), bo(:, :, :, 1), abuf, fft_scratch)
1193 :
1194 0 : IF (test) THEN
1195 0 : sum_data = ABS(SUM(abuf))
1196 0 : CALL group%sum(sum_data)
1197 0 : IF (my_pos == 0 .AND. output_unit > 0) THEN
1198 0 : WRITE (output_unit, '(A,T67,2I7)') " Transform Z ", n(3), mx1*my1
1199 0 : WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(5) ", sum_data
1200 : END IF
1201 : END IF
1202 :
1203 : ! FFT along z
1204 0 : CALL fft_1dm(fft_scratch%fft_plan(6), abuf, zin, norm, stat)
1205 :
1206 0 : IF (test) THEN
1207 0 : sum_data = ABS(SUM(zin))
1208 0 : CALL group%sum(sum_data)
1209 0 : IF (my_pos == 0 .AND. output_unit > 0) THEN
1210 0 : WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(6) ", sum_data
1211 : END IF
1212 : END IF
1213 :
1214 : ELSE
1215 0 : CPABORT("Illegal fsign parameter.")
1216 : END IF
1217 :
1218 0 : CALL release_fft_scratch(fft_scratch)
1219 :
1220 208 : ELSEIF (DIM(2) == 1) THEN
1221 :
1222 : !
1223 : ! Second case; one stage of communication
1224 : !
1225 :
1226 208 : CALL get_fft_scratch(fft_scratch, tf_type=101, n=n, fft_sizes=fft_scratch_size)
1227 :
1228 208 : IF (sign == FWFFT) THEN
1229 : ! Stage 1 -> 3
1230 :
1231 104 : IF (test) THEN
1232 9284 : sum_data = ABS(SUM(zin))
1233 4 : CALL group%sum(sum_data)
1234 4 : IF (my_pos == 0 .AND. output_unit > 0) THEN
1235 2 : WRITE (output_unit, '(A)') " one step communication algorithm "
1236 2 : WRITE (output_unit, '(A,T67,2I7)') " Transform Z ", n(3), mx1*my1
1237 2 : WRITE (output_unit, '(A,T67,2I7)') " Transform Y ", n(2), mx1*mz1
1238 2 : WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(1) ", sum_data
1239 : END IF
1240 : END IF
1241 :
1242 104 : abuf => fft_scratch%a3buf
1243 104 : bbuf => fft_scratch%a4buf
1244 : ! FFT along z and y
1245 104 : CALL fft_1dm(fft_scratch%fft_plan(1), zin, abuf, norm, stat)
1246 104 : CALL fft_1dm(fft_scratch%fft_plan(2), abuf, bbuf, 1.0_dp, stat)
1247 :
1248 104 : abuf => fft_scratch%a5buf
1249 :
1250 104 : IF (test) THEN
1251 8708 : sum_data = ABS(SUM(bbuf))
1252 4 : CALL group%sum(sum_data)
1253 4 : IF (my_pos == 0 .AND. output_unit > 0) THEN
1254 2 : WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(2) T", sum_data
1255 : END IF
1256 : END IF
1257 :
1258 104 : CALL cube_transpose_6(bbuf, group, bo(:, :, :, 1), bo(:, :, :, 3), abuf, fft_scratch)
1259 :
1260 104 : IF (test) THEN
1261 8260 : sum_data = ABS(SUM(abuf))
1262 4 : CALL group%sum(sum_data)
1263 4 : IF (my_pos == 0 .AND. output_unit > 0) THEN
1264 2 : WRITE (output_unit, '(A,T67,2I7)') " Transform X ", n(1), my3*mz3
1265 2 : WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(3) ", sum_data
1266 : END IF
1267 : END IF
1268 :
1269 : ! FFT along x
1270 104 : CALL fft_1dm(fft_scratch%fft_plan(3), abuf, gin, 1.0_dp, stat)
1271 :
1272 104 : IF (test) THEN
1273 8708 : sum_data = ABS(SUM(gin))
1274 4 : CALL group%sum(sum_data)
1275 4 : IF (my_pos == 0 .AND. output_unit > 0) THEN
1276 2 : WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(4) ", sum_data
1277 : END IF
1278 : END IF
1279 :
1280 104 : ELSEIF (sign == BWFFT) THEN
1281 : ! Stage 3 -> 1
1282 :
1283 104 : IF (test) THEN
1284 8708 : sum_data = ABS(SUM(gin))
1285 4 : CALL group%sum(sum_data)
1286 4 : IF (my_pos == 0 .AND. output_unit > 0) THEN
1287 2 : WRITE (output_unit, '(A)') " one step communication algorithm "
1288 2 : WRITE (output_unit, '(A,T67,2I7)') " Transform X ", n(1), my3*mz3
1289 2 : WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(1) ", sum_data
1290 : END IF
1291 : END IF
1292 :
1293 104 : bbuf => fft_scratch%a5buf
1294 :
1295 : ! FFT along x
1296 104 : CALL fft_1dm(fft_scratch%fft_plan(4), gin, bbuf, 1.0_dp, stat)
1297 :
1298 104 : abuf => fft_scratch%a4buf
1299 :
1300 104 : IF (test) THEN
1301 8260 : sum_data = ABS(SUM(bbuf))
1302 4 : CALL group%sum(sum_data)
1303 4 : IF (my_pos == 0 .AND. output_unit > 0) THEN
1304 2 : WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(2) T", sum_data
1305 : END IF
1306 : END IF
1307 :
1308 104 : CALL cube_transpose_5(bbuf, group, bo(:, :, :, 3), bo(:, :, :, 1), abuf, fft_scratch)
1309 :
1310 104 : bbuf => fft_scratch%a3buf
1311 :
1312 104 : IF (test) THEN
1313 8708 : sum_data = ABS(SUM(abuf))
1314 4 : CALL group%sum(sum_data)
1315 4 : IF (my_pos == 0 .AND. output_unit > 0) THEN
1316 2 : WRITE (output_unit, '(A,T67,2I7)') " Transform Y ", n(2), mx1*mz1
1317 2 : WRITE (output_unit, '(A,T67,2I7)') " Transform Z ", n(3), mx1*my1
1318 2 : WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(3) ", sum_data
1319 : END IF
1320 : END IF
1321 :
1322 : ! FFT along y
1323 104 : CALL fft_1dm(fft_scratch%fft_plan(5), abuf, bbuf, 1.0_dp, stat)
1324 :
1325 : ! FFT along z
1326 104 : CALL fft_1dm(fft_scratch%fft_plan(6), bbuf, zin, norm, stat)
1327 :
1328 104 : IF (test) THEN
1329 9284 : sum_data = ABS(SUM(zin))
1330 4 : CALL group%sum(sum_data)
1331 4 : IF (my_pos == 0 .AND. output_unit > 0) THEN
1332 2 : WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(4) ", sum_data
1333 : END IF
1334 : END IF
1335 :
1336 : ELSE
1337 0 : CPABORT("Illegal fsign parameter.")
1338 : END IF
1339 :
1340 208 : CALL release_fft_scratch(fft_scratch)
1341 :
1342 : ELSE
1343 :
1344 0 : CPABORT("Partition not implemented.")
1345 :
1346 : END IF
1347 :
1348 208 : IF (PRESENT(status)) THEN
1349 0 : status = stat
1350 : END IF
1351 :
1352 208 : CALL timestop(handle)
1353 :
1354 416 : END SUBROUTINE fft3d_pb
1355 :
1356 : ! **************************************************************************************************
1357 : !> \brief ...
1358 : !> \param sb ...
1359 : !> \param group ...
1360 : !> \param my_pos ...
1361 : !> \param p2p ...
1362 : !> \param yzp ...
1363 : !> \param nray ...
1364 : !> \param bo ...
1365 : !> \param tb ...
1366 : !> \param fft_scratch ...
1367 : !> \par History
1368 : !> 15. Feb. 2006 : single precision all_to_all
1369 : !> \author JGH (14-Jan-2001)
1370 : ! **************************************************************************************************
1371 1346476 : SUBROUTINE x_to_yz(sb, group, my_pos, p2p, yzp, nray, bo, tb, fft_scratch)
1372 :
1373 : COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
1374 : INTENT(IN) :: sb
1375 :
1376 : CLASS(mp_comm_type), INTENT(IN) :: group
1377 : INTEGER, INTENT(IN) :: my_pos
1378 : INTEGER, CONTIGUOUS, DIMENSION(0:), INTENT(IN) :: p2p
1379 : INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:), &
1380 : INTENT(IN) :: yzp
1381 : INTEGER, CONTIGUOUS, DIMENSION(0:), INTENT(IN) :: nray
1382 : INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:), &
1383 : INTENT(IN) :: bo
1384 : COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :, :), &
1385 : INTENT(INOUT) :: tb
1386 : TYPE(fft_scratch_type), INTENT(IN) :: fft_scratch
1387 :
1388 : CHARACTER(len=*), PARAMETER :: routineN = 'x_to_yz'
1389 :
1390 : COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
1391 1346476 : POINTER :: rr
1392 : COMPLEX(KIND=sp), CONTIGUOUS, DIMENSION(:, :), &
1393 1346476 : POINTER :: ss, tt
1394 : INTEGER :: handle, ip, ir, ix, ixx, iy, iz, mpr, &
1395 : nm, np, nr, nx
1396 1346476 : INTEGER, CONTIGUOUS, DIMENSION(:), POINTER :: rcount, rdispl, scount, sdispl
1397 :
1398 1346476 : CALL timeset(routineN, handle)
1399 :
1400 1346476 : np = SIZE(p2p)
1401 1346476 : scount => fft_scratch%scount
1402 1346476 : rcount => fft_scratch%rcount
1403 1346476 : sdispl => fft_scratch%sdispl
1404 1346476 : rdispl => fft_scratch%rdispl
1405 :
1406 1346476 : IF (alltoall_sgl) THEN
1407 230 : ss => fft_scratch%ss
1408 230 : tt => fft_scratch%tt
1409 3441099 : ss(:, :) = CMPLX(sb(:, :), KIND=sp)
1410 3353860 : tt(:, :) = 0._sp
1411 : ELSE
1412 1346246 : rr => fft_scratch%rr
1413 : END IF
1414 :
1415 1346476 : mpr = p2p(my_pos)
1416 4039428 : nm = MAXVAL(nray(0:np - 1))
1417 1346476 : nr = nray(my_pos)
1418 : !$OMP PARALLEL DO DEFAULT(NONE), &
1419 : !$OMP PRIVATE(ix,nx), &
1420 1346476 : !$OMP SHARED(np,p2p,bo,nr,scount,sdispl)
1421 : DO ip = 0, np - 1
1422 : ix = p2p(ip)
1423 : nx = bo(2, 1, ix) - bo(1, 1, ix) + 1
1424 : scount(ip) = nr*nx
1425 : sdispl(ip) = nr*(bo(1, 1, ix) - 1)
1426 : END DO
1427 : !$OMP END PARALLEL DO
1428 1346476 : nx = bo(2, 1, mpr) - bo(1, 1, mpr) + 1
1429 : !$OMP PARALLEL DO DEFAULT(NONE), &
1430 : !$OMP PRIVATE(nr), &
1431 1346476 : !$OMP SHARED(np,nray,nx,rcount,rdispl,nm)
1432 : DO ip = 0, np - 1
1433 : nr = nray(ip)
1434 : rcount(ip) = nr*nx
1435 : rdispl(ip) = nm*nx*ip
1436 : END DO
1437 : !$OMP END PARALLEL DO
1438 1346476 : IF (alltoall_sgl) THEN
1439 230 : CALL group%alltoall(ss, scount, sdispl, tt, rcount, rdispl)
1440 : ELSE
1441 1346246 : CALL group%alltoall(sb, scount, sdispl, rr, rcount, rdispl)
1442 : END IF
1443 :
1444 1346476 : nx = bo(2, 1, mpr) - bo(1, 1, mpr) + 1
1445 : !$OMP PARALLEL DO DEFAULT(NONE) COLLAPSE(2) &
1446 : !$OMP PRIVATE(ixx,ir,iy,iz,ix) &
1447 1346476 : !$OMP SHARED(np,nray,nx,alltoall_sgl,yzp,tt,rr,tb)
1448 : DO ip = 0, np - 1
1449 : DO ix = 1, nx
1450 : ixx = nray(ip)*(ix - 1)
1451 : IF (alltoall_sgl) THEN
1452 : DO ir = 1, nray(ip)
1453 : iy = yzp(1, ir, ip)
1454 : iz = yzp(2, ir, ip)
1455 : tb(iy, iz, ix) = tt(ir + ixx, ip)
1456 : END DO
1457 : ELSE
1458 : DO ir = 1, nray(ip)
1459 : iy = yzp(1, ir, ip)
1460 : iz = yzp(2, ir, ip)
1461 : tb(iy, iz, ix) = rr(ir + ixx, ip)
1462 : END DO
1463 : END IF
1464 : END DO
1465 : END DO
1466 : !$OMP END PARALLEL DO
1467 :
1468 1346476 : CALL timestop(handle)
1469 :
1470 1346476 : END SUBROUTINE x_to_yz
1471 :
1472 : ! **************************************************************************************************
1473 : !> \brief ...
1474 : !> \param tb ...
1475 : !> \param group ...
1476 : !> \param my_pos ...
1477 : !> \param p2p ...
1478 : !> \param yzp ...
1479 : !> \param nray ...
1480 : !> \param bo ...
1481 : !> \param sb ...
1482 : !> \param fft_scratch ...
1483 : !> \par History
1484 : !> 15. Feb. 2006 : single precision all_to_all
1485 : !> \author JGH (14-Jan-2001)
1486 : ! **************************************************************************************************
1487 1316734 : SUBROUTINE yz_to_x(tb, group, my_pos, p2p, yzp, nray, bo, sb, fft_scratch)
1488 :
1489 : COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :, :), &
1490 : INTENT(IN) :: tb
1491 :
1492 : CLASS(mp_comm_type), INTENT(IN) :: group
1493 : INTEGER, INTENT(IN) :: my_pos
1494 : INTEGER, CONTIGUOUS, DIMENSION(0:), INTENT(IN) :: p2p
1495 : INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:), &
1496 : INTENT(IN) :: yzp
1497 : INTEGER, CONTIGUOUS, DIMENSION(0:), INTENT(IN) :: nray
1498 : INTEGER, DIMENSION(:, :, 0:), INTENT(IN) :: bo
1499 : COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
1500 : INTENT(INOUT) :: sb
1501 : TYPE(fft_scratch_type), INTENT(IN) :: fft_scratch
1502 :
1503 : CHARACTER(len=*), PARAMETER :: routineN = 'yz_to_x'
1504 :
1505 : COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
1506 1316734 : POINTER :: rr
1507 : COMPLEX(KIND=sp), CONTIGUOUS, DIMENSION(:, :), &
1508 1316734 : POINTER :: ss, tt
1509 : INTEGER :: handle, ip, ir, ix, ixx, iy, iz, mpr, &
1510 : nm, np, nr, nx
1511 1316734 : INTEGER, CONTIGUOUS, DIMENSION(:), POINTER :: rcount, rdispl, scount, sdispl
1512 :
1513 1316734 : CALL timeset(routineN, handle)
1514 :
1515 1316734 : np = SIZE(p2p)
1516 1316734 : mpr = p2p(my_pos)
1517 1316734 : scount => fft_scratch%scount
1518 1316734 : rcount => fft_scratch%rcount
1519 1316734 : sdispl => fft_scratch%sdispl
1520 1316734 : rdispl => fft_scratch%rdispl
1521 :
1522 1316734 : IF (alltoall_sgl) THEN
1523 238 : ss => fft_scratch%ss
1524 238 : tt => fft_scratch%tt
1525 3703835 : ss = 0._sp
1526 3609884 : tt = 0._sp
1527 : ELSE
1528 1316496 : rr => fft_scratch%rr
1529 : END IF
1530 :
1531 1316734 : nx = bo(2, 1, mpr) - bo(1, 1, mpr) + 1
1532 : !$OMP PARALLEL DO DEFAULT(NONE) COLLAPSE(2) &
1533 : !$OMP PRIVATE(ip, ixx, ir, iy, iz, ix) &
1534 1316734 : !$OMP SHARED(np,nray,nx,alltoall_sgl,yzp,tb,tt,rr)
1535 : DO ip = 0, np - 1
1536 : DO ix = 1, nx
1537 : ixx = nray(ip)*(ix - 1)
1538 : IF (alltoall_sgl) THEN
1539 : DO ir = 1, nray(ip)
1540 : iy = yzp(1, ir, ip)
1541 : iz = yzp(2, ir, ip)
1542 : tt(ir + ixx, ip) = CMPLX(tb(iy, iz, ix), KIND=sp)
1543 : END DO
1544 : ELSE
1545 : DO ir = 1, nray(ip)
1546 : iy = yzp(1, ir, ip)
1547 : iz = yzp(2, ir, ip)
1548 : rr(ir + ixx, ip) = tb(iy, iz, ix)
1549 : END DO
1550 : END IF
1551 : END DO
1552 : END DO
1553 : !$OMP END PARALLEL DO
1554 3950202 : nm = MAXVAL(nray(0:np - 1))
1555 1316734 : nr = nray(my_pos)
1556 : !$OMP PARALLEL DO DEFAULT(NONE), &
1557 : !$OMP PRIVATE(ix,nx), &
1558 1316734 : !$OMP SHARED(np,p2p,bo,rcount,rdispl,nr)
1559 : DO ip = 0, np - 1
1560 : ix = p2p(ip)
1561 : nx = bo(2, 1, ix) - bo(1, 1, ix) + 1
1562 : rcount(ip) = nr*nx
1563 : rdispl(ip) = nr*(bo(1, 1, ix) - 1)
1564 : END DO
1565 : !$OMP END PARALLEL DO
1566 1316734 : nx = bo(2, 1, mpr) - bo(1, 1, mpr) + 1
1567 : !$OMP PARALLEL DO DEFAULT(NONE), &
1568 : !$OMP PRIVATE(nr), &
1569 1316734 : !$OMP SHARED(np,nray,scount,sdispl,nx,nm)
1570 : DO ip = 0, np - 1
1571 : nr = nray(ip)
1572 : scount(ip) = nr*nx
1573 : sdispl(ip) = nm*nx*ip
1574 : END DO
1575 : !$OMP END PARALLEL DO
1576 :
1577 1316734 : IF (alltoall_sgl) THEN
1578 238 : CALL group%alltoall(tt, scount, sdispl, ss, rcount, rdispl)
1579 3703835 : sb = ss
1580 : ELSE
1581 1316496 : CALL group%alltoall(rr, scount, sdispl, sb, rcount, rdispl)
1582 : END IF
1583 :
1584 1316734 : CALL timestop(handle)
1585 :
1586 1316734 : END SUBROUTINE yz_to_x
1587 :
1588 : ! **************************************************************************************************
1589 : !> \brief ...
1590 : !> \param sb ...
1591 : !> \param group ...
1592 : !> \param dims ...
1593 : !> \param my_pos ...
1594 : !> \param p2p ...
1595 : !> \param yzp ...
1596 : !> \param nray ...
1597 : !> \param bo ...
1598 : !> \param tb ...
1599 : !> \param fft_scratch ...
1600 : !> \par History
1601 : !> 15. Feb. 2006 : single precision all_to_all
1602 : !> \author JGH (18-Jan-2001)
1603 : ! **************************************************************************************************
1604 0 : SUBROUTINE yz_to_xz(sb, group, dims, my_pos, p2p, yzp, nray, bo, tb, fft_scratch)
1605 :
1606 : COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
1607 : INTENT(IN) :: sb
1608 :
1609 : CLASS(mp_comm_type), INTENT(IN) :: group
1610 : INTEGER, DIMENSION(2), INTENT(IN) :: dims
1611 : INTEGER, INTENT(IN) :: my_pos
1612 : INTEGER, CONTIGUOUS, DIMENSION(0:), INTENT(IN) :: p2p
1613 : INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:), INTENT(IN) :: yzp
1614 : INTEGER, CONTIGUOUS, DIMENSION(0:), INTENT(IN) :: nray
1615 : INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:), INTENT(IN) :: bo
1616 : COMPLEX(KIND=dp), DIMENSION(:, :), INTENT(INOUT), CONTIGUOUS :: tb
1617 : TYPE(fft_scratch_type), INTENT(INOUT) :: fft_scratch
1618 :
1619 : CHARACTER(len=*), PARAMETER :: routineN = 'yz_to_xz'
1620 :
1621 0 : COMPLEX(KIND=dp), DIMENSION(:), POINTER, CONTIGUOUS :: xzbuf, yzbuf
1622 0 : COMPLEX(KIND=sp), DIMENSION(:), POINTER, CONTIGUOUS :: xzbuf_sgl, yzbuf_sgl
1623 : INTEGER :: handle, icrs, ip, ipl, ipr, ir, ix, iz, &
1624 : jj, jx, jy, jz, myx, myz, np, npx, &
1625 : npz, nx, nz, rs_pos
1626 0 : INTEGER, DIMENSION(:), POINTER, CONTIGUOUS :: pzcoord, rcount, rdispl, scount, sdispl, &
1627 0 : xcor, zcor
1628 0 : INTEGER, DIMENSION(:, :), CONTIGUOUS, POINTER :: pgrid
1629 :
1630 0 : CALL timeset(routineN, handle)
1631 :
1632 0 : np = SIZE(p2p)
1633 :
1634 0 : rs_pos = p2p(my_pos)
1635 :
1636 0 : IF (alltoall_sgl) THEN
1637 0 : yzbuf_sgl => fft_scratch%yzbuf_sgl
1638 0 : xzbuf_sgl => fft_scratch%xzbuf_sgl
1639 : ELSE
1640 0 : yzbuf => fft_scratch%yzbuf
1641 0 : xzbuf => fft_scratch%xzbuf
1642 : END IF
1643 0 : npx = dims(1)
1644 0 : npz = dims(2)
1645 0 : pgrid => fft_scratch%pgrid
1646 0 : xcor => fft_scratch%xcor
1647 0 : zcor => fft_scratch%zcor
1648 0 : pzcoord => fft_scratch%pzcoord
1649 0 : scount => fft_scratch%scount
1650 0 : rcount => fft_scratch%rcount
1651 0 : sdispl => fft_scratch%sdispl
1652 0 : rdispl => fft_scratch%rdispl
1653 :
1654 0 : nx = SIZE(sb, 2)
1655 :
1656 : ! If the send and recv counts are not already cached, then
1657 : ! calculate and store them
1658 0 : IF (fft_scratch%in == 0) THEN
1659 :
1660 0 : scount = 0
1661 :
1662 0 : DO ix = 0, npx - 1
1663 0 : ip = pgrid(ix, 0)
1664 0 : xcor(bo(1, 1, ip):bo(2, 1, ip)) = ix
1665 : END DO
1666 0 : DO iz = 0, npz - 1
1667 0 : ip = pgrid(0, iz)
1668 0 : zcor(bo(1, 3, ip):bo(2, 3, ip)) = iz
1669 : END DO
1670 0 : DO jx = 1, nx
1671 0 : IF (alltoall_sgl) THEN
1672 0 : DO ir = 1, nray(my_pos)
1673 0 : jy = yzp(1, ir, my_pos)
1674 0 : jz = yzp(2, ir, my_pos)
1675 0 : ip = pgrid(xcor(jx), zcor(jz))
1676 0 : scount(ip) = scount(ip) + 1
1677 : END DO
1678 : ELSE
1679 0 : DO ir = 1, nray(my_pos)
1680 0 : jy = yzp(1, ir, my_pos)
1681 0 : jz = yzp(2, ir, my_pos)
1682 0 : ip = pgrid(xcor(jx), zcor(jz))
1683 0 : scount(ip) = scount(ip) + 1
1684 : END DO
1685 : END IF
1686 : END DO
1687 :
1688 0 : CALL group%alltoall(scount, rcount, 1)
1689 0 : fft_scratch%yzcount = scount
1690 0 : fft_scratch%xzcount = rcount
1691 :
1692 : ! Work out the correct displacements in the buffers
1693 0 : sdispl(0) = 0
1694 0 : rdispl(0) = 0
1695 0 : DO ip = 1, np - 1
1696 0 : sdispl(ip) = sdispl(ip - 1) + scount(ip - 1)
1697 0 : rdispl(ip) = rdispl(ip - 1) + rcount(ip - 1)
1698 : END DO
1699 :
1700 0 : fft_scratch%yzdispl = sdispl
1701 0 : fft_scratch%xzdispl = rdispl
1702 :
1703 0 : icrs = 0
1704 0 : DO ip = 0, np - 1
1705 0 : IF (scount(ip) /= 0) icrs = icrs + 1
1706 0 : IF (rcount(ip) /= 0) icrs = icrs + 1
1707 : END DO
1708 0 : CALL group%sum(icrs)
1709 0 : fft_scratch%rsratio = REAL(icrs, KIND=dp)/(REAL(2*np, KIND=dp)*REAL(np, KIND=dp))
1710 :
1711 0 : fft_scratch%in = 1
1712 : ELSE
1713 0 : scount = fft_scratch%yzcount
1714 0 : rcount = fft_scratch%xzcount
1715 0 : sdispl = fft_scratch%yzdispl
1716 0 : rdispl = fft_scratch%xzdispl
1717 : END IF
1718 :
1719 : ! Do the actual packing
1720 : !$OMP PARALLEL DO DEFAULT(NONE), &
1721 : !$OMP PRIVATE(ipl,jj,nx,ir,jx,jy,jz),&
1722 : !$OMP SHARED(np,p2p,pzcoord,bo,nray,yzp,zcor),&
1723 : !$OMP SHARED(yzbuf,sb,scount,sdispl,my_pos),&
1724 0 : !$OMP SHARED(yzbuf_sgl,alltoall_sgl)
1725 : DO ip = 0, np - 1
1726 : IF (scount(ip) == 0) CYCLE
1727 : ipl = p2p(ip)
1728 : jj = 0
1729 : nx = bo(2, 1, ipl) - bo(1, 1, ipl) + 1
1730 : DO ir = 1, nray(my_pos)
1731 : jz = yzp(2, ir, my_pos)
1732 : IF (zcor(jz) == pzcoord(ipl)) THEN
1733 : jj = jj + 1
1734 : jy = yzp(1, ir, my_pos)
1735 : IF (alltoall_sgl) THEN
1736 : DO jx = 0, nx - 1
1737 : yzbuf_sgl(sdispl(ip) + jj + jx*scount(ip)/nx) = CMPLX(sb(ir, jx + bo(1, 1, ipl)), KIND=sp)
1738 : END DO
1739 : ELSE
1740 : DO jx = 0, nx - 1
1741 : yzbuf(sdispl(ip) + jj + jx*scount(ip)/nx) = sb(ir, jx + bo(1, 1, ipl))
1742 : END DO
1743 : END IF
1744 : END IF
1745 : END DO
1746 : END DO
1747 : !$OMP END PARALLEL DO
1748 :
1749 0 : IF (alltoall_sgl) THEN
1750 0 : CALL group%alltoall(yzbuf_sgl, scount, sdispl, xzbuf_sgl, rcount, rdispl)
1751 : ELSE
1752 0 : IF (fft_scratch%rsratio < ratio_sparse_alltoall) THEN
1753 0 : CALL sparse_alltoall(yzbuf, scount, sdispl, xzbuf, rcount, rdispl, group)
1754 : ELSE
1755 0 : CALL group%alltoall(yzbuf, scount, sdispl, xzbuf, rcount, rdispl)
1756 : END IF
1757 : END IF
1758 :
1759 0 : myx = fft_scratch%sizes%r_pos(1)
1760 0 : myz = fft_scratch%sizes%r_pos(2)
1761 0 : nz = bo(2, 3, rs_pos) - bo(1, 3, rs_pos) + 1
1762 :
1763 : !$OMP PARALLEL DO DEFAULT(NONE), &
1764 : !$OMP PRIVATE(ipr,jj,ir,jx,jy,jz),&
1765 : !$OMP SHARED(tb,np,p2p,bo,rs_pos,nray),&
1766 : !$OMP SHARED(yzp,alltoall_sgl,zcor,myz),&
1767 0 : !$OMP SHARED(xzbuf,xzbuf_sgl,nz,rdispl)
1768 : DO ip = 0, np - 1
1769 : ipr = p2p(ip)
1770 : jj = 0
1771 : DO jx = 0, bo(2, 1, rs_pos) - bo(1, 1, rs_pos)
1772 : DO ir = 1, nray(ip)
1773 : jz = yzp(2, ir, ip)
1774 : IF (alltoall_sgl) THEN
1775 : IF (zcor(jz) == myz) THEN
1776 : jj = jj + 1
1777 : jy = yzp(1, ir, ip)
1778 : jz = jz - bo(1, 3, rs_pos) + 1
1779 : tb(jy, jz + jx*nz) = xzbuf_sgl(jj + rdispl(ipr))
1780 : END IF
1781 : ELSE
1782 : IF (zcor(jz) == myz) THEN
1783 : jj = jj + 1
1784 : jy = yzp(1, ir, ip)
1785 : jz = jz - bo(1, 3, rs_pos) + 1
1786 : tb(jy, jz + jx*nz) = xzbuf(jj + rdispl(ipr))
1787 : END IF
1788 : END IF
1789 : END DO
1790 : END DO
1791 : END DO
1792 : !$OMP END PARALLEL DO
1793 :
1794 0 : CALL timestop(handle)
1795 :
1796 0 : END SUBROUTINE yz_to_xz
1797 :
1798 : ! **************************************************************************************************
1799 : !> \brief ...
1800 : !> \param sb ...
1801 : !> \param group ...
1802 : !> \param dims ...
1803 : !> \param my_pos ...
1804 : !> \param p2p ...
1805 : !> \param yzp ...
1806 : !> \param nray ...
1807 : !> \param bo ...
1808 : !> \param tb ...
1809 : !> \param fft_scratch ...
1810 : !> \par History
1811 : !> 15. Feb. 2006 : single precision all_to_all
1812 : !> \author JGH (19-Jan-2001)
1813 : ! **************************************************************************************************
1814 0 : SUBROUTINE xz_to_yz(sb, group, dims, my_pos, p2p, yzp, nray, bo, tb, fft_scratch)
1815 :
1816 : COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
1817 : INTENT(IN) :: sb
1818 :
1819 : CLASS(mp_comm_type), INTENT(IN) :: group
1820 : INTEGER, DIMENSION(2), INTENT(IN) :: dims
1821 : INTEGER, INTENT(IN) :: my_pos
1822 : INTEGER, CONTIGUOUS, DIMENSION(0:), INTENT(IN) :: p2p
1823 : INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:), INTENT(IN) :: yzp
1824 : INTEGER, CONTIGUOUS, DIMENSION(0:), INTENT(IN) :: nray
1825 : INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:), INTENT(IN) :: bo
1826 : COMPLEX(KIND=dp), DIMENSION(:, :), INTENT(INOUT), CONTIGUOUS :: tb
1827 : TYPE(fft_scratch_type), INTENT(INOUT) :: fft_scratch
1828 :
1829 : CHARACTER(len=*), PARAMETER :: routineN = 'xz_to_yz'
1830 :
1831 0 : COMPLEX(KIND=dp), DIMENSION(:), POINTER, CONTIGUOUS :: xzbuf, yzbuf
1832 0 : COMPLEX(KIND=sp), DIMENSION(:), POINTER, CONTIGUOUS :: xzbuf_sgl, yzbuf_sgl
1833 : INTEGER :: handle, icrs, ip, ipl, ir, ix, ixx, iz, &
1834 : jj, jx, jy, jz, mp, myx, myz, np, npx, &
1835 : npz, nx, nz
1836 0 : INTEGER, DIMENSION(:), POINTER, CONTIGUOUS :: pzcoord, rcount, rdispl, scount, sdispl, &
1837 0 : xcor, zcor
1838 0 : INTEGER, DIMENSION(:, :), CONTIGUOUS, POINTER :: pgrid
1839 :
1840 0 : CALL timeset(routineN, handle)
1841 :
1842 0 : np = SIZE(p2p)
1843 :
1844 0 : IF (alltoall_sgl) THEN
1845 0 : yzbuf_sgl => fft_scratch%yzbuf_sgl
1846 0 : xzbuf_sgl => fft_scratch%xzbuf_sgl
1847 : ELSE
1848 0 : yzbuf => fft_scratch%yzbuf
1849 0 : xzbuf => fft_scratch%xzbuf
1850 : END IF
1851 0 : npx = dims(1)
1852 0 : npz = dims(2)
1853 0 : pgrid => fft_scratch%pgrid
1854 0 : xcor => fft_scratch%xcor
1855 0 : zcor => fft_scratch%zcor
1856 0 : pzcoord => fft_scratch%pzcoord
1857 0 : scount => fft_scratch%scount
1858 0 : rcount => fft_scratch%rcount
1859 0 : sdispl => fft_scratch%sdispl
1860 0 : rdispl => fft_scratch%rdispl
1861 :
1862 : ! If the send and recv counts are not already cached, then
1863 : ! calculate and store them
1864 0 : IF (fft_scratch%in == 0) THEN
1865 :
1866 0 : rcount = 0
1867 0 : nx = MAXVAL(bo(2, 1, :))
1868 :
1869 0 : DO ix = 0, npx - 1
1870 0 : ip = pgrid(ix, 0)
1871 0 : xcor(bo(1, 1, ip):bo(2, 1, ip)) = ix
1872 : END DO
1873 0 : DO iz = 0, npz - 1
1874 0 : ip = pgrid(0, iz)
1875 0 : zcor(bo(1, 3, ip):bo(2, 3, ip)) = iz
1876 : END DO
1877 0 : DO jx = 1, nx
1878 0 : DO ir = 1, nray(my_pos)
1879 0 : jy = yzp(1, ir, my_pos)
1880 0 : jz = yzp(2, ir, my_pos)
1881 0 : ip = pgrid(xcor(jx), zcor(jz))
1882 0 : rcount(ip) = rcount(ip) + 1
1883 : END DO
1884 : END DO
1885 :
1886 0 : CALL group%alltoall(rcount, scount, 1)
1887 0 : fft_scratch%xzcount = scount
1888 0 : fft_scratch%yzcount = rcount
1889 :
1890 : ! Work out the correct displacements in the buffers
1891 0 : sdispl(0) = 0
1892 0 : rdispl(0) = 0
1893 0 : DO ip = 1, np - 1
1894 0 : sdispl(ip) = sdispl(ip - 1) + scount(ip - 1)
1895 0 : rdispl(ip) = rdispl(ip - 1) + rcount(ip - 1)
1896 : END DO
1897 :
1898 0 : fft_scratch%xzdispl = sdispl
1899 0 : fft_scratch%yzdispl = rdispl
1900 :
1901 0 : icrs = 0
1902 0 : DO ip = 0, np - 1
1903 0 : IF (scount(ip) /= 0) icrs = icrs + 1
1904 0 : IF (rcount(ip) /= 0) icrs = icrs + 1
1905 : END DO
1906 0 : CALL group%sum(icrs)
1907 0 : fft_scratch%rsratio = REAL(icrs, KIND=dp)/(REAL(2*np, KIND=dp)*REAL(np, KIND=dp))
1908 :
1909 0 : fft_scratch%in = 1
1910 : ELSE
1911 0 : scount = fft_scratch%xzcount
1912 0 : rcount = fft_scratch%yzcount
1913 0 : sdispl = fft_scratch%xzdispl
1914 0 : rdispl = fft_scratch%yzdispl
1915 : END IF
1916 :
1917 : ! Now do the actual packing
1918 0 : myx = fft_scratch%sizes%r_pos(1)
1919 0 : myz = fft_scratch%sizes%r_pos(2)
1920 0 : mp = p2p(my_pos)
1921 0 : nz = bo(2, 3, mp) - bo(1, 3, mp) + 1
1922 0 : nx = bo(2, 1, mp) - bo(1, 1, mp) + 1
1923 :
1924 : !$OMP PARALLEL DO DEFAULT(NONE), &
1925 : !$OMP PRIVATE(jj,ipl,ir,jx,jy,jz,ixx),&
1926 : !$OMP SHARED(np,p2p,nray,yzp,zcor,myz,bo,mp),&
1927 : !$OMP SHARED(alltoall_sgl,nx,scount,sdispl),&
1928 0 : !$OMP SHARED(xzbuf,xzbuf_sgl,sb,nz)
1929 : DO ip = 0, np - 1
1930 : jj = 0
1931 : ipl = p2p(ip)
1932 : DO ir = 1, nray(ip)
1933 : jz = yzp(2, ir, ip)
1934 : IF (zcor(jz) == myz) THEN
1935 : jj = jj + 1
1936 : jy = yzp(1, ir, ip)
1937 : jz = yzp(2, ir, ip) - bo(1, 3, mp) + 1
1938 : IF (alltoall_sgl) THEN
1939 : DO jx = 0, nx - 1
1940 : ixx = jj + jx*scount(ipl)/nx
1941 : xzbuf_sgl(ixx + sdispl(ipl)) = CMPLX(sb(jy, jz + jx*nz), KIND=sp)
1942 : END DO
1943 : ELSE
1944 : DO jx = 0, nx - 1
1945 : ixx = jj + jx*scount(ipl)/nx
1946 : xzbuf(ixx + sdispl(ipl)) = sb(jy, jz + jx*nz)
1947 : END DO
1948 : END IF
1949 : END IF
1950 : END DO
1951 : END DO
1952 : !$OMP END PARALLEL DO
1953 :
1954 0 : IF (alltoall_sgl) THEN
1955 0 : CALL group%alltoall(xzbuf_sgl, scount, sdispl, yzbuf_sgl, rcount, rdispl)
1956 : ELSE
1957 0 : IF (fft_scratch%rsratio < ratio_sparse_alltoall) THEN
1958 0 : CALL sparse_alltoall(xzbuf, scount, sdispl, yzbuf, rcount, rdispl, group)
1959 : ELSE
1960 0 : CALL group%alltoall(xzbuf, scount, sdispl, yzbuf, rcount, rdispl)
1961 : END IF
1962 : END IF
1963 :
1964 : !$OMP PARALLEL DO DEFAULT(NONE), &
1965 : !$OMP PRIVATE(ipl,jj,nx,ir,jx,jy,jz),&
1966 : !$OMP SHARED(p2p,pzcoord,bo,nray,my_pos,yzp),&
1967 : !$OMP SHARED(rcount,rdispl,tb,yzbuf,zcor),&
1968 0 : !$OMP SHARED(yzbuf_sgl,alltoall_sgl,np)
1969 : DO ip = 0, np - 1
1970 : IF (rcount(ip) == 0) CYCLE
1971 : ipl = p2p(ip)
1972 : jj = 0
1973 : nx = bo(2, 1, ipl) - bo(1, 1, ipl) + 1
1974 : DO ir = 1, nray(my_pos)
1975 : jz = yzp(2, ir, my_pos)
1976 : IF (zcor(jz) == pzcoord(ipl)) THEN
1977 : jj = jj + 1
1978 : jy = yzp(1, ir, my_pos)
1979 : IF (alltoall_sgl) THEN
1980 : DO jx = 0, nx - 1
1981 : tb(ir, jx + bo(1, 1, ipl)) = yzbuf_sgl(rdispl(ip) + jj + jx*rcount(ip)/nx)
1982 : END DO
1983 : ELSE
1984 : DO jx = 0, nx - 1
1985 : tb(ir, jx + bo(1, 1, ipl)) = yzbuf(rdispl(ip) + jj + jx*rcount(ip)/nx)
1986 : END DO
1987 : END IF
1988 : END IF
1989 : END DO
1990 : END DO
1991 : !$OMP END PARALLEL DO
1992 :
1993 0 : CALL timestop(handle)
1994 :
1995 0 : END SUBROUTINE xz_to_yz
1996 :
1997 : ! **************************************************************************************************
1998 : !> \brief ...
1999 : !> \param cin ...
2000 : !> \param boin ...
2001 : !> \param boout ...
2002 : !> \param sout ...
2003 : !> \param fft_scratch ...
2004 : !> \par History
2005 : !> none
2006 : !> \author JGH (20-Jan-2001)
2007 : ! **************************************************************************************************
2008 0 : SUBROUTINE cube_transpose_1(cin, boin, boout, sout, fft_scratch)
2009 :
2010 : COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
2011 : INTENT(IN) :: cin
2012 : INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:), &
2013 : INTENT(IN) :: boin, boout
2014 : COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
2015 : INTENT(OUT) :: sout
2016 : TYPE(fft_scratch_type), INTENT(IN) :: fft_scratch
2017 :
2018 : CHARACTER(len=*), PARAMETER :: routineN = 'cube_transpose_1'
2019 :
2020 : COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
2021 0 : POINTER :: rbuf
2022 : INTEGER :: handle, ip, ipl, ir, is, ixy, iz, mip, &
2023 : mz, np, nx, ny, nz
2024 0 : INTEGER, CONTIGUOUS, DIMENSION(:), POINTER :: rcount, rdispl, scount, sdispl
2025 0 : INTEGER, CONTIGUOUS, DIMENSION(:, :), POINTER :: pgrid
2026 : INTEGER, DIMENSION(2) :: dim, pos
2027 :
2028 0 : CALL timeset(routineN, handle)
2029 :
2030 0 : mip = fft_scratch%mip
2031 0 : dim = fft_scratch%dim
2032 0 : pos = fft_scratch%pos
2033 0 : scount => fft_scratch%scount
2034 0 : rcount => fft_scratch%rcount
2035 0 : sdispl => fft_scratch%sdispl
2036 0 : rdispl => fft_scratch%rdispl
2037 0 : pgrid => fft_scratch%pgcube
2038 0 : np = DIM(2)
2039 :
2040 0 : nx = boin(2, 1, mip) - boin(1, 1, mip) + 1
2041 0 : nz = boin(2, 3, mip) - boin(1, 3, mip) + 1
2042 :
2043 : !$OMP PARALLEL DO DEFAULT(NONE), &
2044 : !$OMP PRIVATE(ipl,ny), &
2045 0 : !$OMP SHARED(np,pgrid,boout,scount,sdispl,nx,nz)
2046 : DO ip = 0, np - 1
2047 : ipl = pgrid(ip, 2)
2048 : ny = boout(2, 2, ipl) - boout(1, 2, ipl) + 1
2049 : scount(ip) = nx*nz*ny
2050 : sdispl(ip) = nx*nz*(boout(1, 2, ipl) - 1)
2051 : END DO
2052 : !$OMP END PARALLEL DO
2053 0 : ny = boout(2, 2, mip) - boout(1, 2, mip) + 1
2054 0 : mz = MAXVAL(boin(2, 3, :) - boin(1, 3, :) + 1)
2055 : !$OMP PARALLEL DO DEFAULT(NONE), &
2056 : !$OMP PRIVATE(ipl,nz), &
2057 0 : !$OMP SHARED(np,pgrid,boin,nx,ny,rcount,rdispl,mz)
2058 : DO ip = 0, np - 1
2059 : ipl = pgrid(ip, 2)
2060 : nz = boin(2, 3, ipl) - boin(1, 3, ipl) + 1
2061 : rcount(ip) = nx*nz*ny
2062 : rdispl(ip) = nx*ny*mz*ip
2063 : END DO
2064 : !$OMP END PARALLEL DO
2065 :
2066 0 : rbuf => fft_scratch%rbuf1
2067 :
2068 0 : CALL fft_scratch%cart_sub_comm(2)%alltoall(cin, scount, sdispl, rbuf, rcount, rdispl)
2069 :
2070 : !$OMP PARALLEL DO DEFAULT(NONE) COLLAPSE(2) &
2071 : !$OMP PRIVATE(ip,ipl,nz,iz,is,ir) &
2072 0 : !$OMP SHARED(nx,ny,np,pgrid,boin,sout,rbuf)
2073 : DO ixy = 1, nx*ny
2074 : DO ip = 0, np - 1
2075 : ipl = pgrid(ip, 2)
2076 : nz = boin(2, 3, ipl) - boin(1, 3, ipl) + 1
2077 : DO iz = 1, nz
2078 : is = boin(1, 3, ipl) + iz - 1
2079 : ir = iz + nz*(ixy - 1)
2080 : sout(is, ixy) = rbuf(ir, ip)
2081 : END DO
2082 : END DO
2083 : END DO
2084 : !$OMP END PARALLEL DO
2085 :
2086 0 : CALL timestop(handle)
2087 :
2088 0 : END SUBROUTINE cube_transpose_1
2089 :
2090 : ! **************************************************************************************************
2091 : !> \brief ...
2092 : !> \param cin ...
2093 : !> \param boin ...
2094 : !> \param boout ...
2095 : !> \param sout ...
2096 : !> \param fft_scratch ...
2097 : ! **************************************************************************************************
2098 0 : SUBROUTINE cube_transpose_2(cin, boin, boout, sout, fft_scratch)
2099 :
2100 : COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
2101 : INTENT(IN) :: cin
2102 : INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:), &
2103 : INTENT(IN) :: boin, boout
2104 : COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
2105 : INTENT(OUT) :: sout
2106 : TYPE(fft_scratch_type), INTENT(IN) :: fft_scratch
2107 :
2108 : CHARACTER(len=*), PARAMETER :: routineN = 'cube_transpose_2'
2109 :
2110 : COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
2111 0 : POINTER :: rbuf
2112 : INTEGER :: handle, ip, ipl, ir, ixy, iz, mip, mz, &
2113 : np, nx, ny, nz
2114 0 : INTEGER, CONTIGUOUS, DIMENSION(:), POINTER :: rcount, rdispl, scount, sdispl
2115 0 : INTEGER, CONTIGUOUS, DIMENSION(:, :), POINTER :: pgrid
2116 : INTEGER, DIMENSION(2) :: dim, pos
2117 : TYPE(mp_comm_type) :: sub_group
2118 :
2119 0 : CALL timeset(routineN, handle)
2120 :
2121 0 : sub_group = fft_scratch%cart_sub_comm(2)
2122 0 : mip = fft_scratch%mip
2123 0 : dim = fft_scratch%dim
2124 0 : pos = fft_scratch%pos
2125 0 : scount => fft_scratch%scount
2126 0 : rcount => fft_scratch%rcount
2127 0 : sdispl => fft_scratch%sdispl
2128 0 : rdispl => fft_scratch%rdispl
2129 0 : pgrid => fft_scratch%pgcube
2130 0 : np = DIM(2)
2131 :
2132 0 : nx = boin(2, 1, mip) - boin(1, 1, mip) + 1
2133 0 : ny = boin(2, 2, mip) - boin(1, 2, mip) + 1
2134 0 : mz = MAXVAL(boout(2, 3, :) - boout(1, 3, :) + 1)
2135 :
2136 0 : rbuf => fft_scratch%rbuf2
2137 :
2138 : !$OMP PARALLEL DEFAULT(NONE), &
2139 : !$OMP PRIVATE(ip,ipl,nz,iz,ir), &
2140 0 : !$OMP SHARED(nx,ny,np,pgrid,boout,rbuf,cin,scount,sdispl,mz)
2141 : !$OMP DO COLLAPSE(2)
2142 : DO ixy = 1, nx*ny
2143 : DO ip = 0, np - 1
2144 : ipl = pgrid(ip, 2)
2145 : nz = boout(2, 3, ipl) - boout(1, 3, ipl) + 1
2146 : DO iz = boout(1, 3, ipl), boout(2, 3, ipl)
2147 : ir = iz - boout(1, 3, ipl) + 1 + (ixy - 1)*nz
2148 : rbuf(ir, ip) = cin(iz, ixy)
2149 : END DO
2150 : END DO
2151 : END DO
2152 : !$OMP END DO
2153 : !$OMP DO
2154 : DO ip = 0, np - 1
2155 : ipl = pgrid(ip, 2)
2156 : nz = boout(2, 3, ipl) - boout(1, 3, ipl) + 1
2157 : scount(ip) = nx*ny*nz
2158 : sdispl(ip) = nx*ny*mz*ip
2159 : END DO
2160 : !$OMP END DO
2161 : !$OMP END PARALLEL
2162 0 : nz = boout(2, 3, mip) - boout(1, 3, mip) + 1
2163 : !$OMP PARALLEL DO DEFAULT(NONE), &
2164 : !$OMP PRIVATE(ipl,ny), &
2165 0 : !$OMP SHARED(np,pgrid,boin,nx,nz,rcount,rdispl)
2166 : DO ip = 0, np - 1
2167 : ipl = pgrid(ip, 2)
2168 : ny = boin(2, 2, ipl) - boin(1, 2, ipl) + 1
2169 : rcount(ip) = nx*ny*nz
2170 : rdispl(ip) = nx*nz*(boin(1, 2, ipl) - 1)
2171 : END DO
2172 : !$OMP END PARALLEL DO
2173 :
2174 0 : CALL sub_group%alltoall(rbuf, scount, sdispl, sout, rcount, rdispl)
2175 :
2176 0 : CALL timestop(handle)
2177 :
2178 0 : END SUBROUTINE cube_transpose_2
2179 :
2180 : ! **************************************************************************************************
2181 : !> \brief ...
2182 : !> \param cin ...
2183 : !> \param boin ...
2184 : !> \param boout ...
2185 : !> \param sout ...
2186 : !> \param fft_scratch ...
2187 : ! **************************************************************************************************
2188 0 : SUBROUTINE cube_transpose_3(cin, boin, boout, sout, fft_scratch)
2189 :
2190 : COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
2191 : INTENT(IN) :: cin
2192 : INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:), &
2193 : INTENT(IN) :: boin, boout
2194 : COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
2195 : INTENT(OUT) :: sout
2196 : TYPE(fft_scratch_type), INTENT(IN) :: fft_scratch
2197 :
2198 : CHARACTER(len=*), PARAMETER :: routineN = 'cube_transpose_3'
2199 :
2200 : COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
2201 0 : POINTER :: rbuf
2202 : INTEGER :: handle, ip, ipl, ir, is, ixz, iy, lb, &
2203 : mip, my, my_id, np, num_threads, nx, &
2204 : ny, nz, ub
2205 0 : INTEGER, CONTIGUOUS, DIMENSION(:), POINTER :: rcount, rdispl, scount, sdispl
2206 0 : INTEGER, CONTIGUOUS, DIMENSION(:, :), POINTER :: pgrid
2207 : INTEGER, DIMENSION(2) :: dim, pos
2208 : TYPE(mp_comm_type) :: sub_group
2209 :
2210 0 : CALL timeset(routineN, handle)
2211 :
2212 0 : sub_group = fft_scratch%cart_sub_comm(1)
2213 0 : mip = fft_scratch%mip
2214 0 : dim = fft_scratch%dim
2215 0 : pos = fft_scratch%pos
2216 0 : np = DIM(1)
2217 0 : scount => fft_scratch%scount
2218 0 : rcount => fft_scratch%rcount
2219 0 : sdispl => fft_scratch%sdispl
2220 0 : rdispl => fft_scratch%rdispl
2221 0 : pgrid => fft_scratch%pgcube
2222 :
2223 0 : ny = boin(2, 2, mip) - boin(1, 2, mip) + 1
2224 0 : nz = boin(2, 3, mip) - boin(1, 3, mip) + 1
2225 : !$OMP PARALLEL DO DEFAULT(NONE), &
2226 : !$OMP PRIVATE(ipl, nx), &
2227 0 : !$OMP SHARED(np,pgrid,boout,ny,nz,scount,sdispl)
2228 : DO ip = 0, np - 1
2229 : ipl = pgrid(ip, 1)
2230 : nx = boout(2, 1, ipl) - boout(1, 1, ipl) + 1
2231 : scount(ip) = nx*nz*ny
2232 : sdispl(ip) = ny*nz*(boout(1, 1, ipl) - 1)
2233 : END DO
2234 : !$OMP END PARALLEL DO
2235 0 : nx = boout(2, 1, mip) - boout(1, 1, mip) + 1
2236 0 : my = MAXVAL(boin(2, 2, :) - boin(1, 2, :) + 1)
2237 : !$OMP PARALLEL DO DEFAULT(NONE), &
2238 : !$OMP PRIVATE(ipl, ny), &
2239 0 : !$OMP SHARED(np,pgrid,boin,nx,nz,my,rcount,rdispl)
2240 : DO ip = 0, np - 1
2241 : ipl = pgrid(ip, 1)
2242 : ny = boin(2, 2, ipl) - boin(1, 2, ipl) + 1
2243 : rcount(ip) = nx*nz*ny
2244 : rdispl(ip) = nx*my*nz*ip
2245 : END DO
2246 : !$OMP END PARALLEL DO
2247 :
2248 0 : rbuf => fft_scratch%rbuf3
2249 0 : num_threads = 1
2250 0 : my_id = 0
2251 : !$OMP PARALLEL DEFAULT(NONE), &
2252 : !$OMP PRIVATE(NUM_THREADS, my_id, lb, ub) &
2253 0 : !$OMP SHARED(rbuf)
2254 : !$ num_threads = MIN(omp_get_max_threads(), SIZE(rbuf, 2))
2255 : !$ my_id = omp_get_thread_num()
2256 : IF (my_id < num_threads) THEN
2257 : lb = (SIZE(rbuf, 2)*my_id)/num_threads
2258 : ub = (SIZE(rbuf, 2)*(my_id + 1))/num_threads - 1
2259 : rbuf(:, lb:ub) = 0.0_dp
2260 : END IF
2261 : !$OMP END PARALLEL
2262 :
2263 0 : CALL sub_group%alltoall(cin, scount, sdispl, rbuf, rcount, rdispl)
2264 :
2265 : !$OMP PARALLEL DO DEFAULT(NONE) COLLAPSE(2) &
2266 : !$OMP PRIVATE(ip,ipl,ny,iy,is,ir) &
2267 0 : !$OMP SHARED(nx,nz,np,pgrid,boin,rbuf,sout)
2268 : DO ixz = 1, nx*nz
2269 : DO ip = 0, np - 1
2270 : ipl = pgrid(ip, 1)
2271 : ny = boin(2, 2, ipl) - boin(1, 2, ipl) + 1
2272 : DO iy = 1, ny
2273 : is = boin(1, 2, ipl) + iy - 1
2274 : ir = iy + ny*(ixz - 1)
2275 : sout(is, ixz) = rbuf(ir, ip)
2276 : END DO
2277 : END DO
2278 : END DO
2279 : !$OMP END PARALLEL DO
2280 :
2281 0 : CALL timestop(handle)
2282 :
2283 0 : END SUBROUTINE cube_transpose_3
2284 :
2285 : ! **************************************************************************************************
2286 : !> \brief ...
2287 : !> \param cin ...
2288 : !> \param boin ...
2289 : !> \param boout ...
2290 : !> \param sout ...
2291 : !> \param fft_scratch ...
2292 : ! **************************************************************************************************
2293 0 : SUBROUTINE cube_transpose_4(cin, boin, boout, sout, fft_scratch)
2294 :
2295 : COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
2296 : INTENT(IN) :: cin
2297 : INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:), &
2298 : INTENT(IN) :: boin, boout
2299 : COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
2300 : INTENT(OUT) :: sout
2301 : TYPE(fft_scratch_type), INTENT(IN) :: fft_scratch
2302 :
2303 : CHARACTER(len=*), PARAMETER :: routineN = 'cube_transpose_4'
2304 :
2305 : COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
2306 0 : POINTER :: rbuf
2307 : INTEGER :: handle, ip, ipl, ir, iy, izx, lb, mip, &
2308 : my, my_id, np, num_threads, nx, ny, &
2309 : nz, ub
2310 0 : INTEGER, CONTIGUOUS, DIMENSION(:), POINTER :: rcount, rdispl, scount, sdispl
2311 0 : INTEGER, CONTIGUOUS, DIMENSION(:, :), POINTER :: pgrid
2312 : INTEGER, DIMENSION(2) :: dim, pos
2313 : TYPE(mp_comm_type) :: sub_group
2314 :
2315 0 : CALL timeset(routineN, handle)
2316 :
2317 0 : sub_group = fft_scratch%cart_sub_comm(1)
2318 0 : mip = fft_scratch%mip
2319 0 : dim = fft_scratch%dim
2320 0 : pos = fft_scratch%pos
2321 0 : np = DIM(1)
2322 0 : scount => fft_scratch%scount
2323 0 : rcount => fft_scratch%rcount
2324 0 : sdispl => fft_scratch%sdispl
2325 0 : rdispl => fft_scratch%rdispl
2326 0 : pgrid => fft_scratch%pgcube
2327 :
2328 0 : nx = boin(2, 1, mip) - boin(1, 1, mip) + 1
2329 0 : nz = boin(2, 3, mip) - boin(1, 3, mip) + 1
2330 0 : my = MAXVAL(boout(2, 2, :) - boout(1, 2, :) + 1)
2331 :
2332 0 : rbuf => fft_scratch%rbuf4
2333 0 : num_threads = 1
2334 0 : my_id = 0
2335 : !$OMP PARALLEL DEFAULT(NONE), &
2336 : !$OMP PRIVATE(NUM_THREADS,my_id,lb,ub,ip,ipl,ny,iy,ir), &
2337 0 : !$OMP SHARED(rbuf,nz,nx,np,pgrid,boout,cin,my,scount,sdispl)
2338 : !$ num_threads = MIN(omp_get_max_threads(), SIZE(rbuf, 2))
2339 : !$ my_id = omp_get_thread_num()
2340 : IF (my_id < num_threads) THEN
2341 : lb = (SIZE(rbuf, 2)*my_id)/num_threads
2342 : ub = (SIZE(rbuf, 2)*(my_id + 1))/num_threads - 1
2343 : rbuf(:, lb:ub) = 0.0_dp
2344 : END IF
2345 : !$OMP BARRIER
2346 :
2347 : !$OMP DO COLLAPSE(2)
2348 : DO izx = 1, nz*nx
2349 : DO ip = 0, np - 1
2350 : ipl = pgrid(ip, 1)
2351 : ny = boout(2, 2, ipl) - boout(1, 2, ipl) + 1
2352 : DO iy = boout(1, 2, ipl), boout(2, 2, ipl)
2353 : ir = iy - boout(1, 2, ipl) + 1 + (izx - 1)*ny
2354 : rbuf(ir, ip) = cin(iy, izx)
2355 : END DO
2356 : END DO
2357 : END DO
2358 : !$OMP END DO
2359 : !$OMP DO
2360 : DO ip = 0, np - 1
2361 : ipl = pgrid(ip, 1)
2362 : ny = boout(2, 2, ipl) - boout(1, 2, ipl) + 1
2363 : scount(ip) = nx*ny*nz
2364 : sdispl(ip) = nx*nz*my*ip
2365 : END DO
2366 : !$OMP END DO
2367 : !$OMP END PARALLEL
2368 0 : ny = boout(2, 2, mip) - boout(1, 2, mip) + 1
2369 : !$OMP PARALLEL DO DEFAULT(NONE), &
2370 : !$OMP PRIVATE(ipl,nx), &
2371 0 : !$OMP SHARED(np,pgrid,boin,rcount,rdispl,ny,nz)
2372 : DO ip = 0, np - 1
2373 : ipl = pgrid(ip, 1)
2374 : nx = boin(2, 1, ipl) - boin(1, 1, ipl) + 1
2375 : rcount(ip) = nx*ny*nz
2376 : rdispl(ip) = ny*nz*(boin(1, 1, ipl) - 1)
2377 : END DO
2378 : !$OMP END PARALLEL DO
2379 :
2380 0 : CALL sub_group%alltoall(rbuf, scount, sdispl, sout, rcount, rdispl)
2381 :
2382 0 : CALL timestop(handle)
2383 :
2384 0 : END SUBROUTINE cube_transpose_4
2385 :
2386 : ! **************************************************************************************************
2387 : !> \brief ...
2388 : !> \param cin ...
2389 : !> \param group ...
2390 : !> \param boin ...
2391 : !> \param boout ...
2392 : !> \param sout ...
2393 : !> \param fft_scratch ...
2394 : ! **************************************************************************************************
2395 104 : SUBROUTINE cube_transpose_5(cin, group, boin, boout, sout, fft_scratch)
2396 :
2397 : COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
2398 : INTENT(IN) :: cin
2399 :
2400 : CLASS(mp_comm_type), INTENT(IN) :: group
2401 : INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:), INTENT(IN) :: boin, boout
2402 : COMPLEX(KIND=dp), DIMENSION(:, :), INTENT(OUT), CONTIGUOUS :: sout
2403 : TYPE(fft_scratch_type), INTENT(IN) :: fft_scratch
2404 :
2405 : CHARACTER(len=*), PARAMETER :: routineN = 'cube_transpose_5'
2406 :
2407 104 : COMPLEX(KIND=dp), DIMENSION(:, :), POINTER, CONTIGUOUS :: rbuf
2408 : INTEGER :: handle, ip, ir, is, ixz, iy, lb, mip, &
2409 : my, my_id, np, num_threads, nx, ny, &
2410 : nz, ub
2411 104 : INTEGER, DIMENSION(:), POINTER, CONTIGUOUS :: rcount, rdispl, scount, sdispl
2412 :
2413 104 : CALL timeset(routineN, handle)
2414 :
2415 104 : np = fft_scratch%sizes%numtask
2416 104 : mip = fft_scratch%mip
2417 104 : scount => fft_scratch%scount
2418 104 : rcount => fft_scratch%rcount
2419 104 : sdispl => fft_scratch%sdispl
2420 104 : rdispl => fft_scratch%rdispl
2421 :
2422 104 : ny = boin(2, 2, mip) - boin(1, 2, mip) + 1
2423 104 : nz = boin(2, 3, mip) - boin(1, 3, mip) + 1
2424 : !$OMP PARALLEL DO DEFAULT(NONE), &
2425 : !$OMP PRIVATE(nx), &
2426 104 : !$OMP SHARED(np,boout,ny,nz,scount,sdispl)
2427 : DO ip = 0, np - 1
2428 : nx = boout(2, 1, ip) - boout(1, 1, ip) + 1
2429 : scount(ip) = nx*nz*ny
2430 : sdispl(ip) = ny*nz*(boout(1, 1, ip) - 1)
2431 : END DO
2432 : !$OMP END PARALLEL DO
2433 104 : nx = boout(2, 1, mip) - boout(1, 1, mip) + 1
2434 312 : my = MAXVAL(boin(2, 2, :) - boin(1, 2, :) + 1)
2435 : !$OMP PARALLEL DO DEFAULT(NONE), &
2436 : !$OMP PRIVATE(ny), &
2437 104 : !$OMP SHARED(np,boin,nx,nz,rcount,rdispl,my)
2438 : DO ip = 0, np - 1
2439 : ny = boin(2, 2, ip) - boin(1, 2, ip) + 1
2440 : rcount(ip) = nx*nz*ny
2441 : rdispl(ip) = nx*my*nz*ip
2442 : END DO
2443 : !$OMP END PARALLEL DO
2444 :
2445 104 : rbuf => fft_scratch%rbuf5
2446 104 : num_threads = 1
2447 104 : my_id = 0
2448 : !$OMP PARALLEL DEFAULT(NONE), &
2449 : !$OMP PRIVATE(NUM_THREADS, my_id, lb, ub), &
2450 104 : !$OMP SHARED(rbuf)
2451 : !$ num_threads = MIN(omp_get_max_threads(), SIZE(rbuf, 2))
2452 : !$ my_id = omp_get_thread_num()
2453 : IF (my_id < num_threads) THEN
2454 : lb = (SIZE(rbuf, 2)*my_id)/num_threads
2455 : ub = (SIZE(rbuf, 2)*(my_id + 1))/num_threads - 1
2456 : rbuf(:, lb:ub) = 0.0_dp
2457 : END IF
2458 : !$OMP END PARALLEL
2459 :
2460 104 : CALL group%alltoall(cin, scount, sdispl, rbuf, rcount, rdispl)
2461 :
2462 : !$OMP PARALLEL DO DEFAULT(NONE) COLLAPSE(2) &
2463 : !$OMP PRIVATE(ip,ny,iy,is,ir) &
2464 104 : !$OMP SHARED(nx,nz,np,boin,sout,rbuf)
2465 : DO ixz = 1, nx*nz
2466 : DO ip = 0, np - 1
2467 : ny = boin(2, 2, ip) - boin(1, 2, ip) + 1
2468 : DO iy = 1, ny
2469 : is = boin(1, 2, ip) + iy - 1
2470 : ir = iy + ny*(ixz - 1)
2471 : sout(is, ixz) = rbuf(ir, ip)
2472 : END DO
2473 : END DO
2474 : END DO
2475 : !$OMP END PARALLEL DO
2476 :
2477 104 : CALL timestop(handle)
2478 :
2479 104 : END SUBROUTINE cube_transpose_5
2480 :
2481 : ! **************************************************************************************************
2482 : !> \brief ...
2483 : !> \param cin ...
2484 : !> \param group ...
2485 : !> \param boin ...
2486 : !> \param boout ...
2487 : !> \param sout ...
2488 : !> \param fft_scratch ...
2489 : ! **************************************************************************************************
2490 104 : SUBROUTINE cube_transpose_6(cin, group, boin, boout, sout, fft_scratch)
2491 :
2492 : COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
2493 : INTENT(IN) :: cin
2494 :
2495 : CLASS(mp_comm_type), INTENT(IN) :: group
2496 : INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:), INTENT(IN) :: boin, boout
2497 : COMPLEX(KIND=dp), DIMENSION(:, :), INTENT(OUT), CONTIGUOUS :: sout
2498 : TYPE(fft_scratch_type), INTENT(IN) :: fft_scratch
2499 :
2500 : CHARACTER(len=*), PARAMETER :: routineN = 'cube_transpose_6'
2501 :
2502 104 : COMPLEX(KIND=dp), DIMENSION(:, :), POINTER, CONTIGUOUS :: rbuf
2503 : INTEGER :: handle, ip, ir, iy, izx, lb, mip, my, &
2504 : my_id, np, num_threads, nx, ny, nz, ub
2505 104 : INTEGER, DIMENSION(:), POINTER, CONTIGUOUS :: rcount, rdispl, scount, sdispl
2506 :
2507 104 : CALL timeset(routineN, handle)
2508 :
2509 104 : np = fft_scratch%sizes%numtask
2510 104 : mip = fft_scratch%mip
2511 104 : scount => fft_scratch%scount
2512 104 : rcount => fft_scratch%rcount
2513 104 : sdispl => fft_scratch%sdispl
2514 104 : rdispl => fft_scratch%rdispl
2515 :
2516 104 : nx = boin(2, 1, mip) - boin(1, 1, mip) + 1
2517 104 : nz = boin(2, 3, mip) - boin(1, 3, mip) + 1
2518 312 : my = MAXVAL(boout(2, 2, :) - boout(1, 2, :) + 1)
2519 :
2520 104 : rbuf => fft_scratch%rbuf5
2521 104 : num_threads = 1
2522 104 : my_id = 0
2523 : !$OMP PARALLEL DEFAULT(NONE), &
2524 : !$OMP PRIVATE(NUM_THREADS,my_id,lb,ub,ip,ny,iy,ir), &
2525 104 : !$OMP SHARED(rbuf,nx,nz,np,boout,cin,my,scount,sdispl)
2526 : !$ num_threads = MIN(omp_get_max_threads(), SIZE(rbuf, 2))
2527 : !$ my_id = omp_get_thread_num()
2528 : IF (my_id < num_threads) THEN
2529 : lb = (SIZE(rbuf, 2)*my_id)/num_threads
2530 : ub = (SIZE(rbuf, 2)*(my_id + 1))/num_threads - 1
2531 : rbuf(:, lb:ub) = 0.0_dp
2532 : END IF
2533 : !$OMP BARRIER
2534 :
2535 : !$OMP DO COLLAPSE(2)
2536 : DO izx = 1, nz*nx
2537 : DO ip = 0, np - 1
2538 : ny = boout(2, 2, ip) - boout(1, 2, ip) + 1
2539 : DO iy = boout(1, 2, ip), boout(2, 2, ip)
2540 : ir = iy - boout(1, 2, ip) + 1 + (izx - 1)*ny
2541 : rbuf(ir, ip) = cin(iy, izx)
2542 : END DO
2543 : END DO
2544 : END DO
2545 : !$OMP END DO
2546 : !$OMP DO
2547 : DO ip = 0, np - 1
2548 : ny = boout(2, 2, ip) - boout(1, 2, ip) + 1
2549 : scount(ip) = nx*ny*nz
2550 : sdispl(ip) = nx*nz*my*ip
2551 : END DO
2552 : !$OMP END DO
2553 : !$OMP END PARALLEL
2554 104 : ny = boout(2, 2, mip) - boout(1, 2, mip) + 1
2555 : !$OMP PARALLEL DO DEFAULT(NONE), &
2556 : !$OMP PRIVATE(nx), &
2557 104 : !$OMP SHARED(np,boin,rcount,rdispl,nz,ny)
2558 : DO ip = 0, np - 1
2559 : nx = boin(2, 1, ip) - boin(1, 1, ip) + 1
2560 : rcount(ip) = nx*ny*nz
2561 : rdispl(ip) = ny*nz*(boin(1, 1, ip) - 1)
2562 : END DO
2563 : !$OMP END PARALLEL DO
2564 :
2565 104 : CALL group%alltoall(rbuf, scount, sdispl, sout, rcount, rdispl)
2566 :
2567 104 : CALL timestop(handle)
2568 :
2569 104 : END SUBROUTINE cube_transpose_6
2570 :
2571 : ! **************************************************************************************************
2572 : !> \brief ...
2573 : ! **************************************************************************************************
2574 17893 : SUBROUTINE init_fft_scratch_pool()
2575 :
2576 17893 : CALL release_fft_scratch_pool()
2577 :
2578 : ! Allocate first scratch and mark it as used
2579 17893 : ALLOCATE (fft_scratch_first)
2580 518897 : ALLOCATE (fft_scratch_first%fft_scratch)
2581 : ! this is a very special scratch, it seems, we always keep it 'most - recent' so we will never delete it
2582 17893 : fft_scratch_first%fft_scratch%last_tick = HUGE(fft_scratch_first%fft_scratch%last_tick)
2583 :
2584 17893 : init_fft_pool = init_fft_pool + 1
2585 :
2586 17893 : END SUBROUTINE init_fft_scratch_pool
2587 :
2588 : ! **************************************************************************************************
2589 : !> \brief ...
2590 : !> \param fft_scratch ...
2591 : ! **************************************************************************************************
2592 57674 : SUBROUTINE deallocate_fft_scratch_type(fft_scratch)
2593 : TYPE(fft_scratch_type), INTENT(INOUT) :: fft_scratch
2594 :
2595 : #if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
2596 : INTEGER :: ierr
2597 : COMPLEX(KIND=dp), POINTER :: dummy_ptr_z
2598 : #endif
2599 :
2600 : ! deallocate structures
2601 57674 : IF (ASSOCIATED(fft_scratch%ziptr)) THEN
2602 13243 : CALL fft_dealloc(fft_scratch%ziptr)
2603 : END IF
2604 57674 : IF (ASSOCIATED(fft_scratch%zoptr)) THEN
2605 13243 : CALL fft_dealloc(fft_scratch%zoptr)
2606 : END IF
2607 57674 : IF (ASSOCIATED(fft_scratch%p1buf)) THEN
2608 0 : CALL fft_dealloc(fft_scratch%p1buf)
2609 : END IF
2610 57674 : IF (ASSOCIATED(fft_scratch%p2buf)) THEN
2611 : #if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
2612 : dummy_ptr_z => fft_scratch%p2buf(1, 1)
2613 : ierr = offload_free_pinned_mem(c_loc(dummy_ptr_z))
2614 : #else
2615 0 : CALL fft_dealloc(fft_scratch%p2buf)
2616 : #endif
2617 : END IF
2618 57674 : IF (ASSOCIATED(fft_scratch%p3buf)) THEN
2619 : #if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
2620 : dummy_ptr_z => fft_scratch%p3buf(1, 1)
2621 : ierr = offload_free_pinned_mem(c_loc(dummy_ptr_z))
2622 : #else
2623 0 : CALL fft_dealloc(fft_scratch%p3buf)
2624 : #endif
2625 : END IF
2626 57674 : IF (ASSOCIATED(fft_scratch%p4buf)) THEN
2627 : #if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
2628 : dummy_ptr_z => fft_scratch%p4buf(1, 1)
2629 : ierr = offload_free_pinned_mem(c_loc(dummy_ptr_z))
2630 : #else
2631 0 : CALL fft_dealloc(fft_scratch%p4buf)
2632 : #endif
2633 : END IF
2634 57674 : IF (ASSOCIATED(fft_scratch%p5buf)) THEN
2635 : #if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
2636 : dummy_ptr_z => fft_scratch%p5buf(1, 1)
2637 : ierr = offload_free_pinned_mem(c_loc(dummy_ptr_z))
2638 : #else
2639 0 : CALL fft_dealloc(fft_scratch%p5buf)
2640 : #endif
2641 : END IF
2642 57674 : IF (ASSOCIATED(fft_scratch%p6buf)) THEN
2643 0 : CALL fft_dealloc(fft_scratch%p6buf)
2644 : END IF
2645 57674 : IF (ASSOCIATED(fft_scratch%p7buf)) THEN
2646 : #if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
2647 : dummy_ptr_z => fft_scratch%p7buf(1, 1)
2648 : ierr = offload_free_pinned_mem(c_loc(dummy_ptr_z))
2649 : #else
2650 0 : CALL fft_dealloc(fft_scratch%p7buf)
2651 : #endif
2652 : END IF
2653 57674 : IF (ASSOCIATED(fft_scratch%r1buf)) THEN
2654 : #if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
2655 : dummy_ptr_z => fft_scratch%r1buf(1, 1)
2656 : ierr = offload_free_pinned_mem(c_loc(dummy_ptr_z))
2657 : #else
2658 26530 : CALL fft_dealloc(fft_scratch%r1buf)
2659 : #endif
2660 : END IF
2661 57674 : IF (ASSOCIATED(fft_scratch%r2buf)) THEN
2662 26530 : CALL fft_dealloc(fft_scratch%r2buf)
2663 : END IF
2664 57674 : IF (ASSOCIATED(fft_scratch%tbuf)) THEN
2665 : #if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
2666 : dummy_ptr_z => fft_scratch%tbuf(1, 1, 1)
2667 : ierr = offload_free_pinned_mem(c_loc(dummy_ptr_z))
2668 : #else
2669 26530 : CALL fft_dealloc(fft_scratch%tbuf)
2670 : #endif
2671 : END IF
2672 57674 : IF (ASSOCIATED(fft_scratch%a1buf)) THEN
2673 8 : CALL fft_dealloc(fft_scratch%a1buf)
2674 : END IF
2675 57674 : IF (ASSOCIATED(fft_scratch%a2buf)) THEN
2676 8 : CALL fft_dealloc(fft_scratch%a2buf)
2677 : END IF
2678 57674 : IF (ASSOCIATED(fft_scratch%a3buf)) THEN
2679 8 : CALL fft_dealloc(fft_scratch%a3buf)
2680 : END IF
2681 57674 : IF (ASSOCIATED(fft_scratch%a4buf)) THEN
2682 8 : CALL fft_dealloc(fft_scratch%a4buf)
2683 : END IF
2684 57674 : IF (ASSOCIATED(fft_scratch%a5buf)) THEN
2685 8 : CALL fft_dealloc(fft_scratch%a5buf)
2686 : END IF
2687 57674 : IF (ASSOCIATED(fft_scratch%a6buf)) THEN
2688 8 : CALL fft_dealloc(fft_scratch%a6buf)
2689 : END IF
2690 57674 : IF (ASSOCIATED(fft_scratch%scount)) THEN
2691 0 : DEALLOCATE (fft_scratch%scount, fft_scratch%rcount, &
2692 26538 : fft_scratch%sdispl, fft_scratch%rdispl)
2693 : END IF
2694 57674 : IF (ASSOCIATED(fft_scratch%rr)) THEN
2695 26522 : DEALLOCATE (fft_scratch%rr)
2696 : END IF
2697 57674 : IF (ASSOCIATED(fft_scratch%xzbuf)) THEN
2698 0 : DEALLOCATE (fft_scratch%xzbuf)
2699 : END IF
2700 57674 : IF (ASSOCIATED(fft_scratch%yzbuf)) THEN
2701 0 : DEALLOCATE (fft_scratch%yzbuf)
2702 : END IF
2703 57674 : IF (ASSOCIATED(fft_scratch%xzbuf_sgl)) THEN
2704 0 : DEALLOCATE (fft_scratch%xzbuf_sgl)
2705 : END IF
2706 57674 : IF (ASSOCIATED(fft_scratch%yzbuf_sgl)) THEN
2707 0 : DEALLOCATE (fft_scratch%yzbuf_sgl)
2708 : END IF
2709 57674 : IF (ASSOCIATED(fft_scratch%ss)) THEN
2710 8 : DEALLOCATE (fft_scratch%ss)
2711 : END IF
2712 57674 : IF (ASSOCIATED(fft_scratch%tt)) THEN
2713 8 : DEALLOCATE (fft_scratch%tt)
2714 : END IF
2715 57674 : IF (ASSOCIATED(fft_scratch%pgrid)) THEN
2716 0 : DEALLOCATE (fft_scratch%pgrid)
2717 : END IF
2718 57674 : IF (ASSOCIATED(fft_scratch%pgcube)) THEN
2719 26538 : DEALLOCATE (fft_scratch%pgcube)
2720 : END IF
2721 57674 : IF (ASSOCIATED(fft_scratch%xcor)) THEN
2722 0 : DEALLOCATE (fft_scratch%xcor, fft_scratch%zcor)
2723 : END IF
2724 57674 : IF (ASSOCIATED(fft_scratch%pzcoord)) THEN
2725 0 : DEALLOCATE (fft_scratch%pzcoord)
2726 : END IF
2727 57674 : IF (ASSOCIATED(fft_scratch%xzcount)) THEN
2728 0 : DEALLOCATE (fft_scratch%xzcount, fft_scratch%yzcount)
2729 0 : DEALLOCATE (fft_scratch%xzdispl, fft_scratch%yzdispl)
2730 0 : fft_scratch%in = 0
2731 0 : fft_scratch%rsratio = 1._dp
2732 : END IF
2733 57674 : IF (ASSOCIATED(fft_scratch%rbuf1)) THEN
2734 0 : DEALLOCATE (fft_scratch%rbuf1)
2735 : END IF
2736 57674 : IF (ASSOCIATED(fft_scratch%rbuf2)) THEN
2737 0 : DEALLOCATE (fft_scratch%rbuf2)
2738 : END IF
2739 57674 : IF (ASSOCIATED(fft_scratch%rbuf3)) THEN
2740 0 : DEALLOCATE (fft_scratch%rbuf3)
2741 : END IF
2742 57674 : IF (ASSOCIATED(fft_scratch%rbuf4)) THEN
2743 0 : DEALLOCATE (fft_scratch%rbuf4)
2744 : END IF
2745 57674 : IF (ASSOCIATED(fft_scratch%rbuf5)) THEN
2746 8 : DEALLOCATE (fft_scratch%rbuf5)
2747 : END IF
2748 57674 : IF (ASSOCIATED(fft_scratch%rbuf6)) THEN
2749 8 : DEALLOCATE (fft_scratch%rbuf6)
2750 : END IF
2751 :
2752 57674 : IF (fft_scratch%cart_sub_comm(1) /= mp_comm_null) THEN
2753 0 : CALL fft_scratch%cart_sub_comm(1)%free()
2754 : END IF
2755 57674 : IF (fft_scratch%cart_sub_comm(2) /= mp_comm_null) THEN
2756 0 : CALL fft_scratch%cart_sub_comm(2)%free()
2757 : END IF
2758 :
2759 57674 : CALL fft_destroy_plan(fft_scratch%fft_plan(1))
2760 57674 : CALL fft_destroy_plan(fft_scratch%fft_plan(2))
2761 57674 : CALL fft_destroy_plan(fft_scratch%fft_plan(3))
2762 57674 : CALL fft_destroy_plan(fft_scratch%fft_plan(4))
2763 57674 : CALL fft_destroy_plan(fft_scratch%fft_plan(5))
2764 57674 : CALL fft_destroy_plan(fft_scratch%fft_plan(6))
2765 :
2766 57674 : END SUBROUTINE deallocate_fft_scratch_type
2767 :
2768 : ! **************************************************************************************************
2769 : !> \brief ...
2770 : ! **************************************************************************************************
2771 26826 : SUBROUTINE release_fft_scratch_pool()
2772 : TYPE(fft_scratch_pool_type), POINTER :: fft_scratch, fft_scratch_current
2773 :
2774 26826 : !$ CPASSERT(.NOT. omp_in_parallel() .OR. 0 == omp_get_thread_num())
2775 :
2776 26826 : IF (init_fft_pool == 0) NULLIFY (fft_scratch_first)
2777 :
2778 26826 : fft_scratch => fft_scratch_first
2779 57540 : DO
2780 84366 : IF (ASSOCIATED(fft_scratch)) THEN
2781 57540 : fft_scratch_current => fft_scratch
2782 57540 : fft_scratch => fft_scratch_current%fft_scratch_next
2783 57540 : NULLIFY (fft_scratch_current%fft_scratch_next)
2784 :
2785 57540 : CALL deallocate_fft_scratch_type(fft_scratch_current%fft_scratch)
2786 :
2787 230160 : DEALLOCATE (fft_scratch_current%fft_scratch)
2788 57540 : DEALLOCATE (fft_scratch_current)
2789 : ELSE
2790 : EXIT
2791 : END IF
2792 : END DO
2793 :
2794 26826 : init_fft_pool = 0
2795 :
2796 26826 : END SUBROUTINE release_fft_scratch_pool
2797 :
2798 : ! **************************************************************************************************
2799 : !> \brief ...
2800 : ! **************************************************************************************************
2801 3298884 : SUBROUTINE resize_fft_scratch_pool()
2802 :
2803 : INTEGER :: last_tick, nscratch
2804 : TYPE(fft_scratch_pool_type), POINTER :: fft_scratch_current, fft_scratch_old
2805 :
2806 3298884 : nscratch = 0
2807 :
2808 3298884 : last_tick = HUGE(last_tick)
2809 3298884 : NULLIFY (fft_scratch_old)
2810 :
2811 : ! start at the global pool, count, and find a deletion candidate
2812 3298884 : fft_scratch_current => fft_scratch_first
2813 17772956 : DO
2814 21071840 : IF (ASSOCIATED(fft_scratch_current)) THEN
2815 17772956 : nscratch = nscratch + 1
2816 : ! is this a candidate for deletion (i.e. least recently used, and not in use)
2817 17772956 : IF (.NOT. fft_scratch_current%fft_scratch%in_use) THEN
2818 14474072 : IF (fft_scratch_current%fft_scratch%last_tick < last_tick) THEN
2819 6723374 : last_tick = fft_scratch_current%fft_scratch%last_tick
2820 6723374 : fft_scratch_old => fft_scratch_current
2821 : END IF
2822 : END IF
2823 17772956 : fft_scratch_current => fft_scratch_current%fft_scratch_next
2824 : ELSE
2825 : EXIT
2826 : END IF
2827 : END DO
2828 :
2829 : ! we should delete a scratch
2830 3298884 : IF (nscratch > fft_pool_scratch_limit) THEN
2831 : ! note that we never deallocate the first (special) element of the list
2832 134 : IF (ASSOCIATED(fft_scratch_old)) THEN
2833 : fft_scratch_current => fft_scratch_first
2834 : DO
2835 2278 : IF (ASSOCIATED(fft_scratch_current)) THEN
2836 : ! should we delete the next in the list?
2837 2144 : IF (ASSOCIATED(fft_scratch_current%fft_scratch_next, fft_scratch_old)) THEN
2838 : ! fix the linked list
2839 134 : fft_scratch_current%fft_scratch_next => fft_scratch_old%fft_scratch_next
2840 :
2841 : ! deallocate the element
2842 134 : CALL deallocate_fft_scratch_type(fft_scratch_old%fft_scratch)
2843 536 : DEALLOCATE (fft_scratch_old%fft_scratch)
2844 134 : DEALLOCATE (fft_scratch_old)
2845 :
2846 : ELSE
2847 : fft_scratch_current => fft_scratch_current%fft_scratch_next
2848 : END IF
2849 : ELSE
2850 : EXIT
2851 : END IF
2852 : END DO
2853 :
2854 : ELSE
2855 0 : CPWARN("The number of the scratches exceeded the limit, but none could be deallocated")
2856 : END IF
2857 : END IF
2858 :
2859 3298884 : END SUBROUTINE resize_fft_scratch_pool
2860 :
2861 : ! **************************************************************************************************
2862 : !> \brief ...
2863 : !> \param fft_scratch ...
2864 : !> \param tf_type ...
2865 : !> \param n ...
2866 : !> \param fft_sizes ...
2867 : ! **************************************************************************************************
2868 3298884 : SUBROUTINE get_fft_scratch(fft_scratch, tf_type, n, fft_sizes)
2869 : TYPE(fft_scratch_type), POINTER :: fft_scratch
2870 : INTEGER, INTENT(IN) :: tf_type
2871 : INTEGER, DIMENSION(:), INTENT(IN) :: n
2872 : TYPE(fft_scratch_sizes), INTENT(IN), &
2873 : OPTIONAL :: fft_sizes
2874 :
2875 : CHARACTER(len=*), PARAMETER :: routineN = 'get_fft_scratch'
2876 :
2877 : INTEGER :: coord(2), DIM(2), handle, i, ix, iz, lg, lmax, m1, m2, &
2878 : mcx2, mcy3, mcz1, mcz2, mg, mmax, mx1, mx2, my1, my3, mz1, mz2, mz3, &
2879 : nbx, nbz, nm, nmax, nmray, np, nx, ny, nyzray, nz, pos(2)
2880 : INTEGER, DIMENSION(3) :: pcoord
2881 : LOGICAL :: equal
2882 : LOGICAL, DIMENSION(2) :: dims
2883 : TYPE(fft_scratch_pool_type), POINTER :: fft_scratch_current, &
2884 : fft_scratch_last, &
2885 : fft_scratch_new
2886 : #if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
2887 : INTEGER :: ierr
2888 : INTEGER(KIND=C_SIZE_T) :: length
2889 : TYPE(C_PTR) :: cptr_r1buf, cptr_tbuf, &
2890 : cptr_p2buf, cptr_p3buf, cptr_p4buf, cptr_p5buf, cptr_p7buf
2891 : #endif
2892 3298884 : CALL timeset(routineN, handle)
2893 :
2894 : ! this is the place to check that the scratch_pool does not grow without limits
2895 : ! before we add a new scratch check the size of the pool and release some of the list if needed
2896 3298884 : CALL resize_fft_scratch_pool()
2897 :
2898 : ! get the required scratch
2899 3298884 : !$OMP ATOMIC
2900 : tick_fft_pool = tick_fft_pool + 1
2901 3298884 : fft_scratch_current => fft_scratch_first
2902 : DO
2903 10316118 : IF (ASSOCIATED(fft_scratch_current)) THEN
2904 10276337 : IF (fft_scratch_current%fft_scratch%in_use) THEN
2905 3298884 : fft_scratch_last => fft_scratch_current
2906 3298884 : fft_scratch_current => fft_scratch_current%fft_scratch_next
2907 3298884 : CYCLE
2908 : END IF
2909 6977453 : IF (tf_type /= fft_scratch_current%fft_scratch%tf_type) THEN
2910 436456 : fft_scratch_last => fft_scratch_current
2911 436456 : fft_scratch_current => fft_scratch_current%fft_scratch_next
2912 436456 : CYCLE
2913 : END IF
2914 16563668 : IF (.NOT. ALL(n == fft_scratch_current%fft_scratch%nfft)) THEN
2915 3200536 : fft_scratch_last => fft_scratch_current
2916 3200536 : fft_scratch_current => fft_scratch_current%fft_scratch_next
2917 3200536 : CYCLE
2918 : END IF
2919 3340461 : IF (PRESENT(fft_sizes)) THEN
2920 2718238 : IF (fft_sizes%rs_group /= fft_scratch_current%fft_scratch%group) THEN
2921 81038 : fft_scratch_last => fft_scratch_current
2922 81038 : fft_scratch_current => fft_scratch_current%fft_scratch_next
2923 81038 : CYCLE
2924 : END IF
2925 2637200 : CALL is_equal(fft_sizes, fft_scratch_current%fft_scratch%sizes, equal)
2926 2637200 : IF (.NOT. equal) THEN
2927 320 : fft_scratch_last => fft_scratch_current
2928 320 : fft_scratch_current => fft_scratch_current%fft_scratch_next
2929 320 : CYCLE
2930 : END IF
2931 : END IF
2932 : ! Success
2933 3259103 : fft_scratch => fft_scratch_current%fft_scratch
2934 3259103 : fft_scratch_current%fft_scratch%in_use = .TRUE.
2935 3259103 : EXIT
2936 : ELSE
2937 : ! We cannot find the scratch type in this pool
2938 : ! Generate a new scratch set
2939 39781 : ALLOCATE (fft_scratch_new)
2940 1034306 : ALLOCATE (fft_scratch_new%fft_scratch)
2941 :
2942 39781 : IF (tf_type .NE. 400) THEN
2943 26538 : fft_scratch_new%fft_scratch%sizes = fft_sizes
2944 26538 : np = fft_sizes%numtask
2945 : ALLOCATE (fft_scratch_new%fft_scratch%scount(0:np - 1), fft_scratch_new%fft_scratch%rcount(0:np - 1), &
2946 : fft_scratch_new%fft_scratch%sdispl(0:np - 1), fft_scratch_new%fft_scratch%rdispl(0:np - 1), &
2947 238842 : fft_scratch_new%fft_scratch%pgcube(0:np - 1, 2))
2948 : END IF
2949 :
2950 0 : SELECT CASE (tf_type)
2951 : CASE DEFAULT
2952 0 : CPABORT("Invalid scratch type.")
2953 : CASE (100) ! fft3d_pb: full cube distribution
2954 0 : CPASSERT(PRESENT(fft_sizes))
2955 0 : mx1 = fft_sizes%mx1
2956 0 : my1 = fft_sizes%my1
2957 0 : mx2 = fft_sizes%mx2
2958 0 : mz2 = fft_sizes%mz2
2959 0 : my3 = fft_sizes%my3
2960 0 : mz3 = fft_sizes%mz3
2961 0 : CALL fft_alloc(fft_scratch_new%fft_scratch%a1buf, [mx1*my1, n(3)])
2962 0 : CALL fft_alloc(fft_scratch_new%fft_scratch%a2buf, [n(3), mx1*my1])
2963 0 : CALL fft_alloc(fft_scratch_new%fft_scratch%a3buf, [mx2*mz2, n(2)])
2964 0 : CALL fft_alloc(fft_scratch_new%fft_scratch%a4buf, [n(2), mx2*mz2])
2965 0 : CALL fft_alloc(fft_scratch_new%fft_scratch%a5buf, [my3*mz3, n(1)])
2966 0 : CALL fft_alloc(fft_scratch_new%fft_scratch%a6buf, [n(1), my3*mz3])
2967 0 : fft_scratch_new%fft_scratch%group = fft_sizes%rs_group
2968 :
2969 0 : dim = fft_sizes%rs_group%num_pe_cart
2970 0 : pos = fft_sizes%rs_group%mepos_cart
2971 0 : fft_scratch_new%fft_scratch%mip = fft_sizes%rs_group%mepos
2972 0 : fft_scratch_new%fft_scratch%dim = dim
2973 0 : fft_scratch_new%fft_scratch%pos = pos
2974 0 : mcz1 = fft_sizes%mcz1
2975 0 : mcx2 = fft_sizes%mcx2
2976 0 : mcz2 = fft_sizes%mcz2
2977 0 : mcy3 = fft_sizes%mcy3
2978 0 : ALLOCATE (fft_scratch_new%fft_scratch%rbuf1(mx2*my1*mcz2, 0:DIM(2) - 1))
2979 0 : ALLOCATE (fft_scratch_new%fft_scratch%rbuf2(mx1*my1*mcz2, 0:DIM(2) - 1))
2980 0 : ALLOCATE (fft_scratch_new%fft_scratch%rbuf3(mx2*mz3*mcy3, 0:DIM(1) - 1))
2981 0 : ALLOCATE (fft_scratch_new%fft_scratch%rbuf4(mx2*mz2*mcy3, 0:DIM(1) - 1))
2982 :
2983 0 : dims = (/.TRUE., .FALSE./)
2984 0 : CALL fft_scratch_new%fft_scratch%cart_sub_comm(1)%from_sub(fft_sizes%rs_group, dims)
2985 0 : dims = (/.FALSE., .TRUE./)
2986 0 : CALL fft_scratch_new%fft_scratch%cart_sub_comm(2)%from_sub(fft_sizes%rs_group, dims)
2987 :
2988 : !initialise pgcube
2989 0 : DO i = 0, DIM(1) - 1
2990 0 : coord = (/i, pos(2)/)
2991 0 : CALL fft_sizes%rs_group%rank_cart(coord, fft_scratch_new%fft_scratch%pgcube(i, 1))
2992 : END DO
2993 0 : DO i = 0, DIM(2) - 1
2994 0 : coord = (/pos(1), i/)
2995 0 : CALL fft_sizes%rs_group%rank_cart(coord, fft_scratch_new%fft_scratch%pgcube(i, 2))
2996 : END DO
2997 :
2998 : !set up fft plans
2999 : CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(1), fft_type, FWFFT, .TRUE., n(3), mx1*my1, &
3000 0 : fft_scratch_new%fft_scratch%a1buf, fft_scratch_new%fft_scratch%a2buf, fft_plan_style)
3001 : CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(2), fft_type, FWFFT, .TRUE., n(2), mx2*mz2, &
3002 0 : fft_scratch_new%fft_scratch%a3buf, fft_scratch_new%fft_scratch%a4buf, fft_plan_style)
3003 : CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(3), fft_type, FWFFT, .TRUE., n(1), my3*mz3, &
3004 0 : fft_scratch_new%fft_scratch%a5buf, fft_scratch_new%fft_scratch%a6buf, fft_plan_style)
3005 : CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(4), fft_type, BWFFT, .TRUE., n(1), my3*mz3, &
3006 0 : fft_scratch_new%fft_scratch%a6buf, fft_scratch_new%fft_scratch%a5buf, fft_plan_style)
3007 : CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(5), fft_type, BWFFT, .TRUE., n(2), mx2*mz2, &
3008 0 : fft_scratch_new%fft_scratch%a4buf, fft_scratch_new%fft_scratch%a3buf, fft_plan_style)
3009 : CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(6), fft_type, BWFFT, .TRUE., n(3), mx1*my1, &
3010 0 : fft_scratch_new%fft_scratch%a2buf, fft_scratch_new%fft_scratch%a1buf, fft_plan_style)
3011 :
3012 : CASE (101) ! fft3d_pb: full cube distribution (dim 1)
3013 8 : CPASSERT(PRESENT(fft_sizes))
3014 8 : mx1 = fft_sizes%mx1
3015 8 : my1 = fft_sizes%my1
3016 8 : mz1 = fft_sizes%mz1
3017 8 : my3 = fft_sizes%my3
3018 8 : mz3 = fft_sizes%mz3
3019 24 : CALL fft_alloc(fft_scratch_new%fft_scratch%a1buf, [mx1*my1, n(3)])
3020 24 : CALL fft_alloc(fft_scratch_new%fft_scratch%a2buf, [n(3), mx1*my1])
3021 8 : fft_scratch_new%fft_scratch%group = fft_sizes%rs_group
3022 24 : CALL fft_alloc(fft_scratch_new%fft_scratch%a3buf, [mx1*mz1, n(2)])
3023 24 : CALL fft_alloc(fft_scratch_new%fft_scratch%a4buf, [n(2), mx1*mz1])
3024 24 : CALL fft_alloc(fft_scratch_new%fft_scratch%a5buf, [my3*mz3, n(1)])
3025 24 : CALL fft_alloc(fft_scratch_new%fft_scratch%a6buf, [n(1), my3*mz3])
3026 :
3027 24 : dim = fft_sizes%rs_group%num_pe_cart
3028 24 : pos = fft_sizes%rs_group%mepos_cart
3029 8 : fft_scratch_new%fft_scratch%mip = fft_sizes%rs_group%mepos
3030 24 : fft_scratch_new%fft_scratch%dim = dim
3031 24 : fft_scratch_new%fft_scratch%pos = pos
3032 8 : mcy3 = fft_sizes%mcy3
3033 32 : ALLOCATE (fft_scratch_new%fft_scratch%rbuf5(mx1*mz3*mcy3, 0:DIM(1) - 1))
3034 32 : ALLOCATE (fft_scratch_new%fft_scratch%rbuf6(mx1*mz1*mcy3, 0:DIM(1) - 1))
3035 :
3036 : !set up fft plans
3037 : CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(1), fft_type, FWFFT, .TRUE., n(3), mx1*my1, &
3038 8 : fft_scratch_new%fft_scratch%a1buf, fft_scratch_new%fft_scratch%a3buf, fft_plan_style)
3039 : CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(2), fft_type, FWFFT, .TRUE., n(2), mx1*mz1, &
3040 8 : fft_scratch_new%fft_scratch%a3buf, fft_scratch_new%fft_scratch%a4buf, fft_plan_style)
3041 : CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(3), fft_type, FWFFT, .TRUE., n(1), my3*mz3, &
3042 8 : fft_scratch_new%fft_scratch%a5buf, fft_scratch_new%fft_scratch%a6buf, fft_plan_style)
3043 : CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(4), fft_type, BWFFT, .TRUE., n(1), my3*mz3, &
3044 8 : fft_scratch_new%fft_scratch%a6buf, fft_scratch_new%fft_scratch%a5buf, fft_plan_style)
3045 : CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(5), fft_type, BWFFT, .TRUE., n(2), mx1*mz1, &
3046 8 : fft_scratch_new%fft_scratch%a4buf, fft_scratch_new%fft_scratch%a3buf, fft_plan_style)
3047 : CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(6), fft_type, BWFFT, .TRUE., n(3), mx1*my1, &
3048 8 : fft_scratch_new%fft_scratch%a3buf, fft_scratch_new%fft_scratch%a1buf, fft_plan_style)
3049 :
3050 : CASE (200) ! fft3d_ps: plane distribution
3051 26530 : CPASSERT(PRESENT(fft_sizes))
3052 26530 : nx = fft_sizes%nx
3053 26530 : ny = fft_sizes%ny
3054 26530 : nz = fft_sizes%nz
3055 26530 : mx2 = fft_sizes%mx2
3056 26530 : lmax = fft_sizes%lmax
3057 26530 : mmax = fft_sizes%mmax
3058 26530 : lg = fft_sizes%lg
3059 26530 : mg = fft_sizes%mg
3060 26530 : np = fft_sizes%numtask
3061 26530 : nmray = fft_sizes%nmray
3062 26530 : nyzray = fft_sizes%nyzray
3063 : #if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
3064 : length = INT(2*dp_size*MAX(mmax, 1)*MAX(lmax, 1), KIND=C_SIZE_T)
3065 : ierr = offload_malloc_pinned_mem(cptr_r1buf, length)
3066 : CPASSERT(ierr == 0)
3067 : CALL c_f_pointer(cptr_r1buf, fft_scratch_new%fft_scratch%r1buf, (/MAX(mmax, 1), MAX(lmax, 1)/))
3068 : length = INT(2*dp_size*MAX(ny, 1)*MAX(nz, 1)*MAX(nx, 1), KIND=C_SIZE_T)
3069 : ierr = offload_malloc_pinned_mem(cptr_tbuf, length)
3070 : CPASSERT(ierr == 0)
3071 : CALL c_f_pointer(cptr_tbuf, fft_scratch_new%fft_scratch%tbuf, (/MAX(ny, 1), MAX(nz, 1), MAX(nx, 1)/))
3072 : #else
3073 79590 : CALL fft_alloc(fft_scratch_new%fft_scratch%r1buf, [mmax, lmax])
3074 106120 : CALL fft_alloc(fft_scratch_new%fft_scratch%tbuf, [ny, nz, nx])
3075 : #endif
3076 26530 : fft_scratch_new%fft_scratch%group = fft_sizes%rs_group
3077 79590 : CALL fft_alloc(fft_scratch_new%fft_scratch%r2buf, [lg, mg])
3078 26530 : nm = nmray*mx2
3079 26530 : IF (alltoall_sgl) THEN
3080 32 : ALLOCATE (fft_scratch_new%fft_scratch%ss(mmax, lmax))
3081 32 : ALLOCATE (fft_scratch_new%fft_scratch%tt(nm, 0:np - 1))
3082 : ELSE
3083 106088 : ALLOCATE (fft_scratch_new%fft_scratch%rr(nm, 0:np - 1))
3084 : END IF
3085 :
3086 : !set up fft plans
3087 : CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(1), fft_type, FWFFT, .TRUE., nz, nx*ny, &
3088 26530 : fft_scratch_new%fft_scratch%tbuf, fft_scratch_new%fft_scratch%r1buf, fft_plan_style)
3089 : CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(2), fft_type, FWFFT, .TRUE., ny, nx*nz, &
3090 26530 : fft_scratch_new%fft_scratch%r1buf, fft_scratch_new%fft_scratch%tbuf, fft_plan_style)
3091 : CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(3), fft_type, FWFFT, .TRUE., n(1), nyzray, &
3092 26530 : fft_scratch_new%fft_scratch%r1buf, fft_scratch_new%fft_scratch%r2buf, fft_plan_style)
3093 : CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(4), fft_type, BWFFT, .TRUE., n(1), nyzray, &
3094 26530 : fft_scratch_new%fft_scratch%r2buf, fft_scratch_new%fft_scratch%r1buf, fft_plan_style)
3095 : CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(5), fft_type, BWFFT, .TRUE., ny, nx*nz, &
3096 26530 : fft_scratch_new%fft_scratch%tbuf, fft_scratch_new%fft_scratch%r1buf, fft_plan_style)
3097 : CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(6), fft_type, BWFFT, .TRUE., nz, nx*ny, &
3098 26530 : fft_scratch_new%fft_scratch%r1buf, fft_scratch_new%fft_scratch%tbuf, fft_plan_style)
3099 :
3100 : CASE (300) ! fft3d_ps: block distribution
3101 0 : CPASSERT(PRESENT(fft_sizes))
3102 0 : mx1 = fft_sizes%mx1
3103 0 : mx2 = fft_sizes%mx2
3104 0 : my1 = fft_sizes%my1
3105 0 : mz2 = fft_sizes%mz2
3106 0 : mcx2 = fft_sizes%mcx2
3107 0 : lg = fft_sizes%lg
3108 0 : mg = fft_sizes%mg
3109 0 : nmax = fft_sizes%nmax
3110 0 : nmray = fft_sizes%nmray
3111 0 : nyzray = fft_sizes%nyzray
3112 0 : m1 = fft_sizes%r_dim(1)
3113 0 : m2 = fft_sizes%r_dim(2)
3114 0 : nbx = fft_sizes%nbx
3115 0 : nbz = fft_sizes%nbz
3116 0 : CALL fft_alloc(fft_scratch_new%fft_scratch%p1buf, [mx1*my1, n(3)])
3117 0 : CALL fft_alloc(fft_scratch_new%fft_scratch%p6buf, [lg, mg])
3118 : #if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
3119 : length = INT(2*dp_size*MAX(n(3), 1)*MAX(mx1*my1, 1), KIND=C_SIZE_T)
3120 : ierr = offload_malloc_pinned_mem(cptr_p2buf, length)
3121 : CPASSERT(ierr == 0)
3122 : CALL c_f_pointer(cptr_p2buf, fft_scratch_new%fft_scratch%p2buf, (/MAX(n(3), 1), MAX(mx1*my1, 1)/))
3123 : length = INT(2*dp_size*MAX(mx2*mz2, 1)*MAX(n(2), 1), KIND=C_SIZE_T)
3124 : ierr = offload_malloc_pinned_mem(cptr_p3buf, length)
3125 : CPASSERT(ierr == 0)
3126 : CALL c_f_pointer(cptr_p3buf, fft_scratch_new%fft_scratch%p3buf, (/MAX(mx2*mz2, 1), MAX(n(2), 1)/))
3127 : length = INT(2*dp_size*MAX(n(2), 1)*MAX(mx2*mz2, 1), KIND=C_SIZE_T)
3128 : ierr = offload_malloc_pinned_mem(cptr_p4buf, length)
3129 : CPASSERT(ierr == 0)
3130 : CALL c_f_pointer(cptr_p4buf, fft_scratch_new%fft_scratch%p4buf, (/MAX(n(2), 1), MAX(mx2*mz2, 1)/))
3131 : length = INT(2*dp_size*MAX(nyzray, 1)*MAX(n(1), 1), KIND=C_SIZE_T)
3132 : ierr = offload_malloc_pinned_mem(cptr_p5buf, length)
3133 : CPASSERT(ierr == 0)
3134 : CALL c_f_pointer(cptr_p5buf, fft_scratch_new%fft_scratch%p5buf, (/MAX(nyzray, 1), MAX(n(1), 1)/))
3135 : length = INT(2*dp_size*MAX(mg, 1)*MAX(lg, 1), KIND=C_SIZE_T)
3136 : ierr = offload_malloc_pinned_mem(cptr_p7buf, length)
3137 : CPASSERT(ierr == 0)
3138 : CALL c_f_pointer(cptr_p7buf, fft_scratch_new%fft_scratch%p7buf, (/MAX(mg, 1), MAX(lg, 1)/))
3139 : #else
3140 0 : CALL fft_alloc(fft_scratch_new%fft_scratch%p2buf, [n(3), mx1*my1])
3141 0 : CALL fft_alloc(fft_scratch_new%fft_scratch%p3buf, [mx2*mz2, n(2)])
3142 0 : CALL fft_alloc(fft_scratch_new%fft_scratch%p4buf, [n(2), mx2*mz2])
3143 0 : CALL fft_alloc(fft_scratch_new%fft_scratch%p5buf, [nyzray, n(1)])
3144 0 : CALL fft_alloc(fft_scratch_new%fft_scratch%p7buf, [mg, lg])
3145 : #endif
3146 0 : IF (alltoall_sgl) THEN
3147 0 : ALLOCATE (fft_scratch_new%fft_scratch%yzbuf_sgl(mg*lg))
3148 0 : ALLOCATE (fft_scratch_new%fft_scratch%xzbuf_sgl(n(2)*mx2*mz2))
3149 : ELSE
3150 0 : ALLOCATE (fft_scratch_new%fft_scratch%yzbuf(mg*lg))
3151 0 : ALLOCATE (fft_scratch_new%fft_scratch%xzbuf(n(2)*mx2*mz2))
3152 : END IF
3153 0 : ALLOCATE (fft_scratch_new%fft_scratch%pgrid(0:m1 - 1, 0:m2 - 1))
3154 0 : ALLOCATE (fft_scratch_new%fft_scratch%xcor(nbx))
3155 0 : ALLOCATE (fft_scratch_new%fft_scratch%zcor(nbz))
3156 0 : ALLOCATE (fft_scratch_new%fft_scratch%pzcoord(0:np - 1))
3157 : ALLOCATE (fft_scratch_new%fft_scratch%xzcount(0:np - 1), &
3158 0 : fft_scratch_new%fft_scratch%yzcount(0:np - 1))
3159 : ALLOCATE (fft_scratch_new%fft_scratch%xzdispl(0:np - 1), &
3160 0 : fft_scratch_new%fft_scratch%yzdispl(0:np - 1))
3161 0 : fft_scratch_new%fft_scratch%group = fft_sizes%rs_group
3162 :
3163 0 : dim = fft_sizes%rs_group%num_pe_cart
3164 0 : pos = fft_sizes%rs_group%mepos_cart
3165 0 : fft_scratch_new%fft_scratch%mip = fft_sizes%rs_group%mepos
3166 0 : fft_scratch_new%fft_scratch%dim = dim
3167 0 : fft_scratch_new%fft_scratch%pos = pos
3168 0 : mcz1 = fft_sizes%mcz1
3169 0 : mcz2 = fft_sizes%mcz2
3170 0 : ALLOCATE (fft_scratch_new%fft_scratch%rbuf1(mx2*my1*mcz2, 0:DIM(2) - 1))
3171 0 : ALLOCATE (fft_scratch_new%fft_scratch%rbuf2(mx1*my1*mcz2, 0:DIM(2) - 1))
3172 :
3173 0 : dims = (/.FALSE., .TRUE./)
3174 0 : CALL fft_scratch_new%fft_scratch%cart_sub_comm(2)%from_sub(fft_sizes%rs_group, dims)
3175 :
3176 : !initialise pgcube
3177 0 : DO i = 0, DIM(2) - 1
3178 0 : coord = (/pos(1), i/)
3179 0 : CALL fft_sizes%rs_group%rank_cart(coord, fft_scratch_new%fft_scratch%pgcube(i, 2))
3180 : END DO
3181 :
3182 : !initialise pgrid
3183 0 : DO ix = 0, m1 - 1
3184 0 : DO iz = 0, m2 - 1
3185 0 : coord = (/ix, iz/)
3186 0 : CALL fft_sizes%rs_group%rank_cart(coord, fft_scratch_new%fft_scratch%pgrid(ix, iz))
3187 : END DO
3188 : END DO
3189 :
3190 : !initialise pzcoord
3191 0 : DO i = 0, np - 1
3192 0 : CALL fft_sizes%rs_group%coords(i, pcoord)
3193 0 : fft_scratch_new%fft_scratch%pzcoord(i) = pcoord(2)
3194 : END DO
3195 :
3196 : !set up fft plans
3197 : CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(1), fft_type, FWFFT, .TRUE., n(3), mx1*my1, &
3198 0 : fft_scratch_new%fft_scratch%p1buf, fft_scratch_new%fft_scratch%p2buf, fft_plan_style)
3199 : CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(2), fft_type, FWFFT, .TRUE., n(2), mx2*mz2, &
3200 0 : fft_scratch_new%fft_scratch%p3buf, fft_scratch_new%fft_scratch%p4buf, fft_plan_style)
3201 : CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(3), fft_type, FWFFT, .TRUE., n(1), nyzray, &
3202 0 : fft_scratch_new%fft_scratch%p5buf, fft_scratch_new%fft_scratch%p6buf, fft_plan_style)
3203 : CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(4), fft_type, BWFFT, .TRUE., n(1), nyzray, &
3204 0 : fft_scratch_new%fft_scratch%p6buf, fft_scratch_new%fft_scratch%p7buf, fft_plan_style)
3205 : CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(5), fft_type, BWFFT, .TRUE., n(2), mx2*mz2, &
3206 0 : fft_scratch_new%fft_scratch%p4buf, fft_scratch_new%fft_scratch%p3buf, fft_plan_style)
3207 : CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(6), fft_type, BWFFT, .TRUE., n(3), mx1*my1, &
3208 0 : fft_scratch_new%fft_scratch%p3buf, fft_scratch_new%fft_scratch%p1buf, fft_plan_style)
3209 :
3210 : CASE (400) ! serial FFT
3211 13243 : np = 0
3212 13243 : CALL fft_alloc(fft_scratch_new%fft_scratch%ziptr, n)
3213 13243 : CALL fft_alloc(fft_scratch_new%fft_scratch%zoptr, n)
3214 :
3215 : !in place plans
3216 : CALL fft_create_plan_3d(fft_scratch_new%fft_scratch%fft_plan(1), fft_type, .TRUE., FWFFT, n, &
3217 13243 : fft_scratch_new%fft_scratch%ziptr, fft_scratch_new%fft_scratch%ziptr, fft_plan_style)
3218 : CALL fft_create_plan_3d(fft_scratch_new%fft_scratch%fft_plan(2), fft_type, .TRUE., BWFFT, n, &
3219 13243 : fft_scratch_new%fft_scratch%ziptr, fft_scratch_new%fft_scratch%ziptr, fft_plan_style)
3220 : ! out of place plans
3221 : CALL fft_create_plan_3d(fft_scratch_new%fft_scratch%fft_plan(3), fft_type, .FALSE., FWFFT, n, &
3222 13243 : fft_scratch_new%fft_scratch%ziptr, fft_scratch_new%fft_scratch%zoptr, fft_plan_style)
3223 : CALL fft_create_plan_3d(fft_scratch_new%fft_scratch%fft_plan(4), fft_type, .FALSE., BWFFT, n, &
3224 53024 : fft_scratch_new%fft_scratch%ziptr, fft_scratch_new%fft_scratch%zoptr, fft_plan_style)
3225 :
3226 : END SELECT
3227 :
3228 39781 : NULLIFY (fft_scratch_new%fft_scratch_next)
3229 : fft_scratch_new%fft_scratch%fft_scratch_id = &
3230 39781 : fft_scratch_last%fft_scratch%fft_scratch_id + 1
3231 39781 : fft_scratch_new%fft_scratch%in_use = .TRUE.
3232 159124 : fft_scratch_new%fft_scratch%nfft = n
3233 39781 : fft_scratch_last%fft_scratch_next => fft_scratch_new
3234 39781 : fft_scratch_new%fft_scratch%tf_type = tf_type
3235 39781 : fft_scratch => fft_scratch_new%fft_scratch
3236 39781 : EXIT
3237 :
3238 : END IF
3239 : END DO
3240 :
3241 3298884 : !$OMP ATOMIC READ
3242 : fft_scratch%last_tick = tick_fft_pool
3243 :
3244 3298884 : CALL timestop(handle)
3245 :
3246 3298884 : END SUBROUTINE get_fft_scratch
3247 :
3248 : ! **************************************************************************************************
3249 : !> \brief ...
3250 : !> \param fft_scratch ...
3251 : ! **************************************************************************************************
3252 3298884 : SUBROUTINE release_fft_scratch(fft_scratch)
3253 :
3254 : TYPE(fft_scratch_type), POINTER :: fft_scratch
3255 :
3256 : INTEGER :: scratch_id
3257 : TYPE(fft_scratch_pool_type), POINTER :: fft_scratch_current
3258 :
3259 3298884 : scratch_id = fft_scratch%fft_scratch_id
3260 :
3261 3298884 : fft_scratch_current => fft_scratch_first
3262 7017234 : DO
3263 10316118 : IF (ASSOCIATED(fft_scratch_current)) THEN
3264 10316118 : IF (scratch_id == fft_scratch_current%fft_scratch%fft_scratch_id) THEN
3265 3298884 : fft_scratch%in_use = .FALSE.
3266 3298884 : NULLIFY (fft_scratch)
3267 3298884 : EXIT
3268 : END IF
3269 7017234 : fft_scratch_current => fft_scratch_current%fft_scratch_next
3270 : ELSE
3271 : ! We cannot find the scratch type in this pool
3272 0 : CPABORT("Invalid scratch type.")
3273 0 : EXIT
3274 : END IF
3275 : END DO
3276 :
3277 3298884 : END SUBROUTINE release_fft_scratch
3278 :
3279 : ! **************************************************************************************************
3280 : !> \brief ...
3281 : !> \param rs ...
3282 : !> \param scount ...
3283 : !> \param sdispl ...
3284 : !> \param rq ...
3285 : !> \param rcount ...
3286 : !> \param rdispl ...
3287 : !> \param group ...
3288 : ! **************************************************************************************************
3289 0 : SUBROUTINE sparse_alltoall(rs, scount, sdispl, rq, rcount, rdispl, group)
3290 : COMPLEX(KIND=dp), DIMENSION(:), POINTER :: rs
3291 : INTEGER, DIMENSION(:), POINTER :: scount, sdispl
3292 : COMPLEX(KIND=dp), DIMENSION(:), POINTER :: rq
3293 : INTEGER, DIMENSION(:), POINTER :: rcount, rdispl
3294 :
3295 : CLASS(mp_comm_type), INTENT(IN) :: group
3296 :
3297 0 : COMPLEX(KIND=dp), DIMENSION(:), POINTER :: msgin, msgout
3298 : INTEGER :: ip, n, nr, ns, pos
3299 0 : TYPE(mp_request_type), ALLOCATABLE, DIMENSION(:) :: rreq, sreq
3300 :
3301 0 : CALL group%sync()
3302 0 : n = group%num_pe
3303 0 : pos = group%mepos
3304 0 : ALLOCATE (sreq(0:n - 1))
3305 0 : ALLOCATE (rreq(0:n - 1))
3306 0 : nr = 0
3307 0 : DO ip = 0, n - 1
3308 0 : IF (rcount(ip) == 0) CYCLE
3309 0 : IF (ip == pos) CYCLE
3310 0 : msgout => rq(rdispl(ip) + 1:rdispl(ip) + rcount(ip))
3311 0 : CALL group%irecv(msgout, ip, rreq(nr))
3312 0 : nr = nr + 1
3313 : END DO
3314 0 : ns = 0
3315 0 : DO ip = 0, n - 1
3316 0 : IF (scount(ip) == 0) CYCLE
3317 0 : IF (ip == pos) CYCLE
3318 0 : msgin => rs(sdispl(ip) + 1:sdispl(ip) + scount(ip))
3319 0 : CALL group%isend(msgin, ip, sreq(ns))
3320 0 : ns = ns + 1
3321 : END DO
3322 0 : IF (rcount(pos) /= 0) THEN
3323 0 : IF (rcount(pos) /= scount(pos)) CPABORT("Invalid count.")
3324 0 : rq(rdispl(pos) + 1:rdispl(pos) + rcount(pos)) = rs(sdispl(pos) + 1:sdispl(pos) + scount(pos))
3325 : END IF
3326 0 : CALL mp_waitall(sreq(0:ns - 1))
3327 0 : CALL mp_waitall(rreq(0:nr - 1))
3328 0 : DEALLOCATE (sreq)
3329 0 : DEALLOCATE (rreq)
3330 0 : CALL group%sync()
3331 :
3332 0 : END SUBROUTINE sparse_alltoall
3333 :
3334 : ! **************************************************************************************************
3335 : !> \brief test data structures for equality. It is assumed that if they are
3336 : !> different for one mpi task they are different for all (??)
3337 : !> \param fft_size_1 ...
3338 : !> \param fft_size_2 ...
3339 : !> \param equal ...
3340 : ! **************************************************************************************************
3341 2637200 : SUBROUTINE is_equal(fft_size_1, fft_size_2, equal)
3342 : TYPE(fft_scratch_sizes) :: fft_size_1, fft_size_2
3343 : LOGICAL :: equal
3344 :
3345 : equal = .TRUE.
3346 :
3347 2637200 : equal = equal .AND. fft_size_1%nx == fft_size_2%nx
3348 2637200 : equal = equal .AND. fft_size_1%ny == fft_size_2%ny
3349 2637200 : equal = equal .AND. fft_size_1%nz == fft_size_2%nz
3350 :
3351 2637200 : equal = equal .AND. fft_size_1%lmax == fft_size_2%lmax
3352 2637200 : equal = equal .AND. fft_size_1%mmax == fft_size_2%mmax
3353 2637200 : equal = equal .AND. fft_size_1%nmax == fft_size_2%nmax
3354 :
3355 2637200 : equal = equal .AND. fft_size_1%mx1 == fft_size_2%mx1
3356 2637200 : equal = equal .AND. fft_size_1%mx2 == fft_size_2%mx2
3357 2637200 : equal = equal .AND. fft_size_1%mx3 == fft_size_2%mx3
3358 :
3359 2637200 : equal = equal .AND. fft_size_1%my1 == fft_size_2%my1
3360 2637200 : equal = equal .AND. fft_size_1%my2 == fft_size_2%my2
3361 2637200 : equal = equal .AND. fft_size_1%my3 == fft_size_2%my3
3362 :
3363 2637200 : equal = equal .AND. fft_size_1%mcz1 == fft_size_2%mcz1
3364 2637200 : equal = equal .AND. fft_size_1%mcx2 == fft_size_2%mcx2
3365 2637200 : equal = equal .AND. fft_size_1%mcz2 == fft_size_2%mcz2
3366 2637200 : equal = equal .AND. fft_size_1%mcy3 == fft_size_2%mcy3
3367 :
3368 2637200 : equal = equal .AND. fft_size_1%lg == fft_size_2%lg
3369 2637200 : equal = equal .AND. fft_size_1%mg == fft_size_2%mg
3370 :
3371 2637200 : equal = equal .AND. fft_size_1%nbx == fft_size_2%nbx
3372 2637200 : equal = equal .AND. fft_size_1%nbz == fft_size_2%nbz
3373 :
3374 2637200 : equal = equal .AND. fft_size_1%nmray == fft_size_2%nmray
3375 2637200 : equal = equal .AND. fft_size_1%nyzray == fft_size_2%nyzray
3376 :
3377 2637200 : equal = equal .AND. fft_size_1%rs_group == fft_size_2%rs_group
3378 :
3379 7911600 : equal = equal .AND. ALL(fft_size_1%g_pos == fft_size_2%g_pos)
3380 7911600 : equal = equal .AND. ALL(fft_size_1%r_pos == fft_size_2%r_pos)
3381 7911600 : equal = equal .AND. ALL(fft_size_1%r_dim == fft_size_2%r_dim)
3382 :
3383 2637200 : equal = equal .AND. fft_size_1%numtask == fft_size_2%numtask
3384 :
3385 2637200 : END SUBROUTINE is_equal
3386 :
3387 0 : END MODULE fft_tools
|