Line data Source code
1 : !--------------------------------------------------------------------------------------------------!
2 : ! CP2K: A general program to perform molecular dynamics simulations !
3 : ! Copyright 2000-2024 CP2K developers group <https://cp2k.org> !
4 : ! !
5 : ! SPDX-License-Identifier: GPL-2.0-or-later !
6 : !--------------------------------------------------------------------------------------------------!
7 :
8 : ! **************************************************************************************************
9 : !> \brief different utils that are useful to manipulate splines on the regular
10 : !> grid of a pw
11 : !> \par History
12 : !> 05.2003 created [fawzi]
13 : !> 08.2004 removed spline evaluation method using more than 2 read streams
14 : !> (pw_compose_stripe_rs3), added linear solver based spline
15 : !> inversion [fawzi]
16 : !> \author Fawzi Mohamed
17 : ! **************************************************************************************************
18 : MODULE pw_spline_utils
19 :
20 : USE cp_log_handling, ONLY: cp_get_default_logger,&
21 : cp_logger_type,&
22 : cp_to_string
23 : USE kinds, ONLY: dp
24 : USE mathconstants, ONLY: twopi
25 : USE message_passing, ONLY: mp_comm_congruent
26 : USE pw_grid_types, ONLY: FULLSPACE,&
27 : PW_MODE_LOCAL
28 : USE pw_methods, ONLY: pw_axpy,&
29 : pw_copy,&
30 : pw_integral_ab,&
31 : pw_zero
32 : USE pw_pool_types, ONLY: pw_pool_release,&
33 : pw_pool_type
34 : USE pw_types, ONLY: pw_c1d_gs_type,&
35 : pw_r3d_rs_type
36 : #include "../base/base_uses.f90"
37 :
38 : IMPLICIT NONE
39 : PRIVATE
40 :
41 : LOGICAL, PRIVATE, PARAMETER :: debug_this_module = .TRUE.
42 : CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'pw_spline_utils'
43 :
44 : INTEGER, PARAMETER, PUBLIC :: no_precond = 0, &
45 : precond_spl3_aint = 1, &
46 : precond_spl3_1 = 2, &
47 : precond_spl3_aint2 = 3, &
48 : precond_spl3_2 = 4, &
49 : precond_spl3_3 = 5
50 :
51 : REAL(KIND=dp), PUBLIC, PARAMETER, DIMENSION(4) :: nn10_coeffs = &
52 : (/125._dp/216._dp, 25._dp/432._dp, 5._dp/864._dp, 1._dp/1728._dp/), &
53 : spline3_coeffs = &
54 : (/8._dp/(27._dp), 2._dp/(27._dp), 1._dp/(27._dp*2._dp), &
55 : 1._dp/(27._dp*8._dp)/), &
56 : spline2_coeffs = &
57 : (/27._dp/(64._dp), 9._dp/(64._dp*2_dp), 3._dp/(64._dp*4._dp), &
58 : 1._dp/(64._dp*8._dp)/), &
59 : nn50_coeffs = &
60 : (/15625._dp/17576._dp, 625._dp/35152._dp, 25._dp/70304._dp, &
61 : 1._dp/140608._dp/), &
62 : spl3_aint_coeff = &
63 : (/46._dp/27._dp, -2._dp/(27._dp), -1._dp/(27._dp*2._dp), &
64 : -1._dp/(27._dp*8._dp)/), &
65 : spl3_precond1_coeff = &
66 : (/64._dp/3._dp, -8._dp/3._dp, -1._dp/3._dp, -1._dp/24._dp/), &
67 : spl3_1d_transf_coeffs = &
68 : (/2._dp/3._dp, 23._dp/48._dp, 1._dp/6._dp, 1._dp/48._dp/)
69 :
70 : REAL(KIND=dp), PUBLIC, PARAMETER, DIMENSION(3) :: spline3_deriv_coeffs = &
71 : (/2.0_dp/9.0_dp, 1.0_dp/18.0_dp, 1.0_dp/72.0_dp/), &
72 : spline2_deriv_coeffs = &
73 : (/9.0_dp/32.0_dp, 3.0_dp/64.0_dp, 1.0_dp/128.0_dp/), &
74 : nn10_deriv_coeffs = &
75 : (/25._dp/72._dp, 5._dp/144, 1._dp/288._dp/), &
76 : nn50_deriv_coeffs = &
77 : (/625._dp/1352._dp, 25._dp/2704._dp, 1._dp/5408._dp/), &
78 : spl3_1d_coeffs0 = &
79 : (/1._dp/6_dp, 2._dp/3._dp, 1._dp/6._dp/), &
80 : spl3_1d_transf_border1 = &
81 : (/0.517977704_dp, 0.464044595_dp, 0.17977701e-1_dp/)
82 :
83 : PUBLIC :: pw_spline3_interpolate_values_g, &
84 : pw_spline3_deriv_g
85 : PUBLIC :: pw_spline_scale_deriv
86 : PUBLIC :: pw_spline2_interpolate_values_g, &
87 : pw_spline2_deriv_g
88 : PUBLIC :: pw_nn_smear_r, pw_nn_deriv_r, &
89 : spl3_nopbc, spl3_pbc, spl3_nopbct
90 : PUBLIC :: add_fine2coarse, add_coarse2fine
91 : PUBLIC :: pw_spline_precond_create, &
92 : pw_spline_do_precond, &
93 : pw_spline_precond_set_kind, &
94 : find_coeffs, &
95 : pw_spline_precond_release, &
96 : pw_spline_precond_type, &
97 : Eval_Interp_Spl3_pbc, &
98 : Eval_d_Interp_Spl3_pbc
99 :
100 : !***
101 :
102 : ! **************************************************************************************************
103 : !> \brief stores information for the preconditioner used to calculate the
104 : !> coeffs of splines
105 : !> \author fawzi
106 : ! **************************************************************************************************
107 : TYPE pw_spline_precond_type
108 : INTEGER :: kind = no_precond
109 : REAL(kind=dp), DIMENSION(4) :: coeffs = 0.0_dp
110 : REAL(kind=dp), DIMENSION(3) :: coeffs_1d = 0.0_dp
111 : LOGICAL :: sharpen = .FALSE., normalize = .FALSE., pbc = .FALSE., transpose = .FALSE.
112 : TYPE(pw_pool_type), POINTER :: pool => NULL()
113 : END TYPE pw_spline_precond_type
114 :
115 : CONTAINS
116 :
117 : ! **************************************************************************************************
118 : !> \brief calculates the FFT of the coefficients of the quadratic spline that
119 : !> interpolates the given values
120 : !> \param spline_g on entry the FFT of the values to interpolate as cc,
121 : !> will contain the FFT of the coefficients of the spline
122 : !> \par History
123 : !> 06.2003 created [fawzi]
124 : !> \author Fawzi Mohamed
125 : !> \note
126 : !> does not work with spherical cutoff
127 : ! **************************************************************************************************
128 38532 : SUBROUTINE pw_spline2_interpolate_values_g(spline_g)
129 : TYPE(pw_c1d_gs_type), INTENT(IN) :: spline_g
130 :
131 : CHARACTER(len=*), PARAMETER :: routineN = 'pw_spline2_interpolate_values_g'
132 :
133 : INTEGER :: handle, i, ii, j, k
134 : INTEGER, DIMENSION(2, 3) :: gbo
135 : INTEGER, DIMENSION(3) :: n_tot
136 : REAL(KIND=dp) :: c23, coeff
137 38532 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:) :: cosIVals, cosJVals, cosKVals
138 :
139 38532 : CALL timeset(routineN, handle)
140 :
141 154128 : n_tot(1:3) = spline_g%pw_grid%npts(1:3)
142 385320 : gbo = spline_g%pw_grid%bounds
143 :
144 38532 : CPASSERT(.NOT. spline_g%pw_grid%spherical)
145 38532 : CPASSERT(spline_g%pw_grid%grid_span == FULLSPACE)
146 :
147 0 : ALLOCATE (cosIVals(gbo(1, 1):gbo(2, 1)), cosJVals(gbo(1, 2):gbo(2, 2)), &
148 269724 : cosKVals(gbo(1, 3):gbo(2, 3)))
149 :
150 38532 : coeff = twopi/n_tot(1)
151 38532 : !$OMP PARALLEL DO PRIVATE(i) DEFAULT(NONE) SHARED(cosIVals,coeff,gbo)
152 : DO i = gbo(1, 1), gbo(2, 1)
153 : cosIVals(i) = COS(coeff*REAL(i, dp))
154 : END DO
155 38532 : coeff = twopi/n_tot(2)
156 38532 : !$OMP PARALLEL DO PRIVATE(j) DEFAULT(NONE) SHARED(cosJVals,coeff,gbo)
157 : DO j = gbo(1, 2), gbo(2, 2)
158 : cosJVals(j) = COS(coeff*REAL(j, dp))
159 : END DO
160 38532 : coeff = twopi/n_tot(3)
161 38532 : !$OMP PARALLEL DO PRIVATE(k) DEFAULT(NONE) SHARED(cosKVals,coeff,gbo)
162 : DO k = gbo(1, 3), gbo(2, 3)
163 : cosKVals(k) = COS(coeff*REAL(k, dp))
164 : END DO
165 :
166 38532 : !$OMP PARALLEL DO PRIVATE(i,j,k,ii,coeff,c23) DEFAULT(NONE) SHARED(spline_g,cosIVals,cosJVals,cosKVals)
167 : DO ii = 1, SIZE(spline_g%array)
168 : i = spline_g%pw_grid%g_hat(1, ii)
169 : j = spline_g%pw_grid%g_hat(2, ii)
170 : k = spline_g%pw_grid%g_hat(3, ii)
171 :
172 : c23 = cosJVals(j)*cosKVals(k)
173 : coeff = 64.0_dp/(cosIVals(i)*c23 + &
174 : (cosIVals(i)*cosJVals(j) + cosIVals(i)*cosKVals(k) + c23)*3.0_dp + &
175 : (cosIVals(i) + cosJVals(j) + cosKVals(k))*9.0_dp + &
176 : 27.0_dp)
177 :
178 : spline_g%array(ii) = spline_g%array(ii)*coeff
179 :
180 : END DO
181 38532 : DEALLOCATE (cosIVals, cosJVals, cosKVals)
182 :
183 38532 : CALL timestop(handle)
184 77064 : END SUBROUTINE pw_spline2_interpolate_values_g
185 :
186 : ! **************************************************************************************************
187 : !> \brief calculates the FFT of the coefficients of the2 cubic spline that
188 : !> interpolates the given values
189 : !> \param spline_g on entry the FFT of the values to interpolate as cc,
190 : !> will contain the FFT of the coefficients of the spline
191 : !> \par History
192 : !> 06.2003 created [fawzi]
193 : !> \author Fawzi Mohamed
194 : !> \note
195 : !> does not work with spherical cutoff
196 : !> stupid distribution for cos calculation, it should calculate only the
197 : !> needed cos, and avoid the mpi_allreduce
198 : ! **************************************************************************************************
199 396 : SUBROUTINE pw_spline3_interpolate_values_g(spline_g)
200 : TYPE(pw_c1d_gs_type), INTENT(IN) :: spline_g
201 :
202 : CHARACTER(len=*), PARAMETER :: routineN = 'pw_spline3_interpolate_values_g'
203 :
204 : INTEGER :: handle, i, ii, j, k
205 : INTEGER, DIMENSION(2, 3) :: gbo
206 : INTEGER, DIMENSION(3) :: n_tot
207 : REAL(KIND=dp) :: c23, coeff
208 396 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:) :: cosIVals, cosJVals, cosKVals
209 :
210 396 : CALL timeset(routineN, handle)
211 :
212 1584 : n_tot(1:3) = spline_g%pw_grid%npts(1:3)
213 3960 : gbo = spline_g%pw_grid%bounds
214 :
215 396 : CPASSERT(.NOT. spline_g%pw_grid%spherical)
216 396 : CPASSERT(spline_g%pw_grid%grid_span == FULLSPACE)
217 :
218 0 : ALLOCATE (cosIVals(gbo(1, 1):gbo(2, 1)), &
219 0 : cosJVals(gbo(1, 2):gbo(2, 2)), &
220 2772 : cosKVals(gbo(1, 3):gbo(2, 3)))
221 :
222 396 : coeff = twopi/n_tot(1)
223 396 : !$OMP PARALLEL DO PRIVATE(i) DEFAULT(NONE) SHARED(cosIVals,coeff,gbo)
224 : DO i = gbo(1, 1), gbo(2, 1)
225 : cosIVals(i) = COS(coeff*REAL(i, dp))
226 : END DO
227 396 : coeff = twopi/n_tot(2)
228 396 : !$OMP PARALLEL DO PRIVATE(j) DEFAULT(NONE) SHARED(cosJVals,coeff,gbo)
229 : DO j = gbo(1, 2), gbo(2, 2)
230 : cosJVals(j) = COS(coeff*REAL(j, dp))
231 : END DO
232 396 : coeff = twopi/n_tot(3)
233 396 : !$OMP PARALLEL DO PRIVATE(k) DEFAULT(NONE) SHARED(cosKVals,coeff,gbo)
234 : DO k = gbo(1, 3), gbo(2, 3)
235 : cosKVals(k) = COS(coeff*REAL(k, dp))
236 : END DO
237 :
238 396 : !$OMP PARALLEL DO DEFAULT(NONE) PRIVATE(i,j,k,ii,coeff,c23) SHARED(spline_g,cosIVals,cosJVals,cosKVals)
239 : DO ii = 1, SIZE(spline_g%array)
240 : i = spline_g%pw_grid%g_hat(1, ii)
241 : j = spline_g%pw_grid%g_hat(2, ii)
242 : k = spline_g%pw_grid%g_hat(3, ii)
243 : ! no opt
244 : !FM coeff=1.0/((cosVal(1)*cosVal(2)*cosVal(3))/27.0_dp+&
245 : !FM (cosVal(1)*cosVal(2)+cosVal(1)*cosVal(3)+&
246 : !FM cosVal(2)*cosVal(3))*2.0_dp/27.0_dp+&
247 : !FM (cosVal(1)+cosVal(2)+cosVal(3))*4.0_dp/27.0_dp+&
248 : !FM 8.0_dp/27.0_dp)
249 : ! opt
250 : c23 = cosJVals(j)*cosKVals(k)
251 : coeff = 27.0_dp/(cosIVals(i)*c23 + &
252 : (cosIVals(i)*cosJVals(j) + cosIVals(i)*cosKVals(k) + c23)*2.0_dp + &
253 : (cosIVals(i) + cosJVals(j) + cosKVals(k))*4.0_dp + &
254 : 8.0_dp)
255 :
256 : spline_g%array(ii) = spline_g%array(ii)*coeff
257 :
258 : END DO
259 396 : DEALLOCATE (cosIVals, cosJVals, cosKVals)
260 :
261 396 : CALL timestop(handle)
262 792 : END SUBROUTINE pw_spline3_interpolate_values_g
263 :
264 : ! **************************************************************************************************
265 : !> \brief rescales the derivatives from gridspacing=1 to the real derivatives
266 : !> \param deriv_vals_r an array of x,y,z derivatives
267 : !> \param transpose if true applies the transpose of the map (defaults to
268 : !> false)
269 : !> \param scale a scaling factor (defaults to 1.0)
270 : !> \par History
271 : !> 06.2003 created [fawzi]
272 : !> \author Fawzi Mohamed
273 : ! **************************************************************************************************
274 18110 : SUBROUTINE pw_spline_scale_deriv(deriv_vals_r, transpose, scale)
275 : TYPE(pw_r3d_rs_type), DIMENSION(3), INTENT(IN) :: deriv_vals_r
276 : LOGICAL, INTENT(in), OPTIONAL :: transpose
277 : REAL(KIND=dp), INTENT(in), OPTIONAL :: scale
278 :
279 : CHARACTER(len=*), PARAMETER :: routineN = 'pw_spline_scale_deriv'
280 :
281 : INTEGER :: handle, i, idir, j, k
282 : INTEGER, DIMENSION(2, 3) :: bo
283 : INTEGER, DIMENSION(3) :: n_tot
284 : LOGICAL :: diag, my_transpose
285 : REAL(KIND=dp) :: dVal1, dVal2, dVal3, my_scale, scalef
286 : REAL(KIND=dp), DIMENSION(3, 3) :: dh_inv, h_grid
287 18110 : REAL(kind=dp), DIMENSION(:, :, :), POINTER :: ddata, ddata2, ddata3
288 :
289 18110 : CALL timeset(routineN, handle)
290 :
291 18110 : my_transpose = .FALSE.
292 18110 : IF (PRESENT(transpose)) my_transpose = transpose
293 18110 : my_scale = 1.0_dp
294 18110 : IF (PRESENT(scale)) my_scale = scale
295 72440 : n_tot(1:3) = deriv_vals_r(1)%pw_grid%npts(1:3)
296 181100 : bo = deriv_vals_r(1)%pw_grid%bounds_local
297 235430 : dh_inv = deriv_vals_r(1)%pw_grid%dh_inv
298 :
299 : ! map grid to real derivative
300 18110 : diag = .TRUE.
301 18110 : IF (my_transpose) THEN
302 32848 : DO j = 1, 3
303 106756 : DO i = 1, 3
304 73908 : h_grid(j, i) = my_scale*dh_inv(i, j) ! REAL(n_tot(i),dp)*cell_h_inv(i,j)
305 98544 : IF (i /= j .AND. h_grid(j, i) /= 0.0_dp) diag = .FALSE.
306 : END DO
307 : END DO
308 : ELSE
309 39592 : DO j = 1, 3
310 128674 : DO i = 1, 3
311 89082 : h_grid(i, j) = my_scale*dh_inv(i, j) ! REAL(n_tot(i),dp)*cell_h_inv(i,j)
312 118776 : IF (i /= j .AND. h_grid(i, j) /= 0.0_dp) diag = .FALSE.
313 : END DO
314 : END DO
315 : END IF
316 :
317 18110 : IF (diag) THEN
318 71920 : DO idir = 1, 3
319 53940 : ddata => deriv_vals_r(idir)%array
320 53940 : scalef = h_grid(idir, idir)
321 : CALL dscal((bo(2, 1) - bo(1, 1) + 1)*(bo(2, 2) - bo(1, 2) + 1)*(bo(2, 3) - bo(1, 3) + 1), &
322 71920 : scalef, ddata, 1)
323 : END DO
324 : ELSE
325 130 : ddata => deriv_vals_r(1)%array
326 130 : ddata2 => deriv_vals_r(2)%array
327 130 : ddata3 => deriv_vals_r(3)%array
328 : !$OMP PARALLEL DO DEFAULT(NONE) PRIVATE(k,j,i,dVal1,dVal2,dVal3) &
329 130 : !$OMP SHARED(ddata,ddata2,ddata3,h_grid,bo)
330 : DO k = bo(1, 3), bo(2, 3)
331 : DO j = bo(1, 2), bo(2, 2)
332 : DO i = bo(1, 1), bo(2, 1)
333 :
334 : dVal1 = ddata(i, j, k)
335 : dVal2 = ddata2(i, j, k)
336 : dVal3 = ddata3(i, j, k)
337 :
338 : ddata(i, j, k) = h_grid(1, 1)*dVal1 + &
339 : h_grid(2, 1)*dVal2 + h_grid(3, 1)*dVal3
340 : ddata2(i, j, k) = h_grid(1, 2)*dVal1 + &
341 : h_grid(2, 2)*dVal2 + h_grid(3, 2)*dVal3
342 : ddata3(i, j, k) = h_grid(1, 3)*dVal1 + &
343 : h_grid(2, 3)*dVal2 + h_grid(3, 3)*dVal3
344 :
345 : END DO
346 : END DO
347 : END DO
348 : END IF
349 :
350 18110 : CALL timestop(handle)
351 18110 : END SUBROUTINE pw_spline_scale_deriv
352 :
353 : ! **************************************************************************************************
354 : !> \brief calculates the FFT of the values of the x,y,z (idir=1,2,3)
355 : !> derivative of the cubic spline
356 : !> \param spline_g on entry the FFT of the coefficients of the spline
357 : !> will contain the FFT of the derivative
358 : !> \param idir direction of the derivative
359 : !> \par History
360 : !> 06.2003 created [fawzi]
361 : !> \author Fawzi Mohamed
362 : !> \note
363 : !> the distance between gridpoints is assumed to be 1
364 : ! **************************************************************************************************
365 396 : SUBROUTINE pw_spline3_deriv_g(spline_g, idir)
366 : TYPE(pw_c1d_gs_type), INTENT(IN) :: spline_g
367 : INTEGER, INTENT(in) :: idir
368 :
369 : CHARACTER(len=*), PARAMETER :: routineN = 'pw_spline3_deriv_g'
370 : REAL(KIND=dp), PARAMETER :: inv9 = 1.0_dp/9.0_dp
371 :
372 : INTEGER :: handle, i, ii, j, k
373 : INTEGER, DIMENSION(2, 3) :: bo, gbo
374 : INTEGER, DIMENSION(3) :: n, n_tot
375 : REAL(KIND=dp) :: coeff, tmp
376 396 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:) :: csIVals, csJVals, csKVals
377 :
378 396 : CALL timeset(routineN, handle)
379 :
380 1584 : n(1:3) = spline_g%pw_grid%npts_local(1:3)
381 1584 : n_tot(1:3) = spline_g%pw_grid%npts(1:3)
382 3960 : bo = spline_g%pw_grid%bounds_local
383 3960 : gbo = spline_g%pw_grid%bounds
384 :
385 396 : CPASSERT(.NOT. spline_g%pw_grid%spherical)
386 396 : CPASSERT(spline_g%pw_grid%grid_span == FULLSPACE)
387 :
388 0 : ALLOCATE (csIVals(gbo(1, 1):gbo(2, 1)), &
389 0 : csJVals(gbo(1, 2):gbo(2, 2)), &
390 2772 : csKVals(gbo(1, 3):gbo(2, 3)))
391 :
392 396 : coeff = twopi/n_tot(1)
393 396 : IF (idir == 1) THEN
394 132 : !$OMP PARALLEL DO PRIVATE(i) DEFAULT(NONE) SHARED(gbo,csIVals,coeff)
395 : DO i = gbo(1, 1), gbo(2, 1)
396 : csIVals(i) = SIN(coeff*REAL(i, dp))
397 : END DO
398 : ELSE
399 264 : !$OMP PARALLEL DO PRIVATE(i) DEFAULT(NONE) SHARED(gbo,csIVals,coeff)
400 : DO i = gbo(1, 1), gbo(2, 1)
401 : csIVals(i) = COS(coeff*REAL(i, dp))
402 : END DO
403 : END IF
404 396 : coeff = twopi/n_tot(2)
405 396 : IF (idir == 2) THEN
406 132 : !$OMP PARALLEL DO PRIVATE(j) DEFAULT(NONE) SHARED(gbo,csJVals,coeff)
407 : DO j = gbo(1, 2), gbo(2, 2)
408 : csJVals(j) = SIN(coeff*REAL(j, dp))
409 : END DO
410 : ELSE
411 264 : !$OMP PARALLEL DO PRIVATE(j) DEFAULT(NONE) SHARED(gbo,csJVals,coeff)
412 : DO j = gbo(1, 2), gbo(2, 2)
413 : csJVals(j) = COS(coeff*REAL(j, dp))
414 : END DO
415 : END IF
416 396 : coeff = twopi/n_tot(3)
417 396 : IF (idir == 3) THEN
418 132 : !$OMP PARALLEL DO PRIVATE(k) DEFAULT(NONE) SHARED(gbo,csKVals,coeff)
419 : DO k = gbo(1, 3), gbo(2, 3)
420 : csKVals(k) = SIN(coeff*REAL(k, dp))
421 : END DO
422 : ELSE
423 264 : !$OMP PARALLEL DO PRIVATE(k) DEFAULT(NONE) SHARED(gbo,csKVals,coeff)
424 : DO k = gbo(1, 3), gbo(2, 3)
425 : csKVals(k) = COS(coeff*REAL(k, dp))
426 : END DO
427 : END IF
428 :
429 132 : SELECT CASE (idir)
430 : CASE (1)
431 : ! x deriv
432 132 : !$OMP PARALLEL DO PRIVATE(ii,k,j,i,coeff,tmp) DEFAULT(NONE) SHARED(spline_g,csIVals,csJVals,csKVals)
433 : DO ii = 1, SIZE(spline_g%array)
434 : i = spline_g%pw_grid%g_hat(1, ii)
435 : j = spline_g%pw_grid%g_hat(2, ii)
436 : k = spline_g%pw_grid%g_hat(3, ii)
437 : !FM ! formula
438 : !FM coeff=(sinVal(1)*cosVal(2)*cosVal(3))/9.0_dp+&
439 : !FM (sinVal(1)*cosVal(2)+sinVal(1)*cosVal(3))*2.0_dp/9.0_dp+&
440 : !FM sinVal(1)*4.0_dp/9.0_dp
441 : tmp = csIVals(i)*csJVals(j)
442 : coeff = (tmp*csKVals(k) + &
443 : (tmp + csIVals(i)*csKVals(k))*2.0_dp + &
444 : csIVals(i)*4.0_dp)*inv9
445 :
446 : spline_g%array(ii) = spline_g%array(ii)* &
447 : CMPLX(0.0_dp, coeff, dp)
448 : END DO
449 : CASE (2)
450 : ! y deriv
451 132 : !$OMP PARALLEL DO PRIVATE(ii,k,j,i,coeff,tmp) DEFAULT(NONE) SHARED(spline_g,csIVals,csJVals,csKVals)
452 : DO ii = 1, SIZE(spline_g%array)
453 : i = spline_g%pw_grid%g_hat(1, ii)
454 : j = spline_g%pw_grid%g_hat(2, ii)
455 : k = spline_g%pw_grid%g_hat(3, ii)
456 :
457 : tmp = csIVals(i)*csJVals(j)
458 : coeff = (tmp*csKVals(k) + &
459 : (tmp + csJVals(j)*csKVals(k))*2.0_dp + &
460 : csJVals(j)*4.0_dp)*inv9
461 :
462 : spline_g%array(ii) = spline_g%array(ii)* &
463 : CMPLX(0.0_dp, coeff, dp)
464 : END DO
465 : CASE (3)
466 : ! z deriv
467 396 : !$OMP PARALLEL DO PRIVATE(ii,k,j,i,coeff,tmp) DEFAULT(NONE) SHARED(spline_g,csIVals,csJVals,csKVals)
468 : DO ii = 1, SIZE(spline_g%array)
469 : i = spline_g%pw_grid%g_hat(1, ii)
470 : j = spline_g%pw_grid%g_hat(2, ii)
471 : k = spline_g%pw_grid%g_hat(3, ii)
472 :
473 : tmp = csIVals(i)*csKVals(k)
474 : coeff = (tmp*csJVals(j) + &
475 : (tmp + csJVals(j)*csKVals(k))*2.0_dp + &
476 : csKVals(k)*4.0_dp)*inv9
477 :
478 : spline_g%array(ii) = spline_g%array(ii)* &
479 : CMPLX(0.0_dp, coeff, dp)
480 : END DO
481 : END SELECT
482 :
483 396 : DEALLOCATE (csIVals, csJVals, csKVals)
484 :
485 396 : CALL timestop(handle)
486 792 : END SUBROUTINE pw_spline3_deriv_g
487 :
488 : ! **************************************************************************************************
489 : !> \brief calculates the FFT of the values of the x,y,z (idir=1,2,3)
490 : !> derivative of the quadratic spline
491 : !> \param spline_g on entry the FFT of the coefficients of the spline
492 : !> will contain the FFT of the derivative
493 : !> \param idir direction of the derivative
494 : !> \par History
495 : !> 06.2003 created [fawzi]
496 : !> \author Fawzi Mohamed
497 : !> \note
498 : !> the distance between gridpoints is assumed to be 1
499 : ! **************************************************************************************************
500 38532 : SUBROUTINE pw_spline2_deriv_g(spline_g, idir)
501 : TYPE(pw_c1d_gs_type), INTENT(IN) :: spline_g
502 : INTEGER, INTENT(in) :: idir
503 :
504 : CHARACTER(len=*), PARAMETER :: routineN = 'pw_spline2_deriv_g'
505 : REAL(KIND=dp), PARAMETER :: inv16 = 1.0_dp/16.0_dp
506 :
507 : INTEGER :: handle, i, ii, j, k
508 : INTEGER, DIMENSION(2, 3) :: bo
509 : INTEGER, DIMENSION(3) :: n, n_tot
510 : REAL(KIND=dp) :: coeff, tmp
511 38532 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:) :: csIVals, csJVals, csKVals
512 :
513 38532 : CALL timeset(routineN, handle)
514 :
515 154128 : n(1:3) = spline_g%pw_grid%npts_local(1:3)
516 154128 : n_tot(1:3) = spline_g%pw_grid%npts(1:3)
517 385320 : bo = spline_g%pw_grid%bounds
518 :
519 38532 : CPASSERT(.NOT. spline_g%pw_grid%spherical)
520 38532 : CPASSERT(spline_g%pw_grid%grid_span == FULLSPACE)
521 :
522 0 : ALLOCATE (csIVals(bo(1, 1):bo(2, 1)), csJVals(bo(1, 2):bo(2, 2)), &
523 269724 : csKVals(bo(1, 3):bo(2, 3)))
524 :
525 38532 : coeff = twopi/n_tot(1)
526 38532 : IF (idir == 1) THEN
527 12844 : !$OMP PARALLEL DO PRIVATE(i) DEFAULT(NONE) SHARED(bo,coeff,csIVals)
528 : DO i = bo(1, 1), bo(2, 1)
529 : csIVals(i) = SIN(coeff*REAL(i, dp))
530 : END DO
531 : ELSE
532 25688 : !$OMP PARALLEL DO PRIVATE(i) DEFAULT(NONE) SHARED(bo,coeff,csIVals)
533 : DO i = bo(1, 1), bo(2, 1)
534 : csIVals(i) = COS(coeff*REAL(i, dp))
535 : END DO
536 : END IF
537 38532 : coeff = twopi/n_tot(2)
538 38532 : IF (idir == 2) THEN
539 12844 : !$OMP PARALLEL DO PRIVATE(j) DEFAULT(NONE) SHARED(bo,coeff,csJVals)
540 : DO j = bo(1, 2), bo(2, 2)
541 : csJVals(j) = SIN(coeff*REAL(j, dp))
542 : END DO
543 : ELSE
544 25688 : !$OMP PARALLEL DO PRIVATE(j) DEFAULT(NONE) SHARED(bo,coeff,csJVals)
545 : DO j = bo(1, 2), bo(2, 2)
546 : csJVals(j) = COS(coeff*REAL(j, dp))
547 : END DO
548 : END IF
549 38532 : coeff = twopi/n_tot(3)
550 38532 : IF (idir == 3) THEN
551 12844 : !$OMP PARALLEL DO PRIVATE(k) DEFAULT(NONE) SHARED(bo,coeff,csKVals)
552 : DO k = bo(1, 3), bo(2, 3)
553 : csKVals(k) = SIN(coeff*REAL(k, dp))
554 : END DO
555 : ELSE
556 25688 : !$OMP PARALLEL DO PRIVATE(k) DEFAULT(NONE) SHARED(bo,coeff,csKVals)
557 : DO k = bo(1, 3), bo(2, 3)
558 : csKVals(k) = COS(coeff*REAL(k, dp))
559 : END DO
560 : END IF
561 :
562 12844 : SELECT CASE (idir)
563 : CASE (1)
564 : ! x deriv
565 12844 : !$OMP PARALLEL DO PRIVATE(ii,k,j,i,coeff,tmp) SHARED(spline_g,csIVals,csJVals,csKVals) DEFAULT(NONE)
566 : DO ii = 1, SIZE(spline_g%array)
567 : i = spline_g%pw_grid%g_hat(1, ii)
568 : j = spline_g%pw_grid%g_hat(2, ii)
569 : k = spline_g%pw_grid%g_hat(3, ii)
570 : !FM ! formula
571 : !FM coeff=(sinVal(1)*cosVal(2)*cosVal(3))/16.0_dp+&
572 : !FM (sinVal(1)*cosVal(2)+sinVal(1)*cosVal(3))*3.0_dp/16.0_dp+&
573 : !FM sinVal(1)*9.0_dp/16.0_dp
574 : tmp = csIVals(i)*csJVals(j)
575 : coeff = (tmp*csKVals(k) + &
576 : (tmp + csIVals(i)*csKVals(k))*3.0_dp + &
577 : csIVals(i)*9.0_dp)*inv16
578 :
579 : spline_g%array(ii) = spline_g%array(ii)* &
580 : CMPLX(0.0_dp, coeff, dp)
581 : END DO
582 : CASE (2)
583 : ! y deriv
584 12844 : !$OMP PARALLEL DO PRIVATE(ii,k,j,i,coeff,tmp) DEFAULT(NONE) SHARED(spline_g,csIVals,csJVals,csKVals)
585 : DO ii = 1, SIZE(spline_g%array)
586 : i = spline_g%pw_grid%g_hat(1, ii)
587 : j = spline_g%pw_grid%g_hat(2, ii)
588 : k = spline_g%pw_grid%g_hat(3, ii)
589 :
590 : tmp = csIVals(i)*csJVals(j)
591 : coeff = (tmp*csKVals(k) + &
592 : (tmp + csJVals(j)*csKVals(k))*3.0_dp + &
593 : csJVals(j)*9.0_dp)*inv16
594 :
595 : spline_g%array(ii) = spline_g%array(ii)* &
596 : CMPLX(0.0_dp, coeff, dp)
597 : END DO
598 : CASE (3)
599 : ! z deriv
600 38532 : !$OMP PARALLEL DO PRIVATE(ii,k,j,i,coeff,tmp) DEFAULT(NONE) SHARED(spline_g,csIVals,csJVals,csKVals)
601 : DO ii = 1, SIZE(spline_g%array)
602 : i = spline_g%pw_grid%g_hat(1, ii)
603 : j = spline_g%pw_grid%g_hat(2, ii)
604 : k = spline_g%pw_grid%g_hat(3, ii)
605 :
606 : tmp = csIVals(i)*csKVals(k)
607 : coeff = (tmp*csJVals(j) + &
608 : (tmp + csJVals(j)*csKVals(k))*3.0_dp + &
609 : csKVals(k)*9.0_dp)*inv16
610 :
611 : spline_g%array(ii) = spline_g%array(ii)* &
612 : CMPLX(0.0_dp, coeff, dp)
613 : END DO
614 : END SELECT
615 :
616 38532 : DEALLOCATE (csIVals, csJVals, csKVals)
617 :
618 38532 : CALL timestop(handle)
619 77064 : END SUBROUTINE pw_spline2_deriv_g
620 :
621 : ! **************************************************************************************************
622 : !> \brief applies a nearest neighbor linear operator to a stripe in x direction:
623 : !> out_val(i)=sum(weight(j)*in_val(i+j-1),j=0..2)
624 : !> \param weights the weights of the linear operator
625 : !> \param in_val the argument to the operator
626 : !> \param in_val_first the first argument (needed to calculate out_val(1))
627 : !> \param in_val_last the last argument (needed to calculate out_val(n_el))
628 : !> \param out_val the place where the result is accumulated
629 : !> \param n_el the number of elements in in_v and out_v
630 : !> \par History
631 : !> 04.2004 created [fawzi]
632 : !> \author fawzi
633 : !> \note
634 : !> uses 2 read streams and 1 write stream
635 : ! **************************************************************************************************
636 637287616 : SUBROUTINE pw_compose_stripe(weights, in_val, in_val_first, in_val_last, &
637 : out_val, n_el)
638 : REAL(kind=dp), DIMENSION(0:2), INTENT(in) :: weights
639 : REAL(kind=dp), DIMENSION(*), INTENT(in) :: in_val
640 : REAL(kind=dp), INTENT(in) :: in_val_first, in_val_last
641 : REAL(kind=dp), DIMENSION(*), INTENT(inout) :: out_val
642 : INTEGER :: n_el
643 :
644 : INTEGER :: i
645 : REAL(kind=dp) :: v0, v1, v2
646 :
647 : !1:n_el), &
648 : !1:n_el), &
649 :
650 637287616 : IF (n_el < 1) RETURN
651 637287616 : v0 = in_val_first
652 637287616 : v1 = in_val(1)
653 637287616 : IF (weights(1) == 0.0_dp) THEN
654 : ! optimized version for x deriv
655 100241316 : DO i = 1, n_el - 3, 3
656 914503932 : v2 = in_val(i + 1)
657 : out_val(i) = out_val(i) + &
658 : weights(0)*v0 + &
659 914503932 : weights(2)*v2
660 914503932 : v0 = in_val(i + 2)
661 : out_val(i + 1) = out_val(i + 1) + &
662 : weights(0)*v1 + &
663 914503932 : weights(2)*v0
664 914503932 : v1 = in_val(i + 3)
665 : out_val(i + 2) = out_val(i + 2) + &
666 : weights(0)*v2 + &
667 914503932 : weights(2)*v1
668 : END DO
669 : ELSE
670 : ! generic version
671 537046300 : DO i = 1, n_el - 3, 3
672 4001791934 : v2 = in_val(i + 1)
673 : out_val(i) = out_val(i) + &
674 : weights(0)*v0 + &
675 : weights(1)*v1 + &
676 4001791934 : weights(2)*v2
677 4001791934 : v0 = in_val(i + 2)
678 : out_val(i + 1) = out_val(i + 1) + &
679 : weights(0)*v1 + &
680 : weights(1)*v2 + &
681 4001791934 : weights(2)*v0
682 4001791934 : v1 = in_val(i + 3)
683 : out_val(i + 2) = out_val(i + 2) + &
684 : weights(0)*v2 + &
685 : weights(1)*v0 + &
686 4006043984 : weights(2)*v1
687 : END DO
688 : END IF
689 850088172 : SELECT CASE (MODULO(n_el - 1, 3))
690 : CASE (0)
691 212800556 : v2 = in_val_last
692 : out_val(n_el) = out_val(n_el) + &
693 : weights(0)*v0 + &
694 : weights(1)*v1 + &
695 212800556 : weights(2)*v2
696 : CASE (1)
697 190169698 : v2 = in_val(n_el)
698 : out_val(n_el - 1) = out_val(n_el - 1) + &
699 : weights(0)*v0 + &
700 : weights(1)*v1 + &
701 190169698 : weights(2)*v2
702 190169698 : v0 = in_val_last
703 : out_val(n_el) = out_val(n_el) + &
704 : weights(0)*v1 + &
705 : weights(1)*v2 + &
706 190169698 : weights(2)*v0
707 : CASE (2)
708 234317362 : v2 = in_val(n_el - 1)
709 : out_val(n_el - 2) = out_val(n_el - 2) + &
710 : weights(0)*v0 + &
711 : weights(1)*v1 + &
712 234317362 : weights(2)*v2
713 234317362 : v0 = in_val(n_el)
714 : out_val(n_el - 1) = out_val(n_el - 1) + &
715 : weights(0)*v1 + &
716 : weights(1)*v2 + &
717 234317362 : weights(2)*v0
718 234317362 : v1 = in_val_last
719 : out_val(n_el) = out_val(n_el) + &
720 : weights(0)*v2 + &
721 : weights(1)*v0 + &
722 637287616 : weights(2)*v1
723 : END SELECT
724 :
725 : END SUBROUTINE pw_compose_stripe
726 :
727 : ! **************************************************************************************************
728 : !> \brief private routine that computes pw_nn_compose_r (it seems that without
729 : !> passing arrays in this way either some compiler do a copyin/out (xlf)
730 : !> or by inlining suboptimal code is produced (nag))
731 : !> \param weights a 3x3x3 array with the linear operator
732 : !> \param in_val the argument for the linear operator
733 : !> \param out_val place where the value of the linear oprator should be added
734 : !> \param pw_in pw to be able to get the needed meta data about in_val and
735 : !> out_val
736 : !> \param bo boundaries of in_val and out_val
737 : !> \author fawzi
738 : ! **************************************************************************************************
739 29892 : SUBROUTINE pw_nn_compose_r_work(weights, in_val, out_val, pw_in, bo)
740 : REAL(kind=dp), DIMENSION(0:2, 0:2, 0:2) :: weights
741 : INTEGER, DIMENSION(2, 3) :: bo
742 : TYPE(pw_r3d_rs_type), INTENT(IN) :: pw_in
743 : REAL(kind=dp), DIMENSION(bo(1, 1):bo(2, 1), bo(1, &
744 : 2):bo(2, 2), bo(1, 3):bo(2, 3)), INTENT(inout) :: out_val
745 : REAL(kind=dp), DIMENSION(bo(1, 1):bo(2, 1), bo(1, &
746 : 2):bo(2, 2), bo(1, 3):bo(2, 3)), INTENT(in) :: in_val
747 :
748 : INTEGER :: i, j, jw, k, kw, myj, myk
749 : INTEGER, DIMENSION(2, 3) :: gbo
750 : INTEGER, DIMENSION(3) :: s
751 : LOGICAL :: has_boundary, yderiv, zderiv
752 : REAL(kind=dp) :: in_val_f, in_val_l
753 29892 : REAL(kind=dp), DIMENSION(:, :), POINTER :: l_boundary, tmp, u_boundary
754 :
755 91500 : zderiv = ALL(weights(:, :, 1) == 0.0_dp)
756 91500 : yderiv = ALL(weights(:, 1, :) == 0.0_dp)
757 298920 : bo = pw_in%pw_grid%bounds_local
758 298920 : gbo = pw_in%pw_grid%bounds
759 119568 : DO i = 1, 3
760 119568 : s(i) = bo(2, i) - bo(1, i) + 1
761 : END DO
762 119568 : IF (ANY(s < 1)) RETURN
763 : has_boundary = ANY(pw_in%pw_grid%bounds_local(:, 1) /= &
764 46800 : pw_in%pw_grid%bounds(:, 1))
765 29892 : IF (has_boundary) THEN
766 : ALLOCATE (l_boundary(bo(1, 2):bo(2, 2), bo(1, 3):bo(2, 3)), &
767 : u_boundary(bo(1, 2):bo(2, 2), bo(1, 3):bo(2, 3)), &
768 228672 : tmp(bo(1, 2):bo(2, 2), bo(1, 3):bo(2, 3)))
769 53905904 : tmp(:, :) = pw_in%array(bo(2, 1), :, :)
770 : CALL pw_in%pw_grid%para%group%sendrecv(tmp, pw_in%pw_grid%para%pos_of_x( &
771 : gbo(1, 1) + MODULO(bo(2, 1) + 1 - gbo(1, 1), gbo(2, 1) - gbo(1, 1) + 1)), &
772 : l_boundary, pw_in%pw_grid%para%pos_of_x( &
773 107783224 : gbo(1, 1) + MODULO(bo(1, 1) - 1 - gbo(1, 1), gbo(2, 1) - gbo(1, 1) + 1)))
774 53905904 : tmp(:, :) = pw_in%array(bo(1, 1), :, :)
775 : CALL pw_in%pw_grid%para%group%sendrecv(tmp, pw_in%pw_grid%para%pos_of_x( &
776 : gbo(1, 1) + MODULO(bo(1, 1) - 1 - gbo(1, 1), gbo(2, 1) - gbo(1, 1) + 1)), &
777 : u_boundary, pw_in%pw_grid%para%pos_of_x( &
778 107783224 : gbo(1, 1) + MODULO(bo(2, 1) + 1 - gbo(1, 1), gbo(2, 1) - gbo(1, 1) + 1)))
779 28584 : DEALLOCATE (tmp)
780 : END IF
781 :
782 : !$OMP PARALLEL DO DEFAULT(NONE) PRIVATE(k,kw,myk,j,jw,myj,in_val_f,&
783 : !$OMP in_val_l) SHARED(zderiv,yderiv,bo,in_val,out_val,s,l_boundary,&
784 29892 : !$OMP u_boundary,weights,has_boundary)
785 : DO k = 0, s(3) - 1
786 : DO kw = 0, 2
787 : myk = bo(1, 3) + MODULO(k + kw - 1, s(3))
788 : IF (zderiv .AND. kw == 1) CYCLE
789 : DO j = 0, s(2) - 1
790 : DO jw = 0, 2
791 : myj = bo(1, 2) + MODULO(j + jw - 1, s(2))
792 : IF (yderiv .AND. jw == 1) CYCLE
793 : IF (has_boundary) THEN
794 : in_val_f = l_boundary(myj, myk)
795 : in_val_l = u_boundary(myj, myk)
796 : ELSE
797 : in_val_f = in_val(bo(2, 1), myj, myk)
798 : in_val_l = in_val(bo(1, 1), myj, myk)
799 : END IF
800 : CALL pw_compose_stripe(weights=weights(:, jw, kw), &
801 : in_val=in_val(:, myj, myk), &
802 : in_val_first=in_val_f, in_val_last=in_val_l, &
803 : out_val=out_val(:, bo(1, 2) + j, bo(1, 3) + k), n_el=s(1))
804 : END DO
805 : END DO
806 : END DO
807 : END DO
808 29892 : IF (has_boundary) THEN
809 28584 : DEALLOCATE (l_boundary, u_boundary)
810 : END IF
811 29892 : END SUBROUTINE pw_nn_compose_r_work
812 :
813 : ! **************************************************************************************************
814 : !> \brief applies a nearest neighbor linear operator to a pw in real space
815 : !> \param weights a 3x3x3 array with the linear operator
816 : !> \param pw_in the argument for the linear operator
817 : !> \param pw_out place where the value of the linear oprator should be added
818 : !> \author fawzi
819 : !> \note
820 : !> has specialized versions for derivative operator (with central values==0)
821 : ! **************************************************************************************************
822 29892 : SUBROUTINE pw_nn_compose_r(weights, pw_in, pw_out)
823 : REAL(kind=dp), DIMENSION(0:2, 0:2, 0:2) :: weights
824 : TYPE(pw_r3d_rs_type), INTENT(IN) :: pw_in, pw_out
825 :
826 : CHARACTER(len=*), PARAMETER :: routineN = 'pw_nn_compose_r'
827 :
828 : INTEGER :: handle
829 :
830 29892 : CALL timeset(routineN, handle)
831 209244 : IF (.NOT. ALL(pw_in%pw_grid%bounds_local(:, 2:3) == pw_in%pw_grid%bounds(:, 2:3))) THEN
832 0 : CPABORT("wrong pw distribution")
833 : END IF
834 : CALL pw_nn_compose_r_work(weights=weights, in_val=pw_in%array, &
835 29892 : out_val=pw_out%array, pw_in=pw_in, bo=pw_in%pw_grid%bounds_local)
836 29892 : CALL timestop(handle)
837 29892 : END SUBROUTINE pw_nn_compose_r
838 :
839 : ! **************************************************************************************************
840 : !> \brief calculates the values of a nearest neighbor smearing
841 : !> \param pw_in the argument for the linear operator
842 : !> \param pw_out place where the smeared values should be added
843 : !> \param coeffs array with the coefficent of the smearing, ordered with
844 : !> the distance from the center: coeffs(1) the coeff of the central
845 : !> element, coeffs(2) the coeff of the 6 element with distance 1,
846 : !> coeff(3) the coeff of the 12 elements at distance sqrt(2),
847 : !> coeff(4) the coeff of the 8 elements at distance sqrt(3).
848 : !> \author Fawzi Mohamed
849 : !> \note
850 : !> does not normalize the smear to 1.
851 : !> with coeff=(/ 8._dp/27._dp, 2._dp/27._dp, 1._dp/54._dp, 1._dp/216._dp /)
852 : !> is equivalent to pw_spline3_evaluate_values_g, with
853 : !> coeff=(/ 27._dp/64._dp, 9._dp/128._dp, 3._dp/256._dp, 1._dp/512._dp /)
854 : ! **************************************************************************************************
855 14490 : SUBROUTINE pw_nn_smear_r(pw_in, pw_out, coeffs)
856 : TYPE(pw_r3d_rs_type), INTENT(IN) :: pw_in, pw_out
857 : REAL(KIND=dp), DIMENSION(4), INTENT(in) :: coeffs
858 :
859 : INTEGER :: i, j, k
860 : REAL(kind=dp), DIMENSION(-1:1, -1:1, -1:1) :: weights
861 :
862 57960 : DO k = -1, 1
863 188370 : DO j = -1, 1
864 565110 : DO i = -1, 1
865 521640 : weights(i, j, k) = coeffs(ABS(i) + ABS(j) + ABS(k) + 1)
866 : END DO
867 : END DO
868 : END DO
869 :
870 14490 : CALL pw_nn_compose_r(weights=weights, pw_in=pw_in, pw_out=pw_out)
871 14490 : END SUBROUTINE pw_nn_smear_r
872 :
873 : ! **************************************************************************************************
874 : !> \brief calculates a nearest neighbor central derivative.
875 : !> for the x dir:
876 : !> pw_out%array(i,j,k)=( pw_in(i+1,j,k)-pw_in(i-1,j,k) )*coeff(1)+
877 : !> ( pw_in(i+1,j(+-)1,k)-pw_in(i-1,j(+-)1,k)+
878 : !> pw_in(i+1,j,k(+-)1)-pw_in(i-1,j,k(+-)1) )*coeff(2)+
879 : !> ( pw_in(i+1,j(+-)1,k(+-)1)-pw_in(i-1,j(+-)1,k(+-)1)+
880 : !> pw_in(i+1,j(+-)1,k(-+)1)-pw_in(i-1,j(+-)1,k(-+)1) )*coeff(3)
881 : !> periodic boundary conditions are applied
882 : !> \param pw_in the argument for the linear operator
883 : !> \param pw_out place where the smeared values should be added
884 : !> \param coeffs array with the coefficent of the front (positive) plane
885 : !> of the central derivative, ordered with
886 : !> the distance from the center: coeffs(1) the coeff of the central
887 : !> element, coeffs(2) the coeff of the 4 element with distance 1,
888 : !> coeff(3) the coeff of the 4 elements at distance sqrt(2)
889 : !> \param idir ...
890 : !> \author Fawzi Mohamed
891 : !> \note
892 : !> with coeff=(/ 2.0_dp/9.0_dp, 1.0_dp/18.0_dp, 1.0_dp/72.0_dp /)
893 : !> is equivalent to pw_spline3_deriv_r, with
894 : !> coeff=(/ 9.0_dp/32.0_dp, 3.0_dp/64.0_dp, 1.0_dp/128.0_dp /)
895 : !> to pw_spline2_deriv_r
896 : !> coeff=(/ 25._dp/72._dp, 5._dp/144, 1._dp/288._dp /)
897 : ! **************************************************************************************************
898 15402 : SUBROUTINE pw_nn_deriv_r(pw_in, pw_out, coeffs, idir)
899 : TYPE(pw_r3d_rs_type), INTENT(IN) :: pw_in, pw_out
900 : REAL(KIND=dp), DIMENSION(3), INTENT(in) :: coeffs
901 : INTEGER :: idir
902 :
903 : INTEGER :: i, idirVal, j, k
904 : REAL(kind=dp), DIMENSION(-1:1, -1:1, -1:1) :: weights
905 :
906 61608 : DO k = -1, 1
907 200226 : DO j = -1, 1
908 600678 : DO i = -1, 1
909 415854 : SELECT CASE (idir)
910 : CASE (1)
911 138618 : idirVal = i
912 : CASE (2)
913 138618 : idirVal = j
914 : CASE (3)
915 138618 : idirVal = k
916 : CASE default
917 415854 : CPABORT("invalid idir ("//TRIM(cp_to_string(idir))//")")
918 : END SELECT
919 554472 : IF (idirVal == 0) THEN
920 138618 : weights(i, j, k) = 0.0_dp
921 : ELSE
922 277236 : weights(i, j, k) = REAL(idirVal, dp)*coeffs(ABS(i) + ABS(j) + ABS(k))
923 : END IF
924 : END DO
925 : END DO
926 : END DO
927 :
928 15402 : CALL pw_nn_compose_r(weights=weights, pw_in=pw_in, pw_out=pw_out)
929 15402 : END SUBROUTINE pw_nn_deriv_r
930 :
931 : ! **************************************************************************************************
932 : !> \brief low level function that adds a coarse grid
933 : !> to a fine grid.
934 : !> If pbc is true periodic boundary conditions are applied
935 : !>
936 : !> It will add to
937 : !>
938 : !> fine_values(2*coarse_bounds(1,1):2*coarse_bounds(2,1),
939 : !> 2*coarse_bounds(1,2):2*coarse_bounds(2,2),
940 : !> 2*coarse_bounds(1,3):2*coarse_bounds(2,3))
941 : !>
942 : !> using
943 : !>
944 : !> coarse_coeffs(coarse_bounds(1,1):coarse_bounds(2,1),
945 : !> coarse_bounds(1,2):coarse_bounds(2,2),
946 : !> coarse_bounds(1,3):coarse_bounds(2,3))
947 : !>
948 : !> composed with the weights obtained by the direct product of the
949 : !> 1d coefficients weights:
950 : !>
951 : !> for i,j,k in -3..3
952 : !> w(i,j,k)=weights_1d(abs(i)+1)*weights_1d(abs(j)+1)*
953 : !> weights_1d(abs(k)+1)
954 : !> \param coarse_coeffs_pw the values of the coefficients
955 : !> \param fine_values_pw where to add the values due to the
956 : !> coarse coeffs
957 : !> \param weights_1d the weights of the 1d smearing
958 : !> \param w_border0 the 1d weight at the border (when pbc is false)
959 : !> \param w_border1 the 1d weights for a point one off the border
960 : !> (w_border1(1) is the weight of the coefficent at the border)
961 : !> (used if pbc is false)
962 : !> \param pbc if periodic boundary conditions should be applied
963 : !> \param safe_computation ...
964 : !> \author fawzi
965 : !> \note
966 : !> coarse looping is continuos, I did not check if keeping the fine looping
967 : !> contiguous is better.
968 : !> And I ask myself yet again why, why we use x-slice distribution,
969 : !> z-slice distribution would be much better performancewise
970 : !> (and would semplify this code enormously).
971 : !> fine2coarse has much more understandable parallel part (build up of
972 : !> send/rcv sizes,... but worse if you have really a lot of processors,
973 : !> probabily irrelevant because it is not critical) [fawzi].
974 : ! **************************************************************************************************
975 2260 : SUBROUTINE add_coarse2fine(coarse_coeffs_pw, fine_values_pw, &
976 : weights_1d, w_border0, w_border1, pbc, safe_computation)
977 : TYPE(pw_r3d_rs_type), INTENT(IN) :: coarse_coeffs_pw, fine_values_pw
978 : REAL(kind=dp), DIMENSION(4), INTENT(in) :: weights_1d
979 : REAL(kind=dp), INTENT(in) :: w_border0
980 : REAL(kind=dp), DIMENSION(3), INTENT(in) :: w_border1
981 : LOGICAL, INTENT(in) :: pbc
982 : LOGICAL, INTENT(in), OPTIONAL :: safe_computation
983 :
984 : CHARACTER(len=*), PARAMETER :: routineN = 'add_coarse2fine'
985 :
986 : INTEGER :: coarse_slice_size, f_shift(3), fi, fi_lb, fi_ub, fj, fk, handle, handle2, i, ii, &
987 : ij, ik, ip, j, k, my_lb, my_ub, n_procs, p, p_lb, p_old, p_ub, rcv_tot_size, rest_b, &
988 : s(3), send_tot_size, sf, shift, ss, x, x_att, xx
989 2260 : INTEGER, ALLOCATABLE, DIMENSION(:) :: rcv_offset, rcv_size, real_rcv_size, &
990 2260 : send_offset, send_size, sent_size
991 : INTEGER, DIMENSION(2, 3) :: coarse_bo, coarse_gbo, fine_bo, &
992 : fine_gbo, my_coarse_bo
993 2260 : INTEGER, DIMENSION(:), POINTER :: pos_of_x
994 : LOGICAL :: has_i_lbound, has_i_ubound, is_split, &
995 : safe_calc
996 : REAL(kind=dp) :: v0, v1, v2, v3, wi, wj, wk
997 2260 : REAL(kind=dp), ALLOCATABLE, DIMENSION(:) :: rcv_buf, send_buf
998 : REAL(kind=dp), DIMENSION(3) :: w_0, ww0
999 : REAL(kind=dp), DIMENSION(4) :: w_1, ww1
1000 2260 : REAL(kind=dp), DIMENSION(:, :, :), POINTER :: coarse_coeffs, fine_values
1001 :
1002 2260 : CALL timeset(routineN, handle)
1003 : ! CALL timeset(routineN//"_pre",handle2)
1004 2260 : safe_calc = .FALSE.
1005 2260 : IF (PRESENT(safe_computation)) safe_calc = safe_computation
1006 2260 : ii = coarse_coeffs_pw%pw_grid%para%group%compare(fine_values_pw%pw_grid%para%group)
1007 2260 : IF (ii > mp_comm_congruent) THEN
1008 0 : CPABORT("")
1009 : END IF
1010 22600 : my_coarse_bo = coarse_coeffs_pw%pw_grid%bounds_local
1011 22600 : coarse_gbo = coarse_coeffs_pw%pw_grid%bounds
1012 22600 : fine_bo = fine_values_pw%pw_grid%bounds_local
1013 22600 : fine_gbo = fine_values_pw%pw_grid%bounds
1014 9040 : f_shift = fine_gbo(1, :) - 2*coarse_gbo(1, :)
1015 6780 : DO j = 2, 3
1016 15820 : DO i = 1, 2
1017 13560 : coarse_bo(i, j) = FLOOR((fine_bo(i, j) - f_shift(j))/2.)
1018 : END DO
1019 : END DO
1020 2260 : IF (fine_bo(1, 1) <= fine_bo(2, 1)) THEN
1021 2260 : coarse_bo(1, 1) = FLOOR((fine_bo(1, 1) - 2 - f_shift(1))/2.)
1022 2260 : coarse_bo(2, 1) = FLOOR((fine_bo(2, 1) + 3 - f_shift(1))/2.)
1023 : ELSE
1024 0 : coarse_bo(1, 1) = coarse_gbo(2, 1)
1025 0 : coarse_bo(2, 1) = coarse_gbo(2, 1) - 1
1026 : END IF
1027 6744 : is_split = ANY(coarse_gbo(:, 1) /= my_coarse_bo(:, 1))
1028 2260 : IF (.NOT. is_split .OR. .NOT. pbc) THEN
1029 2260 : coarse_bo(1, 1) = MAX(coarse_gbo(1, 1), coarse_bo(1, 1))
1030 2260 : coarse_bo(2, 1) = MIN(coarse_gbo(2, 1), coarse_bo(2, 1))
1031 : END IF
1032 2260 : has_i_ubound = (fine_gbo(2, 1) /= fine_bo(2, 1)) .OR. pbc .AND. is_split
1033 2260 : has_i_lbound = (fine_gbo(1, 1) /= fine_bo(1, 1)) .OR. pbc .AND. is_split
1034 :
1035 2260 : IF (pbc) THEN
1036 0 : CPASSERT(ALL(fine_gbo(1, :) == 2*coarse_gbo(1, :) + f_shift))
1037 0 : CPASSERT(ALL(fine_gbo(2, :) == 2*coarse_gbo(2, :) + 1 + f_shift))
1038 : ELSE
1039 9040 : CPASSERT(ALL(fine_gbo(2, :) == 2*coarse_gbo(2, :) + f_shift))
1040 9040 : CPASSERT(ALL(fine_gbo(1, :) == 2*coarse_gbo(1, :) + f_shift))
1041 : END IF
1042 :
1043 2260 : coarse_coeffs => coarse_coeffs_pw%array
1044 9040 : DO i = 1, 3
1045 9040 : s(i) = coarse_gbo(2, i) - coarse_gbo(1, i) + 1
1046 : END DO
1047 : ! CALL timestop(handle2)
1048 : ! *** parallel case
1049 2260 : IF (is_split) THEN
1050 24 : CALL timeset(routineN//"_comm", handle2)
1051 : coarse_slice_size = (coarse_bo(2, 2) - coarse_bo(1, 2) + 1)* &
1052 24 : (coarse_bo(2, 3) - coarse_bo(1, 3) + 1)
1053 24 : n_procs = coarse_coeffs_pw%pw_grid%para%group%num_pe
1054 : ALLOCATE (send_size(0:n_procs - 1), send_offset(0:n_procs - 1), &
1055 : sent_size(0:n_procs - 1), rcv_size(0:n_procs - 1), &
1056 312 : rcv_offset(0:n_procs - 1), real_rcv_size(0:n_procs - 1))
1057 :
1058 : ! ** rcv size count
1059 :
1060 24 : pos_of_x => coarse_coeffs_pw%pw_grid%para%pos_of_x
1061 : p_old = pos_of_x(coarse_gbo(1, 1) &
1062 24 : + MODULO(coarse_bo(1, 1) - coarse_gbo(1, 1), s(1)))
1063 72 : rcv_size = 0
1064 184 : DO x = coarse_bo(1, 1), coarse_bo(2, 1)
1065 160 : p = pos_of_x(coarse_gbo(1, 1) + MODULO(x - coarse_gbo(1, 1), s(1)))
1066 184 : rcv_size(p) = rcv_size(p) + coarse_slice_size
1067 : END DO
1068 :
1069 : ! ** send size count
1070 :
1071 24 : pos_of_x => fine_values_pw%pw_grid%para%pos_of_x
1072 24 : sf = fine_gbo(2, 1) - fine_gbo(1, 1) + 1
1073 24 : fi_lb = 2*my_coarse_bo(1, 1) - 3 + f_shift(1)
1074 24 : fi_ub = 2*my_coarse_bo(2, 1) + 3 + f_shift(1)
1075 24 : IF (.NOT. pbc) THEN
1076 24 : fi_lb = MAX(fi_lb, fine_gbo(1, 1))
1077 24 : fi_ub = MIN(fi_ub, fine_gbo(2, 1))
1078 : ELSE
1079 0 : fi_ub = MIN(fi_ub, fi_lb + sf - 1)
1080 : END IF
1081 24 : p_old = pos_of_x(fine_gbo(1, 1) + MODULO(fi_lb - fine_gbo(1, 1), sf))
1082 24 : p_lb = FLOOR((fi_lb - 2 - f_shift(1))/2.)
1083 72 : send_size = 0
1084 320 : DO x = fi_lb, fi_ub
1085 296 : p = pos_of_x(fine_gbo(1, 1) + MODULO(x - fine_gbo(1, 1), sf))
1086 320 : IF (p /= p_old) THEN
1087 24 : p_ub = FLOOR((x - 1 + 3 - f_shift(1))/2.)
1088 :
1089 : send_size(p_old) = send_size(p_old) + (MIN(p_ub, my_coarse_bo(2, 1)) &
1090 24 : - MAX(p_lb, my_coarse_bo(1, 1)) + 1)*coarse_slice_size
1091 :
1092 24 : IF (pbc) THEN
1093 0 : DO xx = p_lb, coarse_gbo(1, 1) - 1
1094 0 : x_att = coarse_gbo(1, 1) + MODULO(xx - coarse_gbo(1, 1), s(1))
1095 0 : IF (x_att >= my_coarse_bo(1, 1) .AND. x_att <= my_coarse_bo(2, 1)) THEN
1096 0 : send_size(p_old) = send_size(p_old) + coarse_slice_size
1097 : END IF
1098 : END DO
1099 0 : DO xx = coarse_gbo(2, 1) + 1, p_ub
1100 0 : x_att = coarse_gbo(1, 1) + MODULO(xx - coarse_gbo(1, 1), s(1))
1101 0 : IF (x_att >= my_coarse_bo(1, 1) .AND. x_att <= my_coarse_bo(2, 1)) THEN
1102 0 : send_size(p_old) = send_size(p_old) + coarse_slice_size
1103 : END IF
1104 : END DO
1105 : END IF
1106 :
1107 24 : p_old = p
1108 24 : p_lb = FLOOR((x - 2 - f_shift(1))/2.)
1109 : END IF
1110 : END DO
1111 24 : p_ub = FLOOR((fi_ub + 3 - f_shift(1))/2.)
1112 :
1113 : send_size(p_old) = send_size(p_old) + (MIN(p_ub, my_coarse_bo(2, 1)) &
1114 24 : - MAX(p_lb, my_coarse_bo(1, 1)) + 1)*coarse_slice_size
1115 :
1116 24 : IF (pbc) THEN
1117 0 : DO xx = p_lb, coarse_gbo(1, 1) - 1
1118 0 : x_att = coarse_gbo(1, 1) + MODULO(xx - coarse_gbo(1, 1), s(1))
1119 0 : IF (x_att >= my_coarse_bo(1, 1) .AND. x_att <= my_coarse_bo(2, 1)) THEN
1120 0 : send_size(p_old) = send_size(p_old) + coarse_slice_size
1121 : END IF
1122 : END DO
1123 0 : DO xx = coarse_gbo(2, 1) + 1, p_ub
1124 0 : x_att = coarse_gbo(1, 1) + MODULO(xx - coarse_gbo(1, 1), s(1))
1125 0 : IF (x_att >= my_coarse_bo(1, 1) .AND. x_att <= my_coarse_bo(2, 1)) THEN
1126 0 : send_size(p_old) = send_size(p_old) + coarse_slice_size
1127 : END IF
1128 : END DO
1129 : END IF
1130 : ! ** offsets & alloc send-rcv
1131 :
1132 : send_tot_size = 0
1133 72 : DO ip = 0, n_procs - 1
1134 48 : send_offset(ip) = send_tot_size
1135 72 : send_tot_size = send_tot_size + send_size(ip)
1136 : END DO
1137 72 : ALLOCATE (send_buf(0:send_tot_size - 1))
1138 :
1139 24 : rcv_tot_size = 0
1140 72 : DO ip = 0, n_procs - 1
1141 48 : rcv_offset(ip) = rcv_tot_size
1142 72 : rcv_tot_size = rcv_tot_size + rcv_size(ip)
1143 : END DO
1144 24 : IF (.NOT. rcv_tot_size == (coarse_bo(2, 1) - coarse_bo(1, 1) + 1)*coarse_slice_size) THEN
1145 0 : CPABORT("Error calculating rcv_tot_size ")
1146 : END IF
1147 72 : ALLOCATE (rcv_buf(0:rcv_tot_size - 1))
1148 :
1149 : ! ** fill send buffer
1150 :
1151 24 : p_old = pos_of_x(fine_gbo(1, 1) + MODULO(fi_lb - fine_gbo(1, 1), sf))
1152 24 : p_lb = FLOOR((fi_lb - 2 - f_shift(1))/2.)
1153 72 : sent_size(:) = send_offset
1154 24 : ss = my_coarse_bo(2, 1) - my_coarse_bo(1, 1) + 1
1155 320 : DO x = fi_lb, fi_ub
1156 296 : p = pos_of_x(fine_gbo(1, 1) + MODULO(x - fine_gbo(1, 1), sf))
1157 320 : IF (p /= p_old) THEN
1158 : shift = FLOOR((fine_gbo(1, 1) + MODULO(x - 1 - fine_gbo(1, 1), sf) - f_shift(1))/2._dp) - &
1159 24 : FLOOR((x - 1 - f_shift(1))/2._dp)
1160 24 : p_ub = FLOOR((x - 1 + 3 - f_shift(1))/2._dp)
1161 :
1162 24 : IF (pbc) THEN
1163 0 : DO xx = p_lb + shift, coarse_gbo(1, 1) - 1
1164 0 : x_att = coarse_gbo(1, 1) + MODULO(xx - coarse_gbo(1, 1), sf)
1165 0 : IF (x_att >= my_coarse_bo(1, 1) .AND. x_att <= my_coarse_bo(2, 1)) THEN
1166 : CALL dcopy(coarse_slice_size, &
1167 : coarse_coeffs(x_att, my_coarse_bo(1, 2), &
1168 0 : my_coarse_bo(1, 3)), ss, send_buf(sent_size(p_old)), 1)
1169 0 : sent_size(p_old) = sent_size(p_old) + coarse_slice_size
1170 : END IF
1171 : END DO
1172 : END IF
1173 :
1174 24 : ii = sent_size(p_old)
1175 272 : DO k = coarse_bo(1, 3), coarse_bo(2, 3)
1176 3432 : DO j = coarse_bo(1, 2), coarse_bo(2, 2)
1177 17312 : DO i = MAX(p_lb + shift, my_coarse_bo(1, 1)), MIN(p_ub + shift, my_coarse_bo(2, 1))
1178 13904 : send_buf(ii) = coarse_coeffs(i, j, k)
1179 17064 : ii = ii + 1
1180 : END DO
1181 : END DO
1182 : END DO
1183 24 : sent_size(p_old) = ii
1184 :
1185 24 : IF (pbc) THEN
1186 0 : DO xx = coarse_gbo(2, 1) + 1, p_ub + shift
1187 0 : x_att = coarse_gbo(1, 1) + MODULO(xx - coarse_gbo(1, 1), s(1))
1188 0 : IF (x_att >= my_coarse_bo(1, 1) .AND. x_att <= my_coarse_bo(2, 1)) THEN
1189 : CALL dcopy(coarse_slice_size, &
1190 : coarse_coeffs(x_att, my_coarse_bo(1, 2), &
1191 : my_coarse_bo(1, 3)), ss, &
1192 0 : send_buf(sent_size(p_old)), 1)
1193 0 : sent_size(p_old) = sent_size(p_old) + coarse_slice_size
1194 : END IF
1195 : END DO
1196 : END IF
1197 :
1198 24 : p_old = p
1199 24 : p_lb = FLOOR((x - 2 - f_shift(1))/2.)
1200 : END IF
1201 : END DO
1202 : shift = FLOOR((fine_gbo(1, 1) + MODULO(x - 1 - fine_gbo(1, 1), sf) - f_shift(1))/2._dp) - &
1203 24 : FLOOR((x - 1 - f_shift(1))/2._dp)
1204 24 : p_ub = FLOOR((fi_ub + 3 - f_shift(1))/2.)
1205 :
1206 24 : IF (pbc) THEN
1207 0 : DO xx = p_lb + shift, coarse_gbo(1, 1) - 1
1208 0 : x_att = coarse_gbo(1, 1) + MODULO(xx - coarse_gbo(1, 1), s(1))
1209 0 : IF (x_att >= my_coarse_bo(1, 1) .AND. x_att <= my_coarse_bo(2, 1)) THEN
1210 : CALL dcopy(coarse_slice_size, &
1211 : coarse_coeffs(x_att, my_coarse_bo(1, 2), &
1212 0 : my_coarse_bo(1, 3)), ss, send_buf(sent_size(p_old)), 1)
1213 0 : sent_size(p_old) = sent_size(p_old) + coarse_slice_size
1214 : END IF
1215 : END DO
1216 : END IF
1217 :
1218 24 : ii = sent_size(p_old)
1219 272 : DO k = coarse_bo(1, 3), coarse_bo(2, 3)
1220 3432 : DO j = coarse_bo(1, 2), coarse_bo(2, 2)
1221 17312 : DO i = MAX(p_lb + shift, my_coarse_bo(1, 1)), MIN(p_ub + shift, my_coarse_bo(2, 1))
1222 13904 : send_buf(ii) = coarse_coeffs(i, j, k)
1223 17064 : ii = ii + 1
1224 : END DO
1225 : END DO
1226 : END DO
1227 24 : sent_size(p_old) = ii
1228 :
1229 24 : IF (pbc) THEN
1230 0 : DO xx = coarse_gbo(2, 1) + 1, p_ub + shift
1231 0 : x_att = coarse_gbo(1, 1) + MODULO(xx - coarse_gbo(1, 1), s(1))
1232 0 : IF (x_att >= my_coarse_bo(1, 1) .AND. x_att <= my_coarse_bo(2, 1)) THEN
1233 : CALL dcopy(coarse_slice_size, &
1234 : coarse_coeffs(x_att, my_coarse_bo(1, 2), &
1235 0 : my_coarse_bo(1, 3)), ss, send_buf(sent_size(p_old)), 1)
1236 0 : sent_size(p_old) = sent_size(p_old) + coarse_slice_size
1237 : END IF
1238 : END DO
1239 : END IF
1240 :
1241 48 : CPASSERT(ALL(sent_size(:n_procs - 2) == send_offset(1:)))
1242 24 : CPASSERT(sent_size(n_procs - 1) == send_tot_size)
1243 : ! test send/rcv sizes
1244 24 : CALL coarse_coeffs_pw%pw_grid%para%group%alltoall(send_size, real_rcv_size, 1)
1245 72 : CPASSERT(ALL(real_rcv_size == rcv_size))
1246 : ! all2all
1247 : CALL coarse_coeffs_pw%pw_grid%para%group%alltoall(sb=send_buf, scount=send_size, sdispl=send_offset, &
1248 24 : rb=rcv_buf, rcount=rcv_size, rdispl=rcv_offset)
1249 :
1250 : ! ** reorder rcv buffer
1251 : ! (actually reordering should be needed only with pbc)
1252 :
1253 : ALLOCATE (coarse_coeffs(coarse_bo(1, 1):coarse_bo(2, 1), &
1254 : coarse_bo(1, 2):coarse_bo(2, 2), &
1255 120 : coarse_bo(1, 3):coarse_bo(2, 3)))
1256 :
1257 24 : my_lb = MAX(coarse_gbo(1, 1), coarse_bo(1, 1))
1258 24 : my_ub = MIN(coarse_gbo(2, 1), coarse_bo(2, 1))
1259 24 : pos_of_x => coarse_coeffs_pw%pw_grid%para%pos_of_x
1260 72 : sent_size(:) = rcv_offset
1261 24 : ss = coarse_bo(2, 1) - coarse_bo(1, 1) + 1
1262 24 : DO x = my_ub + 1, coarse_bo(2, 1)
1263 0 : p_old = pos_of_x(coarse_gbo(1, 1) + MODULO(x - coarse_gbo(1, 1), s(1)))
1264 : CALL dcopy(coarse_slice_size, &
1265 : rcv_buf(sent_size(p_old)), 1, &
1266 : coarse_coeffs(x, coarse_bo(1, 2), &
1267 0 : coarse_bo(1, 3)), ss)
1268 24 : sent_size(p_old) = sent_size(p_old) + coarse_slice_size
1269 : END DO
1270 : p_old = pos_of_x(coarse_gbo(1, 1) &
1271 24 : + MODULO(my_lb - coarse_gbo(1, 1), s(1)))
1272 24 : p_lb = my_lb
1273 184 : DO x = my_lb, my_ub
1274 160 : p = pos_of_x(coarse_gbo(1, 1) + MODULO(x - coarse_gbo(1, 1), s(1)))
1275 160 : IF (p /= p_old) THEN
1276 24 : p_ub = x - 1
1277 :
1278 24 : ii = sent_size(p_old)
1279 272 : DO k = coarse_bo(1, 3), coarse_bo(2, 3)
1280 3432 : DO j = coarse_bo(1, 2), coarse_bo(2, 2)
1281 15732 : DO i = p_lb, p_ub
1282 12324 : coarse_coeffs(i, j, k) = rcv_buf(ii)
1283 15484 : ii = ii + 1
1284 : END DO
1285 : END DO
1286 : END DO
1287 24 : sent_size(p_old) = ii
1288 :
1289 24 : p_lb = x
1290 24 : p_old = p
1291 : END IF
1292 184 : rcv_size(p) = rcv_size(p) + coarse_slice_size
1293 : END DO
1294 24 : p_ub = my_ub
1295 24 : ii = sent_size(p_old)
1296 272 : DO k = coarse_bo(1, 3), coarse_bo(2, 3)
1297 3432 : DO j = coarse_bo(1, 2), coarse_bo(2, 2)
1298 18892 : DO i = p_lb, p_ub
1299 15484 : coarse_coeffs(i, j, k) = rcv_buf(ii)
1300 18644 : ii = ii + 1
1301 : END DO
1302 : END DO
1303 : END DO
1304 24 : sent_size(p_old) = ii
1305 24 : DO x = coarse_bo(1, 1), my_lb - 1
1306 0 : p_old = pos_of_x(coarse_gbo(1, 1) + MODULO(x - coarse_gbo(1, 1), s(1)))
1307 : CALL dcopy(coarse_slice_size, &
1308 : rcv_buf(sent_size(p_old)), 1, &
1309 : coarse_coeffs(x, coarse_bo(1, 2), &
1310 0 : coarse_bo(1, 3)), ss)
1311 24 : sent_size(p_old) = sent_size(p_old) + coarse_slice_size
1312 : END DO
1313 :
1314 48 : CPASSERT(ALL(sent_size(0:n_procs - 2) == rcv_offset(1:)))
1315 24 : CPASSERT(sent_size(n_procs - 1) == rcv_tot_size)
1316 :
1317 : ! dealloc
1318 24 : DEALLOCATE (send_size, send_offset, rcv_size, rcv_offset)
1319 24 : DEALLOCATE (send_buf, rcv_buf, real_rcv_size)
1320 48 : CALL timestop(handle2)
1321 :
1322 : END IF
1323 2260 : fine_values => fine_values_pw%array
1324 9040 : w_0 = (/weights_1d(3), weights_1d(1), weights_1d(3)/)
1325 11300 : w_1 = (/weights_1d(4), weights_1d(2), weights_1d(2), weights_1d(4)/)
1326 :
1327 30388 : DO k = coarse_bo(1, 3), coarse_bo(2, 3)
1328 227284 : DO ik = -3, 3
1329 196896 : IF (pbc) THEN
1330 0 : wk = weights_1d(ABS(ik) + 1)
1331 0 : fk = fine_gbo(1, 3) + MODULO(2*k + ik - fine_gbo(1, 3) + f_shift(3), 2*s(3))
1332 : ELSE
1333 196896 : fk = 2*k + ik + f_shift(3)
1334 196896 : IF (fk <= fine_bo(1, 3) + 1 .OR. fk >= fine_bo(2, 3) - 1) THEN
1335 40680 : IF (fk < fine_bo(1, 3) .OR. fk > fine_bo(2, 3)) CYCLE
1336 22600 : IF (fk == fine_bo(1, 3) .OR. fk == fine_bo(2, 3)) THEN
1337 9040 : IF (ik /= 0) CYCLE
1338 4520 : wk = w_border0
1339 13560 : ELSE IF (fk == 2*coarse_bo(1, 3) + 1 + f_shift(3)) THEN
1340 2260 : SELECT CASE (ik)
1341 : CASE (1)
1342 2260 : wk = w_border1(1)
1343 : CASE (-1)
1344 2260 : wk = w_border1(2)
1345 : CASE (-3)
1346 2260 : wk = w_border1(3)
1347 : CASE default
1348 0 : CPABORT("")
1349 6780 : CYCLE
1350 : END SELECT
1351 : ELSE
1352 2260 : SELECT CASE (ik)
1353 : CASE (3)
1354 2260 : wk = w_border1(3)
1355 : CASE (1)
1356 2260 : wk = w_border1(2)
1357 : CASE (-1)
1358 2260 : wk = w_border1(1)
1359 : CASE default
1360 0 : CPABORT("")
1361 6780 : CYCLE
1362 : END SELECT
1363 : END IF
1364 : ELSE
1365 156216 : wk = weights_1d(ABS(ik) + 1)
1366 : END IF
1367 : END IF
1368 3150076 : DO j = coarse_bo(1, 2), coarse_bo(2, 2)
1369 23778112 : DO ij = -3, 3
1370 20633564 : IF (pbc) THEN
1371 0 : wj = weights_1d(ABS(ij) + 1)*wk
1372 0 : fj = fine_gbo(1, 2) + MODULO(2*j + ij - fine_gbo(1, 2) + f_shift(2), 2*s(2))
1373 : ELSE
1374 20633564 : fj = 2*j + ij + f_shift(2)
1375 20633564 : IF (fj <= fine_bo(1, 2) + 1 .OR. fj >= fine_bo(2, 2) - 1) THEN
1376 3137328 : IF (fj < fine_bo(1, 2) .OR. fj > fine_bo(2, 2)) CYCLE
1377 1742960 : IF (fj == fine_bo(1, 2) .OR. fj == fine_bo(2, 2)) THEN
1378 697184 : IF (ij /= 0) CYCLE
1379 348592 : wj = w_border0*wk
1380 1045776 : ELSE IF (fj == 2*coarse_bo(1, 2) + 1 + f_shift(2)) THEN
1381 174296 : SELECT CASE (ij)
1382 : CASE (1)
1383 174296 : wj = w_border1(1)*wk
1384 : CASE (-1)
1385 174296 : wj = w_border1(2)*wk
1386 : CASE (-3)
1387 174296 : wj = w_border1(3)*wk
1388 : CASE default
1389 522888 : CYCLE
1390 : END SELECT
1391 : ELSE
1392 174296 : SELECT CASE (ij)
1393 : CASE (-1)
1394 174296 : wj = w_border1(1)*wk
1395 : CASE (1)
1396 174296 : wj = w_border1(2)*wk
1397 : CASE (3)
1398 174296 : wj = w_border1(3)*wk
1399 : CASE default
1400 522888 : CYCLE
1401 : END SELECT
1402 : END IF
1403 : ELSE
1404 17496236 : wj = weights_1d(ABS(ij) + 1)*wk
1405 : END IF
1406 : END IF
1407 :
1408 21838256 : IF (fine_bo(2, 1) - fine_bo(1, 1) < 7 .OR. safe_calc) THEN
1409 : ! CALL timeset(routineN//"_safe",handle2)
1410 25000 : DO i = coarse_bo(1, 1), coarse_bo(2, 1)
1411 165000 : DO ii = -3, 3
1412 140000 : IF (pbc .AND. .NOT. is_split) THEN
1413 0 : wi = weights_1d(ABS(ii) + 1)*wj
1414 0 : fi = fine_gbo(1, 1) + MODULO(2*i + ii - fine_gbo(1, 1) + f_shift(1), 2*s(1))
1415 : ELSE
1416 140000 : fi = 2*i + ii + f_shift(1)
1417 140000 : IF (fi < fine_bo(1, 1) .OR. fi > fine_bo(2, 1)) CYCLE
1418 67500 : IF (.NOT. pbc .AND. (fi <= fine_gbo(1, 1) + 1 .OR. &
1419 : fi >= fine_gbo(2, 1) - 1)) THEN
1420 25000 : IF (fi == fine_gbo(1, 1) .OR. fi == fine_gbo(2, 1)) THEN
1421 10000 : IF (ii /= 0) CYCLE
1422 5000 : wi = w_border0*wj
1423 15000 : ELSE IF (fi == fine_gbo(1, 1) + 1) THEN
1424 2500 : SELECT CASE (ii)
1425 : CASE (1)
1426 2500 : wi = w_border1(1)*wj
1427 : CASE (-1)
1428 2500 : wi = w_border1(2)*wj
1429 : CASE (-3)
1430 2500 : wi = w_border1(3)*wj
1431 : CASE default
1432 7500 : CYCLE
1433 : END SELECT
1434 : ELSE
1435 2500 : SELECT CASE (ii)
1436 : CASE (-1)
1437 2500 : wi = w_border1(1)*wj
1438 : CASE (1)
1439 2500 : wi = w_border1(2)*wj
1440 : CASE (3)
1441 2500 : wi = w_border1(3)*wj
1442 : CASE default
1443 7500 : CYCLE
1444 : END SELECT
1445 : END IF
1446 : ELSE
1447 42500 : wi = weights_1d(ABS(ii) + 1)*wj
1448 : END IF
1449 : END IF
1450 : fine_values(fi, fj, fk) = &
1451 : fine_values(fi, fj, fk) + &
1452 160000 : wi*coarse_coeffs(i, j, k)
1453 : END DO
1454 : END DO
1455 : ! CALL timestop(handle2)
1456 : ELSE
1457 : ! CALL timeset(routineN//"_core1",handle2)
1458 75542416 : ww0 = wj*w_0
1459 94428020 : ww1 = wj*w_1
1460 18885604 : IF (pbc .AND. .NOT. is_split) THEN
1461 0 : v3 = coarse_coeffs(coarse_bo(2, 1), j, k)
1462 0 : i = coarse_bo(1, 1)
1463 0 : fi = 2*i + f_shift(1)
1464 0 : v0 = coarse_coeffs(i, j, k)
1465 0 : v1 = coarse_coeffs(i + 1, j, k)
1466 : fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1467 0 : ww0(1)*v3 + ww0(2)*v0 + ww0(3)*v1
1468 0 : v2 = coarse_coeffs(i + 2, j, k)
1469 0 : fi = fi + 1
1470 : fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1471 0 : ww1(1)*v3 + ww1(2)*v0 + ww1(3)*v1 + ww1(4)*v2
1472 18885604 : ELSE IF (.NOT. has_i_lbound) THEN
1473 18826844 : i = coarse_bo(1, 1)
1474 18826844 : fi = 2*i + f_shift(1)
1475 18826844 : v0 = coarse_coeffs(i, j, k)
1476 : fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1477 18826844 : w_border0*wj*v0
1478 18826844 : v1 = coarse_coeffs(i + 1, j, k)
1479 18826844 : v2 = coarse_coeffs(i + 2, j, k)
1480 18826844 : fi = fi + 1
1481 : fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1482 : wj*(w_border1(1)*v0 + w_border1(2)*v1 + &
1483 18826844 : w_border1(3)*v2)
1484 : ELSE
1485 58760 : i = coarse_bo(1, 1)
1486 58760 : v0 = coarse_coeffs(i, j, k)
1487 58760 : v1 = coarse_coeffs(i + 1, j, k)
1488 58760 : v2 = coarse_coeffs(i + 2, j, k)
1489 58760 : fi = 2*i + f_shift(1) + 1
1490 58760 : IF (.NOT. (fi + 1 == fine_bo(1, 1) .OR. &
1491 : fi + 2 == fine_bo(1, 1))) THEN
1492 : CALL cp_abort(__LOCATION__, &
1493 : "unexpected start index "// &
1494 : TRIM(cp_to_string(coarse_bo(1, 1)))//" "// &
1495 0 : TRIM(cp_to_string(fi)))
1496 : END IF
1497 : END IF
1498 18885604 : fi = fi + 1
1499 18885604 : IF (fi >= fine_bo(1, 1)) THEN
1500 : fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1501 : ww0(1)*v0 + ww0(2)*v1 + &
1502 18885604 : ww0(3)*v2
1503 : ELSE
1504 0 : CPASSERT(fi + 1 == fine_bo(1, 1))
1505 : END IF
1506 : ! CALL timestop(handle2)
1507 : ! CALL timeset(routineN//"_core",handle2)
1508 18885604 : DO i = coarse_bo(1, 1) + 3, FLOOR((fine_bo(2, 1) - f_shift(1))/2.) - 3, 4
1509 84267688 : v3 = coarse_coeffs(i, j, k)
1510 84267688 : fi = fi + 1
1511 : fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1512 : (ww1(1)*v0 + ww1(2)*v1 + &
1513 84267688 : ww1(3)*v2 + ww1(4)*v3)
1514 84267688 : fi = fi + 1
1515 : fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1516 : (ww0(1)*v1 + ww0(2)*v2 + &
1517 84267688 : ww0(3)*v3)
1518 84267688 : v0 = coarse_coeffs(i + 1, j, k)
1519 84267688 : fi = fi + 1
1520 : fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1521 : (ww1(4)*v0 + ww1(1)*v1 + &
1522 84267688 : ww1(2)*v2 + ww1(3)*v3)
1523 84267688 : fi = fi + 1
1524 : fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1525 : (ww0(1)*v2 + ww0(2)*v3 + &
1526 84267688 : ww0(3)*v0)
1527 84267688 : v1 = coarse_coeffs(i + 2, j, k)
1528 84267688 : fi = fi + 1
1529 : fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1530 : (ww1(3)*v0 + ww1(4)*v1 + &
1531 84267688 : ww1(1)*v2 + ww1(2)*v3)
1532 84267688 : fi = fi + 1
1533 : fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1534 : (ww0(1)*v3 + ww0(2)*v0 + &
1535 84267688 : ww0(3)*v1)
1536 84267688 : v2 = coarse_coeffs(i + 3, j, k)
1537 84267688 : fi = fi + 1
1538 : fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1539 : (ww1(2)*v0 + ww1(3)*v1 + &
1540 84267688 : ww1(4)*v2 + ww1(1)*v3)
1541 84267688 : fi = fi + 1
1542 : fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1543 : (ww0(1)*v0 + ww0(2)*v1 + &
1544 84543910 : ww0(3)*v2)
1545 : END DO
1546 : ! CALL timestop(handle2)
1547 : ! CALL timeset(routineN//"_clean",handle2)
1548 18885604 : rest_b = MODULO(FLOOR((fine_bo(2, 1) - f_shift(1))/2.) - coarse_bo(1, 1) - 3 + 1, 4)
1549 18885604 : IF (rest_b > 0) THEN
1550 18508720 : i = FLOOR((fine_bo(2, 1) - f_shift(1))/2.) - rest_b + 1
1551 18508720 : v3 = coarse_coeffs(i, j, k)
1552 18508720 : fi = fi + 1
1553 : fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1554 : (ww1(1)*v0 + ww1(2)*v1 + &
1555 18508720 : ww1(3)*v2 + ww1(4)*v3)
1556 18508720 : fi = fi + 1
1557 : fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1558 : (ww0(1)*v1 + ww0(2)*v2 + &
1559 18508720 : ww0(3)*v3)
1560 18508720 : IF (rest_b > 1) THEN
1561 18449960 : v0 = coarse_coeffs(i + 1, j, k)
1562 18449960 : fi = fi + 1
1563 : fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1564 : (ww1(4)*v0 + ww1(1)*v1 + &
1565 18449960 : ww1(2)*v2 + ww1(3)*v3)
1566 18449960 : fi = fi + 1
1567 : fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1568 : (ww0(1)*v2 + ww0(2)*v3 + &
1569 18449960 : ww0(3)*v0)
1570 18449960 : IF (rest_b > 2) THEN
1571 73160 : v1 = coarse_coeffs(i + 2, j, k)
1572 73160 : fi = fi + 1
1573 : fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1574 : (ww1(3)*v0 + ww1(4)*v1 + &
1575 73160 : ww1(1)*v2 + ww1(2)*v3)
1576 73160 : fi = fi + 1
1577 : fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1578 : (ww0(1)*v3 + ww0(2)*v0 + &
1579 73160 : ww0(3)*v1)
1580 73160 : IF (pbc .AND. .NOT. is_split) THEN
1581 0 : v2 = coarse_coeffs(coarse_bo(1, 1), j, k)
1582 0 : fi = fi + 1
1583 : fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1584 0 : ww1(1)*v3 + ww1(2)*v0 + ww1(3)*v1 + ww1(4)*v2
1585 0 : fi = fi + 1
1586 : fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1587 0 : ww0(1)*v0 + ww0(2)*v1 + ww0(3)*v2
1588 0 : v3 = coarse_coeffs(coarse_bo(1, 1) + 1, j, k)
1589 0 : fi = fi + 1
1590 : fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1591 0 : ww1(1)*v0 + ww1(2)*v1 + ww1(3)*v2 + ww1(4)*v3
1592 73160 : ELSE IF (has_i_ubound) THEN
1593 0 : v2 = coarse_coeffs(i + 3, j, k)
1594 0 : fi = fi + 1
1595 : fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1596 0 : ww1(1)*v3 + ww1(2)*v0 + ww1(3)*v1 + ww1(4)*v2
1597 0 : fi = fi + 1
1598 : fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1599 0 : ww0(1)*v0 + ww0(2)*v1 + ww0(3)*v2
1600 0 : IF (fi + 1 == fine_bo(2, 1)) THEN
1601 0 : v3 = coarse_coeffs(i + 4, j, k)
1602 0 : fi = fi + 1
1603 : fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1604 0 : ww1(1)*v0 + ww1(2)*v1 + ww1(3)*v2 + ww1(4)*v3
1605 : END IF
1606 : ELSE
1607 73160 : fi = fi + 1
1608 : fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1609 : wj*(w_border1(3)*v3 + w_border1(2)*v0 + &
1610 73160 : w_border1(1)*v1)
1611 73160 : fi = fi + 1
1612 : fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1613 73160 : w_border0*wj*v1
1614 : END IF
1615 18376800 : ELSE IF (pbc .AND. .NOT. is_split) THEN
1616 0 : v1 = coarse_coeffs(coarse_bo(1, 1), j, k)
1617 0 : fi = fi + 1
1618 : fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1619 0 : ww1(1)*v2 + ww1(2)*v3 + ww1(3)*v0 + ww1(4)*v1
1620 0 : fi = fi + 1
1621 : fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1622 0 : ww0(1)*v3 + ww0(2)*v0 + ww0(3)*v1
1623 0 : v2 = coarse_coeffs(coarse_bo(1, 1) + 1, j, k)
1624 0 : fi = fi + 1
1625 : fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1626 0 : ww1(1)*v3 + ww1(2)*v0 + ww1(3)*v1 + ww1(4)*v2
1627 18376800 : ELSE IF (has_i_ubound) THEN
1628 0 : v1 = coarse_coeffs(i + 2, j, k)
1629 0 : fi = fi + 1
1630 : fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1631 0 : ww1(1)*v2 + ww1(2)*v3 + ww1(3)*v0 + ww1(4)*v1
1632 0 : fi = fi + 1
1633 : fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1634 0 : ww0(1)*v3 + ww0(2)*v0 + ww0(3)*v1
1635 0 : IF (fi + 1 == fine_bo(2, 1)) THEN
1636 0 : v2 = coarse_coeffs(i + 3, j, k)
1637 0 : fi = fi + 1
1638 : fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1639 0 : ww1(1)*v3 + ww1(2)*v0 + ww1(3)*v1 + ww1(4)*v2
1640 : END IF
1641 : ELSE
1642 18376800 : fi = fi + 1
1643 : fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1644 : wj*(w_border1(3)*v2 + w_border1(2)*v3 + &
1645 18376800 : w_border1(1)*v0)
1646 18376800 : fi = fi + 1
1647 : fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1648 18376800 : w_border0*wj*v0
1649 : END IF
1650 58760 : ELSE IF (pbc .AND. .NOT. is_split) THEN
1651 0 : v0 = coarse_coeffs(coarse_bo(1, 1), j, k)
1652 0 : fi = fi + 1
1653 : fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1654 0 : ww1(1)*v1 + ww1(2)*v2 + ww1(3)*v3 + ww1(4)*v0
1655 0 : fi = fi + 1
1656 : fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1657 0 : ww0(1)*v2 + ww0(2)*v3 + ww0(3)*v0
1658 0 : v1 = coarse_coeffs(coarse_bo(1, 1) + 1, j, k)
1659 0 : fi = fi + 1
1660 : fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1661 0 : ww1(1)*v2 + ww1(2)*v3 + ww1(3)*v0 + ww1(4)*v1
1662 58760 : ELSE IF (has_i_ubound) THEN
1663 58760 : v0 = coarse_coeffs(i + 1, j, k)
1664 58760 : fi = fi + 1
1665 : fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1666 58760 : ww1(1)*v1 + ww1(2)*v2 + ww1(3)*v3 + ww1(4)*v0
1667 58760 : fi = fi + 1
1668 : fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1669 58760 : ww0(1)*v2 + ww0(2)*v3 + ww0(3)*v0
1670 58760 : IF (fi + 1 == fine_bo(2, 1)) THEN
1671 58760 : v1 = coarse_coeffs(i + 2, j, k)
1672 58760 : fi = fi + 1
1673 : fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1674 58760 : ww1(1)*v2 + ww1(2)*v3 + ww1(3)*v0 + ww1(4)*v1
1675 : END IF
1676 : ELSE
1677 0 : fi = fi + 1
1678 : fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1679 : wj*(w_border1(3)*v1 + w_border1(2)*v2 + &
1680 0 : w_border1(1)*v3)
1681 0 : fi = fi + 1
1682 : fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1683 0 : w_border0*wj*v3
1684 : END IF
1685 376884 : ELSE IF (pbc .AND. .NOT. is_split) THEN
1686 0 : v3 = coarse_coeffs(coarse_bo(1, 1), j, k)
1687 0 : fi = fi + 1
1688 : fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1689 0 : ww1(1)*v0 + ww1(2)*v1 + ww1(3)*v2 + ww1(4)*v3
1690 0 : fi = fi + 1
1691 : fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1692 0 : ww0(1)*v1 + ww0(2)*v2 + ww0(3)*v3
1693 0 : v0 = coarse_coeffs(coarse_bo(1, 1) + 1, j, k)
1694 0 : fi = fi + 1
1695 : fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1696 0 : ww1(1)*v1 + ww1(2)*v2 + ww1(3)*v3 + ww1(4)*v0
1697 376884 : ELSE IF (has_i_ubound) THEN
1698 0 : v3 = coarse_coeffs(i, j, k)
1699 0 : fi = fi + 1
1700 : fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1701 0 : ww1(1)*v0 + ww1(2)*v1 + ww1(3)*v2 + ww1(4)*v3
1702 0 : fi = fi + 1
1703 : fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1704 0 : ww0(1)*v1 + ww0(2)*v2 + ww0(3)*v3
1705 0 : IF (fi + 1 == fine_bo(2, 1)) THEN
1706 0 : v0 = coarse_coeffs(i + 1, j, k)
1707 0 : fi = fi + 1
1708 : fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1709 0 : ww1(1)*v1 + ww1(2)*v2 + ww1(3)*v3 + ww1(4)*v0
1710 : END IF
1711 : ELSE
1712 376884 : fi = fi + 1
1713 : fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1714 : wj*(w_border1(3)*v0 + w_border1(2)*v1 + &
1715 376884 : w_border1(1)*v2)
1716 376884 : fi = fi + 1
1717 : fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1718 376884 : w_border0*wj*v2
1719 : END IF
1720 18885604 : CPASSERT(fi == fine_bo(2, 1))
1721 : END IF
1722 : ! CALL timestop(handle2)
1723 : END DO
1724 : END DO
1725 : END DO
1726 : END DO
1727 :
1728 2260 : IF (is_split) THEN
1729 24 : DEALLOCATE (coarse_coeffs)
1730 : END IF
1731 2260 : CALL timestop(handle)
1732 4520 : END SUBROUTINE add_coarse2fine
1733 :
1734 : ! **************************************************************************************************
1735 : !> \brief low level function that adds a coarse grid (without boundary)
1736 : !> to a fine grid.
1737 : !>
1738 : !> It will add to
1739 : !>
1740 : !> coarse_coeffs(coarse_bounds(1,1):coarse_bounds(2,1),
1741 : !> coarse_bounds(1,2):coarse_bounds(2,2),
1742 : !> coarse_bounds(1,3):coarse_bounds(2,3))
1743 : !>
1744 : !> using
1745 : !>
1746 : !> fine_values(2*coarse_bounds(1,1):2*coarse_bounds(2,1),
1747 : !> 2*coarse_bounds(1,2):2*coarse_bounds(2,2),
1748 : !> 2*coarse_bounds(1,3):2*coarse_bounds(2,3))
1749 : !>
1750 : !> composed with the weights obtained by the direct product of the
1751 : !> 1d coefficients weights:
1752 : !>
1753 : !> for i,j,k in -3..3
1754 : !> w(i,j,k)=weights_1d(abs(i)+1)*weights_1d(abs(j)+1)*
1755 : !> weights_1d(abs(k)+1)
1756 : !> \param fine_values_pw 3d array where to add the values due to the
1757 : !> coarse coeffs
1758 : !> \param coarse_coeffs_pw 3d array with boundary of size 1 with the values of the
1759 : !> coefficients
1760 : !> \param weights_1d the weights of the 1d smearing
1761 : !> \param w_border0 the 1d weight at the border
1762 : !> \param w_border1 the 1d weights for a point one off the border
1763 : !> (w_border1(1) is the weight of the coefficent at the border)
1764 : !> \param pbc ...
1765 : !> \param safe_computation ...
1766 : !> \author fawzi
1767 : !> \note
1768 : !> see coarse2fine for some relevant notes
1769 : ! **************************************************************************************************
1770 1062 : SUBROUTINE add_fine2coarse(fine_values_pw, coarse_coeffs_pw, &
1771 : weights_1d, w_border0, w_border1, pbc, safe_computation)
1772 : TYPE(pw_r3d_rs_type), INTENT(IN) :: fine_values_pw, coarse_coeffs_pw
1773 : REAL(kind=dp), DIMENSION(4), INTENT(in) :: weights_1d
1774 : REAL(kind=dp), INTENT(in) :: w_border0
1775 : REAL(kind=dp), DIMENSION(3), INTENT(in) :: w_border1
1776 : LOGICAL, INTENT(in) :: pbc
1777 : LOGICAL, INTENT(in), OPTIONAL :: safe_computation
1778 :
1779 : CHARACTER(len=*), PARAMETER :: routineN = 'add_fine2coarse'
1780 :
1781 : INTEGER :: coarse_slice_size, f_shift(3), fi, fj, fk, handle, handle2, i, ii, ij, ik, ip, j, &
1782 : k, n_procs, p, p_old, rcv_tot_size, rest_b, s(3), send_tot_size, ss, x, x_att
1783 1062 : INTEGER, ALLOCATABLE, DIMENSION(:) :: pp_lb, pp_ub, rcv_offset, rcv_size, &
1784 1062 : real_rcv_size, send_offset, send_size, &
1785 1062 : sent_size
1786 : INTEGER, DIMENSION(2, 3) :: coarse_bo, coarse_gbo, fine_bo, &
1787 : fine_gbo, my_coarse_bo
1788 1062 : INTEGER, DIMENSION(:), POINTER :: pos_of_x
1789 : LOGICAL :: has_i_lbound, has_i_ubound, is_split, &
1790 : local_data, safe_calc
1791 : REAL(kind=dp) :: vv0, vv1, vv2, vv3, vv4, vv5, vv6, vv7, &
1792 : wi, wj, wk
1793 1062 : REAL(kind=dp), ALLOCATABLE, DIMENSION(:) :: rcv_buf, send_buf
1794 : REAL(kind=dp), DIMENSION(3) :: w_0, ww0
1795 : REAL(kind=dp), DIMENSION(4) :: w_1, ww1
1796 1062 : REAL(kind=dp), DIMENSION(:, :, :), POINTER :: coarse_coeffs, fine_values
1797 :
1798 1062 : CALL timeset(routineN, handle)
1799 :
1800 1062 : safe_calc = .FALSE.
1801 1062 : IF (PRESENT(safe_computation)) safe_calc = safe_computation
1802 :
1803 10620 : my_coarse_bo = coarse_coeffs_pw%pw_grid%bounds_local
1804 10620 : coarse_gbo = coarse_coeffs_pw%pw_grid%bounds
1805 10620 : fine_bo = fine_values_pw%pw_grid%bounds_local
1806 10620 : fine_gbo = fine_values_pw%pw_grid%bounds
1807 4248 : f_shift = fine_gbo(1, :) - 2*coarse_gbo(1, :)
1808 3150 : is_split = ANY(coarse_gbo(:, 1) /= my_coarse_bo(:, 1))
1809 1062 : coarse_bo = my_coarse_bo
1810 1062 : IF (fine_bo(1, 1) <= fine_bo(2, 1)) THEN
1811 1062 : coarse_bo(1, 1) = FLOOR(REAL(fine_bo(1, 1) - f_shift(1), dp)/2._dp) - 1
1812 1062 : coarse_bo(2, 1) = FLOOR(REAL(fine_bo(2, 1) + 1 - f_shift(1), dp)/2._dp) + 1
1813 : ELSE
1814 0 : coarse_bo(1, 1) = coarse_gbo(2, 1)
1815 0 : coarse_bo(2, 1) = coarse_gbo(2, 1) - 1
1816 : END IF
1817 1062 : IF (.NOT. is_split .OR. .NOT. pbc) THEN
1818 1062 : coarse_bo(1, 1) = MAX(coarse_gbo(1, 1), coarse_bo(1, 1))
1819 1062 : coarse_bo(2, 1) = MIN(coarse_gbo(2, 1), coarse_bo(2, 1))
1820 : END IF
1821 1062 : has_i_ubound = (fine_gbo(2, 1) /= fine_bo(2, 1)) .OR. pbc .AND. is_split
1822 1062 : has_i_lbound = (fine_gbo(1, 1) /= fine_bo(1, 1)) .OR. pbc .AND. is_split
1823 :
1824 1062 : IF (pbc) THEN
1825 0 : CPASSERT(ALL(fine_gbo(1, :) == 2*coarse_gbo(1, :) + f_shift))
1826 0 : CPASSERT(ALL(fine_gbo(2, :) == 2*coarse_gbo(2, :) + f_shift + 1))
1827 : ELSE
1828 4248 : CPASSERT(ALL(fine_gbo(2, :) == 2*coarse_gbo(2, :) + f_shift))
1829 4248 : CPASSERT(ALL(fine_gbo(1, :) == 2*coarse_gbo(1, :) + f_shift))
1830 : END IF
1831 1062 : CPASSERT(coarse_gbo(2, 1) - coarse_gbo(1, 2) > 1)
1832 1062 : local_data = is_split ! ANY(coarse_bo/=my_coarse_bo)
1833 1062 : IF (local_data) THEN
1834 : ALLOCATE (coarse_coeffs(coarse_bo(1, 1):coarse_bo(2, 1), &
1835 : coarse_bo(1, 2):coarse_bo(2, 2), &
1836 120 : coarse_bo(1, 3):coarse_bo(2, 3)))
1837 31240 : coarse_coeffs = 0._dp
1838 : ELSE
1839 1038 : coarse_coeffs => coarse_coeffs_pw%array
1840 : END IF
1841 :
1842 1062 : fine_values => fine_values_pw%array
1843 4248 : w_0 = (/weights_1d(3), weights_1d(1), weights_1d(3)/)
1844 5310 : w_1 = (/weights_1d(4), weights_1d(2), weights_1d(2), weights_1d(4)/)
1845 :
1846 4248 : DO i = 1, 3
1847 4248 : s(i) = coarse_gbo(2, i) - coarse_gbo(1, i) + 1
1848 : END DO
1849 4248 : IF (ANY(s < 1)) RETURN
1850 :
1851 14092 : DO k = coarse_bo(1, 3), coarse_bo(2, 3)
1852 105302 : DO ik = -3, 3
1853 91210 : IF (pbc) THEN
1854 0 : wk = weights_1d(ABS(ik) + 1)
1855 0 : fk = fine_gbo(1, 3) + MODULO(2*k + ik - fine_gbo(1, 3) + f_shift(3), 2*s(3))
1856 : ELSE
1857 91210 : fk = 2*k + ik + f_shift(3)
1858 91210 : IF (fk <= fine_bo(1, 3) + 1 .OR. fk >= fine_bo(2, 3) - 1) THEN
1859 19116 : IF (fk < fine_bo(1, 3) .OR. fk > fine_bo(2, 3)) CYCLE
1860 10620 : IF (fk == fine_bo(1, 3) .OR. fk == fine_bo(2, 3)) THEN
1861 4248 : IF (ik /= 0) CYCLE
1862 2124 : wk = w_border0
1863 6372 : ELSE IF (fk == fine_bo(1, 3) + 1) THEN
1864 1062 : SELECT CASE (ik)
1865 : CASE (1)
1866 1062 : wk = w_border1(1)
1867 : CASE (-1)
1868 1062 : wk = w_border1(2)
1869 : CASE (-3)
1870 1062 : wk = w_border1(3)
1871 : CASE default
1872 0 : CPABORT("")
1873 3186 : CYCLE
1874 : END SELECT
1875 : ELSE
1876 1062 : SELECT CASE (ik)
1877 : CASE (3)
1878 1062 : wk = w_border1(3)
1879 : CASE (1)
1880 1062 : wk = w_border1(2)
1881 : CASE (-1)
1882 1062 : wk = w_border1(1)
1883 : CASE default
1884 0 : CPABORT("")
1885 3186 : CYCLE
1886 : END SELECT
1887 : END IF
1888 : ELSE
1889 72094 : wk = weights_1d(ABS(ik) + 1)
1890 : END IF
1891 : END IF
1892 1471974 : DO j = coarse_bo(1, 2), coarse_bo(2, 2)
1893 11118042 : DO ij = -3, 3
1894 9648478 : IF (pbc) THEN
1895 : fj = fine_gbo(1, 2) + MODULO(2*j + ij - fine_gbo(1, 2) + f_shift(2), &
1896 0 : 2*s(2))
1897 0 : wj = weights_1d(ABS(ij) + 1)*wk
1898 : ELSE
1899 9648478 : fj = 2*j + ij + f_shift(2)
1900 9648478 : IF (fj <= fine_bo(1, 2) + 1 .OR. fj >= fine_bo(2, 2) - 1) THEN
1901 1450620 : IF (fj < fine_bo(1, 2) .OR. fj > fine_bo(2, 2)) CYCLE
1902 805900 : IF (fj == fine_bo(1, 2) .OR. fj == fine_bo(2, 2)) THEN
1903 322360 : IF (ij /= 0) CYCLE
1904 161180 : wj = w_border0*wk
1905 483540 : ELSE IF (fj == fine_bo(1, 2) + 1) THEN
1906 80590 : SELECT CASE (ij)
1907 : CASE (1)
1908 80590 : wj = w_border1(1)*wk
1909 : CASE (-1)
1910 80590 : wj = w_border1(2)*wk
1911 : CASE (-3)
1912 80590 : wj = w_border1(3)*wk
1913 : CASE default
1914 0 : CPABORT("")
1915 241770 : CYCLE
1916 : END SELECT
1917 : ELSE
1918 80590 : SELECT CASE (ij)
1919 : CASE (-1)
1920 80590 : wj = w_border1(1)*wk
1921 : CASE (1)
1922 80590 : wj = w_border1(2)*wk
1923 : CASE (3)
1924 80590 : wj = w_border1(3)*wk
1925 : CASE default
1926 0 : CPABORT("")
1927 241770 : CYCLE
1928 : END SELECT
1929 : END IF
1930 : ELSE
1931 8197858 : wj = weights_1d(ABS(ij) + 1)*wk
1932 : END IF
1933 : END IF
1934 :
1935 10220932 : IF (coarse_bo(2, 1) - coarse_bo(1, 1) < 7 .OR. safe_calc) THEN
1936 1559844 : DO i = coarse_bo(1, 1), coarse_bo(2, 1)
1937 10785788 : DO ii = -3, 3
1938 9225944 : IF (pbc .AND. .NOT. is_split) THEN
1939 0 : wi = weights_1d(ABS(ii) + 1)*wj
1940 0 : fi = fine_gbo(1, 1) + MODULO(2*i + ii - fine_gbo(1, 1) + f_shift(1), 2*s(1))
1941 : ELSE
1942 9225944 : fi = 2*i + ii + f_shift(1)
1943 9225944 : IF (fi < fine_bo(1, 1) .OR. fi > fine_bo(2, 1)) CYCLE
1944 7112560 : IF (((.NOT. pbc) .AND. fi <= fine_gbo(1, 1) + 1) .OR. &
1945 : ((.NOT. pbc) .AND. fi >= fine_gbo(2, 1) - 1)) THEN
1946 2281160 : IF (fi == fine_gbo(1, 1) .OR. fi == fine_gbo(2, 1)) THEN
1947 912464 : IF (ii /= 0) CYCLE
1948 456232 : wi = w_border0*wj
1949 1368696 : ELSE IF (fi == fine_gbo(1, 1) + 1) THEN
1950 228116 : SELECT CASE (ii)
1951 : CASE (1)
1952 228116 : wi = w_border1(1)*wj
1953 : CASE (-1)
1954 228116 : wi = w_border1(2)*wj
1955 : CASE (-3)
1956 228116 : wi = w_border1(3)*wj
1957 : CASE default
1958 684348 : CYCLE
1959 : END SELECT
1960 : ELSE
1961 228116 : SELECT CASE (ii)
1962 : CASE (-1)
1963 228116 : wi = w_border1(1)*wj
1964 : CASE (1)
1965 228116 : wi = w_border1(2)*wj
1966 : CASE (3)
1967 228116 : wi = w_border1(3)*wj
1968 : CASE default
1969 684348 : CYCLE
1970 : END SELECT
1971 : END IF
1972 : ELSE
1973 4831400 : wi = weights_1d(ABS(ii) + 1)*wj
1974 : END IF
1975 : END IF
1976 : coarse_coeffs(i, j, k) = &
1977 : coarse_coeffs(i, j, k) + &
1978 10543936 : wi*fine_values(fi, fj, fk)
1979 : END DO
1980 : END DO
1981 : ELSE
1982 34402904 : ww0 = wj*w_0
1983 43003630 : ww1 = wj*w_1
1984 8600726 : IF (pbc .AND. .NOT. is_split) THEN
1985 0 : i = coarse_bo(1, 1) - 1
1986 0 : vv2 = fine_values(fine_bo(2, 1) - 2, fj, fk)
1987 0 : vv3 = fine_values(fine_bo(2, 1) - 1, fj, fk)
1988 0 : vv4 = fine_values(fine_bo(2, 1), fj, fk)
1989 0 : fi = fine_bo(1, 1)
1990 0 : vv5 = fine_values(fi, fj, fk)
1991 0 : fi = fi + 1
1992 0 : vv6 = fine_values(fi, fj, fk)
1993 0 : fi = fi + 1
1994 0 : vv7 = fine_values(fi, fj, fk)
1995 : coarse_coeffs(i + 1, j, k) = coarse_coeffs(i + 1, j, k) &
1996 0 : + ww1(4)*vv2 + ww0(3)*vv3 + ww1(3)*vv4 + ww0(2)*vv5 + ww1(2)*vv6 + ww0(1)*vv7
1997 : coarse_coeffs(i + 2, j, k) = coarse_coeffs(i + 2, j, k) &
1998 0 : + ww1(4)*vv4 + ww0(3)*vv5 + ww1(3)*vv6 + ww0(2)*vv7
1999 : coarse_coeffs(i + 3, j, k) = coarse_coeffs(i + 3, j, k) &
2000 0 : + ww1(4)*vv6 + ww0(3)*vv7
2001 8600726 : ELSE IF (has_i_lbound) THEN
2002 47524 : i = coarse_bo(1, 1)
2003 47524 : fi = fine_bo(1, 1) - 1
2004 47524 : IF (i + 1 == FLOOR((fine_bo(1, 1) + 1 - f_shift(1))/2._dp)) THEN
2005 47524 : fi = fi + 1
2006 47524 : vv0 = fine_values(fi, fj, fk)
2007 : coarse_coeffs(i, j, k) = coarse_coeffs(i, j, k) + &
2008 47524 : vv0*ww0(3)
2009 : coarse_coeffs(i + 1, j, k) = coarse_coeffs(i + 1, j, k) + &
2010 47524 : vv0*ww0(2)
2011 : coarse_coeffs(i + 2, j, k) = coarse_coeffs(i + 2, j, k) + &
2012 47524 : vv0*ww0(1)
2013 : END IF
2014 : ELSE
2015 8553202 : i = coarse_bo(1, 1)
2016 8553202 : fi = 2*i + f_shift(1)
2017 8553202 : vv0 = fine_values(fi, fj, fk)
2018 8553202 : fi = fi + 1
2019 8553202 : vv1 = fine_values(fi, fj, fk)
2020 8553202 : fi = fi + 1
2021 8553202 : vv2 = fine_values(fi, fj, fk)
2022 : coarse_coeffs(i, j, k) = coarse_coeffs(i, j, k) + &
2023 8553202 : (vv0*w_border0 + vv1*w_border1(1))*wj + vv2*ww0(1)
2024 : coarse_coeffs(i + 1, j, k) = coarse_coeffs(i + 1, j, k) + &
2025 8553202 : wj*w_border1(2)*vv1 + ww0(2)*vv2
2026 : coarse_coeffs(i + 2, j, k) = coarse_coeffs(i + 2, j, k) + &
2027 8553202 : wj*w_border1(3)*vv1 + ww0(3)*vv2
2028 : END IF
2029 8600726 : DO i = coarse_bo(1, 1) + 3, FLOOR((fine_bo(2, 1) - f_shift(1))/2._dp) - 3, 4
2030 37559568 : fi = fi + 1
2031 37559568 : vv0 = fine_values(fi, fj, fk)
2032 37559568 : fi = fi + 1
2033 37559568 : vv1 = fine_values(fi, fj, fk)
2034 37559568 : fi = fi + 1
2035 37559568 : vv2 = fine_values(fi, fj, fk)
2036 37559568 : fi = fi + 1
2037 37559568 : vv3 = fine_values(fi, fj, fk)
2038 37559568 : fi = fi + 1
2039 37559568 : vv4 = fine_values(fi, fj, fk)
2040 37559568 : fi = fi + 1
2041 37559568 : vv5 = fine_values(fi, fj, fk)
2042 37559568 : fi = fi + 1
2043 37559568 : vv6 = fine_values(fi, fj, fk)
2044 37559568 : fi = fi + 1
2045 37559568 : vv7 = fine_values(fi, fj, fk)
2046 : coarse_coeffs(i - 3, j, k) = coarse_coeffs(i - 3, j, k) &
2047 37559568 : + ww1(1)*vv0
2048 : coarse_coeffs(i - 2, j, k) = coarse_coeffs(i - 2, j, k) &
2049 37559568 : + ww1(2)*vv0 + ww0(1)*vv1 + ww1(1)*vv2
2050 : coarse_coeffs(i - 1, j, k) = coarse_coeffs(i - 1, j, k) &
2051 37559568 : + ww1(3)*vv0 + ww0(2)*vv1 + ww1(2)*vv2 + ww0(1)*vv3 + ww1(1)*vv4
2052 : coarse_coeffs(i, j, k) = coarse_coeffs(i, j, k) &
2053 37559568 : + ww1(4)*vv0 + ww0(3)*vv1 + ww1(3)*vv2 + ww0(2)*vv3 + ww1(2)*vv4 + ww0(1)*vv5 + ww1(1)*vv6
2054 : coarse_coeffs(i + 1, j, k) = coarse_coeffs(i + 1, j, k) &
2055 37559568 : + ww1(4)*vv2 + ww0(3)*vv3 + ww1(3)*vv4 + ww0(2)*vv5 + ww1(2)*vv6 + ww0(1)*vv7
2056 : coarse_coeffs(i + 2, j, k) = coarse_coeffs(i + 2, j, k) &
2057 37559568 : + ww1(4)*vv4 + ww0(3)*vv5 + ww1(3)*vv6 + ww0(2)*vv7
2058 : coarse_coeffs(i + 3, j, k) = coarse_coeffs(i + 3, j, k) &
2059 37559568 : + ww1(4)*vv6 + ww0(3)*vv7
2060 : END DO
2061 8600726 : IF (.NOT. FLOOR((fine_bo(2, 1) - f_shift(1))/2._dp) - coarse_bo(1, 1) >= 4) THEN
2062 0 : CPABORT("FLOOR((fine_bo(2,1)-f_shift(1))/2._dp)-coarse_bo(1,1)>=4")
2063 : END IF
2064 8600726 : rest_b = MODULO(FLOOR((fine_bo(2, 1) - f_shift(1))/2._dp) - coarse_bo(1, 1) - 6, 4)
2065 8600726 : i = FLOOR((fine_bo(2, 1) - f_shift(1))/2._dp) - 3 - rest_b + 4
2066 8600726 : CPASSERT(fi == (i - 2)*2 + f_shift(1))
2067 8600726 : IF (rest_b > 0) THEN
2068 8540210 : fi = fi + 1
2069 8540210 : vv0 = fine_values(fi, fj, fk)
2070 8540210 : fi = fi + 1
2071 8540210 : vv1 = fine_values(fi, fj, fk)
2072 : coarse_coeffs(i - 3, j, k) = coarse_coeffs(i - 3, j, k) &
2073 8540210 : + ww1(1)*vv0
2074 : coarse_coeffs(i - 2, j, k) = coarse_coeffs(i - 2, j, k) &
2075 8540210 : + ww1(2)*vv0 + ww0(1)*vv1
2076 : coarse_coeffs(i - 1, j, k) = coarse_coeffs(i - 1, j, k) &
2077 8540210 : + ww1(3)*vv0 + ww0(2)*vv1
2078 : coarse_coeffs(i, j, k) = coarse_coeffs(i, j, k) &
2079 8540210 : + ww1(4)*vv0 + ww0(3)*vv1
2080 8540210 : IF (rest_b > 1) THEN
2081 8492686 : fi = fi + 1
2082 8492686 : vv2 = fine_values(fi, fj, fk)
2083 8492686 : fi = fi + 1
2084 8492686 : vv3 = fine_values(fi, fj, fk)
2085 : coarse_coeffs(i - 2, j, k) = coarse_coeffs(i - 2, j, k) &
2086 8492686 : + ww1(1)*vv2
2087 : coarse_coeffs(i - 1, j, k) = coarse_coeffs(i - 1, j, k) &
2088 8492686 : + ww1(2)*vv2 + ww0(1)*vv3
2089 : coarse_coeffs(i, j, k) = coarse_coeffs(i, j, k) &
2090 8492686 : + ww1(3)*vv2 + ww0(2)*vv3
2091 : coarse_coeffs(i + 1, j, k) = coarse_coeffs(i + 1, j, k) &
2092 8492686 : + ww1(4)*vv2 + ww0(3)*vv3
2093 8492686 : IF (rest_b > 2) THEN
2094 61924 : fi = fi + 1
2095 61924 : vv4 = fine_values(fi, fj, fk)
2096 61924 : fi = fi + 1
2097 61924 : vv5 = fine_values(fi, fj, fk)
2098 61924 : fi = fi + 1
2099 61924 : vv6 = fine_values(fi, fj, fk)
2100 61924 : fi = fi + 1
2101 61924 : vv7 = fine_values(fi, fj, fk)
2102 : coarse_coeffs(i - 1, j, k) = coarse_coeffs(i - 1, j, k) &
2103 61924 : + ww1(1)*vv4
2104 61924 : IF (has_i_ubound) THEN
2105 0 : IF (coarse_bo(2, 1) - 2 == FLOOR((fine_bo(2, 1) - f_shift(1))/2._dp)) THEN
2106 0 : fi = fi + 1
2107 0 : vv0 = fine_values(fi, fj, fk)
2108 : coarse_coeffs(i + 4, j, k) = coarse_coeffs(i + 4, j, k) &
2109 0 : + vv0*ww1(4)
2110 : ELSE
2111 : vv0 = 0._dp
2112 : END IF
2113 : coarse_coeffs(i, j, k) = coarse_coeffs(i, j, k) &
2114 0 : + ww1(2)*vv4 + ww0(1)*vv5 + ww1(1)*vv6
2115 : coarse_coeffs(i + 1, j, k) = coarse_coeffs(i + 1, j, k) &
2116 0 : + ww1(3)*vv4 + ww0(2)*vv5 + ww1(2)*vv6 + ww0(1)*vv7 + vv0*ww1(1)
2117 : coarse_coeffs(i + 2, j, k) = coarse_coeffs(i + 2, j, k) &
2118 0 : + ww1(4)*vv4 + ww0(3)*vv5 + ww1(3)*vv6 + ww0(2)*vv7 + vv0*ww1(2)
2119 : coarse_coeffs(i + 3, j, k) = coarse_coeffs(i + 3, j, k) &
2120 0 : + ww1(4)*vv6 + ww0(3)*vv7 + vv0*ww1(3)
2121 61924 : ELSEIF (pbc .AND. .NOT. is_split) THEN
2122 0 : fi = fi + 1
2123 0 : vv0 = fine_values(fi, fj, fk)
2124 0 : vv1 = fine_values(fine_bo(1, 1), fj, fk)
2125 0 : vv2 = fine_values(fine_bo(1, 1) + 1, fj, fk)
2126 : coarse_coeffs(i, j, k) = coarse_coeffs(i, j, k) &
2127 0 : + ww1(2)*vv4 + ww0(1)*vv5 + ww1(1)*vv6
2128 : coarse_coeffs(i + 1, j, k) = coarse_coeffs(i + 1, j, k) &
2129 0 : + ww1(3)*vv4 + ww0(2)*vv5 + ww1(2)*vv6 + ww0(1)*vv7 + vv0*ww1(1)
2130 : coarse_coeffs(i + 2, j, k) = coarse_coeffs(i + 2, j, k) &
2131 : + ww1(4)*vv4 + ww0(3)*vv5 + ww1(3)*vv6 + ww0(2)*vv7 + vv0*ww1(2) &
2132 0 : + vv1*ww0(1) + vv2*ww1(1)
2133 : ELSE
2134 : coarse_coeffs(i, j, k) = coarse_coeffs(i, j, k) &
2135 61924 : + ww1(2)*vv4 + ww0(1)*vv5 + wj*w_border1(3)*vv6
2136 : coarse_coeffs(i + 1, j, k) = coarse_coeffs(i + 1, j, k) &
2137 61924 : + ww1(3)*vv4 + ww0(2)*vv5 + wj*w_border1(2)*vv6
2138 : coarse_coeffs(i + 2, j, k) = coarse_coeffs(i + 2, j, k) &
2139 61924 : + ww1(4)*vv4 + ww0(3)*vv5 + wj*w_border1(1)*vv6 + w_border0*wj*vv7
2140 : END IF
2141 : ELSE
2142 8430762 : fi = fi + 1
2143 8430762 : vv4 = fine_values(fi, fj, fk)
2144 8430762 : fi = fi + 1
2145 8430762 : vv5 = fine_values(fi, fj, fk)
2146 8430762 : IF (has_i_ubound) THEN
2147 0 : IF (coarse_bo(2, 1) - 2 == FLOOR((fine_bo(2, 1) - f_shift(1))/2._dp)) THEN
2148 0 : fi = fi + 1
2149 0 : vv6 = fine_values(fi, fj, fk)
2150 : coarse_coeffs(i + 3, j, k) = coarse_coeffs(i + 3, j, k) &
2151 0 : + ww1(4)*vv6
2152 : ELSE
2153 : vv6 = 0._dp
2154 : END IF
2155 : coarse_coeffs(i - 1, j, k) = coarse_coeffs(i - 1, j, k) &
2156 0 : + ww1(1)*vv4
2157 : coarse_coeffs(i, j, k) = coarse_coeffs(i, j, k) &
2158 0 : + ww1(2)*vv4 + ww0(1)*vv5 + ww1(1)*vv6
2159 : coarse_coeffs(i + 1, j, k) = coarse_coeffs(i + 1, j, k) &
2160 0 : + ww1(3)*vv4 + ww0(2)*vv5 + ww1(2)*vv6
2161 : coarse_coeffs(i + 2, j, k) = coarse_coeffs(i + 2, j, k) &
2162 0 : + ww1(4)*vv4 + ww0(3)*vv5 + ww1(3)*vv6
2163 8430762 : ELSEIF (pbc .AND. .NOT. is_split) THEN
2164 0 : fi = fi + 1
2165 0 : vv6 = fine_values(fi, fj, fk)
2166 0 : vv7 = fine_values(fine_bo(1, 1), fj, fk)
2167 0 : vv0 = fine_values(fine_bo(1, 1) + 1, fj, fk)
2168 : coarse_coeffs(i - 1, j, k) = coarse_coeffs(i - 1, j, k) &
2169 0 : + ww1(1)*vv4
2170 : coarse_coeffs(i, j, k) = coarse_coeffs(i, j, k) &
2171 : + ww1(4)*vv0 + ww0(3)*vv1 + ww1(3)*vv2 + ww0(2)*vv3 + &
2172 0 : ww1(2)*vv4 + ww0(1)*vv5 + ww1(1)*vv6
2173 : coarse_coeffs(i + 1, j, k) = coarse_coeffs(i + 1, j, k) &
2174 : + ww1(4)*vv2 + ww0(3)*vv3 + ww1(3)*vv4 + ww0(2)*vv5 + ww1(2)*vv6 &
2175 0 : + ww0(1)*vv7 + ww1(1)*vv0
2176 : ELSE
2177 : coarse_coeffs(i - 1, j, k) = coarse_coeffs(i - 1, j, k) &
2178 8430762 : + wj*w_border1(3)*vv4
2179 : coarse_coeffs(i, j, k) = coarse_coeffs(i, j, k) &
2180 8430762 : + wj*w_border1(2)*vv4
2181 : coarse_coeffs(i + 1, j, k) = coarse_coeffs(i + 1, j, k) &
2182 8430762 : + wj*(w_border1(1)*vv4 + w_border0*vv5)
2183 : END IF
2184 : END IF
2185 : ELSE
2186 47524 : fi = fi + 1
2187 47524 : vv2 = fine_values(fi, fj, fk)
2188 47524 : fi = fi + 1
2189 47524 : vv3 = fine_values(fi, fj, fk)
2190 47524 : IF (has_i_ubound) THEN
2191 47524 : IF (coarse_bo(2, 1) - 2 == FLOOR((fine_bo(2, 1) - f_shift(1))/2._dp)) THEN
2192 47524 : fi = fi + 1
2193 47524 : vv4 = fine_values(fi, fj, fk)
2194 : coarse_coeffs(i + 2, j, k) = coarse_coeffs(i + 2, j, k) &
2195 47524 : + ww1(4)*vv4
2196 : ELSE
2197 : vv4 = 0._dp
2198 : END IF
2199 : coarse_coeffs(i - 2, j, k) = coarse_coeffs(i - 2, j, k) &
2200 47524 : + ww1(1)*vv2
2201 : coarse_coeffs(i - 1, j, k) = coarse_coeffs(i - 1, j, k) &
2202 47524 : + ww1(2)*vv2 + ww0(1)*vv3 + ww1(1)*vv4
2203 : coarse_coeffs(i, j, k) = coarse_coeffs(i, j, k) &
2204 47524 : + ww1(3)*vv2 + ww0(2)*vv3 + ww1(2)*vv4
2205 : coarse_coeffs(i + 1, j, k) = coarse_coeffs(i + 1, j, k) &
2206 47524 : + ww1(4)*vv2 + ww0(3)*vv3 + ww1(3)*vv4
2207 0 : ELSEIF (pbc .AND. .NOT. is_split) THEN
2208 0 : fi = fi + 1
2209 0 : vv4 = fine_values(fi, fj, fk)
2210 0 : vv5 = fine_values(fine_bo(1, 1), fj, fk)
2211 0 : vv6 = fine_values(fine_bo(1, 1) + 1, fj, fk)
2212 : coarse_coeffs(i - 2, j, k) = coarse_coeffs(i - 2, j, k) &
2213 0 : + ww1(1)*vv2
2214 : coarse_coeffs(i - 1, j, k) = coarse_coeffs(i - 1, j, k) &
2215 0 : + ww1(2)*vv2 + ww0(1)*vv3 + ww1(1)*vv4
2216 : coarse_coeffs(i, j, k) = coarse_coeffs(i, j, k) &
2217 0 : + ww1(3)*vv2 + ww0(2)*vv3 + ww1(2)*vv4 + vv5*ww0(1) + ww1(1)*vv6
2218 : ELSE
2219 : coarse_coeffs(i - 2, j, k) = coarse_coeffs(i - 2, j, k) &
2220 0 : + wj*w_border1(3)*vv2
2221 : coarse_coeffs(i - 1, j, k) = coarse_coeffs(i - 1, j, k) &
2222 0 : + wj*w_border1(2)*vv2
2223 : coarse_coeffs(i, j, k) = coarse_coeffs(i, j, k) &
2224 0 : + wj*(w_border1(1)*vv2 + w_border0*vv3)
2225 : END IF
2226 : END IF
2227 : ELSE
2228 60516 : fi = fi + 1
2229 60516 : vv0 = fine_values(fi, fj, fk)
2230 60516 : fi = fi + 1
2231 60516 : vv1 = fine_values(fi, fj, fk)
2232 60516 : IF (has_i_ubound) THEN
2233 0 : IF (coarse_bo(2, 1) - 2 == FLOOR((fine_bo(2, 1) - f_shift(1))/2._dp)) THEN
2234 0 : fi = fi + 1
2235 0 : vv2 = fine_values(fi, fj, fk)
2236 : coarse_coeffs(i + 1, j, k) = coarse_coeffs(i + 1, j, k) &
2237 0 : + ww1(4)*vv2
2238 : ELSE
2239 : vv2 = 0._dp
2240 : END IF
2241 : coarse_coeffs(i - 3, j, k) = coarse_coeffs(i - 3, j, k) &
2242 0 : + ww1(1)*vv0
2243 : coarse_coeffs(i - 2, j, k) = coarse_coeffs(i - 2, j, k) &
2244 0 : + ww1(2)*vv0 + ww0(1)*vv1 + ww1(1)*vv2
2245 : coarse_coeffs(i - 1, j, k) = coarse_coeffs(i - 1, j, k) &
2246 0 : + ww1(3)*vv0 + ww0(2)*vv1 + ww1(2)*vv2
2247 : coarse_coeffs(i, j, k) = coarse_coeffs(i, j, k) &
2248 0 : + ww1(4)*vv0 + ww0(3)*vv1 + ww1(3)*vv2
2249 60516 : ELSEIF (pbc .AND. .NOT. is_split) THEN
2250 0 : fi = fi + 1
2251 0 : vv2 = fine_values(fi, fj, fk)
2252 0 : vv3 = fine_values(fine_bo(1, 1), fk, fk)
2253 0 : vv4 = fine_values(fine_bo(1, 1) + 1, fk, fk)
2254 : coarse_coeffs(i - 3, j, k) = coarse_coeffs(i - 3, j, k) &
2255 0 : + ww1(1)*vv0
2256 : coarse_coeffs(i - 2, j, k) = coarse_coeffs(i - 2, j, k) &
2257 0 : + ww1(2)*vv0 + ww0(1)*vv1 + ww1(1)*vv2
2258 : coarse_coeffs(i - 1, j, k) = coarse_coeffs(i - 1, j, k) &
2259 0 : + ww1(3)*vv0 + ww0(2)*vv1 + ww1(2)*vv2 + ww0(1)*vv3 + ww1(1)*vv4
2260 : ELSE
2261 : coarse_coeffs(i - 3, j, k) = coarse_coeffs(i - 3, j, k) &
2262 60516 : + wj*w_border1(3)*vv0
2263 : coarse_coeffs(i - 2, j, k) = coarse_coeffs(i - 2, j, k) &
2264 60516 : + wj*w_border1(2)*vv0
2265 : coarse_coeffs(i - 1, j, k) = coarse_coeffs(i - 1, j, k) &
2266 60516 : + wj*(w_border1(1)*vv0 + w_border0*vv1)
2267 : END IF
2268 : END IF
2269 8600726 : CPASSERT(fi == fine_bo(2, 1))
2270 : END IF
2271 : END DO
2272 : END DO
2273 : END DO
2274 : END DO
2275 :
2276 : ! *** parallel case
2277 1062 : IF (is_split) THEN
2278 24 : CALL timeset(routineN//"_comm", handle2)
2279 : coarse_slice_size = (coarse_bo(2, 2) - coarse_bo(1, 2) + 1)* &
2280 24 : (coarse_bo(2, 3) - coarse_bo(1, 3) + 1)
2281 24 : n_procs = coarse_coeffs_pw%pw_grid%para%group%num_pe
2282 : ALLOCATE (send_size(0:n_procs - 1), send_offset(0:n_procs - 1), &
2283 : sent_size(0:n_procs - 1), rcv_size(0:n_procs - 1), &
2284 : rcv_offset(0:n_procs - 1), pp_lb(0:n_procs - 1), &
2285 240 : pp_ub(0:n_procs - 1), real_rcv_size(0:n_procs - 1))
2286 :
2287 : ! ** send size count
2288 :
2289 24 : pos_of_x => coarse_coeffs_pw%pw_grid%para%pos_of_x
2290 72 : send_size = 0
2291 184 : DO x = coarse_bo(1, 1), coarse_bo(2, 1)
2292 160 : p = pos_of_x(coarse_gbo(1, 1) + MODULO(x - coarse_gbo(1, 1), s(1)))
2293 184 : send_size(p) = send_size(p) + coarse_slice_size
2294 : END DO
2295 :
2296 : ! ** rcv size count
2297 :
2298 24 : pos_of_x => fine_values_pw%pw_grid%para%pos_of_x
2299 24 : p_old = pos_of_x(fine_gbo(1, 1))
2300 72 : pp_lb = fine_gbo(2, 1)
2301 72 : pp_ub = fine_gbo(2, 1) - 1
2302 24 : pp_lb(p_old) = fine_gbo(1, 1)
2303 496 : DO x = fine_gbo(1, 1), fine_gbo(2, 1)
2304 472 : p = pos_of_x(x)
2305 496 : IF (p /= p_old) THEN
2306 24 : pp_ub(p_old) = x - 1
2307 24 : pp_lb(p) = x
2308 24 : p_old = p
2309 : END IF
2310 : END DO
2311 24 : pp_ub(p_old) = fine_gbo(2, 1)
2312 :
2313 72 : DO ip = 0, n_procs - 1
2314 48 : IF (pp_lb(ip) <= pp_ub(ip)) THEN
2315 48 : pp_lb(ip) = FLOOR(REAL(pp_lb(ip) - f_shift(1), dp)/2._dp) - 1
2316 48 : pp_ub(ip) = FLOOR(REAL(pp_ub(ip) + 1 - f_shift(1), dp)/2._dp) + 1
2317 : ELSE
2318 0 : pp_lb(ip) = coarse_gbo(2, 1)
2319 0 : pp_ub(ip) = coarse_gbo(2, 1) - 1
2320 : END IF
2321 72 : IF (.NOT. is_split .OR. .NOT. pbc) THEN
2322 48 : pp_lb(ip) = MAX(pp_lb(ip), coarse_gbo(1, 1))
2323 48 : pp_ub(ip) = MIN(pp_ub(ip), coarse_gbo(2, 1))
2324 : END IF
2325 : END DO
2326 :
2327 72 : rcv_size = 0
2328 72 : DO ip = 0, n_procs - 1
2329 48 : DO x = pp_lb(ip), coarse_gbo(1, 1) - 1
2330 0 : x_att = coarse_gbo(1, 1) + MODULO(x - coarse_gbo(1, 1), s(1))
2331 48 : IF (x_att >= my_coarse_bo(1, 1) .AND. x_att <= my_coarse_bo(2, 1)) THEN
2332 0 : rcv_size(ip) = rcv_size(ip) + coarse_slice_size
2333 : END IF
2334 : END DO
2335 : rcv_size(ip) = rcv_size(ip) + coarse_slice_size* &
2336 : MAX(0, &
2337 48 : MIN(pp_ub(ip), my_coarse_bo(2, 1)) - MAX(pp_lb(ip), my_coarse_bo(1, 1)) + 1)
2338 72 : DO x = coarse_gbo(2, 1) + 1, pp_ub(ip)
2339 0 : x_att = coarse_gbo(1, 1) + MODULO(x - coarse_gbo(1, 1), s(1))
2340 48 : IF (x_att >= my_coarse_bo(1, 1) .AND. x_att <= my_coarse_bo(2, 1)) THEN
2341 0 : rcv_size(ip) = rcv_size(ip) + coarse_slice_size
2342 : END IF
2343 : END DO
2344 : END DO
2345 :
2346 : ! ** offsets & alloc send-rcv
2347 :
2348 : send_tot_size = 0
2349 72 : DO ip = 0, n_procs - 1
2350 48 : send_offset(ip) = send_tot_size
2351 72 : send_tot_size = send_tot_size + send_size(ip)
2352 : END DO
2353 24 : IF (send_tot_size /= (coarse_bo(2, 1) - coarse_bo(1, 1) + 1)*coarse_slice_size) &
2354 0 : CPABORT("Error calculating send_tot_size")
2355 72 : ALLOCATE (send_buf(0:send_tot_size - 1))
2356 :
2357 24 : rcv_tot_size = 0
2358 72 : DO ip = 0, n_procs - 1
2359 48 : rcv_offset(ip) = rcv_tot_size
2360 72 : rcv_tot_size = rcv_tot_size + rcv_size(ip)
2361 : END DO
2362 72 : ALLOCATE (rcv_buf(0:rcv_tot_size - 1))
2363 :
2364 : ! ** fill send buffer
2365 :
2366 24 : pos_of_x => coarse_coeffs_pw%pw_grid%para%pos_of_x
2367 : p_old = pos_of_x(coarse_gbo(1, 1) &
2368 24 : + MODULO(coarse_bo(1, 1) - coarse_gbo(1, 1), s(1)))
2369 72 : sent_size(:) = send_offset
2370 24 : ss = coarse_bo(2, 1) - coarse_bo(1, 1) + 1
2371 184 : DO x = coarse_bo(1, 1), coarse_bo(2, 1)
2372 160 : p = pos_of_x(coarse_gbo(1, 1) + MODULO(x - coarse_gbo(1, 1), s(1)))
2373 : CALL dcopy(coarse_slice_size, &
2374 : coarse_coeffs(x, coarse_bo(1, 2), &
2375 160 : coarse_bo(1, 3)), ss, send_buf(sent_size(p)), 1)
2376 184 : sent_size(p) = sent_size(p) + coarse_slice_size
2377 : END DO
2378 :
2379 48 : IF (ANY(sent_size(0:n_procs - 2) /= send_offset(1:n_procs - 1))) &
2380 0 : CPABORT("error 1 filling send buffer")
2381 24 : IF (sent_size(n_procs - 1) /= send_tot_size) &
2382 0 : CPABORT("error 2 filling send buffer")
2383 :
2384 : IF (local_data) THEN
2385 24 : DEALLOCATE (coarse_coeffs)
2386 : ELSE
2387 : NULLIFY (coarse_coeffs)
2388 : END IF
2389 :
2390 48 : CPASSERT(ALL(sent_size(:n_procs - 2) == send_offset(1:)))
2391 24 : CPASSERT(sent_size(n_procs - 1) == send_tot_size)
2392 : ! test send/rcv sizes
2393 24 : CALL coarse_coeffs_pw%pw_grid%para%group%alltoall(send_size, real_rcv_size, 1)
2394 :
2395 72 : CPASSERT(ALL(real_rcv_size == rcv_size))
2396 : ! all2all
2397 : CALL coarse_coeffs_pw%pw_grid%para%group%alltoall(sb=send_buf, scount=send_size, sdispl=send_offset, &
2398 24 : rb=rcv_buf, rcount=rcv_size, rdispl=rcv_offset)
2399 :
2400 : ! ** sum & reorder rcv buffer
2401 :
2402 72 : sent_size(:) = rcv_offset
2403 72 : DO ip = 0, n_procs - 1
2404 :
2405 48 : DO x = pp_lb(ip), coarse_gbo(1, 1) - 1
2406 0 : x_att = coarse_gbo(1, 1) + MODULO(x - coarse_gbo(1, 1), s(1))
2407 48 : IF (x_att >= my_coarse_bo(1, 1) .AND. x_att <= my_coarse_bo(2, 1)) THEN
2408 0 : ii = sent_size(ip)
2409 0 : DO k = coarse_bo(1, 3), coarse_bo(2, 3)
2410 0 : DO j = coarse_bo(1, 2), coarse_bo(2, 2)
2411 0 : coarse_coeffs_pw%array(x_att, j, k) = coarse_coeffs_pw%array(x_att, j, k) + rcv_buf(ii)
2412 0 : ii = ii + 1
2413 : END DO
2414 : END DO
2415 0 : sent_size(ip) = ii
2416 : END IF
2417 : END DO
2418 :
2419 48 : ii = sent_size(ip)
2420 208 : DO x_att = MAX(pp_lb(ip), my_coarse_bo(1, 1)), MIN(pp_ub(ip), my_coarse_bo(2, 1))
2421 2160 : DO k = coarse_bo(1, 3), coarse_bo(2, 3)
2422 29920 : DO j = coarse_bo(1, 2), coarse_bo(2, 2)
2423 27808 : coarse_coeffs_pw%array(x_att, j, k) = coarse_coeffs_pw%array(x_att, j, k) + rcv_buf(ii)
2424 29760 : ii = ii + 1
2425 : END DO
2426 : END DO
2427 : END DO
2428 48 : sent_size(ip) = ii
2429 :
2430 72 : DO x = coarse_gbo(2, 1) + 1, pp_ub(ip)
2431 0 : x_att = coarse_gbo(1, 1) + MODULO(x - coarse_gbo(1, 1), s(1))
2432 48 : IF (x_att >= my_coarse_bo(1, 1) .AND. x_att <= my_coarse_bo(2, 1)) THEN
2433 0 : ii = sent_size(ip)
2434 0 : DO k = coarse_bo(1, 3), coarse_bo(2, 3)
2435 0 : DO j = coarse_bo(1, 2), coarse_bo(2, 2)
2436 0 : coarse_coeffs_pw%array(x_att, j, k) = coarse_coeffs_pw%array(x_att, j, k) + rcv_buf(ii)
2437 0 : ii = ii + 1
2438 : END DO
2439 : END DO
2440 0 : sent_size(ip) = ii
2441 : END IF
2442 : END DO
2443 :
2444 : END DO
2445 :
2446 48 : IF (ANY(sent_size(0:n_procs - 2) /= rcv_offset(1:n_procs - 1))) &
2447 0 : CPABORT("error 1 handling the rcv buffer")
2448 24 : IF (sent_size(n_procs - 1) /= rcv_tot_size) &
2449 0 : CPABORT("error 2 handling the rcv buffer")
2450 :
2451 : ! dealloc
2452 24 : DEALLOCATE (send_size, send_offset, rcv_size, rcv_offset)
2453 24 : DEALLOCATE (send_buf, rcv_buf, real_rcv_size)
2454 24 : DEALLOCATE (pp_ub, pp_lb)
2455 48 : CALL timestop(handle2)
2456 : ELSE
2457 : CPASSERT(.NOT. local_data)
2458 : END IF
2459 :
2460 1062 : CALL timestop(handle)
2461 2124 : END SUBROUTINE add_fine2coarse
2462 :
2463 : ! **************************************************************************************************
2464 : !> \brief ...
2465 : !> \param preconditioner the preconditioner to create
2466 : !> \param precond_kind the kind of preconditioner to use
2467 : !> \param pool a pool with grids of the same type as the elements to
2468 : !> precondition
2469 : !> \param pbc if periodic boundary conditions should be applied
2470 : !> \param transpose ...
2471 : !> \author fawzi
2472 : ! **************************************************************************************************
2473 33840 : SUBROUTINE pw_spline_precond_create(preconditioner, precond_kind, &
2474 : pool, pbc, transpose)
2475 : TYPE(pw_spline_precond_type), INTENT(OUT) :: preconditioner
2476 : INTEGER, INTENT(in) :: precond_kind
2477 : TYPE(pw_pool_type), INTENT(IN), POINTER :: pool
2478 : LOGICAL, INTENT(in) :: pbc, transpose
2479 :
2480 : preconditioner%kind = no_precond
2481 3760 : preconditioner%pool => pool
2482 3760 : preconditioner%pbc = pbc
2483 3760 : preconditioner%transpose = transpose
2484 3760 : CALL pool%retain()
2485 3760 : CALL pw_spline_precond_set_kind(preconditioner, precond_kind)
2486 3760 : END SUBROUTINE pw_spline_precond_create
2487 :
2488 : ! **************************************************************************************************
2489 : !> \brief switches the types of precoditioner to use
2490 : !> \param preconditioner the preconditioner to be changed
2491 : !> \param precond_kind the new kind of preconditioner to use
2492 : !> \param pbc ...
2493 : !> \param transpose ...
2494 : !> \author fawzi
2495 : ! **************************************************************************************************
2496 7520 : SUBROUTINE pw_spline_precond_set_kind(preconditioner, precond_kind, pbc, &
2497 : transpose)
2498 : TYPE(pw_spline_precond_type), INTENT(INOUT) :: preconditioner
2499 : INTEGER, INTENT(in) :: precond_kind
2500 : LOGICAL, INTENT(in), OPTIONAL :: pbc, transpose
2501 :
2502 : LOGICAL :: do_3d_coeff
2503 : REAL(kind=dp) :: s
2504 :
2505 7520 : IF (PRESENT(transpose)) preconditioner%transpose = transpose
2506 7520 : do_3d_coeff = .FALSE.
2507 7520 : preconditioner%kind = precond_kind
2508 7520 : IF (PRESENT(pbc)) preconditioner%pbc = pbc
2509 : SELECT CASE (precond_kind)
2510 : CASE (no_precond)
2511 : CASE (precond_spl3_aint2)
2512 0 : preconditioner%coeffs_1d = (/-1.66_dp*0.25_dp, 1.66_dp, -1.66_dp*0.25_dp/)
2513 0 : preconditioner%sharpen = .FALSE.
2514 0 : preconditioner%normalize = .FALSE.
2515 3760 : do_3d_coeff = .TRUE.
2516 : CASE (precond_spl3_3)
2517 3760 : preconditioner%coeffs_1d(1) = -0.25_dp*1.6_dp
2518 3760 : preconditioner%coeffs_1d(2) = 1.6_dp
2519 3760 : preconditioner%coeffs_1d(3) = -0.25_dp*1.6_dp
2520 3760 : preconditioner%sharpen = .FALSE.
2521 3760 : preconditioner%normalize = .FALSE.
2522 0 : do_3d_coeff = .TRUE.
2523 : CASE (precond_spl3_2)
2524 0 : preconditioner%coeffs_1d(1) = -0.26_dp*1.76_dp
2525 0 : preconditioner%coeffs_1d(2) = 1.76_dp
2526 0 : preconditioner%coeffs_1d(3) = -0.26_dp*1.76_dp
2527 0 : preconditioner%sharpen = .FALSE.
2528 0 : preconditioner%normalize = .FALSE.
2529 : do_3d_coeff = .TRUE.
2530 : CASE (precond_spl3_aint)
2531 15040 : preconditioner%coeffs_1d = spl3_1d_coeffs0
2532 3760 : preconditioner%sharpen = .TRUE.
2533 3760 : preconditioner%normalize = .TRUE.
2534 0 : do_3d_coeff = .TRUE.
2535 : CASE (precond_spl3_1)
2536 0 : preconditioner%coeffs_1d(1) = 0.5_dp/3._dp**(1._dp/3._dp)
2537 0 : preconditioner%coeffs_1d(2) = 4._dp/3._dp**(1._dp/3._dp)
2538 0 : preconditioner%coeffs_1d(3) = 0.5_dp/3._dp**(1._dp/3._dp)
2539 0 : preconditioner%sharpen = .TRUE.
2540 0 : preconditioner%normalize = .FALSE.
2541 0 : do_3d_coeff = .TRUE.
2542 : CASE default
2543 7520 : CPABORT("")
2544 : END SELECT
2545 : IF (do_3d_coeff) THEN
2546 7520 : s = 1._dp
2547 7520 : IF (preconditioner%sharpen) s = -1._dp
2548 : preconditioner%coeffs(1) = &
2549 : s*preconditioner%coeffs_1d(2)* &
2550 : preconditioner%coeffs_1d(2)* &
2551 7520 : preconditioner%coeffs_1d(2)
2552 : preconditioner%coeffs(2) = &
2553 : s*preconditioner%coeffs_1d(1)* &
2554 : preconditioner%coeffs_1d(2)* &
2555 7520 : preconditioner%coeffs_1d(2)
2556 : preconditioner%coeffs(3) = &
2557 : s*preconditioner%coeffs_1d(1)* &
2558 : preconditioner%coeffs_1d(1)* &
2559 7520 : preconditioner%coeffs_1d(2)
2560 : preconditioner%coeffs(4) = &
2561 : s*preconditioner%coeffs_1d(1)* &
2562 : preconditioner%coeffs_1d(1)* &
2563 7520 : preconditioner%coeffs_1d(1)
2564 7520 : IF (preconditioner%sharpen) THEN
2565 3760 : IF (preconditioner%normalize) THEN
2566 : preconditioner%coeffs(1) = 2._dp + &
2567 3760 : preconditioner%coeffs(1)
2568 : ELSE
2569 0 : preconditioner%coeffs(1) = -preconditioner%coeffs(1)
2570 : END IF
2571 : END IF
2572 : END IF
2573 7520 : END SUBROUTINE pw_spline_precond_set_kind
2574 :
2575 : ! **************************************************************************************************
2576 : !> \brief releases the preconditioner
2577 : !> \param preconditioner the preconditioner to release
2578 : !> \author fawzi
2579 : ! **************************************************************************************************
2580 3760 : SUBROUTINE pw_spline_precond_release(preconditioner)
2581 : TYPE(pw_spline_precond_type), INTENT(INOUT) :: preconditioner
2582 :
2583 3760 : CALL pw_pool_release(preconditioner%pool)
2584 3760 : END SUBROUTINE pw_spline_precond_release
2585 :
2586 : ! **************************************************************************************************
2587 : !> \brief applies the preconditioner to the system of equations to find the
2588 : !> coefficients of the spline
2589 : !> \param preconditioner the preconditioner to apply
2590 : !> \param in_v the grid on which the preconditioner should be applied
2591 : !> \param out_v place to store the preconditioner applied on v_out
2592 : !> \author fawzi
2593 : ! **************************************************************************************************
2594 76107 : SUBROUTINE pw_spline_do_precond(preconditioner, in_v, out_v)
2595 : TYPE(pw_spline_precond_type), INTENT(IN) :: preconditioner
2596 : TYPE(pw_r3d_rs_type), INTENT(IN) :: in_v
2597 : TYPE(pw_r3d_rs_type), INTENT(INOUT) :: out_v
2598 :
2599 76107 : SELECT CASE (preconditioner%kind)
2600 : CASE (no_precond)
2601 0 : CALL pw_copy(in_v, out_v)
2602 : CASE (precond_spl3_aint, precond_spl3_1)
2603 3760 : CALL pw_zero(out_v)
2604 3760 : IF (preconditioner%pbc) THEN
2605 : CALL pw_nn_smear_r(pw_in=in_v, pw_out=out_v, &
2606 438 : coeffs=preconditioner%coeffs)
2607 : ELSE
2608 : CALL pw_nn_compose_r_no_pbc(weights_1d=preconditioner%coeffs_1d, &
2609 : pw_in=in_v, pw_out=out_v, sharpen=preconditioner%sharpen, &
2610 : normalize=preconditioner%normalize, &
2611 3322 : transpose=preconditioner%transpose)
2612 : END IF
2613 : CASE (precond_spl3_3, precond_spl3_2, precond_spl3_aint2)
2614 72347 : CALL pw_zero(out_v)
2615 72347 : IF (preconditioner%pbc) THEN
2616 : CALL pw_nn_smear_r(pw_in=in_v, pw_out=out_v, &
2617 6322 : coeffs=preconditioner%coeffs)
2618 : ELSE
2619 : CALL pw_nn_compose_r_no_pbc(weights_1d=preconditioner%coeffs_1d, &
2620 : pw_in=in_v, pw_out=out_v, sharpen=preconditioner%sharpen, &
2621 : normalize=preconditioner%normalize, smooth_boundary=.TRUE., &
2622 66025 : transpose=preconditioner%transpose)
2623 : END IF
2624 : CASE default
2625 76107 : CPABORT("")
2626 : END SELECT
2627 76107 : END SUBROUTINE pw_spline_do_precond
2628 :
2629 : ! **************************************************************************************************
2630 : !> \brief solves iteratively (CG) a systmes of linear equations
2631 : !> linOp(coeffs)=values
2632 : !> (for example those needed to find the coefficients of a spline)
2633 : !> Returns true if the it succeeded to achieve the requested accuracy
2634 : !> \param values the right hand side of the system
2635 : !> \param coeffs will contain the solution of the system (and on entry
2636 : !> it contains the starting point)
2637 : !> \param linOp the linear operator to be inverted
2638 : !> \param preconditioner the preconditioner to apply
2639 : !> \param pool a pool of grids (for the temporary objects)
2640 : !> \param eps_r the requested precision on the residual
2641 : !> \param eps_x the requested precision on the solution
2642 : !> \param max_iter maximum number of iteration allowed
2643 : !> \param sumtype ...
2644 : !> \return ...
2645 : !> \author fawzi
2646 : ! **************************************************************************************************
2647 3760 : FUNCTION find_coeffs(values, coeffs, linOp, preconditioner, pool, &
2648 : eps_r, eps_x, max_iter, sumtype) RESULT(res)
2649 :
2650 : TYPE(pw_r3d_rs_type), INTENT(IN) :: values
2651 : TYPE(pw_r3d_rs_type), INTENT(INOUT) :: coeffs
2652 : INTERFACE
2653 : SUBROUTINE linOp(pw_in, pw_out)
2654 : USE pw_types, ONLY: pw_r3d_rs_type
2655 : TYPE(pw_r3d_rs_type), INTENT(IN) :: pw_in
2656 : TYPE(pw_r3d_rs_type), INTENT(INOUT) :: pw_out
2657 : END SUBROUTINE linOp
2658 : END INTERFACE
2659 : TYPE(pw_spline_precond_type), INTENT(IN) :: preconditioner
2660 : TYPE(pw_pool_type), POINTER :: pool
2661 : REAL(kind=dp), INTENT(in) :: eps_r, eps_x
2662 : INTEGER, INTENT(in) :: max_iter
2663 : INTEGER, INTENT(in), OPTIONAL :: sumtype
2664 : LOGICAL :: res
2665 :
2666 : INTEGER :: i, iiter, iter, j, k
2667 : INTEGER, DIMENSION(2, 3) :: bo
2668 : LOGICAL :: last
2669 : REAL(kind=dp) :: alpha, beta, eps_r_att, eps_x_att, r_z, &
2670 : r_z_new
2671 : TYPE(cp_logger_type), POINTER :: logger
2672 : TYPE(pw_r3d_rs_type) :: Ap, p, r, z
2673 :
2674 3760 : last = .FALSE.
2675 :
2676 3760 : res = .FALSE.
2677 3760 : logger => cp_get_default_logger()
2678 3760 : CALL pool%create_pw(r)
2679 3760 : CALL pool%create_pw(z)
2680 3760 : CALL pool%create_pw(p)
2681 3760 : CALL pool%create_pw(Ap)
2682 :
2683 : !CALL cp_add_iter_level(logger%iter_info,level_name="SPLINE_FIND_COEFFS")
2684 8278 : ext_do: DO iiter = 1, max_iter, 10
2685 8278 : CALL pw_zero(r)
2686 8278 : CALL linOp(pw_in=coeffs, pw_out=r)
2687 61814924 : r%array = -r%array
2688 8278 : CALL pw_axpy(values, r)
2689 8278 : CALL pw_spline_do_precond(preconditioner, in_v=r, out_v=z)
2690 8278 : CALL pw_copy(z, p)
2691 8278 : r_z = pw_integral_ab(r, z, sumtype)
2692 :
2693 72347 : DO iter = iiter, MIN(iiter + 9, max_iter)
2694 67829 : eps_r_att = SQRT(pw_integral_ab(r, r, sumtype))
2695 67829 : IF (eps_r_att == 0._dp) THEN
2696 67829 : eps_x_att = 0._dp
2697 : last = .TRUE.
2698 : ELSE
2699 67823 : CALL pw_zero(Ap)
2700 67823 : CALL linOp(pw_in=p, pw_out=Ap)
2701 67823 : alpha = r_z/pw_integral_ab(Ap, p, sumtype)
2702 :
2703 67823 : CALL pw_axpy(p, coeffs, alpha=alpha)
2704 :
2705 67823 : eps_x_att = alpha*SQRT(pw_integral_ab(p, p, sumtype)) ! try to spare if unneeded?
2706 67823 : IF (eps_r_att < eps_r .AND. eps_x_att < eps_x) last = .TRUE.
2707 : END IF
2708 : !CALL cp_iterate(logger%iter_info,last=last)
2709 64069 : IF (last) THEN
2710 : res = .TRUE.
2711 : EXIT ext_do
2712 : END IF
2713 :
2714 64069 : CALL pw_axpy(Ap, r, alpha=-alpha)
2715 :
2716 64069 : CALL pw_spline_do_precond(preconditioner, in_v=r, out_v=z)
2717 :
2718 64069 : r_z_new = pw_integral_ab(r, z, sumtype)
2719 64069 : beta = r_z_new/r_z
2720 64069 : r_z = r_z_new
2721 :
2722 640690 : bo = p%pw_grid%bounds_local
2723 972636 : DO k = bo(1, 3), bo(2, 3)
2724 19594431 : DO j = bo(1, 2), bo(2, 2)
2725 417436721 : DO i = bo(1, 1), bo(2, 1)
2726 416532672 : p%array(i, j, k) = z%array(i, j, k) + beta*p%array(i, j, k)
2727 : END DO
2728 : END DO
2729 : END DO
2730 :
2731 : END DO
2732 : END DO ext_do
2733 : !CALL cp_rm_iter_level(logger%iter_info,level_name="SPLINE_FIND_COEFFS")
2734 :
2735 3760 : CALL pool%give_back_pw(r)
2736 3760 : CALL pool%give_back_pw(z)
2737 3760 : CALL pool%give_back_pw(p)
2738 3760 : CALL pool%give_back_pw(Ap)
2739 :
2740 3760 : END FUNCTION find_coeffs
2741 :
2742 : ! **************************************************************************************************
2743 : !> \brief adds to pw_out pw_in composed with the weights
2744 : !> pw_out%array(i,j,k)=pw_out%array(i,j,k)+sum(pw_in%array(i+l,j+m,k+n)*
2745 : !> weights_1d(abs(l)+1)*weights_1d(abs(m)+1)*weights_1d(abs(n)+1),
2746 : !> l=-1..1,m=-1..1,n=-1..1)
2747 : !> \param weights_1d ...
2748 : !> \param pw_in ...
2749 : !> \param pw_out ...
2750 : !> \param sharpen ...
2751 : !> \param normalize ...
2752 : !> \param transpose ...
2753 : !> \param smooth_boundary ...
2754 : !> \author fawzi
2755 : ! **************************************************************************************************
2756 138688 : SUBROUTINE pw_nn_compose_r_no_pbc(weights_1d, pw_in, pw_out, &
2757 : sharpen, normalize, transpose, smooth_boundary)
2758 : REAL(kind=dp), DIMENSION(-1:1) :: weights_1d
2759 : TYPE(pw_r3d_rs_type), INTENT(IN) :: pw_in, pw_out
2760 : LOGICAL, INTENT(in), OPTIONAL :: sharpen, normalize, transpose, &
2761 : smooth_boundary
2762 :
2763 : INTEGER :: first_index, i, j, jw, k, kw, &
2764 : last_index, myj, myk, n_els
2765 : INTEGER, DIMENSION(2, 3) :: bo, gbo
2766 : INTEGER, DIMENSION(3) :: s
2767 : LOGICAL :: has_l_boundary, has_u_boundary, &
2768 : is_split, my_normalize, my_sharpen, &
2769 : my_smooth_boundary, my_transpose
2770 : REAL(kind=dp) :: in_val_f, in_val_l, in_val_tmp, w_j, w_k
2771 : REAL(kind=dp), DIMENSION(-1:1) :: w
2772 138688 : REAL(kind=dp), DIMENSION(:, :), POINTER :: l_boundary, tmp, u_boundary
2773 138688 : REAL(kind=dp), DIMENSION(:, :, :), POINTER :: in_val, out_val
2774 :
2775 1386880 : bo = pw_in%pw_grid%bounds_local
2776 1386880 : gbo = pw_in%pw_grid%bounds
2777 138688 : in_val => pw_in%array
2778 138688 : out_val => pw_out%array
2779 138688 : my_sharpen = .FALSE.
2780 138688 : IF (PRESENT(sharpen)) my_sharpen = sharpen
2781 138688 : my_normalize = .FALSE.
2782 138688 : IF (PRESENT(normalize)) my_normalize = normalize
2783 138688 : my_transpose = .FALSE.
2784 138688 : IF (PRESENT(transpose)) my_transpose = transpose
2785 138688 : my_smooth_boundary = .FALSE.
2786 138688 : IF (PRESENT(smooth_boundary)) my_smooth_boundary = smooth_boundary
2787 138688 : CPASSERT(.NOT. my_normalize .OR. my_sharpen)
2788 138688 : CPASSERT(.NOT. my_smooth_boundary .OR. .NOT. my_sharpen)
2789 554752 : DO i = 1, 3
2790 554752 : s(i) = bo(2, i) - bo(1, i) + 1
2791 : END DO
2792 554752 : IF (ANY(s < 1)) RETURN
2793 : is_split = ANY(pw_in%pw_grid%bounds_local(:, 1) /= &
2794 412716 : pw_in%pw_grid%bounds(:, 1))
2795 138688 : has_l_boundary = (gbo(1, 1) == bo(1, 1))
2796 138688 : has_u_boundary = (gbo(2, 1) == bo(2, 1))
2797 138688 : IF (is_split) THEN
2798 : ALLOCATE (l_boundary(bo(1, 2):bo(2, 2), bo(1, 3):bo(2, 3)), &
2799 : u_boundary(bo(1, 2):bo(2, 2), bo(1, 3):bo(2, 3)), &
2800 17856 : tmp(bo(1, 2):bo(2, 2), bo(1, 3):bo(2, 3)))
2801 311256 : tmp(:, :) = pw_in%array(bo(2, 1), :, :)
2802 : CALL pw_in%pw_grid%para%group%sendrecv(tmp, pw_in%pw_grid%para%pos_of_x( &
2803 : gbo(1, 1) + MODULO(bo(2, 1) + 1 - gbo(1, 1), gbo(2, 1) - gbo(1, 1) + 1)), &
2804 : l_boundary, pw_in%pw_grid%para%pos_of_x( &
2805 620280 : gbo(1, 1) + MODULO(bo(1, 1) - 1 - gbo(1, 1), gbo(2, 1) - gbo(1, 1) + 1)))
2806 311256 : tmp(:, :) = pw_in%array(bo(1, 1), :, :)
2807 : CALL pw_in%pw_grid%para%group%sendrecv(tmp, pw_in%pw_grid%para%pos_of_x( &
2808 : gbo(1, 1) + MODULO(bo(1, 1) - 1 - gbo(1, 1), gbo(2, 1) - gbo(1, 1) + 1)), &
2809 : u_boundary, pw_in%pw_grid%para%pos_of_x( &
2810 620280 : gbo(1, 1) + MODULO(bo(2, 1) + 1 - gbo(1, 1), gbo(2, 1) - gbo(1, 1) + 1)))
2811 2232 : DEALLOCATE (tmp)
2812 : END IF
2813 :
2814 138688 : n_els = s(1)
2815 138688 : IF (has_l_boundary) THEN
2816 137572 : n_els = n_els - 1
2817 137572 : first_index = bo(1, 1) + 1
2818 : ELSE
2819 1116 : first_index = bo(1, 1)
2820 : END IF
2821 138688 : IF (has_u_boundary) THEN
2822 137572 : n_els = n_els - 1
2823 137572 : last_index = bo(2, 1) - 1
2824 : ELSE
2825 1116 : last_index = bo(2, 1)
2826 : END IF
2827 : !$OMP PARALLEL DO DEFAULT(NONE) &
2828 : !$OMP PRIVATE(k, kw, myk, j, jw, myj, in_val_f, in_val_l, w_k, w_j, in_val_tmp, w) &
2829 : !$OMP SHARED(bo, in_val, out_val, s, l_boundary, u_boundary, weights_1d, is_split, &
2830 : !$OMP my_transpose, gbo, my_smooth_boundary, has_l_boundary, has_u_boundary, &
2831 138688 : !$OMP my_sharpen, last_index, first_index, my_normalize, n_els)
2832 : DO k = bo(1, 3), bo(2, 3)
2833 : DO kw = -1, 1
2834 : myk = k + kw
2835 : IF (my_transpose) THEN
2836 : IF (k >= gbo(2, 3) - 1 .OR. k <= gbo(1, 3) + 1) THEN
2837 : IF (k == gbo(2, 3) .OR. k == gbo(1, 3)) THEN
2838 : IF (myk < gbo(2, 3) .AND. myk > gbo(1, 3)) THEN
2839 : w_k = weights_1d(kw)
2840 : IF (my_smooth_boundary) THEN
2841 : w_k = weights_1d(kw)/weights_1d(0)
2842 : END IF
2843 : ELSE IF (kw == 0) THEN
2844 : w_k = 1._dp
2845 : ELSE
2846 : CYCLE
2847 : END IF
2848 : ELSE
2849 : IF (myk == gbo(2, 3) .OR. myk == gbo(1, 3)) CYCLE
2850 : w_k = weights_1d(kw)
2851 : END IF
2852 : ELSE
2853 : w_k = weights_1d(kw)
2854 : END IF
2855 : ELSE
2856 : IF (k >= gbo(2, 3) - 1 .OR. k <= gbo(1, 3) + 1) THEN
2857 : IF (k == gbo(2, 3) .OR. k == gbo(1, 3)) THEN
2858 : IF (kw /= 0) CYCLE
2859 : w_k = 1._dp
2860 : ELSE
2861 : IF (my_smooth_boundary .AND. ((k == gbo(1, 3) + 1 .AND. myk == gbo(1, 3)) .OR. &
2862 : (k == gbo(2, 3) - 1 .AND. myk == gbo(2, 3)))) THEN
2863 : w_k = weights_1d(kw)/weights_1d(0)
2864 : ELSE
2865 : w_k = weights_1d(kw)
2866 : END IF
2867 : END IF
2868 : ELSE
2869 : w_k = weights_1d(kw)
2870 : END IF
2871 : END IF
2872 : DO j = bo(1, 2), bo(2, 2)
2873 : DO jw = -1, 1
2874 : myj = j + jw
2875 : IF (j < gbo(2, 2) - 1 .AND. j > gbo(1, 2) + 1) THEN
2876 : w_j = w_k*weights_1d(jw)
2877 : ELSE
2878 : IF (my_transpose) THEN
2879 : IF (j == gbo(2, 2) .OR. j == gbo(1, 2)) THEN
2880 : IF (myj < gbo(2, 2) .AND. myj > gbo(1, 2)) THEN
2881 : w_j = weights_1d(jw)*w_k
2882 : IF (my_smooth_boundary) THEN
2883 : w_j = weights_1d(jw)/weights_1d(0)*w_k
2884 : END IF
2885 : ELSE IF (jw == 0) THEN
2886 : w_j = w_k
2887 : ELSE
2888 : CYCLE
2889 : END IF
2890 : ELSE
2891 : IF (myj == gbo(2, 2) .OR. myj == gbo(1, 2)) CYCLE
2892 : w_j = w_k*weights_1d(jw)
2893 : END IF
2894 : ELSE
2895 : IF (j == gbo(2, 2) .OR. j == gbo(1, 2)) THEN
2896 : IF (jw /= 0) CYCLE
2897 : w_j = w_k
2898 : ELSE IF (my_smooth_boundary .AND. ((j == gbo(1, 2) + 1 .AND. myj == gbo(1, 2)) .OR. &
2899 : (j == gbo(2, 2) - 1 .AND. myj == gbo(2, 2)))) THEN
2900 : w_j = w_k*weights_1d(jw)/weights_1d(0)
2901 : ELSE
2902 : w_j = w_k*weights_1d(jw)
2903 : END IF
2904 : END IF
2905 : END IF
2906 :
2907 : IF (has_l_boundary) THEN
2908 : IF (my_transpose) THEN
2909 : IF (s(1) == 1) THEN
2910 : CPASSERT(.NOT. has_u_boundary)
2911 : in_val_tmp = u_boundary(myj, myk)
2912 : ELSE
2913 : in_val_tmp = in_val(bo(1, 1) + 1, myj, myk)
2914 : END IF
2915 : IF (my_sharpen) THEN
2916 : IF (kw == 0 .AND. jw == 0) THEN
2917 : IF (my_normalize) THEN
2918 : out_val(bo(1, 1), j, k) = out_val(bo(1, 1), j, k) + &
2919 : (2.0_dp - w_j)*in_val(bo(1, 1), myj, myk) - &
2920 : in_val_tmp*weights_1d(1)*w_j
2921 : ELSE
2922 : out_val(bo(1, 1), j, k) = out_val(bo(1, 1), j, k) + &
2923 : in_val(bo(1, 1), myj, myk)*w_j - &
2924 : in_val_tmp*weights_1d(1)*w_j
2925 : END IF
2926 : ELSE
2927 : out_val(bo(1, 1), j, k) = out_val(bo(1, 1), j, k) - &
2928 : in_val(bo(1, 1), myj, myk)*w_j - &
2929 : in_val_tmp*weights_1d(1)*w_j
2930 : END IF
2931 : ELSE IF (my_smooth_boundary) THEN
2932 : out_val(bo(1, 1), j, k) = out_val(bo(1, 1), j, k) + &
2933 : w_j*(in_val(bo(1, 1), myj, myk) + &
2934 : in_val_tmp*weights_1d(1)/weights_1d(0))
2935 : ELSE
2936 : out_val(bo(1, 1), j, k) = out_val(bo(1, 1), j, k) + &
2937 : w_j*(in_val(bo(1, 1), myj, myk) + &
2938 : in_val_tmp*weights_1d(1))
2939 : END IF
2940 : in_val_f = 0.0_dp
2941 : ELSE
2942 : in_val_f = in_val(bo(1, 1), myj, myk)
2943 : IF (my_sharpen) THEN
2944 : IF (kw == 0 .AND. jw == 0) THEN
2945 : IF (my_normalize) THEN
2946 : out_val(bo(1, 1), j, k) = out_val(bo(1, 1), j, k) + &
2947 : (2.0_dp - w_j)*in_val_f
2948 : ELSE
2949 : out_val(bo(1, 1), j, k) = out_val(bo(1, 1), j, k) + &
2950 : in_val_f*w_j
2951 : END IF
2952 : ELSE
2953 : out_val(bo(1, 1), j, k) = out_val(bo(1, 1), j, k) - &
2954 : in_val_f*w_j
2955 : END IF
2956 : ELSE
2957 : out_val(bo(1, 1), j, k) = out_val(bo(1, 1), j, k) + &
2958 : in_val_f*w_j
2959 : END IF
2960 : END IF
2961 : ELSE
2962 : in_val_f = l_boundary(myj, myk)
2963 : END IF
2964 : IF (has_u_boundary) THEN
2965 : IF (my_transpose) THEN
2966 : in_val_l = in_val(bo(2, 1), myj, myk)
2967 : IF (s(1) == 1) THEN
2968 : CPASSERT(.NOT. has_l_boundary)
2969 : in_val_tmp = l_boundary(myj, myk)
2970 : ELSE
2971 : in_val_tmp = in_val(bo(2, 1) - 1, myj, myk)
2972 : END IF
2973 : IF (my_sharpen) THEN
2974 : IF (kw == 0 .AND. jw == 0) THEN
2975 : IF (my_normalize) THEN
2976 : out_val(bo(2, 1), j, k) = out_val(bo(2, 1), j, k) + &
2977 : in_val_l*(2._dp - w_j) - &
2978 : in_val_tmp*weights_1d(1)*w_j
2979 : ELSE
2980 : out_val(bo(2, 1), j, k) = out_val(bo(2, 1), j, k) + &
2981 : in_val_l*w_j - &
2982 : in_val_tmp*weights_1d(1)*w_j
2983 : END IF
2984 : ELSE
2985 : out_val(bo(2, 1), j, k) = out_val(bo(2, 1), j, k) - &
2986 : w_j*in_val_l - &
2987 : in_val_tmp*weights_1d(1)*w_j
2988 : END IF
2989 : ELSE IF (my_smooth_boundary) THEN
2990 : out_val(bo(2, 1), j, k) = out_val(bo(2, 1), j, k) + &
2991 : w_j*(in_val_l + in_val_tmp*weights_1d(1)/weights_1d(0))
2992 : ELSE
2993 : out_val(bo(2, 1), j, k) = out_val(bo(2, 1), j, k) + &
2994 : w_j*(in_val_l + in_val_tmp*weights_1d(1))
2995 : END IF
2996 : in_val_l = 0._dp
2997 : ELSE
2998 : in_val_l = in_val(bo(2, 1), myj, myk)
2999 : IF (my_sharpen) THEN
3000 : IF (kw == 0 .AND. jw == 0) THEN
3001 : IF (my_normalize) THEN
3002 : out_val(bo(2, 1), j, k) = out_val(bo(2, 1), j, k) + &
3003 : in_val_l*(2._dp - w_j)
3004 : ELSE
3005 : out_val(bo(2, 1), j, k) = out_val(bo(2, 1), j, k) + &
3006 : in_val_l*w_j
3007 : END IF
3008 : ELSE
3009 : out_val(bo(2, 1), j, k) = out_val(bo(2, 1), j, k) - &
3010 : w_j*in_val_l
3011 : END IF
3012 : ELSE
3013 : out_val(bo(2, 1), j, k) = out_val(bo(2, 1), j, k) + &
3014 : w_j*in_val_l
3015 : END IF
3016 : END IF
3017 : ELSE
3018 : in_val_l = u_boundary(myj, myk)
3019 : END IF
3020 : IF (last_index >= first_index) THEN
3021 : IF (my_transpose) THEN
3022 : IF (bo(1, 1) - 1 == gbo(1, 1)) THEN
3023 : in_val_f = 0._dp
3024 : ELSE IF (bo(2, 1) + 1 == gbo(2, 1)) THEN
3025 : in_val_l = 0._dp
3026 : END IF
3027 : END IF
3028 : IF (my_sharpen) THEN
3029 : w = -weights_1d*w_j
3030 : IF (kw == 0 .AND. jw == 0) THEN
3031 : IF (my_normalize) THEN
3032 : w(0) = w(0) + 2._dp
3033 : ELSE
3034 : w(0) = -w(0)
3035 : END IF
3036 : END IF
3037 : ELSE
3038 : w = weights_1d*w_j
3039 : END IF
3040 : IF (my_smooth_boundary .AND. (.NOT. my_transpose)) THEN
3041 : IF (gbo(1, 1) + 1 >= bo(1, 1) .AND. &
3042 : gbo(1, 1) + 1 <= bo(2, 1) .AND. gbo(2, 1) - gbo(1, 1) > 2) THEN
3043 : IF (gbo(1, 1) >= bo(1, 1)) THEN
3044 : out_val(gbo(1, 1) + 1, j, k) = out_val(gbo(1, 1) + 1, j, k) + &
3045 : in_val(gbo(1, 1), myj, myk)*w_j*weights_1d(-1)* &
3046 : (1._dp/weights_1d(0) - 1._dp)
3047 : ELSE
3048 : out_val(gbo(1, 1) + 1, j, k) = out_val(gbo(1, 1) + 1, j, k) + &
3049 : l_boundary(myj, myk)*w_j*weights_1d(-1)* &
3050 : (1._dp/weights_1d(0) - 1._dp)
3051 : END IF
3052 : END IF
3053 : END IF
3054 : CALL pw_compose_stripe(weights=w, &
3055 : in_val=in_val(first_index:last_index, myj, myk), &
3056 : in_val_first=in_val_f, in_val_last=in_val_l, &
3057 : out_val=out_val(first_index:last_index, j, k), &
3058 : n_el=n_els)
3059 : !FM call pw_compose_stripe2(weights=w,&
3060 : !FM in_val=in_val,&
3061 : !FM in_val_first=in_val_f,in_val_last=in_val_l,&
3062 : !FM out_val=out_val,&
3063 : !FM first_val=first_index,last_val=last_index,&
3064 : !FM myj=myj,myk=myk,j=j,k=k)
3065 : IF (my_smooth_boundary .AND. (.NOT. my_transpose)) THEN
3066 : IF (gbo(2, 1) - 1 >= bo(1, 1) .AND. &
3067 : gbo(2, 1) - 1 <= bo(2, 1) .AND. gbo(2, 1) - gbo(1, 1) > 2) THEN
3068 : IF (gbo(2, 1) <= bo(2, 1)) THEN
3069 : out_val(gbo(2, 1) - 1, j, k) = out_val(gbo(2, 1) - 1, j, k) + &
3070 : in_val(gbo(2, 1), myj, myk)*w_j*weights_1d(1)* &
3071 : (1._dp/weights_1d(0) - 1._dp)
3072 : ELSE
3073 : out_val(gbo(2, 1) - 1, j, k) = out_val(gbo(2, 1) - 1, j, k) + &
3074 : u_boundary(myj, myk)*w_j*weights_1d(1)* &
3075 : (1._dp/weights_1d(0) - 1._dp)
3076 : END IF
3077 : END IF
3078 : END IF
3079 :
3080 : END IF
3081 : END DO
3082 : END DO
3083 : END DO
3084 : END DO
3085 :
3086 138688 : IF (is_split) THEN
3087 2232 : DEALLOCATE (l_boundary, u_boundary)
3088 : END IF
3089 138688 : END SUBROUTINE pw_nn_compose_r_no_pbc
3090 :
3091 : ! **************************************************************************************************
3092 : !> \brief ...
3093 : !> \param pw_in ...
3094 : !> \param pw_out ...
3095 : ! **************************************************************************************************
3096 41588 : SUBROUTINE spl3_nopbc(pw_in, pw_out)
3097 :
3098 : TYPE(pw_r3d_rs_type), INTENT(IN) :: pw_in
3099 : TYPE(pw_r3d_rs_type), INTENT(INOUT) :: pw_out
3100 :
3101 41588 : CALL pw_zero(pw_out)
3102 : CALL pw_nn_compose_r_no_pbc(weights_1d=spl3_1d_coeffs0, pw_in=pw_in, &
3103 41588 : pw_out=pw_out, sharpen=.FALSE., normalize=.FALSE.)
3104 :
3105 41588 : END SUBROUTINE spl3_nopbc
3106 :
3107 : ! **************************************************************************************************
3108 : !> \brief ...
3109 : !> \param pw_in ...
3110 : !> \param pw_out ...
3111 : ! **************************************************************************************************
3112 27753 : SUBROUTINE spl3_nopbct(pw_in, pw_out)
3113 :
3114 : TYPE(pw_r3d_rs_type), INTENT(IN) :: pw_in
3115 : TYPE(pw_r3d_rs_type), INTENT(INOUT) :: pw_out
3116 :
3117 27753 : CALL pw_zero(pw_out)
3118 : CALL pw_nn_compose_r_no_pbc(weights_1d=spl3_1d_coeffs0, pw_in=pw_in, &
3119 27753 : pw_out=pw_out, sharpen=.FALSE., normalize=.FALSE., transpose=.TRUE.)
3120 :
3121 27753 : END SUBROUTINE spl3_nopbct
3122 :
3123 : ! **************************************************************************************************
3124 : !> \brief ...
3125 : !> \param pw_in ...
3126 : !> \param pw_out ...
3127 : ! **************************************************************************************************
3128 6760 : SUBROUTINE spl3_pbc(pw_in, pw_out)
3129 :
3130 : TYPE(pw_r3d_rs_type), INTENT(IN) :: pw_in
3131 : TYPE(pw_r3d_rs_type), INTENT(INOUT) :: pw_out
3132 :
3133 6760 : CALL pw_zero(pw_out)
3134 6760 : CALL pw_nn_smear_r(pw_in, pw_out, coeffs=spline3_coeffs)
3135 :
3136 6760 : END SUBROUTINE spl3_pbc
3137 :
3138 : ! **************************************************************************************************
3139 : !> \brief Evaluates the PBC interpolated Spline (pw) function on the generic
3140 : !> input vector (vec)
3141 : !> \param vec ...
3142 : !> \param pw ...
3143 : !> \return ...
3144 : !> \par History
3145 : !> 12.2007 Adapted for use with distributed grids [rdeclerck]
3146 : !> \author Teodoro Laino 12/2005 [tlaino]
3147 : !> \note
3148 : !> Requires the Spline coefficients to be computed with PBC
3149 : ! **************************************************************************************************
3150 1821805 : FUNCTION Eval_Interp_Spl3_pbc(vec, pw) RESULT(val)
3151 : REAL(KIND=dp), DIMENSION(3), INTENT(in) :: vec
3152 : TYPE(pw_r3d_rs_type), INTENT(IN) :: pw
3153 : REAL(KIND=dp) :: val
3154 :
3155 : INTEGER :: i, ivec(3), j, k, npts(3)
3156 : INTEGER, DIMENSION(2, 3) :: bo, bo_l
3157 : INTEGER, DIMENSION(4) :: ii, ij, ik
3158 : LOGICAL :: my_mpsum
3159 : REAL(KIND=dp) :: a1, a2, a3, b1, b2, b3, c1, c2, c3, d1, d2, d3, dr1, dr2, dr3, e1, e2, e3, &
3160 : f1, f2, f3, g1, g2, g3, h1, h2, h3, p1, p2, p3, q1, q2, q3, r1, r2, r3, s1, s2, s3, s4, &
3161 : t1, t2, t3, t4, u1, u2, u3, v1, v2, v3, v4, xd1, xd2, xd3
3162 : REAL(KIND=dp), DIMENSION(4, 4, 4) :: box
3163 1821805 : REAL(KIND=dp), DIMENSION(:, :, :), POINTER :: grid
3164 :
3165 1821805 : NULLIFY (grid)
3166 1821805 : my_mpsum = (pw%pw_grid%para%mode /= PW_MODE_LOCAL)
3167 7287220 : npts = pw%pw_grid%npts
3168 7287220 : ivec = FLOOR(vec/pw%pw_grid%dr)
3169 1821805 : dr1 = pw%pw_grid%dr(1)
3170 1821805 : dr2 = pw%pw_grid%dr(2)
3171 1821805 : dr3 = pw%pw_grid%dr(3)
3172 :
3173 1821805 : xd1 = (vec(1)/dr1) - REAL(ivec(1), kind=dp)
3174 1821805 : xd2 = (vec(2)/dr2) - REAL(ivec(2), kind=dp)
3175 1821805 : xd3 = (vec(3)/dr3) - REAL(ivec(3), kind=dp)
3176 1821805 : grid => pw%array(:, :, :)
3177 18218050 : bo = pw%pw_grid%bounds
3178 18218050 : bo_l = pw%pw_grid%bounds_local
3179 :
3180 1821805 : ik(1) = MODULO(ivec(3) - 1, npts(3)) + bo(1, 3)
3181 1821805 : ik(2) = MODULO(ivec(3), npts(3)) + bo(1, 3)
3182 1821805 : ik(3) = MODULO(ivec(3) + 1, npts(3)) + bo(1, 3)
3183 1821805 : ik(4) = MODULO(ivec(3) + 2, npts(3)) + bo(1, 3)
3184 :
3185 1821805 : ij(1) = MODULO(ivec(2) - 1, npts(2)) + bo(1, 2)
3186 1821805 : ij(2) = MODULO(ivec(2), npts(2)) + bo(1, 2)
3187 1821805 : ij(3) = MODULO(ivec(2) + 1, npts(2)) + bo(1, 2)
3188 1821805 : ij(4) = MODULO(ivec(2) + 2, npts(2)) + bo(1, 2)
3189 :
3190 1821805 : ii(1) = MODULO(ivec(1) - 1, npts(1)) + bo(1, 1)
3191 1821805 : ii(2) = MODULO(ivec(1), npts(1)) + bo(1, 1)
3192 1821805 : ii(3) = MODULO(ivec(1) + 1, npts(1)) + bo(1, 1)
3193 1821805 : ii(4) = MODULO(ivec(1) + 2, npts(1)) + bo(1, 1)
3194 :
3195 9109025 : DO k = 1, 4
3196 38257905 : DO j = 1, 4
3197 153031620 : DO i = 1, 4
3198 : IF ( &
3199 : ii(i) >= bo_l(1, 1) .AND. &
3200 : ii(i) <= bo_l(2, 1) .AND. &
3201 : ij(j) >= bo_l(1, 2) .AND. &
3202 : ij(j) <= bo_l(2, 2) .AND. &
3203 116595520 : ik(k) >= bo_l(1, 3) .AND. &
3204 : ik(k) <= bo_l(2, 3) &
3205 29148880 : ) THEN
3206 : box(i, j, k) = grid(ii(i) + 1 - bo_l(1, 1), &
3207 : ij(j) + 1 - bo_l(1, 2), &
3208 58521472 : ik(k) + 1 - bo_l(1, 3))
3209 : ELSE
3210 58074048 : box(i, j, k) = 0.0_dp
3211 : END IF
3212 : END DO
3213 : END DO
3214 : END DO
3215 :
3216 1821805 : a1 = 3.0_dp + xd1
3217 1821805 : a2 = a1*a1
3218 1821805 : a3 = a2*a1
3219 1821805 : b1 = 2.0_dp + xd1
3220 1821805 : b2 = b1*b1
3221 1821805 : b3 = b2*b1
3222 1821805 : c1 = 1.0_dp + xd1
3223 1821805 : c2 = c1*c1
3224 1821805 : c3 = c2*c1
3225 1821805 : d1 = xd1
3226 1821805 : d2 = d1*d1
3227 1821805 : d3 = d2*d1
3228 1821805 : e1 = 3.0_dp + xd2
3229 1821805 : e2 = e1*e1
3230 1821805 : e3 = e2*e1
3231 1821805 : f1 = 2.0_dp + xd2
3232 1821805 : f2 = f1*f1
3233 1821805 : f3 = f2*f1
3234 1821805 : g1 = 1.0_dp + xd2
3235 1821805 : g2 = g1*g1
3236 1821805 : g3 = g2*g1
3237 1821805 : h1 = xd2
3238 1821805 : h2 = h1*h1
3239 1821805 : h3 = h2*h1
3240 1821805 : p1 = 3.0_dp + xd3
3241 1821805 : p2 = p1*p1
3242 1821805 : p3 = p2*p1
3243 1821805 : q1 = 2.0_dp + xd3
3244 1821805 : q2 = q1*q1
3245 1821805 : q3 = q2*q1
3246 1821805 : r1 = 1.0_dp + xd3
3247 1821805 : r2 = r1*r1
3248 1821805 : r3 = r2*r1
3249 1821805 : u1 = xd3
3250 1821805 : u2 = u1*u1
3251 1821805 : u3 = u2*u1
3252 :
3253 1821805 : t1 = 1.0_dp/6.0_dp*(64.0_dp - 48.0_dp*a1 + 12.0_dp*a2 - a3)
3254 1821805 : t2 = -22.0_dp/3.0_dp + 10.0_dp*b1 - 4.0_dp*b2 + 0.5_dp*b3
3255 1821805 : t3 = 2.0_dp/3.0_dp - 2.0_dp*c1 + 2.0_dp*c2 - 0.5_dp*c3
3256 1821805 : t4 = 1.0_dp/6.0_dp*d3
3257 1821805 : s1 = 1.0_dp/6.0_dp*(64.0_dp - 48.0_dp*e1 + 12.0_dp*e2 - e3)
3258 1821805 : s2 = -22.0_dp/3.0_dp + 10.0_dp*f1 - 4.0_dp*f2 + 0.5_dp*f3
3259 1821805 : s3 = 2.0_dp/3.0_dp - 2.0_dp*g1 + 2.0_dp*g2 - 0.5_dp*g3
3260 1821805 : s4 = 1.0_dp/6.0_dp*h3
3261 1821805 : v1 = 1.0_dp/6.0_dp*(64.0_dp - 48.0_dp*p1 + 12.0_dp*p2 - p3)
3262 1821805 : v2 = -22.0_dp/3.0_dp + 10.0_dp*q1 - 4.0_dp*q2 + 0.5_dp*q3
3263 1821805 : v3 = 2.0_dp/3.0_dp - 2.0_dp*r1 + 2.0_dp*r2 - 0.5_dp*r3
3264 1821805 : v4 = 1.0_dp/6.0_dp*u3
3265 :
3266 : val = ((box(1, 1, 1)*t1 + box(2, 1, 1)*t2 + box(3, 1, 1)*t3 + box(4, 1, 1)*t4)*s1 + &
3267 : (box(1, 2, 1)*t1 + box(2, 2, 1)*t2 + box(3, 2, 1)*t3 + box(4, 2, 1)*t4)*s2 + &
3268 : (box(1, 3, 1)*t1 + box(2, 3, 1)*t2 + box(3, 3, 1)*t3 + box(4, 3, 1)*t4)*s3 + &
3269 : (box(1, 4, 1)*t1 + box(2, 4, 1)*t2 + box(3, 4, 1)*t3 + box(4, 4, 1)*t4)*s4)*v1 + &
3270 : ((box(1, 1, 2)*t1 + box(2, 1, 2)*t2 + box(3, 1, 2)*t3 + box(4, 1, 2)*t4)*s1 + &
3271 : (box(1, 2, 2)*t1 + box(2, 2, 2)*t2 + box(3, 2, 2)*t3 + box(4, 2, 2)*t4)*s2 + &
3272 : (box(1, 3, 2)*t1 + box(2, 3, 2)*t2 + box(3, 3, 2)*t3 + box(4, 3, 2)*t4)*s3 + &
3273 : (box(1, 4, 2)*t1 + box(2, 4, 2)*t2 + box(3, 4, 2)*t3 + box(4, 4, 2)*t4)*s4)*v2 + &
3274 : ((box(1, 1, 3)*t1 + box(2, 1, 3)*t2 + box(3, 1, 3)*t3 + box(4, 1, 3)*t4)*s1 + &
3275 : (box(1, 2, 3)*t1 + box(2, 2, 3)*t2 + box(3, 2, 3)*t3 + box(4, 2, 3)*t4)*s2 + &
3276 : (box(1, 3, 3)*t1 + box(2, 3, 3)*t2 + box(3, 3, 3)*t3 + box(4, 3, 3)*t4)*s3 + &
3277 : (box(1, 4, 3)*t1 + box(2, 4, 3)*t2 + box(3, 4, 3)*t3 + box(4, 4, 3)*t4)*s4)*v3 + &
3278 : ((box(1, 1, 4)*t1 + box(2, 1, 4)*t2 + box(3, 1, 4)*t3 + box(4, 1, 4)*t4)*s1 + &
3279 : (box(1, 2, 4)*t1 + box(2, 2, 4)*t2 + box(3, 2, 4)*t3 + box(4, 2, 4)*t4)*s2 + &
3280 : (box(1, 3, 4)*t1 + box(2, 3, 4)*t2 + box(3, 3, 4)*t3 + box(4, 3, 4)*t4)*s3 + &
3281 1821805 : (box(1, 4, 4)*t1 + box(2, 4, 4)*t2 + box(3, 4, 4)*t3 + box(4, 4, 4)*t4)*s4)*v4
3282 :
3283 1821805 : IF (my_mpsum) CALL pw%pw_grid%para%group%sum(val)
3284 :
3285 1821805 : END FUNCTION Eval_Interp_Spl3_pbc
3286 :
3287 : ! **************************************************************************************************
3288 : !> \brief Evaluates the derivatives of the PBC interpolated Spline (pw)
3289 : !> function on the generic input vector (vec)
3290 : !> \param vec ...
3291 : !> \param pw ...
3292 : !> \return ...
3293 : !> \par History
3294 : !> 12.2007 Adapted for use with distributed grids [rdeclerck]
3295 : !> \author Teodoro Laino 12/2005 [tlaino]
3296 : !> \note
3297 : !> Requires the Spline coefficients to be computed with PBC
3298 : ! **************************************************************************************************
3299 9964 : FUNCTION Eval_d_Interp_Spl3_pbc(vec, pw) RESULT(val)
3300 : REAL(KIND=dp), DIMENSION(3), INTENT(in) :: vec
3301 : TYPE(pw_r3d_rs_type), INTENT(IN) :: pw
3302 : REAL(KIND=dp) :: val(3)
3303 :
3304 : INTEGER :: i, ivec(3), j, k, npts(3)
3305 : INTEGER, DIMENSION(2, 3) :: bo, bo_l
3306 : INTEGER, DIMENSION(4) :: ii, ij, ik
3307 : LOGICAL :: my_mpsum
3308 : REAL(KIND=dp) :: a1, a2, a3, b1, b2, b3, c1, c2, c3, d1, d2, d3, dr1, dr1i, dr2, dr2i, dr3, &
3309 : dr3i, e1, e2, e3, f1, f2, f3, g1, g2, g3, h1, h2, h3, p1, p2, p3, q1, q2, q3, r1, r2, r3, &
3310 : s1, s1d, s1o, s2, s2d, s2o, s3, s3d, s3o, s4, s4d, s4o, t1, t1d, t1o, t2, t2d, t2o, t3, &
3311 : t3d, t3o, t4, t4d, t4o, u1, u2, u3, v1, v1d, v1o, v2, v2d, v2o, v3, v3d, v3o, v4, v4d, &
3312 : v4o, xd1, xd2, xd3
3313 : REAL(KIND=dp), DIMENSION(4, 4, 4) :: box
3314 4982 : REAL(KIND=dp), DIMENSION(:, :, :), POINTER :: grid
3315 :
3316 4982 : NULLIFY (grid)
3317 4982 : my_mpsum = (pw%pw_grid%para%mode /= PW_MODE_LOCAL)
3318 19928 : npts = pw%pw_grid%npts
3319 19928 : ivec = FLOOR(vec/pw%pw_grid%dr)
3320 4982 : dr1 = pw%pw_grid%dr(1)
3321 4982 : dr2 = pw%pw_grid%dr(2)
3322 4982 : dr3 = pw%pw_grid%dr(3)
3323 4982 : dr1i = 1.0_dp/dr1
3324 4982 : dr2i = 1.0_dp/dr2
3325 4982 : dr3i = 1.0_dp/dr3
3326 4982 : xd1 = (vec(1)/dr1) - REAL(ivec(1), kind=dp)
3327 4982 : xd2 = (vec(2)/dr2) - REAL(ivec(2), kind=dp)
3328 4982 : xd3 = (vec(3)/dr3) - REAL(ivec(3), kind=dp)
3329 4982 : grid => pw%array(:, :, :)
3330 49820 : bo = pw%pw_grid%bounds
3331 49820 : bo_l = pw%pw_grid%bounds_local
3332 :
3333 4982 : ik(1) = MODULO(ivec(3) - 1, npts(3)) + bo(1, 3)
3334 4982 : ik(2) = MODULO(ivec(3), npts(3)) + bo(1, 3)
3335 4982 : ik(3) = MODULO(ivec(3) + 1, npts(3)) + bo(1, 3)
3336 4982 : ik(4) = MODULO(ivec(3) + 2, npts(3)) + bo(1, 3)
3337 :
3338 4982 : ij(1) = MODULO(ivec(2) - 1, npts(2)) + bo(1, 2)
3339 4982 : ij(2) = MODULO(ivec(2), npts(2)) + bo(1, 2)
3340 4982 : ij(3) = MODULO(ivec(2) + 1, npts(2)) + bo(1, 2)
3341 4982 : ij(4) = MODULO(ivec(2) + 2, npts(2)) + bo(1, 2)
3342 :
3343 4982 : ii(1) = MODULO(ivec(1) - 1, npts(1)) + bo(1, 1)
3344 4982 : ii(2) = MODULO(ivec(1), npts(1)) + bo(1, 1)
3345 4982 : ii(3) = MODULO(ivec(1) + 1, npts(1)) + bo(1, 1)
3346 4982 : ii(4) = MODULO(ivec(1) + 2, npts(1)) + bo(1, 1)
3347 :
3348 24910 : DO k = 1, 4
3349 104622 : DO j = 1, 4
3350 418488 : DO i = 1, 4
3351 : IF ( &
3352 : ii(i) >= bo_l(1, 1) .AND. &
3353 : ii(i) <= bo_l(2, 1) .AND. &
3354 : ij(j) >= bo_l(1, 2) .AND. &
3355 : ij(j) <= bo_l(2, 2) .AND. &
3356 318848 : ik(k) >= bo_l(1, 3) .AND. &
3357 : ik(k) <= bo_l(2, 3) &
3358 79712 : ) THEN
3359 : box(i, j, k) = grid(ii(i) + 1 - bo_l(1, 1), &
3360 : ij(j) + 1 - bo_l(1, 2), &
3361 317696 : ik(k) + 1 - bo_l(1, 3))
3362 : ELSE
3363 1152 : box(i, j, k) = 0.0_dp
3364 : END IF
3365 : END DO
3366 : END DO
3367 : END DO
3368 :
3369 4982 : a1 = 3.0_dp + xd1
3370 4982 : a2 = a1*a1
3371 4982 : a3 = a2*a1
3372 4982 : b1 = 2.0_dp + xd1
3373 4982 : b2 = b1*b1
3374 4982 : b3 = b2*b1
3375 4982 : c1 = 1.0_dp + xd1
3376 4982 : c2 = c1*c1
3377 4982 : c3 = c2*c1
3378 4982 : d1 = xd1
3379 4982 : d2 = d1*d1
3380 4982 : d3 = d2*d1
3381 4982 : e1 = 3.0_dp + xd2
3382 4982 : e2 = e1*e1
3383 4982 : e3 = e2*e1
3384 4982 : f1 = 2.0_dp + xd2
3385 4982 : f2 = f1*f1
3386 4982 : f3 = f2*f1
3387 4982 : g1 = 1.0_dp + xd2
3388 4982 : g2 = g1*g1
3389 4982 : g3 = g2*g1
3390 4982 : h1 = xd2
3391 4982 : h2 = h1*h1
3392 4982 : h3 = h2*h1
3393 4982 : p1 = 3.0_dp + xd3
3394 4982 : p2 = p1*p1
3395 4982 : p3 = p2*p1
3396 4982 : q1 = 2.0_dp + xd3
3397 4982 : q2 = q1*q1
3398 4982 : q3 = q2*q1
3399 4982 : r1 = 1.0_dp + xd3
3400 4982 : r2 = r1*r1
3401 4982 : r3 = r2*r1
3402 4982 : u1 = xd3
3403 4982 : u2 = u1*u1
3404 4982 : u3 = u2*u1
3405 :
3406 4982 : t1o = 1.0_dp/6.0_dp*(64.0_dp - 48.0_dp*a1 + 12.0_dp*a2 - a3)
3407 4982 : t2o = -22.0_dp/3.0_dp + 10.0_dp*b1 - 4.0_dp*b2 + 0.5_dp*b3
3408 4982 : t3o = 2.0_dp/3.0_dp - 2.0_dp*c1 + 2.0_dp*c2 - 0.5_dp*c3
3409 4982 : t4o = 1.0_dp/6.0_dp*d3
3410 4982 : s1o = 1.0_dp/6.0_dp*(64.0_dp - 48.0_dp*e1 + 12.0_dp*e2 - e3)
3411 4982 : s2o = -22.0_dp/3.0_dp + 10.0_dp*f1 - 4.0_dp*f2 + 0.5_dp*f3
3412 4982 : s3o = 2.0_dp/3.0_dp - 2.0_dp*g1 + 2.0_dp*g2 - 0.5_dp*g3
3413 4982 : s4o = 1.0_dp/6.0_dp*h3
3414 4982 : v1o = 1.0_dp/6.0_dp*(64.0_dp - 48.0_dp*p1 + 12.0_dp*p2 - p3)
3415 4982 : v2o = -22.0_dp/3.0_dp + 10.0_dp*q1 - 4.0_dp*q2 + 0.5_dp*q3
3416 4982 : v3o = 2.0_dp/3.0_dp - 2.0_dp*r1 + 2.0_dp*r2 - 0.5_dp*r3
3417 4982 : v4o = 1.0_dp/6.0_dp*u3
3418 :
3419 4982 : t1d = -8.0_dp + 4.0_dp*a1 - 0.5_dp*a2
3420 4982 : t2d = 10.0_dp - 8.0_dp*b1 + 1.5_dp*b2
3421 4982 : t3d = -2.0_dp + 4.0_dp*c1 - 1.5_dp*c2
3422 4982 : t4d = 0.5_dp*d2
3423 4982 : s1d = -8.0_dp + 4.0_dp*e1 - 0.5_dp*e2
3424 4982 : s2d = 10.0_dp - 8.0_dp*f1 + 1.5_dp*f2
3425 4982 : s3d = -2.0_dp + 4.0_dp*g1 - 1.5_dp*g2
3426 4982 : s4d = 0.5_dp*h2
3427 4982 : v1d = -8.0_dp + 4.0_dp*p1 - 0.5_dp*p2
3428 4982 : v2d = 10.0_dp - 8.0_dp*q1 + 1.5_dp*q2
3429 4982 : v3d = -2.0_dp + 4.0_dp*r1 - 1.5_dp*r2
3430 4982 : v4d = 0.5_dp*u2
3431 :
3432 4982 : t1 = t1d*dr1i
3433 4982 : t2 = t2d*dr1i
3434 4982 : t3 = t3d*dr1i
3435 4982 : t4 = t4d*dr1i
3436 4982 : s1 = s1o
3437 4982 : s2 = s2o
3438 4982 : s3 = s3o
3439 4982 : s4 = s4o
3440 4982 : v1 = v1o
3441 4982 : v2 = v2o
3442 4982 : v3 = v3o
3443 4982 : v4 = v4o
3444 : val(1) = ((box(1, 1, 1)*t1 + box(2, 1, 1)*t2 + box(3, 1, 1)*t3 + box(4, 1, 1)*t4)*s1 + &
3445 : (box(1, 2, 1)*t1 + box(2, 2, 1)*t2 + box(3, 2, 1)*t3 + box(4, 2, 1)*t4)*s2 + &
3446 : (box(1, 3, 1)*t1 + box(2, 3, 1)*t2 + box(3, 3, 1)*t3 + box(4, 3, 1)*t4)*s3 + &
3447 : (box(1, 4, 1)*t1 + box(2, 4, 1)*t2 + box(3, 4, 1)*t3 + box(4, 4, 1)*t4)*s4)*v1 + &
3448 : ((box(1, 1, 2)*t1 + box(2, 1, 2)*t2 + box(3, 1, 2)*t3 + box(4, 1, 2)*t4)*s1 + &
3449 : (box(1, 2, 2)*t1 + box(2, 2, 2)*t2 + box(3, 2, 2)*t3 + box(4, 2, 2)*t4)*s2 + &
3450 : (box(1, 3, 2)*t1 + box(2, 3, 2)*t2 + box(3, 3, 2)*t3 + box(4, 3, 2)*t4)*s3 + &
3451 : (box(1, 4, 2)*t1 + box(2, 4, 2)*t2 + box(3, 4, 2)*t3 + box(4, 4, 2)*t4)*s4)*v2 + &
3452 : ((box(1, 1, 3)*t1 + box(2, 1, 3)*t2 + box(3, 1, 3)*t3 + box(4, 1, 3)*t4)*s1 + &
3453 : (box(1, 2, 3)*t1 + box(2, 2, 3)*t2 + box(3, 2, 3)*t3 + box(4, 2, 3)*t4)*s2 + &
3454 : (box(1, 3, 3)*t1 + box(2, 3, 3)*t2 + box(3, 3, 3)*t3 + box(4, 3, 3)*t4)*s3 + &
3455 : (box(1, 4, 3)*t1 + box(2, 4, 3)*t2 + box(3, 4, 3)*t3 + box(4, 4, 3)*t4)*s4)*v3 + &
3456 : ((box(1, 1, 4)*t1 + box(2, 1, 4)*t2 + box(3, 1, 4)*t3 + box(4, 1, 4)*t4)*s1 + &
3457 : (box(1, 2, 4)*t1 + box(2, 2, 4)*t2 + box(3, 2, 4)*t3 + box(4, 2, 4)*t4)*s2 + &
3458 : (box(1, 3, 4)*t1 + box(2, 3, 4)*t2 + box(3, 3, 4)*t3 + box(4, 3, 4)*t4)*s3 + &
3459 4982 : (box(1, 4, 4)*t1 + box(2, 4, 4)*t2 + box(3, 4, 4)*t3 + box(4, 4, 4)*t4)*s4)*v4
3460 :
3461 4982 : t1 = t1o
3462 4982 : t2 = t2o
3463 4982 : t3 = t3o
3464 4982 : t4 = t4o
3465 4982 : s1 = s1d*dr2i
3466 4982 : s2 = s2d*dr2i
3467 4982 : s3 = s3d*dr2i
3468 4982 : s4 = s4d*dr2i
3469 4982 : v1 = v1o
3470 4982 : v2 = v2o
3471 4982 : v3 = v3o
3472 4982 : v4 = v4o
3473 : val(2) = ((box(1, 1, 1)*t1 + box(2, 1, 1)*t2 + box(3, 1, 1)*t3 + box(4, 1, 1)*t4)*s1 + &
3474 : (box(1, 2, 1)*t1 + box(2, 2, 1)*t2 + box(3, 2, 1)*t3 + box(4, 2, 1)*t4)*s2 + &
3475 : (box(1, 3, 1)*t1 + box(2, 3, 1)*t2 + box(3, 3, 1)*t3 + box(4, 3, 1)*t4)*s3 + &
3476 : (box(1, 4, 1)*t1 + box(2, 4, 1)*t2 + box(3, 4, 1)*t3 + box(4, 4, 1)*t4)*s4)*v1 + &
3477 : ((box(1, 1, 2)*t1 + box(2, 1, 2)*t2 + box(3, 1, 2)*t3 + box(4, 1, 2)*t4)*s1 + &
3478 : (box(1, 2, 2)*t1 + box(2, 2, 2)*t2 + box(3, 2, 2)*t3 + box(4, 2, 2)*t4)*s2 + &
3479 : (box(1, 3, 2)*t1 + box(2, 3, 2)*t2 + box(3, 3, 2)*t3 + box(4, 3, 2)*t4)*s3 + &
3480 : (box(1, 4, 2)*t1 + box(2, 4, 2)*t2 + box(3, 4, 2)*t3 + box(4, 4, 2)*t4)*s4)*v2 + &
3481 : ((box(1, 1, 3)*t1 + box(2, 1, 3)*t2 + box(3, 1, 3)*t3 + box(4, 1, 3)*t4)*s1 + &
3482 : (box(1, 2, 3)*t1 + box(2, 2, 3)*t2 + box(3, 2, 3)*t3 + box(4, 2, 3)*t4)*s2 + &
3483 : (box(1, 3, 3)*t1 + box(2, 3, 3)*t2 + box(3, 3, 3)*t3 + box(4, 3, 3)*t4)*s3 + &
3484 : (box(1, 4, 3)*t1 + box(2, 4, 3)*t2 + box(3, 4, 3)*t3 + box(4, 4, 3)*t4)*s4)*v3 + &
3485 : ((box(1, 1, 4)*t1 + box(2, 1, 4)*t2 + box(3, 1, 4)*t3 + box(4, 1, 4)*t4)*s1 + &
3486 : (box(1, 2, 4)*t1 + box(2, 2, 4)*t2 + box(3, 2, 4)*t3 + box(4, 2, 4)*t4)*s2 + &
3487 : (box(1, 3, 4)*t1 + box(2, 3, 4)*t2 + box(3, 3, 4)*t3 + box(4, 3, 4)*t4)*s3 + &
3488 4982 : (box(1, 4, 4)*t1 + box(2, 4, 4)*t2 + box(3, 4, 4)*t3 + box(4, 4, 4)*t4)*s4)*v4
3489 :
3490 4982 : t1 = t1o
3491 4982 : t2 = t2o
3492 4982 : t3 = t3o
3493 4982 : t4 = t4o
3494 4982 : s1 = s1o
3495 4982 : s2 = s2o
3496 4982 : s3 = s3o
3497 4982 : s4 = s4o
3498 4982 : v1 = v1d*dr3i
3499 4982 : v2 = v2d*dr3i
3500 4982 : v3 = v3d*dr3i
3501 4982 : v4 = v4d*dr3i
3502 : val(3) = ((box(1, 1, 1)*t1 + box(2, 1, 1)*t2 + box(3, 1, 1)*t3 + box(4, 1, 1)*t4)*s1 + &
3503 : (box(1, 2, 1)*t1 + box(2, 2, 1)*t2 + box(3, 2, 1)*t3 + box(4, 2, 1)*t4)*s2 + &
3504 : (box(1, 3, 1)*t1 + box(2, 3, 1)*t2 + box(3, 3, 1)*t3 + box(4, 3, 1)*t4)*s3 + &
3505 : (box(1, 4, 1)*t1 + box(2, 4, 1)*t2 + box(3, 4, 1)*t3 + box(4, 4, 1)*t4)*s4)*v1 + &
3506 : ((box(1, 1, 2)*t1 + box(2, 1, 2)*t2 + box(3, 1, 2)*t3 + box(4, 1, 2)*t4)*s1 + &
3507 : (box(1, 2, 2)*t1 + box(2, 2, 2)*t2 + box(3, 2, 2)*t3 + box(4, 2, 2)*t4)*s2 + &
3508 : (box(1, 3, 2)*t1 + box(2, 3, 2)*t2 + box(3, 3, 2)*t3 + box(4, 3, 2)*t4)*s3 + &
3509 : (box(1, 4, 2)*t1 + box(2, 4, 2)*t2 + box(3, 4, 2)*t3 + box(4, 4, 2)*t4)*s4)*v2 + &
3510 : ((box(1, 1, 3)*t1 + box(2, 1, 3)*t2 + box(3, 1, 3)*t3 + box(4, 1, 3)*t4)*s1 + &
3511 : (box(1, 2, 3)*t1 + box(2, 2, 3)*t2 + box(3, 2, 3)*t3 + box(4, 2, 3)*t4)*s2 + &
3512 : (box(1, 3, 3)*t1 + box(2, 3, 3)*t2 + box(3, 3, 3)*t3 + box(4, 3, 3)*t4)*s3 + &
3513 : (box(1, 4, 3)*t1 + box(2, 4, 3)*t2 + box(3, 4, 3)*t3 + box(4, 4, 3)*t4)*s4)*v3 + &
3514 : ((box(1, 1, 4)*t1 + box(2, 1, 4)*t2 + box(3, 1, 4)*t3 + box(4, 1, 4)*t4)*s1 + &
3515 : (box(1, 2, 4)*t1 + box(2, 2, 4)*t2 + box(3, 2, 4)*t3 + box(4, 2, 4)*t4)*s2 + &
3516 : (box(1, 3, 4)*t1 + box(2, 3, 4)*t2 + box(3, 3, 4)*t3 + box(4, 3, 4)*t4)*s3 + &
3517 4982 : (box(1, 4, 4)*t1 + box(2, 4, 4)*t2 + box(3, 4, 4)*t3 + box(4, 4, 4)*t4)*s4)*v4
3518 :
3519 4982 : IF (my_mpsum) CALL pw%pw_grid%para%group%sum(val)
3520 :
3521 4982 : END FUNCTION Eval_d_Interp_Spl3_pbc
3522 :
3523 0 : END MODULE pw_spline_utils
|