Line data Source code
1 : !--------------------------------------------------------------------------------------------------!
2 : ! CP2K: A general program to perform molecular dynamics simulations !
3 : ! Copyright 2000-2024 CP2K developers group <https://cp2k.org> !
4 : ! !
5 : ! SPDX-License-Identifier: GPL-2.0-or-later !
6 : !--------------------------------------------------------------------------------------------------!
7 :
8 : ! **************************************************************************************************
9 : !> \brief Routines to split blocks and to convert between tensors with different block sizes.
10 : !> \author Patrick Seewald
11 : ! **************************************************************************************************
12 : MODULE dbt_split
13 :
14 : #:include "dbt_macros.fypp"
15 : #:set maxdim = maxrank
16 : #:set ndims = range(2,maxdim+1)
17 :
18 : USE dbt_allocate_wrap, ONLY: allocate_any
19 : USE dbt_array_list_methods, ONLY: get_ith_array
20 : USE dbt_block, ONLY: dbt_iterator_type, &
21 : dbt_get_block, &
22 : dbt_put_block, &
23 : dbt_iterator_start, &
24 : dbt_iterator_num_blocks, &
25 : dbt_iterator_blocks_left, &
26 : dbt_iterator_stop, &
27 : dbt_iterator_next_block, &
28 : dbt_reserve_blocks
29 : USE dbt_index, ONLY: dbt_get_mapping_info, &
30 : dbt_inverse_order
31 : USE dbt_types, ONLY: dbt_create, &
32 : dbt_type, &
33 : ndims_tensor, &
34 : dbt_distribution_type, &
35 : dbt_distribution, &
36 : dbt_distribution_destroy, &
37 : dbt_distribution_new_expert, &
38 : dbt_clear, &
39 : dbt_finalize, &
40 : dbt_get_num_blocks, &
41 : dbt_blk_offsets, &
42 : dbt_blk_sizes, &
43 : ndims_matrix_row, &
44 : ndims_matrix_column, &
45 : dbt_filter, &
46 : dbt_copy_contraction_storage
      47              :    USE kinds, ONLY: dp
48 :
49 : #include "../base/base_uses.f90"
50 : IMPLICIT NONE
51 : PRIVATE
52 : CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'dbt_split'
53 : PUBLIC :: &
54 : dbt_make_compatible_blocks, &
55 : dbt_split_blocks, &
56 : dbt_split_blocks_generic, &
57 : dbt_split_copyback, &
58 : dbt_crop
59 :
60 : CONTAINS
61 :
62 : ! **************************************************************************************************
63 : !> \brief Split tensor blocks into smaller blocks
64 : !> \param tensor_in Input tensor
      65              : !> \param tensor_out Output tensor (split blocks)
66 : !> \param blk_size_i block sizes for each of the tensor dimensions
67 : !> \param nodata don't copy data from tensor_in to tensor_out
68 : !> \author Patrick Seewald
69 : ! **************************************************************************************************
70 1287720 : SUBROUTINE dbt_split_blocks_generic(tensor_in, tensor_out, ${varlist("blk_size")}$, nodata)
71 : TYPE(dbt_type), INTENT(INOUT) :: tensor_in
72 : TYPE(dbt_type), INTENT(OUT) :: tensor_out
73 : INTEGER, DIMENSION(:), INTENT(IN), OPTIONAL :: ${varlist("blk_size")}$
74 : LOGICAL, INTENT(IN), OPTIONAL :: nodata
75 :
76 1860040 : TYPE(dbt_distribution_type) :: dist_old, dist_split
77 : TYPE(dbt_iterator_type) :: iter
78 143080 : INTEGER, DIMENSION(:), ALLOCATABLE :: ${varlist("nd_dist_split")}$
79 143080 : INTEGER, DIMENSION(:), ALLOCATABLE :: ${varlist("nd_blk_size_split")}$
80 143080 : INTEGER, DIMENSION(:), ALLOCATABLE :: ${varlist("index_split_offset")}$
81 143080 : INTEGER, DIMENSION(:), ALLOCATABLE :: ${varlist("inblock_offset")}$
82 143080 : INTEGER, DIMENSION(:), ALLOCATABLE :: ${varlist("blk_nsplit")}$
83 : INTEGER :: ${varlist("split_blk")}$
84 : INTEGER :: idim, i, isplit_sum, nsplit, handle, splitsum, bcount
85 143080 : INTEGER, DIMENSION(:, :), ALLOCATABLE :: blks_to_allocate
86 286160 : INTEGER, DIMENSION(:), ALLOCATABLE :: dist_d, blk_size_d, blk_size_split_d, dist_split_d
87 286160 : INTEGER, DIMENSION(ndims_matrix_row(tensor_in)) :: map1_2d
88 286160 : INTEGER, DIMENSION(ndims_matrix_column(tensor_in)) :: map2_2d
89 143080 : INTEGER, DIMENSION(ndims_tensor(tensor_in)) :: blk_index, blk_size, blk_offset, &
90 143080 : blk_shape
91 : INTEGER, DIMENSION(${maxdim}$) :: bi_split, inblock_offset
92 : LOGICAL :: found
93 :
94 : #:for ndim in ndims
95 143080 : REAL(dp), DIMENSION(${shape_colon(n=ndim)}$), ALLOCATABLE :: block_${ndim}$d
96 : #:endfor
97 : CHARACTER(LEN=*), PARAMETER :: routineN = 'dbt_split_blocks_generic'
98 :
99 143080 : CALL timeset(routineN, handle)
100 :
101 143080 : dist_old = dbt_distribution(tensor_in)
102 :
103 505068 : DO idim = 1, ndims_tensor(tensor_in)
104 361988 : CALL get_ith_array(dist_old%nd_dist, idim, dist_d)
105 361988 : CALL get_ith_array(tensor_in%blk_sizes, idim, blk_size_d)
106 :
107 : #:for idim in range(1, maxdim+1)
108 723976 : IF (idim == ${idim}$) THEN
109 : ! split block index offset for each normal block index
110 1085964 : ALLOCATE (index_split_offset_${idim}$ (SIZE(dist_d)))
111 : ! how many split blocks for each normal block index
112 723976 : ALLOCATE (blk_nsplit_${idim}$ (SIZE(dist_d)))
113 : ! data offset of split blocks w.r.t. corresponding normal block
114 1085964 : ALLOCATE (inblock_offset_${idim}$ (SIZE(blk_size_${idim}$)))
115 4405076 : ALLOCATE (blk_size_split_d, source=blk_size_${idim}$)
116 : END IF
117 : #:endfor
118 :
119 : ! distribution vector for split blocks
120 1085964 : ALLOCATE (dist_split_d(SIZE(blk_size_split_d)))
121 :
122 361988 : isplit_sum = 0 ! counting splits
123 2809040 : DO i = 1, SIZE(blk_size_d)
124 : nsplit = 0 ! number of splits for current normal block
125 : splitsum = 0 ! summing split block sizes for current normal block
126 5766164 : DO WHILE (splitsum < blk_size_d(i))
127 3319112 : nsplit = nsplit + 1
128 3319112 : isplit_sum = isplit_sum + 1
129 : #:for idim in range(1, maxdim+1)
130 3319112 : IF (idim == ${idim}$) inblock_offset_${idim}$ (isplit_sum) = splitsum
131 : #:endfor
132 3319112 : dist_split_d(isplit_sum) = dist_d(i)
133 3319112 : splitsum = splitsum + blk_size_split_d(isplit_sum)
134 : END DO
135 2447052 : CPASSERT(splitsum == blk_size_d(i))
136 : #:for idim in range(1, maxdim+1)
137 5256092 : IF (idim == ${idim}$) THEN
138 2447052 : blk_nsplit_${idim}$ (i) = nsplit
139 2447052 : index_split_offset_${idim}$ (i) = isplit_sum - nsplit
140 : END IF
141 : #:endfor
142 : END DO
143 :
144 : #:for idim in range(1, maxdim+1)
145 723976 : IF (idim == ${idim}$) THEN
146 4405076 : ALLOCATE (nd_dist_split_${idim}$, source=dist_split_d)
147 4405076 : ALLOCATE (nd_blk_size_split_${idim}$, source=blk_size_split_d)
148 : END IF
149 : #:endfor
150 361988 : DEALLOCATE (dist_split_d)
151 1229044 : DEALLOCATE (blk_size_split_d)
152 :
153 : END DO
154 :
155 143080 : CALL dbt_get_mapping_info(tensor_in%nd_index_blk, map1_2d=map1_2d, map2_2d=map2_2d)
156 :
157 : #:for ndim in ndims
158 286160 : IF (ndims_tensor(tensor_in) == ${ndim}$) THEN
159 : CALL dbt_distribution_new_expert(dist_split, tensor_in%pgrid, map1_2d, map2_2d, &
160 143080 : ${varlist("nd_dist_split", nmax=ndim)}$)
161 : CALL dbt_create(tensor_out, tensor_in%name, dist_split, map1_2d, map2_2d, &
162 143080 : ${varlist("nd_blk_size_split", nmax=ndim)}$)
163 : END IF
164 : #:endfor
165 :
166 143080 : CALL dbt_distribution_destroy(dist_split)
167 :
168 143080 : IF (PRESENT(nodata)) THEN
169 71021 : IF (nodata) THEN
170 57875 : CALL timestop(handle)
171 57875 : RETURN
172 : END IF
173 : END IF
174 :
175 : !$OMP PARALLEL DEFAULT(NONE) &
176 : !$OMP SHARED(tensor_in,tensor_out) &
177 : !$OMP SHARED(${varlist("blk_nsplit", nmax=ndim)}$) &
178 : !$OMP SHARED(${varlist("inblock_offset", nmax=ndim)}$) &
179 : !$OMP SHARED(${varlist("blk_size", nmax=ndim)}$) &
180 : !$OMP SHARED(${varlist("index_split_offset", nmax=ndim)}$) &
181 : !$OMP PRIVATE(iter,found,bcount,blks_to_allocate,bi_split,inblock_offset) &
182 : !$OMP PRIVATE(blk_index,blk_size,blk_offset,blk_shape) &
183 85205 : !$OMP PRIVATE(block_2d,block_3d,block_4d)
184 : CALL dbt_iterator_start(iter, tensor_in)
185 :
186 : bcount = 0
187 : DO WHILE (dbt_iterator_blocks_left(iter))
188 : CALL dbt_iterator_next_block(iter, blk_index, blk_size=blk_size)
189 : #:for ndim in ndims
190 : IF (ndims_tensor(tensor_in) == ${ndim}$) THEN
191 : #:for idim in range(1,ndim+1)
192 : DO split_blk_${idim}$ = 1, blk_nsplit_${idim}$ (blk_index(${idim}$))
193 : #:endfor
194 : bcount = bcount + 1
195 : #:for idim in range(1,ndim+1)
196 : END DO
197 : #:endfor
198 : END IF
199 : #:endfor
200 : END DO
201 : CALL dbt_iterator_stop(iter)
202 :
203 : ALLOCATE (blks_to_allocate(bcount, ndims_tensor(tensor_in)))
204 :
205 : CALL dbt_iterator_start(iter, tensor_in)
206 :
207 : bcount = 0
208 : DO WHILE (dbt_iterator_blocks_left(iter))
209 : CALL dbt_iterator_next_block(iter, blk_index, blk_size=blk_size, blk_offset=blk_offset)
210 :
211 : #:for ndim in ndims
212 : IF (ndims_tensor(tensor_in) == ${ndim}$) THEN
213 : #:for idim in range(1,ndim+1)
214 : DO split_blk_${idim}$ = 1, blk_nsplit_${idim}$ (blk_index(${idim}$))
215 : bi_split(${idim}$) = index_split_offset_${idim}$ (blk_index(${idim}$)) + split_blk_${idim}$
216 : #:endfor
217 : bcount = bcount + 1
218 : blks_to_allocate(bcount, :) = bi_split(1:ndims_tensor(tensor_in))
219 : #:for idim in range(1,ndim+1)
220 : END DO
221 : #:endfor
222 : END IF
223 : #:endfor
224 : END DO
225 :
226 : CALL dbt_iterator_stop(iter)
227 :
228 : CALL dbt_reserve_blocks(tensor_out, blks_to_allocate)
229 :
230 : CALL dbt_iterator_start(iter, tensor_in)
231 :
232 : DO WHILE (dbt_iterator_blocks_left(iter))
233 : CALL dbt_iterator_next_block(iter, blk_index, blk_size=blk_size, blk_offset=blk_offset)
234 : #:for ndim in ndims
235 : IF (ndims_tensor(tensor_in) == ${ndim}$) THEN
236 : CALL dbt_get_block(tensor_in, blk_index, block_${ndim}$d, found)
237 : CPASSERT(found)
238 : END IF
239 : #:endfor
240 : #:for ndim in ndims
241 : IF (ndims_tensor(tensor_in) == ${ndim}$) THEN
242 : #:for idim in range(1,ndim+1)
243 : DO split_blk_${idim}$ = 1, blk_nsplit_${idim}$ (blk_index(${idim}$))
244 : ! split block index
245 : bi_split(${idim}$) = index_split_offset_${idim}$ (blk_index(${idim}$)) + split_blk_${idim}$
246 : blk_shape(${idim}$) = blk_size_${idim}$ (bi_split(${idim}$))
247 : #:endfor
248 :
249 : #:for idim in range(1,ndim+1)
250 : inblock_offset(${idim}$) = inblock_offset_${idim}$ (bi_split(${idim}$))
251 : #:endfor
252 : CALL dbt_put_block(tensor_out, bi_split(1:${ndim}$), &
253 : blk_shape, &
254 : block_${ndim}$d( &
255 : ${", ".join(["inblock_offset("+str(idim)+") + 1:inblock_offset("+str(idim)+") + blk_shape("+str(idim)+")" for idim in range(1, ndim+1)])}$))
256 :
257 : #:for idim in range(1,ndim+1)
258 : END DO
259 : #:endfor
260 :
261 : DEALLOCATE (block_${ndim}$d)
262 : END IF
263 : #:endfor
264 : END DO
265 : CALL dbt_iterator_stop(iter)
266 : !$OMP END PARALLEL
267 :
268 85205 : CALL dbt_finalize(tensor_out)
269 :
270 : ! remove blocks that are exactly 0, these can occur if a cropping operation was performed before splitting
271 85205 : CALL dbt_filter(tensor_out, TINY(0.0_dp))
272 :
273 85205 : CALL timestop(handle)
274 :
275 429240 : END SUBROUTINE
276 :
277 : ! **************************************************************************************************
278 : !> \brief Split tensor blocks into smaller blocks of maximum size PRODUCT(block_sizes).
279 : !> \param tensor_in Input tensor
280 : !> \param tensor_out Output tensor (split blocks)
281 : !> \param block_sizes block sizes for each of the tensor dimensions
282 : !> \param nodata don't copy data from tensor_in to tensor_out
283 : !> \author Patrick Seewald
284 : ! **************************************************************************************************
285 8304 : SUBROUTINE dbt_split_blocks(tensor_in, tensor_out, block_sizes, nodata)
286 :
287 : TYPE(dbt_type), INTENT(INOUT) :: tensor_in
288 : TYPE(dbt_type), INTENT(OUT) :: tensor_out
289 : INTEGER, DIMENSION(ndims_tensor(tensor_in)), &
290 : INTENT(IN) :: block_sizes
291 : LOGICAL, INTENT(IN), OPTIONAL :: nodata
292 :
293 1038 : INTEGER, DIMENSION(:), ALLOCATABLE :: ${varlist("nd_blk_size_split")}$
294 : INTEGER :: idim, i, isplit_sum, blk_remainder, nsplit, isplit
295 1038 : INTEGER, DIMENSION(:), ALLOCATABLE :: blk_size_d, blk_size_split_d
296 :
297 3806 : DO idim = 1, ndims_tensor(tensor_in)
298 2768 : CALL get_ith_array(tensor_in%blk_sizes, idim, blk_size_d)
299 :
300 2768 : isplit_sum = 0
301 6908 : DO i = 1, SIZE(blk_size_d)
302 4140 : nsplit = (blk_size_d(i) + block_sizes(idim) - 1)/block_sizes(idim)
303 6908 : isplit_sum = isplit_sum + nsplit
304 : END DO
305 :
306 8304 : ALLOCATE (blk_size_split_d(isplit_sum))
307 :
308 2768 : isplit_sum = 0
309 6908 : DO i = 1, SIZE(blk_size_d)
310 4140 : nsplit = (blk_size_d(i) + block_sizes(idim) - 1)/block_sizes(idim)
311 4140 : blk_remainder = blk_size_d(i)
312 18152 : DO isplit = 1, nsplit
313 11244 : isplit_sum = isplit_sum + 1
314 11244 : blk_size_split_d(isplit_sum) = MIN(block_sizes(idim), blk_remainder)
315 15384 : blk_remainder = blk_remainder - block_sizes(idim)
316 : END DO
317 :
318 : END DO
319 :
320 : #:for idim in range(1, maxdim+1)
321 5536 : IF (idim == ${idim}$) THEN
322 19548 : ALLOCATE (nd_blk_size_split_${idim}$, source=blk_size_split_d)
323 : END IF
324 : #:endfor
325 6574 : DEALLOCATE (blk_size_split_d)
326 : END DO
327 :
328 : #:for ndim in ndims
329 2076 : IF (ndims_tensor(tensor_in) == ${ndim}$) CALL dbt_split_blocks_generic(tensor_in, tensor_out, &
330 : ${varlist("nd_blk_size_split", nmax=ndim)}$, &
331 1038 : nodata=nodata)
332 : #:endfor
333 :
334 1038 : END SUBROUTINE
335 :
336 : ! **************************************************************************************************
337 : !> \brief Copy tensor with split blocks to tensor with original block sizes.
338 : !> \param tensor_split_in tensor with smaller blocks
     339              : !> \param tensor_out original tensor
                       : !> \param summation if .TRUE., sum into existing data of tensor_out instead of overwriting
340 : !> \author Patrick Seewald
341 : ! **************************************************************************************************
342 71021 : SUBROUTINE dbt_split_copyback(tensor_split_in, tensor_out, summation)
343 : TYPE(dbt_type), INTENT(INOUT) :: tensor_split_in
344 : TYPE(dbt_type), INTENT(INOUT) :: tensor_out
345 : LOGICAL, INTENT(IN), OPTIONAL :: summation
346 71021 : INTEGER, DIMENSION(:), ALLOCATABLE :: first_split_d, last_split_d
347 71021 : INTEGER, DIMENSION(:), ALLOCATABLE :: blk_size_split_d, blk_size_d
348 71021 : INTEGER, DIMENSION(:), ALLOCATABLE :: ${varlist("last_split")}$, &
349 71021 : ${varlist("first_split")}$, &
350 71021 : ${varlist("split")}$
351 71021 : INTEGER, DIMENSION(:), ALLOCATABLE :: ${varlist("inblock_offset")}$, ${varlist("blk_size_split")}$
352 71021 : INTEGER, DIMENSION(:, :), ALLOCATABLE :: blks_to_allocate
353 : INTEGER :: idim, iblk, bcount
354 : INTEGER :: ${varlist("iblk")}$, isplit_sum, splitsum
355 : TYPE(dbt_iterator_type) :: iter
356 71021 : INTEGER, DIMENSION(ndims_tensor(tensor_out)) :: blk_index, blk_size, blk_offset, blk_shape, blk_index_n
357 : LOGICAL :: found
358 :
359 : INTEGER, DIMENSION(${maxdim}$) :: inblock_offset
360 : INTEGER :: handle
361 : CHARACTER(LEN=*), PARAMETER :: routineN = 'dbt_split_copyback'
362 : #:for ndim in ndims
363 71021 : REAL(dp), DIMENSION(${shape_colon(n=ndim)}$), ALLOCATABLE :: block_${ndim}$d
364 71021 : REAL(dp), DIMENSION(${shape_colon(n=ndim)}$), ALLOCATABLE :: block_split_${ndim}$d
365 : #:endfor
366 :
367 71021 : CALL timeset(routineN, handle)
368 71021 : CPASSERT(tensor_out%valid)
369 71021 : IF (PRESENT(summation)) THEN
370 13146 : IF (.NOT. summation) CALL dbt_clear(tensor_out)
371 : ELSE
372 57875 : CALL dbt_clear(tensor_out)
373 : END IF
374 :
375 250631 : DO idim = 1, ndims_tensor(tensor_split_in)
376 179610 : CALL get_ith_array(tensor_split_in%blk_sizes, idim, blk_size_split_d)
377 179610 : CALL get_ith_array(tensor_out%blk_sizes, idim, blk_size_d)
378 :
379 : #:for idim in range(1, maxdim+1)
380 359220 : IF (idim == ${idim}$) THEN
381 : ! data offset of split blocks w.r.t. corresponding normal block
382 538830 : ALLOCATE (inblock_offset_${idim}$ (SIZE(blk_size_split_d)))
383 : ! normal block index for each split block
384 359220 : ALLOCATE (split_${idim}$ (SIZE(blk_size_split_d)))
385 : END IF
386 : #:endfor
387 :
388 538830 : ALLOCATE (last_split_d(SIZE(blk_size_d)))
389 359220 : ALLOCATE (first_split_d(SIZE(blk_size_d)))
390 179610 : first_split_d(1) = 1
391 179610 : isplit_sum = 0
392 1534004 : DO iblk = 1, SIZE(blk_size_d)
393 1354394 : splitsum = 0
394 1354394 : IF (iblk .GT. 1) first_split_d(iblk) = last_split_d(iblk - 1) + 1
395 3008328 : DO WHILE (splitsum < blk_size_d(iblk))
396 1653934 : isplit_sum = isplit_sum + 1
397 : #:for idim in range(1, maxdim+1)
398 3307868 : IF (idim == ${idim}$) THEN
399 1653934 : inblock_offset_${idim}$ (isplit_sum) = splitsum
400 1653934 : split_${idim}$ (isplit_sum) = iblk
401 : END IF
402 : #:endfor
403 1653934 : splitsum = splitsum + blk_size_split_d(isplit_sum)
404 : END DO
405 1354394 : CPASSERT(splitsum == blk_size_d(iblk))
406 1534004 : last_split_d(iblk) = isplit_sum
407 : END DO
408 : #:for idim in range(1, maxdim+1)
409 359220 : IF (idim == ${idim}$) THEN
410 1893224 : ALLOCATE (first_split_${idim}$, source=first_split_d)
411 1713614 : ALLOCATE (last_split_${idim}$, source=last_split_d)
412 2192764 : ALLOCATE (blk_size_split_${idim}$, source=blk_size_split_d)
413 : END IF
414 : #:endfor
415 179610 : DEALLOCATE (first_split_d, last_split_d)
416 609851 : DEALLOCATE (blk_size_split_d, blk_size_d)
417 : END DO
418 :
419 : !$OMP PARALLEL DEFAULT(NONE) &
420 : !$OMP SHARED(tensor_split_in,tensor_out,summation) &
421 : !$OMP SHARED(${varlist("split", nmax=ndim)}$) &
422 : !$OMP SHARED(${varlist("first_split", nmax=ndim)}$) &
423 : !$OMP SHARED(${varlist("last_split", nmax=ndim)}$) &
424 : !$OMP SHARED(${varlist("inblock_offset", nmax=ndim)}$) &
425 : !$OMP PRIVATE(iter,blks_to_allocate,bcount,blk_index_n) &
426 : !$OMP PRIVATE(blk_index,blk_size,blk_shape,blk_offset,inblock_offset,found) &
427 71021 : !$OMP PRIVATE(block_2d,block_3d,block_4d,block_split_2d,block_split_3d,block_split_4d)
428 : CALL dbt_iterator_start(iter, tensor_split_in)
429 : ALLOCATE (blks_to_allocate(dbt_iterator_num_blocks(iter), ndims_tensor(tensor_split_in)))
430 : bcount = 0
431 : DO WHILE (dbt_iterator_blocks_left(iter))
432 : CALL dbt_iterator_next_block(iter, blk_index, blk_size=blk_size)
433 : #:for ndim in ndims
434 : IF (ndims_tensor(tensor_out) == ${ndim}$) THEN
435 : #:for idim in range(1,ndim+1)
436 : blk_index_n(${idim}$) = split_${idim}$ (blk_index(${idim}$))
437 : #:endfor
438 : END IF
439 : #:endfor
440 : blks_to_allocate(bcount + 1, :) = blk_index_n
441 : bcount = bcount + 1
442 : END DO
443 : CALL dbt_iterator_stop(iter)
444 : CALL dbt_reserve_blocks(tensor_out, blks_to_allocate)
445 :
446 : CALL dbt_iterator_start(iter, tensor_out)
447 : DO WHILE (dbt_iterator_blocks_left(iter))
448 : CALL dbt_iterator_next_block(iter, blk_index, blk_size=blk_size, blk_offset=blk_offset)
449 :
450 : #:for ndim in ndims
451 : IF (ndims_tensor(tensor_out) == ${ndim}$) THEN
452 : CALL allocate_any(block_${ndim}$d, blk_size)
453 : block_${ndim}$d = 0.0_dp
454 : #:for idim in range(1,ndim+1)
455 : DO iblk_${idim}$ = first_split_${idim}$ (blk_index(${idim}$)), last_split_${idim}$ (blk_index(${idim}$))
456 : #:endfor
457 : #:for idim in range(1,ndim+1)
458 : inblock_offset(${idim}$) = inblock_offset_${idim}$ (iblk_${idim}$)
459 : #:endfor
460 :
461 : CALL dbt_get_block(tensor_split_in, [${", ".join(["iblk_"+str(idim) for idim in range(1, ndim+1)])}$], &
462 : block_split_${ndim}$d, found)
463 : IF (found) THEN
464 : blk_shape(1:${ndim}$) = SHAPE(block_split_${ndim}$d)
465 : block_${ndim}$d( &
466 : ${", ".join(["inblock_offset("+str(idim)+") + 1:inblock_offset("+str(idim)+") + blk_shape("+str(idim)+")" for idim in range(1, ndim+1)])}$) = &
467 : block_split_${ndim}$d
468 : END IF
469 :
470 : #:for idim in range(1,ndim+1)
471 : END DO
472 : #:endfor
473 : CALL dbt_put_block(tensor_out, blk_index, blk_size, block_${ndim}$d, summation=summation)
474 : DEALLOCATE (block_${ndim}$d)
475 : END IF
476 : #:endfor
477 : END DO
478 : CALL dbt_iterator_stop(iter)
479 : !$OMP END PARALLEL
480 :
481 71021 : CALL timestop(handle)
482 :
483 142042 : END SUBROUTINE
484 :
485 : ! **************************************************************************************************
486 : !> \brief split two tensors with same total sizes but different block sizes such that they have
487 : !> equal block sizes
488 : !> \param move_data memory optimization: transfer data s.t. tensor1 and tensor2 may be empty on return
489 : !> \param tensor1_split tensor 1 with split blocks
490 : !> \param tensor2_split tensor 2 with split blocks
491 : !> \param nodata1 don't copy data of tensor 1
492 : !> \param nodata2 don't copy data of tensor 2
     493              : !> \param tensor1 tensor 1 (original block sizes)
     494              : !> \param tensor2 tensor 2 (original block sizes)
     495              : !> \param order dimension mapping of tensor2 relative to tensor1 (optional)
497 : !> \author Patrick Seewald
498 : ! **************************************************************************************************
499 1065315 : SUBROUTINE dbt_make_compatible_blocks(tensor1, tensor2, tensor1_split, tensor2_split, &
500 17990 : order, nodata1, nodata2, move_data)
501 : TYPE(dbt_type), INTENT(INOUT) :: tensor1, tensor2
502 : TYPE(dbt_type), INTENT(OUT) :: tensor1_split, tensor2_split
503 : INTEGER, DIMENSION(ndims_tensor(tensor1)), &
504 : INTENT(IN), OPTIONAL :: order
505 : LOGICAL, INTENT(IN), OPTIONAL :: nodata1, nodata2, move_data
506 71021 : INTEGER, DIMENSION(:), ALLOCATABLE :: ${varlist("blk_size_split_1")}$, ${varlist("blk_size_split_2")}$, &
507 71021 : blk_size_d_1, blk_size_d_2, blk_size_d_split
508 : INTEGER :: size_sum_1, size_sum_2, size_sum, bind_1, bind_2, isplit, bs, idim, i
509 : LOGICAL :: move_prv, nodata1_prv, nodata2_prv
510 71021 : INTEGER, DIMENSION(ndims_tensor(tensor1)) :: order_prv
511 :
512 71021 : IF (PRESENT(move_data)) THEN
513 71021 : move_prv = move_data
514 : ELSE
515 : move_prv = .FALSE.
516 : END IF
517 :
518 71021 : IF (PRESENT(nodata1)) THEN
519 0 : nodata1_prv = nodata1
520 : ELSE
521 : nodata1_prv = .FALSE.
522 : END IF
523 71021 : IF (PRESENT(nodata2)) THEN
524 71021 : nodata2_prv = nodata2
525 : ELSE
526 : nodata2_prv = .FALSE.
527 : END IF
528 :
529 71021 : IF (PRESENT(order)) THEN
530 17990 : order_prv(:) = dbt_inverse_order(order)
531 : ELSE
532 313047 : order_prv(:) = (/(i, i=1, ndims_tensor(tensor1))/)
533 : END IF
534 :
535 250631 : DO idim = 1, ndims_tensor(tensor2)
536 179610 : CALL get_ith_array(tensor1%blk_sizes, order_prv(idim), blk_size_d_1)
537 179610 : CALL get_ith_array(tensor2%blk_sizes, idim, blk_size_d_2)
538 538830 : ALLOCATE (blk_size_d_split(SIZE(blk_size_d_1) + SIZE(blk_size_d_2)))
539 : size_sum_1 = 0
540 : size_sum_2 = 0
541 : size_sum = 0
542 1833544 : bind_1 = 0
543 1833544 : bind_2 = 0
544 1833544 : isplit = 0
545 :
546 1833544 : DO WHILE (bind_1 < SIZE(blk_size_d_1) .AND. bind_2 < SIZE(blk_size_d_2))
547 1833544 : IF (blk_size_d_1(bind_1 + 1) < blk_size_d_2(bind_2 + 1)) THEN
548 299540 : bind_1 = bind_1 + 1
549 299540 : bs = blk_size_d_1(bind_1)
550 299540 : blk_size_d_2(bind_2 + 1) = blk_size_d_2(bind_2 + 1) - bs
551 : size_sum = size_sum + bs
552 299540 : isplit = isplit + 1
553 299540 : blk_size_d_split(isplit) = bs
554 1354394 : ELSEIF (blk_size_d_1(bind_1 + 1) > blk_size_d_2(bind_2 + 1)) THEN
555 565416 : bind_2 = bind_2 + 1
556 565416 : bs = blk_size_d_2(bind_2)
557 565416 : blk_size_d_1(bind_1 + 1) = blk_size_d_1(bind_1 + 1) - bs
558 : size_sum = size_sum + bs
559 565416 : isplit = isplit + 1
560 565416 : blk_size_d_split(isplit) = bs
561 : ELSE
562 788978 : bind_1 = bind_1 + 1
563 788978 : bind_2 = bind_2 + 1
564 788978 : bs = blk_size_d_1(bind_1)
565 : size_sum = size_sum + bs
566 788978 : isplit = isplit + 1
567 788978 : blk_size_d_split(isplit) = bs
568 : END IF
569 : END DO
570 :
571 179610 : IF (bind_1 < SIZE(blk_size_d_1)) THEN
572 0 : bind_1 = bind_1 + 1
573 0 : bs = blk_size_d_1(bind_1)
574 : size_sum = size_sum + bs
575 0 : isplit = isplit + 1
576 0 : blk_size_d_split(isplit) = bs
577 : END IF
578 :
579 179610 : IF (bind_2 < SIZE(blk_size_d_2)) THEN
580 0 : bind_2 = bind_2 + 1
581 0 : bs = blk_size_d_2(bind_2)
582 : size_sum = size_sum + bs
583 0 : isplit = isplit + 1
584 0 : blk_size_d_split(isplit) = bs
585 : END IF
586 :
587 : #:for idim in range(1, maxdim+1)
588 359220 : IF (order_prv(idim) == ${idim}$) THEN
589 2192764 : ALLOCATE (blk_size_split_1_${idim}$, source=blk_size_d_split(:isplit))
590 : END IF
591 : #:endfor
592 :
593 : #:for idim in range(1, maxdim+1)
594 359220 : IF (idim == ${idim}$) THEN
595 2192764 : ALLOCATE (blk_size_split_2_${idim}$, source=blk_size_d_split(:isplit))
596 : END IF
597 : #:endfor
598 :
599 609851 : DEALLOCATE (blk_size_d_split, blk_size_d_1, blk_size_d_2)
600 : END DO
601 :
602 : #:for ndim in ndims
603 142042 : IF (ndims_tensor(tensor1) == ${ndim}$) THEN
604 71021 : CALL dbt_split_blocks_generic(tensor1, tensor1_split, ${varlist("blk_size_split_1", nmax=ndim)}$, nodata=nodata1)
605 71021 : IF (move_prv .AND. .NOT. nodata1_prv) CALL dbt_clear(tensor1)
606 71021 : CALL dbt_split_blocks_generic(tensor2, tensor2_split, ${varlist("blk_size_split_2", nmax=ndim)}$, nodata=nodata2)
607 71021 : IF (move_prv .AND. .NOT. nodata2_prv) CALL dbt_clear(tensor2)
608 : END IF
609 : #:endfor
610 :
611 142042 : END SUBROUTINE
612 :
613 : ! **************************************************************************************************
     614              : !> \brief Crop a tensor: keep only the data inside the given index bounds.
                       : !> \param tensor_in input tensor
                       : !> \param tensor_out cropped output tensor (partially covered blocks are zero-padded outside bounds)
                       : !> \param bounds lower and upper index bound for each tensor dimension
                       : !> \param move_data memory optimization: transfer data s.t. tensor_in may be empty on return
     614              : !> \author Patrick Seewald
615 : ! **************************************************************************************************
616 1116832 : SUBROUTINE dbt_crop(tensor_in, tensor_out, bounds, move_data)
617 : TYPE(dbt_type), INTENT(INOUT) :: tensor_in
618 : TYPE(dbt_type), INTENT(OUT) :: tensor_out
619 : INTEGER, DIMENSION(2, ndims_tensor(tensor_in)), INTENT(IN) :: bounds
620 : LOGICAL, INTENT(IN), OPTIONAL :: move_data
621 :
622 : CHARACTER(LEN=*), PARAMETER :: routineN = 'dbt_crop'
623 :
624 139604 : INTEGER, DIMENSION(2, ndims_tensor(tensor_in)) :: blk_bounds
625 : TYPE(dbt_iterator_type) :: iter
626 139604 : INTEGER, DIMENSION(ndims_tensor(tensor_in)) :: blk_index, blk_size, blk_offset
627 : LOGICAL :: found, move_data_prv
628 : INTEGER :: handle, idim, iblk_out
629 139604 : INTEGER, DIMENSION(:, :), ALLOCATABLE :: blk_ind_out
630 : #:for ndim in ndims
631 139604 : REAL(dp), DIMENSION(${shape_colon(n=ndim)}$), ALLOCATABLE :: block_${ndim}$d, block_put_${ndim}$d
632 : #:endfor
633 :
634 139604 : CALL timeset(routineN, handle)
635 :
636 139604 : IF (PRESENT(move_data)) THEN
637 139604 : move_data_prv = move_data
638 : ELSE
639 : move_data_prv = .FALSE.
640 : END IF
641 :
642 139604 : CALL dbt_create(tensor_in, tensor_out)
643 :
644 : !$OMP PARALLEL DEFAULT(NONE) SHARED(tensor_in,tensor_out,bounds) &
645 : !$OMP PRIVATE(iter,blk_ind_out,iblk_out,blk_index,blk_size,blk_offset,found,blk_bounds) &
646 139604 : !$OMP PRIVATE(block_2d,block_put_2d,block_3d,block_put_3d,block_4d,block_put_4d)
647 :
648 : ! reserve blocks inside bounds
649 : CALL dbt_iterator_start(iter, tensor_in)
650 : ALLOCATE (blk_ind_out(dbt_iterator_num_blocks(iter), ndims_tensor(tensor_in)))
651 : blk_ind_out(:, :) = 0
652 : iblk_out = 0
653 : blk_loop: DO WHILE (dbt_iterator_blocks_left(iter))
654 : CALL dbt_iterator_next_block(iter, blk_index, blk_size=blk_size, blk_offset=blk_offset)
655 : DO idim = 1, ndims_tensor(tensor_in)
656 : IF (bounds(1, idim) > blk_offset(idim) - 1 + blk_size(idim)) CYCLE blk_loop
657 : IF (bounds(2, idim) < blk_offset(idim)) CYCLE blk_loop
658 : END DO
659 : iblk_out = iblk_out + 1
660 : blk_ind_out(iblk_out, :) = blk_index
661 : END DO blk_loop
662 : CALL dbt_iterator_stop(iter)
663 :
664 : CALL dbt_reserve_blocks(tensor_out, blk_ind_out(1:iblk_out, :))
665 : DEALLOCATE (blk_ind_out)
666 :
667 : ! copy blocks
668 : CALL dbt_iterator_start(iter, tensor_out)
669 : iter_loop: DO WHILE (dbt_iterator_blocks_left(iter))
670 : CALL dbt_iterator_next_block(iter, blk_index, blk_size=blk_size, blk_offset=blk_offset)
671 :
672 : DO idim = 1, ndims_tensor(tensor_in)
673 : blk_bounds(1, idim) = MAX(bounds(1, idim) - blk_offset(idim) + 1, 1)
674 : blk_bounds(2, idim) = MIN(bounds(2, idim) - blk_offset(idim) + 1, blk_size(idim))
675 : END DO
676 :
677 : #:for ndim in ndims
678 : IF (ndims_tensor(tensor_in) == ${ndim}$) THEN
679 : CALL dbt_get_block(tensor_in, blk_index, block_${ndim}$d, found)
680 :
681 : ALLOCATE (block_put_${ndim}$d(${", ".join(["blk_size("+str(idim)+")" for idim in range(1, ndim+1)])}$))
682 : block_put_${ndim}$d = 0.0_dp
683 : block_put_${ndim}$d(${", ".join(["blk_bounds(1, "+str(idim)+"):blk_bounds(2,"+str(idim)+")" for idim in range(1, ndim+1)])}$) = &
684 : block_${ndim}$d(${", ".join(["blk_bounds(1, "+str(idim)+"):blk_bounds(2,"+str(idim)+")" for idim in range(1, ndim+1)])}$)
685 : CALL dbt_put_block(tensor_out, blk_index, blk_size, block_put_${ndim}$d)
686 : DEALLOCATE (block_${ndim}$d)
687 : DEALLOCATE (block_put_${ndim}$d)
688 : END IF
689 : #:endfor
690 : END DO iter_loop
691 : CALL dbt_iterator_stop(iter)
692 : !$OMP END PARALLEL
693 139604 : CALL dbt_finalize(tensor_out)
694 :
695 139604 : IF (move_data_prv) CALL dbt_clear(tensor_in)
696 :
697 : ! transfer data for batched contraction
698 139604 : CALL dbt_copy_contraction_storage(tensor_in, tensor_out)
699 :
700 139604 : CALL timestop(handle)
701 279208 : END SUBROUTINE
702 :
703 143080 : END MODULE
|