Line data Source code
1 : !--------------------------------------------------------------------------------------------------!
2 : ! CP2K: A general program to perform molecular dynamics simulations !
3 : ! Copyright 2000-2024 CP2K developers group <https://cp2k.org> !
4 : ! !
5 : ! SPDX-License-Identifier: BSD-3-Clause !
6 : !--------------------------------------------------------------------------------------------------!
7 :
8 : MODULE dbm_tests
9 : USE OMP_LIB, ONLY: omp_get_wtime
10 : USE dbm_api, ONLY: &
11 : dbm_checksum, dbm_copy, dbm_create, dbm_create_from_template, dbm_distribution_new, &
12 : dbm_distribution_obj, dbm_distribution_release, dbm_get_col_block_sizes, &
13 : dbm_get_row_block_sizes, dbm_get_stored_coordinates, dbm_multiply, dbm_put_block, &
14 : dbm_release, dbm_type
15 : USE kinds, ONLY: dp,&
16 : int_8
17 : USE machine, ONLY: m_flush
18 : USE message_passing, ONLY: mp_cart_type,&
19 : mp_comm_type,&
20 : mp_dims_create
21 : #include "../base/base_uses.f90"
22 :
23 : IMPLICIT NONE
24 :
25 : PRIVATE
26 :
27 : PUBLIC :: dbm_run_tests, generate_larnv_seed
28 :
29 : INTEGER, PRIVATE, SAVE :: randmat_counter = 0
30 :
31 : CONTAINS
32 :
33 : ! **************************************************************************************************
34 : !> \brief Tests the DBM library.
35 : !> \param mp_group MPI communicator
36 : !> \param io_unit Unit to write to, if not negative
37 : !> \param matrix_sizes Size of matrices to test
38 : !> \param trs Transposes of the two matrices
39 : !> \param bs_m Block sizes of the 1. dimension
40 : !> \param bs_n Block sizes of the 2. dimension
41 : !> \param bs_k Block sizes of the 3. dimension
42 : !> \param sparsities Sparsities of matrices to create
43 : !> \param alpha Alpha value to use in multiply
44 : !> \param beta Beta value to use in multiply
45 : !> \param n_loops Number of repetition for each multiplication
46 : !> \param eps Epsilon value for filtering
47 : !> \param retain_sparsity Retain the result matrix's sparsity
48 : !> \param always_checksum Checksum after each multiplication
49 : !> \author Ole Schuett
50 : ! **************************************************************************************************
51 14 : SUBROUTINE dbm_run_tests(mp_group, io_unit, matrix_sizes, trs, &
52 : bs_m, bs_n, bs_k, sparsities, alpha, beta, &
53 : n_loops, eps, retain_sparsity, always_checksum)
54 :
55 : CLASS(mp_comm_type), INTENT(IN) :: mp_group
56 : INTEGER, INTENT(IN) :: io_unit
57 : INTEGER, DIMENSION(:), INTENT(in) :: matrix_sizes
58 : LOGICAL, DIMENSION(2), INTENT(in) :: trs
59 : INTEGER, DIMENSION(:), POINTER :: bs_m, bs_n, bs_k
60 : REAL(kind=dp), DIMENSION(3), INTENT(in) :: sparsities
61 : REAL(kind=dp), INTENT(in) :: alpha, beta
62 : INTEGER, INTENT(IN) :: n_loops
63 : REAL(kind=dp), INTENT(in) :: eps
64 : LOGICAL, INTENT(in) :: retain_sparsity, always_checksum
65 :
66 : CHARACTER(len=*), PARAMETER :: routineN = 'dbm_run_tests'
67 :
68 : INTEGER :: bmax, bmin, handle
69 14 : INTEGER, CONTIGUOUS, DIMENSION(:), POINTER :: col_dist_a, col_dist_b, col_dist_c, &
70 14 : row_dist_a, row_dist_b, row_dist_c, &
71 14 : sizes_k, sizes_m, sizes_n
72 : INTEGER, DIMENSION(2) :: npdims
73 : TYPE(dbm_distribution_obj) :: dist_a, dist_b, dist_c
74 : TYPE(dbm_type), TARGET :: matrix_a, matrix_b, matrix_c
75 14 : TYPE(mp_cart_type) :: cart_group
76 :
77 14 : CALL timeset(routineN, handle)
78 :
79 : ! Create MPI processor grid.
80 14 : npdims(:) = 0
81 14 : CALL mp_dims_create(mp_group%num_pe, npdims)
82 14 : CALL cart_group%create(mp_group, 2, npdims)
83 42 : npdims = cart_group%num_pe_cart
84 :
85 : ! Initialize random number generator.
86 14 : randmat_counter = 12341313
87 :
88 : ! Create the row/column block sizes.
89 14 : NULLIFY (sizes_k, sizes_m, sizes_n)
90 14 : IF (ASSOCIATED(bs_m)) THEN
91 50 : bmin = MINVAL(bs_m(2::2))
92 50 : bmax = MAXVAL(bs_m(2::2))
93 14 : CALL generate_mixed_block_sizes(sizes_m, matrix_sizes(1), bs_m)
94 : ELSE
95 0 : CALL generate_mixed_block_sizes(sizes_m, matrix_sizes(1), (/1, 13, 2, 5/))
96 0 : bmin = 5; bmax = 13
97 : END IF
98 14 : IF (ASSOCIATED(bs_n)) THEN
99 50 : bmin = MIN(bmin, MINVAL(bs_n(2::2)))
100 50 : bmax = MAX(bmax, MAXVAL(bs_n(2::2)))
101 14 : CALL generate_mixed_block_sizes(sizes_n, matrix_sizes(2), bs_n)
102 : ELSE
103 0 : CALL generate_mixed_block_sizes(sizes_n, matrix_sizes(2), (/1, 13, 2, 5/))
104 0 : bmin = MIN(bmin, 5); bmax = MAX(bmax, 13)
105 : END IF
106 14 : IF (ASSOCIATED(bs_k)) THEN
107 50 : bmin = MIN(bmin, MINVAL(bs_k(2::2)))
108 50 : bmax = MAX(bmax, MAXVAL(bs_k(2::2)))
109 14 : CALL generate_mixed_block_sizes(sizes_k, matrix_sizes(3), bs_k)
110 : ELSE
111 0 : CALL generate_mixed_block_sizes(sizes_k, matrix_sizes(3), (/1, 13, 2, 5/))
112 0 : bmin = MIN(bmin, 5); bmax = MAX(bmax, 13)
113 : END IF
114 :
115 : ! Create Matrix C
116 14 : CALL generate_1d_dist(row_dist_c, SIZE(sizes_m), npdims(1), sizes_m)
117 14 : CALL generate_1d_dist(col_dist_c, SIZE(sizes_n), npdims(2), sizes_n)
118 14 : CALL dbm_distribution_new(dist_c, cart_group, row_dist_c, col_dist_c)
119 14 : CALL dbm_create(matrix_c, "Matrix C", dist_c, sizes_m, sizes_n)
120 14 : CALL fill_matrix(matrix_c, sparsity=sparsities(3), group=cart_group)
121 14 : CALL dbm_distribution_release(dist_c)
122 :
123 : ! Create Matrix A
124 14 : IF (trs(1)) THEN
125 0 : CALL generate_1d_dist(row_dist_a, SIZE(sizes_k), npdims(1), sizes_k)
126 0 : CALL generate_1d_dist(col_dist_a, SIZE(sizes_m), npdims(2), sizes_m)
127 0 : CALL dbm_distribution_new(dist_a, cart_group, row_dist_a, col_dist_a)
128 0 : CALL dbm_create(matrix_a, "Matrix A", dist_a, sizes_k, sizes_m)
129 0 : CALL fill_matrix(matrix_a, sparsity=sparsities(1), group=cart_group)
130 0 : DEALLOCATE (row_dist_a, col_dist_a)
131 : ELSE
132 14 : CALL generate_1d_dist(col_dist_a, SIZE(sizes_k), npdims(2), sizes_k)
133 14 : CALL dbm_distribution_new(dist_a, cart_group, row_dist_c, col_dist_a)
134 14 : CALL dbm_create(matrix_a, "Matrix A", dist_a, sizes_m, sizes_k)
135 14 : CALL fill_matrix(matrix_a, sparsity=sparsities(1), group=cart_group)
136 14 : DEALLOCATE (col_dist_a)
137 : END IF
138 14 : CALL dbm_distribution_release(dist_a)
139 :
140 : ! Create Matrix B
141 14 : IF (trs(2)) THEN
142 14 : CALL generate_1d_dist(row_dist_b, SIZE(sizes_n), npdims(1), sizes_n)
143 14 : CALL generate_1d_dist(col_dist_b, SIZE(sizes_k), npdims(2), sizes_k)
144 14 : CALL dbm_distribution_new(dist_b, cart_group, row_dist_b, col_dist_b)
145 14 : CALL dbm_create(matrix_b, "Matrix B", dist_b, sizes_n, sizes_k)
146 14 : CALL fill_matrix(matrix_b, sparsity=sparsities(2), group=cart_group)
147 14 : DEALLOCATE (row_dist_b, col_dist_b)
148 : ELSE
149 0 : CALL generate_1d_dist(row_dist_b, SIZE(sizes_k), npdims(1), sizes_k)
150 0 : CALL dbm_distribution_new(dist_b, cart_group, row_dist_b, col_dist_c)
151 0 : CALL dbm_create(matrix_b, "Matrix B", dist_b, sizes_k, sizes_n)
152 0 : CALL fill_matrix(matrix_b, sparsity=sparsities(2), group=cart_group)
153 0 : DEALLOCATE (row_dist_b)
154 : END IF
155 14 : CALL dbm_distribution_release(dist_b)
156 14 : DEALLOCATE (row_dist_c, col_dist_c, sizes_m, sizes_n, sizes_k)
157 :
158 : ! Prepare test parameters
159 14 : IF (io_unit > 0) THEN
160 : WRITE (io_unit, '(A,3(1X,I6),1X,A,2(1X,I5),1X,A,2(1X,L1))') &
161 7 : "Testing with sizes", matrix_sizes(1:3), &
162 14 : "min/max block sizes", bmin, bmax, "transposed?", trs(1:2)
163 : END IF
164 :
165 : CALL run_multiply_test(matrix_a, matrix_b, matrix_c, &
166 : transa=trs(1), transb=trs(2), &
167 : alpha=alpha, beta=beta, &
168 : n_loops=n_loops, &
169 : eps=eps, &
170 : group=cart_group, &
171 : io_unit=io_unit, &
172 : always_checksum=always_checksum, &
173 14 : retain_sparsity=retain_sparsity)
174 :
175 14 : CALL dbm_release(matrix_a)
176 14 : CALL dbm_release(matrix_b)
177 14 : CALL dbm_release(matrix_c)
178 14 : CALL cart_group%free()
179 :
180 14 : CALL timestop(handle)
181 28 : END SUBROUTINE dbm_run_tests
182 :
183 : ! **************************************************************************************************
184 : !> \brief Runs the multiplication test.
185 : !> \param matrix_a ...
186 : !> \param matrix_b ...
187 : !> \param matrix_c ...
188 : !> \param transa ...
189 : !> \param transb ...
190 : !> \param alpha ...
191 : !> \param beta ...
192 : !> \param retain_sparsity ...
193 : !> \param n_loops ...
194 : !> \param eps ...
195 : !> \param group ...
196 : !> \param io_unit ...
197 : !> \param always_checksum ...
198 : !> \author Ole Schuett
199 : ! **************************************************************************************************
200 14 : SUBROUTINE run_multiply_test(matrix_a, matrix_b, matrix_c, transa, transb, alpha, beta, &
201 : retain_sparsity, n_loops, eps, group, io_unit, always_checksum)
202 : TYPE(dbm_type), INTENT(in) :: matrix_a, matrix_b
203 : TYPE(dbm_type), INTENT(inout) :: matrix_c
204 : LOGICAL, INTENT(in) :: transa, transb
205 : REAL(kind=dp), INTENT(in) :: alpha, beta
206 : LOGICAL, INTENT(in) :: retain_sparsity
207 : INTEGER, INTENT(IN) :: n_loops
208 : REAL(kind=dp), INTENT(in) :: eps
209 :
210 : CLASS(mp_comm_type), INTENT(IN) :: group
211 : INTEGER, INTENT(IN) :: io_unit
212 : LOGICAL, INTENT(in) :: always_checksum
213 :
214 : CHARACTER(len=*), PARAMETER :: routineN = 'run_multiply_test'
215 :
216 : INTEGER :: handle, loop_iter
217 : INTEGER(kind=int_8) :: flop
218 : REAL(kind=dp) :: cs, duration, flops_all, time_start
219 : TYPE(dbm_type) :: matrix_c_orig
220 :
221 14 : CALL timeset(routineN, handle)
222 :
223 14 : CALL dbm_create_from_template(matrix_c_orig, "Original Matrix C", matrix_c)
224 14 : CALL dbm_copy(matrix_c_orig, matrix_c)
225 :
226 : ASSOCIATE (numnodes => group%num_pe)
227 44 : DO loop_iter = 1, n_loops
228 30 : CALL group%sync()
229 30 : time_start = omp_get_wtime()
230 30 : IF (eps < -0.0_dp) THEN
231 : CALL dbm_multiply(transa, transb, alpha, matrix_a, matrix_b, beta, matrix_c, &
232 26 : retain_sparsity=retain_sparsity, flop=flop)
233 : ELSE
234 : CALL dbm_multiply(transa, transb, alpha, matrix_a, matrix_b, beta, matrix_c, &
235 4 : retain_sparsity=retain_sparsity, flop=flop, filter_eps=eps)
236 : END IF
237 30 : duration = omp_get_wtime() - time_start
238 :
239 30 : CALL group%max(duration)
240 30 : CALL group%sum(flop)
241 30 : duration = MAX(duration, EPSILON(duration)) ! avoid division by zero
242 30 : flops_all = REAL(flop, KIND=dp)/duration/numnodes/(1024*1024)
243 30 : IF (io_unit .GT. 0) THEN
244 : WRITE (io_unit, '(A,I5,A,I5,A,F12.3,A,I9,A)') &
245 15 : " loop ", loop_iter, " with ", numnodes, " MPI ranks: using ", &
246 30 : duration, "s ", INT(flops_all), " Mflops/rank"
247 15 : CALL m_flush(io_unit)
248 : END IF
249 :
250 30 : IF (loop_iter .EQ. n_loops .OR. always_checksum) THEN
251 14 : cs = dbm_checksum(matrix_c)
252 14 : IF (io_unit > 0) THEN
253 7 : WRITE (io_unit, *) "Final checksums", cs
254 : END IF
255 : END IF
256 :
257 44 : CALL dbm_copy(matrix_c, matrix_c_orig)
258 : END DO
259 : END ASSOCIATE
260 :
261 14 : CALL dbm_release(matrix_c_orig)
262 14 : CALL timestop(handle)
263 14 : END SUBROUTINE run_multiply_test
264 :
265 : ! **************************************************************************************************
266 : !> \brief Fills give matrix with random blocks.
267 : !> \param matrix ...
268 : !> \param sparsity ...
269 : !> \param group ...
270 : ! **************************************************************************************************
271 42 : SUBROUTINE fill_matrix(matrix, sparsity, group)
272 : TYPE(dbm_type), INTENT(inout) :: matrix
273 : REAL(kind=dp), INTENT(in) :: sparsity
274 :
275 : CLASS(mp_comm_type), INTENT(IN) :: group
276 :
277 : CHARACTER(len=*), PARAMETER :: routineN = 'fill_matrix'
278 :
279 : INTEGER :: block_node, col, handle, ncol, &
280 : nrow, row
281 : INTEGER(KIND=int_8) :: counter, ele, increment, nmax
282 : INTEGER, DIMENSION(4) :: iseed, jseed
283 42 : INTEGER, DIMENSION(:), POINTER :: col_block_sizes, row_block_sizes
284 : REAL(kind=dp) :: my_sparsity
285 42 : REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :) :: block
286 : REAL(kind=dp), DIMENSION(1) :: value
287 :
288 42 : CALL timeset(routineN, handle)
289 :
290 : ! Check that the counter was initialised (or has not overflowed)
291 42 : CPASSERT(randmat_counter .NE. 0)
292 : ! the counter goes into the seed. Every new call gives a new random matrix
293 42 : randmat_counter = randmat_counter + 1
294 :
295 42 : IF (sparsity .GT. 1) THEN
296 0 : my_sparsity = sparsity/100.0
297 : ELSE
298 : my_sparsity = sparsity
299 : END IF
300 :
301 42 : row_block_sizes => dbm_get_row_block_sizes(matrix)
302 42 : col_block_sizes => dbm_get_col_block_sizes(matrix)
303 42 : nrow = SIZE(row_block_sizes)
304 42 : ncol = SIZE(col_block_sizes)
305 42 : nmax = INT(nrow, KIND=int_8)*INT(ncol, KIND=int_8)
306 42 : ele = -1
307 42 : counter = 0
308 42 : jseed = generate_larnv_seed(7, 42, 3, 42, randmat_counter)
309 :
310 : ASSOCIATE (mynode => group%mepos)
311 42 : DO
312 : ! find the next block to add, this is given by a geometrically distributed variable
313 : ! we number the blocks of the matrix and jump to the next one
314 656652 : CALL dlarnv(1, jseed, 1, value)
315 656652 : IF (my_sparsity > 0) THEN
316 656652 : increment = 1 + FLOOR(LOG(value(1))/LOG(my_sparsity), KIND=int_8)
317 : ELSE
318 : increment = 1
319 : END IF
320 656652 : ele = ele + increment
321 656652 : IF (ele >= nmax) EXIT
322 656610 : counter = counter + 1
323 656610 : row = INT(ele/ncol) + 1
324 656610 : col = INT(MOD(ele, INT(ncol, KIND=KIND(ele)))) + 1
325 :
326 : ! Only deal with the local blocks.
327 656610 : CALL dbm_get_stored_coordinates(matrix, row, col, block_node)
328 656652 : IF (block_node == mynode) THEN
329 : ! fill based on a block based seed, makes this the same values in parallel
330 328305 : iseed = generate_larnv_seed(row, nrow, col, ncol, randmat_counter)
331 1310916 : ALLOCATE (block(row_block_sizes(row), col_block_sizes(col)))
332 984915 : CALL dlarnv(1, iseed, SIZE(block), block)
333 328305 : CALL dbm_put_block(matrix, row, col, block)
334 328305 : DEALLOCATE (block)
335 : END IF
336 : END DO
337 : END ASSOCIATE
338 :
339 42 : CALL timestop(handle)
340 84 : END SUBROUTINE fill_matrix
341 :
342 : ! **************************************************************************************************
343 : !> \brief Assigns given elements to bins. Uses given element_sizes for load balancing.
344 : !> \param bin_dist Distribution of elements to bins
345 : !> \param nelements Number of elements
346 : !> \param nbins Number of bins
347 : !> \param element_sizes sizes of elements
348 : !> \author Ole Schuett
349 : ! **************************************************************************************************
350 70 : SUBROUTINE generate_1d_dist(bin_dist, nelements, nbins, element_sizes)
351 : INTEGER, DIMENSION(:), INTENT(OUT), POINTER :: bin_dist
352 : INTEGER, INTENT(IN) :: nelements, nbins
353 : INTEGER, DIMENSION(:), INTENT(IN) :: element_sizes
354 :
355 : INTEGER :: bin, i
356 140 : INTEGER, DIMENSION(nbins) :: bin_counts
357 :
358 70 : CPASSERT(SIZE(element_sizes) == nelements)
359 210 : ALLOCATE (bin_dist(nelements))
360 :
361 336 : bin_counts(:) = [(0, bin=0, nbins - 1)]
362 1002560 : DO i = 1, nelements
363 3408466 : bin = MINLOC(bin_counts, dim=1) ! greedy algorithm
364 1002490 : bin_dist(i) = bin - 1
365 1002560 : bin_counts(bin) = bin_counts(bin) + element_sizes(i)
366 : END DO
367 70 : END SUBROUTINE generate_1d_dist
368 :
369 : ! **************************************************************************************************
370 : !> \brief Generates a block_sizes by "randomly" selecting from size_mix.
371 : !> \param block_sizes ...
372 : !> \param size_sum ...
373 : !> \param size_mix ...
374 : !> \author Ole Schuett
375 : ! **************************************************************************************************
376 42 : SUBROUTINE generate_mixed_block_sizes(block_sizes, size_sum, size_mix)
377 : INTEGER, DIMENSION(:), INTENT(inout), POINTER :: block_sizes
378 : INTEGER, INTENT(in) :: size_sum
379 : INTEGER, DIMENSION(:), INTENT(in) :: size_mix
380 :
381 : INTEGER :: block_size, current_sum, ipass, nblocks, &
382 : nsize_mix, selector
383 42 : INTEGER, ALLOCATABLE, DIMENSION(:, :) :: mixer
384 :
385 42 : CPASSERT(.NOT. ASSOCIATED(block_sizes))
386 42 : nsize_mix = SIZE(size_mix)/2
387 126 : ALLOCATE (mixer(3, nsize_mix))
388 :
389 : ! 1st pass to compute nblocks and allocate block_sizes, 2nd pass to fill block_sizes.
390 126 : DO ipass = 1, 2
391 300 : mixer(1, :) = size_mix(1:nsize_mix*2 - 1:2)
392 300 : mixer(2, :) = size_mix(2:nsize_mix*2:2)
393 300 : mixer(3, :) = 1
394 : selector = 1
395 : nblocks = 0
396 : current_sum = 0
397 1203072 : DO WHILE (current_sum < size_sum)
398 1202988 : nblocks = nblocks + 1
399 1202988 : block_size = MIN(mixer(2, selector), size_sum - current_sum)
400 1202988 : IF (ipass == 2) THEN
401 601494 : block_sizes(nblocks) = block_size
402 : END IF
403 1202988 : current_sum = current_sum + block_size
404 1202988 : mixer(3, selector) = mixer(3, selector) + 1
405 1203072 : IF (mixer(3, selector) > mixer(1, selector)) THEN
406 1202988 : mixer(3, selector) = 1
407 1202988 : selector = MOD(selector, nsize_mix) + 1
408 : END IF
409 : END DO
410 126 : IF (ipass == 1) THEN
411 126 : ALLOCATE (block_sizes(nblocks))
412 : END IF
413 : END DO
414 :
415 601536 : current_sum = SUM(block_sizes)
416 42 : CPASSERT(current_sum == size_sum)
417 42 : END SUBROUTINE generate_mixed_block_sizes
418 :
419 : ! **************************************************************************************************
420 : !> \brief Generate a seed respecting the lapack constraints,
421 : !> - values between 0..4095 (2**12-1)
422 : !> - iseed(4) odd
423 : !> also try to avoid iseed that are zero.
424 : !> Have but with a unique 2D mapping (irow,icol), and a 'mini-seed' ival
425 : !>
426 : !> \param irow 1..nrow
427 : !> \param nrow ...
428 : !> \param icol 1..ncol
429 : !> \param ncol ...
430 : !> \param ival mini-seed
431 : !> \return a lapack compatible seed
432 : !> \author Patrick Seewald
433 : ! **************************************************************************************************
434 329664 : FUNCTION generate_larnv_seed(irow, nrow, icol, ncol, ival) RESULT(iseed)
435 :
436 : INTEGER, INTENT(IN) :: irow, nrow, icol, ncol, ival
437 : INTEGER :: iseed(4)
438 :
439 : INTEGER(KIND=int_8) :: map
440 :
441 329664 : map = ((irow - 1 + icol*INT(nrow, int_8))*(1 + MODULO(ival, 2**16)))*2 + 1 + 0*ncol ! ncol used
442 329664 : iseed(4) = INT(MODULO(map, 2_int_8**12)); map = map/2_int_8**12; ! keep odd
443 329664 : iseed(3) = INT(MODULO(IEOR(map, 3541_int_8), 2_int_8**12)); map = map/2_int_8**12
444 329664 : iseed(2) = INT(MODULO(IEOR(map, 1153_int_8), 2_int_8**12)); map = map/2_int_8**12
445 329664 : iseed(1) = INT(MODULO(IEOR(map, 2029_int_8), 2_int_8**12)); map = map/2_int_8**12
446 329664 : END FUNCTION generate_larnv_seed
447 :
448 : END MODULE dbm_tests
|