Line data Source code
1 : !--------------------------------------------------------------------------------------------------!
2 : ! CP2K: A general program to perform molecular dynamics simulations !
3 : ! Copyright 2000-2024 CP2K developers group <https://cp2k.org> !
4 : ! !
5 : ! SPDX-License-Identifier: GPL-2.0-or-later !
6 : !--------------------------------------------------------------------------------------------------!
7 :
8 : ! **************************************************************************************************
9 : !> \brief Performance tests for basic tasks like matrix multiplies, copy, fft.
10 : !> \par History
11 : !> 30-Nov-2000 (JGH) added input
12 : !> 02-Jan-2001 (JGH) Parallel FFT
13 : !> 28-Feb-2002 (JGH) Clebsch-Gordon Coefficients
14 : !> 06-Jun-2003 (JGH) Real space grid test
15 : !> Eigensolver test (29.08.05,MK)
16 : !> \author JGH 6-NOV-2000
17 : ! **************************************************************************************************
18 : MODULE library_tests
19 :
20 : USE ai_coulomb_test, ONLY: eri_test
21 : USE cell_methods, ONLY: cell_create,&
22 : init_cell
23 : USE cell_types, ONLY: cell_release,&
24 : cell_type
25 : USE cg_test, ONLY: clebsch_gordon_test
26 : USE cp_blacs_env, ONLY: cp_blacs_env_create,&
27 : cp_blacs_env_release,&
28 : cp_blacs_env_type
29 : USE cp_dbcsr_api, ONLY: dbcsr_reset_randmat_seed,&
30 : dbcsr_run_tests
31 : USE cp_eri_mme_interface, ONLY: cp_eri_mme_perf_acc_test
32 : USE cp_files, ONLY: close_file,&
33 : open_file
34 : USE cp_fm_basic_linalg, ONLY: cp_fm_gemm
35 : USE cp_fm_diag, ONLY: cp_fm_syevd,&
36 : cp_fm_syevx
37 : USE cp_fm_struct, ONLY: cp_fm_struct_create,&
38 : cp_fm_struct_get,&
39 : cp_fm_struct_release,&
40 : cp_fm_struct_type
41 : USE cp_fm_types, ONLY: cp_fm_create,&
42 : cp_fm_pilaenv,&
43 : cp_fm_release,&
44 : cp_fm_set_all,&
45 : cp_fm_set_submatrix,&
46 : cp_fm_to_fm,&
47 : cp_fm_type
48 : USE cp_log_handling, ONLY: cp_get_default_logger,&
49 : cp_logger_type
50 : USE cp_output_handling, ONLY: cp_print_key_finished_output,&
51 : cp_print_key_unit_nr
52 : USE cp_realspace_grid_init, ONLY: init_input_type
53 : USE dbm_tests, ONLY: dbm_run_tests
54 : USE fft_tools, ONLY: BWFFT,&
55 : FFT_RADIX_CLOSEST,&
56 : FWFFT,&
57 : fft3d,&
58 : fft_radix_operations,&
59 : finalize_fft,&
60 : init_fft
61 : USE global_types, ONLY: global_environment_type
62 : USE input_constants, ONLY: do_diag_syevd,&
63 : do_diag_syevx,&
64 : do_mat_random,&
65 : do_mat_read,&
66 : do_pwgrid_ns_fullspace,&
67 : do_pwgrid_ns_halfspace,&
68 : do_pwgrid_spherical
69 : USE input_section_types, ONLY: section_vals_get,&
70 : section_vals_get_subs_vals,&
71 : section_vals_type,&
72 : section_vals_val_get
73 : USE kinds, ONLY: dp
74 : USE machine, ONLY: m_flush,&
75 : m_walltime
76 : USE message_passing, ONLY: mp_para_env_type
77 : USE minimax_exp, ONLY: validate_exp_minimax
78 : USE mp2_grids, ONLY: test_least_square_ft
79 : USE mp_perf_test, ONLY: mpi_perf_test
80 : USE parallel_gemm_api, ONLY: parallel_gemm
81 : USE parallel_rng_types, ONLY: UNIFORM,&
82 : rng_stream_type
83 : USE pw_grid_types, ONLY: FULLSPACE,&
84 : HALFSPACE,&
85 : pw_grid_type
86 : USE pw_grids, ONLY: pw_grid_create,&
87 : pw_grid_release
88 : USE pw_methods, ONLY: pw_transfer,&
89 : pw_zero
90 : USE pw_types, ONLY: pw_c1d_gs_type,&
91 : pw_c3d_rs_type,&
92 : pw_r3d_rs_type
93 : USE realspace_grid_types, ONLY: &
94 : realspace_grid_desc_type, realspace_grid_input_type, realspace_grid_type, rs_grid_create, &
95 : rs_grid_create_descriptor, rs_grid_print, rs_grid_release, rs_grid_release_descriptor, &
96 : rs_grid_zero, transfer_pw2rs, transfer_rs2pw
97 : USE shg_integrals_test, ONLY: shg_integrals_perf_acc_test
98 : #include "./base/base_uses.f90"
99 :
100 : IMPLICIT NONE
101 :
102 : PRIVATE
103 : PUBLIC :: lib_test
104 :
105 : INTEGER :: runtest(100)
106 : REAL(KIND=dp) :: max_memory
107 : REAL(KIND=dp), PARAMETER :: threshold = 1.0E-8_dp
108 : CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'library_tests'
109 :
110 : CONTAINS
111 :
112 : ! **************************************************************************************************
113 : !> \brief Master routine for tests
114 : !> \param root_section ...
115 : !> \param para_env ...
116 : !> \param globenv ...
117 : !> \par History
118 : !> none
119 : !> \author JGH 6-NOV-2000
120 : ! **************************************************************************************************
121 720 : SUBROUTINE lib_test(root_section, para_env, globenv)
122 :
123 : TYPE(section_vals_type), POINTER :: root_section
124 : TYPE(mp_para_env_type), POINTER :: para_env
125 : TYPE(global_environment_type), POINTER :: globenv
126 :
127 : CHARACTER(LEN=*), PARAMETER :: routineN = 'lib_test'
128 :
129 : INTEGER :: handle, iw
130 : LOGICAL :: explicit
131 : TYPE(cp_logger_type), POINTER :: logger
132 : TYPE(section_vals_type), POINTER :: cp_dbcsr_test_section, cp_fm_gemm_test_section, &
133 : dbm_test_section, eigensolver_section, eri_mme_test_section, pw_transfer_section, &
134 : rs_pw_transfer_section, shg_integrals_test_section
135 :
136 80 : CALL timeset(routineN, handle)
137 :
138 80 : logger => cp_get_default_logger()
139 80 : iw = cp_print_key_unit_nr(logger, root_section, "TEST%PROGRAM_RUN_INFO", extension=".log")
140 :
141 80 : IF (iw > 0) THEN
142 40 : WRITE (iw, '(T2,79("*"))')
143 40 : WRITE (iw, '(A,T31,A,T80,A)') ' *', ' PERFORMANCE TESTS ', '*'
144 40 : WRITE (iw, '(T2,79("*"))')
145 : END IF
146 : !
147 80 : CALL test_input(root_section, para_env)
148 : !
149 80 : IF (runtest(1) /= 0) CALL copy_test(para_env, iw)
150 : !
151 80 : IF (runtest(2) /= 0) CALL matmul_test(para_env, test_matmul=.TRUE., test_dgemm=.FALSE., iw=iw)
152 80 : IF (runtest(5) /= 0) CALL matmul_test(para_env, test_matmul=.FALSE., test_dgemm=.TRUE., iw=iw)
153 : !
154 80 : IF (runtest(3) /= 0) CALL fft_test(para_env, iw, globenv%fftw_plan_type, &
155 2 : globenv%fftw_wisdom_file_name)
156 : !
157 80 : IF (runtest(4) /= 0) CALL eri_test(iw)
158 : !
159 80 : IF (runtest(6) /= 0) CALL clebsch_gordon_test()
160 : !
161 : ! runtest 7 has been deleted and can be recycled
162 : !
163 80 : IF (runtest(8) /= 0) CALL mpi_perf_test(para_env, runtest(8), iw)
164 : !
165 80 : IF (runtest(10) /= 0) CALL validate_exp_minimax(runtest(10), iw)
166 : !
167 80 : IF (runtest(11) /= 0) CALL test_least_square_ft(runtest(11), iw)
168 : !
169 :
170 80 : rs_pw_transfer_section => section_vals_get_subs_vals(root_section, "TEST%RS_PW_TRANSFER")
171 80 : CALL section_vals_get(rs_pw_transfer_section, explicit=explicit)
172 80 : IF (explicit) THEN
173 2 : CALL rs_pw_transfer_test(para_env, iw, globenv, rs_pw_transfer_section)
174 : END IF
175 :
176 80 : pw_transfer_section => section_vals_get_subs_vals(root_section, "TEST%PW_TRANSFER")
177 80 : CALL section_vals_get(pw_transfer_section, explicit=explicit)
178 80 : IF (explicit) THEN
179 10 : CALL pw_fft_test(para_env, iw, globenv, pw_transfer_section)
180 : END IF
181 :
182 80 : cp_fm_gemm_test_section => section_vals_get_subs_vals(root_section, "TEST%CP_FM_GEMM")
183 80 : CALL section_vals_get(cp_fm_gemm_test_section, explicit=explicit)
184 80 : IF (explicit) THEN
185 4 : CALL cp_fm_gemm_test(para_env, iw, cp_fm_gemm_test_section)
186 : END IF
187 :
188 80 : eigensolver_section => section_vals_get_subs_vals(root_section, "TEST%EIGENSOLVER")
189 80 : CALL section_vals_get(eigensolver_section, explicit=explicit)
190 80 : IF (explicit) THEN
191 2 : CALL eigensolver_test(para_env, iw, eigensolver_section)
192 : END IF
193 :
194 80 : eri_mme_test_section => section_vals_get_subs_vals(root_section, "TEST%ERI_MME_TEST")
195 80 : CALL section_vals_get(eri_mme_test_section, explicit=explicit)
196 80 : IF (explicit) THEN
197 8 : CALL cp_eri_mme_perf_acc_test(para_env, iw, eri_mme_test_section)
198 : END IF
199 :
200 80 : shg_integrals_test_section => section_vals_get_subs_vals(root_section, "TEST%SHG_INTEGRALS_TEST")
201 80 : CALL section_vals_get(shg_integrals_test_section, explicit=explicit)
202 80 : IF (explicit) THEN
203 4 : CALL shg_integrals_perf_acc_test(iw, shg_integrals_test_section)
204 : END IF
205 :
206 : ! DBCSR tests
207 80 : cp_dbcsr_test_section => section_vals_get_subs_vals(root_section, "TEST%CP_DBCSR")
208 80 : CALL section_vals_get(cp_dbcsr_test_section, explicit=explicit)
209 80 : IF (explicit) THEN
210 32 : CALL cp_dbcsr_tests(para_env, iw, cp_dbcsr_test_section)
211 : END IF
212 :
213 : ! DBM tests
214 80 : dbm_test_section => section_vals_get_subs_vals(root_section, "TEST%DBM")
215 80 : CALL section_vals_get(dbm_test_section, explicit=explicit)
216 80 : IF (explicit) THEN
217 14 : CALL run_dbm_tests(para_env, iw, dbm_test_section)
218 : END IF
219 :
220 80 : CALL cp_print_key_finished_output(iw, logger, root_section, "TEST%PROGRAM_RUN_INFO")
221 :
222 80 : CALL timestop(handle)
223 :
224 80 : END SUBROUTINE lib_test
225 :
226 : ! **************************************************************************************************
227 : !> \brief Reads input section &TEST ... &END
228 : !> \param root_section ...
229 : !> \param para_env ...
230 : !> \author JGH 30-NOV-2000
231 : !> \note
232 : !> I---------------------------------------------------------------------------I
233 : !> I SECTION: &TEST ... &END I
234 : !> I I
235 : !> I MEMORY max_memory I
236 : !> I COPY n I
237 : !> I MATMUL n I
238 : !> I FFT n I
239 : !> I ERI n I
240 : !> I PW_FFT n I
241 : !> I Clebsch-Gordon n I
242 : !> I RS_GRIDS n I
243 : !> I MPI n I
244 : !> I RNG n -> Parallel random number generator I
245 : !> I---------------------------------------------------------------------------I
246 : ! **************************************************************************************************
247 160 : SUBROUTINE test_input(root_section, para_env)
248 : TYPE(section_vals_type), POINTER :: root_section
249 : TYPE(mp_para_env_type), POINTER :: para_env
250 :
251 : TYPE(section_vals_type), POINTER :: test_section
252 :
253 : !
254 : !..defaults
255 : ! using this style is not recommended, introduce sections instead (see e.g. cp_fm_gemm)
256 :
257 80 : runtest = 0
258 80 : test_section => section_vals_get_subs_vals(root_section, "TEST")
259 80 : CALL section_vals_val_get(test_section, "MEMORY", r_val=max_memory)
260 80 : CALL section_vals_val_get(test_section, 'COPY', i_val=runtest(1))
261 80 : CALL section_vals_val_get(test_section, 'MATMUL', i_val=runtest(2))
262 80 : CALL section_vals_val_get(test_section, 'DGEMM', i_val=runtest(5))
263 80 : CALL section_vals_val_get(test_section, 'FFT', i_val=runtest(3))
264 80 : CALL section_vals_val_get(test_section, 'ERI', i_val=runtest(4))
265 80 : CALL section_vals_val_get(test_section, 'CLEBSCH_GORDON', i_val=runtest(6))
266 80 : CALL section_vals_val_get(test_section, 'MPI', i_val=runtest(8))
267 80 : CALL section_vals_val_get(test_section, 'MINIMAX', i_val=runtest(10))
268 80 : CALL section_vals_val_get(test_section, 'LEAST_SQ_FT', i_val=runtest(11))
269 :
270 80 : CALL para_env%sync()
271 80 : END SUBROUTINE test_input
272 :
273 : ! **************************************************************************************************
274 : !> \brief Tests the performance to copy two vectors.
275 : !> \param para_env ...
276 : !> \param iw ...
277 : !> \par History
278 : !> none
279 : !> \author JGH 6-NOV-2000
280 : !> \note
281 : !> The results of these tests allow to determine the size of the cache
282 : !> of the CPU. This can be used to optimize the performance of the
283 : !> FFTSG library.
284 : ! **************************************************************************************************
285 2 : SUBROUTINE copy_test(para_env, iw)
286 : TYPE(mp_para_env_type), POINTER :: para_env
287 : INTEGER :: iw
288 :
289 : INTEGER :: i, ierr, j, len, ntim, siz
290 : REAL(KIND=dp) :: perf, t, tend, tstart
291 2 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:) :: ca, cb
292 :
293 : ! test for copy --> Cache size
294 :
295 2 : siz = ABS(runtest(1))
296 2 : IF (para_env%is_source()) WRITE (iw, '(//,A,/)') " Test of copy ( F95 ) "
297 34 : DO i = 6, 24
298 34 : len = 2**i
299 34 : IF (8.0_dp*REAL(len, KIND=dp) > max_memory*0.5_dp) EXIT
300 96 : ALLOCATE (ca(len), STAT=ierr)
301 32 : IF (ierr /= 0) EXIT
302 64 : ALLOCATE (cb(len), STAT=ierr)
303 32 : IF (ierr /= 0) EXIT
304 :
305 32 : CALL RANDOM_NUMBER(ca)
306 32 : ntim = NINT(1.e7_dp/REAL(len, KIND=dp))
307 32 : ntim = MAX(ntim, 1)
308 32 : ntim = MIN(ntim, siz*10000)
309 :
310 32 : tstart = m_walltime()
311 512524 : DO j = 1, ntim
312 315058412 : cb(:) = ca(:)
313 512524 : ca(1) = REAL(j, KIND=dp)
314 : END DO
315 32 : tend = m_walltime()
316 32 : t = tend - tstart + threshold
317 32 : IF (t > 0.0_dp) THEN
318 32 : perf = REAL(ntim, KIND=dp)*REAL(len, KIND=dp)*1.e-6_dp/t
319 : ELSE
320 0 : perf = 0.0_dp
321 : END IF
322 :
323 32 : IF (para_env%is_source()) THEN
324 16 : WRITE (iw, '(A,i2,i10,A,T59,F14.4,A)') " Copy test: Size = 2^", i, &
325 32 : len/1024, " Kwords", perf, " Mcopy/s"
326 : END IF
327 :
328 32 : DEALLOCATE (ca)
329 34 : DEALLOCATE (cb)
330 : END DO
331 2 : CALL para_env%sync()
332 2 : END SUBROUTINE copy_test
333 :
334 : ! **************************************************************************************************
335 : !> \brief Tests the performance of different kinds of matrix matrix multiply
336 : !> kernels for the BLAS and F95 intrinsic matmul.
337 : !> \param para_env ...
338 : !> \param test_matmul ...
339 : !> \param test_dgemm ...
340 : !> \param iw ...
341 : !> \par History
342 : !> none
343 : !> \author JGH 6-NOV-2000
344 : ! **************************************************************************************************
345 2 : SUBROUTINE matmul_test(para_env, test_matmul, test_dgemm, iw)
346 : TYPE(mp_para_env_type), POINTER :: para_env
347 : LOGICAL :: test_matmul, test_dgemm
348 : INTEGER :: iw
349 :
350 : INTEGER :: i, ierr, j, len, ntim, siz
351 : REAL(KIND=dp) :: perf, t, tend, tstart, xdum
352 2 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :) :: ma, mb, mc
353 :
354 : ! test for matrix multpies
355 :
356 2 : IF (test_matmul) THEN
357 2 : siz = ABS(runtest(2))
358 2 : IF (para_env%is_source()) WRITE (iw, '(//,A,/)') " Test of matmul ( F95 ) "
359 2 : DO i = 5, siz, 2
360 4 : len = 2**i + 1
361 4 : IF (8.0_dp*REAL(len*len, KIND=dp) > max_memory*0.3_dp) EXIT
362 16 : ALLOCATE (ma(len, len), STAT=ierr)
363 4 : IF (ierr /= 0) EXIT
364 12 : ALLOCATE (mb(len, len), STAT=ierr)
365 4 : IF (ierr /= 0) EXIT
366 12 : ALLOCATE (mc(len, len), STAT=ierr)
367 4 : IF (ierr /= 0) EXIT
368 35788 : mc = 0.0_dp
369 :
370 4 : CALL RANDOM_NUMBER(xdum)
371 35788 : ma = xdum
372 4 : CALL RANDOM_NUMBER(xdum)
373 35788 : mb = xdum
374 4 : ntim = NINT(1.e8_dp/(2.0_dp*REAL(len, KIND=dp)**3))
375 4 : ntim = MAX(ntim, 1)
376 4 : ntim = MIN(ntim, siz*200)
377 4 : tstart = m_walltime()
378 2832 : DO j = 1, ntim
379 2828 : mc(:, :) = MATMUL(ma, mb)
380 2832 : ma(1, 1) = REAL(j, KIND=dp)
381 : END DO
382 4 : tend = m_walltime()
383 4 : t = tend - tstart + threshold
384 4 : perf = REAL(ntim, KIND=dp)*2.0_dp*REAL(len, KIND=dp)**3*1.e-6_dp/t
385 4 : IF (para_env%is_source()) THEN
386 : WRITE (iw, '(A,i6,T59,F14.4,A)') &
387 2 : " Matrix multiply test: c = a * b Size = ", len, perf, " Mflop/s"
388 : END IF
389 4 : tstart = m_walltime()
390 2832 : DO j = 1, ntim
391 3895652 : mc(:, :) = mc + MATMUL(ma, mb)
392 2832 : ma(1, 1) = REAL(j, KIND=dp)
393 : END DO
394 4 : tend = m_walltime()
395 4 : t = tend - tstart + threshold
396 4 : IF (t > 0.0_dp) THEN
397 4 : perf = REAL(ntim, KIND=dp)*2.0_dp*REAL(len, KIND=dp)**3*1.e-6_dp/t
398 : ELSE
399 0 : perf = 0.0_dp
400 : END IF
401 :
402 4 : IF (para_env%is_source()) THEN
403 : WRITE (iw, '(A,i6,T59,F14.4,A)') &
404 2 : " Matrix multiply test: a = a * b Size = ", len, perf, " Mflop/s"
405 : END IF
406 :
407 4 : tstart = m_walltime()
408 2832 : DO j = 1, ntim
409 3895652 : mc(:, :) = mc + MATMUL(ma, TRANSPOSE(mb))
410 2832 : ma(1, 1) = REAL(j, KIND=dp)
411 : END DO
412 4 : tend = m_walltime()
413 4 : t = tend - tstart + threshold
414 4 : IF (t > 0.0_dp) THEN
415 4 : perf = REAL(ntim, KIND=dp)*2.0_dp*REAL(len, KIND=dp)**3*1.e-6_dp/t
416 : ELSE
417 0 : perf = 0.0_dp
418 : END IF
419 :
420 4 : IF (para_env%is_source()) THEN
421 : WRITE (iw, '(A,i6,T59,F14.4,A)') &
422 2 : " Matrix multiply test: c = a * b(T) Size = ", len, perf, " Mflop/s"
423 : END IF
424 :
425 4 : tstart = m_walltime()
426 2832 : DO j = 1, ntim
427 3895652 : mc(:, :) = mc + MATMUL(TRANSPOSE(ma), mb)
428 2832 : ma(1, 1) = REAL(j, KIND=dp)
429 : END DO
430 4 : tend = m_walltime()
431 4 : t = tend - tstart + threshold
432 4 : IF (t > 0.0_dp) THEN
433 4 : perf = REAL(ntim, KIND=dp)*2.0_dp*REAL(len, KIND=dp)**3*1.e-6_dp/t
434 : ELSE
435 0 : perf = 0.0_dp
436 : END IF
437 :
438 4 : IF (para_env%is_source()) THEN
439 : WRITE (iw, '(A,i6,T59,F14.4,A)') &
440 2 : " Matrix multiply test: c = a(T) * b Size = ", len, perf, " Mflop/s"
441 : END IF
442 :
443 4 : DEALLOCATE (ma)
444 4 : DEALLOCATE (mb)
445 4 : DEALLOCATE (mc)
446 : END DO
447 : END IF
448 :
449 : ! test for matrix multpies
450 2 : IF (test_dgemm) THEN
451 0 : siz = ABS(runtest(5))
452 0 : IF (para_env%is_source()) WRITE (iw, '(//,A,/)') " Test of matmul ( BLAS ) "
453 0 : DO i = 5, siz, 2
454 0 : len = 2**i + 1
455 0 : IF (8.0_dp*REAL(len*len, KIND=dp) > max_memory*0.3_dp) EXIT
456 0 : ALLOCATE (ma(len, len), STAT=ierr)
457 0 : IF (ierr /= 0) EXIT
458 0 : ALLOCATE (mb(len, len), STAT=ierr)
459 0 : IF (ierr /= 0) EXIT
460 0 : ALLOCATE (mc(len, len), STAT=ierr)
461 0 : IF (ierr /= 0) EXIT
462 0 : mc = 0.0_dp
463 :
464 0 : CALL RANDOM_NUMBER(xdum)
465 0 : ma = xdum
466 0 : CALL RANDOM_NUMBER(xdum)
467 0 : mb = xdum
468 0 : ntim = NINT(1.e8_dp/(2.0_dp*REAL(len, KIND=dp)**3))
469 0 : ntim = MAX(ntim, 1)
470 0 : ntim = MIN(ntim, 1000)
471 :
472 0 : tstart = m_walltime()
473 0 : DO j = 1, ntim
474 0 : CALL dgemm("N", "N", len, len, len, 1.0_dp, ma, len, mb, len, 1.0_dp, mc, len)
475 : END DO
476 0 : tend = m_walltime()
477 0 : t = tend - tstart + threshold
478 0 : IF (t > 0.0_dp) THEN
479 0 : perf = REAL(ntim, KIND=dp)*2.0_dp*REAL(len, KIND=dp)**3*1.e-6_dp/t
480 : ELSE
481 0 : perf = 0.0_dp
482 : END IF
483 :
484 0 : IF (para_env%is_source()) THEN
485 : WRITE (iw, '(A,i6,T59,F14.4,A)') &
486 0 : " Matrix multiply test: c = a * b Size = ", len, perf, " Mflop/s"
487 : END IF
488 :
489 0 : tstart = m_walltime()
490 0 : DO j = 1, ntim
491 0 : CALL dgemm("N", "N", len, len, len, 1.0_dp, ma, len, mb, len, 1.0_dp, mc, len)
492 : END DO
493 0 : tend = m_walltime()
494 0 : t = tend - tstart + threshold
495 0 : IF (t > 0.0_dp) THEN
496 0 : perf = REAL(ntim, KIND=dp)*2.0_dp*REAL(len, KIND=dp)**3*1.e-6_dp/t
497 : ELSE
498 0 : perf = 0.0_dp
499 : END IF
500 :
501 0 : IF (para_env%is_source()) THEN
502 : WRITE (iw, '(A,i6,T59,F14.4,A)') &
503 0 : " Matrix multiply test: a = a * b Size = ", len, perf, " Mflop/s"
504 : END IF
505 :
506 0 : tstart = m_walltime()
507 0 : DO j = 1, ntim
508 0 : CALL dgemm("N", "T", len, len, len, 1.0_dp, ma, len, mb, len, 1.0_dp, mc, len)
509 : END DO
510 0 : tend = m_walltime()
511 0 : t = tend - tstart + threshold
512 0 : IF (t > 0.0_dp) THEN
513 0 : perf = REAL(ntim, KIND=dp)*2.0_dp*REAL(len, KIND=dp)**3*1.e-6_dp/t
514 : ELSE
515 0 : perf = 0.0_dp
516 : END IF
517 :
518 0 : IF (para_env%is_source()) THEN
519 : WRITE (iw, '(A,i6,T59,F14.4,A)') &
520 0 : " Matrix multiply test: c = a * b(T) Size = ", len, perf, " Mflop/s"
521 : END IF
522 :
523 0 : tstart = m_walltime()
524 0 : DO j = 1, ntim
525 0 : CALL dgemm("T", "N", len, len, len, 1.0_dp, ma, len, mb, len, 1.0_dp, mc, len)
526 : END DO
527 0 : tend = m_walltime()
528 0 : t = tend - tstart + threshold
529 0 : IF (t > 0.0_dp) THEN
530 0 : perf = REAL(ntim, KIND=dp)*2.0_dp*REAL(len, KIND=dp)**3*1.e-6_dp/t
531 : ELSE
532 0 : perf = 0.0_dp
533 : END IF
534 :
535 0 : IF (para_env%is_source()) THEN
536 : WRITE (iw, '(A,i6,T59,F14.4,A)') &
537 0 : " Matrix multiply test: c = a(T) * b Size = ", len, perf, " Mflop/s"
538 : END IF
539 :
540 0 : DEALLOCATE (ma)
541 0 : DEALLOCATE (mb)
542 0 : DEALLOCATE (mc)
543 : END DO
544 : END IF
545 :
546 2 : CALL para_env%sync()
547 :
548 2 : END SUBROUTINE matmul_test
549 :
550 : ! **************************************************************************************************
551 : !> \brief Tests the performance of all available FFT libraries for 3D FFTs
552 : !> \param para_env ...
553 : !> \param iw ...
554 : !> \param fftw_plan_type ...
555 : !> \param wisdom_file where FFTW3 should look to save/load wisdom
556 : !> \par History
557 : !> none
558 : !> \author JGH 6-NOV-2000
559 : ! **************************************************************************************************
560 2 : SUBROUTINE fft_test(para_env, iw, fftw_plan_type, wisdom_file)
561 :
562 : TYPE(mp_para_env_type), POINTER :: para_env
563 : INTEGER :: iw, fftw_plan_type
564 : CHARACTER(LEN=*), INTENT(IN) :: wisdom_file
565 :
566 : INTEGER, PARAMETER :: ndate(3) = (/12, 48, 96/)
567 :
568 : INTEGER :: iall, ierr, it, j, len, n(3), ntim, &
569 : radix_in, radix_out, siz, stat
570 : COMPLEX(KIND=dp), DIMENSION(4, 4, 4) :: zz
571 2 : COMPLEX(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :) :: ca, cb, cc
572 : CHARACTER(LEN=7) :: method
573 : REAL(KIND=dp) :: flops, perf, scale, t, tdiff, tend, &
574 : tstart
575 2 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :) :: ra
576 :
577 : ! test for 3d FFT
578 :
579 2 : IF (para_env%is_source()) WRITE (iw, '(//,A,/)') " Test of 3D-FFT "
580 2 : siz = ABS(runtest(3))
581 :
582 8 : DO iall = 1, 100
583 : SELECT CASE (iall)
584 : CASE DEFAULT
585 2 : EXIT
586 : CASE (1)
587 : CALL init_fft("FFTSG", alltoall=.FALSE., fftsg_sizes=.TRUE., wisdom_file=wisdom_file, &
588 2 : pool_limit=10, plan_style=fftw_plan_type)
589 2 : method = "FFTSG "
590 : CASE (2)
591 2 : CYCLE
592 : CASE (3)
593 : CALL init_fft("FFTW3", alltoall=.FALSE., fftsg_sizes=.TRUE., wisdom_file=wisdom_file, &
594 2 : pool_limit=10, plan_style=fftw_plan_type)
595 10 : method = "FFTW3 "
596 : END SELECT
597 16 : n = 4
598 4 : zz = 0.0_dp
599 4 : CALL fft3d(FWFFT, n, zz, status=stat)
600 4 : IF (stat == 0) THEN
601 16 : DO it = 1, 3
602 12 : radix_in = ndate(it)
603 12 : CALL fft_radix_operations(radix_in, radix_out, FFT_RADIX_CLOSEST)
604 12 : len = radix_out
605 48 : n = len
606 12 : IF (16.0_dp*REAL(len*len*len, KIND=dp) > max_memory*0.5_dp) EXIT
607 60 : ALLOCATE (ra(len, len, len), STAT=ierr)
608 60 : ALLOCATE (ca(len, len, len), STAT=ierr)
609 12 : CALL RANDOM_NUMBER(ra)
610 4035516 : ca(:, :, :) = ra
611 12 : CALL RANDOM_NUMBER(ra)
612 4035516 : ca(:, :, :) = ca + CMPLX(0.0_dp, 1.0_dp, KIND=dp)*ra
613 12 : flops = REAL(len**3, KIND=dp)*15.0_dp*LOG(REAL(len, KIND=dp))
614 12 : ntim = NINT(siz*1.e7_dp/flops)
615 12 : ntim = MAX(ntim, 1)
616 12 : ntim = MIN(ntim, 200)
617 12 : scale = 1.0_dp/REAL(len**3, KIND=dp)
618 12 : tstart = m_walltime()
619 884 : DO j = 1, ntim
620 872 : CALL fft3d(FWFFT, n, ca)
621 884 : CALL fft3d(BWFFT, n, ca)
622 : END DO
623 12 : tend = m_walltime()
624 12 : t = tend - tstart + threshold
625 12 : IF (t > 0.0_dp) THEN
626 12 : perf = REAL(ntim, KIND=dp)*2.0_dp*flops*1.e-6_dp/t
627 : ELSE
628 0 : perf = 0.0_dp
629 : END IF
630 :
631 12 : IF (para_env%is_source()) THEN
632 : WRITE (iw, '(T2,A,A,i6,T59,F14.4,A)') &
633 6 : ADJUSTR(method), " test (in-place) Size = ", len, perf, " Mflop/s"
634 : END IF
635 12 : DEALLOCATE (ca)
636 16 : DEALLOCATE (ra)
637 : END DO
638 4 : IF (para_env%is_source()) WRITE (iw, *)
639 : ! test if input data is preserved
640 4 : len = 24
641 16 : n = len
642 4 : ALLOCATE (ra(len, len, len))
643 4 : ALLOCATE (ca(len, len, len))
644 4 : ALLOCATE (cb(len, len, len))
645 4 : ALLOCATE (cc(len, len, len))
646 4 : CALL RANDOM_NUMBER(ra)
647 57700 : ca(:, :, :) = ra
648 4 : CALL RANDOM_NUMBER(ra)
649 57700 : ca(:, :, :) = ca + CMPLX(0.0_dp, 1.0_dp, KIND=dp)*ra
650 57700 : cc(:, :, :) = ca
651 4 : CALL fft3d(FWFFT, n, ca, cb)
652 57700 : tdiff = MAXVAL(ABS(ca - cc))
653 4 : IF (tdiff > 1.0E-12_dp) THEN
654 0 : IF (para_env%is_source()) &
655 0 : WRITE (iw, '(T2,A,A,A)') ADJUSTR(method), " FWFFT ", &
656 0 : " Input array is changed in out-of-place FFT !"
657 : ELSE
658 4 : IF (para_env%is_source()) &
659 2 : WRITE (iw, '(T2,A,A,A)') ADJUSTR(method), " FWFFT ", &
660 4 : " Input array is not changed in out-of-place FFT !"
661 : END IF
662 57700 : ca(:, :, :) = cc
663 4 : CALL fft3d(BWFFT, n, ca, cb)
664 57700 : tdiff = MAXVAL(ABS(ca - cc))
665 4 : IF (tdiff > 1.0E-12_dp) THEN
666 0 : IF (para_env%is_source()) &
667 0 : WRITE (iw, '(T2,A,A,A)') ADJUSTR(method), " BWFFT ", &
668 0 : " Input array is changed in out-of-place FFT !"
669 : ELSE
670 4 : IF (para_env%is_source()) &
671 2 : WRITE (iw, '(T2,A,A,A)') ADJUSTR(method), " BWFFT ", &
672 4 : " Input array is not changed in out-of-place FFT !"
673 : END IF
674 4 : IF (para_env%is_source()) WRITE (iw, *)
675 :
676 4 : DEALLOCATE (ra)
677 4 : DEALLOCATE (ca)
678 4 : DEALLOCATE (cb)
679 4 : DEALLOCATE (cc)
680 : END IF
681 4 : CALL finalize_fft(para_env, wisdom_file=wisdom_file)
682 : END DO
683 :
684 2 : END SUBROUTINE fft_test
685 :
686 : ! **************************************************************************************************
687 : !> \brief test rs_pw_transfer performance
688 : !> \param para_env ...
689 : !> \param iw ...
690 : !> \param globenv ...
691 : !> \param rs_pw_transfer_section ...
692 : !> \author Joost VandeVondele
693 : !> 9.2008 Randomise rs grid [Iain Bethune]
694 : !> (c) The Numerical Algorithms Group (NAG) Ltd, 2008 on behalf of the HECToR project
695 : ! **************************************************************************************************
696 2 : SUBROUTINE rs_pw_transfer_test(para_env, iw, globenv, rs_pw_transfer_section)
697 :
698 : TYPE(mp_para_env_type), POINTER :: para_env
699 : INTEGER :: iw
700 : TYPE(global_environment_type), POINTER :: globenv
701 : TYPE(section_vals_type), POINTER :: rs_pw_transfer_section
702 :
703 : CHARACTER(LEN=*), PARAMETER :: routineN = 'rs_pw_transfer_test'
704 :
705 : INTEGER :: halo_size, handle, i_loop, n_loop, ns_max
706 : INTEGER, DIMENSION(3) :: no, np
707 2 : INTEGER, DIMENSION(:), POINTER :: i_vals
708 : LOGICAL :: do_rs2pw
709 : REAL(KIND=dp) :: tend, tstart
710 : TYPE(cell_type), POINTER :: box
711 : TYPE(pw_grid_type), POINTER :: grid
712 : TYPE(pw_r3d_rs_type) :: ca
713 : TYPE(realspace_grid_desc_type), POINTER :: rs_desc
714 : TYPE(realspace_grid_input_type) :: input_settings
715 32 : TYPE(realspace_grid_type) :: rs_grid
716 : TYPE(section_vals_type), POINTER :: rs_grid_section
717 :
718 2 : CALL timeset(routineN, handle)
719 :
720 : !..set fft lib
721 : CALL init_fft(globenv%default_fft_library, alltoall=.FALSE., fftsg_sizes=.TRUE., &
722 : pool_limit=globenv%fft_pool_scratch_limit, &
723 : wisdom_file=globenv%fftw_wisdom_file_name, &
724 2 : plan_style=globenv%fftw_plan_type)
725 :
726 : ! .. set cell (should otherwise be irrelevant)
727 2 : NULLIFY (box)
728 2 : CALL cell_create(box)
729 : box%hmat = RESHAPE((/20.0_dp, 0.0_dp, 0.0_dp, 0.0_dp, 20.0_dp, 0.0_dp, &
730 26 : 0.0_dp, 0.0_dp, 20.0_dp/), (/3, 3/))
731 2 : CALL init_cell(box)
732 :
733 : ! .. grid type and pw_grid
734 2 : NULLIFY (grid)
735 2 : CALL section_vals_val_get(rs_pw_transfer_section, "GRID", i_vals=i_vals)
736 8 : np = i_vals
737 2 : CALL pw_grid_create(grid, para_env, box%hmat, grid_span=FULLSPACE, npts=np, fft_usage=.TRUE., iounit=iw)
738 8 : no = grid%npts
739 :
740 2 : CALL ca%create(grid)
741 2 : CALL pw_zero(ca)
742 :
743 : ! .. rs input setting type
744 2 : CALL section_vals_val_get(rs_pw_transfer_section, "HALO_SIZE", i_val=halo_size)
745 2 : rs_grid_section => section_vals_get_subs_vals(rs_pw_transfer_section, "RS_GRID")
746 2 : ns_max = 2*halo_size + 1
747 2 : CALL init_input_type(input_settings, ns_max, rs_grid_section, 1, (/-1, -1, -1/))
748 :
749 : ! .. rs type
750 2 : NULLIFY (rs_desc)
751 2 : CALL rs_grid_create_descriptor(rs_desc, pw_grid=grid, input_settings=input_settings)
752 2 : CALL rs_grid_create(rs_grid, rs_desc)
753 2 : CALL rs_grid_print(rs_grid, iw)
754 2 : CALL rs_grid_zero(rs_grid)
755 :
756 : ! Put random values on the grid, so summation check will pick up errors
757 2 : CALL RANDOM_NUMBER(rs_grid%r)
758 :
759 2 : CALL section_vals_val_get(rs_pw_transfer_section, "N_loop", i_val=N_loop)
760 2 : CALL section_vals_val_get(rs_pw_transfer_section, "RS2PW", l_val=do_rs2pw)
761 :
762 : ! go for the real loops, sync to get max timings
763 2 : IF (para_env%is_source()) THEN
764 1 : WRITE (iw, '(T2,A)') ""
765 1 : WRITE (iw, '(T2,A)') "Timing rs_pw_transfer routine"
766 1 : WRITE (iw, '(T2,A)') ""
767 1 : WRITE (iw, '(T2,A)') "iteration time[s]"
768 : END IF
769 8 : DO i_loop = 1, N_loop
770 6 : CALL para_env%sync()
771 6 : tstart = m_walltime()
772 6 : IF (do_rs2pw) THEN
773 6 : CALL transfer_rs2pw(rs_grid, ca)
774 : ELSE
775 0 : CALL transfer_pw2rs(rs_grid, ca)
776 : END IF
777 6 : CALL para_env%sync()
778 6 : tend = m_walltime()
779 8 : IF (para_env%is_source()) THEN
780 3 : WRITE (iw, '(T2,I9,1X,F12.6)') i_loop, tend - tstart
781 : END IF
782 : END DO
783 :
784 : !cleanup
785 2 : CALL rs_grid_release(rs_grid)
786 2 : CALL rs_grid_release_descriptor(rs_desc)
787 2 : CALL ca%release()
788 2 : CALL pw_grid_release(grid)
789 2 : CALL cell_release(box)
790 2 : CALL finalize_fft(para_env, wisdom_file=globenv%fftw_wisdom_file_name)
791 :
792 2 : CALL timestop(handle)
793 :
794 10 : END SUBROUTINE rs_pw_transfer_test
795 :
796 : ! **************************************************************************************************
797 : !> \brief Tests the performance of PW calls to FFT routines
798 : !> \param para_env ...
799 : !> \param iw ...
800 : !> \param globenv ...
801 : !> \param pw_transfer_section ...
802 : !> \par History
803 : !> JGH 6-Feb-2001 : Test and performance code
804 : !> Made input sensitive [Joost VandeVondele]
805 : !> \author JGH 1-JAN-2001
806 : ! **************************************************************************************************
807 10 : SUBROUTINE pw_fft_test(para_env, iw, globenv, pw_transfer_section)
808 :
809 : TYPE(mp_para_env_type), POINTER :: para_env
810 : INTEGER :: iw
811 : TYPE(global_environment_type), POINTER :: globenv
812 : TYPE(section_vals_type), POINTER :: pw_transfer_section
813 :
814 : REAL(KIND=dp), PARAMETER :: toler = 1.e-11_dp
815 :
816 : INTEGER :: blocked_id, grid_span, i_layout, i_rep, &
817 : ig, ip, itmp, n_loop, n_rep, nn, p, q
818 10 : INTEGER, ALLOCATABLE, DIMENSION(:, :) :: layouts
819 : INTEGER, DIMENSION(2) :: distribution_layout
820 : INTEGER, DIMENSION(3) :: no, np
821 10 : INTEGER, DIMENSION(:), POINTER :: i_vals
822 : LOGICAL :: debug, is_fullspace, odd, &
823 : pw_grid_layout_all, spherical
824 : REAL(KIND=dp) :: em, et, flops, gsq, perf, t, t_max, &
825 : t_min, tend, tstart
826 10 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:) :: t_end, t_start
827 : TYPE(cell_type), POINTER :: box
828 : TYPE(pw_c1d_gs_type) :: ca, cc
829 : TYPE(pw_c3d_rs_type) :: cb
830 : TYPE(pw_grid_type), POINTER :: grid
831 :
832 : !..set fft lib
833 :
834 : CALL init_fft(globenv%default_fft_library, alltoall=.FALSE., fftsg_sizes=.TRUE., &
835 : pool_limit=globenv%fft_pool_scratch_limit, &
836 : wisdom_file=globenv%fftw_wisdom_file_name, &
837 10 : plan_style=globenv%fftw_plan_type)
838 :
839 : !..the unit cell (should not really matter, the number of grid points do)
840 10 : NULLIFY (box, grid)
841 10 : CALL cell_create(box)
842 : box%hmat = RESHAPE((/10.0_dp, 0.0_dp, 0.0_dp, 0.0_dp, 8.0_dp, 0.0_dp, &
843 130 : 0.0_dp, 0.0_dp, 7.0_dp/), (/3, 3/))
844 10 : CALL init_cell(box)
845 :
846 10 : CALL section_vals_get(pw_transfer_section, n_repetition=n_rep)
847 70 : DO i_rep = 1, n_rep
848 :
849 : ! how often should we do the transfer
850 60 : CALL section_vals_val_get(pw_transfer_section, "N_loop", i_rep_section=i_rep, i_val=N_loop)
851 180 : ALLOCATE (t_start(N_loop))
852 120 : ALLOCATE (t_end(N_loop))
853 :
854 : ! setup of the grids
855 60 : CALL section_vals_val_get(pw_transfer_section, "GRID", i_rep_section=i_rep, i_vals=i_vals)
856 240 : np = i_vals
857 :
858 60 : CALL section_vals_val_get(pw_transfer_section, "PW_GRID_BLOCKED", i_rep_section=i_rep, i_val=blocked_id)
859 60 : CALL section_vals_val_get(pw_transfer_section, "DEBUG", i_rep_section=i_rep, l_val=debug)
860 :
861 : CALL section_vals_val_get(pw_transfer_section, "PW_GRID_LAYOUT_ALL", i_rep_section=i_rep, &
862 60 : l_val=pw_grid_layout_all)
863 :
864 : ! prepare to loop over all or a specific layout
865 60 : IF (pw_grid_layout_all) THEN
866 : ! count layouts that fit
867 8 : itmp = 0
868 : ! start from 2, (/1,para_env%num_pe/) is not supported
869 16 : DO p = 2, para_env%num_pe
870 8 : q = para_env%num_pe/p
871 16 : IF (p*q == para_env%num_pe) THEN
872 8 : itmp = itmp + 1
873 : END IF
874 : END DO
875 : ! build list
876 24 : ALLOCATE (layouts(2, itmp))
877 8 : itmp = 0
878 16 : DO p = 2, para_env%num_pe
879 8 : q = para_env%num_pe/p
880 16 : IF (p*q == para_env%num_pe) THEN
881 8 : itmp = itmp + 1
882 24 : layouts(:, itmp) = (/p, q/)
883 : END IF
884 : END DO
885 : ELSE
886 52 : CALL section_vals_val_get(pw_transfer_section, "PW_GRID_LAYOUT", i_rep_section=i_rep, i_vals=i_vals)
887 52 : ALLOCATE (layouts(2, 1))
888 156 : layouts(:, 1) = i_vals
889 : END IF
890 :
891 120 : DO i_layout = 1, SIZE(layouts, 2)
892 :
893 180 : distribution_layout = layouts(:, i_layout)
894 :
895 60 : CALL section_vals_val_get(pw_transfer_section, "PW_GRID", i_rep_section=i_rep, i_val=itmp)
896 :
897 : ! from cp_control_utils
898 16 : SELECT CASE (itmp)
899 : CASE (do_pwgrid_spherical)
900 16 : spherical = .TRUE.
901 16 : is_fullspace = .FALSE.
902 : CASE (do_pwgrid_ns_fullspace)
903 28 : spherical = .FALSE.
904 28 : is_fullspace = .TRUE.
905 : CASE (do_pwgrid_ns_halfspace)
906 16 : spherical = .FALSE.
907 60 : is_fullspace = .FALSE.
908 : END SELECT
909 :
910 : ! from pw_env_methods
911 60 : IF (spherical) THEN
912 16 : grid_span = HALFSPACE
913 16 : spherical = .TRUE.
914 16 : odd = .TRUE.
915 44 : ELSE IF (is_fullspace) THEN
916 28 : grid_span = FULLSPACE
917 28 : spherical = .FALSE.
918 28 : odd = .FALSE.
919 : ELSE
920 16 : grid_span = HALFSPACE
921 16 : spherical = .FALSE.
922 16 : odd = .TRUE.
923 : END IF
924 :
925 : ! actual setup
926 : CALL pw_grid_create(grid, para_env, box%hmat, grid_span=grid_span, odd=odd, spherical=spherical, &
927 : blocked=blocked_id, npts=np, fft_usage=.TRUE., &
928 60 : rs_dims=distribution_layout, iounit=iw)
929 :
930 60 : IF (iw > 0) CALL m_flush(iw)
931 :
932 : ! note that the number of grid points might be different from what the user requested (fft-able needed)
933 240 : no = grid%npts
934 :
935 60 : CALL ca%create(grid)
936 60 : CALL cb%create(grid)
937 60 : CALL cc%create(grid)
938 :
939 : ! initialize data
940 60 : CALL pw_zero(ca)
941 60 : CALL pw_zero(cb)
942 60 : CALL pw_zero(cc)
943 60 : nn = SIZE(ca%array)
944 142042 : DO ig = 1, nn
945 141982 : gsq = grid%gsq(ig)
946 142042 : ca%array(ig) = EXP(-gsq)
947 : END DO
948 :
949 420 : flops = PRODUCT(no)*30.0_dp*LOG(REAL(MAXVAL(no), KIND=dp))
950 60 : tstart = m_walltime()
951 596 : DO ip = 1, n_loop
952 536 : CALL para_env%sync()
953 536 : t_start(ip) = m_walltime()
954 536 : CALL pw_transfer(ca, cb, debug)
955 536 : CALL pw_transfer(cb, cc, debug)
956 536 : CALL para_env%sync()
957 596 : t_end(ip) = m_walltime()
958 : END DO
959 60 : tend = m_walltime()
960 60 : t = tend - tstart + threshold
961 60 : IF (t > 0.0_dp) THEN
962 60 : perf = REAL(n_loop, KIND=dp)*2.0_dp*flops*1.e-6_dp/t
963 : ELSE
964 0 : perf = 0.0_dp
965 : END IF
966 :
967 142102 : em = MAXVAL(ABS(ca%array(:) - cc%array(:)))
968 60 : CALL para_env%max(em)
969 142042 : et = SUM(ABS(ca%array(:) - cc%array(:)))
970 60 : CALL para_env%sum(et)
971 656 : t_min = MINVAL(t_end - t_start)
972 656 : t_max = MAXVAL(t_end - t_start)
973 :
974 60 : IF (para_env%is_source()) THEN
975 30 : WRITE (iw, *)
976 30 : WRITE (iw, '(A,T67,E14.6)') " Parallel FFT Tests: Maximal Error ", em
977 30 : WRITE (iw, '(A,T67,E14.6)') " Parallel FFT Tests: Total Error ", et
978 : WRITE (iw, '(A,T67,F14.0)') &
979 30 : " Parallel FFT Tests: Performance [Mflops] ", perf
980 30 : WRITE (iw, '(A,T67,F14.6)') " Best time : ", t_min
981 30 : WRITE (iw, '(A,T67,F14.6)') " Worst time: ", t_max
982 30 : IF (iw > 0) CALL m_flush(iw)
983 : END IF
984 :
985 : ! need debugging ???
986 60 : IF (em > toler .OR. et > toler) THEN
987 0 : CPWARN("The FFT results are not accurate ... starting debug pw_transfer")
988 0 : CALL pw_transfer(ca, cb, .TRUE.)
989 0 : CALL pw_transfer(cb, cc, .TRUE.)
990 : END IF
991 :
992 : ! done with these grids
993 60 : CALL ca%release()
994 60 : CALL cb%release()
995 60 : CALL cc%release()
996 180 : CALL pw_grid_release(grid)
997 :
998 : END DO
999 :
1000 : ! local arrays
1001 60 : DEALLOCATE (layouts)
1002 60 : DEALLOCATE (t_start)
1003 190 : DEALLOCATE (t_end)
1004 :
1005 : END DO
1006 :
1007 : ! cleanup
1008 10 : CALL cell_release(box)
1009 10 : CALL finalize_fft(para_env, wisdom_file=globenv%fftw_wisdom_file_name)
1010 :
1011 20 : END SUBROUTINE pw_fft_test
1012 :
1013 : ! **************************************************************************************************
1014 : !> \brief Tests the eigensolver library routines
1015 : !> \param para_env ...
1016 : !> \param iw ...
1017 : !> \param eigensolver_section ...
1018 : !> \par History
1019 : !> JGH 6-Feb-2001 : Test and performance code
1020 : !> \author JGH 1-JAN-2001
1021 : ! **************************************************************************************************
1022 2 : SUBROUTINE eigensolver_test(para_env, iw, eigensolver_section)
1023 :
1024 : TYPE(mp_para_env_type), POINTER :: para_env
1025 : INTEGER :: iw
1026 : TYPE(section_vals_type), POINTER :: eigensolver_section
1027 :
1028 : INTEGER :: diag_method, i, i_loop, i_rep, &
1029 : init_method, j, n, n_loop, n_rep, &
1030 : neig, unit_number
1031 : REAL(KIND=dp) :: t1, t2
1032 2 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:) :: eigenvalues
1033 2 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :) :: buffer
1034 : TYPE(cp_blacs_env_type), POINTER :: blacs_env
1035 : TYPE(cp_fm_struct_type), POINTER :: fmstruct
1036 : TYPE(cp_fm_type) :: eigenvectors, matrix, work
1037 2 : TYPE(rng_stream_type), ALLOCATABLE :: rng_stream
1038 :
1039 2 : IF (iw > 0) THEN
1040 1 : WRITE (UNIT=iw, FMT="(/,/,T2,A,/)") "EIGENSOLVER TEST"
1041 : END IF
1042 :
1043 : ! create blacs env corresponding to para_env
1044 2 : NULLIFY (blacs_env)
1045 : CALL cp_blacs_env_create(blacs_env=blacs_env, &
1046 2 : para_env=para_env)
1047 :
1048 : ! loop over all tests
1049 2 : CALL section_vals_get(eigensolver_section, n_repetition=n_rep)
1050 10 : DO i_rep = 1, n_rep
1051 :
1052 : ! parse section
1053 8 : CALL section_vals_val_get(eigensolver_section, "N", i_rep_section=i_rep, i_val=n)
1054 8 : CALL section_vals_val_get(eigensolver_section, "EIGENVALUES", i_rep_section=i_rep, i_val=neig)
1055 8 : CALL section_vals_val_get(eigensolver_section, "DIAG_METHOD", i_rep_section=i_rep, i_val=diag_method)
1056 8 : CALL section_vals_val_get(eigensolver_section, "INIT_METHOD", i_rep_section=i_rep, i_val=init_method)
1057 8 : CALL section_vals_val_get(eigensolver_section, "N_loop", i_rep_section=i_rep, i_val=n_loop)
1058 :
1059 : ! proper number of eigs
1060 8 : IF (neig < 0) neig = n
1061 8 : neig = MIN(neig, n)
1062 :
1063 : ! report
1064 8 : IF (iw > 0) THEN
1065 4 : WRITE (iw, *) "Matrix size", n
1066 4 : WRITE (iw, *) "Number of eigenvalues", neig
1067 4 : WRITE (iw, *) "Timing loops", n_loop
1068 2 : SELECT CASE (diag_method)
1069 : CASE (do_diag_syevd)
1070 2 : WRITE (iw, *) "Diag using syevd"
1071 : CASE (do_diag_syevx)
1072 4 : WRITE (iw, *) "Diag using syevx"
1073 : CASE DEFAULT
1074 : ! stop
1075 : END SELECT
1076 :
1077 4 : SELECT CASE (init_method)
1078 : CASE (do_mat_random)
1079 4 : WRITE (iw, *) "using random matrix"
1080 : CASE (do_mat_read)
1081 4 : WRITE (iw, *) "reading from file"
1082 : CASE DEFAULT
1083 : ! stop
1084 : END SELECT
1085 : END IF
1086 :
1087 : ! create matrix struct type
1088 8 : NULLIFY (fmstruct)
1089 : CALL cp_fm_struct_create(fmstruct=fmstruct, &
1090 : para_env=para_env, &
1091 : context=blacs_env, &
1092 : nrow_global=n, &
1093 8 : ncol_global=n)
1094 :
1095 : ! create all needed matrices, and buffers for the eigenvalues
1096 : CALL cp_fm_create(matrix=matrix, &
1097 : matrix_struct=fmstruct, &
1098 8 : name="MATRIX")
1099 8 : CALL cp_fm_set_all(matrix, 0.0_dp)
1100 :
1101 : CALL cp_fm_create(matrix=eigenvectors, &
1102 : matrix_struct=fmstruct, &
1103 8 : name="EIGENVECTORS")
1104 8 : CALL cp_fm_set_all(eigenvectors, 0.0_dp)
1105 :
1106 : CALL cp_fm_create(matrix=work, &
1107 : matrix_struct=fmstruct, &
1108 8 : name="WORK")
1109 8 : CALL cp_fm_set_all(matrix, 0.0_dp)
1110 :
1111 24 : ALLOCATE (eigenvalues(n))
1112 100 : eigenvalues = 0.0_dp
1113 16 : ALLOCATE (buffer(1, n))
1114 :
1115 : ! generate initial matrix, either by reading a file, or using random numbers
1116 8 : IF (para_env%is_source()) THEN
1117 4 : SELECT CASE (init_method)
1118 : CASE (do_mat_random)
1119 : rng_stream = rng_stream_type( &
1120 : name="rng_stream", &
1121 : distribution_type=UNIFORM, &
1122 4 : extended_precision=.TRUE.)
1123 : CASE (do_mat_read)
1124 : CALL open_file(file_name="MATRIX", &
1125 : file_action="READ", &
1126 : file_form="FORMATTED", &
1127 : file_status="OLD", &
1128 4 : unit_number=unit_number)
1129 : END SELECT
1130 : END IF
1131 :
1132 100 : DO i = 1, n
1133 92 : IF (para_env%is_source()) THEN
1134 : SELECT CASE (init_method)
1135 : CASE (do_mat_random)
1136 347 : DO j = i, n
1137 347 : buffer(1, j) = rng_stream%next() - 0.5_dp
1138 : END DO
1139 : !MK activate/modify for a diagonal dominant symmetric matrix:
1140 : !MK buffer(1,i) = 10.0_dp*buffer(1,i)
1141 : CASE (do_mat_read)
1142 46 : READ (UNIT=unit_number, FMT=*) buffer(1, 1:n)
1143 : END SELECT
1144 : END IF
1145 92 : CALL para_env%bcast(buffer)
1146 8 : SELECT CASE (init_method)
1147 : CASE (do_mat_random)
1148 : CALL cp_fm_set_submatrix(fm=matrix, &
1149 : new_values=buffer, &
1150 : start_row=i, &
1151 : start_col=i, &
1152 : n_rows=1, &
1153 : n_cols=n - i + 1, &
1154 : alpha=1.0_dp, &
1155 : beta=0.0_dp, &
1156 92 : transpose=.FALSE.)
1157 : CALL cp_fm_set_submatrix(fm=matrix, &
1158 : new_values=buffer, &
1159 : start_row=i, &
1160 : start_col=i, &
1161 : n_rows=n - i + 1, &
1162 : n_cols=1, &
1163 : alpha=1.0_dp, &
1164 : beta=0.0_dp, &
1165 92 : transpose=.TRUE.)
1166 : CASE (do_mat_read)
1167 : CALL cp_fm_set_submatrix(fm=matrix, &
1168 : new_values=buffer, &
1169 : start_row=i, &
1170 : start_col=1, &
1171 : n_rows=1, &
1172 : n_cols=n, &
1173 : alpha=1.0_dp, &
1174 : beta=0.0_dp, &
1175 92 : transpose=.FALSE.)
1176 : END SELECT
1177 : END DO
1178 :
1179 8 : DEALLOCATE (buffer)
1180 :
1181 8 : IF (para_env%is_source()) THEN
1182 0 : SELECT CASE (init_method)
1183 : CASE (do_mat_read)
1184 4 : CALL close_file(unit_number=unit_number)
1185 : END SELECT
1186 : END IF
1187 :
1188 88 : DO i_loop = 1, n_loop
1189 1000 : eigenvalues = 0.0_dp
1190 80 : CALL cp_fm_set_all(eigenvectors, 0.0_dp)
1191 : CALL cp_fm_to_fm(source=matrix, &
1192 80 : destination=work)
1193 :
1194 : ! DONE, now testing
1195 80 : t1 = m_walltime()
1196 40 : SELECT CASE (diag_method)
1197 : CASE (do_diag_syevd)
1198 : CALL cp_fm_syevd(matrix=work, &
1199 : eigenvectors=eigenvectors, &
1200 40 : eigenvalues=eigenvalues)
1201 : CASE (do_diag_syevx)
1202 : CALL cp_fm_syevx(matrix=work, &
1203 : eigenvectors=eigenvectors, &
1204 : eigenvalues=eigenvalues, &
1205 : neig=neig, &
1206 80 : work_syevx=1.0_dp)
1207 : END SELECT
1208 80 : t2 = m_walltime()
1209 88 : IF (iw > 0) WRITE (iw, *) "Timing for loop ", i_loop, " : ", t2 - t1
1210 : END DO
1211 :
1212 8 : IF (iw > 0) THEN
1213 4 : WRITE (iw, *) "Eigenvalues: "
1214 4 : WRITE (UNIT=iw, FMT="(T3,5F14.6)") eigenvalues(1:neig)
1215 42 : WRITE (UNIT=iw, FMT="(T3,A4,F16.6)") "Sum:", SUM(eigenvalues(1:neig))
1216 4 : WRITE (iw, *) ""
1217 : END IF
1218 :
1219 : ! Clean up
1220 8 : DEALLOCATE (eigenvalues)
1221 8 : CALL cp_fm_release(matrix=work)
1222 8 : CALL cp_fm_release(matrix=eigenvectors)
1223 8 : CALL cp_fm_release(matrix=matrix)
1224 42 : CALL cp_fm_struct_release(fmstruct=fmstruct)
1225 :
1226 : END DO
1227 :
1228 2 : CALL cp_blacs_env_release(blacs_env=blacs_env)
1229 :
1230 4 : END SUBROUTINE eigensolver_test
1231 :
1232 : ! **************************************************************************************************
1233 : !> \brief Tests the parallel matrix multiply
1234 : !> \param para_env ...
1235 : !> \param iw ...
1236 : !> \param cp_fm_gemm_test_section ...
1237 : ! **************************************************************************************************
1238 4 : SUBROUTINE cp_fm_gemm_test(para_env, iw, cp_fm_gemm_test_section)
1239 :
1240 : TYPE(mp_para_env_type), POINTER :: para_env
1241 : INTEGER :: iw
1242 : TYPE(section_vals_type), POINTER :: cp_fm_gemm_test_section
1243 :
1244 : CHARACTER(LEN=1) :: transa, transb
1245 : INTEGER :: i_loop, i_rep, k, m, n, N_loop, n_rep, ncol_block, ncol_block_actual, &
1246 : ncol_global, np, nrow_block, nrow_block_actual, nrow_global
1247 4 : INTEGER, DIMENSION(:), POINTER :: grid_2d
1248 : LOGICAL :: force_blocksize, row_major, transa_p, &
1249 : transb_p
1250 : REAL(KIND=dp) :: t1, t2, t3, t4
1251 : TYPE(cp_blacs_env_type), POINTER :: blacs_env
1252 : TYPE(cp_fm_struct_type), POINTER :: fmstruct_a, fmstruct_b, fmstruct_c
1253 : TYPE(cp_fm_type) :: matrix_a, matrix_b, matrix_c
1254 :
1255 4 : CALL section_vals_get(cp_fm_gemm_test_section, n_repetition=n_rep)
1256 24 : DO i_rep = 1, n_rep
1257 :
1258 : ! how often should we do the multiply
1259 20 : CALL section_vals_val_get(cp_fm_gemm_test_section, "N_loop", i_rep_section=i_rep, i_val=N_loop)
1260 :
1261 : ! matrices def.
1262 20 : CALL section_vals_val_get(cp_fm_gemm_test_section, "K", i_rep_section=i_rep, i_val=k)
1263 20 : CALL section_vals_val_get(cp_fm_gemm_test_section, "N", i_rep_section=i_rep, i_val=n)
1264 20 : CALL section_vals_val_get(cp_fm_gemm_test_section, "M", i_rep_section=i_rep, i_val=m)
1265 20 : CALL section_vals_val_get(cp_fm_gemm_test_section, "transa", i_rep_section=i_rep, l_val=transa_p)
1266 20 : CALL section_vals_val_get(cp_fm_gemm_test_section, "transb", i_rep_section=i_rep, l_val=transb_p)
1267 20 : CALL section_vals_val_get(cp_fm_gemm_test_section, "nrow_block", i_rep_section=i_rep, i_val=nrow_block)
1268 20 : CALL section_vals_val_get(cp_fm_gemm_test_section, "ncol_block", i_rep_section=i_rep, i_val=ncol_block)
1269 20 : CALL section_vals_val_get(cp_fm_gemm_test_section, "ROW_MAJOR", i_rep_section=i_rep, l_val=row_major)
1270 20 : CALL section_vals_val_get(cp_fm_gemm_test_section, "GRID_2D", i_rep_section=i_rep, i_vals=grid_2d)
1271 20 : CALL section_vals_val_get(cp_fm_gemm_test_section, "FORCE_BLOCKSIZE", i_rep_section=i_rep, l_val=force_blocksize)
1272 20 : transa = "N"
1273 20 : transb = "N"
1274 20 : IF (transa_p) transa = "T"
1275 20 : IF (transb_p) transb = "T"
1276 :
1277 20 : IF (iw > 0) THEN
1278 10 : WRITE (iw, '(T2,A)') "----------- TESTING PARALLEL MATRIX MULTIPLY -------------"
1279 10 : WRITE (iw, '(T2,A)', ADVANCE="NO") "C = "
1280 10 : IF (transa_p) THEN
1281 2 : WRITE (iw, '(A)', ADVANCE="NO") "TRANSPOSE(A) x"
1282 : ELSE
1283 8 : WRITE (iw, '(A)', ADVANCE="NO") "A x "
1284 : END IF
1285 10 : IF (transb_p) THEN
1286 2 : WRITE (iw, '(A)') "TRANSPOSE(B) "
1287 : ELSE
1288 8 : WRITE (iw, '(A)') "B "
1289 : END IF
1290 10 : WRITE (iw, '(T2,A,T50,I5,A,I5)') 'requested block size', nrow_block, ' by ', ncol_block
1291 10 : WRITE (iw, '(T2,A,T50,I5)') 'number of repetitions of cp_fm_gemm ', n_loop
1292 10 : WRITE (iw, '(T2,A,T50,L5)') 'Row Major', row_major
1293 30 : WRITE (iw, '(T2,A,T50,2I7)') 'GRID_2D ', grid_2d
1294 10 : WRITE (iw, '(T2,A,T50,L5)') 'Force blocksize ', force_blocksize
1295 : ! check the return value of pilaenv, too small values limit the performance (assuming pdgemm is the vanilla variant)
1296 10 : np = cp_fm_pilaenv(0, 'D')
1297 10 : IF (np > 0) THEN
1298 10 : WRITE (iw, '(T2,A,T50,I5)') 'PILAENV blocksize', np
1299 : END IF
1300 : END IF
1301 :
1302 20 : NULLIFY (blacs_env)
1303 : CALL cp_blacs_env_create(blacs_env=blacs_env, &
1304 : para_env=para_env, &
1305 : row_major=row_major, &
1306 20 : grid_2d=grid_2d)
1307 :
1308 20 : NULLIFY (fmstruct_a)
1309 20 : IF (transa_p) THEN
1310 4 : nrow_global = m; ncol_global = k
1311 : ELSE
1312 16 : nrow_global = k; ncol_global = m
1313 : END IF
1314 : CALL cp_fm_struct_create(fmstruct=fmstruct_a, para_env=para_env, context=blacs_env, &
1315 : nrow_global=nrow_global, ncol_global=ncol_global, &
1316 20 : nrow_block=nrow_block, ncol_block=ncol_block, force_block=force_blocksize)
1317 20 : CALL cp_fm_struct_get(fmstruct_a, nrow_block=nrow_block_actual, ncol_block=ncol_block_actual)
1318 30 : IF (iw > 0) WRITE (iw, '(T2,A,I9,A,I9,A,I5,A,I5)') 'matrix A ', nrow_global, " by ", ncol_global, &
1319 20 : ' using blocks of ', nrow_block_actual, ' by ', ncol_block_actual
1320 :
1321 20 : IF (transb_p) THEN
1322 4 : nrow_global = n; ncol_global = m
1323 : ELSE
1324 16 : nrow_global = m; ncol_global = n
1325 : END IF
1326 20 : NULLIFY (fmstruct_b)
1327 : CALL cp_fm_struct_create(fmstruct=fmstruct_b, para_env=para_env, context=blacs_env, &
1328 : nrow_global=nrow_global, ncol_global=ncol_global, &
1329 20 : nrow_block=nrow_block, ncol_block=ncol_block, force_block=force_blocksize)
1330 20 : CALL cp_fm_struct_get(fmstruct_b, nrow_block=nrow_block_actual, ncol_block=ncol_block_actual)
1331 30 : IF (iw > 0) WRITE (iw, '(T2,A,I9,A,I9,A,I5,A,I5)') 'matrix B ', nrow_global, " by ", ncol_global, &
1332 20 : ' using blocks of ', nrow_block_actual, ' by ', ncol_block_actual
1333 :
1334 20 : NULLIFY (fmstruct_c)
1335 20 : nrow_global = k
1336 20 : ncol_global = n
1337 : CALL cp_fm_struct_create(fmstruct=fmstruct_c, para_env=para_env, context=blacs_env, &
1338 : nrow_global=nrow_global, ncol_global=ncol_global, &
1339 20 : nrow_block=nrow_block, ncol_block=ncol_block, force_block=force_blocksize)
1340 20 : CALL cp_fm_struct_get(fmstruct_c, nrow_block=nrow_block_actual, ncol_block=ncol_block_actual)
1341 30 : IF (iw > 0) WRITE (iw, '(T2,A,I9,A,I9,A,I5,A,I5)') 'matrix C ', nrow_global, " by ", ncol_global, &
1342 20 : ' using blocks of ', nrow_block_actual, ' by ', ncol_block_actual
1343 :
1344 20 : CALL cp_fm_create(matrix=matrix_a, matrix_struct=fmstruct_a, name="MATRIX A")
1345 20 : CALL cp_fm_create(matrix=matrix_b, matrix_struct=fmstruct_b, name="MATRIX B")
1346 20 : CALL cp_fm_create(matrix=matrix_c, matrix_struct=fmstruct_c, name="MATRIX C")
1347 :
1348 20 : CALL RANDOM_NUMBER(matrix_a%local_data)
1349 20 : CALL RANDOM_NUMBER(matrix_b%local_data)
1350 20 : CALL RANDOM_NUMBER(matrix_c%local_data)
1351 :
1352 20 : IF (iw > 0) CALL m_flush(iw)
1353 :
1354 20 : t1 = m_walltime()
1355 932 : DO i_loop = 1, N_loop
1356 912 : t3 = m_walltime()
1357 912 : CALL parallel_gemm(transa, transb, k, n, m, 1.0_dp, matrix_a, matrix_b, 0.0_dp, matrix_c)
1358 912 : t4 = m_walltime()
1359 932 : IF (iw > 0) THEN
1360 456 : WRITE (iw, '(T2,A,T50,F12.6)') "cp_fm_gemm timing: ", (t4 - t3)
1361 456 : CALL m_flush(iw)
1362 : END IF
1363 : END DO
1364 20 : t2 = m_walltime()
1365 :
1366 20 : IF (iw > 0) THEN
1367 10 : WRITE (iw, '(T2,A,T50,F12.6)') "average cp_fm_gemm timing: ", (t2 - t1)/N_loop
1368 10 : IF (t2 > t1) THEN
1369 10 : WRITE (iw, '(T2,A,T50,F12.6)') "cp_fm_gemm Gflops per MPI task: ", &
1370 20 : 2*REAL(m, kind=dp)*REAL(n, kind=dp)*REAL(k, kind=dp)*N_loop/MAX(0.001_dp, t2 - t1)/1.0E9_dp/para_env%num_pe
1371 : END IF
1372 : END IF
1373 :
1374 20 : CALL cp_fm_release(matrix=matrix_a)
1375 20 : CALL cp_fm_release(matrix=matrix_b)
1376 20 : CALL cp_fm_release(matrix=matrix_c)
1377 20 : CALL cp_fm_struct_release(fmstruct=fmstruct_a)
1378 20 : CALL cp_fm_struct_release(fmstruct=fmstruct_b)
1379 20 : CALL cp_fm_struct_release(fmstruct=fmstruct_c)
1380 144 : CALL cp_blacs_env_release(blacs_env=blacs_env)
1381 :
1382 : END DO
1383 :
1384 4 : END SUBROUTINE cp_fm_gemm_test
1385 :
1386 : ! **************************************************************************************************
1387 : !> \brief Tests the DBCSR interface.
1388 : !> \param para_env ...
1389 : !> \param iw ...
1390 : !> \param input_section ...
1391 : ! **************************************************************************************************
1392 32 : SUBROUTINE cp_dbcsr_tests(para_env, iw, input_section)
1393 :
1394 : TYPE(mp_para_env_type), POINTER :: para_env
1395 : INTEGER :: iw
1396 : TYPE(section_vals_type), POINTER :: input_section
1397 :
1398 : CHARACTER, DIMENSION(3) :: types
1399 : INTEGER :: data_type, i_rep, k, m, n, N_loop, &
1400 : n_rep, test_type
1401 32 : INTEGER, DIMENSION(:), POINTER :: bs_k, bs_m, bs_n, nproc
1402 : LOGICAL :: always_checksum, retain_sparsity, &
1403 : transa_p, transb_p
1404 : REAL(KIND=dp) :: alpha, beta, filter_eps, s_a, s_b, s_c
1405 :
1406 : ! ---------------------------------------------------------------------------
1407 :
1408 32 : NULLIFY (bs_m, bs_n, bs_k)
1409 32 : CALL section_vals_get(input_section, n_repetition=n_rep)
1410 32 : CALL dbcsr_reset_randmat_seed()
1411 78 : DO i_rep = 1, n_rep
1412 : ! how often should we do the multiply
1413 46 : CALL section_vals_val_get(input_section, "N_loop", i_rep_section=i_rep, i_val=N_loop)
1414 :
1415 : ! matrices def.
1416 46 : CALL section_vals_val_get(input_section, "TEST_TYPE", i_rep_section=i_rep, i_val=test_type)
1417 46 : CALL section_vals_val_get(input_section, "DATA_TYPE", i_rep_section=i_rep, i_val=data_type)
1418 46 : CALL section_vals_val_get(input_section, "K", i_rep_section=i_rep, i_val=k)
1419 46 : CALL section_vals_val_get(input_section, "N", i_rep_section=i_rep, i_val=n)
1420 46 : CALL section_vals_val_get(input_section, "M", i_rep_section=i_rep, i_val=m)
1421 46 : CALL section_vals_val_get(input_section, "transa", i_rep_section=i_rep, l_val=transa_p)
1422 46 : CALL section_vals_val_get(input_section, "transb", i_rep_section=i_rep, l_val=transb_p)
1423 : CALL section_vals_val_get(input_section, "bs_m", i_rep_section=i_rep, &
1424 46 : i_vals=bs_m)
1425 : CALL section_vals_val_get(input_section, "bs_n", i_rep_section=i_rep, &
1426 46 : i_vals=bs_n)
1427 : CALL section_vals_val_get(input_section, "bs_k", i_rep_section=i_rep, &
1428 46 : i_vals=bs_k)
1429 46 : CALL section_vals_val_get(input_section, "keepsparse", i_rep_section=i_rep, l_val=retain_sparsity)
1430 46 : CALL section_vals_val_get(input_section, "asparsity", i_rep_section=i_rep, r_val=s_a)
1431 46 : CALL section_vals_val_get(input_section, "bsparsity", i_rep_section=i_rep, r_val=s_b)
1432 46 : CALL section_vals_val_get(input_section, "csparsity", i_rep_section=i_rep, r_val=s_c)
1433 46 : CALL section_vals_val_get(input_section, "alpha", i_rep_section=i_rep, r_val=alpha)
1434 46 : CALL section_vals_val_get(input_section, "beta", i_rep_section=i_rep, r_val=beta)
1435 : CALL section_vals_val_get(input_section, "nproc", i_rep_section=i_rep, &
1436 46 : i_vals=nproc)
1437 : CALL section_vals_val_get(input_section, "atype", i_rep_section=i_rep, &
1438 46 : c_val=types(1))
1439 : CALL section_vals_val_get(input_section, "btype", i_rep_section=i_rep, &
1440 46 : c_val=types(2))
1441 : CALL section_vals_val_get(input_section, "ctype", i_rep_section=i_rep, &
1442 46 : c_val=types(3))
1443 : CALL section_vals_val_get(input_section, "filter_eps", &
1444 46 : i_rep_section=i_rep, r_val=filter_eps)
1445 46 : CALL section_vals_val_get(input_section, "ALWAYS_CHECKSUM", i_rep_section=i_rep, l_val=always_checksum)
1446 :
1447 : CALL dbcsr_run_tests(para_env%get_handle(), iw, nproc, &
1448 : (/m, n, k/), &
1449 : (/transa_p, transb_p/), &
1450 : bs_m, bs_n, bs_k, &
1451 : (/s_a, s_b, s_c/), &
1452 : alpha, beta, &
1453 : data_type=data_type, &
1454 : test_type=test_type, &
1455 : n_loops=n_loop, eps=filter_eps, retain_sparsity=retain_sparsity, &
1456 538 : always_checksum=always_checksum)
1457 : END DO
1458 32 : END SUBROUTINE cp_dbcsr_tests
1459 :
1460 : ! **************************************************************************************************
1461 : !> \brief Tests the DBM library.
1462 : !> \param para_env ...
1463 : !> \param iw ...
1464 : !> \param input_section ...
1465 : ! **************************************************************************************************
1466 14 : SUBROUTINE run_dbm_tests(para_env, iw, input_section)
1467 :
1468 : TYPE(mp_para_env_type), POINTER :: para_env
1469 : INTEGER :: iw
1470 : TYPE(section_vals_type), POINTER :: input_section
1471 :
1472 : INTEGER :: i_rep, k, m, n, N_loop, n_rep
1473 14 : INTEGER, DIMENSION(:), POINTER :: bs_k, bs_m, bs_n
1474 : LOGICAL :: always_checksum, retain_sparsity, &
1475 : transa_p, transb_p
1476 : REAL(KIND=dp) :: alpha, beta, filter_eps, s_a, s_b, s_c
1477 :
1478 : ! ---------------------------------------------------------------------------
1479 :
1480 14 : NULLIFY (bs_m, bs_n, bs_k)
1481 14 : CALL section_vals_get(input_section, n_repetition=n_rep)
1482 14 : CALL dbcsr_reset_randmat_seed()
1483 28 : DO i_rep = 1, n_rep
1484 14 : CALL section_vals_val_get(input_section, "N_loop", i_rep_section=i_rep, i_val=N_loop)
1485 14 : CALL section_vals_val_get(input_section, "K", i_rep_section=i_rep, i_val=k)
1486 14 : CALL section_vals_val_get(input_section, "N", i_rep_section=i_rep, i_val=n)
1487 14 : CALL section_vals_val_get(input_section, "M", i_rep_section=i_rep, i_val=m)
1488 14 : CALL section_vals_val_get(input_section, "transa", i_rep_section=i_rep, l_val=transa_p)
1489 14 : CALL section_vals_val_get(input_section, "transb", i_rep_section=i_rep, l_val=transb_p)
1490 14 : CALL section_vals_val_get(input_section, "bs_m", i_rep_section=i_rep, i_vals=bs_m)
1491 14 : CALL section_vals_val_get(input_section, "bs_n", i_rep_section=i_rep, i_vals=bs_n)
1492 14 : CALL section_vals_val_get(input_section, "bs_k", i_rep_section=i_rep, i_vals=bs_k)
1493 14 : CALL section_vals_val_get(input_section, "keepsparse", i_rep_section=i_rep, l_val=retain_sparsity)
1494 14 : CALL section_vals_val_get(input_section, "asparsity", i_rep_section=i_rep, r_val=s_a)
1495 14 : CALL section_vals_val_get(input_section, "bsparsity", i_rep_section=i_rep, r_val=s_b)
1496 14 : CALL section_vals_val_get(input_section, "csparsity", i_rep_section=i_rep, r_val=s_c)
1497 14 : CALL section_vals_val_get(input_section, "alpha", i_rep_section=i_rep, r_val=alpha)
1498 14 : CALL section_vals_val_get(input_section, "beta", i_rep_section=i_rep, r_val=beta)
1499 14 : CALL section_vals_val_get(input_section, "filter_eps", i_rep_section=i_rep, r_val=filter_eps)
1500 14 : CALL section_vals_val_get(input_section, "ALWAYS_CHECKSUM", i_rep_section=i_rep, l_val=always_checksum)
1501 :
1502 : CALL dbm_run_tests(mp_group=para_env, &
1503 : io_unit=iw, &
1504 : matrix_sizes=(/m, n, k/), &
1505 : trs=(/transa_p, transb_p/), &
1506 : bs_m=bs_m, &
1507 : bs_n=bs_n, &
1508 : bs_k=bs_k, &
1509 : sparsities=(/s_a, s_b, s_c/), &
1510 : alpha=alpha, &
1511 : beta=beta, &
1512 : n_loops=n_loop, &
1513 : eps=filter_eps, &
1514 : retain_sparsity=retain_sparsity, &
1515 154 : always_checksum=always_checksum)
1516 : END DO
1517 14 : END SUBROUTINE run_dbm_tests
1518 :
1519 8484 : END MODULE library_tests
|