Line data Source code
1 : !--------------------------------------------------------------------------------------------------!
2 : ! CP2K: A general program to perform molecular dynamics simulations !
3 : ! Copyright 2000-2024 CP2K developers group <https://cp2k.org> !
4 : ! !
5 : ! SPDX-License-Identifier: GPL-2.0-or-later !
6 : !--------------------------------------------------------------------------------------------------!
7 :
8 : ! **************************************************************************************************
9 : !> \brief Wrapper for ELPA
10 : !> \author Ole Schuett
11 : ! **************************************************************************************************
12 : MODULE cp_fm_elpa
13 : USE cp_log_handling, ONLY: cp_to_string
14 : USE machine, ONLY: m_cpuid, &
15 : MACHINE_X86, &
16 : MACHINE_CPU_GENERIC, &
17 : MACHINE_X86_SSE4, &
18 : MACHINE_X86_AVX, &
19 : MACHINE_X86_AVX2
20 : USE cp_blacs_env, ONLY: cp_blacs_env_type
21 : USE cp_fm_basic_linalg, ONLY: cp_fm_upper_to_full
22 : USE cp_fm_diag_utils, ONLY: cp_fm_redistribute_start, &
23 : cp_fm_redistribute_end, &
24 : cp_fm_redistribute_info
25 : USE cp_fm_struct, ONLY: cp_fm_struct_get
26 : USE cp_fm_types, ONLY: cp_fm_type, &
27 : cp_fm_to_fm, &
28 : cp_fm_release, &
29 : cp_fm_create, &
30 : cp_fm_write_info
31 : USE cp_log_handling, ONLY: cp_get_default_logger, &
32 : cp_logger_get_default_io_unit, &
33 : cp_logger_type
34 : USE kinds, ONLY: default_string_length, &
35 : dp
36 : USE message_passing, ONLY: mp_comm_type
37 : USE OMP_LIB, ONLY: omp_get_max_threads
38 :
39 : #include "../base/base_uses.f90"
40 :
41 : #if defined (__ELPA)
42 : USE elpa_constants, ONLY: ELPA_2STAGE_REAL_INVALID, &
43 : ELPA_2STAGE_REAL_DEFAULT, &
44 : ELPA_2STAGE_REAL_GENERIC, &
45 : ELPA_2STAGE_REAL_GENERIC_SIMPLE, &
46 : ELPA_2STAGE_REAL_BGP, &
47 : ELPA_2STAGE_REAL_BGQ, &
48 : ELPA_2STAGE_REAL_SSE_ASSEMBLY, &
49 : ELPA_2STAGE_REAL_SSE_BLOCK2, &
50 : ELPA_2STAGE_REAL_SSE_BLOCK4, &
51 : ELPA_2STAGE_REAL_SSE_BLOCK6, &
52 : ELPA_2STAGE_REAL_AVX_BLOCK2, &
53 : ELPA_2STAGE_REAL_AVX_BLOCK4, &
54 : ELPA_2STAGE_REAL_AVX_BLOCK6, &
55 : ELPA_2STAGE_REAL_AVX2_BLOCK2, &
56 : ELPA_2STAGE_REAL_AVX2_BLOCK4, &
57 : ELPA_2STAGE_REAL_AVX2_BLOCK6, &
58 : ELPA_2STAGE_REAL_AVX512_BLOCK2, &
59 : ELPA_2STAGE_REAL_AVX512_BLOCK4, &
60 : ELPA_2STAGE_REAL_AVX512_BLOCK6, &
61 : ELPA_2STAGE_REAL_NVIDIA_GPU, &
62 : ELPA_2STAGE_REAL_AMD_GPU, &
63 : ELPA_2STAGE_REAL_INTEL_GPU_SYCL
64 :
65 : USE elpa, ONLY: elpa_t, elpa_solver_2stage, &
66 : elpa_init, elpa_uninit, &
67 : elpa_allocate, elpa_deallocate, elpa_ok
68 : #endif
69 :
70 : IMPLICIT NONE
71 :
72 : PRIVATE
73 :
74 : CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'cp_fm_elpa'
75 :
76 : #if defined(__ELPA)
77 : INTEGER, DIMENSION(21), PARAMETER :: elpa_kernel_ids = [ &
78 : ELPA_2STAGE_REAL_INVALID, & ! auto
79 : ELPA_2STAGE_REAL_GENERIC, &
80 : ELPA_2STAGE_REAL_GENERIC_SIMPLE, &
81 : ELPA_2STAGE_REAL_BGP, &
82 : ELPA_2STAGE_REAL_BGQ, &
83 : ELPA_2STAGE_REAL_SSE_ASSEMBLY, &
84 : ELPA_2STAGE_REAL_SSE_BLOCK2, &
85 : ELPA_2STAGE_REAL_SSE_BLOCK4, &
86 : ELPA_2STAGE_REAL_SSE_BLOCK6, &
87 : ELPA_2STAGE_REAL_AVX_BLOCK2, &
88 : ELPA_2STAGE_REAL_AVX_BLOCK4, &
89 : ELPA_2STAGE_REAL_AVX_BLOCK6, &
90 : ELPA_2STAGE_REAL_AVX2_BLOCK2, &
91 : ELPA_2STAGE_REAL_AVX2_BLOCK4, &
92 : ELPA_2STAGE_REAL_AVX2_BLOCK6, &
93 : ELPA_2STAGE_REAL_AVX512_BLOCK2, &
94 : ELPA_2STAGE_REAL_AVX512_BLOCK4, &
95 : ELPA_2STAGE_REAL_AVX512_BLOCK6, &
96 : ELPA_2STAGE_REAL_NVIDIA_GPU, &
97 : ELPA_2STAGE_REAL_AMD_GPU, &
98 : ELPA_2STAGE_REAL_INTEL_GPU_SYCL]
99 :
100 : CHARACTER(len=14), DIMENSION(SIZE(elpa_kernel_ids)), PARAMETER :: &
101 : elpa_kernel_names = [CHARACTER(len=14) :: &
102 : "AUTO", &
103 : "GENERIC", &
104 : "GENERIC_SIMPLE", &
105 : "BGP", &
106 : "BGQ", &
107 : "SSE", &
108 : "SSE_BLOCK2", &
109 : "SSE_BLOCK4", &
110 : "SSE_BLOCK6", &
111 : "AVX_BLOCK2", &
112 : "AVX_BLOCK4", &
113 : "AVX_BLOCK6", &
114 : "AVX2_BLOCK2", &
115 : "AVX2_BLOCK4", &
116 : "AVX2_BLOCK6", &
117 : "AVX512_BLOCK2", &
118 : "AVX512_BLOCK4", &
119 : "AVX512_BLOCK6", &
120 : "NVIDIA_GPU", &
121 : "AMD_GPU", &
122 : "INTEL_GPU"]
123 :
124 : CHARACTER(len=44), DIMENSION(SIZE(elpa_kernel_ids)), PARAMETER :: &
125 : elpa_kernel_descriptions = [CHARACTER(len=44) :: &
126 : "Automatically selected kernel", &
127 : "Generic kernel", &
128 : "Simplified generic kernel", &
129 : "Kernel optimized for IBM BGP", &
130 : "Kernel optimized for IBM BGQ", &
131 : "Kernel optimized for x86_64/SSE", &
132 : "Kernel optimized for x86_64/SSE (block=2)", &
133 : "Kernel optimized for x86_64/SSE (block=4)", &
134 : "Kernel optimized for x86_64/SSE (block=6)", &
135 : "Kernel optimized for Intel AVX (block=2)", &
136 : "Kernel optimized for Intel AVX (block=4)", &
137 : "Kernel optimized for Intel AVX (block=6)", &
138 : "Kernel optimized for Intel AVX2 (block=2)", &
139 : "Kernel optimized for Intel AVX2 (block=4)", &
140 : "Kernel optimized for Intel AVX2 (block=6)", &
141 : "Kernel optimized for Intel AVX-512 (block=2)", &
142 : "Kernel optimized for Intel AVX-512 (block=4)", &
143 : "Kernel optimized for Intel AVX-512 (block=6)", &
144 : "Kernel targeting Nvidia GPUs", &
145 : "Kernel targeting AMD GPUs", &
146 : "Kernel targeting Intel GPUs"]
147 :
148 : #else
149 : INTEGER, DIMENSION(1), PARAMETER :: elpa_kernel_ids = [-1]
150 : CHARACTER(len=14), DIMENSION(1), PARAMETER :: elpa_kernel_names = ["AUTO"]
151 : CHARACTER(len=44), DIMENSION(1), PARAMETER :: elpa_kernel_descriptions = ["Automatically selected kernel"]
152 : #endif
153 :
154 : #if defined(__ELPA)
155 : INTEGER, SAVE :: elpa_kernel = elpa_kernel_ids(1) ! auto
156 : #endif
157 : LOGICAL, SAVE :: elpa_qr = .FALSE., &
158 : elpa_qr_unsafe = .FALSE., &
159 : elpa_should_print = .FALSE.
160 :
161 : PUBLIC :: cp_fm_diag_elpa, &
162 : set_elpa_kernel, &
163 : set_elpa_qr, &
164 : set_elpa_print, &
165 : elpa_kernel_ids, &
166 : elpa_kernel_names, &
167 : elpa_kernel_descriptions, &
168 : initialize_elpa_library, &
169 : finalize_elpa_library
170 :
171 : CONTAINS
172 :
173 : ! **************************************************************************************************
174 : !> \brief Initialize the ELPA library
175 : ! **************************************************************************************************
176 8939 : SUBROUTINE initialize_elpa_library()
177 : #if defined(__ELPA)
178 8939 : IF (elpa_init(20180525) /= elpa_ok) &
179 0 : CPABORT("The linked ELPA library does not support the required API version")
180 : #else
181 : CPABORT("Initialization of ELPA library requested but not enabled during build")
182 : #endif
183 8939 : END SUBROUTINE
184 :
185 : ! **************************************************************************************************
186 : !> \brief Finalize the ELPA library
187 : ! **************************************************************************************************
188 8729 : SUBROUTINE finalize_elpa_library()
189 : #if defined(__ELPA)
190 8729 : CALL elpa_uninit()
191 : #else
192 : CPABORT("Finalization of ELPA library requested but not enabled during build")
193 : #endif
194 8729 : END SUBROUTINE
195 :
196 : ! **************************************************************************************************
197 : !> \brief Sets the active ELPA kernel.
198 : !> \param requested_kernel one of the elpa_kernel_ids
199 : ! **************************************************************************************************
200 8939 : SUBROUTINE set_elpa_kernel(requested_kernel)
201 : INTEGER, INTENT(IN) :: requested_kernel
202 :
203 : #if defined (__ELPA)
204 : INTEGER :: cpuid
205 :
206 8939 : elpa_kernel = requested_kernel
207 :
208 : ! Resolve AUTO kernel.
209 8939 : IF (elpa_kernel == ELPA_2STAGE_REAL_INVALID) THEN
210 8933 : cpuid = m_cpuid()
211 8933 : IF ((MACHINE_CPU_GENERIC .LT. cpuid) .AND. (cpuid .LE. MACHINE_X86)) THEN
212 0 : SELECT CASE (cpuid)
213 : CASE (MACHINE_X86_SSE4)
214 0 : elpa_kernel = ELPA_2STAGE_REAL_SSE_BLOCK4
215 : CASE (MACHINE_X86_AVX)
216 0 : elpa_kernel = ELPA_2STAGE_REAL_AVX_BLOCK4
217 : CASE (MACHINE_X86_AVX2)
218 8933 : elpa_kernel = ELPA_2STAGE_REAL_AVX2_BLOCK4
219 : CASE DEFAULT
220 8933 : elpa_kernel = ELPA_2STAGE_REAL_AVX512_BLOCK4
221 : END SELECT
222 : END IF
223 :
224 : ! Prefer GPU kernel if available.
225 : #if defined (__ELPA_NVIDIA_GPU)
226 : elpa_kernel = ELPA_2STAGE_REAL_NVIDIA_GPU
227 : #endif
228 : #if defined (__ELPA_AMD_GPU)
229 : elpa_kernel = ELPA_2STAGE_REAL_AMD_GPU
230 : #endif
231 : #if defined (__ELPA_INTEL_GPU)
232 : elpa_kernel = ELPA_2STAGE_REAL_INTEL_GPU_SYCL
233 : #endif
234 :
235 : ! If we could not find a suitable kernel then use ELPA_2STAGE_REAL_DEFAULT.
236 8933 : IF (elpa_kernel == ELPA_2STAGE_REAL_INVALID) THEN
237 0 : elpa_kernel = ELPA_2STAGE_REAL_DEFAULT
238 : END IF
239 : END IF
240 : #else
241 : MARK_USED(requested_kernel)
242 : #endif
243 8939 : END SUBROUTINE set_elpa_kernel
244 :
245 : ! **************************************************************************************************
246 : !> \brief Sets flags that determines if ELPA should try to use QR during diagonalization
247 : !> If use_qr = .TRUE., the QR step is performed only if the size of the input matrix is
248 : !> suitable. Check cp_fm_diag_elpa for further details.
249 : !> \param use_qr the logical flag
250 : !> \param use_qr_unsafe logical which determines if block size checks should be bypassed for some
251 : !> ELPA versions, potentially leading to incorrect eigenvalues
252 : ! **************************************************************************************************
253 8939 : SUBROUTINE set_elpa_qr(use_qr, use_qr_unsafe)
254 : LOGICAL, INTENT(IN) :: use_qr, use_qr_unsafe
255 :
256 8939 : elpa_qr = use_qr
257 8939 : elpa_qr_unsafe = use_qr_unsafe
258 8939 : END SUBROUTINE set_elpa_qr
259 :
260 : ! **************************************************************************************************
261 : !> \brief Sets a flag that determines if additional information about the ELPA diagonalization
262 : !> should be printed when the diagonalization routine is called.
263 : !> \param flag the logical flag
264 : ! **************************************************************************************************
265 8939 : SUBROUTINE set_elpa_print(flag)
266 : LOGICAL, INTENT(IN) :: flag
267 :
268 8939 : elpa_should_print = flag
269 8939 : END SUBROUTINE set_elpa_print
270 :
271 : ! **************************************************************************************************
272 : !> \brief Driver routine to diagonalize a FM matrix with the ELPA library.
273 : !> \param matrix the matrix that is diagonalized
274 : !> \param eigenvectors eigenvectors of the input matrix
275 : !> \param eigenvalues eigenvalues of the input matrix
276 : ! **************************************************************************************************
277 59878 : SUBROUTINE cp_fm_diag_elpa(matrix, eigenvectors, eigenvalues)
278 : TYPE(cp_fm_type), INTENT(IN) :: matrix, eigenvectors
279 : REAL(KIND=dp), DIMENSION(:), INTENT(OUT) :: eigenvalues
280 :
281 : #if defined(__ELPA)
282 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_diag_elpa'
283 :
284 : INTEGER :: handle
285 : TYPE(cp_fm_type) :: eigenvectors_new, matrix_new
286 : TYPE(cp_fm_redistribute_info) :: rdinfo
287 :
288 59878 : CALL timeset(routineN, handle)
289 :
290 : ! Determine if the input matrix needs to be redistributed before diagonalization.
291 : ! Heuristics are used to determine the optimal number of CPUs for diagonalization.
292 : ! The redistributed matrix is stored in matrix_new, which is just a pointer
293 : ! to the original matrix if no redistribution is required.
294 : ! With ELPA, we have to make sure that all processor columns have nonzero width
295 : CALL cp_fm_redistribute_start(matrix, eigenvectors, matrix_new, eigenvectors_new, &
296 59878 : caller_is_elpa=.TRUE., redist_info=rdinfo)
297 :
298 : ! Call ELPA on CPUs that hold the new matrix
299 59878 : IF (ASSOCIATED(matrix_new%matrix_struct)) &
300 35129 : CALL cp_fm_diag_elpa_base(matrix_new, eigenvectors_new, eigenvalues, rdinfo)
301 :
302 : ! Redistribute results and clean up
303 59878 : CALL cp_fm_redistribute_end(matrix, eigenvectors, eigenvalues, matrix_new, eigenvectors_new)
304 :
305 59878 : CALL timestop(handle)
306 : #else
307 : MARK_USED(matrix)
308 : MARK_USED(eigenvectors)
309 : MARK_USED(eigenvalues)
310 :
311 : CPABORT("CP2K compiled without the ELPA library.")
312 : #endif
313 59878 : END SUBROUTINE cp_fm_diag_elpa
314 :
315 : #if defined(__ELPA)
316 : ! **************************************************************************************************
317 : !> \brief Actual routine that calls ELPA to diagonalize a FM matrix.
318 : !> \param matrix the matrix that is diagonalized
319 : !> \param eigenvectors eigenvectors of the input matrix
320 : !> \param eigenvalues eigenvalues of the input matrix
321 : !> \param rdinfo ...
322 : ! **************************************************************************************************
323 35129 : SUBROUTINE cp_fm_diag_elpa_base(matrix, eigenvectors, eigenvalues, rdinfo)
324 :
325 : TYPE(cp_fm_type), INTENT(IN) :: matrix, eigenvectors
326 : REAL(KIND=dp), DIMENSION(:), INTENT(OUT) :: eigenvalues
327 : TYPE(cp_fm_redistribute_info), INTENT(IN) :: rdinfo
328 :
329 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_diag_elpa_base'
330 :
331 : INTEGER :: handle
332 :
333 : CLASS(elpa_t), POINTER :: elpa_obj
334 : CHARACTER(len=default_string_length) :: kernel_name
335 : TYPE(mp_comm_type) :: group
336 : INTEGER :: i, &
337 : mypcol, myprow, n, &
338 : n_rows, n_cols, &
339 : nblk, neig, io_unit, &
340 : success
341 : LOGICAL :: use_qr, check_eigenvalues
342 35129 : REAL(KIND=dp), DIMENSION(:), ALLOCATABLE :: eval, eval_noqr
343 : TYPE(cp_blacs_env_type), POINTER :: context
344 : TYPE(cp_fm_type) :: matrix_noqr, eigenvectors_noqr
345 : TYPE(cp_logger_type), POINTER :: logger
346 : REAL(KIND=dp), PARAMETER :: th = 1.0E-14_dp
347 35129 : INTEGER, DIMENSION(:), POINTER :: ncol_locals
348 :
349 35129 : CALL timeset(routineN, handle)
350 35129 : NULLIFY (logger)
351 35129 : NULLIFY (ncol_locals)
352 :
353 35129 : check_eigenvalues = .FALSE.
354 :
355 35129 : logger => cp_get_default_logger()
356 35129 : io_unit = cp_logger_get_default_io_unit(logger)
357 :
358 35129 : n = matrix%matrix_struct%nrow_global
359 35129 : context => matrix%matrix_struct%context
360 35129 : group = matrix%matrix_struct%para_env
361 :
362 35129 : myprow = context%mepos(1)
363 35129 : mypcol = context%mepos(2)
364 :
365 : ! elpa needs the full matrix
366 35129 : CALL cp_fm_upper_to_full(matrix, eigenvectors)
367 :
368 : CALL cp_fm_struct_get(matrix%matrix_struct, &
369 : local_leading_dimension=n_rows, &
370 : ncol_local=n_cols, &
371 : nrow_block=nblk, &
372 35129 : ncol_locals=ncol_locals)
373 :
374 : ! ELPA will fail in 'solve_tridi', with no useful error message, fail earlier
375 70258 : IF (io_unit > 0 .AND. ANY(ncol_locals == 0)) THEN
376 0 : CALL rdinfo%write(io_unit)
377 0 : CALL cp_fm_write_info(matrix, io_unit)
378 0 : CPABORT("ELPA [pre-fail]: Problem contains processor column with zero width.")
379 : END IF
380 :
381 35129 : neig = SIZE(eigenvalues, 1)
382 : ! Decide if matrix is suitable for ELPA to use QR
383 : ! The definition of what is considered a suitable matrix depends on the ELPA version
384 : ! The relevant ELPA files to check are
385 : ! - Proper matrix order: src/elpa2/elpa2_template.F90
386 : ! - Proper block size: test/Fortran/test.F90
387 : ! Note that the names of these files might change in different ELPA versions
388 : ! Matrix order must be even
389 35129 : use_qr = elpa_qr .AND. (MODULO(n, 2) .EQ. 0)
390 : ! Matrix order and block size must be greater than or equal to 64
391 35129 : IF (.NOT. elpa_qr_unsafe) &
392 35125 : use_qr = use_qr .AND. (n .GE. 64) .AND. (nblk .GE. 64)
393 :
394 : ! Check if eigenvalues computed with ELPA_QR_UNSAFE should be verified
395 8 : IF (use_qr .AND. elpa_qr_unsafe .AND. elpa_should_print) &
396 2 : check_eigenvalues = .TRUE.
397 :
398 35129 : CALL matrix%matrix_struct%para_env%bcast(check_eigenvalues)
399 :
400 35129 : IF (check_eigenvalues) THEN
401 : ! Allocate and initialize needed temporaries to compute eigenvalues without ELPA QR
402 12 : ALLOCATE (eval_noqr(n))
403 4 : CALL cp_fm_create(matrix=matrix_noqr, matrix_struct=matrix%matrix_struct)
404 4 : CALL cp_fm_to_fm(matrix, matrix_noqr)
405 4 : CALL cp_fm_create(matrix=eigenvectors_noqr, matrix_struct=eigenvectors%matrix_struct)
406 8 : CALL cp_fm_upper_to_full(matrix_noqr, eigenvectors_noqr)
407 : END IF
408 :
409 35129 : IF (io_unit > 0 .AND. elpa_should_print) THEN
410 : WRITE (UNIT=io_unit, FMT="(/,T2,A)") &
411 119 : "ELPA| Matrix diagonalization information"
412 :
413 : ! Find name for given kernel id.
414 : ! In case ELPA_2STAGE_REAL_DEFAULT was used it might not be in our elpa_kernel_ids list.
415 119 : kernel_name = "id: "//TRIM(ADJUSTL(cp_to_string(elpa_kernel)))
416 2618 : DO i = 1, SIZE(elpa_kernel_ids)
417 2618 : IF (elpa_kernel_ids(i) == elpa_kernel) THEN
418 119 : kernel_name = elpa_kernel_names(i)
419 : END IF
420 : END DO
421 :
422 : WRITE (UNIT=io_unit, FMT="(T2,A,T71,I10)") &
423 119 : "ELPA| Matrix order (NA) ", n, &
424 119 : "ELPA| Matrix block size (NBLK) ", nblk, &
425 119 : "ELPA| Number of eigenvectors (NEV) ", neig, &
426 119 : "ELPA| Local rows (LOCAL_NROWS) ", n_rows, &
427 238 : "ELPA| Local columns (LOCAL_NCOLS) ", n_cols
428 : WRITE (UNIT=io_unit, FMT="(T2,A,T61,A20)") &
429 119 : "ELPA| Kernel ", ADJUSTR(TRIM(kernel_name))
430 119 : IF (elpa_qr) THEN
431 : WRITE (UNIT=io_unit, FMT="(T2,A,T78,A3)") &
432 4 : "ELPA| QR step requested ", "YES"
433 : ELSE
434 : WRITE (UNIT=io_unit, FMT="(T2,A,T79,A2)") &
435 115 : "ELPA| QR step requested ", "NO"
436 : END IF
437 :
438 119 : IF (elpa_qr) THEN
439 4 : IF (elpa_qr_unsafe) THEN
440 : WRITE (UNIT=io_unit, FMT="(T2,A,T78,A3)") &
441 2 : "ELPA| Use potentially unsafe QR ", "YES"
442 : ELSE
443 : WRITE (UNIT=io_unit, FMT="(T2,A,T79,A2)") &
444 2 : "ELPA| Use potentially unsafe QR ", "NO"
445 : END IF
446 4 : IF (use_qr) THEN
447 : WRITE (UNIT=io_unit, FMT="(T2,A,T78,A3)") &
448 4 : "ELPA| Matrix is suitable for QR ", "YES"
449 : ELSE
450 : WRITE (UNIT=io_unit, FMT="(T2,A,T79,A2)") &
451 0 : "ELPA| Matrix is suitable for QR ", "NO"
452 : END IF
453 4 : IF (.NOT. use_qr) THEN
454 0 : IF (MODULO(n, 2) /= 0) THEN
455 : WRITE (UNIT=io_unit, FMT="(T2,A)") &
456 0 : "ELPA| Matrix order is NOT even"
457 : END IF
458 0 : IF ((nblk < 64) .AND. (.NOT. elpa_qr_unsafe)) THEN
459 : WRITE (UNIT=io_unit, FMT="(T2,A)") &
460 0 : "ELPA| Matrix block size is NOT 64 or greater"
461 : END IF
462 : ELSE
463 4 : IF ((nblk < 64) .AND. elpa_qr_unsafe) THEN
464 : WRITE (UNIT=io_unit, FMT="(T2,A)") &
465 2 : "ELPA| Matrix block size check was bypassed"
466 : END IF
467 : END IF
468 : END IF
469 : END IF
470 :
471 : ! the full eigenvalues vector is needed
472 105387 : ALLOCATE (eval(n))
473 :
474 35129 : elpa_obj => elpa_allocate()
475 :
476 35129 : CALL elpa_obj%set("na", n, success)
477 35129 : CPASSERT(success == elpa_ok)
478 :
479 35129 : CALL elpa_obj%set("nev", neig, success)
480 35129 : CPASSERT(success == elpa_ok)
481 :
482 35129 : CALL elpa_obj%set("local_nrows", n_rows, success)
483 35129 : CPASSERT(success == elpa_ok)
484 :
485 35129 : CALL elpa_obj%set("local_ncols", n_cols, success)
486 35129 : CPASSERT(success == elpa_ok)
487 :
488 35129 : CALL elpa_obj%set("nblk", nblk, success)
489 35129 : CPASSERT(success == elpa_ok)
490 :
491 35129 : CALL elpa_obj%set("mpi_comm_parent", group%get_handle(), success)
492 35129 : CPASSERT(success == elpa_ok)
493 :
494 35129 : CALL elpa_obj%set("process_row", myprow, success)
495 35129 : CPASSERT(success == elpa_ok)
496 :
497 35129 : CALL elpa_obj%set("process_col", mypcol, success)
498 35129 : CPASSERT(success == elpa_ok)
499 :
500 35129 : success = elpa_obj%setup()
501 35129 : CPASSERT(success == elpa_ok)
502 :
503 35129 : CALL elpa_obj%set("solver", elpa_solver_2stage, success)
504 35129 : CPASSERT(success == elpa_ok)
505 :
506 : ! enabling the GPU must happen before setting the kernel
507 35129 : IF (elpa_kernel == ELPA_2STAGE_REAL_NVIDIA_GPU) THEN
508 0 : CALL elpa_obj%set("nvidia-gpu", 1, success)
509 0 : CPASSERT(success == elpa_ok)
510 : END IF
511 35129 : IF (elpa_kernel == ELPA_2STAGE_REAL_AMD_GPU) THEN
512 0 : CALL elpa_obj%set("amd-gpu", 1, success)
513 0 : CPASSERT(success == elpa_ok)
514 : END IF
515 35129 : IF (elpa_kernel == ELPA_2STAGE_REAL_INTEL_GPU_SYCL) THEN
516 0 : CALL elpa_obj%set("intel-gpu", 1, success)
517 0 : CPASSERT(success == elpa_ok)
518 : END IF
519 :
520 35129 : CALL elpa_obj%set("real_kernel", elpa_kernel, success)
521 35129 : CPWARN_IF(success /= elpa_ok, "Setting real_kernel for ELPA failed")
522 :
523 35129 : IF (use_qr) THEN
524 8 : CALL elpa_obj%set("qr", 1, success)
525 8 : CPASSERT(success == elpa_ok)
526 : END IF
527 :
528 : ! Set number of threads only when ELPA was built with OpenMP support.
529 35129 : IF (elpa_obj%can_set("omp_threads", omp_get_max_threads()) == ELPA_OK) THEN
530 35129 : CALL elpa_obj%set("omp_threads", omp_get_max_threads(), success)
531 35129 : CPASSERT(success == elpa_ok)
532 : END IF
533 :
534 35129 : CALL elpa_obj%eigenvectors(matrix%local_data, eval, eigenvectors%local_data, success)
535 35129 : IF (success /= elpa_ok) &
536 0 : CPABORT("ELPA failed to diagonalize a matrix")
537 :
538 35129 : IF (check_eigenvalues) THEN
539 : ! run again without QR
540 4 : CALL elpa_obj%set("qr", 0, success)
541 4 : CPASSERT(success == elpa_ok)
542 :
543 4 : CALL elpa_obj%eigenvectors(matrix_noqr%local_data, eval_noqr, eigenvectors_noqr%local_data, success)
544 4 : IF (success /= elpa_ok) &
545 0 : CPABORT("ELPA failed to diagonalize a matrix even without QR decomposition")
546 :
547 340 : IF (ANY(ABS(eval(1:neig) - eval_noqr(1:neig)) .GT. th)) &
548 0 : CPABORT("Eigenvalues calculated with QR decomp. in ELPA are wrong. Disable ELPA_QR_UNSAFE.")
549 :
550 4 : DEALLOCATE (eval_noqr)
551 4 : CALL cp_fm_release(matrix_noqr)
552 4 : CALL cp_fm_release(eigenvectors_noqr)
553 : END IF
554 :
555 35129 : CALL elpa_deallocate(elpa_obj, success)
556 35129 : CPASSERT(success == elpa_ok)
557 :
558 1207551 : eigenvalues(1:neig) = eval(1:neig)
559 35129 : DEALLOCATE (eval)
560 :
561 35129 : CALL timestop(handle)
562 :
563 140516 : END SUBROUTINE cp_fm_diag_elpa_base
564 : #endif
565 :
566 : END MODULE cp_fm_elpa
|