Line data Source code
1 : !--------------------------------------------------------------------------------------------------!
2 : ! CP2K: A general program to perform molecular dynamics simulations !
3 : ! Copyright 2000-2025 CP2K developers group <https://cp2k.org> !
4 : ! !
5 : ! SPDX-License-Identifier: GPL-2.0-or-later !
6 : !--------------------------------------------------------------------------------------------------!
7 : ! **************************************************************************************************
8 : !> \brief Routines useful for iterative matrix calculations
9 : !> \par History
10 : !> 2010.10 created [Joost VandeVondele]
11 : !> \author Joost VandeVondele
12 : ! **************************************************************************************************
13 : MODULE iterate_matrix
14 : USE arnoldi_api, ONLY: arnoldi_env_type,&
15 : arnoldi_extremal
16 : USE bibliography, ONLY: Richters2018,&
17 : cite_reference
18 : USE cp_dbcsr_api, ONLY: &
19 : dbcsr_add, dbcsr_add_on_diag, dbcsr_copy, dbcsr_create, dbcsr_desymmetrize, &
20 : dbcsr_distribution_get, dbcsr_distribution_type, dbcsr_filter, dbcsr_get_diag, &
21 : dbcsr_get_info, dbcsr_get_matrix_type, dbcsr_get_occupation, dbcsr_multiply, dbcsr_p_type, &
22 : dbcsr_release, dbcsr_scale, dbcsr_set, dbcsr_set_diag, dbcsr_trace, dbcsr_transposed, &
23 : dbcsr_type, dbcsr_type_no_symmetry
24 : USE cp_dbcsr_contrib, ONLY: dbcsr_frobenius_norm,&
25 : dbcsr_gershgorin_norm,&
26 : dbcsr_maxabs
27 : USE cp_log_handling, ONLY: cp_get_default_logger,&
28 : cp_logger_get_default_unit_nr,&
29 : cp_logger_type
30 : USE input_constants, ONLY: ls_scf_submatrix_sign_direct,&
31 : ls_scf_submatrix_sign_direct_muadj,&
32 : ls_scf_submatrix_sign_direct_muadj_lowmem,&
33 : ls_scf_submatrix_sign_ns
34 : USE kinds, ONLY: dp,&
35 : int_8
36 : USE machine, ONLY: m_flush,&
37 : m_walltime
38 : USE mathconstants, ONLY: ifac
39 : USE mathlib, ONLY: abnormal_value
40 : USE message_passing, ONLY: mp_comm_type
41 : USE submatrix_dissection, ONLY: submatrix_dissection_type
42 : #include "./base/base_uses.f90"
43 :
44 : IMPLICIT NONE
45 :
46 : PRIVATE
47 :
48 : CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'iterate_matrix'
49 :
50 : TYPE :: eigbuf
51 : REAL(KIND=dp), DIMENSION(:), ALLOCATABLE :: eigvals
52 : REAL(KIND=dp), DIMENSION(:, :), ALLOCATABLE :: eigvecs
53 : END TYPE eigbuf
54 :
55 : INTERFACE purify_mcweeny
56 : MODULE PROCEDURE purify_mcweeny_orth, purify_mcweeny_nonorth
57 : END INTERFACE
58 :
59 : PUBLIC :: invert_Hotelling, matrix_sign_Newton_Schulz, matrix_sqrt_Newton_Schulz, &
60 : matrix_sqrt_proot, matrix_sign_proot, matrix_sign_submatrix, matrix_exponential, &
61 : matrix_sign_submatrix_mu_adjust, purify_mcweeny, invert_Taylor, determinant
62 :
63 : CONTAINS
64 :
65 : ! *****************************************************************************
66 : !> \brief Computes the determinant of a symmetric positive definite matrix
67 : !> using the trace of the matrix logarithm via Mercator series:
68 : !> det(A) = det(S)det(I+X)det(S), where S=diag(sqrt(Aii),..,sqrt(Ann))
69 : !> det(I+X) = Exp(Trace(Ln(I+X)))
70 : !> Ln(I+X) = X - X^2/2 + X^3/3 - X^4/4 + ..
71 : !> The series converges only if the Frobenius norm of X is less than 1.
72 : !> If it is more than one we compute (recursevily) the determinant of
73 : !> the square root of (I+X).
74 : !> \param matrix ...
75 : !> \param det - determinant
76 : !> \param threshold ...
77 : !> \par History
78 : !> 2015.04 created [Rustam Z Khaliullin]
79 : !> \author Rustam Z. Khaliullin
80 : ! **************************************************************************************************
81 132 : RECURSIVE SUBROUTINE determinant(matrix, det, threshold)
82 :
83 : TYPE(dbcsr_type), INTENT(INOUT) :: matrix
84 : REAL(KIND=dp), INTENT(INOUT) :: det
85 : REAL(KIND=dp), INTENT(IN) :: threshold
86 :
87 : CHARACTER(LEN=*), PARAMETER :: routineN = 'determinant'
88 :
89 : INTEGER :: handle, i, max_iter_lanczos, nsize, &
90 : order_lanczos, sign_iter, unit_nr
91 : INTEGER(KIND=int_8) :: flop1
92 : INTEGER, SAVE :: recursion_depth = 0
93 : REAL(KIND=dp) :: det0, eps_lanczos, frobnorm, maxnorm, &
94 : occ_matrix, t1, t2, trace
95 132 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:) :: diagonal
96 : TYPE(cp_logger_type), POINTER :: logger
97 : TYPE(dbcsr_type) :: tmp1, tmp2, tmp3
98 :
99 132 : CALL timeset(routineN, handle)
100 :
101 : ! get a useful output_unit
102 132 : logger => cp_get_default_logger()
103 132 : IF (logger%para_env%is_source()) THEN
104 66 : unit_nr = cp_logger_get_default_unit_nr(logger, local=.TRUE.)
105 : ELSE
106 66 : unit_nr = -1
107 : END IF
108 :
109 : ! Note: tmp1 and tmp2 have the same matrix type as the
110 : ! initial matrix (tmp3 does not have symmetry constraints)
111 : ! this might lead to uninteded results with anti-symmetric
112 : ! matrices
113 : CALL dbcsr_create(tmp1, template=matrix, &
114 132 : matrix_type=dbcsr_type_no_symmetry)
115 : CALL dbcsr_create(tmp2, template=matrix, &
116 132 : matrix_type=dbcsr_type_no_symmetry)
117 : CALL dbcsr_create(tmp3, template=matrix, &
118 132 : matrix_type=dbcsr_type_no_symmetry)
119 :
120 : ! compute the product of the diagonal elements
121 : BLOCK
122 : TYPE(mp_comm_type) :: group
123 132 : CALL dbcsr_get_info(matrix, nfullrows_total=nsize, group=group)
124 396 : ALLOCATE (diagonal(nsize))
125 132 : CALL dbcsr_get_diag(matrix, diagonal)
126 132 : CALL group%sum(diagonal)
127 2308 : det = PRODUCT(diagonal)
128 : END BLOCK
129 :
130 : ! create diagonal SQRTI matrix
131 2176 : diagonal(:) = 1.0_dp/(SQRT(diagonal(:)))
132 : !ROLL CALL dbcsr_copy(tmp1,matrix)
133 132 : CALL dbcsr_desymmetrize(matrix, tmp1)
134 132 : CALL dbcsr_set(tmp1, 0.0_dp)
135 132 : CALL dbcsr_set_diag(tmp1, diagonal)
136 132 : CALL dbcsr_filter(tmp1, threshold)
137 132 : DEALLOCATE (diagonal)
138 :
139 : ! normalize the main diagonal, off-diagonal elements are scaled to
140 : ! make the norm of the matrix less than 1
141 : CALL dbcsr_multiply("N", "N", 1.0_dp, &
142 : matrix, &
143 : tmp1, &
144 : 0.0_dp, tmp3, &
145 132 : filter_eps=threshold)
146 : CALL dbcsr_multiply("N", "N", 1.0_dp, &
147 : tmp1, &
148 : tmp3, &
149 : 0.0_dp, tmp2, &
150 132 : filter_eps=threshold)
151 :
152 : ! subtract the main diagonal to create matrix X
153 132 : CALL dbcsr_add_on_diag(tmp2, -1.0_dp)
154 132 : frobnorm = dbcsr_frobenius_norm(tmp2)
155 132 : IF (unit_nr > 0) THEN
156 66 : IF (recursion_depth .EQ. 0) THEN
157 41 : WRITE (unit_nr, '()')
158 : ELSE
159 : WRITE (unit_nr, '(T6,A28,1X,I15)') &
160 25 : "Recursive iteration:", recursion_depth
161 : END IF
162 : WRITE (unit_nr, '(T6,A28,1X,F15.10)') &
163 66 : "Frobenius norm:", frobnorm
164 66 : CALL m_flush(unit_nr)
165 : END IF
166 :
167 132 : IF (frobnorm .GE. 1.0_dp) THEN
168 :
169 50 : CALL dbcsr_add_on_diag(tmp2, 1.0_dp)
170 : ! these controls should be provided as input
171 50 : order_lanczos = 3
172 50 : eps_lanczos = 1.0E-4_dp
173 50 : max_iter_lanczos = 40
174 : CALL matrix_sqrt_Newton_Schulz( &
175 : tmp3, & ! output sqrt
176 : tmp1, & ! output sqrti
177 : tmp2, & ! input original
178 : threshold=threshold, &
179 : order=order_lanczos, &
180 : eps_lanczos=eps_lanczos, &
181 50 : max_iter_lanczos=max_iter_lanczos)
182 50 : recursion_depth = recursion_depth + 1
183 50 : CALL determinant(tmp3, det0, threshold)
184 50 : recursion_depth = recursion_depth - 1
185 50 : det = det*det0*det0
186 :
187 : ELSE
188 :
189 : ! create accumulator
190 82 : CALL dbcsr_copy(tmp1, tmp2)
191 : ! re-create to make use of symmetry
192 : !ROLL CALL dbcsr_create(tmp3,template=matrix)
193 :
194 82 : IF (unit_nr > 0) WRITE (unit_nr, *)
195 :
196 : ! initialize the sign of the term
197 82 : sign_iter = -1
198 1078 : DO i = 1, 100
199 :
200 1078 : t1 = m_walltime()
201 :
202 : ! multiply X^i by X
203 : ! note that the first iteration evaluates X^2
204 : ! because the trace of X^1 is zero by construction
205 : CALL dbcsr_multiply("N", "N", 1.0_dp, tmp1, tmp2, &
206 : 0.0_dp, tmp3, &
207 : filter_eps=threshold, &
208 1078 : flop=flop1)
209 1078 : CALL dbcsr_copy(tmp1, tmp3)
210 :
211 : ! get trace
212 1078 : CALL dbcsr_trace(tmp1, trace)
213 1078 : trace = trace*sign_iter/(1.0_dp*(i + 1))
214 1078 : sign_iter = -sign_iter
215 :
216 : ! update the determinant
217 1078 : det = det*EXP(trace)
218 :
219 1078 : occ_matrix = dbcsr_get_occupation(tmp1)
220 1078 : maxnorm = dbcsr_maxabs(tmp1)
221 :
222 1078 : t2 = m_walltime()
223 :
224 1078 : IF (unit_nr > 0) THEN
225 : WRITE (unit_nr, '(T6,A,1X,I3,1X,F7.5,F16.10,F10.3,F11.3)') &
226 539 : "Determinant iter", i, occ_matrix, &
227 539 : det, t2 - t1, &
228 1078 : flop1/(1.0E6_dp*MAX(0.001_dp, t2 - t1))
229 539 : CALL m_flush(unit_nr)
230 : END IF
231 :
232 : ! exit if the trace is close to zero
233 2156 : IF (maxnorm < threshold) EXIT
234 :
235 : END DO ! end iterations
236 :
237 82 : IF (unit_nr > 0) THEN
238 41 : WRITE (unit_nr, '()')
239 41 : CALL m_flush(unit_nr)
240 : END IF
241 :
242 : END IF ! decide to do sqrt or not
243 :
244 132 : IF (unit_nr > 0) THEN
245 66 : IF (recursion_depth .EQ. 0) THEN
246 : WRITE (unit_nr, '(T6,A28,1X,F15.10)') &
247 41 : "Final determinant:", det
248 41 : WRITE (unit_nr, '()')
249 : ELSE
250 : WRITE (unit_nr, '(T6,A28,1X,F15.10)') &
251 25 : "Recursive determinant:", det
252 : END IF
253 66 : CALL m_flush(unit_nr)
254 : END IF
255 :
256 132 : CALL dbcsr_release(tmp1)
257 132 : CALL dbcsr_release(tmp2)
258 132 : CALL dbcsr_release(tmp3)
259 :
260 132 : CALL timestop(handle)
261 :
262 132 : END SUBROUTINE determinant
263 :
264 : ! **************************************************************************************************
265 : !> \brief invert a symmetric positive definite diagonally dominant matrix
266 : !> \param matrix_inverse ...
267 : !> \param matrix ...
268 : !> \param threshold convergence threshold nased on the max abs
269 : !> \param use_inv_as_guess logical whether input can be used as guess for inverse
270 : !> \param norm_convergence convergence threshold for the 2-norm, useful for approximate solutions
271 : !> \param filter_eps filter_eps for matrix multiplications, if not passed nothing is filteres
272 : !> \param accelerator_order ...
273 : !> \param max_iter_lanczos ...
274 : !> \param eps_lanczos ...
275 : !> \param silent ...
276 : !> \par History
277 : !> 2010.10 created [Joost VandeVondele]
278 : !> 2011.10 guess option added [Rustam Z Khaliullin]
279 : !> \author Joost VandeVondele
280 : ! **************************************************************************************************
281 26 : SUBROUTINE invert_Taylor(matrix_inverse, matrix, threshold, use_inv_as_guess, &
282 : norm_convergence, filter_eps, accelerator_order, &
283 : max_iter_lanczos, eps_lanczos, silent)
284 :
285 : TYPE(dbcsr_type), INTENT(INOUT), TARGET :: matrix_inverse, matrix
286 : REAL(KIND=dp), INTENT(IN) :: threshold
287 : LOGICAL, INTENT(IN), OPTIONAL :: use_inv_as_guess
288 : REAL(KIND=dp), INTENT(IN), OPTIONAL :: norm_convergence, filter_eps
289 : INTEGER, INTENT(IN), OPTIONAL :: accelerator_order, max_iter_lanczos
290 : REAL(KIND=dp), INTENT(IN), OPTIONAL :: eps_lanczos
291 : LOGICAL, INTENT(IN), OPTIONAL :: silent
292 :
293 : CHARACTER(LEN=*), PARAMETER :: routineN = 'invert_Taylor'
294 :
295 : INTEGER :: accelerator_type, handle, i, &
296 : my_max_iter_lanczos, nrows, unit_nr
297 : INTEGER(KIND=int_8) :: flop2
298 : LOGICAL :: converged, use_inv_guess
299 : REAL(KIND=dp) :: coeff, convergence, maxnorm_matrix, &
300 : my_eps_lanczos, occ_matrix, t1, t2
301 26 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:) :: p_diagonal
302 : TYPE(cp_logger_type), POINTER :: logger
303 : TYPE(dbcsr_type), TARGET :: tmp1, tmp2, tmp3_sym
304 :
305 26 : CALL timeset(routineN, handle)
306 :
307 26 : logger => cp_get_default_logger()
308 26 : IF (logger%para_env%is_source()) THEN
309 13 : unit_nr = cp_logger_get_default_unit_nr(logger, local=.TRUE.)
310 : ELSE
311 13 : unit_nr = -1
312 : END IF
313 26 : IF (PRESENT(silent)) THEN
314 26 : IF (silent) unit_nr = -1
315 : END IF
316 :
317 26 : convergence = threshold
318 26 : IF (PRESENT(norm_convergence)) convergence = norm_convergence
319 :
320 26 : accelerator_type = 0
321 26 : IF (PRESENT(accelerator_order)) accelerator_type = accelerator_order
322 0 : IF (accelerator_type .GT. 1) accelerator_type = 1
323 :
324 26 : use_inv_guess = .FALSE.
325 26 : IF (PRESENT(use_inv_as_guess)) use_inv_guess = use_inv_as_guess
326 :
327 26 : my_max_iter_lanczos = 64
328 26 : my_eps_lanczos = 1.0E-3_dp
329 26 : IF (PRESENT(max_iter_lanczos)) my_max_iter_lanczos = max_iter_lanczos
330 26 : IF (PRESENT(eps_lanczos)) my_eps_lanczos = eps_lanczos
331 :
332 26 : CALL dbcsr_create(tmp1, template=matrix_inverse, matrix_type=dbcsr_type_no_symmetry)
333 26 : CALL dbcsr_create(tmp2, template=matrix_inverse, matrix_type=dbcsr_type_no_symmetry)
334 26 : CALL dbcsr_create(tmp3_sym, template=matrix_inverse)
335 :
336 26 : CALL dbcsr_get_info(matrix, nfullrows_total=nrows)
337 78 : ALLOCATE (p_diagonal(nrows))
338 :
339 : ! generate the initial guess
340 26 : IF (.NOT. use_inv_guess) THEN
341 :
342 26 : SELECT CASE (accelerator_type)
343 : CASE (0)
344 : ! use tmp1 to hold off-diagonal elements
345 26 : CALL dbcsr_desymmetrize(matrix, tmp1)
346 858 : p_diagonal(:) = 0.0_dp
347 26 : CALL dbcsr_set_diag(tmp1, p_diagonal)
348 : !CALL dbcsr_print(tmp1)
349 : ! invert the main diagonal
350 26 : CALL dbcsr_get_diag(matrix, p_diagonal)
351 858 : DO i = 1, nrows
352 858 : IF (p_diagonal(i) .NE. 0.0_dp) THEN
353 416 : p_diagonal(i) = 1.0_dp/p_diagonal(i)
354 : END IF
355 : END DO
356 26 : CALL dbcsr_set(matrix_inverse, 0.0_dp)
357 26 : CALL dbcsr_add_on_diag(matrix_inverse, 1.0_dp)
358 26 : CALL dbcsr_set_diag(matrix_inverse, p_diagonal)
359 : CASE DEFAULT
360 26 : CPABORT("Illegal accelerator order")
361 : END SELECT
362 :
363 : ELSE
364 :
365 0 : CPABORT("Guess is NYI")
366 :
367 : END IF
368 :
369 : CALL dbcsr_multiply("N", "N", 1.0_dp, tmp1, matrix_inverse, &
370 26 : 0.0_dp, tmp2, filter_eps=filter_eps)
371 :
372 26 : IF (unit_nr > 0) WRITE (unit_nr, *)
373 :
374 : ! scale the approximate inverse to be within the convergence radius
375 26 : t1 = m_walltime()
376 :
377 : ! done with the initial guess, start iterations
378 26 : converged = .FALSE.
379 26 : CALL dbcsr_desymmetrize(matrix_inverse, tmp1)
380 26 : coeff = 1.0_dp
381 284 : DO i = 1, 100
382 :
383 : ! coeff = +/- 1
384 284 : coeff = -1.0_dp*coeff
385 : CALL dbcsr_multiply("N", "N", 1.0_dp, tmp1, tmp2, 0.0_dp, &
386 : tmp3_sym, &
387 284 : flop=flop2, filter_eps=filter_eps)
388 : !flop=flop2)
389 284 : CALL dbcsr_add(matrix_inverse, tmp3_sym, 1.0_dp, coeff)
390 284 : CALL dbcsr_release(tmp1)
391 284 : CALL dbcsr_create(tmp1, template=matrix_inverse, matrix_type=dbcsr_type_no_symmetry)
392 284 : CALL dbcsr_desymmetrize(tmp3_sym, tmp1)
393 :
394 : ! for the convergence check
395 284 : maxnorm_matrix = dbcsr_maxabs(tmp3_sym)
396 :
397 284 : t2 = m_walltime()
398 284 : occ_matrix = dbcsr_get_occupation(matrix_inverse)
399 :
400 284 : IF (unit_nr > 0) THEN
401 142 : WRITE (unit_nr, '(T6,A,1X,I3,1X,F10.8,E12.3,F12.3,F13.3)') "Taylor iter", i, occ_matrix, &
402 142 : maxnorm_matrix, t2 - t1, &
403 284 : flop2/(1.0E6_dp*MAX(0.001_dp, t2 - t1))
404 142 : CALL m_flush(unit_nr)
405 : END IF
406 :
407 284 : IF (maxnorm_matrix < convergence) THEN
408 : converged = .TRUE.
409 : EXIT
410 : END IF
411 :
412 258 : t1 = m_walltime()
413 :
414 : END DO
415 :
416 : !last convergence check
417 : CALL dbcsr_multiply("N", "N", 1.0_dp, matrix, matrix_inverse, 0.0_dp, tmp1, &
418 26 : filter_eps=filter_eps)
419 26 : CALL dbcsr_add_on_diag(tmp1, -1.0_dp)
420 : !frob_matrix = dbcsr_frobenius_norm(tmp1)
421 26 : maxnorm_matrix = dbcsr_maxabs(tmp1)
422 26 : IF (unit_nr > 0) THEN
423 13 : WRITE (unit_nr, '(T6,A,E12.5)') "Final Taylor error", maxnorm_matrix
424 13 : WRITE (unit_nr, '()')
425 13 : CALL m_flush(unit_nr)
426 : END IF
427 26 : IF (maxnorm_matrix > convergence) THEN
428 0 : converged = .FALSE.
429 0 : IF (unit_nr > 0) THEN
430 0 : WRITE (unit_nr, *) 'Final convergence check failed'
431 : END IF
432 : END IF
433 :
434 26 : IF (.NOT. converged) THEN
435 0 : CPABORT("Taylor inversion did not converge")
436 : END IF
437 :
438 26 : CALL dbcsr_release(tmp1)
439 26 : CALL dbcsr_release(tmp2)
440 26 : CALL dbcsr_release(tmp3_sym)
441 :
442 26 : DEALLOCATE (p_diagonal)
443 :
444 26 : CALL timestop(handle)
445 :
446 52 : END SUBROUTINE invert_Taylor
447 :
448 : ! **************************************************************************************************
449 : !> \brief invert a symmetric positive definite matrix by Hotelling's method
450 : !> explicit symmetrization makes this code not suitable for other matrix types
451 : !> Currently a bit messy with the options, to to be cleaned soon
452 : !> \param matrix_inverse ...
453 : !> \param matrix ...
454 : !> \param threshold convergence threshold nased on the max abs
455 : !> \param use_inv_as_guess logical whether input can be used as guess for inverse
456 : !> \param norm_convergence convergence threshold for the 2-norm, useful for approximate solutions
457 : !> \param filter_eps filter_eps for matrix multiplications, if not passed nothing is filteres
458 : !> \param accelerator_order ...
459 : !> \param max_iter_lanczos ...
460 : !> \param eps_lanczos ...
461 : !> \param silent ...
462 : !> \par History
463 : !> 2010.10 created [Joost VandeVondele]
464 : !> 2011.10 guess option added [Rustam Z Khaliullin]
465 : !> \author Joost VandeVondele
466 : ! **************************************************************************************************
467 2032 : SUBROUTINE invert_Hotelling(matrix_inverse, matrix, threshold, use_inv_as_guess, &
468 : norm_convergence, filter_eps, accelerator_order, &
469 : max_iter_lanczos, eps_lanczos, silent)
470 :
471 : TYPE(dbcsr_type), INTENT(INOUT), TARGET :: matrix_inverse, matrix
472 : REAL(KIND=dp), INTENT(IN) :: threshold
473 : LOGICAL, INTENT(IN), OPTIONAL :: use_inv_as_guess
474 : REAL(KIND=dp), INTENT(IN), OPTIONAL :: norm_convergence, filter_eps
475 : INTEGER, INTENT(IN), OPTIONAL :: accelerator_order, max_iter_lanczos
476 : REAL(KIND=dp), INTENT(IN), OPTIONAL :: eps_lanczos
477 : LOGICAL, INTENT(IN), OPTIONAL :: silent
478 :
479 : CHARACTER(LEN=*), PARAMETER :: routineN = 'invert_Hotelling'
480 :
481 : INTEGER :: accelerator_type, handle, i, &
482 : my_max_iter_lanczos, unit_nr
483 : INTEGER(KIND=int_8) :: flop1, flop2
484 : LOGICAL :: arnoldi_converged, converged, &
485 : use_inv_guess
486 : REAL(KIND=dp) :: convergence, frob_matrix, gershgorin_norm, max_ev, maxnorm_matrix, min_ev, &
487 : my_eps_lanczos, my_filter_eps, occ_matrix, scalingf, t1, t2
488 : TYPE(cp_logger_type), POINTER :: logger
489 : TYPE(dbcsr_type), TARGET :: tmp1, tmp2
490 :
491 : !TYPE(arnoldi_env_type) :: arnoldi_env
492 : !TYPE(dbcsr_p_type), DIMENSION(1) :: mymat
493 :
494 2032 : CALL timeset(routineN, handle)
495 :
496 2032 : logger => cp_get_default_logger()
497 2032 : IF (logger%para_env%is_source()) THEN
498 1016 : unit_nr = cp_logger_get_default_unit_nr(logger, local=.TRUE.)
499 : ELSE
500 1016 : unit_nr = -1
501 : END IF
502 2032 : IF (PRESENT(silent)) THEN
503 2014 : IF (silent) unit_nr = -1
504 : END IF
505 :
506 2032 : convergence = threshold
507 2032 : IF (PRESENT(norm_convergence)) convergence = norm_convergence
508 :
509 2032 : accelerator_type = 1
510 2032 : IF (PRESENT(accelerator_order)) accelerator_type = accelerator_order
511 1436 : IF (accelerator_type .GT. 1) accelerator_type = 1
512 :
513 2032 : use_inv_guess = .FALSE.
514 2032 : IF (PRESENT(use_inv_as_guess)) use_inv_guess = use_inv_as_guess
515 :
516 2032 : my_max_iter_lanczos = 64
517 2032 : my_eps_lanczos = 1.0E-3_dp
518 2032 : IF (PRESENT(max_iter_lanczos)) my_max_iter_lanczos = max_iter_lanczos
519 2032 : IF (PRESENT(eps_lanczos)) my_eps_lanczos = eps_lanczos
520 :
521 2032 : my_filter_eps = threshold
522 2032 : IF (PRESENT(filter_eps)) my_filter_eps = filter_eps
523 :
524 : ! generate the initial guess
525 2032 : IF (.NOT. use_inv_guess) THEN
526 :
527 0 : SELECT CASE (accelerator_type)
528 : CASE (0)
529 0 : gershgorin_norm = dbcsr_gershgorin_norm(matrix)
530 0 : frob_matrix = dbcsr_frobenius_norm(matrix)
531 0 : CALL dbcsr_set(matrix_inverse, 0.0_dp)
532 0 : CALL dbcsr_add_on_diag(matrix_inverse, 1/MIN(gershgorin_norm, frob_matrix))
533 : CASE (1)
534 : ! initialize matrix to unity and use arnoldi (below) to scale it into the convergence range
535 1558 : CALL dbcsr_set(matrix_inverse, 0.0_dp)
536 1558 : CALL dbcsr_add_on_diag(matrix_inverse, 1.0_dp)
537 : CASE DEFAULT
538 1558 : CPABORT("Illegal accelerator order")
539 : END SELECT
540 :
541 : ! everything commutes, therefore our all products will be symmetric
542 1558 : CALL dbcsr_create(tmp1, template=matrix_inverse)
543 :
544 : ELSE
545 :
546 : ! It is unlikely that our guess will commute with the matrix, therefore the first product will
547 : ! be non symmetric
548 474 : CALL dbcsr_create(tmp1, template=matrix_inverse, matrix_type=dbcsr_type_no_symmetry)
549 :
550 : END IF
551 :
552 2032 : CALL dbcsr_create(tmp2, template=matrix_inverse)
553 :
554 2032 : IF (unit_nr > 0) WRITE (unit_nr, *)
555 :
556 : ! scale the approximate inverse to be within the convergence radius
557 2032 : t1 = m_walltime()
558 :
559 : CALL dbcsr_multiply("N", "N", 1.0_dp, matrix_inverse, matrix, &
560 2032 : 0.0_dp, tmp1, flop=flop1, filter_eps=my_filter_eps)
561 :
562 2032 : IF (accelerator_type == 1) THEN
563 :
564 : ! scale the matrix to get into the convergence range
565 : CALL arnoldi_extremal(tmp1, max_eV, min_eV, threshold=my_eps_lanczos, &
566 2032 : max_iter=my_max_iter_lanczos, converged=arnoldi_converged)
567 : !mymat(1)%matrix => tmp1
568 : !CALL setup_arnoldi_env(arnoldi_env, mymat, max_iter=30, threshold=1.0E-3_dp, selection_crit=1, &
569 : ! nval_request=2, nrestarts=2, generalized_ev=.FALSE., iram=.TRUE.)
570 : !CALL arnoldi_ev(mymat, arnoldi_env)
571 : !max_eV = REAL(get_selected_ritz_val(arnoldi_env, 2), dp)
572 : !min_eV = REAL(get_selected_ritz_val(arnoldi_env, 1), dp)
573 : !CALL deallocate_arnoldi_env(arnoldi_env)
574 :
575 2032 : IF (unit_nr > 0) THEN
576 768 : WRITE (unit_nr, *)
577 768 : WRITE (unit_nr, '(T6,A,1X,L1,A,E12.3)') "Lanczos converged: ", arnoldi_converged, " threshold:", my_eps_lanczos
578 768 : WRITE (unit_nr, '(T6,A,1X,E12.3,E12.3)') "Est. extremal eigenvalues:", max_eV, min_eV
579 768 : WRITE (unit_nr, '(T6,A,1X,E12.3)') "Est. condition number :", max_eV/MAX(min_eV, EPSILON(min_eV))
580 : END IF
581 :
582 : ! 2.0 would be the correct scaling however, we should make sure here, that we are in the convergence radius
583 2032 : scalingf = 1.9_dp/(max_eV + min_eV)
584 2032 : CALL dbcsr_scale(tmp1, scalingf)
585 2032 : CALL dbcsr_scale(matrix_inverse, scalingf)
586 2032 : min_ev = min_ev*scalingf
587 :
588 : END IF
589 :
590 : ! done with the initial guess, start iterations
591 2032 : converged = .FALSE.
592 8998 : DO i = 1, 100
593 :
594 : ! tmp1 = S^-1 S
595 :
596 : ! for the convergence check
597 8998 : CALL dbcsr_add_on_diag(tmp1, -1.0_dp)
598 8998 : maxnorm_matrix = dbcsr_maxabs(tmp1)
599 8998 : CALL dbcsr_add_on_diag(tmp1, +1.0_dp)
600 :
601 : ! tmp2 = S^-1 S S^-1
602 : CALL dbcsr_multiply("N", "N", 1.0_dp, tmp1, matrix_inverse, 0.0_dp, tmp2, &
603 8998 : flop=flop2, filter_eps=my_filter_eps)
604 : ! S^-1_{n+1} = 2 S^-1 - S^-1 S S^-1
605 8998 : CALL dbcsr_add(matrix_inverse, tmp2, 2.0_dp, -1.0_dp)
606 :
607 8998 : CALL dbcsr_filter(matrix_inverse, my_filter_eps)
608 8998 : t2 = m_walltime()
609 8998 : occ_matrix = dbcsr_get_occupation(matrix_inverse)
610 :
611 : ! use the scalar form of the algorithm to trace the EV
612 8998 : IF (accelerator_type == 1) THEN
613 8998 : min_ev = min_ev*(2.0_dp - min_ev)
614 8998 : IF (PRESENT(norm_convergence)) maxnorm_matrix = ABS(min_eV - 1.0_dp)
615 : END IF
616 :
617 8998 : IF (unit_nr > 0) THEN
618 3718 : WRITE (unit_nr, '(T6,A,1X,I3,1X,F10.8,E12.3,F12.3,F13.3)') "Hotelling iter", i, occ_matrix, &
619 3718 : maxnorm_matrix, t2 - t1, &
620 7436 : (flop1 + flop2)/(1.0E6_dp*MAX(0.001_dp, t2 - t1))
621 3718 : CALL m_flush(unit_nr)
622 : END IF
623 :
624 8998 : IF (maxnorm_matrix < convergence) THEN
625 : converged = .TRUE.
626 : EXIT
627 : END IF
628 :
629 : ! scale the matrix for improved convergence
630 6966 : IF (accelerator_type == 1) THEN
631 6966 : min_ev = min_ev*2.0_dp/(min_ev + 1.0_dp)
632 6966 : CALL dbcsr_scale(matrix_inverse, 2.0_dp/(min_ev + 1.0_dp))
633 : END IF
634 :
635 6966 : t1 = m_walltime()
636 : CALL dbcsr_multiply("N", "N", 1.0_dp, matrix_inverse, matrix, &
637 6966 : 0.0_dp, tmp1, flop=flop1, filter_eps=my_filter_eps)
638 :
639 : END DO
640 :
641 2032 : IF (.NOT. converged) THEN
642 0 : CPABORT("Hotelling inversion did not converge")
643 : END IF
644 :
645 : ! try to symmetrize the output matrix
646 2032 : IF (dbcsr_get_matrix_type(matrix_inverse) == dbcsr_type_no_symmetry) THEN
647 100 : CALL dbcsr_transposed(tmp2, matrix_inverse)
648 2132 : CALL dbcsr_add(matrix_inverse, tmp2, 0.5_dp, 0.5_dp)
649 : END IF
650 :
651 2032 : IF (unit_nr > 0) THEN
652 : ! WRITE(unit_nr,'(T6,A,1X,I3,1X,F10.8,E12.3)') "Final Hotelling ",i,occ_matrix,&
653 : ! !frob_matrix/frob_matrix_base
654 : ! maxnorm_matrix
655 768 : WRITE (unit_nr, '()')
656 768 : CALL m_flush(unit_nr)
657 : END IF
658 :
659 2032 : CALL dbcsr_release(tmp1)
660 2032 : CALL dbcsr_release(tmp2)
661 :
662 2032 : CALL timestop(handle)
663 :
664 2032 : END SUBROUTINE invert_Hotelling
665 :
666 : ! **************************************************************************************************
667 : !> \brief compute the sign a matrix using Newton-Schulz iterations
668 : !> \param matrix_sign ...
669 : !> \param matrix ...
670 : !> \param threshold ...
671 : !> \param sign_order ...
672 : !> \param iounit ...
673 : !> \par History
674 : !> 2010.10 created [Joost VandeVondele]
675 : !> 2019.05 extended to order byxond 2 [Robert Schade]
676 : !> \author Joost VandeVondele, Robert Schade
677 : ! **************************************************************************************************
678 1058 : SUBROUTINE matrix_sign_Newton_Schulz(matrix_sign, matrix, threshold, sign_order, iounit)
679 :
680 : TYPE(dbcsr_type), INTENT(INOUT) :: matrix_sign, matrix
681 : REAL(KIND=dp), INTENT(IN) :: threshold
682 : INTEGER, INTENT(IN), OPTIONAL :: sign_order, iounit
683 :
684 : CHARACTER(LEN=*), PARAMETER :: routineN = 'matrix_sign_Newton_Schulz'
685 :
686 : INTEGER :: count, handle, i, order, unit_nr
687 : INTEGER(KIND=int_8) :: flops
688 : REAL(KIND=dp) :: a0, a1, a2, a3, a4, a5, floptot, &
689 : frob_matrix, frob_matrix_base, &
690 : gersh_matrix, occ_matrix, prefactor, &
691 : t1, t2
692 : TYPE(cp_logger_type), POINTER :: logger
693 : TYPE(dbcsr_type) :: tmp1, tmp2, tmp3, tmp4
694 :
695 1058 : CALL timeset(routineN, handle)
696 :
697 1058 : IF (PRESENT(iounit)) THEN
698 1058 : unit_nr = iounit
699 : ELSE
700 0 : logger => cp_get_default_logger()
701 0 : IF (logger%para_env%is_source()) THEN
702 0 : unit_nr = cp_logger_get_default_unit_nr(logger, local=.TRUE.)
703 : ELSE
704 0 : unit_nr = -1
705 : END IF
706 : END IF
707 :
708 1058 : IF (PRESENT(sign_order)) THEN
709 1058 : order = sign_order
710 : ELSE
711 : order = 2
712 : END IF
713 :
714 1058 : CALL dbcsr_create(tmp1, template=matrix_sign)
715 :
716 1058 : CALL dbcsr_create(tmp2, template=matrix_sign)
717 1058 : IF (ABS(order) .GE. 4) THEN
718 8 : CALL dbcsr_create(tmp3, template=matrix_sign)
719 : END IF
720 8 : IF (ABS(order) .GT. 4) THEN
721 6 : CALL dbcsr_create(tmp4, template=matrix_sign)
722 : END IF
723 :
724 1058 : CALL dbcsr_copy(matrix_sign, matrix)
725 1058 : CALL dbcsr_filter(matrix_sign, threshold)
726 :
727 : ! scale the matrix to get into the convergence range
728 1058 : frob_matrix = dbcsr_frobenius_norm(matrix_sign)
729 1058 : gersh_matrix = dbcsr_gershgorin_norm(matrix_sign)
730 1058 : CALL dbcsr_scale(matrix_sign, 1/MIN(frob_matrix, gersh_matrix))
731 :
732 1058 : IF (unit_nr > 0) WRITE (unit_nr, *)
733 :
734 1058 : count = 0
735 13202 : DO i = 1, 100
736 13202 : floptot = 0_dp
737 13202 : t1 = m_walltime()
738 : ! tmp1 = X * X
739 : CALL dbcsr_multiply("N", "N", -1.0_dp, matrix_sign, matrix_sign, 0.0_dp, tmp1, &
740 13202 : filter_eps=threshold, flop=flops)
741 13202 : floptot = floptot + flops
742 :
743 : ! check convergence (frob norm of what should be the identity matrix minus identity matrix)
744 13202 : frob_matrix_base = dbcsr_frobenius_norm(tmp1)
745 13202 : CALL dbcsr_add_on_diag(tmp1, +1.0_dp)
746 13202 : frob_matrix = dbcsr_frobenius_norm(tmp1)
747 :
748 : ! f(y) approx 1/sqrt(1-y)
749 : ! f(y)=1+y/2+3/8*y^2+5/16*y^3+35/128*y^4+63/256*y^5+231/1024*y^6
750 : ! f2(y)=1+y/2=1/2*(2+y)
751 : ! f3(y)=1+y/2+3/8*y^2=3/8*(8/3+4/3*y+y^2)
752 : ! f4(y)=1+y/2+3/8*y^2+5/16*y^3=5/16*(16/5+8/5*y+6/5*y^2+y^3)
753 : ! f5(y)=1+y/2+3/8*y^2+5/16*y^3+35/128*y^4=35/128*(128/35+128/70*y+48/35*y^2+8/7*y^3+y^4)
754 : ! z(y)=(y+a_0)*y+a_1
755 : ! f5(y)=35/128*((z(y)+y+a_2)*z(y)+a_3)
756 : ! =35/128*((a_1^2+a_1a_2+a_3)+(2*a_0a_1+a_1+a_0a_2)y+(a_0^2+a_0+2a_1+a_2)y^2+(2a_0+1)y^3+y^4)
757 : ! a_0=1/14
758 : ! a_1=23819/13720
759 : ! a_2=1269/980-2a_1=-3734/1715
760 : ! a_3=832591127/188238400
761 : ! f6(y)=1+y/2+3/8*y^2+5/16*y^3+35/128*y^4+63/256*y^5
762 : ! =63/256*(256/63 + (128 y)/63 + (32 y^2)/21 + (80 y^3)/63 + (10 y^4)/9 + y^5)
763 : ! f7(y)=1+y/2+3/8*y^2+5/16*y^3+35/128*y^4+63/256*y^5+231/1024*y^6
764 : ! =231/1024*(1024/231+512/231*y+128/77*y^2+320/231*y^3+40/33*y^4+12/11*y^5+y^6)
765 : ! z(y)=(y+a_0)*y+a_1
766 : ! w(y)=(y+a_2)*z(y)+a_3
767 : ! f7(y)=(w(y)+z(y)+a_4)*w(y)+a_5
768 : ! a_0= 1.3686502058092053653287666647611728507211996691324048468010382350359929055186612505791532871573242422
769 : ! a_1= 1.7089671854477436685850554669524985556296280184497503489303331821456795715195510972774979091893741568
770 : ! a_2=-1.3231956603546599107833121193066273961757451236778593922555836895814474509732067051246078326118696968
771 : ! a_3= 3.9876642330847931291749479958277754186675336169578593000744380254770411483327581042259415937710270453
772 : ! a_4=-3.7273299006476825027065704937541279833880400042556351139273912137942678919776364526511485025132991667
773 : ! a_5= 4.9369932474103023792021351907971943220607580694533770325967170245194362399287150565595441897740173578
774 : !
775 : ! y=1-X*X
776 :
777 : ! tmp1 = I-x*x
778 : IF (order .EQ. 2) THEN
779 13156 : prefactor = 0.5_dp
780 :
781 : ! update the above to 3*I-X*X
782 13156 : CALL dbcsr_add_on_diag(tmp1, +2.0_dp)
783 13156 : occ_matrix = dbcsr_get_occupation(matrix_sign)
784 : ELSE IF (order .EQ. 3) THEN
785 : ! with one multiplication
786 : ! tmp1=y
787 12 : CALL dbcsr_copy(tmp2, tmp1)
788 12 : CALL dbcsr_scale(tmp1, 4.0_dp/3.0_dp)
789 12 : CALL dbcsr_add_on_diag(tmp1, 8.0_dp/3.0_dp)
790 :
791 : ! tmp2=y^2
792 : CALL dbcsr_multiply("N", "N", 1.0_dp, tmp2, tmp2, 1.0_dp, tmp1, &
793 12 : filter_eps=threshold, flop=flops)
794 12 : floptot = floptot + flops
795 12 : prefactor = 3.0_dp/8.0_dp
796 :
797 : ELSE IF (order .EQ. 4) THEN
798 : ! with two multiplications
799 : ! tmp1=y
800 10 : CALL dbcsr_copy(tmp3, tmp1)
801 10 : CALL dbcsr_scale(tmp1, 8.0_dp/5.0_dp)
802 10 : CALL dbcsr_add_on_diag(tmp1, 16.0_dp/5.0_dp)
803 :
804 : !
805 : CALL dbcsr_multiply("N", "N", 1.0_dp, tmp3, tmp3, 0.0_dp, tmp2, &
806 10 : filter_eps=threshold, flop=flops)
807 10 : floptot = floptot + flops
808 :
809 10 : CALL dbcsr_add(tmp1, tmp2, 1.0_dp, 6.0_dp/5.0_dp)
810 :
811 : CALL dbcsr_multiply("N", "N", 1.0_dp, tmp2, tmp3, 1.0_dp, tmp1, &
812 10 : filter_eps=threshold, flop=flops)
813 10 : floptot = floptot + flops
814 :
815 10 : prefactor = 5.0_dp/16.0_dp
816 : ELSE IF (order .EQ. -5) THEN
817 : ! with three multiplications
818 : ! tmp1=y
819 0 : CALL dbcsr_copy(tmp3, tmp1)
820 0 : CALL dbcsr_scale(tmp1, 128.0_dp/70.0_dp)
821 0 : CALL dbcsr_add_on_diag(tmp1, 128.0_dp/35.0_dp)
822 :
823 : !
824 : CALL dbcsr_multiply("N", "N", 1.0_dp, tmp3, tmp3, 0.0_dp, tmp2, &
825 0 : filter_eps=threshold, flop=flops)
826 0 : floptot = floptot + flops
827 :
828 0 : CALL dbcsr_add(tmp1, tmp2, 1.0_dp, 48.0_dp/35.0_dp)
829 :
830 : CALL dbcsr_multiply("N", "N", 1.0_dp, tmp2, tmp3, 0.0_dp, tmp4, &
831 0 : filter_eps=threshold, flop=flops)
832 0 : floptot = floptot + flops
833 :
834 0 : CALL dbcsr_add(tmp1, tmp4, 1.0_dp, 8.0_dp/7.0_dp)
835 :
836 : CALL dbcsr_multiply("N", "N", 1.0_dp, tmp4, tmp3, 1.0_dp, tmp1, &
837 0 : filter_eps=threshold, flop=flops)
838 0 : floptot = floptot + flops
839 :
840 0 : prefactor = 35.0_dp/128.0_dp
841 : ELSE IF (order .EQ. 5) THEN
842 : ! with two multiplications
843 : ! z(y)=(y+a_0)*y+a_1
844 : ! f5(y)=35/128*((z(y)+y+a_2)*z(y)+a_3)
845 : ! =35/128*((a_1^2+a_1a_2+a_3)+(2*a_0a_1+a_1+a_0a_2)y+(a_0^2+a_0+2a_1+a_2)y^2+(2a_0+1)y^3+y^4)
846 : ! a_0=1/14
847 : ! a_1=23819/13720
848 : ! a_2=1269/980-2a_1=-3734/1715
849 : ! a_3=832591127/188238400
850 8 : a0 = 1.0_dp/14.0_dp
851 8 : a1 = 23819.0_dp/13720.0_dp
852 8 : a2 = -3734_dp/1715.0_dp
853 8 : a3 = 832591127_dp/188238400.0_dp
854 :
855 : ! tmp1=y
856 : ! tmp3=z
857 8 : CALL dbcsr_copy(tmp3, tmp1)
858 8 : CALL dbcsr_add_on_diag(tmp3, a0)
859 : CALL dbcsr_multiply("N", "N", 1.0_dp, tmp3, tmp1, 0.0_dp, tmp2, &
860 8 : filter_eps=threshold, flop=flops)
861 8 : floptot = floptot + flops
862 8 : CALL dbcsr_add_on_diag(tmp2, a1)
863 :
864 8 : CALL dbcsr_add_on_diag(tmp1, a2)
865 8 : CALL dbcsr_add(tmp1, tmp2, 1.0_dp, 1.0_dp)
866 : CALL dbcsr_multiply("N", "N", 1.0_dp, tmp1, tmp2, 0.0_dp, tmp3, &
867 8 : filter_eps=threshold, flop=flops)
868 8 : floptot = floptot + flops
869 8 : CALL dbcsr_add_on_diag(tmp3, a3)
870 8 : CALL dbcsr_copy(tmp1, tmp3)
871 :
872 8 : prefactor = 35.0_dp/128.0_dp
873 : ELSE IF (order .EQ. 6) THEN
874 : ! with four multiplications
875 : ! f6(y)=63/256*(256/63 + (128 y)/63 + (32 y^2)/21 + (80 y^3)/63 + (10 y^4)/9 + y^5)
876 : ! tmp1=y
877 8 : CALL dbcsr_copy(tmp3, tmp1)
878 8 : CALL dbcsr_scale(tmp1, 128.0_dp/63.0_dp)
879 8 : CALL dbcsr_add_on_diag(tmp1, 256.0_dp/63.0_dp)
880 :
881 : !
882 : CALL dbcsr_multiply("N", "N", 1.0_dp, tmp3, tmp3, 0.0_dp, tmp2, &
883 8 : filter_eps=threshold, flop=flops)
884 8 : floptot = floptot + flops
885 :
886 8 : CALL dbcsr_add(tmp1, tmp2, 1.0_dp, 32.0_dp/21.0_dp)
887 :
888 : CALL dbcsr_multiply("N", "N", 1.0_dp, tmp2, tmp3, 0.0_dp, tmp4, &
889 8 : filter_eps=threshold, flop=flops)
890 8 : floptot = floptot + flops
891 :
892 8 : CALL dbcsr_add(tmp1, tmp4, 1.0_dp, 80.0_dp/63.0_dp)
893 :
894 : CALL dbcsr_multiply("N", "N", 1.0_dp, tmp4, tmp3, 0.0_dp, tmp2, &
895 8 : filter_eps=threshold, flop=flops)
896 8 : floptot = floptot + flops
897 :
898 8 : CALL dbcsr_add(tmp1, tmp2, 1.0_dp, 10.0_dp/9.0_dp)
899 :
900 : CALL dbcsr_multiply("N", "N", 1.0_dp, tmp2, tmp3, 1.0_dp, tmp1, &
901 8 : filter_eps=threshold, flop=flops)
902 8 : floptot = floptot + flops
903 :
904 8 : prefactor = 63.0_dp/256.0_dp
905 : ELSE IF (order .EQ. 7) THEN
906 : ! with three multiplications
907 :
908 8 : a0 = 1.3686502058092053653287666647611728507211996691324048468010382350359929055186612505791532871573242422_dp
909 8 : a1 = 1.7089671854477436685850554669524985556296280184497503489303331821456795715195510972774979091893741568_dp
910 8 : a2 = -1.3231956603546599107833121193066273961757451236778593922555836895814474509732067051246078326118696968_dp
911 8 : a3 = 3.9876642330847931291749479958277754186675336169578593000744380254770411483327581042259415937710270453_dp
912 8 : a4 = -3.7273299006476825027065704937541279833880400042556351139273912137942678919776364526511485025132991667_dp
913 8 : a5 = 4.9369932474103023792021351907971943220607580694533770325967170245194362399287150565595441897740173578_dp
914 : ! =231/1024*(1024/231+512/231*y+128/77*y^2+320/231*y^3+40/33*y^4+12/11*y^5+y^6)
915 : ! z(y)=(y+a_0)*y+a_1
916 : ! w(y)=(y+a_2)*z(y)+a_3
917 : ! f7(y)=(w(y)+z(y)+a_4)*w(y)+a_5
918 :
919 : ! tmp1=y
920 : ! tmp3=z
921 8 : CALL dbcsr_copy(tmp3, tmp1)
922 8 : CALL dbcsr_add_on_diag(tmp3, a0)
923 : CALL dbcsr_multiply("N", "N", 1.0_dp, tmp3, tmp1, 0.0_dp, tmp2, &
924 8 : filter_eps=threshold, flop=flops)
925 8 : floptot = floptot + flops
926 8 : CALL dbcsr_add_on_diag(tmp2, a1)
927 :
928 : ! tmp4=w
929 8 : CALL dbcsr_copy(tmp4, tmp1)
930 8 : CALL dbcsr_add_on_diag(tmp4, a2)
931 : CALL dbcsr_multiply("N", "N", 1.0_dp, tmp4, tmp2, 0.0_dp, tmp3, &
932 8 : filter_eps=threshold, flop=flops)
933 8 : floptot = floptot + flops
934 8 : CALL dbcsr_add_on_diag(tmp3, a3)
935 :
936 8 : CALL dbcsr_add(tmp2, tmp3, 1.0_dp, 1.0_dp)
937 8 : CALL dbcsr_add_on_diag(tmp2, a4)
938 : CALL dbcsr_multiply("N", "N", 1.0_dp, tmp2, tmp3, 0.0_dp, tmp1, &
939 8 : filter_eps=threshold, flop=flops)
940 8 : floptot = floptot + flops
941 8 : CALL dbcsr_add_on_diag(tmp1, a5)
942 :
943 8 : prefactor = 231.0_dp/1024.0_dp
944 : ELSE
945 0 : CPABORT("requested order is not implemented.")
946 : END IF
947 :
948 : ! tmp2 = X * prefactor *
949 : CALL dbcsr_multiply("N", "N", prefactor, matrix_sign, tmp1, 0.0_dp, tmp2, &
950 13202 : filter_eps=threshold, flop=flops)
951 13202 : floptot = floptot + flops
952 :
953 : ! done iterating
954 : ! CALL dbcsr_filter(tmp2,threshold)
955 13202 : CALL dbcsr_copy(matrix_sign, tmp2)
956 13202 : t2 = m_walltime()
957 :
958 13202 : occ_matrix = dbcsr_get_occupation(matrix_sign)
959 :
960 13202 : IF (unit_nr > 0) THEN
961 0 : WRITE (unit_nr, '(T6,A,1X,I3,1X,F10.8,E12.3,F12.3,F13.3)') "NS sign iter ", i, occ_matrix, &
962 0 : frob_matrix/frob_matrix_base, t2 - t1, &
963 0 : floptot/(1.0E6_dp*MAX(0.001_dp, t2 - t1))
964 0 : CALL m_flush(unit_nr)
965 : END IF
966 :
967 : ! frob_matrix/frob_matrix_base < SQRT(threshold)
968 13202 : IF (frob_matrix*frob_matrix < (threshold*frob_matrix_base*frob_matrix_base)) EXIT
969 :
970 : END DO
971 :
972 : ! this check is not really needed
973 : CALL dbcsr_multiply("N", "N", +1.0_dp, matrix_sign, matrix_sign, 0.0_dp, tmp1, &
974 1058 : filter_eps=threshold)
975 1058 : frob_matrix_base = dbcsr_frobenius_norm(tmp1)
976 1058 : CALL dbcsr_add_on_diag(tmp1, -1.0_dp)
977 1058 : frob_matrix = dbcsr_frobenius_norm(tmp1)
978 1058 : occ_matrix = dbcsr_get_occupation(matrix_sign)
979 1058 : IF (unit_nr > 0) THEN
980 0 : WRITE (unit_nr, '(T6,A,1X,I3,1X,F10.8,E12.3)') "Final NS sign iter", i, occ_matrix, &
981 0 : frob_matrix/frob_matrix_base
982 0 : WRITE (unit_nr, '()')
983 0 : CALL m_flush(unit_nr)
984 : END IF
985 :
986 1058 : CALL dbcsr_release(tmp1)
987 1058 : CALL dbcsr_release(tmp2)
988 1058 : IF (ABS(order) .GE. 4) THEN
989 8 : CALL dbcsr_release(tmp3)
990 : END IF
991 8 : IF (ABS(order) .GT. 4) THEN
992 6 : CALL dbcsr_release(tmp4)
993 : END IF
994 :
995 1058 : CALL timestop(handle)
996 :
997 1058 : END SUBROUTINE matrix_sign_Newton_Schulz
998 :
999 : ! **************************************************************************************************
1000 : !> \brief compute the sign a matrix using the general algorithm for the p-th root of Richters et al.
1001 : !> Commun. Comput. Phys., 25 (2019), pp. 564-585.
1002 : !> \param matrix_sign ...
1003 : !> \param matrix ...
1004 : !> \param threshold ...
1005 : !> \param sign_order ...
1006 : !> \par History
1007 : !> 2019.03 created [Robert Schade]
1008 : !> \author Robert Schade
1009 : ! **************************************************************************************************
1010 16 : SUBROUTINE matrix_sign_proot(matrix_sign, matrix, threshold, sign_order)
1011 :
1012 : TYPE(dbcsr_type), INTENT(INOUT) :: matrix_sign, matrix
1013 : REAL(KIND=dp), INTENT(IN) :: threshold
1014 : INTEGER, INTENT(IN), OPTIONAL :: sign_order
1015 :
1016 : CHARACTER(LEN=*), PARAMETER :: routineN = 'matrix_sign_proot'
1017 :
1018 : INTEGER :: handle, order, unit_nr
1019 : INTEGER(KIND=int_8) :: flop0, flop1, flop2
1020 : LOGICAL :: converged, symmetrize
1021 : REAL(KIND=dp) :: frob_matrix, frob_matrix_base, occ_matrix
1022 : TYPE(cp_logger_type), POINTER :: logger
1023 : TYPE(dbcsr_type) :: matrix2, matrix_sqrt, matrix_sqrt_inv, &
1024 : tmp1, tmp2
1025 :
1026 8 : CALL cite_reference(Richters2018)
1027 :
1028 8 : CALL timeset(routineN, handle)
1029 :
1030 8 : logger => cp_get_default_logger()
1031 8 : IF (logger%para_env%is_source()) THEN
1032 4 : unit_nr = cp_logger_get_default_unit_nr(logger, local=.TRUE.)
1033 : ELSE
1034 4 : unit_nr = -1
1035 : END IF
1036 :
1037 8 : IF (PRESENT(sign_order)) THEN
1038 8 : order = sign_order
1039 : ELSE
1040 0 : order = 2
1041 : END IF
1042 :
1043 8 : CALL dbcsr_create(tmp1, template=matrix_sign)
1044 :
1045 8 : CALL dbcsr_create(tmp2, template=matrix_sign)
1046 :
1047 8 : CALL dbcsr_create(matrix2, template=matrix, matrix_type=dbcsr_type_no_symmetry)
1048 : CALL dbcsr_multiply("N", "N", 1.0_dp, matrix, matrix, 0.0_dp, matrix2, &
1049 8 : filter_eps=threshold, flop=flop0)
1050 : !CALL dbcsr_filter(matrix2, threshold)
1051 :
1052 : !CALL dbcsr_copy(matrix_sign, matrix)
1053 : !CALL dbcsr_filter(matrix_sign, threshold)
1054 :
1055 8 : IF (unit_nr > 0) WRITE (unit_nr, *)
1056 :
1057 8 : CALL dbcsr_create(matrix_sqrt, template=matrix2)
1058 8 : CALL dbcsr_create(matrix_sqrt_inv, template=matrix2)
1059 8 : IF (unit_nr > 0) WRITE (unit_nr, *) "Threshold=", threshold
1060 :
1061 8 : symmetrize = .FALSE.
1062 : CALL matrix_sqrt_proot(matrix_sqrt, matrix_sqrt_inv, matrix2, threshold, order, &
1063 8 : 0.01_dp, 100, symmetrize, converged)
1064 :
1065 : CALL dbcsr_multiply("N", "N", 1.0_dp, matrix, matrix_sqrt_inv, 0.0_dp, matrix_sign, &
1066 8 : filter_eps=threshold, flop=flop1)
1067 :
1068 : ! this check is not really needed
1069 : CALL dbcsr_multiply("N", "N", +1.0_dp, matrix_sign, matrix_sign, 0.0_dp, tmp1, &
1070 8 : filter_eps=threshold, flop=flop2)
1071 8 : frob_matrix_base = dbcsr_frobenius_norm(tmp1)
1072 8 : CALL dbcsr_add_on_diag(tmp1, -1.0_dp)
1073 8 : frob_matrix = dbcsr_frobenius_norm(tmp1)
1074 8 : occ_matrix = dbcsr_get_occupation(matrix_sign)
1075 8 : IF (unit_nr > 0) THEN
1076 4 : WRITE (unit_nr, '(T6,A,F10.8,E12.3)') "Final proot sign iter", occ_matrix, &
1077 8 : frob_matrix/frob_matrix_base
1078 4 : WRITE (unit_nr, '()')
1079 4 : CALL m_flush(unit_nr)
1080 : END IF
1081 :
1082 8 : CALL dbcsr_release(tmp1)
1083 8 : CALL dbcsr_release(tmp2)
1084 8 : CALL dbcsr_release(matrix2)
1085 8 : CALL dbcsr_release(matrix_sqrt)
1086 8 : CALL dbcsr_release(matrix_sqrt_inv)
1087 :
1088 8 : CALL timestop(handle)
1089 :
1090 8 : END SUBROUTINE matrix_sign_proot
1091 :
1092 : ! **************************************************************************************************
1093 : !> \brief compute the sign of a dense matrix using Newton-Schulz iterations
1094 : !> \param matrix_sign ...
1095 : !> \param matrix ...
1096 : !> \param matrix_id ...
1097 : !> \param threshold ...
1098 : !> \param sign_order ...
1099 : !> \author Michael Lass, Robert Schade
1100 : ! **************************************************************************************************
1101 2 : SUBROUTINE dense_matrix_sign_Newton_Schulz(matrix_sign, matrix, matrix_id, threshold, sign_order)
1102 :
1103 : REAL(KIND=dp), DIMENSION(:, :), INTENT(INOUT) :: matrix_sign
1104 : REAL(KIND=dp), DIMENSION(:, :), INTENT(IN) :: matrix
1105 : INTEGER, INTENT(IN), OPTIONAL :: matrix_id
1106 : REAL(KIND=dp), INTENT(IN) :: threshold
1107 : INTEGER, INTENT(IN), OPTIONAL :: sign_order
1108 :
1109 : CHARACTER(LEN=*), PARAMETER :: routineN = 'dense_matrix_sign_Newton_Schulz'
1110 :
1111 : INTEGER :: handle, i, j, sz, unit_nr
1112 : LOGICAL :: converged
1113 : REAL(KIND=dp) :: frob_matrix, frob_matrix_base, &
1114 : gersh_matrix, prefactor, scaling_factor
1115 2 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :) :: tmp1, tmp2
1116 : REAL(KIND=dp), DIMENSION(1) :: work
1117 : REAL(KIND=dp), EXTERNAL :: dlange
1118 : TYPE(cp_logger_type), POINTER :: logger
1119 :
1120 2 : CALL timeset(routineN, handle)
1121 :
1122 : ! print output on all ranks
1123 2 : logger => cp_get_default_logger()
1124 2 : unit_nr = cp_logger_get_default_unit_nr(logger, local=.TRUE.)
1125 :
1126 : ! scale the matrix to get into the convergence range
1127 2 : sz = SIZE(matrix, 1)
1128 2 : frob_matrix = dlange('F', sz, sz, matrix, sz, work) !dbcsr_frobenius_norm(matrix_sign)
1129 2 : gersh_matrix = dlange('1', sz, sz, matrix, sz, work) !dbcsr_gershgorin_norm(matrix_sign)
1130 2 : scaling_factor = 1/MIN(frob_matrix, gersh_matrix)
1131 86 : matrix_sign = matrix*scaling_factor
1132 8 : ALLOCATE (tmp1(sz, sz))
1133 6 : ALLOCATE (tmp2(sz, sz))
1134 :
1135 2 : converged = .FALSE.
1136 14 : DO i = 1, 100
1137 14 : CALL dgemm('N', 'N', sz, sz, sz, -1.0_dp, matrix_sign, sz, matrix_sign, sz, 0.0_dp, tmp1, sz)
1138 :
1139 : ! check convergence (frob norm of what should be the identity matrix minus identity matrix)
1140 14 : frob_matrix_base = dlange('F', sz, sz, tmp1, sz, work)
1141 98 : DO j = 1, sz
1142 98 : tmp1(j, j) = tmp1(j, j) + 1.0_dp
1143 : END DO
1144 14 : frob_matrix = dlange('F', sz, sz, tmp1, sz, work)
1145 :
1146 14 : IF (sign_order .EQ. 2) THEN
1147 8 : prefactor = 0.5_dp
1148 : ! update the above to 3*I-X*X
1149 56 : DO j = 1, sz
1150 56 : tmp1(j, j) = tmp1(j, j) + 2.0_dp
1151 : END DO
1152 6 : ELSE IF (sign_order .EQ. 3) THEN
1153 258 : tmp2(:, :) = tmp1
1154 258 : tmp1 = tmp1*4.0_dp/3.0_dp
1155 42 : DO j = 1, sz
1156 42 : tmp1(j, j) = tmp1(j, j) + 8.0_dp/3.0_dp
1157 : END DO
1158 6 : CALL dgemm('N', 'N', sz, sz, sz, 1.0_dp, tmp2, sz, tmp2, sz, 1.0_dp, tmp1, sz)
1159 6 : prefactor = 3.0_dp/8.0_dp
1160 : ELSE
1161 0 : CPABORT("requested order is not implemented.")
1162 : END IF
1163 :
1164 14 : CALL dgemm('N', 'N', sz, sz, sz, prefactor, matrix_sign, sz, tmp1, sz, 0.0_dp, tmp2, sz)
1165 602 : matrix_sign = tmp2
1166 :
1167 : ! frob_matrix/frob_matrix_base < SQRT(threshold)
1168 14 : IF (frob_matrix*frob_matrix < (threshold*frob_matrix_base*frob_matrix_base)) THEN
1169 : WRITE (unit_nr, '(T6,A,1X,I6,1X,A,1X,I3,E12.3)') &
1170 2 : "Submatrix", matrix_id, "final NS sign iter", i, frob_matrix/frob_matrix_base
1171 2 : CALL m_flush(unit_nr)
1172 : converged = .TRUE.
1173 : EXIT
1174 : END IF
1175 : END DO
1176 :
1177 : IF (.NOT. converged) &
1178 0 : CPABORT("dense_matrix_sign_Newton_Schulz did not converge within 100 iterations")
1179 :
1180 2 : DEALLOCATE (tmp1)
1181 2 : DEALLOCATE (tmp2)
1182 :
1183 2 : CALL timestop(handle)
1184 :
1185 2 : END SUBROUTINE dense_matrix_sign_Newton_Schulz
1186 :
1187 : ! **************************************************************************************************
1188 : !> \brief Perform eigendecomposition of a dense matrix
1189 : !> \param sm ...
1190 : !> \param N ...
1191 : !> \param eigvals ...
1192 : !> \param eigvecs ...
1193 : !> \par History
1194 : !> 2020.05 Extracted from dense_matrix_sign_direct [Michael Lass]
1195 : !> \author Michael Lass, Robert Schade
1196 : ! **************************************************************************************************
1197 4 : SUBROUTINE eigdecomp(sm, N, eigvals, eigvecs)
1198 : INTEGER, INTENT(IN) :: N
1199 : REAL(KIND=dp), INTENT(IN) :: sm(N, N)
1200 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:), &
1201 : INTENT(OUT) :: eigvals
1202 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :), &
1203 : INTENT(OUT) :: eigvecs
1204 :
1205 : INTEGER :: info, liwork, lwork
1206 4 : INTEGER, ALLOCATABLE, DIMENSION(:) :: iwork
1207 4 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:) :: work
1208 4 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :) :: tmp
1209 :
1210 24 : ALLOCATE (eigvecs(N, N), tmp(N, N))
1211 12 : ALLOCATE (eigvals(N))
1212 :
1213 : ! symmetrize sm
1214 172 : eigvecs(:, :) = 0.5*(sm + TRANSPOSE(sm))
1215 :
1216 : ! probe optimal sizes for WORK and IWORK
1217 4 : LWORK = -1
1218 4 : LIWORK = -1
1219 4 : ALLOCATE (WORK(1))
1220 4 : ALLOCATE (IWORK(1))
1221 4 : CALL dsyevd('V', 'U', N, eigvecs, N, eigvals, WORK, LWORK, IWORK, LIWORK, INFO)
1222 4 : LWORK = INT(WORK(1))
1223 4 : LIWORK = INT(IWORK(1))
1224 4 : DEALLOCATE (IWORK)
1225 4 : DEALLOCATE (WORK)
1226 :
1227 : ! calculate eigenvalues and eigenvectors
1228 12 : ALLOCATE (WORK(LWORK))
1229 12 : ALLOCATE (IWORK(LIWORK))
1230 4 : CALL dsyevd('V', 'U', N, eigvecs, N, eigvals, WORK, LWORK, IWORK, LIWORK, INFO)
1231 4 : DEALLOCATE (IWORK)
1232 4 : DEALLOCATE (WORK)
1233 4 : IF (INFO .NE. 0) CPABORT("dsyevd did not succeed")
1234 :
1235 4 : DEALLOCATE (tmp)
1236 4 : END SUBROUTINE eigdecomp
1237 :
1238 : ! **************************************************************************************************
1239 : !> \brief Calculate the sign matrix from eigenvalues and eigenvectors of a matrix
1240 : !> \param sm_sign ...
1241 : !> \param eigvals ...
1242 : !> \param eigvecs ...
1243 : !> \param N ...
1244 : !> \param mu_correction ...
1245 : !> \par History
1246 : !> 2020.05 Extracted from dense_matrix_sign_direct [Michael Lass]
1247 : !> \author Michael Lass, Robert Schade
1248 : ! **************************************************************************************************
1249 4 : SUBROUTINE sign_from_eigdecomp(sm_sign, eigvals, eigvecs, N, mu_correction)
1250 : INTEGER :: N
1251 : REAL(KIND=dp), INTENT(IN) :: eigvecs(N, N), eigvals(N)
1252 : REAL(KIND=dp), INTENT(INOUT) :: sm_sign(N, N)
1253 : REAL(KIND=dp), INTENT(IN) :: mu_correction
1254 :
1255 : INTEGER :: i
1256 4 : REAL(KIND=dp) :: modified_eigval, tmp(N, N)
1257 :
1258 172 : sm_sign = 0
1259 28 : DO i = 1, N
1260 24 : modified_eigval = eigvals(i) - mu_correction
1261 28 : IF (modified_eigval > 0) THEN
1262 6 : sm_sign(i, i) = 1.0
1263 18 : ELSE IF (modified_eigval < 0) THEN
1264 18 : sm_sign(i, i) = -1.0
1265 : ELSE
1266 0 : sm_sign(i, i) = 0.0
1267 : END IF
1268 : END DO
1269 :
1270 : ! Create matrix with eigenvalues in {-1,0,1} and eigenvectors of sm:
1271 : ! sm_sign = eigvecs * sm_sign * eigvecs.T
1272 4 : CALL dgemm('N', 'N', N, N, N, 1.0_dp, eigvecs, N, sm_sign, N, 0.0_dp, tmp, N)
1273 4 : CALL dgemm('N', 'T', N, N, N, 1.0_dp, tmp, N, eigvecs, N, 0.0_dp, sm_sign, N)
1274 4 : END SUBROUTINE sign_from_eigdecomp
1275 :
1276 : ! **************************************************************************************************
1277 : !> \brief Compute partial trace of a matrix from its eigenvalues and eigenvectors
1278 : !> \param eigvals ...
1279 : !> \param eigvecs ...
1280 : !> \param firstcol ...
1281 : !> \param lastcol ...
1282 : !> \param mu_correction ...
1283 : !> \return ...
1284 : !> \par History
1285 : !> 2020.05 Created [Michael Lass]
1286 : !> \author Michael Lass
1287 : ! **************************************************************************************************
1288 36 : FUNCTION trace_from_eigdecomp(eigvals, eigvecs, firstcol, lastcol, mu_correction) RESULT(trace)
1289 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:), &
1290 : INTENT(IN) :: eigvals
1291 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :), &
1292 : INTENT(IN) :: eigvecs
1293 : INTEGER, INTENT(IN) :: firstcol, lastcol
1294 : REAL(KIND=dp), INTENT(IN) :: mu_correction
1295 : REAL(KIND=dp) :: trace
1296 :
1297 : INTEGER :: i, j, sm_size
1298 : REAL(KIND=dp) :: modified_eigval, tmpsum
1299 36 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:) :: mapped_eigvals
1300 :
1301 36 : sm_size = SIZE(eigvals)
1302 108 : ALLOCATE (mapped_eigvals(sm_size))
1303 :
1304 252 : DO i = 1, sm_size
1305 216 : modified_eigval = eigvals(i) - mu_correction
1306 252 : IF (modified_eigval > 0) THEN
1307 26 : mapped_eigvals(i) = 1.0
1308 190 : ELSE IF (modified_eigval < 0) THEN
1309 190 : mapped_eigvals(i) = -1.0
1310 : ELSE
1311 0 : mapped_eigvals(i) = 0.0
1312 : END IF
1313 : END DO
1314 :
1315 36 : trace = 0.0_dp
1316 252 : DO i = firstcol, lastcol
1317 : tmpsum = 0.0_dp
1318 1512 : DO j = 1, sm_size
1319 1512 : tmpsum = tmpsum + (eigvecs(i, j)*mapped_eigvals(j)*eigvecs(i, j))
1320 : END DO
1321 252 : trace = trace - 0.5_dp*tmpsum + 0.5_dp
1322 : END DO
1323 36 : END FUNCTION trace_from_eigdecomp
1324 :
1325 : ! **************************************************************************************************
1326 : !> \brief Calculate the sign matrix by direct calculation of all eigenvalues and eigenvectors
1327 : !> \param sm_sign ...
1328 : !> \param sm ...
1329 : !> \param N ...
1330 : !> \par History
1331 : !> 2020.02 Created [Michael Lass, Robert Schade]
1332 : !> 2020.05 Extracted eigdecomp and sign_from_eigdecomp [Michael Lass]
1333 : !> \author Michael Lass, Robert Schade
1334 : ! **************************************************************************************************
1335 2 : SUBROUTINE dense_matrix_sign_direct(sm_sign, sm, N)
1336 : INTEGER, INTENT(IN) :: N
1337 : REAL(KIND=dp), INTENT(IN) :: sm(N, N)
1338 : REAL(KIND=dp), INTENT(INOUT) :: sm_sign(N, N)
1339 :
1340 2 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:) :: eigvals
1341 2 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :) :: eigvecs
1342 :
1343 : CALL eigdecomp(sm, N, eigvals, eigvecs)
1344 2 : CALL sign_from_eigdecomp(sm_sign, eigvals, eigvecs, N, 0.0_dp)
1345 :
1346 2 : DEALLOCATE (eigvals, eigvecs)
1347 2 : END SUBROUTINE dense_matrix_sign_direct
1348 :
1349 : ! **************************************************************************************************
1350 : !> \brief Submatrix method
1351 : !> \param matrix_sign ...
1352 : !> \param matrix ...
1353 : !> \param threshold ...
1354 : !> \param sign_order ...
1355 : !> \param submatrix_sign_method ...
1356 : !> \par History
1357 : !> 2019.03 created [Robert Schade]
1358 : !> 2019.06 impl. submatrix method [Michael Lass]
1359 : !> \author Robert Schade, Michael Lass
1360 : ! **************************************************************************************************
1361 6 : SUBROUTINE matrix_sign_submatrix(matrix_sign, matrix, threshold, sign_order, submatrix_sign_method)
1362 :
1363 : TYPE(dbcsr_type), INTENT(INOUT) :: matrix_sign, matrix
1364 : REAL(KIND=dp), INTENT(IN) :: threshold
1365 : INTEGER, INTENT(IN), OPTIONAL :: sign_order
1366 : INTEGER, INTENT(IN) :: submatrix_sign_method
1367 :
1368 : CHARACTER(LEN=*), PARAMETER :: routineN = 'matrix_sign_submatrix'
1369 :
1370 : INTEGER :: handle, i, myrank, nblkcols, order, &
1371 : sm_size, unit_nr
1372 6 : INTEGER, ALLOCATABLE, DIMENSION(:) :: my_sms
1373 6 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :) :: sm, sm_sign
1374 : TYPE(cp_logger_type), POINTER :: logger
1375 : TYPE(dbcsr_distribution_type) :: dist
1376 6 : TYPE(submatrix_dissection_type) :: dissection
1377 :
1378 6 : CALL timeset(routineN, handle)
1379 :
1380 : ! print output on all ranks
1381 6 : logger => cp_get_default_logger()
1382 6 : unit_nr = cp_logger_get_default_unit_nr(logger, local=.TRUE.)
1383 :
1384 6 : IF (PRESENT(sign_order)) THEN
1385 6 : order = sign_order
1386 : ELSE
1387 0 : order = 2
1388 : END IF
1389 :
1390 6 : CALL dbcsr_get_info(matrix=matrix, nblkcols_total=nblkcols, distribution=dist)
1391 6 : CALL dbcsr_distribution_get(dist=dist, mynode=myrank)
1392 :
1393 6 : CALL dissection%init(matrix)
1394 6 : CALL dissection%get_sm_ids_for_rank(myrank, my_sms)
1395 :
1396 : !$OMP PARALLEL DEFAULT(OMP_DEFAULT_NONE_WITH_OOP) &
1397 : !$OMP PRIVATE(sm, sm_sign, sm_size) &
1398 6 : !$OMP SHARED(dissection, myrank, my_sms, order, submatrix_sign_method, threshold, unit_nr)
1399 : !$OMP DO SCHEDULE(GUIDED)
1400 : DO i = 1, SIZE(my_sms)
1401 : WRITE (unit_nr, '(T3,A,1X,I4,1X,A,1X,I6)') "Rank", myrank, "processing submatrix", my_sms(i)
1402 : CALL dissection%generate_submatrix(my_sms(i), sm)
1403 : sm_size = SIZE(sm, 1)
1404 : ALLOCATE (sm_sign(sm_size, sm_size))
1405 : SELECT CASE (submatrix_sign_method)
1406 : CASE (ls_scf_submatrix_sign_ns)
1407 : CALL dense_matrix_sign_Newton_Schulz(sm_sign, sm, my_sms(i), threshold, order)
1408 : CASE (ls_scf_submatrix_sign_direct, ls_scf_submatrix_sign_direct_muadj, ls_scf_submatrix_sign_direct_muadj_lowmem)
1409 : CALL dense_matrix_sign_direct(sm_sign, sm, sm_size)
1410 : CASE DEFAULT
1411 : CPABORT("Unkown submatrix sign method.")
1412 : END SELECT
1413 : CALL dissection%copy_resultcol(my_sms(i), sm_sign)
1414 : DEALLOCATE (sm, sm_sign)
1415 : END DO
1416 : !$OMP END DO
1417 : !$OMP END PARALLEL
1418 :
1419 6 : CALL dissection%communicate_results(matrix_sign)
1420 6 : CALL dissection%final
1421 :
1422 6 : CALL timestop(handle)
1423 :
1424 12 : END SUBROUTINE matrix_sign_submatrix
1425 :
1426 : ! **************************************************************************************************
1427 : !> \brief Submatrix method with internal adjustment of chemical potential
1428 : !> \param matrix_sign ...
1429 : !> \param matrix ...
1430 : !> \param mu ...
1431 : !> \param nelectron ...
1432 : !> \param threshold ...
1433 : !> \param variant ...
1434 : !> \par History
1435 : !> 2020.05 Created [Michael Lass]
1436 : !> \author Robert Schade, Michael Lass
1437 : ! **************************************************************************************************
1438 4 : SUBROUTINE matrix_sign_submatrix_mu_adjust(matrix_sign, matrix, mu, nelectron, threshold, variant)
1439 :
1440 : TYPE(dbcsr_type), INTENT(INOUT) :: matrix_sign, matrix
1441 : REAL(KIND=dp), INTENT(INOUT) :: mu
1442 : INTEGER, INTENT(IN) :: nelectron
1443 : REAL(KIND=dp), INTENT(IN) :: threshold
1444 : INTEGER, INTENT(IN) :: variant
1445 :
1446 : CHARACTER(LEN=*), PARAMETER :: routineN = 'matrix_sign_submatrix_mu_adjust'
1447 : REAL(KIND=dp), PARAMETER :: initial_increment = 0.01_dp
1448 :
1449 : INTEGER :: handle, i, j, myrank, nblkcols, &
1450 : sm_firstcol, sm_lastcol, sm_size, &
1451 : unit_nr
1452 4 : INTEGER, ALLOCATABLE, DIMENSION(:) :: my_sms
1453 : LOGICAL :: has_mu_high, has_mu_low
1454 : REAL(KIND=dp) :: increment, mu_high, mu_low, new_mu, trace
1455 4 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :) :: sm, sm_sign, tmp
1456 : TYPE(cp_logger_type), POINTER :: logger
1457 : TYPE(dbcsr_distribution_type) :: dist
1458 4 : TYPE(eigbuf), ALLOCATABLE, DIMENSION(:) :: eigbufs
1459 : TYPE(mp_comm_type) :: group
1460 4 : TYPE(submatrix_dissection_type) :: dissection
1461 :
1462 4 : CALL timeset(routineN, handle)
1463 :
1464 : ! print output on all ranks
1465 4 : logger => cp_get_default_logger()
1466 4 : unit_nr = cp_logger_get_default_unit_nr(logger, local=.TRUE.)
1467 :
1468 4 : CALL dbcsr_get_info(matrix=matrix, nblkcols_total=nblkcols, distribution=dist, group=group)
1469 4 : CALL dbcsr_distribution_get(dist=dist, mynode=myrank)
1470 :
1471 4 : CALL dissection%init(matrix)
1472 4 : CALL dissection%get_sm_ids_for_rank(myrank, my_sms)
1473 :
1474 12 : ALLOCATE (eigbufs(SIZE(my_sms)))
1475 :
1476 : trace = 0.0_dp
1477 :
1478 : !$OMP PARALLEL DEFAULT(OMP_DEFAULT_NONE_WITH_OOP) &
1479 : !$OMP PRIVATE(sm, sm_sign, sm_size, sm_firstcol, sm_lastcol, j, tmp) &
1480 : !$OMP SHARED(dissection, myrank, my_sms, unit_nr, eigbufs, threshold, variant) &
1481 4 : !$OMP REDUCTION(+:trace)
1482 : !$OMP DO SCHEDULE(GUIDED)
1483 : DO i = 1, SIZE(my_sms)
1484 : CALL dissection%generate_submatrix(my_sms(i), sm)
1485 : sm_size = SIZE(sm, 1)
1486 : WRITE (unit_nr, *) "Rank", myrank, "processing submatrix", my_sms(i), "size", sm_size
1487 :
1488 : CALL dissection%get_relevant_sm_columns(my_sms(i), sm_firstcol, sm_lastcol)
1489 :
1490 : IF (variant .EQ. ls_scf_submatrix_sign_direct_muadj) THEN
1491 : ! Store all eigenvectors in buffer. We will use it to compute sm_sign at the end.
1492 : CALL eigdecomp(sm, sm_size, eigvals=eigbufs(i)%eigvals, eigvecs=eigbufs(i)%eigvecs)
1493 : ELSE
1494 : ! Only store eigenvectors that are required for mu adjustment.
1495 : ! Calculate sm_sign right away in the hope that mu is already correct.
1496 : CALL eigdecomp(sm, sm_size, eigvals=eigbufs(i)%eigvals, eigvecs=tmp)
1497 : ALLOCATE (eigbufs(i)%eigvecs(sm_firstcol:sm_lastcol, 1:sm_size))
1498 : eigbufs(i)%eigvecs(:, :) = tmp(sm_firstcol:sm_lastcol, 1:sm_size)
1499 :
1500 : ALLOCATE (sm_sign(sm_size, sm_size))
1501 : CALL sign_from_eigdecomp(sm_sign, eigbufs(i)%eigvals, tmp, sm_size, 0.0_dp)
1502 : CALL dissection%copy_resultcol(my_sms(i), sm_sign)
1503 : DEALLOCATE (sm_sign, tmp)
1504 : END IF
1505 :
1506 : DEALLOCATE (sm)
1507 : trace = trace + trace_from_eigdecomp(eigbufs(i)%eigvals, eigbufs(i)%eigvecs, sm_firstcol, sm_lastcol, 0.0_dp)
1508 : END DO
1509 : !$OMP END DO
1510 : !$OMP END PARALLEL
1511 :
1512 4 : has_mu_low = .FALSE.
1513 4 : has_mu_high = .FALSE.
1514 4 : increment = initial_increment
1515 4 : new_mu = mu
1516 72 : DO i = 1, 30
1517 72 : CALL group%sum(trace)
1518 72 : IF (unit_nr > 0) WRITE (unit_nr, '(T2,A,1X,F13.9,1X,F15.9)') &
1519 72 : "Density matrix: mu, trace error: ", new_mu, trace - nelectron
1520 72 : IF (ABS(trace - nelectron) < 0.5_dp) EXIT
1521 68 : IF (trace < nelectron) THEN
1522 8 : mu_low = new_mu
1523 8 : new_mu = new_mu + increment
1524 8 : has_mu_low = .TRUE.
1525 8 : increment = increment*2
1526 : ELSE
1527 60 : mu_high = new_mu
1528 60 : new_mu = new_mu - increment
1529 60 : has_mu_high = .TRUE.
1530 60 : increment = increment*2
1531 : END IF
1532 :
1533 68 : IF (has_mu_low .AND. has_mu_high) THEN
1534 20 : new_mu = (mu_low + mu_high)/2
1535 20 : IF (ABS(mu_high - mu_low) < threshold) EXIT
1536 : END IF
1537 :
1538 : trace = 0
1539 : !$OMP PARALLEL DEFAULT(OMP_DEFAULT_NONE_WITH_OOP) &
1540 : !$OMP PRIVATE(i, sm_sign, tmp, sm_size, sm_firstcol, sm_lastcol) &
1541 : !$OMP SHARED(dissection, my_sms, unit_nr, eigbufs, mu, new_mu, nelectron) &
1542 72 : !$OMP REDUCTION(+:trace)
1543 : !$OMP DO SCHEDULE(GUIDED)
1544 : DO j = 1, SIZE(my_sms)
1545 : sm_size = SIZE(eigbufs(j)%eigvals)
1546 : CALL dissection%get_relevant_sm_columns(my_sms(j), sm_firstcol, sm_lastcol)
1547 : trace = trace + trace_from_eigdecomp(eigbufs(j)%eigvals, eigbufs(j)%eigvecs, sm_firstcol, sm_lastcol, new_mu - mu)
1548 : END DO
1549 : !$OMP END DO
1550 : !$OMP END PARALLEL
1551 : END DO
1552 :
1553 : ! Finalize sign matrix from eigendecompositions if we kept all eigenvectors
1554 4 : IF (variant .EQ. ls_scf_submatrix_sign_direct_muadj) THEN
1555 : !$OMP PARALLEL DEFAULT(OMP_DEFAULT_NONE_WITH_OOP) &
1556 : !$OMP PRIVATE(sm, sm_sign, sm_size, sm_firstcol, sm_lastcol, j) &
1557 2 : !$OMP SHARED(dissection, myrank, my_sms, unit_nr, eigbufs, mu, new_mu)
1558 : !$OMP DO SCHEDULE(GUIDED)
1559 : DO i = 1, SIZE(my_sms)
1560 : WRITE (unit_nr, '(T3,A,1X,I4,1X,A,1X,I6)') "Rank", myrank, "finalizing submatrix", my_sms(i)
1561 : sm_size = SIZE(eigbufs(i)%eigvals)
1562 : ALLOCATE (sm_sign(sm_size, sm_size))
1563 : CALL sign_from_eigdecomp(sm_sign, eigbufs(i)%eigvals, eigbufs(i)%eigvecs, sm_size, new_mu - mu)
1564 : CALL dissection%copy_resultcol(my_sms(i), sm_sign)
1565 : DEALLOCATE (sm_sign)
1566 : END DO
1567 : !$OMP END DO
1568 : !$OMP END PARALLEL
1569 : END IF
1570 :
1571 6 : DEALLOCATE (eigbufs)
1572 :
1573 : ! If we only stored parts of the eigenvectors and mu has changed, we need to recompute sm_sign
1574 4 : IF ((variant .EQ. ls_scf_submatrix_sign_direct_muadj_lowmem) .AND. (mu .NE. new_mu)) THEN
1575 : !$OMP PARALLEL DEFAULT(OMP_DEFAULT_NONE_WITH_OOP) &
1576 : !$OMP PRIVATE(sm, sm_sign, sm_size, sm_firstcol, sm_lastcol, j) &
1577 2 : !$OMP SHARED(dissection, myrank, my_sms, unit_nr, eigbufs, mu, new_mu)
1578 : !$OMP DO SCHEDULE(GUIDED)
1579 : DO i = 1, SIZE(my_sms)
1580 : WRITE (unit_nr, '(T3,A,1X,I4,1X,A,1X,I6)') "Rank", myrank, "reprocessing submatrix", my_sms(i)
1581 : CALL dissection%generate_submatrix(my_sms(i), sm)
1582 : sm_size = SIZE(sm, 1)
1583 : DO j = 1, sm_size
1584 : sm(j, j) = sm(j, j) + mu - new_mu
1585 : END DO
1586 : ALLOCATE (sm_sign(sm_size, sm_size))
1587 : CALL dense_matrix_sign_direct(sm_sign, sm, sm_size)
1588 : CALL dissection%copy_resultcol(my_sms(i), sm_sign)
1589 : DEALLOCATE (sm, sm_sign)
1590 : END DO
1591 : !$OMP END DO
1592 : !$OMP END PARALLEL
1593 : END IF
1594 :
1595 4 : mu = new_mu
1596 :
1597 4 : CALL dissection%communicate_results(matrix_sign)
1598 4 : CALL dissection%final
1599 :
1600 4 : CALL timestop(handle)
1601 :
1602 12 : END SUBROUTINE matrix_sign_submatrix_mu_adjust
1603 :
1604 : ! **************************************************************************************************
1605 : !> \brief compute the sqrt of a matrix via the sign function and the corresponding Newton-Schulz iterations
1606 : !> the order of the algorithm should be 2..5, 3 or 5 is recommended
1607 : !> \param matrix_sqrt ...
1608 : !> \param matrix_sqrt_inv ...
1609 : !> \param matrix ...
1610 : !> \param threshold ...
1611 : !> \param order ...
1612 : !> \param eps_lanczos ...
1613 : !> \param max_iter_lanczos ...
1614 : !> \param symmetrize ...
1615 : !> \param converged ...
1616 : !> \param iounit ...
1617 : !> \par History
1618 : !> 2010.10 created [Joost VandeVondele]
1619 : !> \author Joost VandeVondele
1620 : ! **************************************************************************************************
1621 14414 : SUBROUTINE matrix_sqrt_Newton_Schulz(matrix_sqrt, matrix_sqrt_inv, matrix, threshold, order, &
1622 : eps_lanczos, max_iter_lanczos, symmetrize, converged, iounit)
1623 : TYPE(dbcsr_type), INTENT(INOUT) :: matrix_sqrt, matrix_sqrt_inv, matrix
1624 : REAL(KIND=dp), INTENT(IN) :: threshold
1625 : INTEGER, INTENT(IN) :: order
1626 : REAL(KIND=dp), INTENT(IN) :: eps_lanczos
1627 : INTEGER, INTENT(IN) :: max_iter_lanczos
1628 : LOGICAL, OPTIONAL :: symmetrize, converged
1629 : INTEGER, INTENT(IN), OPTIONAL :: iounit
1630 :
1631 : CHARACTER(LEN=*), PARAMETER :: routineN = 'matrix_sqrt_Newton_Schulz'
1632 :
1633 : INTEGER :: handle, i, unit_nr
1634 : INTEGER(KIND=int_8) :: flop1, flop2, flop3, flop4, flop5
1635 : LOGICAL :: arnoldi_converged, tsym
1636 : REAL(KIND=dp) :: a, b, c, conv, d, frob_matrix, &
1637 : frob_matrix_base, gershgorin_norm, &
1638 : max_ev, min_ev, oa, ob, oc, &
1639 : occ_matrix, od, scaling, t1, t2
1640 : TYPE(cp_logger_type), POINTER :: logger
1641 : TYPE(dbcsr_type) :: tmp1, tmp2, tmp3
1642 :
1643 14414 : CALL timeset(routineN, handle)
1644 :
1645 14414 : IF (PRESENT(iounit)) THEN
1646 13062 : unit_nr = iounit
1647 : ELSE
1648 1352 : logger => cp_get_default_logger()
1649 1352 : IF (logger%para_env%is_source()) THEN
1650 676 : unit_nr = cp_logger_get_default_unit_nr(logger, local=.TRUE.)
1651 : ELSE
1652 676 : unit_nr = -1
1653 : END IF
1654 : END IF
1655 :
1656 14414 : IF (PRESENT(converged)) converged = .FALSE.
1657 14414 : IF (PRESENT(symmetrize)) THEN
1658 0 : tsym = symmetrize
1659 : ELSE
1660 : tsym = .TRUE.
1661 : END IF
1662 :
1663 : ! for stability symmetry can not be assumed
1664 14414 : CALL dbcsr_create(tmp1, template=matrix, matrix_type=dbcsr_type_no_symmetry)
1665 14414 : CALL dbcsr_create(tmp2, template=matrix, matrix_type=dbcsr_type_no_symmetry)
1666 14414 : IF (order .GE. 4) THEN
1667 20 : CALL dbcsr_create(tmp3, template=matrix, matrix_type=dbcsr_type_no_symmetry)
1668 : END IF
1669 :
1670 14414 : CALL dbcsr_set(matrix_sqrt_inv, 0.0_dp)
1671 14414 : CALL dbcsr_add_on_diag(matrix_sqrt_inv, 1.0_dp)
1672 14414 : CALL dbcsr_filter(matrix_sqrt_inv, threshold)
1673 14414 : CALL dbcsr_copy(matrix_sqrt, matrix)
1674 :
1675 : ! scale the matrix to get into the convergence range
1676 14414 : IF (order == 0) THEN
1677 :
1678 0 : gershgorin_norm = dbcsr_gershgorin_norm(matrix_sqrt)
1679 0 : frob_matrix = dbcsr_frobenius_norm(matrix_sqrt)
1680 0 : scaling = 1.0_dp/MIN(frob_matrix, gershgorin_norm)
1681 :
1682 : ELSE
1683 :
1684 : ! scale the matrix to get into the convergence range
1685 : CALL arnoldi_extremal(matrix_sqrt, max_ev, min_ev, threshold=eps_lanczos, &
1686 14414 : max_iter=max_iter_lanczos, converged=arnoldi_converged)
1687 14414 : IF (unit_nr > 0) THEN
1688 676 : WRITE (unit_nr, *)
1689 676 : WRITE (unit_nr, '(T6,A,1X,L1,A,E12.3)') "Lanczos converged: ", arnoldi_converged, " threshold:", eps_lanczos
1690 676 : WRITE (unit_nr, '(T6,A,1X,E12.3,E12.3)') "Est. extremal eigenvalues:", max_ev, min_ev
1691 676 : WRITE (unit_nr, '(T6,A,1X,E12.3)') "Est. condition number :", max_ev/MAX(min_ev, EPSILON(min_ev))
1692 : END IF
1693 : ! conservatively assume we get a relatively large error (100*threshold_lanczos) in the estimates
1694 : ! and adjust the scaling to be on the safe side
1695 14414 : scaling = 2.0_dp/(max_ev + min_ev + 100*eps_lanczos)
1696 :
1697 : END IF
1698 :
1699 14414 : CALL dbcsr_scale(matrix_sqrt, scaling)
1700 14414 : CALL dbcsr_filter(matrix_sqrt, threshold)
1701 14414 : IF (unit_nr > 0) THEN
1702 676 : WRITE (unit_nr, *)
1703 676 : WRITE (unit_nr, *) "Order=", order
1704 : END IF
1705 :
1706 67934 : DO i = 1, 100
1707 :
1708 67934 : t1 = m_walltime()
1709 :
1710 : ! tmp1 = Zk * Yk - I
1711 : CALL dbcsr_multiply("N", "N", 1.0_dp, matrix_sqrt_inv, matrix_sqrt, 0.0_dp, tmp1, &
1712 67934 : filter_eps=threshold, flop=flop1)
1713 67934 : frob_matrix_base = dbcsr_frobenius_norm(tmp1)
1714 67934 : CALL dbcsr_add_on_diag(tmp1, -1.0_dp)
1715 :
1716 : ! check convergence (frob norm of what should be the identity matrix minus identity matrix)
1717 67934 : frob_matrix = dbcsr_frobenius_norm(tmp1)
1718 :
1719 67934 : flop4 = 0; flop5 = 0
1720 36 : SELECT CASE (order)
1721 : CASE (0, 2)
1722 : ! update the above to 0.5*(3*I-Zk*Yk)
1723 36 : CALL dbcsr_add_on_diag(tmp1, -2.0_dp)
1724 36 : CALL dbcsr_scale(tmp1, -0.5_dp)
1725 : CASE (3)
1726 : ! tmp2 = tmp1 ** 2
1727 : CALL dbcsr_multiply("N", "N", 1.0_dp, tmp1, tmp1, 0.0_dp, tmp2, &
1728 67838 : filter_eps=threshold, flop=flop4)
1729 : ! tmp1 = 1/16 * (16*I-8*tmp1+6*tmp1**2-5*tmp1**3)
1730 67838 : CALL dbcsr_add(tmp1, tmp2, -4.0_dp, 3.0_dp)
1731 67838 : CALL dbcsr_add_on_diag(tmp1, 8.0_dp)
1732 67838 : CALL dbcsr_scale(tmp1, 0.125_dp)
1733 : CASE (4) ! as expensive as case(5), so little need to use it
1734 : ! tmp2 = tmp1 ** 2
1735 : CALL dbcsr_multiply("N", "N", 1.0_dp, tmp1, tmp1, 0.0_dp, tmp2, &
1736 32 : filter_eps=threshold, flop=flop4)
1737 : ! tmp3 = tmp2 * tmp1
1738 : CALL dbcsr_multiply("N", "N", 1.0_dp, tmp2, tmp1, 0.0_dp, tmp3, &
1739 32 : filter_eps=threshold, flop=flop5)
1740 32 : CALL dbcsr_scale(tmp1, -8.0_dp)
1741 32 : CALL dbcsr_add_on_diag(tmp1, 16.0_dp)
1742 32 : CALL dbcsr_add(tmp1, tmp2, 1.0_dp, 6.0_dp)
1743 32 : CALL dbcsr_add(tmp1, tmp3, 1.0_dp, -5.0_dp)
1744 32 : CALL dbcsr_scale(tmp1, 1/16.0_dp)
1745 : CASE (5)
1746 : ! Knuth's reformulation to evaluate the polynomial of 4th degree in 2 multiplications
1747 : ! p = y4+A*y3+B*y2+C*y+D
1748 : ! z := y * (y+a); P := (z+y+b) * (z+c) + d.
1749 : ! a=(A-1)/2 ; b=B*(a+1)-C-a*(a+1)*(a+1)
1750 : ! c=B-b-a*(a+1)
1751 : ! d=D-bc
1752 28 : oa = -40.0_dp/35.0_dp
1753 28 : ob = 48.0_dp/35.0_dp
1754 28 : oc = -64.0_dp/35.0_dp
1755 28 : od = 128.0_dp/35.0_dp
1756 28 : a = (oa - 1)/2
1757 28 : b = ob*(a + 1) - oc - a*(a + 1)**2
1758 28 : c = ob - b - a*(a + 1)
1759 28 : d = od - b*c
1760 : ! tmp2 = tmp1 ** 2 + a * tmp1
1761 : CALL dbcsr_multiply("N", "N", 1.0_dp, tmp1, tmp1, 0.0_dp, tmp2, &
1762 28 : filter_eps=threshold, flop=flop4)
1763 28 : CALL dbcsr_add(tmp2, tmp1, 1.0_dp, a)
1764 : ! tmp3 = tmp2 + tmp1 + b
1765 28 : CALL dbcsr_copy(tmp3, tmp2)
1766 28 : CALL dbcsr_add(tmp3, tmp1, 1.0_dp, 1.0_dp)
1767 28 : CALL dbcsr_add_on_diag(tmp3, b)
1768 : ! tmp2 = tmp2 + c
1769 28 : CALL dbcsr_add_on_diag(tmp2, c)
1770 : ! tmp1 = tmp2 * tmp3
1771 : CALL dbcsr_multiply("N", "N", 1.0_dp, tmp2, tmp3, 0.0_dp, tmp1, &
1772 28 : filter_eps=threshold, flop=flop5)
1773 : ! tmp1 = tmp1 + d
1774 28 : CALL dbcsr_add_on_diag(tmp1, d)
1775 : ! final scale
1776 28 : CALL dbcsr_scale(tmp1, 35.0_dp/128.0_dp)
1777 : CASE DEFAULT
1778 67934 : CPABORT("Illegal order value")
1779 : END SELECT
1780 :
1781 : ! tmp2 = Yk * tmp1 = Y(k+1)
1782 : CALL dbcsr_multiply("N", "N", 1.0_dp, matrix_sqrt, tmp1, 0.0_dp, tmp2, &
1783 67934 : filter_eps=threshold, flop=flop2)
1784 : ! CALL dbcsr_filter(tmp2,threshold)
1785 67934 : CALL dbcsr_copy(matrix_sqrt, tmp2)
1786 :
1787 : ! tmp2 = tmp1 * Zk = Z(k+1)
1788 : CALL dbcsr_multiply("N", "N", 1.0_dp, tmp1, matrix_sqrt_inv, 0.0_dp, tmp2, &
1789 67934 : filter_eps=threshold, flop=flop3)
1790 : ! CALL dbcsr_filter(tmp2,threshold)
1791 67934 : CALL dbcsr_copy(matrix_sqrt_inv, tmp2)
1792 :
1793 67934 : occ_matrix = dbcsr_get_occupation(matrix_sqrt_inv)
1794 :
1795 : ! done iterating
1796 67934 : t2 = m_walltime()
1797 :
1798 67934 : conv = frob_matrix/frob_matrix_base
1799 :
1800 67934 : IF (unit_nr > 0) THEN
1801 3818 : WRITE (unit_nr, '(T6,A,1X,I3,1X,F10.8,E12.3,F12.3,F13.3)') "NS sqrt iter ", i, occ_matrix, &
1802 3818 : conv, t2 - t1, &
1803 7636 : (flop1 + flop2 + flop3 + flop4 + flop5)/(1.0E6_dp*MAX(0.001_dp, t2 - t1))
1804 3818 : CALL m_flush(unit_nr)
1805 : END IF
1806 :
1807 67934 : IF (abnormal_value(conv)) &
1808 0 : CPABORT("conv is an abnormal value (NaN/Inf).")
1809 :
1810 : ! conv < SQRT(threshold)
1811 67934 : IF ((conv*conv) < threshold) THEN
1812 14414 : IF (PRESENT(converged)) converged = .TRUE.
1813 : EXIT
1814 : END IF
1815 :
1816 : END DO
1817 :
1818 : ! symmetrize the matrices as this is not guaranteed by the algorithm
1819 14414 : IF (tsym) THEN
1820 14414 : IF (unit_nr > 0) THEN
1821 676 : WRITE (unit_nr, '(T6,A20)') "Symmetrizing Results"
1822 : END IF
1823 14414 : CALL dbcsr_transposed(tmp1, matrix_sqrt_inv)
1824 14414 : CALL dbcsr_add(matrix_sqrt_inv, tmp1, 0.5_dp, 0.5_dp)
1825 14414 : CALL dbcsr_transposed(tmp1, matrix_sqrt)
1826 14414 : CALL dbcsr_add(matrix_sqrt, tmp1, 0.5_dp, 0.5_dp)
1827 : END IF
1828 :
1829 : ! this check is not really needed
1830 : CALL dbcsr_multiply("N", "N", +1.0_dp, matrix_sqrt_inv, matrix_sqrt, 0.0_dp, tmp1, &
1831 14414 : filter_eps=threshold)
1832 14414 : frob_matrix_base = dbcsr_frobenius_norm(tmp1)
1833 14414 : CALL dbcsr_add_on_diag(tmp1, -1.0_dp)
1834 14414 : frob_matrix = dbcsr_frobenius_norm(tmp1)
1835 14414 : occ_matrix = dbcsr_get_occupation(matrix_sqrt_inv)
1836 14414 : IF (unit_nr > 0) THEN
1837 676 : WRITE (unit_nr, '(T6,A,1X,I3,1X,F10.8,E12.3)') "Final NS sqrt iter ", i, occ_matrix, &
1838 1352 : frob_matrix/frob_matrix_base
1839 676 : WRITE (unit_nr, '()')
1840 676 : CALL m_flush(unit_nr)
1841 : END IF
1842 :
1843 : ! scale to proper end results
1844 14414 : CALL dbcsr_scale(matrix_sqrt, 1/SQRT(scaling))
1845 14414 : CALL dbcsr_scale(matrix_sqrt_inv, SQRT(scaling))
1846 :
1847 14414 : CALL dbcsr_release(tmp1)
1848 14414 : CALL dbcsr_release(tmp2)
1849 14414 : IF (order .GE. 4) THEN
1850 20 : CALL dbcsr_release(tmp3)
1851 : END IF
1852 :
1853 14414 : CALL timestop(handle)
1854 :
1855 14414 : END SUBROUTINE matrix_sqrt_Newton_Schulz
1856 :
1857 : ! **************************************************************************************************
1858 : !> \brief compute the sqrt of a matrix via the general algorithm for the p-th root of Richters et al.
1859 : !> Commun. Comput. Phys., 25 (2019), pp. 564-585.
1860 : !> \param matrix_sqrt ...
1861 : !> \param matrix_sqrt_inv ...
1862 : !> \param matrix ...
1863 : !> \param threshold ...
1864 : !> \param order ...
1865 : !> \param eps_lanczos ...
1866 : !> \param max_iter_lanczos ...
1867 : !> \param symmetrize ...
1868 : !> \param converged ...
1869 : !> \par History
1870 : !> 2019.04 created [Robert Schade]
1871 : !> \author Robert Schade
1872 : ! **************************************************************************************************
1873 48 : SUBROUTINE matrix_sqrt_proot(matrix_sqrt, matrix_sqrt_inv, matrix, threshold, order, &
1874 : eps_lanczos, max_iter_lanczos, symmetrize, converged)
1875 : TYPE(dbcsr_type), INTENT(INOUT) :: matrix_sqrt, matrix_sqrt_inv, matrix
1876 : REAL(KIND=dp), INTENT(IN) :: threshold
1877 : INTEGER, INTENT(IN) :: order
1878 : REAL(KIND=dp), INTENT(IN) :: eps_lanczos
1879 : INTEGER, INTENT(IN) :: max_iter_lanczos
1880 : LOGICAL, OPTIONAL :: symmetrize, converged
1881 :
1882 : CHARACTER(LEN=*), PARAMETER :: routineN = 'matrix_sqrt_proot'
1883 :
1884 : INTEGER :: choose, handle, i, ii, j, unit_nr
1885 : INTEGER(KIND=int_8) :: f, flop1, flop2, flop3, flop4, flop5
1886 : LOGICAL :: arnoldi_converged, test, tsym
1887 : REAL(KIND=dp) :: conv, frob_matrix, frob_matrix_base, &
1888 : max_ev, min_ev, occ_matrix, scaling, &
1889 : t1, t2
1890 : TYPE(cp_logger_type), POINTER :: logger
1891 : TYPE(dbcsr_type) :: BK2A, matrixS, Rmat, tmp1, tmp2, tmp3
1892 :
1893 16 : CALL cite_reference(Richters2018)
1894 :
1895 16 : test = .FALSE.
1896 :
1897 16 : CALL timeset(routineN, handle)
1898 :
1899 16 : logger => cp_get_default_logger()
1900 16 : IF (logger%para_env%is_source()) THEN
1901 8 : unit_nr = cp_logger_get_default_unit_nr(logger, local=.TRUE.)
1902 : ELSE
1903 8 : unit_nr = -1
1904 : END IF
1905 :
1906 16 : IF (PRESENT(converged)) converged = .FALSE.
1907 16 : IF (PRESENT(symmetrize)) THEN
1908 16 : tsym = symmetrize
1909 : ELSE
1910 : tsym = .TRUE.
1911 : END IF
1912 :
1913 : ! for stability symmetry can not be assumed
1914 16 : CALL dbcsr_create(tmp1, template=matrix, matrix_type=dbcsr_type_no_symmetry)
1915 16 : CALL dbcsr_create(tmp2, template=matrix, matrix_type=dbcsr_type_no_symmetry)
1916 16 : CALL dbcsr_create(tmp3, template=matrix, matrix_type=dbcsr_type_no_symmetry)
1917 16 : CALL dbcsr_create(Rmat, template=matrix, matrix_type=dbcsr_type_no_symmetry)
1918 16 : CALL dbcsr_create(matrixS, template=matrix, matrix_type=dbcsr_type_no_symmetry)
1919 :
1920 16 : CALL dbcsr_copy(matrixS, matrix)
1921 : IF (1 .EQ. 1) THEN
1922 : ! scale the matrix to get into the convergence range
1923 : CALL arnoldi_extremal(matrixS, max_ev, min_ev, threshold=eps_lanczos, &
1924 16 : max_iter=max_iter_lanczos, converged=arnoldi_converged)
1925 16 : IF (unit_nr > 0) THEN
1926 8 : WRITE (unit_nr, *)
1927 8 : WRITE (unit_nr, '(T6,A,1X,L1,A,E12.3)') "Lanczos converged: ", arnoldi_converged, " threshold:", eps_lanczos
1928 8 : WRITE (unit_nr, '(T6,A,1X,E12.3,E12.3)') "Est. extremal eigenvalues:", max_ev, min_ev
1929 8 : WRITE (unit_nr, '(T6,A,1X,E12.3)') "Est. condition number :", max_ev/MAX(min_ev, EPSILON(min_ev))
1930 : END IF
1931 : ! conservatively assume we get a relatively large error (100*threshold_lanczos) in the estimates
1932 : ! and adjust the scaling to be on the safe side
1933 16 : scaling = 2.0_dp/(max_ev + min_ev + 100*eps_lanczos)
1934 16 : CALL dbcsr_scale(matrixS, scaling)
1935 16 : CALL dbcsr_filter(matrixS, threshold)
1936 : ELSE
1937 : scaling = 1.0_dp
1938 : END IF
1939 :
1940 16 : CALL dbcsr_set(matrix_sqrt_inv, 0.0_dp)
1941 16 : CALL dbcsr_add_on_diag(matrix_sqrt_inv, 1.0_dp)
1942 : !CALL dbcsr_filter(matrix_sqrt_inv, threshold)
1943 :
1944 16 : IF (unit_nr > 0) THEN
1945 8 : WRITE (unit_nr, *)
1946 8 : WRITE (unit_nr, *) "Order=", order
1947 : END IF
1948 :
1949 86 : DO i = 1, 100
1950 :
1951 86 : t1 = m_walltime()
1952 : IF (1 .EQ. 1) THEN
1953 : !build R=1-A B_K^2
1954 : CALL dbcsr_multiply("N", "N", 1.0_dp, matrix_sqrt_inv, matrix_sqrt_inv, 0.0_dp, tmp1, &
1955 86 : filter_eps=threshold, flop=flop1)
1956 : CALL dbcsr_multiply("N", "N", 1.0_dp, matrixS, tmp1, 0.0_dp, Rmat, &
1957 86 : filter_eps=threshold, flop=flop2)
1958 86 : CALL dbcsr_scale(Rmat, -1.0_dp)
1959 86 : CALL dbcsr_add_on_diag(Rmat, 1.0_dp)
1960 :
1961 86 : flop4 = 0; flop5 = 0
1962 86 : CALL dbcsr_set(tmp1, 0.0_dp)
1963 86 : CALL dbcsr_add_on_diag(tmp1, 2.0_dp)
1964 :
1965 86 : flop3 = 0
1966 :
1967 274 : DO j = 2, order
1968 188 : IF (j .EQ. 2) THEN
1969 86 : CALL dbcsr_copy(tmp2, Rmat)
1970 : ELSE
1971 : f = 0
1972 102 : CALL dbcsr_copy(tmp3, tmp2)
1973 : CALL dbcsr_multiply("N", "N", 1.0_dp, tmp3, Rmat, 0.0_dp, tmp2, &
1974 102 : filter_eps=threshold, flop=f)
1975 102 : flop3 = flop3 + f
1976 : END IF
1977 274 : CALL dbcsr_add(tmp1, tmp2, 1.0_dp, 1.0_dp)
1978 : END DO
1979 : ELSE
1980 : CALL dbcsr_create(BK2A, template=matrix, matrix_type=dbcsr_type_no_symmetry)
1981 : CALL dbcsr_multiply("N", "N", 1.0_dp, matrix_sqrt_inv, matrixS, 0.0_dp, tmp3, &
1982 : filter_eps=threshold, flop=flop1)
1983 : CALL dbcsr_multiply("N", "N", 1.0_dp, matrix_sqrt_inv, tmp3, 0.0_dp, BK2A, &
1984 : filter_eps=threshold, flop=flop2)
1985 : CALL dbcsr_copy(Rmat, BK2A)
1986 : CALL dbcsr_add_on_diag(Rmat, -1.0_dp)
1987 :
1988 : CALL dbcsr_set(tmp1, 0.0_dp)
1989 : CALL dbcsr_add_on_diag(tmp1, 1.0_dp)
1990 :
1991 : CALL dbcsr_set(tmp2, 0.0_dp)
1992 : CALL dbcsr_add_on_diag(tmp2, 1.0_dp)
1993 :
1994 : flop3 = 0
1995 : DO j = 1, order
1996 : !choose=factorial(order)/(factorial(j)*factorial(order-j))
1997 : choose = PRODUCT((/(ii, ii=1, order)/))/(PRODUCT((/(ii, ii=1, j)/))*PRODUCT((/(ii, ii=1, order - j)/)))
1998 : CALL dbcsr_add(tmp1, tmp2, 1.0_dp, -1.0_dp*(-1)**j*choose)
1999 : IF (j .LT. order) THEN
2000 : f = 0
2001 : CALL dbcsr_copy(tmp3, tmp2)
2002 : CALL dbcsr_multiply("N", "N", 1.0_dp, tmp3, BK2A, 0.0_dp, tmp2, &
2003 : filter_eps=threshold, flop=f)
2004 : flop3 = flop3 + f
2005 : END IF
2006 : END DO
2007 : CALL dbcsr_release(BK2A)
2008 : END IF
2009 :
2010 86 : CALL dbcsr_copy(tmp3, matrix_sqrt_inv)
2011 : CALL dbcsr_multiply("N", "N", 0.5_dp, tmp3, tmp1, 0.0_dp, matrix_sqrt_inv, &
2012 86 : filter_eps=threshold, flop=flop4)
2013 :
2014 86 : occ_matrix = dbcsr_get_occupation(matrix_sqrt_inv)
2015 :
2016 : ! done iterating
2017 86 : t2 = m_walltime()
2018 :
2019 86 : conv = dbcsr_frobenius_norm(Rmat)
2020 :
2021 86 : IF (unit_nr > 0) THEN
2022 43 : WRITE (unit_nr, '(T6,A,1X,I3,1X,F10.8,E12.3,F12.3,F13.3)') "PROOT sqrt iter ", i, occ_matrix, &
2023 43 : conv, t2 - t1, &
2024 86 : (flop1 + flop2 + flop3 + flop4 + flop5)/(1.0E6_dp*MAX(0.001_dp, t2 - t1))
2025 43 : CALL m_flush(unit_nr)
2026 : END IF
2027 :
2028 86 : IF (abnormal_value(conv)) &
2029 0 : CPABORT("conv is an abnormal value (NaN/Inf).")
2030 :
2031 : ! conv < SQRT(threshold)
2032 86 : IF ((conv*conv) < threshold) THEN
2033 16 : IF (PRESENT(converged)) converged = .TRUE.
2034 : EXIT
2035 : END IF
2036 :
2037 : END DO
2038 :
2039 : ! scale to proper end results
2040 16 : CALL dbcsr_scale(matrix_sqrt_inv, SQRT(scaling))
2041 : CALL dbcsr_multiply("N", "N", 1.0_dp, matrix_sqrt_inv, matrix, 0.0_dp, matrix_sqrt, &
2042 16 : filter_eps=threshold, flop=flop5)
2043 :
2044 : ! symmetrize the matrices as this is not guaranteed by the algorithm
2045 16 : IF (tsym) THEN
2046 8 : IF (unit_nr > 0) THEN
2047 4 : WRITE (unit_nr, '(A20)') "SYMMETRIZING RESULTS"
2048 : END IF
2049 8 : CALL dbcsr_transposed(tmp1, matrix_sqrt_inv)
2050 8 : CALL dbcsr_add(matrix_sqrt_inv, tmp1, 0.5_dp, 0.5_dp)
2051 8 : CALL dbcsr_transposed(tmp1, matrix_sqrt)
2052 8 : CALL dbcsr_add(matrix_sqrt, tmp1, 0.5_dp, 0.5_dp)
2053 : END IF
2054 :
2055 : ! this check is not really needed
2056 : IF (test) THEN
2057 : CALL dbcsr_multiply("N", "N", +1.0_dp, matrix_sqrt_inv, matrix_sqrt, 0.0_dp, tmp1, &
2058 : filter_eps=threshold)
2059 : frob_matrix_base = dbcsr_frobenius_norm(tmp1)
2060 : CALL dbcsr_add_on_diag(tmp1, -1.0_dp)
2061 : frob_matrix = dbcsr_frobenius_norm(tmp1)
2062 : occ_matrix = dbcsr_get_occupation(matrix_sqrt_inv)
2063 : IF (unit_nr > 0) THEN
2064 : WRITE (unit_nr, '(T6,A,1X,I3,1X,F10.8,E12.3)') "Final PROOT S^{-1/2} S^{1/2}-Eins error ", i, occ_matrix, &
2065 : frob_matrix/frob_matrix_base
2066 : WRITE (unit_nr, '()')
2067 : CALL m_flush(unit_nr)
2068 : END IF
2069 :
2070 : ! this check is not really needed
2071 : CALL dbcsr_multiply("N", "N", +1.0_dp, matrix_sqrt_inv, matrix_sqrt_inv, 0.0_dp, tmp2, &
2072 : filter_eps=threshold)
2073 : CALL dbcsr_multiply("N", "N", +1.0_dp, tmp2, matrix, 0.0_dp, tmp1, &
2074 : filter_eps=threshold)
2075 : frob_matrix_base = dbcsr_frobenius_norm(tmp1)
2076 : CALL dbcsr_add_on_diag(tmp1, -1.0_dp)
2077 : frob_matrix = dbcsr_frobenius_norm(tmp1)
2078 : occ_matrix = dbcsr_get_occupation(matrix_sqrt_inv)
2079 : IF (unit_nr > 0) THEN
2080 : WRITE (unit_nr, '(T6,A,1X,I3,1X,F10.8,E12.3)') "Final PROOT S^{-1/2} S^{-1/2} S-Eins error ", i, occ_matrix, &
2081 : frob_matrix/frob_matrix_base
2082 : WRITE (unit_nr, '()')
2083 : CALL m_flush(unit_nr)
2084 : END IF
2085 : END IF
2086 :
2087 16 : CALL dbcsr_release(tmp1)
2088 16 : CALL dbcsr_release(tmp2)
2089 16 : CALL dbcsr_release(tmp3)
2090 16 : CALL dbcsr_release(Rmat)
2091 16 : CALL dbcsr_release(matrixS)
2092 :
2093 16 : CALL timestop(handle)
2094 16 : END SUBROUTINE matrix_sqrt_proot
2095 :
2096 : ! **************************************************************************************************
2097 : !> \brief ...
2098 : !> \param matrix_exp ...
2099 : !> \param matrix ...
2100 : !> \param omega ...
2101 : !> \param alpha ...
2102 : !> \param threshold ...
2103 : ! **************************************************************************************************
2104 1146 : SUBROUTINE matrix_exponential(matrix_exp, matrix, omega, alpha, threshold)
2105 : ! compute matrix_exp=omega*exp(alpha*matrix)
2106 : TYPE(dbcsr_type), INTENT(INOUT) :: matrix_exp, matrix
2107 : REAL(KIND=dp), INTENT(IN) :: omega, alpha, threshold
2108 :
2109 : CHARACTER(LEN=*), PARAMETER :: routineN = 'matrix_exponential'
2110 : REAL(dp), PARAMETER :: one = 1.0_dp, toll = 1.E-17_dp, &
2111 : zero = 0.0_dp
2112 :
2113 : INTEGER :: handle, i, k, unit_nr
2114 : REAL(dp) :: factorial, norm_C, norm_D, norm_scalar
2115 : TYPE(cp_logger_type), POINTER :: logger
2116 : TYPE(dbcsr_type) :: B, B_square, C, D, D_product
2117 :
2118 1146 : CALL timeset(routineN, handle)
2119 :
2120 1146 : logger => cp_get_default_logger()
2121 1146 : IF (logger%para_env%is_source()) THEN
2122 1058 : unit_nr = cp_logger_get_default_unit_nr(logger, local=.TRUE.)
2123 : ELSE
2124 : unit_nr = -1
2125 : END IF
2126 :
2127 : ! Calculate the norm of the matrix alpha*matrix, and scale it until it is less than 1.0
2128 1146 : norm_scalar = ABS(alpha)*dbcsr_frobenius_norm(matrix)
2129 :
2130 : ! k=scaling parameter
2131 1146 : k = 1
2132 1008 : DO
2133 2154 : IF ((norm_scalar/2.0_dp**k) <= one) EXIT
2134 1008 : k = k + 1
2135 : END DO
2136 :
2137 : ! copy and scale the input matrix in matrix C and in matrix D
2138 1146 : CALL dbcsr_create(C, template=matrix, matrix_type=dbcsr_type_no_symmetry)
2139 1146 : CALL dbcsr_copy(C, matrix)
2140 1146 : CALL dbcsr_scale(C, alpha_scalar=alpha/2.0_dp**k)
2141 :
2142 1146 : CALL dbcsr_create(D, template=matrix, matrix_type=dbcsr_type_no_symmetry)
2143 1146 : CALL dbcsr_copy(D, C)
2144 :
2145 : ! write(*,*)
2146 : ! write(*,*)
2147 : ! CALL dbcsr_print(D, nodata=.FALSE., matlab_format=.TRUE., variable_name="D", unit_nr=6)
2148 :
2149 : ! set the B matrix as B=Identity+D
2150 1146 : CALL dbcsr_create(B, template=matrix, matrix_type=dbcsr_type_no_symmetry)
2151 1146 : CALL dbcsr_copy(B, D)
2152 1146 : CALL dbcsr_add_on_diag(B, alpha_scalar=one)
2153 :
2154 : ! CALL dbcsr_print(B, nodata=.FALSE., matlab_format=.TRUE., variable_name="B", unit_nr=6)
2155 :
2156 : ! Calculate the norm of C and moltiply by toll to be used as a threshold
2157 1146 : norm_C = toll*dbcsr_frobenius_norm(matrix)
2158 :
2159 : ! iteration for the truncated taylor series expansion
2160 1146 : CALL dbcsr_create(D_product, template=matrix, matrix_type=dbcsr_type_no_symmetry)
2161 1146 : i = 1
2162 : DO
2163 12676 : i = i + 1
2164 : ! compute D_product=D*C
2165 : CALL dbcsr_multiply("N", "N", one, D, C, &
2166 12676 : zero, D_product, filter_eps=threshold)
2167 :
2168 : ! copy D_product in D
2169 12676 : CALL dbcsr_copy(D, D_product)
2170 :
2171 : ! calculate B=B+D_product/fat(i)
2172 12676 : factorial = ifac(i)
2173 12676 : CALL dbcsr_add(B, D_product, one, factorial)
2174 :
2175 : ! check for convergence using the norm of D (copy of the matrix D_product) and C
2176 12676 : norm_D = factorial*dbcsr_frobenius_norm(D)
2177 12676 : IF (norm_D < norm_C) EXIT
2178 : END DO
2179 :
2180 : ! start the k iteration for the squaring of the matrix
2181 1146 : CALL dbcsr_create(B_square, template=matrix, matrix_type=dbcsr_type_no_symmetry)
2182 3300 : DO i = 1, k
2183 : !compute B_square=B*B
2184 : CALL dbcsr_multiply("N", "N", one, B, B, &
2185 2154 : zero, B_square, filter_eps=threshold)
2186 : ! copy Bsquare in B to iterate
2187 3300 : CALL dbcsr_copy(B, B_square)
2188 : END DO
2189 :
2190 : ! copy B_square in matrix_exp and
2191 1146 : CALL dbcsr_copy(matrix_exp, B_square)
2192 :
2193 : ! scale matrix_exp by omega, matrix_exp=omega*B_square
2194 1146 : CALL dbcsr_scale(matrix_exp, alpha_scalar=omega)
2195 : ! write(*,*) alpha,omega
2196 :
2197 1146 : CALL dbcsr_release(B)
2198 1146 : CALL dbcsr_release(C)
2199 1146 : CALL dbcsr_release(D)
2200 1146 : CALL dbcsr_release(D_product)
2201 1146 : CALL dbcsr_release(B_square)
2202 :
2203 1146 : CALL timestop(handle)
2204 :
2205 1146 : END SUBROUTINE matrix_exponential
2206 :
2207 : ! **************************************************************************************************
2208 : !> \brief McWeeny purification of a matrix in the orthonormal basis
2209 : !> \param matrix_p Matrix to purify (needs to be almost idempotent already)
2210 : !> \param threshold Threshold used as filter_eps and convergence criteria
2211 : !> \param max_steps Max number of iterations
2212 : !> \par History
2213 : !> 2013.01 created [Florian Schiffmann]
2214 : !> 2014.07 slightly refactored [Ole Schuett]
2215 : !> \author Florian Schiffmann
2216 : ! **************************************************************************************************
2217 234 : SUBROUTINE purify_mcweeny_orth(matrix_p, threshold, max_steps)
2218 : TYPE(dbcsr_type), DIMENSION(:) :: matrix_p
2219 : REAL(KIND=dp) :: threshold
2220 : INTEGER :: max_steps
2221 :
2222 : CHARACTER(LEN=*), PARAMETER :: routineN = 'purify_mcweeny_orth'
2223 :
2224 : INTEGER :: handle, i, ispin, unit_nr
2225 : REAL(KIND=dp) :: frob_norm, trace
2226 : TYPE(cp_logger_type), POINTER :: logger
2227 : TYPE(dbcsr_type) :: matrix_pp, matrix_tmp
2228 :
2229 234 : CALL timeset(routineN, handle)
2230 234 : logger => cp_get_default_logger()
2231 234 : IF (logger%para_env%is_source()) THEN
2232 117 : unit_nr = cp_logger_get_default_unit_nr(logger, local=.TRUE.)
2233 : ELSE
2234 117 : unit_nr = -1
2235 : END IF
2236 :
2237 234 : CALL dbcsr_create(matrix_pp, template=matrix_p(1), matrix_type=dbcsr_type_no_symmetry)
2238 234 : CALL dbcsr_create(matrix_tmp, template=matrix_p(1), matrix_type=dbcsr_type_no_symmetry)
2239 234 : CALL dbcsr_trace(matrix_p(1), trace)
2240 :
2241 476 : DO ispin = 1, SIZE(matrix_p)
2242 476 : DO i = 1, max_steps
2243 : CALL dbcsr_multiply("N", "N", 1.0_dp, matrix_p(ispin), matrix_p(ispin), &
2244 242 : 0.0_dp, matrix_pp, filter_eps=threshold)
2245 :
2246 : ! test convergence
2247 242 : CALL dbcsr_copy(matrix_tmp, matrix_pp)
2248 242 : CALL dbcsr_add(matrix_tmp, matrix_p(ispin), 1.0_dp, -1.0_dp)
2249 242 : frob_norm = dbcsr_frobenius_norm(matrix_tmp) ! tmp = PP - P
2250 242 : IF (unit_nr > 0) WRITE (unit_nr, '(t3,a,f16.8)') "McWeeny: Deviation of idempotency", frob_norm
2251 242 : IF (unit_nr > 0) CALL m_flush(unit_nr)
2252 :
2253 : ! construct new P
2254 242 : CALL dbcsr_copy(matrix_tmp, matrix_pp)
2255 : CALL dbcsr_multiply("N", "N", -2.0_dp, matrix_pp, matrix_p(ispin), &
2256 242 : 3.0_dp, matrix_tmp, filter_eps=threshold)
2257 242 : CALL dbcsr_copy(matrix_p(ispin), matrix_tmp) ! tmp = 3PP - 2PPP
2258 :
2259 : ! frob_norm < SQRT(trace*threshold)
2260 242 : IF (frob_norm*frob_norm < trace*threshold) EXIT
2261 : END DO
2262 : END DO
2263 :
2264 234 : CALL dbcsr_release(matrix_pp)
2265 234 : CALL dbcsr_release(matrix_tmp)
2266 234 : CALL timestop(handle)
2267 234 : END SUBROUTINE purify_mcweeny_orth
2268 :
2269 : ! **************************************************************************************************
2270 : !> \brief McWeeny purification of a matrix in the non-orthonormal basis
2271 : !> \param matrix_p Matrix to purify (needs to be almost idempotent already)
2272 : !> \param matrix_s Overlap-Matrix
2273 : !> \param threshold Threshold used as filter_eps and convergence criteria
2274 : !> \param max_steps Max number of iterations
2275 : !> \par History
2276 : !> 2013.01 created [Florian Schiffmann]
2277 : !> 2014.07 slightly refactored [Ole Schuett]
2278 : !> \author Florian Schiffmann
2279 : ! **************************************************************************************************
2280 196 : SUBROUTINE purify_mcweeny_nonorth(matrix_p, matrix_s, threshold, max_steps)
2281 : TYPE(dbcsr_type), DIMENSION(:) :: matrix_p
2282 : TYPE(dbcsr_type) :: matrix_s
2283 : REAL(KIND=dp) :: threshold
2284 : INTEGER :: max_steps
2285 :
2286 : CHARACTER(LEN=*), PARAMETER :: routineN = 'purify_mcweeny_nonorth'
2287 :
2288 : INTEGER :: handle, i, ispin, unit_nr
2289 : REAL(KIND=dp) :: frob_norm, trace
2290 : TYPE(cp_logger_type), POINTER :: logger
2291 : TYPE(dbcsr_type) :: matrix_ps, matrix_psp, matrix_test
2292 :
2293 196 : CALL timeset(routineN, handle)
2294 :
2295 196 : logger => cp_get_default_logger()
2296 196 : IF (logger%para_env%is_source()) THEN
2297 98 : unit_nr = cp_logger_get_default_unit_nr(logger, local=.TRUE.)
2298 : ELSE
2299 98 : unit_nr = -1
2300 : END IF
2301 :
2302 196 : CALL dbcsr_create(matrix_ps, template=matrix_p(1), matrix_type=dbcsr_type_no_symmetry)
2303 196 : CALL dbcsr_create(matrix_psp, template=matrix_p(1), matrix_type=dbcsr_type_no_symmetry)
2304 196 : CALL dbcsr_create(matrix_test, template=matrix_p(1), matrix_type=dbcsr_type_no_symmetry)
2305 :
2306 392 : DO ispin = 1, SIZE(matrix_p)
2307 404 : DO i = 1, max_steps
2308 : CALL dbcsr_multiply("N", "N", 1.0_dp, matrix_p(ispin), matrix_s, &
2309 208 : 0.0_dp, matrix_ps, filter_eps=threshold)
2310 : CALL dbcsr_multiply("N", "N", 1.0_dp, matrix_ps, matrix_p(ispin), &
2311 208 : 0.0_dp, matrix_psp, filter_eps=threshold)
2312 208 : IF (i == 1) CALL dbcsr_trace(matrix_ps, trace)
2313 :
2314 : ! test convergence
2315 208 : CALL dbcsr_copy(matrix_test, matrix_psp)
2316 208 : CALL dbcsr_add(matrix_test, matrix_p(ispin), 1.0_dp, -1.0_dp)
2317 208 : frob_norm = dbcsr_frobenius_norm(matrix_test) ! test = PSP - P
2318 208 : IF (unit_nr > 0) WRITE (unit_nr, '(t3,a,2f16.8)') "McWeeny: Deviation of idempotency", frob_norm
2319 208 : IF (unit_nr > 0) CALL m_flush(unit_nr)
2320 :
2321 : ! construct new P
2322 208 : CALL dbcsr_copy(matrix_p(ispin), matrix_psp)
2323 : CALL dbcsr_multiply("N", "N", -2.0_dp, matrix_ps, matrix_psp, &
2324 208 : 3.0_dp, matrix_p(ispin), filter_eps=threshold)
2325 :
2326 : ! frob_norm < SQRT(trace*threshold)
2327 208 : IF (frob_norm*frob_norm < trace*threshold) EXIT
2328 : END DO
2329 : END DO
2330 :
2331 196 : CALL dbcsr_release(matrix_ps)
2332 196 : CALL dbcsr_release(matrix_psp)
2333 196 : CALL dbcsr_release(matrix_test)
2334 196 : CALL timestop(handle)
2335 196 : END SUBROUTINE purify_mcweeny_nonorth
2336 :
2337 0 : END MODULE iterate_matrix
|